make image description a server action

2026-04-25 00:16:29 +02:00 · 2025-09-26 19:48:51 +05:30 · 2025-09-26 19:48:51 +05:30 · 2ef59f414d
commit 2ef59f414d
parent 9b53e4d880
3 changed files with 97 additions and 108 deletions
--- a/apps/rowboat/app/actions/uploaded-images.actions.ts
+++ b/apps/rowboat/app/actions/uploaded-images.actions.ts
@ -4,6 +4,9 @@ import { getSignedUrl } from '@aws-sdk/s3-request-presigner';
 import crypto from 'crypto';
 import { authCheck } from '@/app/actions/auth.actions';
 import { USE_AUTH } from '@/app/lib/feature_flags';
+import { GoogleGenerativeAI } from '@google/generative-ai';
+import { UsageTracker } from '@/app/lib/billing';
+import { logUsage } from '@/app/actions/billing.actions';

 export async function getUploadUrlForImage(mimeType: string): Promise<{ id: string; key: string; uploadUrl: string; url: string; mimeType: string }> {
  // Enforce auth in server action context (supports guest mode when auth disabled)
@ -46,3 +49,93 @@ export async function getUploadUrlForImage(mimeType: string): Promise<{ id: stri

  return { id: idWithExt, key, uploadUrl, url: `/api/uploaded-images/${idWithExt}`, mimeType };
 }
+
+export async function describeUploadedImage(id: string): Promise<{ id: string; description: string | null }> {
+  if (USE_AUTH) {
+    await authCheck();
+  }
+
+  if (!id || typeof id !== 'string') {
+    throw new Error('id is required');
+  }
+
+  const bucket = process.env.RAG_UPLOADS_S3_BUCKET || '';
+  if (!bucket) {
+    throw new Error('S3 bucket not configured');
+  }
+
+  const region = process.env.RAG_UPLOADS_S3_REGION || 'us-east-1';
+  const s3 = new S3Client({
+    region,
+    credentials: process.env.AWS_ACCESS_KEY_ID && process.env.AWS_SECRET_ACCESS_KEY ? {
+      accessKeyId: process.env.AWS_ACCESS_KEY_ID as string,
+      secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY as string,
+    } : undefined,
+  });
+
+  const lastDot = id.lastIndexOf('.');
+  const idWithoutExt = lastDot > 0 ? id.slice(0, lastDot) : id;
+  const last2 = idWithoutExt.slice(-2).padStart(2, '0');
+  const dirA = last2.charAt(0);
+  const dirB = last2.charAt(1);
+  const key = `uploaded_images/${dirA}/${dirB}/${id}`;
+
+  // Fetch object bytes from S3
+  const { GetObjectCommand } = await import('@aws-sdk/client-s3');
+  const resp = await s3.send(new GetObjectCommand({ Bucket: bucket, Key: key }));
+  const contentType = resp.ContentType || 'application/octet-stream';
+  const body = resp.Body as any;
+  const chunks: Uint8Array[] = [];
+  await new Promise<void>((resolve, reject) => {
+    body.on('data', (c: Uint8Array) => chunks.push(c));
+    body.on('end', () => resolve());
+    body.on('error', reject);
+  });
+  const buf = Buffer.concat(chunks);
+
+  let descriptionMarkdown: string | null = null;
+  const usageTracker = new UsageTracker();
+  try {
+    const apiKey = process.env.GOOGLE_API_KEY || process.env.GEMINI_API_KEY || '';
+    if (apiKey) {
+      const genAI = new GoogleGenerativeAI(apiKey);
+      const model = genAI.getGenerativeModel({ model: 'gemini-2.5-flash' });
+      const prompt = 'Describe this image in concise, high-quality Markdown. Focus on key objects, text, layout, style, colors, and any notable details. Do not include extra commentary or instructions.';
+      const result = await model.generateContent([
+        { inlineData: { data: buf.toString('base64'), mimeType: contentType } },
+        prompt,
+      ]);
+      const response: any = result.response as any;
+      descriptionMarkdown = response?.text?.() || null;
+
+      // Track usage
+      try {
+        const inputTokens = response?.usageMetadata?.promptTokenCount || 0;
+        const outputTokens = response?.usageMetadata?.candidatesTokenCount || 0;
+        usageTracker.track({
+          type: 'LLM_USAGE',
+          modelName: 'gemini-2.5-flash',
+          inputTokens,
+          outputTokens,
+          context: 'uploaded_images.describe',
+        });
+      } catch {
+        // ignore
+      }
+    }
+  } catch (e) {
+    console.warn('Gemini description failed', e);
+  }
+
+  // Log usage to billing
+  try {
+    const items = usageTracker.flush();
+    if (items.length > 0) {
+      await logUsage({ items });
+    }
+  } catch {
+    // ignore billing logging errors
+  }
+
+  return { id, description: descriptionMarkdown };
+}
--- a/apps/rowboat/app/api/uploaded-images/describe/route.ts
+++ b/apps/rowboat/app/api/uploaded-images/describe/route.ts
@ -1,99 +0,0 @@
-import { NextRequest, NextResponse } from 'next/server';
-import { S3Client, GetObjectCommand } from '@aws-sdk/client-s3';
-import { GoogleGenerativeAI } from '@google/generative-ai';
-import { UsageTracker } from '@/app/lib/billing';
-import { logUsage } from '@/app/actions/billing.actions';
-import { requireAuth } from '@/app/lib/auth';
-
-export async function POST(request: NextRequest) {
-  try {
-    // Require authentication (handles guest mode internally when auth disabled)
-    await requireAuth();
-
-    const { id } = await request.json();
-    if (!id || typeof id !== 'string') {
-      return NextResponse.json({ error: 'id is required' }, { status: 400 });
-    }
-
-    const bucket = process.env.RAG_UPLOADS_S3_BUCKET || '';
-    if (!bucket) {
-      return NextResponse.json({ error: 'S3 bucket not configured' }, { status: 500 });
-    }
-
-    const region = process.env.RAG_UPLOADS_S3_REGION || 'us-east-1';
-    const s3 = new S3Client({
-      region,
-      credentials: process.env.AWS_ACCESS_KEY_ID && process.env.AWS_SECRET_ACCESS_KEY ? {
-        accessKeyId: process.env.AWS_ACCESS_KEY_ID as string,
-        secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY as string,
-      } : undefined,
-    });
-
-    // `id` includes extension (e.g., "<uuid>.png"). Shard using the UUID part.
-    const lastDot = id.lastIndexOf('.');
-    const idWithoutExt = lastDot > 0 ? id.slice(0, lastDot) : id;
-    const last2 = idWithoutExt.slice(-2).padStart(2, '0');
-    const dirA = last2.charAt(0);
-    const dirB = last2.charAt(1);
-    const key = `uploaded_images/${dirA}/${dirB}/${id}`;
-    const resp = await s3.send(new GetObjectCommand({ Bucket: bucket, Key: key }));
-    const contentType = resp.ContentType || 'application/octet-stream';
-    const body = resp.Body as any;
-    const chunks: Uint8Array[] = [];
-    await new Promise<void>((resolve, reject) => {
-      body.on('data', (c: Uint8Array) => chunks.push(c));
-      body.on('end', () => resolve());
-      body.on('error', reject);
-    });
-    const buf = Buffer.concat(chunks);
-
-    let descriptionMarkdown: string | null = null;
-    const usageTracker = new UsageTracker();
-    try {
-      const apiKey = process.env.GOOGLE_API_KEY || process.env.GEMINI_API_KEY || '';
-      if (apiKey) {
-        const genAI = new GoogleGenerativeAI(apiKey);
-        const model = genAI.getGenerativeModel({ model: 'gemini-2.5-flash' });
-        const prompt = 'Describe this image in concise, high-quality Markdown. Focus on key objects, text, layout, style, colors, and any notable details. Do not include extra commentary or instructions.';
-        const result = await model.generateContent([
-          { inlineData: { data: buf.toString('base64'), mimeType: contentType } },
-          prompt,
-        ]);
-        const response: any = result.response as any;
-        descriptionMarkdown = response?.text?.() || null;
-
-        // Track usage similar to agents-runtime
-        try {
-          const inputTokens = response?.usageMetadata?.promptTokenCount || 0;
-          const outputTokens = response?.usageMetadata?.candidatesTokenCount || 0;
-          usageTracker.track({
-            type: 'LLM_USAGE',
-            modelName: 'gemini-2.5-flash',
-            inputTokens,
-            outputTokens,
-            context: 'uploaded_images.describe',
-          });
-        } catch (_) {
-          // ignore usage tracking errors
-        }
-      }
-    } catch (e) {
-      console.warn('Gemini description failed', e);
-    }
-
-    // Log usage to billing if available
-    try {
-      const items = usageTracker.flush();
-      if (items.length > 0) {
-        await logUsage({ items });
-      }
-    } catch (_) {
-      // ignore billing logging errors
-    }
-
-    return NextResponse.json({ id, description: descriptionMarkdown });
-  } catch (e) {
-    console.error('describe error', e);
-    return NextResponse.json({ error: 'Failed to describe' }, { status: 500 });
-  }
-}
--- a/apps/rowboat/components/common/compose-box-playground.tsx
+++ b/apps/rowboat/components/common/compose-box-playground.tsx
@ -112,19 +112,14 @@ export function ComposeBoxPlayground({
                }

                // 4) Ask server to generate description from S3 image
-                const descRes = await fetch('/api/uploaded-images/describe', {
-                    method: 'POST',
-                    headers: { 'Content-Type': 'application/json' },
-                    body: JSON.stringify({ id: imageId }),
-                    signal: controller.signal,
-                });
-                if (descRes.ok) {
-                    const descData = await descRes.json();
+                try {
+                    const { describeUploadedImage } = await import('@/app/actions/uploaded-images.actions');
+                    const descData = await describeUploadedImage(imageId);
                    const description: string | null = descData?.description ?? null;
                    if (uploadAbortRef.current === controller) {
                        setPendingImage({ url: imageUrl, previewSrc, mimeType: file.type, description });
                    }
-                } else {
+                } catch {
                    // If description fails, still allow sending
                    if (uploadAbortRef.current === controller) {
                        setPendingImage({ url: imageUrl, previewSrc, mimeType: file.type, description: null });