feat: allow recording audio in workflow builder

2026-07-22 11:51:04 +02:00 · 2026-03-25 15:01:39 +05:30 · 2026-03-25 15:01:39 +05:30 · 2fa4191d9b
commit 2fa4191d9b
parent ac0731a374
22 changed files with 700 additions and 246 deletions
--- a/api/routes/workflow_recording.py
+++ b/api/routes/workflow_recording.py
@ -2,9 +2,10 @@

 from typing import Annotated, Optional

-from fastapi import APIRouter, Depends, HTTPException, Query
+from fastapi import APIRouter, Depends, File, Form, HTTPException, Query, UploadFile
 from loguru import logger

+from api.constants import DEPLOYMENT_MODE
 from api.db import db_client
 from api.db.workflow_recording_client import generate_short_id
 from api.enums import StorageBackend
@ -16,6 +17,7 @@ from api.schemas.workflow_recording import (
    RecordingUploadResponseSchema,
 )
 from api.services.auth.depends import get_user
+from api.services.mps_service_key_client import mps_service_key_client
 from api.services.storage import storage_fs

 router = APIRouter(prefix="/workflow-recordings", tags=["workflow-recordings"])
@ -216,3 +218,42 @@ async def delete_recording(
        raise HTTPException(
            status_code=500, detail="Failed to delete recording"
        ) from exc
+
+
+@router.post(
+    "/transcribe",
+    summary="Transcribe an audio file",
+)
+async def transcribe_audio(
+    file: UploadFile = File(...),
+    language: str = Form("en"),
+    user=Depends(get_user),
+):
+    """Transcribe an uploaded audio file using MPS STT."""
+    try:
+        audio_data = await file.read()
+
+        if DEPLOYMENT_MODE == "oss":
+            result = await mps_service_key_client.transcribe_audio(
+                audio_data=audio_data,
+                filename=file.filename or "audio.wav",
+                content_type=file.content_type or "audio/wav",
+                language=language,
+                created_by=str(user.provider_id),
+            )
+        else:
+            result = await mps_service_key_client.transcribe_audio(
+                audio_data=audio_data,
+                filename=file.filename or "audio.wav",
+                content_type=file.content_type or "audio/wav",
+                language=language,
+                organization_id=user.selected_organization_id,
+            )
+
+        return result
+
+    except Exception as exc:
+        logger.error(f"Error transcribing audio: {exc}")
+        raise HTTPException(
+            status_code=500, detail="Failed to transcribe audio"
+        ) from exc
--- a/api/services/mps_service_key_client.py
+++ b/api/services/mps_service_key_client.py
@ -351,6 +351,71 @@ class MPSServiceKeyClient:
                    response=response,
                )

+    async def transcribe_audio(
+        self,
+        audio_data: bytes,
+        filename: str = "audio.wav",
+        content_type: str = "audio/wav",
+        language: str = "en",
+        model: str = "default",
+        correlation_id: Optional[str] = None,
+        organization_id: Optional[int] = None,
+        created_by: Optional[str] = None,
+    ) -> dict:
+        """
+        Transcribe an audio file via MPS STT API.
+
+        Args:
+            audio_data: Raw audio bytes
+            filename: Name of the audio file
+            content_type: MIME type of the audio (e.g., audio/wav, audio/mp3)
+            language: Language code for transcription (default: "en")
+            model: Model tier name (default: "default")
+            correlation_id: Optional correlation ID for tracking
+            organization_id: Organization ID (for authenticated mode)
+            created_by: User provider ID (for OSS mode)
+
+        Returns:
+            Dictionary containing transcription result with keys like
+            'transcript', 'duration_seconds', etc.
+
+        Raises:
+            httpx.HTTPStatusError: If the API call fails
+        """
+        async with httpx.AsyncClient(timeout=httpx.Timeout(60.0)) as client:
+            files = {
+                "file": (filename, audio_data, content_type),
+            }
+            data = {
+                "language": language,
+                "model": model,
+            }
+            if correlation_id:
+                data["correlation_id"] = correlation_id
+
+            headers = self._get_headers(organization_id, created_by)
+            # Remove Content-Type so httpx sets the correct multipart boundary
+            headers.pop("Content-Type", None)
+
+            response = await client.post(
+                f"{self.base_url}/api/v1/stt/transcribe",
+                files=files,
+                data=data,
+                headers=headers,
+            )
+
+            if response.status_code == 200:
+                return response.json()
+            else:
+                logger.error(
+                    f"Failed to transcribe audio: {response.status_code} - {response.text}"
+                )
+                raise httpx.HTTPStatusError(
+                    f"Failed to transcribe audio: {response.text}",
+                    request=response.request,
+                    response=response,
+                )
+
    def validate_service_key(self, service_key: str) -> bool:
        """
        Synchronously validate a Dograh service key by checking usage via MPS.
--- a/api/services/pipecat/realtime_feedback_observer.py
+++ b/api/services/pipecat/realtime_feedback_observer.py
@ -165,49 +165,39 @@ class RealtimeFeedbackObserver(BaseObserver):
        frame = data.frame
        frame_direction = data.direction

-        logger.trace(f"{self} Received Frame: {frame} Direction: {frame_direction}")
-
-        # Handle pipeline termination - stop clock task
-        if isinstance(frame, (EndFrame, CancelFrame, StopFrame)):
-            await self._cancel_clock_task()
-            return
-
-        # Handle interruptions - clear any queued bot text
-        if isinstance(frame, InterruptionFrame):
-            await self._handle_interruption()
-            return
-
-        # Bot speaking state - WS only (ephemeral state signals, not persisted)
-        if isinstance(frame, BotStartedSpeakingFrame):
-            await self._send_ws(
-                {"type": RealtimeFeedbackType.BOT_STARTED_SPEAKING.value, "payload": {}}
-            )
-            return
-        if isinstance(frame, BotStoppedSpeakingFrame):
-            await self._send_ws(
-                {"type": RealtimeFeedbackType.BOT_STOPPED_SPEAKING.value, "payload": {}}
-            )
-            return
-
-        # User mute state - WS only (ephemeral state signals, not persisted)
-        if isinstance(frame, UserMuteStartedFrame):
-            await self._send_ws(
-                {"type": RealtimeFeedbackType.USER_MUTE_STARTED.value, "payload": {}}
-            )
-            return
-        if isinstance(frame, UserMuteStoppedFrame):
-            await self._send_ws(
-                {"type": RealtimeFeedbackType.USER_MUTE_STOPPED.value, "payload": {}}
-            )
-            return
-
        # Skip already processed frames (frames can be observed multiple times)
        if frame.id in self._frames_seen:
            return
        self._frames_seen.add(frame.id)

+        logger.trace(f"{self} Received Frame: {frame} Direction: {frame_direction}")
+
+        # Handle pipeline termination - stop clock task
+        if isinstance(frame, (EndFrame, CancelFrame, StopFrame)):
+            await self._cancel_clock_task()
+        # Handle interruptions - clear any queued bot text
+        elif isinstance(frame, InterruptionFrame):
+            await self._handle_interruption()
+        # Bot speaking state - WS only (ephemeral state signals, not persisted)
+        elif isinstance(frame, BotStartedSpeakingFrame):
+            await self._send_ws(
+                {"type": RealtimeFeedbackType.BOT_STARTED_SPEAKING.value, "payload": {}}
+            )
+        elif isinstance(frame, BotStoppedSpeakingFrame):
+            await self._send_ws(
+                {"type": RealtimeFeedbackType.BOT_STOPPED_SPEAKING.value, "payload": {}}
+            )
+        # User mute state - WS only (ephemeral state signals, not persisted)
+        elif isinstance(frame, UserMuteStartedFrame):
+            await self._send_ws(
+                {"type": RealtimeFeedbackType.USER_MUTE_STARTED.value, "payload": {}}
+            )
+        elif isinstance(frame, UserMuteStoppedFrame):
+            await self._send_ws(
+                {"type": RealtimeFeedbackType.USER_MUTE_STOPPED.value, "payload": {}}
+            )
        # Handle user transcriptions (interim) - WebSocket only
-        if isinstance(frame, InterimTranscriptionFrame):
+        elif isinstance(frame, InterimTranscriptionFrame):
            await self._send_ws(
                {
                    "type": RealtimeFeedbackType.USER_TRANSCRIPTION.value,
--- a/api/services/workflow/pipecat_engine_context_composer.py
+++ b/api/services/workflow/pipecat_engine_context_composer.py
@ -77,11 +77,8 @@ def compose_system_prompt_for_node(

    parts = [p for p in (global_prompt, formatted_node_prompt) if p]

-    if has_recordings:
+    if has_recordings and "RECORDING_ID:" in formatted_node_prompt:
        parts.append(RECORDING_RESPONSE_MODE_INSTRUCTIONS)
-        # TODO: Append per-node available recordings list here once
-        # Node.recording_ids is populated. The list should include
-        # recording_id and a short description so the LLM can choose.

    return "\n\n".join(parts)

--- a/2
+++ b/2
@ -1 +1 @@
-Subproject commit 3f566a4ba1e112255cc7459735bdb4b716948d59
+Subproject commit 2e2171e2a64ec87b3964fbc2440b5291489912a8
--- a/ui/src/app/workflow/[workflowId]/RenderWorkflow.tsx
+++ b/ui/src/app/workflow/[workflowId]/RenderWorkflow.tsx
@ -14,6 +14,7 @@ import type { DocumentResponseSchema, RecordingResponseSchema, ToolResponse } fr
 import { FlowEdge, FlowNode, NodeType } from "@/components/flow/types";
 import { Button } from '@/components/ui/button';
 import { Tooltip, TooltipContent, TooltipProvider, TooltipTrigger } from '@/components/ui/tooltip';
+import { useUserConfig } from '@/context/UserConfigContext';
 import { WorkflowConfigurations } from '@/types/workflow-configurations';

 import AddNodePanel from "../../../components/flow/AddNodePanel";
@ -64,6 +65,11 @@ interface RenderWorkflowProps {
 }

 function RenderWorkflow({ initialWorkflowName, workflowId, initialFlow, initialTemplateContextVariables, initialWorkflowConfigurations, user }: RenderWorkflowProps) {
+    const { userConfig } = useUserConfig();
+    const ttsProvider = (userConfig?.tts?.provider as string) ?? "";
+    const ttsModel = (userConfig?.tts?.model as string) ?? "";
+    const ttsVoiceId = (userConfig?.tts?.voice as string) ?? "";
+
    const [isContextVarsDialogOpen, setIsContextVarsDialogOpen] = useState(false);
    const [isConfigurationsDialogOpen, setIsConfigurationsDialogOpen] = useState(false);
    const [isDictionaryDialogOpen, setIsDictionaryDialogOpen] = useState(false);
@ -125,10 +131,15 @@ function RenderWorkflow({ initialWorkflowName, workflowId, initialFlow, initialT
                    setTools(toolsResponse.data);
                }

-                // Fetch recordings for this workflow
+                // Fetch recordings for this workflow filtered by active TTS config
                try {
                    const recordingsResponse = await listRecordingsApiV1WorkflowRecordingsGet({
-                        query: { workflow_id: workflowId },
+                        query: {
+                            workflow_id: workflowId,
+                            tts_provider: ttsProvider || undefined,
+                            tts_model: ttsModel || undefined,
+                            tts_voice_id: ttsVoiceId || undefined,
+                        },
                    });
                    if (recordingsResponse.data) {
                        setRecordings(recordingsResponse.data.recordings);
@ -142,7 +153,7 @@ function RenderWorkflow({ initialWorkflowName, workflowId, initialFlow, initialT
        };

        fetchData();
-    }, [workflowId]);
+    }, [workflowId, ttsProvider, ttsModel, ttsVoiceId]);

    // Memoize defaultEdgeOptions to prevent unnecessary re-renders
    const defaultEdgeOptions = useMemo(() => ({
--- a/ui/src/app/workflow/[workflowId]/components/RecordingsDialog.tsx
+++ b/ui/src/app/workflow/[workflowId]/components/RecordingsDialog.tsx
@ -1,4 +1,4 @@
-import { Loader2, Trash2Icon, Upload } from "lucide-react";
+import { Loader2, Mic, Square, Trash2Icon, Upload } from "lucide-react";
 import { useCallback, useEffect, useRef, useState } from "react";

 import {
@ -6,6 +6,7 @@ import {
    deleteRecordingApiV1WorkflowRecordingsRecordingIdDelete,
    getUploadUrlApiV1WorkflowRecordingsUploadUrlPost,
    listRecordingsApiV1WorkflowRecordingsGet,
+    transcribeAudioApiV1WorkflowRecordingsTranscribePost,
 } from "@/client";
 import type { RecordingResponseSchema } from "@/client/types.gen";
 import { Button } from "@/components/ui/button";
@ -18,6 +19,15 @@ import {
 } from "@/components/ui/dialog";
 import { Input } from "@/components/ui/input";
 import { Label } from "@/components/ui/label";
+import {
+    Select,
+    SelectContent,
+    SelectItem,
+    SelectTrigger,
+    SelectValue,
+} from "@/components/ui/select";
+import { Textarea } from "@/components/ui/textarea";
+import { LANGUAGE_DISPLAY_NAMES } from "@/constants/languages";
 import { useUserConfig } from "@/context/UserConfigContext";

 interface RecordingsDialogProps {
@ -29,6 +39,8 @@ interface RecordingsDialogProps {

 const MAX_FILE_SIZE = 5 * 1024 * 1024; // 5MB

+type RecordingStep = "idle" | "naming" | "recording" | "transcribing";
+
 export const RecordingsDialog = ({
    open,
    onOpenChange,
@ -42,7 +54,16 @@ export const RecordingsDialog = ({
    const [transcript, setTranscript] = useState("");
    const [selectedFile, setSelectedFile] = useState<File | null>(null);
    const [error, setError] = useState<string | null>(null);
+    const [language, setLanguage] = useState("multi");
+    const [recordingStep, setRecordingStep] = useState<RecordingStep>("idle");
+    const [recordingFilename, setRecordingFilename] = useState("");
+    const [recordingDuration, setRecordingDuration] = useState(0);
+    const mediaRecorderRef = useRef<MediaRecorder | null>(null);
+    const audioChunksRef = useRef<Blob[]>([]);
+    const recordingTimerRef = useRef<ReturnType<typeof setInterval> | null>(null);
    const fileInputRef = useRef<HTMLInputElement>(null);
+    const languageRef = useRef(language);
+    languageRef.current = language;

    const ttsProvider = (userConfig?.tts?.provider as string) ?? "";
    const ttsModel = (userConfig?.tts?.model as string) ?? "";
@ -70,14 +91,119 @@ export const RecordingsDialog = ({
        }
    }, [workflowId, ttsProvider, ttsModel, ttsVoiceId, onRecordingsChange]);

+    // Cancel the elapsed-time interval, if one is running, and drop its handle.
+    const stopRecordingTimer = useCallback(() => {
+        const timer = recordingTimerRef.current;
+        if (!timer) return;
+        clearInterval(timer);
+        recordingTimerRef.current = null;
+    }, []);
+
+    // Stop the current MediaRecorder unless there is none or it is already inactive.
+    const stopRecording = useCallback(() => {
+        const recorder = mediaRecorderRef.current;
+        if (!recorder || recorder.state === "inactive") return;
+        recorder.stop();
+    }, []);
+
+    // Put the record flow back to its initial state: zero elapsed time,
+    // empty recording name, and the "idle" step.
+    const resetRecordingState = useCallback(() => {
+        setRecordingDuration(0);
+        setRecordingFilename("");
+        setRecordingStep("idle");
+    }, []);
+
    useEffect(() => {
        if (open) {
            fetchRecordings();
            setError(null);
            setTranscript("");
            setSelectedFile(null);
+            setLanguage("multi");
+            resetRecordingState();
        }
-    }, [open, fetchRecordings]);
+    }, [open, fetchRecordings, resetRecordingState]);
+
+    useEffect(() => {
+        if (!open) {
+            stopRecording();
+            stopRecordingTimer();
+        }
+    }, [open, stopRecording, stopRecordingTimer]);
+
+    // Send `file` to the transcription endpoint and pre-fill the transcript
+    // field with the result. Failure is non-fatal: an error message is shown
+    // and the user can still type the transcript by hand.
+    const transcribeFile = async (file: File) => {
+        setRecordingStep("transcribing");
+        try {
+            // Read the language from the ref rather than from state: this
+            // function is also invoked from the MediaRecorder `onstop` closure,
+            // which was created on an earlier render.
+            const currentLang = languageRef.current;
+            const result = await transcribeAudioApiV1WorkflowRecordingsTranscribePost({
+                body: { file, language: currentLang },
+            });
+            // The generated client can report a failed request through `error`
+            // instead of throwing (unless throwOnError is enabled); route that
+            // case to the same fallback as a thrown error so it is not silent.
+            if (result.error) {
+                throw new Error("Transcription request failed");
+            }
+            const data = result.data as Record<string, unknown> | undefined;
+            if (data?.transcript) {
+                setTranscript(data.transcript as string);
+            }
+        } catch {
+            // Transcription failed — user can still type manually
+            setError("Auto-transcription failed. You can type the transcript manually.");
+        } finally {
+            setRecordingStep("idle");
+        }
+    };
+
+    // Ask for microphone access and start a MediaRecorder session. When the
+    // recorder stops, the captured chunks are assembled into a File, checked
+    // against MAX_FILE_SIZE, selected for upload, and sent for transcription.
+    const startRecording = async () => {
+        // Kept outside the try block so the catch handler can release the
+        // microphone if setup fails after access was granted.
+        let stream: MediaStream | null = null;
+        try {
+            stream = await navigator.mediaDevices.getUserMedia({ audio: true });
+            const activeStream = stream;
+            const mediaRecorder = new MediaRecorder(activeStream);
+            mediaRecorderRef.current = mediaRecorder;
+            audioChunksRef.current = [];
+
+            mediaRecorder.ondataavailable = (e) => {
+                if (e.data.size > 0) audioChunksRef.current.push(e.data);
+            };
+
+            // Captured now so the name typed before recording is the one used,
+            // even if the naming state is reset before `onstop` fires.
+            const filename = recordingFilename.trim() || "recording";
+            mediaRecorder.onstop = () => {
+                activeStream.getTracks().forEach((t) => t.stop());
+                stopRecordingTimer();
+
+                const mimeType = mediaRecorder.mimeType;
+                const blob = new Blob(audioChunksRef.current, { type: mimeType });
+                if (blob.size > MAX_FILE_SIZE) {
+                    setError(`Recording (${(blob.size / (1024 * 1024)).toFixed(1)}MB) exceeds the maximum allowed size of 5MB.`);
+                    resetRecordingState();
+                    return;
+                }
+                // Derive the extension from the container actually produced;
+                // some browsers record audio/ogg, which must not be named .mp4.
+                let ext = "mp4";
+                if (mimeType.includes("webm")) {
+                    ext = "webm";
+                } else if (mimeType.includes("ogg")) {
+                    ext = "ogg";
+                }
+                const file = new File([blob], `${filename}.${ext}`, { type: mimeType });
+                setSelectedFile(file);
+                setError(null);
+                transcribeFile(file);
+            };
+
+            mediaRecorder.start();
+            setRecordingStep("recording");
+            setRecordingDuration(0);
+            setError(null);
+            recordingTimerRef.current = setInterval(() => {
+                setRecordingDuration((d) => d + 1);
+            }, 1000);
+        } catch {
+            // If the stream was acquired before the failure (e.g. MediaRecorder
+            // construction or start() threw), stop its tracks so the
+            // microphone is not left open.
+            stream?.getTracks().forEach((t) => t.stop());
+            setError("Microphone access denied. Please allow microphone permissions.");
+            resetRecordingState();
+        }
+    };
+
+    // Click handler for the Stop button; delegates to stopRecording.
+    const handleStopRecording = (): void => stopRecording();
+
+    // Handle a change of the file input: reject files over MAX_FILE_SIZE,
+    // otherwise select the file and start auto-transcription.
+    const handleFileSelect = (file: File | null) => {
+        if (!file) {
+            // Nothing picked: clear any previous error and selection.
+            setError(null);
+            setSelectedFile(null);
+            return;
+        }
+
+        if (file.size > MAX_FILE_SIZE) {
+            const sizeMb = (file.size / (1024 * 1024)).toFixed(1);
+            setError(`File size (${sizeMb}MB) exceeds the maximum allowed size of 5MB.`);
+            setSelectedFile(null);
+            const input = fileInputRef.current;
+            if (input) input.value = "";
+            return;
+        }
+
+        setError(null);
+        setSelectedFile(file);
+        transcribeFile(file);
+    };

    const handleUpload = async () => {
        if (!selectedFile || !transcript.trim()) return;
@ -137,6 +263,7 @@ export const RecordingsDialog = ({
                        original_filename: selectedFile.name,
                        file_size_bytes: selectedFile.size,
                        mime_type: selectedFile.type,
+                        language,
                    },
                },
            });
@ -144,6 +271,8 @@ export const RecordingsDialog = ({
            // Reset form and refresh list
            setTranscript("");
            setSelectedFile(null);
+            setLanguage("multi");
+            resetRecordingState();
            if (fileInputRef.current) fileInputRef.current.value = "";
            await fetchRecordings();
        } catch (err) {
@ -166,13 +295,17 @@ export const RecordingsDialog = ({
        }
    };

+    const isRecording = recordingStep === "recording";
+    const isTranscribing = recordingStep === "transcribing";
+    const isBusy = uploading || isRecording || isTranscribing;
+
    return (
        <Dialog open={open} onOpenChange={onOpenChange}>
            <DialogContent className="max-w-lg max-h-[80vh] overflow-y-auto">
                <DialogHeader>
                    <DialogTitle>Workflow Recordings</DialogTitle>
                    <DialogDescription>
-                        Upload audio recordings for hybrid prompts. Recordings are
+                        Upload or record audio for hybrid prompts. Recordings are
                        scoped to your current TTS configuration. Use{" "}
                        <code className="text-xs bg-muted px-1 rounded">@</code> in
                        prompt fields to insert them.
@ -211,61 +344,158 @@ export const RecordingsDialog = ({

                {/* Upload Section */}
                <div className="space-y-3 border rounded-md p-3">
-                    <Label className="text-sm font-medium">Upload New Recording</Label>
+                    <Label className="text-sm font-medium">Add New Recording</Label>
+
+                    {/* Audio source: file picker or record */}
                    <div>
                        <Label className="text-xs text-muted-foreground">
                            Audio File
                        </Label>
-                        <input
-                            ref={fileInputRef}
-                            type="file"
-                            accept="audio/*"
-                            onChange={(e) => {
-                                const file = e.target.files?.[0] ?? null;
-                                if (file && file.size > MAX_FILE_SIZE) {
-                                    setError(
-                                        `File size (${(file.size / (1024 * 1024)).toFixed(1)}MB) exceeds the maximum allowed size of 5MB.`
-                                    );
-                                    setSelectedFile(null);
-                                    if (fileInputRef.current) fileInputRef.current.value = "";
-                                    return;
-                                }
-                                setError(null);
-                                setSelectedFile(file);
-                            }}
-                            className="hidden"
-                        />
-                        <Button
-                            type="button"
-                            variant="outline"
-                            size="sm"
-                            className="w-full justify-start text-sm font-normal"
-                            onClick={() => fileInputRef.current?.click()}
-                        >
-                            <Upload className="w-4 h-4 mr-2 shrink-0" />
-                            {selectedFile ? (
-                                <span className="truncate">
-                                    {selectedFile.name} ({(selectedFile.size / (1024 * 1024)).toFixed(1)}MB)
-                                </span>
-                            ) : (
-                                <span className="text-muted-foreground">Choose audio file (max 5MB)</span>
+                        <div className="flex gap-2">
+                            <input
+                                ref={fileInputRef}
+                                type="file"
+                                accept="audio/*"
+                                onChange={(e) => handleFileSelect(e.target.files?.[0] ?? null)}
+                                className="hidden"
+                            />
+                            <Button
+                                type="button"
+                                variant="outline"
+                                size="sm"
+                                className="flex-1 justify-start text-sm font-normal"
+                                onClick={() => fileInputRef.current?.click()}
+                                disabled={isBusy}
+                            >
+                                <Upload className="w-4 h-4 mr-2 shrink-0" />
+                                {selectedFile && recordingStep !== "naming" ? (
+                                    <span className="truncate">
+                                        {selectedFile.name} ({(selectedFile.size / (1024 * 1024)).toFixed(1)}MB)
+                                    </span>
+                                ) : (
+                                    <span className="text-muted-foreground">Choose audio file (max 5MB)</span>
+                                )}
+                            </Button>
+                            {recordingStep === "idle" && (
+                                <Button
+                                    type="button"
+                                    variant="outline"
+                                    size="sm"
+                                    onClick={() => setRecordingStep("naming")}
+                                    disabled={uploading || isTranscribing}
+                                >
+                                    <Mic className="w-4 h-4 mr-1" />
+                                    Record
+                                </Button>
                            )}
-                        </Button>
+                        </div>
                    </div>
+
+                    {/* Recording: filename + start/stop */}
+                    {(recordingStep === "naming" || isRecording) && (
+                        <div className="space-y-2 rounded-md border border-dashed p-3 bg-muted/20">
+                            {recordingStep === "naming" && (
+                                <>
+                                    <div>
+                                        <Label className="text-xs text-muted-foreground">
+                                            Recording Name
+                                        </Label>
+                                        <Input
+                                            placeholder="e.g. greeting, hold-message"
+                                            value={recordingFilename}
+                                            onChange={(e) => setRecordingFilename(e.target.value)}
+                                            autoFocus
+                                        />
+                                    </div>
+                                    <div className="flex gap-2">
+                                        <Button
+                                            size="sm"
+                                            onClick={startRecording}
+                                            disabled={!recordingFilename.trim()}
+                                        >
+                                            <Mic className="w-4 h-4 mr-1" />
+                                            Start Recording
+                                        </Button>
+                                        <Button
+                                            size="sm"
+                                            variant="ghost"
+                                            onClick={resetRecordingState}
+                                        >
+                                            Cancel
+                                        </Button>
+                                    </div>
+                                </>
+                            )}
+                            {isRecording && (
+                                <div className="flex items-center gap-3">
+                                    <span className="relative flex h-3 w-3">
+                                        <span className="animate-ping absolute inline-flex h-full w-full rounded-full bg-red-400 opacity-75" />
+                                        <span className="relative inline-flex rounded-full h-3 w-3 bg-red-500" />
+                                    </span>
+                                    <span className="text-sm font-mono">
+                                        {Math.floor(recordingDuration / 60)}:{(recordingDuration % 60).toString().padStart(2, "0")}
+                                    </span>
+                                    <span className="text-xs text-muted-foreground">{recordingFilename}</span>
+                                    <Button
+                                        size="sm"
+                                        variant="destructive"
+                                        onClick={handleStopRecording}
+                                        className="ml-auto"
+                                    >
+                                        <Square className="w-4 h-4 mr-1" />
+                                        Stop
+                                    </Button>
+                                </div>
+                            )}
+                        </div>
+                    )}
+
+                    {/* Transcribing progress */}
+                    {isTranscribing && (
+                        <div className="flex items-center gap-2 text-sm text-muted-foreground">
+                            <Loader2 className="w-4 h-4 animate-spin" />
+                            Transcribing audio...
+                        </div>
+                    )}
+
+                    {/* Language */}
+                    <div>
+                        <Label className="text-xs text-muted-foreground">
+                            Language
+                        </Label>
+                        <Select value={language} onValueChange={setLanguage}>
+                            <SelectTrigger className="h-9 text-sm">
+                                <SelectValue />
+                            </SelectTrigger>
+                            <SelectContent>
+                                {Object.entries(LANGUAGE_DISPLAY_NAMES).map(([code, name]) => (
+                                    <SelectItem key={code} value={code}>
+                                        {name}
+                                    </SelectItem>
+                                ))}
+                            </SelectContent>
+                        </Select>
+                    </div>
+
+                    {/* Transcript */}
                    <div>
                        <Label className="text-xs text-muted-foreground">
                            Transcript
                        </Label>
-                        <Input
-                            placeholder="What does this recording say?"
+                        <Textarea
+                            placeholder={isTranscribing ? "Transcribing..." : "What does this recording say?"}
                            value={transcript}
                            onChange={(e) => setTranscript(e.target.value)}
+                            disabled={isTranscribing}
+                            rows={3}
+                            className="resize-none text-sm"
                        />
                    </div>
+
                    <Button
                        size="sm"
                        onClick={handleUpload}
-                        disabled={!selectedFile || !transcript.trim() || uploading}
+                        disabled={!selectedFile || !transcript.trim() || isBusy}
                    >
                        {uploading ? (
                            <Loader2 className="w-4 h-4 mr-1 animate-spin" />
--- a/ui/src/app/workflow/[workflowId]/hooks/useWorkflowState.ts
+++ b/ui/src/app/workflow/[workflowId]/hooks/useWorkflowState.ts
@ -363,7 +363,13 @@ export const useWorkflowState = ({
    // Save workflow function
    const saveWorkflow = useCallback(async (updateWorkflowDefinition: boolean = true) => {
        if (!user || !rfInstance.current) return;
-        const flow = rfInstance.current.toObject();
+        // Read nodes/edges from the Zustand store (synchronously up-to-date)
+        // and viewport from the ReactFlow instance to build the flow object.
+        // This avoids a race condition where rfInstance.toObject() may return
+        // stale node data if React hasn't re-rendered yet after a store update.
+        const { nodes: currentNodes, edges: currentEdges } = useWorkflowStore.getState();
+        const viewport = rfInstance.current.getViewport();
+        const flow = { nodes: currentNodes, edges: currentEdges, viewport };
        try {
            await updateWorkflowApiV1WorkflowWorkflowIdPut({
                path: {
--- a/ui/src/client/sdk.gen.ts
+++ b/ui/src/client/sdk.gen.ts
--- a/ui/src/client/types.gen.ts
+++ b/ui/src/client/types.gen.ts
@ -80,6 +80,11 @@ export type AuthUserResponse = {
    is_superuser: boolean;
 };

+export type BodyTranscribeAudioApiV1WorkflowRecordingsTranscribePost = {
+    file: Blob | File;
+    language?: string;
+};
+
 export type CallDispositionCodes = {
    disposition_codes?: Array<string>;
 };
@ -5508,6 +5513,37 @@ export type DeleteRecordingApiV1WorkflowRecordingsRecordingIdDeleteResponses = {
    200: unknown;
 };

+export type TranscribeAudioApiV1WorkflowRecordingsTranscribePostData = {
+    body: BodyTranscribeAudioApiV1WorkflowRecordingsTranscribePost;
+    headers?: {
+        authorization?: string | null;
+        'X-API-Key'?: string | null;
+    };
+    path?: never;
+    query?: never;
+    url: '/api/v1/workflow-recordings/transcribe';
+};
+
+export type TranscribeAudioApiV1WorkflowRecordingsTranscribePostErrors = {
+    /**
+     * Not found
+     */
+    404: unknown;
+    /**
+     * Validation Error
+     */
+    422: HttpValidationError;
+};
+
+export type TranscribeAudioApiV1WorkflowRecordingsTranscribePostError = TranscribeAudioApiV1WorkflowRecordingsTranscribePostErrors[keyof TranscribeAudioApiV1WorkflowRecordingsTranscribePostErrors];
+
+export type TranscribeAudioApiV1WorkflowRecordingsTranscribePostResponses = {
+    /**
+     * Successful Response
+     */
+    200: unknown;
+};
+
 export type SignupApiV1AuthSignupPostData = {
    body: SignupRequest;
    path?: never;
--- a/ui/src/components/ServiceConfiguration.tsx
+++ b/ui/src/components/ServiceConfiguration.tsx
@ -13,6 +13,7 @@ import { Label } from "@/components/ui/label";
 import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from "@/components/ui/select";
 import { Tabs, TabsContent, TabsList, TabsTrigger } from "@/components/ui/tabs";
 import { VoiceSelector } from "@/components/VoiceSelector";
+import { LANGUAGE_DISPLAY_NAMES } from "@/constants/languages";
 import { useUserConfig } from "@/context/UserConfigContext";

 type ServiceSegment = "llm" | "tts" | "stt" | "embeddings";
@ -46,105 +47,6 @@ const TAB_CONFIG: { key: ServiceSegment; label: string }[] = [
    { key: "embeddings", label: "Embedding" },
 ];

-// Display names for language codes (Deepgram + Sarvam)
-const LANGUAGE_DISPLAY_NAMES: Record<string, string> = {
-    "multi": "Multilingual (Auto-detect)",
-    // Arabic
-    "ar": "Arabic",
-    "ar-AE": "Arabic (UAE)",
-    "ar-SA": "Arabic (Saudi Arabia)",
-    "ar-QA": "Arabic (Qatar)",
-    "ar-KW": "Arabic (Kuwait)",
-    "ar-SY": "Arabic (Syria)",
-    "ar-LB": "Arabic (Lebanon)",
-    "ar-PS": "Arabic (Palestine)",
-    "ar-JO": "Arabic (Jordan)",
-    "ar-EG": "Arabic (Egypt)",
-    "ar-SD": "Arabic (Sudan)",
-    "ar-TD": "Arabic (Chad)",
-    "ar-MA": "Arabic (Morocco)",
-    "ar-DZ": "Arabic (Algeria)",
-    "ar-TN": "Arabic (Tunisia)",
-    "ar-IQ": "Arabic (Iraq)",
-    "ar-IR": "Arabic (Iran)",
-    // Other languages
-    "be": "Belarusian",
-    "bn": "Bengali",
-    "bs": "Bosnian",
-    "bg": "Bulgarian",
-    "ca": "Catalan",
-    "cs": "Czech",
-    "da": "Danish",
-    "da-DK": "Danish (Denmark)",
-    "de": "German",
-    "de-CH": "German (Switzerland)",
-    "el": "Greek",
-    "en": "English",
-    "en-US": "English (US)",
-    "en-AU": "English (Australia)",
-    "en-GB": "English (UK)",
-    "en-IN": "English (India)",
-    "en-NZ": "English (New Zealand)",
-    "es": "Spanish",
-    "es-419": "Spanish (Latin America)",
-    "et": "Estonian",
-    "fa": "Persian",
-    "fi": "Finnish",
-    "fr": "French",
-    "fr-CA": "French (Canada)",
-    "he": "Hebrew",
-    "hi": "Hindi",
-    "hr": "Croatian",
-    "hu": "Hungarian",
-    "id": "Indonesian",
-    "it": "Italian",
-    "ja": "Japanese",
-    "kn": "Kannada",
-    "ko": "Korean",
-    "ko-KR": "Korean (South Korea)",
-    "lt": "Lithuanian",
-    "lv": "Latvian",
-    "mk": "Macedonian",
-    "mr": "Marathi",
-    "ms": "Malay",
-    "nl": "Dutch",
-    "nl-BE": "Flemish",
-    "no": "Norwegian",
-    "pl": "Polish",
-    "pt": "Portuguese",
-    "pt-BR": "Portuguese (Brazil)",
-    "pt-PT": "Portuguese (Portugal)",
-    "ro": "Romanian",
-    "ru": "Russian",
-    "sk": "Slovak",
-    "sl": "Slovenian",
-    "sr": "Serbian",
-    "sv": "Swedish",
-    "sv-SE": "Swedish (Sweden)",
-    "ta": "Tamil",
-    "te": "Telugu",
-    "th": "Thai",
-    "tl": "Tagalog",
-    "tr": "Turkish",
-    "uk": "Ukrainian",
-    "ur": "Urdu",
-    "vi": "Vietnamese",
-    "zh-CN": "Chinese (Simplified)",
-    "zh-TW": "Chinese (Traditional)",
-    // Sarvam Indian languages
-    "bn-IN": "Bengali",
-    "gu-IN": "Gujarati",
-    "hi-IN": "Hindi",
-    "kn-IN": "Kannada",
-    "ml-IN": "Malayalam",
-    "mr-IN": "Marathi",
-    "od-IN": "Odia",
-    "pa-IN": "Punjabi",
-    "ta-IN": "Tamil",
-    "te-IN": "Telugu",
-    "as-IN": "Assamese",
-};
-
 // Display names for Sarvam voices
 const VOICE_DISPLAY_NAMES: Record<string, string> = {
    "anushka": "Anushka (Female)",
--- a/ui/src/components/flow/edges/CustomEdge.tsx
+++ b/ui/src/components/flow/edges/CustomEdge.tsx
@ -215,11 +215,7 @@ export default function CustomEdge(props: CustomEdgeProps) {
    const handleSaveEdgeData = useCallback(async (updatedData: FlowEdgeData) => {
        // Use the workflow store's updateEdge method to properly track history
        updateEdge(id, { data: updatedData });
-
-        // Save the workflow after updating edge data with a small delay to ensure state is updated
-        setTimeout(async () => {
-            await saveWorkflow();
-        }, 100);
+        await saveWorkflow();
    }, [id, updateEdge, saveWorkflow]);

    return (
--- a/ui/src/components/flow/nodes/AgentNode.tsx
+++ b/ui/src/components/flow/nodes/AgentNode.tsx
@ -89,10 +89,7 @@ export const AgentNode = memo(({ data, selected, id }: AgentNodeProps) => {
            document_uuids: documentUuids.length > 0 ? documentUuids : undefined,
        });
        setOpen(false);
-        // Save the workflow after updating node data with a small delay to ensure state is updated
-        setTimeout(async () => {
-            await saveWorkflow();
-        }, 100);
+        await saveWorkflow();
    };

    // Reset form state when dialog opens
@ -127,27 +124,23 @@ export const AgentNode = memo(({ data, selected, id }: AgentNodeProps) => {
    }, [data, open]);

    // Handle cleanup of stale document UUIDs
-    const handleStaleDocuments = useCallback((staleUuids: string[]) => {
+    const handleStaleDocuments = useCallback(async (staleUuids: string[]) => {
        const cleanedUuids = (data.document_uuids ?? []).filter(uuid => !staleUuids.includes(uuid));
        handleSaveNodeData({
            ...data,
            document_uuids: cleanedUuids.length > 0 ? cleanedUuids : undefined,
        });
-        setTimeout(async () => {
-            await saveWorkflow();
-        }, 100);
+        await saveWorkflow();
    }, [data, handleSaveNodeData, saveWorkflow]);

    // Handle cleanup of stale tool UUIDs
-    const handleStaleTools = useCallback((staleUuids: string[]) => {
+    const handleStaleTools = useCallback(async (staleUuids: string[]) => {
        const cleanedUuids = (data.tool_uuids ?? []).filter(uuid => !staleUuids.includes(uuid));
        handleSaveNodeData({
            ...data,
            tool_uuids: cleanedUuids.length > 0 ? cleanedUuids : undefined,
        });
-        setTimeout(async () => {
-            await saveWorkflow();
-        }, 100);
+        await saveWorkflow();
    }, [data, handleSaveNodeData, saveWorkflow]);

    return (
--- a/ui/src/components/flow/nodes/EndCall.tsx
+++ b/ui/src/components/flow/nodes/EndCall.tsx
@ -75,10 +75,7 @@ export const EndCall = memo(({ data, selected, id }: EndCallNodeProps) => {
            add_global_prompt: addGlobalPrompt,
        });
        setOpen(false);
-        // Save the workflow after updating node data with a small delay to ensure state is updated
-        setTimeout(async () => {
-            await saveWorkflow();
-        }, 100);
+        await saveWorkflow();
    };

    // Reset form state when dialog opens
--- a/ui/src/components/flow/nodes/GlobalNode.tsx
+++ b/ui/src/components/flow/nodes/GlobalNode.tsx
@ -52,10 +52,7 @@ export const GlobalNode = memo(({ data, selected, id }: GlobalNodeProps) => {
            name
        });
        setOpen(false);
-        // Save the workflow after updating node data with a small delay to ensure state is updated
-        setTimeout(async () => {
-            await saveWorkflow();
-        }, 100);
+        await saveWorkflow();
    };

    // Reset form state when dialog opens
--- a/ui/src/components/flow/nodes/QANode.tsx
+++ b/ui/src/components/flow/nodes/QANode.tsx
@ -66,9 +66,7 @@ export const QANode = memo(({ data, selected, id }: QANodeProps) => {
            qa_sample_rate: qaSampleRate,
        });
        setOpen(false);
-        setTimeout(async () => {
-            await saveWorkflow();
-        }, 100);
+        await saveWorkflow();
    };

    const resetFormState = () => {
--- a/ui/src/components/flow/nodes/StartCall.tsx
+++ b/ui/src/components/flow/nodes/StartCall.tsx
@ -104,10 +104,7 @@ export const StartCall = memo(({ data, selected, id }: StartCallNodeProps) => {
            document_uuids: documentUuids.length > 0 ? documentUuids : undefined,
        });
        setOpen(false);
-        // Save the workflow after updating node data with a small delay to ensure state is updated
-        setTimeout(async () => {
-            await saveWorkflow();
-        }, 100);
+        await saveWorkflow();
    };

    // Reset form state when dialog opens
@ -148,27 +145,23 @@ export const StartCall = memo(({ data, selected, id }: StartCallNodeProps) => {
    }, [data, open]);

    // Handle cleanup of stale document UUIDs
-    const handleStaleDocuments = useCallback((staleUuids: string[]) => {
+    const handleStaleDocuments = useCallback(async (staleUuids: string[]) => {
        const cleanedUuids = (data.document_uuids ?? []).filter(uuid => !staleUuids.includes(uuid));
        handleSaveNodeData({
            ...data,
            document_uuids: cleanedUuids.length > 0 ? cleanedUuids : undefined,
        });
-        setTimeout(async () => {
-            await saveWorkflow();
-        }, 100);
+        await saveWorkflow();
    }, [data, handleSaveNodeData, saveWorkflow]);

    // Handle cleanup of stale tool UUIDs
-    const handleStaleTools = useCallback((staleUuids: string[]) => {
+    const handleStaleTools = useCallback(async (staleUuids: string[]) => {
        const cleanedUuids = (data.tool_uuids ?? []).filter(uuid => !staleUuids.includes(uuid));
        handleSaveNodeData({
            ...data,
            tool_uuids: cleanedUuids.length > 0 ? cleanedUuids : undefined,
        });
-        setTimeout(async () => {
-            await saveWorkflow();
-        }, 100);
+        await saveWorkflow();
    }, [data, handleSaveNodeData, saveWorkflow]);

    return (
--- a/ui/src/components/flow/nodes/TriggerNode.tsx
+++ b/ui/src/components/flow/nodes/TriggerNode.tsx
@ -61,10 +61,7 @@ export const TriggerNode = memo(({ data, selected, id }: TriggerNodeProps) => {
            trigger_path: triggerPath,
        });
        setOpen(false);
-        // Save the workflow after updating node data
-        setTimeout(async () => {
-            await saveWorkflow();
-        }, 100);
+        await saveWorkflow();
    };

    // Reset form state when dialog opens
--- a/ui/src/components/flow/nodes/WebhookNode.tsx
+++ b/ui/src/components/flow/nodes/WebhookNode.tsx
@ -86,9 +86,7 @@ export const WebhookNode = memo(({ data, selected, id }: WebhookNodeProps) => {
            payload_template: validation.parsed as Record<string, unknown>,
        });
        setOpen(false);
-        setTimeout(async () => {
-            await saveWorkflow();
-        }, 100);
+        await saveWorkflow();
    };

    const handleOpenChange = (newOpen: boolean) => {
--- a/ui/src/components/flow/nodes/common/NodeEditDialog.tsx
+++ b/ui/src/components/flow/nodes/common/NodeEditDialog.tsx
@ -71,12 +71,13 @@ export const NodeEditDialog = ({
        const handleKeyDown = (e: KeyboardEvent) => {
            if ((e.metaKey || e.ctrlKey) && e.key === 's') {
                e.preventDefault();
+                e.stopImmediatePropagation();
                handleSave();
            }
        };

-        window.addEventListener('keydown', handleKeyDown);
-        return () => window.removeEventListener('keydown', handleKeyDown);
+        window.addEventListener('keydown', handleKeyDown, true);
+        return () => window.removeEventListener('keydown', handleKeyDown, true);
    }, [open, handleSave]);

    return (
--- a/ui/src/components/workflow/CreateWorkflowButton.tsx
+++ b/ui/src/components/workflow/CreateWorkflowButton.tsx
@ -1,22 +1,130 @@
 'use client';

-import { PlusIcon } from 'lucide-react';
+import { Bot, ChevronDown, LayoutTemplate, PlusIcon } from 'lucide-react';
 import { useRouter } from 'next/navigation';
+import { useState } from 'react';
+import { toast } from 'sonner';

+import { createWorkflowApiV1WorkflowCreateDefinitionPost } from '@/client/sdk.gen';
 import { Button } from "@/components/ui/button";
+import {
+    DropdownMenu,
+    DropdownMenuContent,
+    DropdownMenuItem,
+    DropdownMenuTrigger,
+} from "@/components/ui/dropdown-menu";
+import { useAuth } from '@/lib/auth';
+import logger from '@/lib/logger';
+import { getRandomId } from '@/lib/utils';
+
+// Workflow definition sent when the user picks "Blank Canvas": a single
+// startCall node carrying a default prompt, no edges, and a preset viewport.
+const BLANK_WORKFLOW_DEFINITION = {
+    nodes: [
+        {
+            id: "1",
+            type: "startCall",
+            position: { x: 175, y: 60 },
+            data: {
+                prompt: "# Goal\nYou are a helpful agent who is handing a conversation over voice with a human. This is a voice conversation, so transcripts can be error prone.\n\n## Rules\n- Language: UK English but does not have to be correct english\n- Keep responses short and 2-3 sentences max\n- If you have to repeat something that you said in your previous two turns, then rephrase a bit while keeping the same meaning. Never repeat the exact same words as in your previous 2 responses.\n\n## Speech Handling\n- There could be multiple transcription errors. \n- Accept variations: yes/yeah/yep/aye, no/nah/nope\n- If user says \"sorry?\" or \"pardon me\" or \"can you repeat\"  or \"what?\", they might not have heard you- so just repeat what you just said.\n\n### Flow\nStart by saying \"Hi\". Be polite and courteous. ",
+                name: "start call",
+                allow_interrupt: false,
+                invalid: false,
+                validationMessage: null,
+                is_static: false,
+                add_global_prompt: false,
+                wait_for_user_response: false,
+                detect_voicemail: true,
+                delayed_start: false,
+                is_start: true,
+                selected_through_edge: false,
+                hovered_through_edge: false,
+                extraction_enabled: false,
+                selected: false,
+                dragging: false,
+            },
+        },
+    ],
+    edges: [],
+    viewport: { x: 808, y: 269, zoom: 0.75 },
+};

+/**
+ * "Create Agent" dropdown button.
+ *
+ * Offers two entry points: navigating to the agent builder page, or creating
+ * a workflow from BLANK_WORKFLOW_DEFINITION through the API and opening it.
+ */
 export function CreateWorkflowButton() {
    const router = useRouter();
-    const handleClick = () => {
+    const { user, getAccessToken } = useAuth();
+    const [isCreating, setIsCreating] = useState(false);
+
+    // Navigate to the agent-builder creation page.
+    const handleAgentBuilder = () => {
        router.push('/workflow/create');
    };

+    // Create a workflow from the blank definition and open it.
+    // Does nothing while a creation request is in flight or when `user` is unset.
+    const handleBlankCanvas = async () => {
+        if (isCreating || !user) return;
+        setIsCreating(true);
+
+        try {
+            const accessToken = await getAccessToken();
+            const name = `Workflow-${getRandomId()}`;
+            const response = await createWorkflowApiV1WorkflowCreateDefinitionPost({
+                body: {
+                    name,
+                    workflow_definition: BLANK_WORKFLOW_DEFINITION as unknown as { [key: string]: unknown },
+                },
+                headers: {
+                    'Authorization': `Bearer ${accessToken}`,
+                },
+            });
+
+            // A response without a workflow id cannot be navigated to. Treat it
+            // as a failure so the catch block below logs it and shows a toast,
+            // rather than silently returning the button to its idle state.
+            const workflowId = response.data?.id;
+            if (!workflowId) {
+                throw new Error('Create workflow response did not include a workflow id');
+            }
+            router.push(`/workflow/${workflowId}`);
+        } catch (err) {
+            logger.error(`Error creating blank workflow: ${err}`);
+            toast.error('Failed to create workflow');
+        } finally {
+            setIsCreating(false);
+        }
+    };
+
    return (
-        <Button
-            onClick={handleClick}
-        >
-            <PlusIcon className="w-4 h-4" />
-            Create Agent
-        </Button>
+        <DropdownMenu>
+            <DropdownMenuTrigger asChild>
+                <Button disabled={isCreating}>
+                    <PlusIcon className="w-4 h-4" />
+                    {isCreating ? 'Creating...' : 'Create Agent'}
+                    <ChevronDown className="w-4 h-4" />
+                </Button>
+            </DropdownMenuTrigger>
+            <DropdownMenuContent align="end">
+                <DropdownMenuItem onClick={handleAgentBuilder} className="cursor-pointer">
+                    <Bot className="w-4 h-4 mr-2" />
+                    <div>
+                        <div className="font-medium">Use Agent Builder</div>
+                        <div className="text-xs text-muted-foreground">AI generates a workflow from your description</div>
+                    </div>
+                </DropdownMenuItem>
+                <DropdownMenuItem onClick={handleBlankCanvas} disabled={isCreating} className="cursor-pointer">
+                    <LayoutTemplate className="w-4 h-4 mr-2" />
+                    <div>
+                        <div className="font-medium">Blank Canvas</div>
+                        <div className="text-xs text-muted-foreground">Start from scratch with an empty workflow</div>
+                    </div>
+                </DropdownMenuItem>
+            </DropdownMenuContent>
+        </DropdownMenu>
    );
 }
--- a/ui/src/constants/languages.ts
+++ b/ui/src/constants/languages.ts
@ -0,0 +1,104 @
+// Display names for language codes (Deepgram + Sarvam).
+//
+// Keys are language codes; values are the human-readable labels rendered for
+// them in language selects. "multi" is the multilingual auto-detect option.
+// NOTE(review): several "-IN" codes share a label with their base code
+// (e.g. "hi" and "hi-IN" are both "Hindi"), so a UI that lists every entry
+// shows duplicate labels — confirm whether callers filter the list first.
+export const LANGUAGE_DISPLAY_NAMES: Record<string, string> = {
+    "multi": "Multilingual (Auto-detect)",
+    // Arabic
+    "ar": "Arabic",
+    "ar-AE": "Arabic (UAE)",
+    "ar-SA": "Arabic (Saudi Arabia)",
+    "ar-QA": "Arabic (Qatar)",
+    "ar-KW": "Arabic (Kuwait)",
+    "ar-SY": "Arabic (Syria)",
+    "ar-LB": "Arabic (Lebanon)",
+    "ar-PS": "Arabic (Palestine)",
+    "ar-JO": "Arabic (Jordan)",
+    "ar-EG": "Arabic (Egypt)",
+    "ar-SD": "Arabic (Sudan)",
+    "ar-TD": "Arabic (Chad)",
+    "ar-MA": "Arabic (Morocco)",
+    "ar-DZ": "Arabic (Algeria)",
+    "ar-TN": "Arabic (Tunisia)",
+    "ar-IQ": "Arabic (Iraq)",
+    "ar-IR": "Arabic (Iran)",
+    // Other languages
+    "be": "Belarusian",
+    "bn": "Bengali",
+    "bs": "Bosnian",
+    "bg": "Bulgarian",
+    "ca": "Catalan",
+    "cs": "Czech",
+    "da": "Danish",
+    "da-DK": "Danish (Denmark)",
+    "de": "German",
+    "de-CH": "German (Switzerland)",
+    "el": "Greek",
+    "en": "English",
+    "en-US": "English (US)",
+    "en-AU": "English (Australia)",
+    "en-GB": "English (UK)",
+    "en-IN": "English (India)",
+    "en-NZ": "English (New Zealand)",
+    "es": "Spanish",
+    "es-419": "Spanish (Latin America)",
+    "et": "Estonian",
+    "fa": "Persian",
+    "fi": "Finnish",
+    "fr": "French",
+    "fr-CA": "French (Canada)",
+    "he": "Hebrew",
+    "hi": "Hindi",
+    "hr": "Croatian",
+    "hu": "Hungarian",
+    "id": "Indonesian",
+    "it": "Italian",
+    "ja": "Japanese",
+    "kn": "Kannada",
+    "ko": "Korean",
+    "ko-KR": "Korean (South Korea)",
+    "lt": "Lithuanian",
+    "lv": "Latvian",
+    "mk": "Macedonian",
+    "mr": "Marathi",
+    "ms": "Malay",
+    "nl": "Dutch",
+    "nl-BE": "Flemish",
+    "no": "Norwegian",
+    "pl": "Polish",
+    "pt": "Portuguese",
+    "pt-BR": "Portuguese (Brazil)",
+    "pt-PT": "Portuguese (Portugal)",
+    "ro": "Romanian",
+    "ru": "Russian",
+    "sk": "Slovak",
+    "sl": "Slovenian",
+    "sr": "Serbian",
+    "sv": "Swedish",
+    "sv-SE": "Swedish (Sweden)",
+    "ta": "Tamil",
+    "te": "Telugu",
+    "th": "Thai",
+    "tl": "Tagalog",
+    "tr": "Turkish",
+    "uk": "Ukrainian",
+    "ur": "Urdu",
+    "vi": "Vietnamese",
+    "zh-CN": "Chinese (Simplified)",
+    "zh-TW": "Chinese (Traditional)",
+    // Sarvam Indian languages
+    "bn-IN": "Bengali",
+    "gu-IN": "Gujarati",
+    "hi-IN": "Hindi",
+    "kn-IN": "Kannada",
+    "ml-IN": "Malayalam",
+    "mr-IN": "Marathi",
+    "od-IN": "Odia",
+    "pa-IN": "Punjabi",
+    "ta-IN": "Tamil",
+    "te-IN": "Telugu",
+    "as-IN": "Assamese",
+};