feat: allow recording audio in workflow builder

This commit is contained in:
Abhishek Kumar 2026-03-25 15:01:39 +05:30
parent ac0731a374
commit 2fa4191d9b
22 changed files with 700 additions and 246 deletions

View file

@ -2,9 +2,10 @@
from typing import Annotated, Optional
from fastapi import APIRouter, Depends, HTTPException, Query
from fastapi import APIRouter, Depends, File, Form, HTTPException, Query, UploadFile
from loguru import logger
from api.constants import DEPLOYMENT_MODE
from api.db import db_client
from api.db.workflow_recording_client import generate_short_id
from api.enums import StorageBackend
@ -16,6 +17,7 @@ from api.schemas.workflow_recording import (
RecordingUploadResponseSchema,
)
from api.services.auth.depends import get_user
from api.services.mps_service_key_client import mps_service_key_client
from api.services.storage import storage_fs
router = APIRouter(prefix="/workflow-recordings", tags=["workflow-recordings"])
@ -216,3 +218,42 @@ async def delete_recording(
raise HTTPException(
status_code=500, detail="Failed to delete recording"
) from exc
# Upper bound on audio accepted for transcription. Matches the 5MB limit the
# workflow builder UI enforces before it uploads a file.
_MAX_TRANSCRIBE_UPLOAD_BYTES = 5 * 1024 * 1024


@router.post(
    "/transcribe",
    summary="Transcribe an audio file",
)
async def transcribe_audio(
    file: UploadFile = File(...),
    language: str = Form("en"),
    user=Depends(get_user),
):
    """Transcribe an uploaded audio file using MPS STT.

    Args:
        file: Multipart audio upload. Filename and content type fall back to
            "audio.wav" / "audio/wav" when the client does not send them.
        language: Language code forwarded to the STT service (default "en").
        user: Authenticated user resolved by ``get_user``.

    Returns:
        The result returned by ``mps_service_key_client.transcribe_audio``.

    Raises:
        HTTPException: 400 if the upload is empty, 413 if it is larger than
            ``_MAX_TRANSCRIBE_UPLOAD_BYTES``, 500 if transcription fails for
            any other reason.
    """
    try:
        # Read one byte past the limit so an oversized upload is detected
        # without buffering the whole (untrusted) body in memory.
        audio_data = await file.read(_MAX_TRANSCRIBE_UPLOAD_BYTES + 1)
        if not audio_data:
            raise HTTPException(
                status_code=400, detail="Uploaded audio file is empty"
            )
        if len(audio_data) > _MAX_TRANSCRIBE_UPLOAD_BYTES:
            raise HTTPException(
                status_code=413,
                detail="Audio file exceeds the maximum allowed size of 5MB",
            )

        # OSS deployments identify the caller by provider id; every other
        # deployment mode identifies the caller by the selected organization.
        if DEPLOYMENT_MODE == "oss":
            caller_identity = {"created_by": str(user.provider_id)}
        else:
            caller_identity = {"organization_id": user.selected_organization_id}

        return await mps_service_key_client.transcribe_audio(
            audio_data=audio_data,
            filename=file.filename or "audio.wav",
            content_type=file.content_type or "audio/wav",
            language=language,
            **caller_identity,
        )
    except HTTPException:
        # Validation errors raised above must keep their own status code
        # instead of being rewritten to a 500 by the handler below.
        raise
    except Exception as exc:
        logger.error(f"Error transcribing audio: {exc}")
        raise HTTPException(
            status_code=500, detail="Failed to transcribe audio"
        ) from exc

View file

@ -351,6 +351,71 @@ class MPSServiceKeyClient:
response=response,
)
async def transcribe_audio(
    self,
    audio_data: bytes,
    filename: str = "audio.wav",
    content_type: str = "audio/wav",
    language: str = "en",
    model: str = "default",
    correlation_id: Optional[str] = None,
    organization_id: Optional[int] = None,
    created_by: Optional[str] = None,
) -> dict:
    """
    Send audio to the MPS speech-to-text endpoint and return its JSON reply.

    The audio is posted as a multipart form to
    ``{base_url}/api/v1/stt/transcribe`` with a 60 second timeout.

    Args:
        audio_data: Raw audio bytes
        filename: File name reported in the multipart upload
        content_type: MIME type of the audio (e.g., audio/wav, audio/mp3)
        language: Language code for transcription (default: "en")
        model: Model tier name (default: "default")
        correlation_id: Optional tracking ID; only sent when truthy
        organization_id: Organization ID, passed to ``_get_headers``
        created_by: User provider ID, passed to ``_get_headers``

    Returns:
        The parsed JSON body of the response (expected to carry keys such
        as 'transcript' and 'duration_seconds').

    Raises:
        httpx.HTTPStatusError: If the response status is anything but 200
    """
    form_fields = {"language": language, "model": model}
    if correlation_id:
        form_fields["correlation_id"] = correlation_id

    request_headers = self._get_headers(organization_id, created_by)
    # Remove Content-Type so httpx sets the correct multipart boundary
    request_headers.pop("Content-Type", None)

    async with httpx.AsyncClient(timeout=httpx.Timeout(60.0)) as client:
        response = await client.post(
            f"{self.base_url}/api/v1/stt/transcribe",
            files={"file": (filename, audio_data, content_type)},
            data=form_fields,
            headers=request_headers,
        )

        # Guard clause: anything other than 200 is logged and raised.
        if response.status_code != 200:
            logger.error(
                f"Failed to transcribe audio: {response.status_code} - {response.text}"
            )
            raise httpx.HTTPStatusError(
                f"Failed to transcribe audio: {response.text}",
                request=response.request,
                response=response,
            )

        return response.json()
def validate_service_key(self, service_key: str) -> bool:
"""
Synchronously validate a Dograh service key by checking usage via MPS.

View file

@ -165,49 +165,39 @@ class RealtimeFeedbackObserver(BaseObserver):
frame = data.frame
frame_direction = data.direction
logger.trace(f"{self} Received Frame: {frame} Direction: {frame_direction}")
# Handle pipeline termination - stop clock task
if isinstance(frame, (EndFrame, CancelFrame, StopFrame)):
await self._cancel_clock_task()
return
# Handle interruptions - clear any queued bot text
if isinstance(frame, InterruptionFrame):
await self._handle_interruption()
return
# Bot speaking state - WS only (ephemeral state signals, not persisted)
if isinstance(frame, BotStartedSpeakingFrame):
await self._send_ws(
{"type": RealtimeFeedbackType.BOT_STARTED_SPEAKING.value, "payload": {}}
)
return
if isinstance(frame, BotStoppedSpeakingFrame):
await self._send_ws(
{"type": RealtimeFeedbackType.BOT_STOPPED_SPEAKING.value, "payload": {}}
)
return
# User mute state - WS only (ephemeral state signals, not persisted)
if isinstance(frame, UserMuteStartedFrame):
await self._send_ws(
{"type": RealtimeFeedbackType.USER_MUTE_STARTED.value, "payload": {}}
)
return
if isinstance(frame, UserMuteStoppedFrame):
await self._send_ws(
{"type": RealtimeFeedbackType.USER_MUTE_STOPPED.value, "payload": {}}
)
return
# Skip already processed frames (frames can be observed multiple times)
if frame.id in self._frames_seen:
return
self._frames_seen.add(frame.id)
logger.trace(f"{self} Received Frame: {frame} Direction: {frame_direction}")
# Handle pipeline termination - stop clock task
if isinstance(frame, (EndFrame, CancelFrame, StopFrame)):
await self._cancel_clock_task()
# Handle interruptions - clear any queued bot text
elif isinstance(frame, InterruptionFrame):
await self._handle_interruption()
# Bot speaking state - WS only (ephemeral state signals, not persisted)
elif isinstance(frame, BotStartedSpeakingFrame):
await self._send_ws(
{"type": RealtimeFeedbackType.BOT_STARTED_SPEAKING.value, "payload": {}}
)
elif isinstance(frame, BotStoppedSpeakingFrame):
await self._send_ws(
{"type": RealtimeFeedbackType.BOT_STOPPED_SPEAKING.value, "payload": {}}
)
# User mute state - WS only (ephemeral state signals, not persisted)
elif isinstance(frame, UserMuteStartedFrame):
await self._send_ws(
{"type": RealtimeFeedbackType.USER_MUTE_STARTED.value, "payload": {}}
)
elif isinstance(frame, UserMuteStoppedFrame):
await self._send_ws(
{"type": RealtimeFeedbackType.USER_MUTE_STOPPED.value, "payload": {}}
)
# Handle user transcriptions (interim) - WebSocket only
if isinstance(frame, InterimTranscriptionFrame):
elif isinstance(frame, InterimTranscriptionFrame):
await self._send_ws(
{
"type": RealtimeFeedbackType.USER_TRANSCRIPTION.value,

View file

@ -77,11 +77,8 @@ def compose_system_prompt_for_node(
parts = [p for p in (global_prompt, formatted_node_prompt) if p]
if has_recordings:
if has_recordings and "RECORDING_ID:" in formatted_node_prompt:
parts.append(RECORDING_RESPONSE_MODE_INSTRUCTIONS)
# TODO: Append per-node available recordings list here once
# Node.recording_ids is populated. The list should include
# recording_id and a short description so the LLM can choose.
return "\n\n".join(parts)

@ -1 +1 @@
Subproject commit 3f566a4ba1e112255cc7459735bdb4b716948d59
Subproject commit 2e2171e2a64ec87b3964fbc2440b5291489912a8

View file

@ -14,6 +14,7 @@ import type { DocumentResponseSchema, RecordingResponseSchema, ToolResponse } fr
import { FlowEdge, FlowNode, NodeType } from "@/components/flow/types";
import { Button } from '@/components/ui/button';
import { Tooltip, TooltipContent, TooltipProvider, TooltipTrigger } from '@/components/ui/tooltip';
import { useUserConfig } from '@/context/UserConfigContext';
import { WorkflowConfigurations } from '@/types/workflow-configurations';
import AddNodePanel from "../../../components/flow/AddNodePanel";
@ -64,6 +65,11 @@ interface RenderWorkflowProps {
}
function RenderWorkflow({ initialWorkflowName, workflowId, initialFlow, initialTemplateContextVariables, initialWorkflowConfigurations, user }: RenderWorkflowProps) {
const { userConfig } = useUserConfig();
const ttsProvider = (userConfig?.tts?.provider as string) ?? "";
const ttsModel = (userConfig?.tts?.model as string) ?? "";
const ttsVoiceId = (userConfig?.tts?.voice as string) ?? "";
const [isContextVarsDialogOpen, setIsContextVarsDialogOpen] = useState(false);
const [isConfigurationsDialogOpen, setIsConfigurationsDialogOpen] = useState(false);
const [isDictionaryDialogOpen, setIsDictionaryDialogOpen] = useState(false);
@ -125,10 +131,15 @@ function RenderWorkflow({ initialWorkflowName, workflowId, initialFlow, initialT
setTools(toolsResponse.data);
}
// Fetch recordings for this workflow
// Fetch recordings for this workflow filtered by active TTS config
try {
const recordingsResponse = await listRecordingsApiV1WorkflowRecordingsGet({
query: { workflow_id: workflowId },
query: {
workflow_id: workflowId,
tts_provider: ttsProvider || undefined,
tts_model: ttsModel || undefined,
tts_voice_id: ttsVoiceId || undefined,
},
});
if (recordingsResponse.data) {
setRecordings(recordingsResponse.data.recordings);
@ -142,7 +153,7 @@ function RenderWorkflow({ initialWorkflowName, workflowId, initialFlow, initialT
};
fetchData();
}, [workflowId]);
}, [workflowId, ttsProvider, ttsModel, ttsVoiceId]);
// Memoize defaultEdgeOptions to prevent unnecessary re-renders
const defaultEdgeOptions = useMemo(() => ({

View file

@ -1,4 +1,4 @@
import { Loader2, Trash2Icon, Upload } from "lucide-react";
import { Loader2, Mic, Square, Trash2Icon, Upload } from "lucide-react";
import { useCallback, useEffect, useRef, useState } from "react";
import {
@ -6,6 +6,7 @@ import {
deleteRecordingApiV1WorkflowRecordingsRecordingIdDelete,
getUploadUrlApiV1WorkflowRecordingsUploadUrlPost,
listRecordingsApiV1WorkflowRecordingsGet,
transcribeAudioApiV1WorkflowRecordingsTranscribePost,
} from "@/client";
import type { RecordingResponseSchema } from "@/client/types.gen";
import { Button } from "@/components/ui/button";
@ -18,6 +19,15 @@ import {
} from "@/components/ui/dialog";
import { Input } from "@/components/ui/input";
import { Label } from "@/components/ui/label";
import {
Select,
SelectContent,
SelectItem,
SelectTrigger,
SelectValue,
} from "@/components/ui/select";
import { Textarea } from "@/components/ui/textarea";
import { LANGUAGE_DISPLAY_NAMES } from "@/constants/languages";
import { useUserConfig } from "@/context/UserConfigContext";
interface RecordingsDialogProps {
@ -29,6 +39,8 @@ interface RecordingsDialogProps {
const MAX_FILE_SIZE = 5 * 1024 * 1024; // 5MB
type RecordingStep = "idle" | "naming" | "recording" | "transcribing";
export const RecordingsDialog = ({
open,
onOpenChange,
@ -42,7 +54,16 @@ export const RecordingsDialog = ({
const [transcript, setTranscript] = useState("");
const [selectedFile, setSelectedFile] = useState<File | null>(null);
const [error, setError] = useState<string | null>(null);
const [language, setLanguage] = useState("multi");
const [recordingStep, setRecordingStep] = useState<RecordingStep>("idle");
const [recordingFilename, setRecordingFilename] = useState("");
const [recordingDuration, setRecordingDuration] = useState(0);
const mediaRecorderRef = useRef<MediaRecorder | null>(null);
const audioChunksRef = useRef<Blob[]>([]);
const recordingTimerRef = useRef<ReturnType<typeof setInterval> | null>(null);
const fileInputRef = useRef<HTMLInputElement>(null);
const languageRef = useRef(language);
languageRef.current = language;
const ttsProvider = (userConfig?.tts?.provider as string) ?? "";
const ttsModel = (userConfig?.tts?.model as string) ?? "";
@ -70,14 +91,119 @@ export const RecordingsDialog = ({
}
}, [workflowId, ttsProvider, ttsModel, ttsVoiceId, onRecordingsChange]);
// Clears the once-per-second duration ticker, if one is running.
const stopRecordingTimer = useCallback(() => {
  const activeTimer = recordingTimerRef.current;
  if (!activeTimer) return;

  clearInterval(activeTimer);
  recordingTimerRef.current = null;
}, []);
// Stops the current MediaRecorder unless there is none or it is already inactive.
const stopRecording = useCallback(() => {
  const recorder = mediaRecorderRef.current;
  if (!recorder || recorder.state === "inactive") return;

  recorder.stop();
}, []);
// Returns the record-audio flow to its initial state: no step in progress,
// no pending name, and a zeroed duration counter.
const resetRecordingState = useCallback((): void => {
  setRecordingDuration(0);
  setRecordingFilename("");
  setRecordingStep("idle");
}, []);
useEffect(() => {
if (open) {
fetchRecordings();
setError(null);
setTranscript("");
setSelectedFile(null);
setLanguage("multi");
resetRecordingState();
}
}, [open, fetchRecordings]);
}, [open, fetchRecordings, resetRecordingState]);
useEffect(() => {
if (!open) {
stopRecording();
stopRecordingTimer();
}
}, [open, stopRecording, stopRecordingTimer]);
/**
 * Sends `file` to the transcription endpoint and, on success, fills the
 * transcript field with the returned text.
 *
 * The language is read from `languageRef` so a call started from a
 * MediaRecorder callback uses the language selected at call time.
 * Any failure leaves the transcript untouched and shows a message telling
 * the user they can type it manually.
 */
const transcribeFile = async (file: File) => {
  setRecordingStep("transcribing");
  try {
    const result = await transcribeAudioApiV1WorkflowRecordingsTranscribePost({
      body: { file, language: languageRef.current },
    });

    // NOTE(review): the generated client appears to resolve with an `error`
    // field instead of throwing on HTTP failures — confirm against the
    // client configuration. Without this check a 4xx/5xx response would
    // skip the catch block and fail silently.
    if (result.error) {
      throw new Error("Transcription request failed");
    }

    const data = result.data as Record<string, unknown> | undefined;
    const text = data?.transcript;
    // Only accept a non-empty string; the response body is untyped.
    if (typeof text === "string" && text) {
      setTranscript(text);
    }
  } catch {
    // Transcription failed — user can still type manually
    setError("Auto-transcription failed. You can type the transcript manually.");
  } finally {
    setRecordingStep("idle");
  }
};
/**
 * Requests microphone access and starts recording.
 *
 * When the recorder stops, the captured chunks are assembled into a File
 * named after the user-supplied recording name, checked against
 * MAX_FILE_SIZE, stored as the selected file and sent for transcription.
 */
const startRecording = async () => {
  let stream: MediaStream;
  try {
    stream = await navigator.mediaDevices.getUserMedia({ audio: true });
  } catch {
    setError("Microphone access denied. Please allow microphone permissions.");
    resetRecordingState();
    return;
  }

  try {
    const mediaRecorder = new MediaRecorder(stream);
    mediaRecorderRef.current = mediaRecorder;
    audioChunksRef.current = [];

    mediaRecorder.ondataavailable = (e) => {
      if (e.data.size > 0) audioChunksRef.current.push(e.data);
    };

    // Captured now so a later edit of the name field cannot change the file name.
    const filename = recordingFilename.trim() || "recording";

    mediaRecorder.onstop = () => {
      // Release the microphone as soon as recording ends.
      stream.getTracks().forEach((t) => t.stop());
      stopRecordingTimer();

      const mimeType = mediaRecorder.mimeType;
      const blob = new Blob(audioChunksRef.current, { type: mimeType });

      if (blob.size === 0) {
        setError("No audio was captured. Please try recording again.");
        resetRecordingState();
        return;
      }
      if (blob.size > MAX_FILE_SIZE) {
        setError(`Recording (${(blob.size / (1024 * 1024)).toFixed(1)}MB) exceeds the maximum allowed size of 5MB.`);
        resetRecordingState();
        return;
      }

      // Browsers differ in their default container (e.g. webm, ogg, mp4);
      // pick the extension that matches what was actually recorded.
      let ext = "mp4";
      if (mimeType.includes("webm")) ext = "webm";
      else if (mimeType.includes("ogg")) ext = "ogg";

      const file = new File([blob], `${filename}.${ext}`, { type: mimeType });
      setSelectedFile(file);
      setError(null);
      transcribeFile(file);
    };

    mediaRecorder.start();
    setRecordingStep("recording");
    setRecordingDuration(0);
    setError(null);

    recordingTimerRef.current = setInterval(() => {
      setRecordingDuration((d) => d + 1);
    }, 1000);
  } catch {
    // The microphone was granted but the recorder could not be created or
    // started: stop the tracks so the mic is not left open.
    stream.getTracks().forEach((t) => t.stop());
    stopRecordingTimer();
    mediaRecorderRef.current = null;
    setError("Could not start recording. Your browser may not support audio recording.");
    resetRecordingState();
  }
};
// Click handler for the "Stop" button; delegates to stopRecording.
const handleStopRecording = (): void => stopRecording();
// Handles a file picked from the file input: rejects files over
// MAX_FILE_SIZE, otherwise stores the file and starts auto-transcription.
// A null file just clears the selection and any previous error.
const handleFileSelect = (file: File | null) => {
  if (!file) {
    setError(null);
    setSelectedFile(null);
    return;
  }

  if (file.size > MAX_FILE_SIZE) {
    const sizeMb = (file.size / (1024 * 1024)).toFixed(1);
    setError(`File size (${sizeMb}MB) exceeds the maximum allowed size of 5MB.`);
    setSelectedFile(null);
    const input = fileInputRef.current;
    if (input) input.value = "";
    return;
  }

  setError(null);
  setSelectedFile(file);
  transcribeFile(file);
};
const handleUpload = async () => {
if (!selectedFile || !transcript.trim()) return;
@ -137,6 +263,7 @@ export const RecordingsDialog = ({
original_filename: selectedFile.name,
file_size_bytes: selectedFile.size,
mime_type: selectedFile.type,
language,
},
},
});
@ -144,6 +271,8 @@ export const RecordingsDialog = ({
// Reset form and refresh list
setTranscript("");
setSelectedFile(null);
setLanguage("multi");
resetRecordingState();
if (fileInputRef.current) fileInputRef.current.value = "";
await fetchRecordings();
} catch (err) {
@ -166,13 +295,17 @@ export const RecordingsDialog = ({
}
};
const isRecording = recordingStep === "recording";
const isTranscribing = recordingStep === "transcribing";
const isBusy = uploading || isRecording || isTranscribing;
return (
<Dialog open={open} onOpenChange={onOpenChange}>
<DialogContent className="max-w-lg max-h-[80vh] overflow-y-auto">
<DialogHeader>
<DialogTitle>Workflow Recordings</DialogTitle>
<DialogDescription>
Upload audio recordings for hybrid prompts. Recordings are
Upload or record audio for hybrid prompts. Recordings are
scoped to your current TTS configuration. Use{" "}
<code className="text-xs bg-muted px-1 rounded">@</code> in
prompt fields to insert them.
@ -211,61 +344,158 @@ export const RecordingsDialog = ({
{/* Upload Section */}
<div className="space-y-3 border rounded-md p-3">
<Label className="text-sm font-medium">Upload New Recording</Label>
<Label className="text-sm font-medium">Add New Recording</Label>
{/* Audio source: file picker or record */}
<div>
<Label className="text-xs text-muted-foreground">
Audio File
</Label>
<input
ref={fileInputRef}
type="file"
accept="audio/*"
onChange={(e) => {
const file = e.target.files?.[0] ?? null;
if (file && file.size > MAX_FILE_SIZE) {
setError(
`File size (${(file.size / (1024 * 1024)).toFixed(1)}MB) exceeds the maximum allowed size of 5MB.`
);
setSelectedFile(null);
if (fileInputRef.current) fileInputRef.current.value = "";
return;
}
setError(null);
setSelectedFile(file);
}}
className="hidden"
/>
<Button
type="button"
variant="outline"
size="sm"
className="w-full justify-start text-sm font-normal"
onClick={() => fileInputRef.current?.click()}
>
<Upload className="w-4 h-4 mr-2 shrink-0" />
{selectedFile ? (
<span className="truncate">
{selectedFile.name} ({(selectedFile.size / (1024 * 1024)).toFixed(1)}MB)
</span>
) : (
<span className="text-muted-foreground">Choose audio file (max 5MB)</span>
<div className="flex gap-2">
<input
ref={fileInputRef}
type="file"
accept="audio/*"
onChange={(e) => handleFileSelect(e.target.files?.[0] ?? null)}
className="hidden"
/>
<Button
type="button"
variant="outline"
size="sm"
className="flex-1 justify-start text-sm font-normal"
onClick={() => fileInputRef.current?.click()}
disabled={isBusy}
>
<Upload className="w-4 h-4 mr-2 shrink-0" />
{selectedFile && recordingStep !== "naming" ? (
<span className="truncate">
{selectedFile.name} ({(selectedFile.size / (1024 * 1024)).toFixed(1)}MB)
</span>
) : (
<span className="text-muted-foreground">Choose audio file (max 5MB)</span>
)}
</Button>
{recordingStep === "idle" && (
<Button
type="button"
variant="outline"
size="sm"
onClick={() => setRecordingStep("naming")}
disabled={uploading || isTranscribing}
>
<Mic className="w-4 h-4 mr-1" />
Record
</Button>
)}
</Button>
</div>
</div>
{/* Recording: filename + start/stop */}
{(recordingStep === "naming" || isRecording) && (
<div className="space-y-2 rounded-md border border-dashed p-3 bg-muted/20">
{recordingStep === "naming" && (
<>
<div>
<Label className="text-xs text-muted-foreground">
Recording Name
</Label>
<Input
placeholder="e.g. greeting, hold-message"
value={recordingFilename}
onChange={(e) => setRecordingFilename(e.target.value)}
autoFocus
/>
</div>
<div className="flex gap-2">
<Button
size="sm"
onClick={startRecording}
disabled={!recordingFilename.trim()}
>
<Mic className="w-4 h-4 mr-1" />
Start Recording
</Button>
<Button
size="sm"
variant="ghost"
onClick={resetRecordingState}
>
Cancel
</Button>
</div>
</>
)}
{isRecording && (
<div className="flex items-center gap-3">
<span className="relative flex h-3 w-3">
<span className="animate-ping absolute inline-flex h-full w-full rounded-full bg-red-400 opacity-75" />
<span className="relative inline-flex rounded-full h-3 w-3 bg-red-500" />
</span>
<span className="text-sm font-mono">
{Math.floor(recordingDuration / 60)}:{(recordingDuration % 60).toString().padStart(2, "0")}
</span>
<span className="text-xs text-muted-foreground">{recordingFilename}</span>
<Button
size="sm"
variant="destructive"
onClick={handleStopRecording}
className="ml-auto"
>
<Square className="w-4 h-4 mr-1" />
Stop
</Button>
</div>
)}
</div>
)}
{/* Transcribing progress */}
{isTranscribing && (
<div className="flex items-center gap-2 text-sm text-muted-foreground">
<Loader2 className="w-4 h-4 animate-spin" />
Transcribing audio...
</div>
)}
{/* Language */}
<div>
<Label className="text-xs text-muted-foreground">
Language
</Label>
<Select value={language} onValueChange={setLanguage}>
<SelectTrigger className="h-9 text-sm">
<SelectValue />
</SelectTrigger>
<SelectContent>
{Object.entries(LANGUAGE_DISPLAY_NAMES).map(([code, name]) => (
<SelectItem key={code} value={code}>
{name}
</SelectItem>
))}
</SelectContent>
</Select>
</div>
{/* Transcript */}
<div>
<Label className="text-xs text-muted-foreground">
Transcript
</Label>
<Input
placeholder="What does this recording say?"
<Textarea
placeholder={isTranscribing ? "Transcribing..." : "What does this recording say?"}
value={transcript}
onChange={(e) => setTranscript(e.target.value)}
disabled={isTranscribing}
rows={3}
className="resize-none text-sm"
/>
</div>
<Button
size="sm"
onClick={handleUpload}
disabled={!selectedFile || !transcript.trim() || uploading}
disabled={!selectedFile || !transcript.trim() || isBusy}
>
{uploading ? (
<Loader2 className="w-4 h-4 mr-1 animate-spin" />

View file

@ -363,7 +363,13 @@ export const useWorkflowState = ({
// Save workflow function
const saveWorkflow = useCallback(async (updateWorkflowDefinition: boolean = true) => {
if (!user || !rfInstance.current) return;
const flow = rfInstance.current.toObject();
// Read nodes/edges from the Zustand store (synchronously up-to-date)
// and viewport from the ReactFlow instance to build the flow object.
// This avoids a race condition where rfInstance.toObject() may return
// stale node data if React hasn't re-rendered yet after a store update.
const { nodes: currentNodes, edges: currentEdges } = useWorkflowStore.getState();
const viewport = rfInstance.current.getViewport();
const flow = { nodes: currentNodes, edges: currentEdges, viewport };
try {
await updateWorkflowApiV1WorkflowWorkflowIdPut({
path: {

File diff suppressed because one or more lines are too long

View file

@ -80,6 +80,11 @@ export type AuthUserResponse = {
is_superuser: boolean;
};
export type BodyTranscribeAudioApiV1WorkflowRecordingsTranscribePost = {
file: Blob | File;
language?: string;
};
export type CallDispositionCodes = {
disposition_codes?: Array<string>;
};
@ -5508,6 +5513,37 @@ export type DeleteRecordingApiV1WorkflowRecordingsRecordingIdDeleteResponses = {
200: unknown;
};
export type TranscribeAudioApiV1WorkflowRecordingsTranscribePostData = {
body: BodyTranscribeAudioApiV1WorkflowRecordingsTranscribePost;
headers?: {
authorization?: string | null;
'X-API-Key'?: string | null;
};
path?: never;
query?: never;
url: '/api/v1/workflow-recordings/transcribe';
};
export type TranscribeAudioApiV1WorkflowRecordingsTranscribePostErrors = {
/**
* Not found
*/
404: unknown;
/**
* Validation Error
*/
422: HttpValidationError;
};
export type TranscribeAudioApiV1WorkflowRecordingsTranscribePostError = TranscribeAudioApiV1WorkflowRecordingsTranscribePostErrors[keyof TranscribeAudioApiV1WorkflowRecordingsTranscribePostErrors];
export type TranscribeAudioApiV1WorkflowRecordingsTranscribePostResponses = {
/**
* Successful Response
*/
200: unknown;
};
export type SignupApiV1AuthSignupPostData = {
body: SignupRequest;
path?: never;

View file

@ -13,6 +13,7 @@ import { Label } from "@/components/ui/label";
import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from "@/components/ui/select";
import { Tabs, TabsContent, TabsList, TabsTrigger } from "@/components/ui/tabs";
import { VoiceSelector } from "@/components/VoiceSelector";
import { LANGUAGE_DISPLAY_NAMES } from "@/constants/languages";
import { useUserConfig } from "@/context/UserConfigContext";
type ServiceSegment = "llm" | "tts" | "stt" | "embeddings";
@ -46,105 +47,6 @@ const TAB_CONFIG: { key: ServiceSegment; label: string }[] = [
{ key: "embeddings", label: "Embedding" },
];
// Display names for language codes (Deepgram + Sarvam)
const LANGUAGE_DISPLAY_NAMES: Record<string, string> = {
"multi": "Multilingual (Auto-detect)",
// Arabic
"ar": "Arabic",
"ar-AE": "Arabic (UAE)",
"ar-SA": "Arabic (Saudi Arabia)",
"ar-QA": "Arabic (Qatar)",
"ar-KW": "Arabic (Kuwait)",
"ar-SY": "Arabic (Syria)",
"ar-LB": "Arabic (Lebanon)",
"ar-PS": "Arabic (Palestine)",
"ar-JO": "Arabic (Jordan)",
"ar-EG": "Arabic (Egypt)",
"ar-SD": "Arabic (Sudan)",
"ar-TD": "Arabic (Chad)",
"ar-MA": "Arabic (Morocco)",
"ar-DZ": "Arabic (Algeria)",
"ar-TN": "Arabic (Tunisia)",
"ar-IQ": "Arabic (Iraq)",
"ar-IR": "Arabic (Iran)",
// Other languages
"be": "Belarusian",
"bn": "Bengali",
"bs": "Bosnian",
"bg": "Bulgarian",
"ca": "Catalan",
"cs": "Czech",
"da": "Danish",
"da-DK": "Danish (Denmark)",
"de": "German",
"de-CH": "German (Switzerland)",
"el": "Greek",
"en": "English",
"en-US": "English (US)",
"en-AU": "English (Australia)",
"en-GB": "English (UK)",
"en-IN": "English (India)",
"en-NZ": "English (New Zealand)",
"es": "Spanish",
"es-419": "Spanish (Latin America)",
"et": "Estonian",
"fa": "Persian",
"fi": "Finnish",
"fr": "French",
"fr-CA": "French (Canada)",
"he": "Hebrew",
"hi": "Hindi",
"hr": "Croatian",
"hu": "Hungarian",
"id": "Indonesian",
"it": "Italian",
"ja": "Japanese",
"kn": "Kannada",
"ko": "Korean",
"ko-KR": "Korean (South Korea)",
"lt": "Lithuanian",
"lv": "Latvian",
"mk": "Macedonian",
"mr": "Marathi",
"ms": "Malay",
"nl": "Dutch",
"nl-BE": "Flemish",
"no": "Norwegian",
"pl": "Polish",
"pt": "Portuguese",
"pt-BR": "Portuguese (Brazil)",
"pt-PT": "Portuguese (Portugal)",
"ro": "Romanian",
"ru": "Russian",
"sk": "Slovak",
"sl": "Slovenian",
"sr": "Serbian",
"sv": "Swedish",
"sv-SE": "Swedish (Sweden)",
"ta": "Tamil",
"te": "Telugu",
"th": "Thai",
"tl": "Tagalog",
"tr": "Turkish",
"uk": "Ukrainian",
"ur": "Urdu",
"vi": "Vietnamese",
"zh-CN": "Chinese (Simplified)",
"zh-TW": "Chinese (Traditional)",
// Sarvam Indian languages
"bn-IN": "Bengali",
"gu-IN": "Gujarati",
"hi-IN": "Hindi",
"kn-IN": "Kannada",
"ml-IN": "Malayalam",
"mr-IN": "Marathi",
"od-IN": "Odia",
"pa-IN": "Punjabi",
"ta-IN": "Tamil",
"te-IN": "Telugu",
"as-IN": "Assamese",
};
// Display names for Sarvam voices
const VOICE_DISPLAY_NAMES: Record<string, string> = {
"anushka": "Anushka (Female)",

View file

@ -215,11 +215,7 @@ export default function CustomEdge(props: CustomEdgeProps) {
const handleSaveEdgeData = useCallback(async (updatedData: FlowEdgeData) => {
// Use the workflow store's updateEdge method to properly track history
updateEdge(id, { data: updatedData });
// Save the workflow after updating edge data with a small delay to ensure state is updated
setTimeout(async () => {
await saveWorkflow();
}, 100);
await saveWorkflow();
}, [id, updateEdge, saveWorkflow]);
return (

View file

@ -89,10 +89,7 @@ export const AgentNode = memo(({ data, selected, id }: AgentNodeProps) => {
document_uuids: documentUuids.length > 0 ? documentUuids : undefined,
});
setOpen(false);
// Save the workflow after updating node data with a small delay to ensure state is updated
setTimeout(async () => {
await saveWorkflow();
}, 100);
await saveWorkflow();
};
// Reset form state when dialog opens
@ -127,27 +124,23 @@ export const AgentNode = memo(({ data, selected, id }: AgentNodeProps) => {
}, [data, open]);
// Handle cleanup of stale document UUIDs
const handleStaleDocuments = useCallback((staleUuids: string[]) => {
const handleStaleDocuments = useCallback(async (staleUuids: string[]) => {
const cleanedUuids = (data.document_uuids ?? []).filter(uuid => !staleUuids.includes(uuid));
handleSaveNodeData({
...data,
document_uuids: cleanedUuids.length > 0 ? cleanedUuids : undefined,
});
setTimeout(async () => {
await saveWorkflow();
}, 100);
await saveWorkflow();
}, [data, handleSaveNodeData, saveWorkflow]);
// Handle cleanup of stale tool UUIDs
const handleStaleTools = useCallback((staleUuids: string[]) => {
const handleStaleTools = useCallback(async (staleUuids: string[]) => {
const cleanedUuids = (data.tool_uuids ?? []).filter(uuid => !staleUuids.includes(uuid));
handleSaveNodeData({
...data,
tool_uuids: cleanedUuids.length > 0 ? cleanedUuids : undefined,
});
setTimeout(async () => {
await saveWorkflow();
}, 100);
await saveWorkflow();
}, [data, handleSaveNodeData, saveWorkflow]);
return (

View file

@ -75,10 +75,7 @@ export const EndCall = memo(({ data, selected, id }: EndCallNodeProps) => {
add_global_prompt: addGlobalPrompt,
});
setOpen(false);
// Save the workflow after updating node data with a small delay to ensure state is updated
setTimeout(async () => {
await saveWorkflow();
}, 100);
await saveWorkflow();
};
// Reset form state when dialog opens

View file

@ -52,10 +52,7 @@ export const GlobalNode = memo(({ data, selected, id }: GlobalNodeProps) => {
name
});
setOpen(false);
// Save the workflow after updating node data with a small delay to ensure state is updated
setTimeout(async () => {
await saveWorkflow();
}, 100);
await saveWorkflow();
};
// Reset form state when dialog opens

View file

@ -66,9 +66,7 @@ export const QANode = memo(({ data, selected, id }: QANodeProps) => {
qa_sample_rate: qaSampleRate,
});
setOpen(false);
setTimeout(async () => {
await saveWorkflow();
}, 100);
await saveWorkflow();
};
const resetFormState = () => {

View file

@ -104,10 +104,7 @@ export const StartCall = memo(({ data, selected, id }: StartCallNodeProps) => {
document_uuids: documentUuids.length > 0 ? documentUuids : undefined,
});
setOpen(false);
// Save the workflow after updating node data with a small delay to ensure state is updated
setTimeout(async () => {
await saveWorkflow();
}, 100);
await saveWorkflow();
};
// Reset form state when dialog opens
@ -148,27 +145,23 @@ export const StartCall = memo(({ data, selected, id }: StartCallNodeProps) => {
}, [data, open]);
// Handle cleanup of stale document UUIDs
const handleStaleDocuments = useCallback((staleUuids: string[]) => {
const handleStaleDocuments = useCallback(async (staleUuids: string[]) => {
const cleanedUuids = (data.document_uuids ?? []).filter(uuid => !staleUuids.includes(uuid));
handleSaveNodeData({
...data,
document_uuids: cleanedUuids.length > 0 ? cleanedUuids : undefined,
});
setTimeout(async () => {
await saveWorkflow();
}, 100);
await saveWorkflow();
}, [data, handleSaveNodeData, saveWorkflow]);
// Handle cleanup of stale tool UUIDs
const handleStaleTools = useCallback((staleUuids: string[]) => {
const handleStaleTools = useCallback(async (staleUuids: string[]) => {
const cleanedUuids = (data.tool_uuids ?? []).filter(uuid => !staleUuids.includes(uuid));
handleSaveNodeData({
...data,
tool_uuids: cleanedUuids.length > 0 ? cleanedUuids : undefined,
});
setTimeout(async () => {
await saveWorkflow();
}, 100);
await saveWorkflow();
}, [data, handleSaveNodeData, saveWorkflow]);
return (

View file

@ -61,10 +61,7 @@ export const TriggerNode = memo(({ data, selected, id }: TriggerNodeProps) => {
trigger_path: triggerPath,
});
setOpen(false);
// Save the workflow after updating node data
setTimeout(async () => {
await saveWorkflow();
}, 100);
await saveWorkflow();
};
// Reset form state when dialog opens

View file

@ -86,9 +86,7 @@ export const WebhookNode = memo(({ data, selected, id }: WebhookNodeProps) => {
payload_template: validation.parsed as Record<string, unknown>,
});
setOpen(false);
setTimeout(async () => {
await saveWorkflow();
}, 100);
await saveWorkflow();
};
const handleOpenChange = (newOpen: boolean) => {

View file

@ -71,12 +71,13 @@ export const NodeEditDialog = ({
const handleKeyDown = (e: KeyboardEvent) => {
if ((e.metaKey || e.ctrlKey) && e.key === 's') {
e.preventDefault();
e.stopImmediatePropagation();
handleSave();
}
};
window.addEventListener('keydown', handleKeyDown);
return () => window.removeEventListener('keydown', handleKeyDown);
window.addEventListener('keydown', handleKeyDown, true);
return () => window.removeEventListener('keydown', handleKeyDown, true);
}, [open, handleSave]);
return (

View file

@ -1,22 +1,114 @@
'use client';
import { PlusIcon } from 'lucide-react';
import { Bot, ChevronDown, LayoutTemplate, PlusIcon } from 'lucide-react';
import { useRouter } from 'next/navigation';
import { useState } from 'react';
import { toast } from 'sonner';
import { createWorkflowApiV1WorkflowCreateDefinitionPost } from '@/client/sdk.gen';
import { Button } from "@/components/ui/button";
import {
DropdownMenu,
DropdownMenuContent,
DropdownMenuItem,
DropdownMenuTrigger,
} from "@/components/ui/dropdown-menu";
import { useAuth } from '@/lib/auth';
import logger from '@/lib/logger';
import { getRandomId } from '@/lib/utils';
// Prompt text assigned to the start node of a blank workflow.
const BLANK_START_CALL_PROMPT = "# Goal\nYou are a helpful agent who is handing a conversation over voice with a human. This is a voice conversation, so transcripts can be error prone.\n\n## Rules\n- Language: UK English but does not have to be correct english\n- Keep responses short and 2-3 sentences max\n- If you have to repeat something that you said in your previous two turns, then rephrase a bit while keeping the same meaning. Never repeat the exact same words as in your previous 2 responses.\n\n## Speech Handling\n- There could be multiple transcription errors. \n- Accept variations: yes/yeah/yep/aye, no/nah/nope\n- If user says \"sorry?\" or \"pardon me\" or \"can you repeat\" or \"what?\", they might not have heard you- so just repeat what you just said.\n\n### Flow\nStart by saying \"Hi\". Be polite and courteous. ";

// The single "startCall" node that a blank workflow begins with.
const BLANK_START_NODE = {
    id: "1",
    type: "startCall",
    position: { x: 175, y: 60 },
    data: {
        prompt: BLANK_START_CALL_PROMPT,
        name: "start call",
        allow_interrupt: false,
        invalid: false,
        validationMessage: null,
        is_static: false,
        add_global_prompt: false,
        wait_for_user_response: false,
        detect_voicemail: true,
        delayed_start: false,
        is_start: true,
        selected_through_edge: false,
        hovered_through_edge: false,
        extraction_enabled: false,
        selected: false,
        dragging: false,
    },
};

/**
 * Workflow definition for a blank workflow: one start node, no edges,
 * and a preset viewport.
 */
const BLANK_WORKFLOW_DEFINITION = {
    nodes: [BLANK_START_NODE],
    edges: [],
    viewport: { x: 808, y: 269, zoom: 0.75 },
};
/**
 * "Create Agent" dropdown button.
 *
 * Offers two ways to start a workflow:
 *  - "Use Agent Builder": navigates to `/workflow/create`.
 *  - "Blank Canvas": creates a workflow from `BLANK_WORKFLOW_DEFINITION`
 *    through the API and navigates to the new workflow's page.
 *
 * The trigger and the "Blank Canvas" item are disabled while a creation
 * request is in flight.
 */
export function CreateWorkflowButton() {
    const router = useRouter();
    const { user, getAccessToken } = useAuth();
    const [isCreating, setIsCreating] = useState(false);

    const handleAgentBuilder = () => {
        router.push('/workflow/create');
    };

    const handleBlankCanvas = async () => {
        // Ignore repeat clicks while a request is pending, and do nothing
        // when there is no authenticated user.
        if (isCreating || !user) return;

        setIsCreating(true);
        try {
            const accessToken = await getAccessToken();
            const name = `Workflow-${getRandomId()}`;
            const response = await createWorkflowApiV1WorkflowCreateDefinitionPost({
                body: {
                    name,
                    workflow_definition: BLANK_WORKFLOW_DEFINITION as unknown as { [key: string]: unknown },
                },
                headers: {
                    'Authorization': `Bearer ${accessToken}`,
                },
            });

            const workflowId = response.data?.id;
            if (!workflowId) {
                // The call can resolve without a usable id (for example when
                // the client reports an error in the result instead of
                // throwing). Surface that instead of silently doing nothing.
                logger.error('Error creating blank workflow: response did not include a workflow id');
                toast.error('Failed to create workflow');
                return;
            }
            router.push(`/workflow/${workflowId}`);
        } catch (err) {
            logger.error(`Error creating blank workflow: ${err}`);
            toast.error('Failed to create workflow');
        } finally {
            // Always re-enable the button, whether the request succeeded or not.
            setIsCreating(false);
        }
    };

    return (
        <DropdownMenu>
            <DropdownMenuTrigger asChild>
                <Button disabled={isCreating}>
                    <PlusIcon className="w-4 h-4" />
                    {isCreating ? 'Creating...' : 'Create Agent'}
                    <ChevronDown className="w-4 h-4" />
                </Button>
            </DropdownMenuTrigger>
            <DropdownMenuContent align="end">
                <DropdownMenuItem onClick={handleAgentBuilder} className="cursor-pointer">
                    <Bot className="w-4 h-4 mr-2" />
                    <div>
                        <div className="font-medium">Use Agent Builder</div>
                        <div className="text-xs text-muted-foreground">AI generates a workflow from your description</div>
                    </div>
                </DropdownMenuItem>
                <DropdownMenuItem onClick={handleBlankCanvas} disabled={isCreating} className="cursor-pointer">
                    <LayoutTemplate className="w-4 h-4 mr-2" />
                    <div>
                        <div className="font-medium">Blank Canvas</div>
                        <div className="text-xs text-muted-foreground">Start from scratch with an empty workflow</div>
                    </div>
                </DropdownMenuItem>
            </DropdownMenuContent>
        </DropdownMenu>
    );
}

View file

@ -0,0 +1,98 @@
// Arabic and its regional variants.
const ARABIC_DISPLAY_NAMES: Record<string, string> = {
    "ar": "Arabic",
    "ar-AE": "Arabic (UAE)",
    "ar-SA": "Arabic (Saudi Arabia)",
    "ar-QA": "Arabic (Qatar)",
    "ar-KW": "Arabic (Kuwait)",
    "ar-SY": "Arabic (Syria)",
    "ar-LB": "Arabic (Lebanon)",
    "ar-PS": "Arabic (Palestine)",
    "ar-JO": "Arabic (Jordan)",
    "ar-EG": "Arabic (Egypt)",
    "ar-SD": "Arabic (Sudan)",
    "ar-TD": "Arabic (Chad)",
    "ar-MA": "Arabic (Morocco)",
    "ar-DZ": "Arabic (Algeria)",
    "ar-TN": "Arabic (Tunisia)",
    "ar-IQ": "Arabic (Iraq)",
    "ar-IR": "Arabic (Iran)",
};

// Other languages, including their regional variants.
const OTHER_DISPLAY_NAMES: Record<string, string> = {
    "be": "Belarusian",
    "bn": "Bengali",
    "bs": "Bosnian",
    "bg": "Bulgarian",
    "ca": "Catalan",
    "cs": "Czech",
    "da": "Danish",
    "da-DK": "Danish (Denmark)",
    "de": "German",
    "de-CH": "German (Switzerland)",
    "el": "Greek",
    "en": "English",
    "en-US": "English (US)",
    "en-AU": "English (Australia)",
    "en-GB": "English (UK)",
    "en-IN": "English (India)",
    "en-NZ": "English (New Zealand)",
    "es": "Spanish",
    "es-419": "Spanish (Latin America)",
    "et": "Estonian",
    "fa": "Persian",
    "fi": "Finnish",
    "fr": "French",
    "fr-CA": "French (Canada)",
    "he": "Hebrew",
    "hi": "Hindi",
    "hr": "Croatian",
    "hu": "Hungarian",
    "id": "Indonesian",
    "it": "Italian",
    "ja": "Japanese",
    "kn": "Kannada",
    "ko": "Korean",
    "ko-KR": "Korean (South Korea)",
    "lt": "Lithuanian",
    "lv": "Latvian",
    "mk": "Macedonian",
    "mr": "Marathi",
    "ms": "Malay",
    "nl": "Dutch",
    "nl-BE": "Flemish",
    "no": "Norwegian",
    "pl": "Polish",
    "pt": "Portuguese",
    "pt-BR": "Portuguese (Brazil)",
    "pt-PT": "Portuguese (Portugal)",
    "ro": "Romanian",
    "ru": "Russian",
    "sk": "Slovak",
    "sl": "Slovenian",
    "sr": "Serbian",
    "sv": "Swedish",
    "sv-SE": "Swedish (Sweden)",
    "ta": "Tamil",
    "te": "Telugu",
    "th": "Thai",
    "tl": "Tagalog",
    "tr": "Turkish",
    "uk": "Ukrainian",
    "ur": "Urdu",
    "vi": "Vietnamese",
    "zh-CN": "Chinese (Simplified)",
    "zh-TW": "Chinese (Traditional)",
};

// Sarvam Indian languages.
const SARVAM_DISPLAY_NAMES: Record<string, string> = {
    "bn-IN": "Bengali",
    "gu-IN": "Gujarati",
    "hi-IN": "Hindi",
    "kn-IN": "Kannada",
    "ml-IN": "Malayalam",
    "mr-IN": "Marathi",
    "od-IN": "Odia",
    "pa-IN": "Punjabi",
    "ta-IN": "Tamil",
    "te-IN": "Telugu",
    "as-IN": "Assamese",
};

/**
 * Display names for language codes (Deepgram + Sarvam), keyed by language code.
 *
 * The groups are spread in a fixed order (multilingual, Arabic, other
 * languages, Sarvam Indian languages), which is also the order in which the
 * keys enumerate.
 */
export const LANGUAGE_DISPLAY_NAMES: Record<string, string> = {
    "multi": "Multilingual (Auto-detect)",
    ...ARABIC_DISPLAY_NAMES,
    ...OTHER_DISPLAY_NAMES,
    ...SARVAM_DISPLAY_NAMES,
};