feat: allow recording audio in workflow builder

This commit is contained in:
Abhishek Kumar 2026-03-25 15:01:39 +05:30
parent ac0731a374
commit 2fa4191d9b
22 changed files with 700 additions and 246 deletions

View file

@ -14,6 +14,7 @@ import type { DocumentResponseSchema, RecordingResponseSchema, ToolResponse } fr
import { FlowEdge, FlowNode, NodeType } from "@/components/flow/types";
import { Button } from '@/components/ui/button';
import { Tooltip, TooltipContent, TooltipProvider, TooltipTrigger } from '@/components/ui/tooltip';
import { useUserConfig } from '@/context/UserConfigContext';
import { WorkflowConfigurations } from '@/types/workflow-configurations';
import AddNodePanel from "../../../components/flow/AddNodePanel";
@ -64,6 +65,11 @@ interface RenderWorkflowProps {
}
function RenderWorkflow({ initialWorkflowName, workflowId, initialFlow, initialTemplateContextVariables, initialWorkflowConfigurations, user }: RenderWorkflowProps) {
const { userConfig } = useUserConfig();
const ttsProvider = (userConfig?.tts?.provider as string) ?? "";
const ttsModel = (userConfig?.tts?.model as string) ?? "";
const ttsVoiceId = (userConfig?.tts?.voice as string) ?? "";
const [isContextVarsDialogOpen, setIsContextVarsDialogOpen] = useState(false);
const [isConfigurationsDialogOpen, setIsConfigurationsDialogOpen] = useState(false);
const [isDictionaryDialogOpen, setIsDictionaryDialogOpen] = useState(false);
@ -125,10 +131,15 @@ function RenderWorkflow({ initialWorkflowName, workflowId, initialFlow, initialT
setTools(toolsResponse.data);
}
// Fetch recordings for this workflow
// Fetch recordings for this workflow filtered by active TTS config
try {
const recordingsResponse = await listRecordingsApiV1WorkflowRecordingsGet({
query: { workflow_id: workflowId },
query: {
workflow_id: workflowId,
tts_provider: ttsProvider || undefined,
tts_model: ttsModel || undefined,
tts_voice_id: ttsVoiceId || undefined,
},
});
if (recordingsResponse.data) {
setRecordings(recordingsResponse.data.recordings);
@ -142,7 +153,7 @@ function RenderWorkflow({ initialWorkflowName, workflowId, initialFlow, initialT
};
fetchData();
}, [workflowId]);
}, [workflowId, ttsProvider, ttsModel, ttsVoiceId]);
// Memoize defaultEdgeOptions to prevent unnecessary re-renders
const defaultEdgeOptions = useMemo(() => ({

View file

@ -1,4 +1,4 @@
import { Loader2, Trash2Icon, Upload } from "lucide-react";
import { Loader2, Mic, Square, Trash2Icon, Upload } from "lucide-react";
import { useCallback, useEffect, useRef, useState } from "react";
import {
@ -6,6 +6,7 @@ import {
deleteRecordingApiV1WorkflowRecordingsRecordingIdDelete,
getUploadUrlApiV1WorkflowRecordingsUploadUrlPost,
listRecordingsApiV1WorkflowRecordingsGet,
transcribeAudioApiV1WorkflowRecordingsTranscribePost,
} from "@/client";
import type { RecordingResponseSchema } from "@/client/types.gen";
import { Button } from "@/components/ui/button";
@ -18,6 +19,15 @@ import {
} from "@/components/ui/dialog";
import { Input } from "@/components/ui/input";
import { Label } from "@/components/ui/label";
import {
Select,
SelectContent,
SelectItem,
SelectTrigger,
SelectValue,
} from "@/components/ui/select";
import { Textarea } from "@/components/ui/textarea";
import { LANGUAGE_DISPLAY_NAMES } from "@/constants/languages";
import { useUserConfig } from "@/context/UserConfigContext";
interface RecordingsDialogProps {
@ -29,6 +39,8 @@ interface RecordingsDialogProps {
const MAX_FILE_SIZE = 5 * 1024 * 1024; // 5MB
type RecordingStep = "idle" | "naming" | "recording" | "transcribing";
export const RecordingsDialog = ({
open,
onOpenChange,
@ -42,7 +54,16 @@ export const RecordingsDialog = ({
const [transcript, setTranscript] = useState("");
const [selectedFile, setSelectedFile] = useState<File | null>(null);
const [error, setError] = useState<string | null>(null);
const [language, setLanguage] = useState("multi");
const [recordingStep, setRecordingStep] = useState<RecordingStep>("idle");
const [recordingFilename, setRecordingFilename] = useState("");
const [recordingDuration, setRecordingDuration] = useState(0);
const mediaRecorderRef = useRef<MediaRecorder | null>(null);
const audioChunksRef = useRef<Blob[]>([]);
const recordingTimerRef = useRef<ReturnType<typeof setInterval> | null>(null);
const fileInputRef = useRef<HTMLInputElement>(null);
const languageRef = useRef(language);
languageRef.current = language;
const ttsProvider = (userConfig?.tts?.provider as string) ?? "";
const ttsModel = (userConfig?.tts?.model as string) ?? "";
@ -70,14 +91,119 @@ export const RecordingsDialog = ({
}
}, [workflowId, ttsProvider, ttsModel, ttsVoiceId, onRecordingsChange]);
// Cancels the interval that ticks the on-screen recording duration, if one is running.
const stopRecordingTimer = useCallback(() => {
  const activeTimer = recordingTimerRef.current;
  if (!activeTimer) return;

  clearInterval(activeTimer);
  recordingTimerRef.current = null;
}, []);
// Asks the current MediaRecorder (if any) to stop; a recorder that is already
// inactive is left alone.
const stopRecording = useCallback(() => {
  const recorder = mediaRecorderRef.current;
  if (!recorder || recorder.state === "inactive") return;

  recorder.stop();
}, []);
// Returns the in-dialog recording flow to its starting point: step back to
// "idle", clear the typed recording name, and zero the elapsed-seconds counter.
// It does not touch the MediaRecorder or the duration timer.
const resetRecordingState = useCallback(() => {
setRecordingStep("idle");
setRecordingFilename("");
setRecordingDuration(0);
}, []);
// Each time the dialog opens: reload the recordings list and reset the
// "add recording" form (error, transcript, chosen file, language, recording flow).
useEffect(() => {
if (open) {
fetchRecordings();
setError(null);
setTranscript("");
setSelectedFile(null);
// "multi" is the initial value of the language state.
setLanguage("multi");
resetRecordingState();
}
// NOTE(review): two dependency-array lines follow (they look like the before/after
// of this change). Only the second one lists resetRecordingState, which the effect
// body calls — confirm that it is the one that should remain.
}, [open, fetchRecordings]);
}, [open, fetchRecordings, resetRecordingState]);
// Whenever the dialog is not open, make sure no recorder keeps running and the
// duration timer is cancelled.
useEffect(() => {
  if (open) return;

  stopRecording();
  stopRecordingTimer();
}, [open, stopRecording, stopRecordingTimer]);
/**
 * Sends an audio file to the transcription endpoint and, when text comes back,
 * puts it into the transcript field.
 *
 * The recording step is set to "transcribing" for the duration of the request
 * and always returns to "idle" afterwards. Any failure is reported through the
 * dialog's error message; the user can still type the transcript by hand.
 *
 * @param file - The audio file (picked or freshly recorded) to transcribe.
 */
const transcribeFile = async (file: File) => {
  const failureMessage = "Auto-transcription failed. You can type the transcript manually.";

  setRecordingStep("transcribing");
  try {
    // Read the language through the ref rather than the state variable: this
    // function can be invoked from a recorder callback created in an earlier
    // render, and the ref is refreshed on every render.
    const currentLang = languageRef.current;
    const result = await transcribeAudioApiV1WorkflowRecordingsTranscribePost({
      body: { file, language: currentLang },
    });

    // The generated client may resolve with an `error` field instead of
    // rejecting; without this check a failed request would pass silently.
    if ("error" in result && result.error) {
      setError(failureMessage);
      return;
    }

    const data = result.data as Record<string, unknown> | undefined;
    const text = data?.transcript;
    // Only accept a non-empty string; anything else leaves the field untouched.
    if (typeof text === "string" && text) {
      setTranscript(text);
    }
  } catch {
    // Transcription failed — user can still type manually
    setError(failureMessage);
  } finally {
    setRecordingStep("idle");
  }
};
/**
 * Requests microphone access and starts a new in-browser recording.
 *
 * While recording, a one-second interval advances the displayed duration.
 * When the recorder stops, the captured chunks are assembled into a File named
 * after the user-supplied recording name, checked against MAX_FILE_SIZE, set as
 * the selected file, and sent for auto-transcription.
 *
 * Failures are reported through the dialog's error message and the recording
 * flow is reset to its idle state.
 */
const startRecording = async () => {
  // Kept outside the try block so the catch handler can release the microphone
  // if something fails after access was granted.
  let stream: MediaStream | null = null;
  try {
    stream = await navigator.mediaDevices.getUserMedia({ audio: true });
    const micStream = stream;
    const mediaRecorder = new MediaRecorder(micStream);
    mediaRecorderRef.current = mediaRecorder;
    audioChunksRef.current = [];

    mediaRecorder.ondataavailable = (e) => {
      if (e.data.size > 0) audioChunksRef.current.push(e.data);
    };

    // Capture the name now; the stop handler runs later and should use the
    // name that was entered when recording began.
    const filename = recordingFilename.trim() || "recording";

    mediaRecorder.onstop = () => {
      // Release the microphone as soon as recording ends.
      micStream.getTracks().forEach((t) => t.stop());
      stopRecordingTimer();

      const mimeType = mediaRecorder.mimeType;
      const blob = new Blob(audioChunksRef.current, { type: mimeType });
      if (blob.size > MAX_FILE_SIZE) {
        setError(`Recording (${(blob.size / (1024 * 1024)).toFixed(1)}MB) exceeds the maximum allowed size of 5MB.`);
        resetRecordingState();
        return;
      }

      // Pick an extension that matches the container the browser actually
      // produced; some browsers record Ogg rather than WebM or MP4.
      let ext = "mp4";
      if (mimeType.includes("webm")) {
        ext = "webm";
      } else if (mimeType.includes("ogg")) {
        ext = "ogg";
      }

      const file = new File([blob], `${filename}.${ext}`, { type: mimeType });
      setSelectedFile(file);
      setError(null);
      void transcribeFile(file);
    };

    mediaRecorder.start();
    setRecordingStep("recording");
    setRecordingDuration(0);
    setError(null);

    // Never leave an earlier interval running alongside the new one.
    stopRecordingTimer();
    recordingTimerRef.current = setInterval(() => {
      setRecordingDuration((d) => d + 1);
    }, 1000);
  } catch {
    if (stream) {
      // Access was granted but the recorder could not be created or started:
      // stop the tracks so the microphone is not left open.
      stream.getTracks().forEach((t) => t.stop());
      setError("Could not start recording in this browser.");
    } else {
      setError("Microphone access denied. Please allow microphone permissions.");
    }
    resetRecordingState();
  }
};
// Click handler for the "Stop" button; simply forwards to stopRecording.
const handleStopRecording = () => stopRecording();
// Handles a change of the hidden file input.
//  - no file: clears the error and the current selection
//  - file over MAX_FILE_SIZE: shows a size error, clears the selection and the input
//  - otherwise: stores the file and kicks off auto-transcription
const handleFileSelect = (file: File | null) => {
  if (!file) {
    setError(null);
    setSelectedFile(null);
    return;
  }

  if (file.size > MAX_FILE_SIZE) {
    const sizeInMb = (file.size / (1024 * 1024)).toFixed(1);
    setError(`File size (${sizeInMb}MB) exceeds the maximum allowed size of 5MB.`);
    setSelectedFile(null);
    const input = fileInputRef.current;
    if (input) input.value = "";
    return;
  }

  setError(null);
  setSelectedFile(file);
  transcribeFile(file);
};
const handleUpload = async () => {
if (!selectedFile || !transcript.trim()) return;
@ -137,6 +263,7 @@ export const RecordingsDialog = ({
original_filename: selectedFile.name,
file_size_bytes: selectedFile.size,
mime_type: selectedFile.type,
language,
},
},
});
@ -144,6 +271,8 @@ export const RecordingsDialog = ({
// Reset form and refresh list
setTranscript("");
setSelectedFile(null);
setLanguage("multi");
resetRecordingState();
if (fileInputRef.current) fileInputRef.current.value = "";
await fetchRecordings();
} catch (err) {
@ -166,13 +295,17 @@ export const RecordingsDialog = ({
}
};
// Flags derived from the current recording step, used to disable controls
// while work is in progress.
const isRecording = recordingStep === "recording";
const isTranscribing = recordingStep === "transcribing";
// True while an upload, a recording, or a transcription is under way.
const isBusy = uploading || isRecording || isTranscribing;
return (
<Dialog open={open} onOpenChange={onOpenChange}>
<DialogContent className="max-w-lg max-h-[80vh] overflow-y-auto">
<DialogHeader>
<DialogTitle>Workflow Recordings</DialogTitle>
<DialogDescription>
Upload audio recordings for hybrid prompts. Recordings are
Upload or record audio for hybrid prompts. Recordings are
scoped to your current TTS configuration. Use{" "}
<code className="text-xs bg-muted px-1 rounded">@</code> in
prompt fields to insert them.
@ -211,61 +344,158 @@ export const RecordingsDialog = ({
{/* Upload Section */}
<div className="space-y-3 border rounded-md p-3">
<Label className="text-sm font-medium">Upload New Recording</Label>
<Label className="text-sm font-medium">Add New Recording</Label>
{/* Audio source: file picker or record */}
<div>
<Label className="text-xs text-muted-foreground">
Audio File
</Label>
<input
ref={fileInputRef}
type="file"
accept="audio/*"
onChange={(e) => {
const file = e.target.files?.[0] ?? null;
if (file && file.size > MAX_FILE_SIZE) {
setError(
`File size (${(file.size / (1024 * 1024)).toFixed(1)}MB) exceeds the maximum allowed size of 5MB.`
);
setSelectedFile(null);
if (fileInputRef.current) fileInputRef.current.value = "";
return;
}
setError(null);
setSelectedFile(file);
}}
className="hidden"
/>
<Button
type="button"
variant="outline"
size="sm"
className="w-full justify-start text-sm font-normal"
onClick={() => fileInputRef.current?.click()}
>
<Upload className="w-4 h-4 mr-2 shrink-0" />
{selectedFile ? (
<span className="truncate">
{selectedFile.name} ({(selectedFile.size / (1024 * 1024)).toFixed(1)}MB)
</span>
) : (
<span className="text-muted-foreground">Choose audio file (max 5MB)</span>
<div className="flex gap-2">
<input
ref={fileInputRef}
type="file"
accept="audio/*"
onChange={(e) => handleFileSelect(e.target.files?.[0] ?? null)}
className="hidden"
/>
<Button
type="button"
variant="outline"
size="sm"
className="flex-1 justify-start text-sm font-normal"
onClick={() => fileInputRef.current?.click()}
disabled={isBusy}
>
<Upload className="w-4 h-4 mr-2 shrink-0" />
{selectedFile && recordingStep !== "naming" ? (
<span className="truncate">
{selectedFile.name} ({(selectedFile.size / (1024 * 1024)).toFixed(1)}MB)
</span>
) : (
<span className="text-muted-foreground">Choose audio file (max 5MB)</span>
)}
</Button>
{recordingStep === "idle" && (
<Button
type="button"
variant="outline"
size="sm"
onClick={() => setRecordingStep("naming")}
disabled={uploading || isTranscribing}
>
<Mic className="w-4 h-4 mr-1" />
Record
</Button>
)}
</Button>
</div>
</div>
{/* Recording: filename + start/stop */}
{(recordingStep === "naming" || isRecording) && (
<div className="space-y-2 rounded-md border border-dashed p-3 bg-muted/20">
{recordingStep === "naming" && (
<>
<div>
<Label className="text-xs text-muted-foreground">
Recording Name
</Label>
<Input
placeholder="e.g. greeting, hold-message"
value={recordingFilename}
onChange={(e) => setRecordingFilename(e.target.value)}
autoFocus
/>
</div>
<div className="flex gap-2">
<Button
size="sm"
onClick={startRecording}
disabled={!recordingFilename.trim()}
>
<Mic className="w-4 h-4 mr-1" />
Start Recording
</Button>
<Button
size="sm"
variant="ghost"
onClick={resetRecordingState}
>
Cancel
</Button>
</div>
</>
)}
{isRecording && (
<div className="flex items-center gap-3">
<span className="relative flex h-3 w-3">
<span className="animate-ping absolute inline-flex h-full w-full rounded-full bg-red-400 opacity-75" />
<span className="relative inline-flex rounded-full h-3 w-3 bg-red-500" />
</span>
<span className="text-sm font-mono">
{Math.floor(recordingDuration / 60)}:{(recordingDuration % 60).toString().padStart(2, "0")}
</span>
<span className="text-xs text-muted-foreground">{recordingFilename}</span>
<Button
size="sm"
variant="destructive"
onClick={handleStopRecording}
className="ml-auto"
>
<Square className="w-4 h-4 mr-1" />
Stop
</Button>
</div>
)}
</div>
)}
{/* Transcribing progress */}
{isTranscribing && (
<div className="flex items-center gap-2 text-sm text-muted-foreground">
<Loader2 className="w-4 h-4 animate-spin" />
Transcribing audio...
</div>
)}
{/* Language */}
<div>
<Label className="text-xs text-muted-foreground">
Language
</Label>
<Select value={language} onValueChange={setLanguage}>
<SelectTrigger className="h-9 text-sm">
<SelectValue />
</SelectTrigger>
<SelectContent>
{Object.entries(LANGUAGE_DISPLAY_NAMES).map(([code, name]) => (
<SelectItem key={code} value={code}>
{name}
</SelectItem>
))}
</SelectContent>
</Select>
</div>
{/* Transcript */}
<div>
<Label className="text-xs text-muted-foreground">
Transcript
</Label>
<Input
placeholder="What does this recording say?"
<Textarea
placeholder={isTranscribing ? "Transcribing..." : "What does this recording say?"}
value={transcript}
onChange={(e) => setTranscript(e.target.value)}
disabled={isTranscribing}
rows={3}
className="resize-none text-sm"
/>
</div>
<Button
size="sm"
onClick={handleUpload}
disabled={!selectedFile || !transcript.trim() || uploading}
disabled={!selectedFile || !transcript.trim() || isBusy}
>
{uploading ? (
<Loader2 className="w-4 h-4 mr-1 animate-spin" />

View file

@ -363,7 +363,13 @@ export const useWorkflowState = ({
// Save workflow function
const saveWorkflow = useCallback(async (updateWorkflowDefinition: boolean = true) => {
if (!user || !rfInstance.current) return;
const flow = rfInstance.current.toObject();
// Read nodes/edges from the Zustand store (synchronously up-to-date)
// and viewport from the ReactFlow instance to build the flow object.
// This avoids a race condition where rfInstance.toObject() may return
// stale node data if React hasn't re-rendered yet after a store update.
const { nodes: currentNodes, edges: currentEdges } = useWorkflowStore.getState();
const viewport = rfInstance.current.getViewport();
const flow = { nodes: currentNodes, edges: currentEdges, viewport };
try {
await updateWorkflowApiV1WorkflowWorkflowIdPut({
path: {