mirror of
https://github.com/dograh-hq/dograh.git
synced 2026-06-16 08:25:18 +02:00
feat: allow recording audio in workflow builder
This commit is contained in:
parent
ac0731a374
commit
2fa4191d9b
22 changed files with 700 additions and 246 deletions
|
|
@ -14,6 +14,7 @@ import type { DocumentResponseSchema, RecordingResponseSchema, ToolResponse } fr
|
|||
import { FlowEdge, FlowNode, NodeType } from "@/components/flow/types";
|
||||
import { Button } from '@/components/ui/button';
|
||||
import { Tooltip, TooltipContent, TooltipProvider, TooltipTrigger } from '@/components/ui/tooltip';
|
||||
import { useUserConfig } from '@/context/UserConfigContext';
|
||||
import { WorkflowConfigurations } from '@/types/workflow-configurations';
|
||||
|
||||
import AddNodePanel from "../../../components/flow/AddNodePanel";
|
||||
|
|
@ -64,6 +65,11 @@ interface RenderWorkflowProps {
|
|||
}
|
||||
|
||||
function RenderWorkflow({ initialWorkflowName, workflowId, initialFlow, initialTemplateContextVariables, initialWorkflowConfigurations, user }: RenderWorkflowProps) {
|
||||
const { userConfig } = useUserConfig();
|
||||
const ttsProvider = (userConfig?.tts?.provider as string) ?? "";
|
||||
const ttsModel = (userConfig?.tts?.model as string) ?? "";
|
||||
const ttsVoiceId = (userConfig?.tts?.voice as string) ?? "";
|
||||
|
||||
const [isContextVarsDialogOpen, setIsContextVarsDialogOpen] = useState(false);
|
||||
const [isConfigurationsDialogOpen, setIsConfigurationsDialogOpen] = useState(false);
|
||||
const [isDictionaryDialogOpen, setIsDictionaryDialogOpen] = useState(false);
|
||||
|
|
@ -125,10 +131,15 @@ function RenderWorkflow({ initialWorkflowName, workflowId, initialFlow, initialT
|
|||
setTools(toolsResponse.data);
|
||||
}
|
||||
|
||||
// Fetch recordings for this workflow
|
||||
// Fetch recordings for this workflow filtered by active TTS config
|
||||
try {
|
||||
const recordingsResponse = await listRecordingsApiV1WorkflowRecordingsGet({
|
||||
query: { workflow_id: workflowId },
|
||||
query: {
|
||||
workflow_id: workflowId,
|
||||
tts_provider: ttsProvider || undefined,
|
||||
tts_model: ttsModel || undefined,
|
||||
tts_voice_id: ttsVoiceId || undefined,
|
||||
},
|
||||
});
|
||||
if (recordingsResponse.data) {
|
||||
setRecordings(recordingsResponse.data.recordings);
|
||||
|
|
@ -142,7 +153,7 @@ function RenderWorkflow({ initialWorkflowName, workflowId, initialFlow, initialT
|
|||
};
|
||||
|
||||
fetchData();
|
||||
}, [workflowId]);
|
||||
}, [workflowId, ttsProvider, ttsModel, ttsVoiceId]);
|
||||
|
||||
// Memoize defaultEdgeOptions to prevent unnecessary re-renders
|
||||
const defaultEdgeOptions = useMemo(() => ({
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
import { Loader2, Trash2Icon, Upload } from "lucide-react";
|
||||
import { Loader2, Mic, Square, Trash2Icon, Upload } from "lucide-react";
|
||||
import { useCallback, useEffect, useRef, useState } from "react";
|
||||
|
||||
import {
|
||||
|
|
@ -6,6 +6,7 @@ import {
|
|||
deleteRecordingApiV1WorkflowRecordingsRecordingIdDelete,
|
||||
getUploadUrlApiV1WorkflowRecordingsUploadUrlPost,
|
||||
listRecordingsApiV1WorkflowRecordingsGet,
|
||||
transcribeAudioApiV1WorkflowRecordingsTranscribePost,
|
||||
} from "@/client";
|
||||
import type { RecordingResponseSchema } from "@/client/types.gen";
|
||||
import { Button } from "@/components/ui/button";
|
||||
|
|
@ -18,6 +19,15 @@ import {
|
|||
} from "@/components/ui/dialog";
|
||||
import { Input } from "@/components/ui/input";
|
||||
import { Label } from "@/components/ui/label";
|
||||
import {
|
||||
Select,
|
||||
SelectContent,
|
||||
SelectItem,
|
||||
SelectTrigger,
|
||||
SelectValue,
|
||||
} from "@/components/ui/select";
|
||||
import { Textarea } from "@/components/ui/textarea";
|
||||
import { LANGUAGE_DISPLAY_NAMES } from "@/constants/languages";
|
||||
import { useUserConfig } from "@/context/UserConfigContext";
|
||||
|
||||
interface RecordingsDialogProps {
|
||||
|
|
@ -29,6 +39,8 @@ interface RecordingsDialogProps {
|
|||
|
||||
const MAX_FILE_SIZE = 5 * 1024 * 1024; // 5MB
|
||||
|
||||
type RecordingStep = "idle" | "naming" | "recording" | "transcribing";
|
||||
|
||||
export const RecordingsDialog = ({
|
||||
open,
|
||||
onOpenChange,
|
||||
|
|
@ -42,7 +54,16 @@ export const RecordingsDialog = ({
|
|||
const [transcript, setTranscript] = useState("");
|
||||
const [selectedFile, setSelectedFile] = useState<File | null>(null);
|
||||
const [error, setError] = useState<string | null>(null);
|
||||
const [language, setLanguage] = useState("multi");
|
||||
const [recordingStep, setRecordingStep] = useState<RecordingStep>("idle");
|
||||
const [recordingFilename, setRecordingFilename] = useState("");
|
||||
const [recordingDuration, setRecordingDuration] = useState(0);
|
||||
const mediaRecorderRef = useRef<MediaRecorder | null>(null);
|
||||
const audioChunksRef = useRef<Blob[]>([]);
|
||||
const recordingTimerRef = useRef<ReturnType<typeof setInterval> | null>(null);
|
||||
const fileInputRef = useRef<HTMLInputElement>(null);
|
||||
const languageRef = useRef(language);
|
||||
languageRef.current = language;
|
||||
|
||||
const ttsProvider = (userConfig?.tts?.provider as string) ?? "";
|
||||
const ttsModel = (userConfig?.tts?.model as string) ?? "";
|
||||
|
|
@ -70,14 +91,119 @@ export const RecordingsDialog = ({
|
|||
}
|
||||
}, [workflowId, ttsProvider, ttsModel, ttsVoiceId, onRecordingsChange]);
|
||||
|
||||
// Cancel the interval that advances the on-screen recording duration, if one
// is currently scheduled, and forget its handle.
const stopRecordingTimer = useCallback(() => {
  const timerId = recordingTimerRef.current;
  if (!timerId) return;
  clearInterval(timerId);
  recordingTimerRef.current = null;
}, []);
|
||||
|
||||
// Ask the current MediaRecorder (if any) to stop. A recorder that is already
// "inactive" is left alone, so calling this repeatedly is harmless.
const stopRecording = useCallback(() => {
  const recorder = mediaRecorderRef.current;
  if (!recorder || recorder.state === "inactive") return;
  recorder.stop();
}, []);
|
||||
|
||||
// Put the microphone-recording flow back to its starting point: zero elapsed
// time, no pending recording name, and the "idle" step.
const resetRecordingState = useCallback(() => {
  setRecordingDuration(0);
  setRecordingFilename("");
  setRecordingStep("idle");
}, []);
|
||||
|
||||
useEffect(() => {
|
||||
if (open) {
|
||||
fetchRecordings();
|
||||
setError(null);
|
||||
setTranscript("");
|
||||
setSelectedFile(null);
|
||||
setLanguage("multi");
|
||||
resetRecordingState();
|
||||
}
|
||||
}, [open, fetchRecordings]);
|
||||
}, [open, fetchRecordings, resetRecordingState]);
|
||||
|
||||
// Stop any in-progress capture as soon as the dialog is closed.
useEffect(() => {
  if (!open) {
    stopRecording();
    stopRecordingTimer();
  }
}, [open, stopRecording, stopRecordingTimer]);

// Also stop on unmount. Without this, a recording that is running when the
// component is removed would keep the recorder and the one-second duration
// interval alive. Both callbacks are created with empty dependency lists, so
// this cleanup only runs when the component unmounts.
useEffect(() => {
  return () => {
    stopRecording();
    stopRecordingTimer();
  };
}, [stopRecording, stopRecordingTimer]);
|
||||
|
||||
/**
 * Send an audio file to the transcription endpoint and pre-fill the
 * transcript field with the result.
 *
 * The step is switched to "transcribing" for the duration of the request and
 * always returned to "idle" afterwards. Any failure is surfaced as a
 * non-blocking error message; the user can still type the transcript by hand.
 *
 * @param file Audio file (picked from disk or produced by the recorder).
 */
const transcribeFile = async (file: File) => {
  setRecordingStep("transcribing");
  try {
    // Read the language through the ref rather than the `language` state:
    // this function is also invoked from the recorder's `onstop` callback,
    // which was created on an earlier render.
    const result = await transcribeAudioApiV1WorkflowRecordingsTranscribePost({
      body: { file, language: languageRef.current },
    });
    const data = result.data as Record<string, unknown> | undefined;

    if (!data) {
      // The call resolved without a payload. NOTE(review): this assumes the
      // generated client reports HTTP errors by resolving with `data`
      // undefined instead of throwing — confirm against the client config.
      setError("Auto-transcription failed. You can type the transcript manually.");
      return;
    }

    // Only accept a non-empty string; anything else leaves the field as is.
    const text = data.transcript;
    if (typeof text === "string" && text) {
      setTranscript(text);
    }
  } catch {
    // Transcription failed — user can still type manually
    setError("Auto-transcription failed. You can type the transcript manually.");
  } finally {
    setRecordingStep("idle");
  }
};
|
||||
|
||||
/**
 * Request microphone access and begin capturing audio with MediaRecorder.
 *
 * While recording, a one-second interval advances `recordingDuration`. When
 * the recorder stops, the captured chunks are assembled into a File named
 * after `recordingFilename` (falling back to "recording"), checked against
 * MAX_FILE_SIZE, stored as the selected file and handed to `transcribeFile`.
 *
 * Failure handling:
 * - the microphone stream is released on every error path, including when
 *   constructing or starting the recorder throws after access was granted;
 * - permission errors and other start-up errors get distinct messages;
 * - if another recorder is already active by the time access is granted
 *   (e.g. a second click while the permission prompt was open), the newly
 *   acquired stream is released and the call is a no-op.
 */
const startRecording = async () => {
  let stream: MediaStream | null = null;
  try {
    stream = await navigator.mediaDevices.getUserMedia({ audio: true });

    // Re-entrancy guard: never replace a recorder that is still running,
    // otherwise its stream and interval handle would be orphaned.
    const activeRecorder = mediaRecorderRef.current;
    if (activeRecorder && activeRecorder.state !== "inactive") {
      stream.getTracks().forEach((t) => t.stop());
      return;
    }

    const capturedStream = stream;
    const mediaRecorder = new MediaRecorder(capturedStream);
    mediaRecorderRef.current = mediaRecorder;
    audioChunksRef.current = [];

    mediaRecorder.ondataavailable = (e) => {
      if (e.data.size > 0) audioChunksRef.current.push(e.data);
    };

    // Capture the name now; the state may be reset before `onstop` fires.
    const filename = recordingFilename.trim() || "recording";
    mediaRecorder.onstop = () => {
      // Release the microphone and stop the duration ticker first.
      capturedStream.getTracks().forEach((t) => t.stop());
      stopRecordingTimer();

      const mimeType = mediaRecorder.mimeType;
      const blob = new Blob(audioChunksRef.current, { type: mimeType });
      if (blob.size === 0) {
        // Nothing was captured (e.g. stopped immediately): do not offer an
        // empty file for transcription or upload.
        setError("No audio was captured. Please try recording again.");
        resetRecordingState();
        return;
      }
      if (blob.size > MAX_FILE_SIZE) {
        setError(`Recording (${(blob.size / (1024 * 1024)).toFixed(1)}MB) exceeds the maximum allowed size of 5MB.`);
        resetRecordingState();
        return;
      }

      // Pick the extension from the container the browser actually used;
      // some browsers record Ogg rather than WebM or MP4.
      let ext = "mp4";
      if (mimeType.includes("webm")) {
        ext = "webm";
      } else if (mimeType.includes("ogg")) {
        ext = "ogg";
      }
      const file = new File([blob], `${filename}.${ext}`, { type: mimeType });
      setSelectedFile(file);
      setError(null);
      transcribeFile(file);
    };

    mediaRecorder.start();
    setRecordingStep("recording");
    setRecordingDuration(0);
    setError(null);
    // Clear any leftover ticker before installing a new one.
    stopRecordingTimer();
    recordingTimerRef.current = setInterval(() => {
      setRecordingDuration((d) => d + 1);
    }, 1000);
  } catch (err) {
    // If access was granted but a later step threw, the stream is still live.
    stream?.getTracks().forEach((t) => t.stop());
    const permissionDenied =
      err instanceof DOMException &&
      (err.name === "NotAllowedError" || err.name === "SecurityError");
    setError(
      permissionDenied
        ? "Microphone access denied. Please allow microphone permissions."
        : "Could not start recording. Please check that a microphone is available."
    );
    resetRecordingState();
  }
};
|
||||
|
||||
// Click handler for the "Stop" button; finishing the capture is delegated to
// stopRecording, and the recorder's own onstop callback does the rest.
const handleStopRecording = () => stopRecording();
|
||||
|
||||
/**
 * React to a change of the hidden file input.
 *
 * - No file: clear the error and the current selection.
 * - File larger than MAX_FILE_SIZE: show a size error, clear the selection
 *   and empty the file input.
 * - Otherwise: select the file and start auto-transcription.
 */
const handleFileSelect = (file: File | null) => {
  if (!file) {
    setError(null);
    setSelectedFile(null);
    return;
  }

  if (file.size > MAX_FILE_SIZE) {
    const sizeInMb = (file.size / (1024 * 1024)).toFixed(1);
    setError(`File size (${sizeInMb}MB) exceeds the maximum allowed size of 5MB.`);
    setSelectedFile(null);
    const input = fileInputRef.current;
    if (input) input.value = "";
    return;
  }

  setError(null);
  setSelectedFile(file);
  transcribeFile(file);
};
|
||||
|
||||
const handleUpload = async () => {
|
||||
if (!selectedFile || !transcript.trim()) return;
|
||||
|
|
@ -137,6 +263,7 @@ export const RecordingsDialog = ({
|
|||
original_filename: selectedFile.name,
|
||||
file_size_bytes: selectedFile.size,
|
||||
mime_type: selectedFile.type,
|
||||
language,
|
||||
},
|
||||
},
|
||||
});
|
||||
|
|
@ -144,6 +271,8 @@ export const RecordingsDialog = ({
|
|||
// Reset form and refresh list
|
||||
setTranscript("");
|
||||
setSelectedFile(null);
|
||||
setLanguage("multi");
|
||||
resetRecordingState();
|
||||
if (fileInputRef.current) fileInputRef.current.value = "";
|
||||
await fetchRecordings();
|
||||
} catch (err) {
|
||||
|
|
@ -166,13 +295,17 @@ export const RecordingsDialog = ({
|
|||
}
|
||||
};
|
||||
|
||||
const isRecording = recordingStep === "recording";
|
||||
const isTranscribing = recordingStep === "transcribing";
|
||||
const isBusy = uploading || isRecording || isTranscribing;
|
||||
|
||||
return (
|
||||
<Dialog open={open} onOpenChange={onOpenChange}>
|
||||
<DialogContent className="max-w-lg max-h-[80vh] overflow-y-auto">
|
||||
<DialogHeader>
|
||||
<DialogTitle>Workflow Recordings</DialogTitle>
|
||||
<DialogDescription>
|
||||
Upload audio recordings for hybrid prompts. Recordings are
|
||||
Upload or record audio for hybrid prompts. Recordings are
|
||||
scoped to your current TTS configuration. Use{" "}
|
||||
<code className="text-xs bg-muted px-1 rounded">@</code> in
|
||||
prompt fields to insert them.
|
||||
|
|
@ -211,61 +344,158 @@ export const RecordingsDialog = ({
|
|||
|
||||
{/* Upload Section */}
|
||||
<div className="space-y-3 border rounded-md p-3">
|
||||
<Label className="text-sm font-medium">Upload New Recording</Label>
|
||||
<Label className="text-sm font-medium">Add New Recording</Label>
|
||||
|
||||
{/* Audio source: file picker or record */}
|
||||
<div>
|
||||
<Label className="text-xs text-muted-foreground">
|
||||
Audio File
|
||||
</Label>
|
||||
<input
|
||||
ref={fileInputRef}
|
||||
type="file"
|
||||
accept="audio/*"
|
||||
onChange={(e) => {
|
||||
const file = e.target.files?.[0] ?? null;
|
||||
if (file && file.size > MAX_FILE_SIZE) {
|
||||
setError(
|
||||
`File size (${(file.size / (1024 * 1024)).toFixed(1)}MB) exceeds the maximum allowed size of 5MB.`
|
||||
);
|
||||
setSelectedFile(null);
|
||||
if (fileInputRef.current) fileInputRef.current.value = "";
|
||||
return;
|
||||
}
|
||||
setError(null);
|
||||
setSelectedFile(file);
|
||||
}}
|
||||
className="hidden"
|
||||
/>
|
||||
<Button
|
||||
type="button"
|
||||
variant="outline"
|
||||
size="sm"
|
||||
className="w-full justify-start text-sm font-normal"
|
||||
onClick={() => fileInputRef.current?.click()}
|
||||
>
|
||||
<Upload className="w-4 h-4 mr-2 shrink-0" />
|
||||
{selectedFile ? (
|
||||
<span className="truncate">
|
||||
{selectedFile.name} ({(selectedFile.size / (1024 * 1024)).toFixed(1)}MB)
|
||||
</span>
|
||||
) : (
|
||||
<span className="text-muted-foreground">Choose audio file (max 5MB)</span>
|
||||
<div className="flex gap-2">
|
||||
<input
|
||||
ref={fileInputRef}
|
||||
type="file"
|
||||
accept="audio/*"
|
||||
onChange={(e) => handleFileSelect(e.target.files?.[0] ?? null)}
|
||||
className="hidden"
|
||||
/>
|
||||
<Button
|
||||
type="button"
|
||||
variant="outline"
|
||||
size="sm"
|
||||
className="flex-1 justify-start text-sm font-normal"
|
||||
onClick={() => fileInputRef.current?.click()}
|
||||
disabled={isBusy}
|
||||
>
|
||||
<Upload className="w-4 h-4 mr-2 shrink-0" />
|
||||
{selectedFile && recordingStep !== "naming" ? (
|
||||
<span className="truncate">
|
||||
{selectedFile.name} ({(selectedFile.size / (1024 * 1024)).toFixed(1)}MB)
|
||||
</span>
|
||||
) : (
|
||||
<span className="text-muted-foreground">Choose audio file (max 5MB)</span>
|
||||
)}
|
||||
</Button>
|
||||
{recordingStep === "idle" && (
|
||||
<Button
|
||||
type="button"
|
||||
variant="outline"
|
||||
size="sm"
|
||||
onClick={() => setRecordingStep("naming")}
|
||||
disabled={uploading || isTranscribing}
|
||||
>
|
||||
<Mic className="w-4 h-4 mr-1" />
|
||||
Record
|
||||
</Button>
|
||||
)}
|
||||
</Button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Recording: filename + start/stop */}
|
||||
{(recordingStep === "naming" || isRecording) && (
|
||||
<div className="space-y-2 rounded-md border border-dashed p-3 bg-muted/20">
|
||||
{recordingStep === "naming" && (
|
||||
<>
|
||||
<div>
|
||||
<Label className="text-xs text-muted-foreground">
|
||||
Recording Name
|
||||
</Label>
|
||||
<Input
|
||||
placeholder="e.g. greeting, hold-message"
|
||||
value={recordingFilename}
|
||||
onChange={(e) => setRecordingFilename(e.target.value)}
|
||||
autoFocus
|
||||
/>
|
||||
</div>
|
||||
<div className="flex gap-2">
|
||||
<Button
|
||||
size="sm"
|
||||
onClick={startRecording}
|
||||
disabled={!recordingFilename.trim()}
|
||||
>
|
||||
<Mic className="w-4 h-4 mr-1" />
|
||||
Start Recording
|
||||
</Button>
|
||||
<Button
|
||||
size="sm"
|
||||
variant="ghost"
|
||||
onClick={resetRecordingState}
|
||||
>
|
||||
Cancel
|
||||
</Button>
|
||||
</div>
|
||||
</>
|
||||
)}
|
||||
{isRecording && (
|
||||
<div className="flex items-center gap-3">
|
||||
<span className="relative flex h-3 w-3">
|
||||
<span className="animate-ping absolute inline-flex h-full w-full rounded-full bg-red-400 opacity-75" />
|
||||
<span className="relative inline-flex rounded-full h-3 w-3 bg-red-500" />
|
||||
</span>
|
||||
<span className="text-sm font-mono">
|
||||
{Math.floor(recordingDuration / 60)}:{(recordingDuration % 60).toString().padStart(2, "0")}
|
||||
</span>
|
||||
<span className="text-xs text-muted-foreground">{recordingFilename}</span>
|
||||
<Button
|
||||
size="sm"
|
||||
variant="destructive"
|
||||
onClick={handleStopRecording}
|
||||
className="ml-auto"
|
||||
>
|
||||
<Square className="w-4 h-4 mr-1" />
|
||||
Stop
|
||||
</Button>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Transcribing progress */}
|
||||
{isTranscribing && (
|
||||
<div className="flex items-center gap-2 text-sm text-muted-foreground">
|
||||
<Loader2 className="w-4 h-4 animate-spin" />
|
||||
Transcribing audio...
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Language */}
|
||||
<div>
|
||||
<Label className="text-xs text-muted-foreground">
|
||||
Language
|
||||
</Label>
|
||||
<Select value={language} onValueChange={setLanguage}>
|
||||
<SelectTrigger className="h-9 text-sm">
|
||||
<SelectValue />
|
||||
</SelectTrigger>
|
||||
<SelectContent>
|
||||
{Object.entries(LANGUAGE_DISPLAY_NAMES).map(([code, name]) => (
|
||||
<SelectItem key={code} value={code}>
|
||||
{name}
|
||||
</SelectItem>
|
||||
))}
|
||||
</SelectContent>
|
||||
</Select>
|
||||
</div>
|
||||
|
||||
{/* Transcript */}
|
||||
<div>
|
||||
<Label className="text-xs text-muted-foreground">
|
||||
Transcript
|
||||
</Label>
|
||||
<Input
|
||||
placeholder="What does this recording say?"
|
||||
<Textarea
|
||||
placeholder={isTranscribing ? "Transcribing..." : "What does this recording say?"}
|
||||
value={transcript}
|
||||
onChange={(e) => setTranscript(e.target.value)}
|
||||
disabled={isTranscribing}
|
||||
rows={3}
|
||||
className="resize-none text-sm"
|
||||
/>
|
||||
</div>
|
||||
|
||||
<Button
|
||||
size="sm"
|
||||
onClick={handleUpload}
|
||||
disabled={!selectedFile || !transcript.trim() || uploading}
|
||||
disabled={!selectedFile || !transcript.trim() || isBusy}
|
||||
>
|
||||
{uploading ? (
|
||||
<Loader2 className="w-4 h-4 mr-1 animate-spin" />
|
||||
|
|
|
|||
|
|
@ -363,7 +363,13 @@ export const useWorkflowState = ({
|
|||
// Save workflow function
|
||||
const saveWorkflow = useCallback(async (updateWorkflowDefinition: boolean = true) => {
|
||||
if (!user || !rfInstance.current) return;
|
||||
const flow = rfInstance.current.toObject();
|
||||
// Read nodes/edges from the Zustand store (synchronously up-to-date)
|
||||
// and viewport from the ReactFlow instance to build the flow object.
|
||||
// This avoids a race condition where rfInstance.toObject() may return
|
||||
// stale node data if React hasn't re-rendered yet after a store update.
|
||||
const { nodes: currentNodes, edges: currentEdges } = useWorkflowStore.getState();
|
||||
const viewport = rfInstance.current.getViewport();
|
||||
const flow = { nodes: currentNodes, edges: currentEdges, viewport };
|
||||
try {
|
||||
await updateWorkflowApiV1WorkflowWorkflowIdPut({
|
||||
path: {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue