feat: allow uploading recording as part of node transition

This commit is contained in:
Abhishek Kumar 2026-04-10 11:54:00 +05:30
parent bb5f56bfb7
commit 65c76ca7ff
36 changed files with 2255 additions and 201 deletions

View file

@ -0,0 +1,97 @@
import { useId } from "react";

import type { RecordingResponseSchema } from "@/client/types.gen";
import { Label } from "@/components/ui/label";
import { RadioGroup, RadioGroupItem } from "@/components/ui/radio-group";
import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from "@/components/ui/select";
/** Props accepted by the text/audio toggle component below. */
interface TextOrAudioInputProps {
  /** Currently selected input mode; drives which radio is checked and which body is shown. */
  type: 'text' | 'audio';
  /** Called with the newly selected mode when the radio selection changes. */
  onTypeChange: (type: 'text' | 'audio') => void;
  /** Id of the currently selected recording, forwarded to the recording dropdown. */
  recordingId: string;
  /** Called with the id of the recording picked in the dropdown. */
  onRecordingIdChange: (id: string) => void;
  /** Recordings offered in the dropdown; treated as an empty list when omitted. */
  recordings?: RecordingResponseSchema[];
  /** Rendered when type === 'text' */
  children: React.ReactNode;
}

/**
 * Radio toggle between a free-form text input and a pre-recorded audio picker.
 *
 * When `type` is `'text'` the caller-supplied `children` are rendered as-is;
 * otherwise a `RecordingSelect` dropdown bound to `recordingId` is shown.
 * The component is fully controlled: it keeps no state of its own.
 */
export function TextOrAudioInput({
  type,
  onTypeChange,
  recordingId,
  onRecordingIdChange,
  recordings = [],
  children,
}: TextOrAudioInputProps) {
  // DOM ids must be unique per document. Deriving them from useId() keeps each
  // label bound to its own radio even when several instances are mounted at once.
  const idPrefix = useId();
  const textOptionId = `${idPrefix}-text`;
  const audioOptionId = `${idPrefix}-audio`;

  // The radio group reports a plain string; narrow it at runtime instead of
  // asserting so that an unexpected value can never reach the caller.
  const handleValueChange = (value: string) => {
    if (value === 'text' || value === 'audio') {
      onTypeChange(value);
    }
  };

  return (
    <>
      <RadioGroup
        value={type}
        onValueChange={handleValueChange}
        className="flex items-center gap-4"
      >
        <div className="flex items-center gap-2">
          <RadioGroupItem value="text" id={textOptionId} />
          <Label htmlFor={textOptionId} className="font-normal cursor-pointer">Text</Label>
        </div>
        <div className="flex items-center gap-2">
          <RadioGroupItem value="audio" id={audioOptionId} />
          <Label htmlFor={audioOptionId} className="font-normal cursor-pointer">Audio</Label>
        </div>
      </RadioGroup>
      {type === 'text' ? (
        children
      ) : (
        <RecordingSelect
          value={recordingId}
          onChange={onRecordingIdChange}
          recordings={recordings}
        />
      )}
    </>
  );
}
/** Props accepted by the recording dropdown below. */
interface RecordingSelectProps {
  /** Id of the currently selected recording; passed straight to the underlying select. */
  value: string;
  /** Called with the id of the recording the user picks. */
  onChange: (id: string) => void;
  /** Recordings to list as options. */
  recordings: RecordingResponseSchema[];
}

/**
 * Picks the text shown for a recording option.
 *
 * Uses `metadata.original_filename` when it is a non-empty string and falls
 * back to the recording id otherwise. The metadata value is checked at runtime
 * rather than asserted, so a non-string value is never handed to React as a child.
 */
function recordingDisplayName(recording: RecordingResponseSchema): string {
  const filename: unknown = recording.metadata?.original_filename;
  return typeof filename === 'string' && filename !== ''
    ? filename
    : recording.recording_id;
}

/**
 * Dropdown to select a pre-recorded audio file.
 * Exported so callers that only need the dropdown (e.g. tool configs with
 * their own none/custom/audio radio) can use it directly.
 *
 * Shows a single disabled placeholder option when `recordings` is empty.
 * Each option displays the recording's name and, when present, its transcript.
 */
export function RecordingSelect({ value, onChange, recordings }: RecordingSelectProps) {
  return (
    <div className="space-y-2">
      <Label className="text-xs text-muted-foreground">
        Select a pre-recorded audio file to play.
      </Label>
      <Select value={value} onValueChange={onChange}>
        <SelectTrigger className="w-full">
          <SelectValue placeholder="Select a recording" />
        </SelectTrigger>
        <SelectContent>
          {recordings.length === 0 ? (
            // Sentinel option: disabled, so "__empty__" can never be emitted via onChange.
            <SelectItem value="__empty__" disabled>
              No recordings available
            </SelectItem>
          ) : (
            recordings.map((recording) => (
              <SelectItem key={recording.recording_id} value={recording.recording_id}>
                <span className="truncate">
                  {recordingDisplayName(recording)}
                </span>
                {recording.transcript && (
                  <span className="text-xs text-muted-foreground ml-2 truncate">
                    {recording.transcript}
                  </span>
                )}
              </SelectItem>
            ))
          )}
        </SelectContent>
      </Select>
    </div>
  );
}

View file

@ -4,6 +4,7 @@ import { useCallback, useEffect, useState } from 'react';
import { useWorkflow, useWorkflowOptional } from "@/app/workflow/[workflowId]/contexts/WorkflowContext";
import { useWorkflowStore } from "@/app/workflow/[workflowId]/stores/workflowStore";
import { TextOrAudioInput } from "@/components/flow/TextOrAudioInput";
import { Button } from "@/components/ui/button";
import { Dialog, DialogContent, DialogFooter, DialogHeader, DialogTitle } from "@/components/ui/dialog";
import { Input } from "@/components/ui/input";
@ -24,9 +25,12 @@ interface EdgeDetailsDialogProps {
const EdgeDetailsDialog = ({ open, onOpenChange, data, onSave }: EdgeDetailsDialogProps) => {
const readOnly = useWorkflowOptional()?.readOnly ?? false;
const { recordings } = useWorkflow();
const [condition, setCondition] = useState(data?.condition ?? '');
const [label, setLabel] = useState(data?.label ?? '');
const [transitionSpeech, setTransitionSpeech] = useState(data?.transition_speech ?? '');
const [transitionSpeechType, setTransitionSpeechType] = useState<'text' | 'audio'>(data?.transition_speech_type ?? 'text');
const [transitionSpeechRecordingId, setTransitionSpeechRecordingId] = useState(data?.transition_speech_recording_id ?? '');
// Update form state when data changes (e.g., from undo/redo)
useEffect(() => {
@ -34,13 +38,21 @@ const EdgeDetailsDialog = ({ open, onOpenChange, data, onSave }: EdgeDetailsDial
setCondition(data?.condition ?? '');
setLabel(data?.label ?? '');
setTransitionSpeech(data?.transition_speech ?? '');
setTransitionSpeechType(data?.transition_speech_type ?? 'text');
setTransitionSpeechRecordingId(data?.transition_speech_recording_id ?? '');
}
}, [data, open]);
const handleSave = useCallback(() => {
onSave({ condition: condition, label: label, transition_speech: transitionSpeech || undefined });
onSave({
condition,
label,
transition_speech: transitionSpeechType === 'text' ? (transitionSpeech || undefined) : undefined,
transition_speech_type: transitionSpeechType,
transition_speech_recording_id: transitionSpeechType === 'audio' ? (transitionSpeechRecordingId || undefined) : undefined,
});
onOpenChange(false);
}, [condition, label, transitionSpeech, onSave, onOpenChange]);
}, [condition, label, transitionSpeech, transitionSpeechType, transitionSpeechRecordingId, onSave, onOpenChange]);
// Handle Cmd+S / Ctrl+S keyboard shortcut to save
useEffect(() => {
@ -99,18 +111,28 @@ const EdgeDetailsDialog = ({ open, onOpenChange, data, onSave }: EdgeDetailsDial
<div className="grid gap-2">
<Label>Transition Speech</Label>
<Label className="text-xs text-muted-foreground">
Optional text the assistant will speak right before transitioning to the node.
This text will not be attached in Conversation Context. Use this as simple filler to reduce latency.
Optional text or audio the assistant will play right before transitioning to the node.
This will not be attached in Conversation Context. Use this as simple filler to reduce latency.
</Label>
<div className="flex items-start gap-2 rounded-md bg-amber-50 p-2 text-xs text-amber-700 border border-amber-200">
<AlertCircle className="h-3.5 w-3.5 mt-0.5 shrink-0" />
<span>This text is spoken as-is. For multilingual workflows, choose your phrasing carefully.</span>
</div>
<Textarea
value={transitionSpeech}
placeholder="e.g. Let me transfer you to our billing department..."
onChange={(e) => setTransitionSpeech(e.target.value)}
/>
<TextOrAudioInput
type={transitionSpeechType}
onTypeChange={setTransitionSpeechType}
recordingId={transitionSpeechRecordingId}
onRecordingIdChange={setTransitionSpeechRecordingId}
recordings={recordings ?? []}
>
<>
<div className="flex items-start gap-2 rounded-md bg-amber-50 p-2 text-xs text-amber-700 border border-amber-200">
<AlertCircle className="h-3.5 w-3.5 mt-0.5 shrink-0" />
<span>This text is spoken as-is. For multilingual workflows, choose your phrasing carefully.</span>
</div>
<Textarea
value={transitionSpeech}
placeholder="e.g. Let me transfer you to our billing department..."
onChange={(e) => setTransitionSpeech(e.target.value)}
/>
</>
</TextOrAudioInput>
</div>
</div>
<DialogFooter>

View file

@ -8,6 +8,7 @@ import type { RecordingResponseSchema } from "@/client/types.gen";
import { DocumentBadges } from "@/components/flow/DocumentBadges";
import { DocumentSelector } from "@/components/flow/DocumentSelector";
import { MentionTextarea } from "@/components/flow/MentionTextarea";
import { TextOrAudioInput } from "@/components/flow/TextOrAudioInput";
import { ToolBadges } from "@/components/flow/ToolBadges";
import { ToolSelector } from "@/components/flow/ToolSelector";
import { ExtractionVariable, FlowNodeData } from "@/components/flow/types";
@ -26,8 +27,12 @@ import { useNodeHandlers } from "./common/useNodeHandlers";
interface StartCallEditFormProps {
nodeData: FlowNodeData;
greetingType: 'text' | 'audio';
setGreetingType: (value: 'text' | 'audio') => void;
greeting: string;
setGreeting: (value: string) => void;
greetingRecordingId: string;
setGreetingRecordingId: (value: string) => void;
prompt: string;
setPrompt: (value: string) => void;
name: string;
@ -73,7 +78,9 @@ export const StartCall = memo(({ data, selected, id }: StartCallNodeProps) => {
const { saveWorkflow, tools, documents, recordings } = useWorkflow();
// Form state
const [greetingType, setGreetingType] = useState<'text' | 'audio'>(data.greeting_type ?? "text");
const [greeting, setGreeting] = useState(data.greeting ?? "");
const [greetingRecordingId, setGreetingRecordingId] = useState(data.greeting_recording_id ?? "");
const [prompt, setPrompt] = useState(data.prompt ?? "");
const [name, setName] = useState(data.name);
const [allowInterrupt, setAllowInterrupt] = useState(data.allow_interrupt ?? true);
@ -109,7 +116,9 @@ export const StartCall = memo(({ data, selected, id }: StartCallNodeProps) => {
handleSaveNodeData({
...data,
greeting: greeting || undefined,
greeting_type: greetingType,
greeting: greetingType === 'text' ? (greeting || undefined) : undefined,
greeting_recording_id: greetingType === 'audio' ? (greetingRecordingId || undefined) : undefined,
prompt,
name,
allow_interrupt: allowInterrupt,
@ -132,7 +141,9 @@ export const StartCall = memo(({ data, selected, id }: StartCallNodeProps) => {
// Reset form state when dialog opens
const handleOpenChange = (newOpen: boolean) => {
if (newOpen) {
setGreetingType(data.greeting_type ?? "text");
setGreeting(data.greeting ?? "");
setGreetingRecordingId(data.greeting_recording_id ?? "");
setPrompt(data.prompt ?? "");
setName(data.name);
setAllowInterrupt(data.allow_interrupt ?? true);
@ -154,7 +165,9 @@ export const StartCall = memo(({ data, selected, id }: StartCallNodeProps) => {
// Update form state when data changes (e.g., from undo/redo)
useEffect(() => {
if (open) {
setGreetingType(data.greeting_type ?? "text");
setGreeting(data.greeting ?? "");
setGreetingRecordingId(data.greeting_recording_id ?? "");
setPrompt(data.prompt ?? "");
setName(data.name);
setAllowInterrupt(data.allow_interrupt ?? true);
@ -247,8 +260,12 @@ export const StartCall = memo(({ data, selected, id }: StartCallNodeProps) => {
{open && (
<StartCallEditForm
nodeData={data}
greetingType={greetingType}
setGreetingType={setGreetingType}
greeting={greeting}
setGreeting={setGreeting}
greetingRecordingId={greetingRecordingId}
setGreetingRecordingId={setGreetingRecordingId}
prompt={prompt}
setPrompt={setPrompt}
name={name}
@ -288,8 +305,12 @@ export const StartCall = memo(({ data, selected, id }: StartCallNodeProps) => {
});
const StartCallEditForm = ({
greetingType,
setGreetingType,
greeting,
setGreeting,
greetingRecordingId,
setGreetingRecordingId,
prompt,
setPrompt,
name,
@ -362,15 +383,22 @@ const StartCallEditForm = ({
<Label>Greeting</Label>
<Label className="text-xs text-muted-foreground">
Optional greeting message played via TTS when the call starts. If set, this will be spoken directly instead of generating a response from the LLM. Supports template variables like {"{{variable_name}}"}.
Optional greeting played when the call starts. Choose between a text message (spoken via TTS) or a pre-recorded audio file.
</Label>
<MentionTextarea
value={greeting}
onChange={setGreeting}
className="min-h-[60px] max-h-[200px] resize-none overflow-y-auto"
placeholder="e.g. Hello {{first_name}}, this is Sarah calling from Acme Corp."
<TextOrAudioInput
type={greetingType}
onTypeChange={setGreetingType}
recordingId={greetingRecordingId}
onRecordingIdChange={setGreetingRecordingId}
recordings={recordings}
/>
>
<Textarea
value={greeting}
onChange={(e) => setGreeting(e.target.value)}
className="min-h-[60px] max-h-[200px] resize-none overflow-y-auto"
placeholder="e.g. Hello {{first_name}}, this is Sarah calling from Acme Corp."
/>
</TextOrAudioInput>
<Label>Prompt</Label>
<Label className="text-xs text-muted-foreground">

View file

@ -24,6 +24,8 @@ export type FlowNodeData = {
extraction_variables?: ExtractionVariable[];
add_global_prompt?: boolean;
greeting?: string;
greeting_type?: 'text' | 'audio';
greeting_recording_id?: string;
wait_for_user_greeting?: boolean;
detect_voicemail?: boolean;
delayed_start?: boolean;
@ -79,6 +81,8 @@ export type FlowEdgeData = {
condition: string;
label: string;
transition_speech?: string;
transition_speech_type?: 'text' | 'audio';
transition_speech_recording_id?: string;
invalid?: boolean;
validationMessage?: string | null;
}

View file

@ -2,6 +2,7 @@
import type { Team } from "@stackframe/stack";
import {
AudioLines,
Brain,
ChevronLeft,
ChevronRight,
@ -135,6 +136,11 @@ export function AppSidebar() {
url: "/files",
icon: Database,
},
{
title: "Recordings",
url: "/recordings",
icon: AudioLines,
},
// {
// title: "Integrations",
// url: "/integrations",