mirror of
https://github.com/dograh-hq/dograh.git
synced 2026-06-10 08:05:22 +02:00
feat: allow uploading recording as part of node transition
This commit is contained in:
parent
bb5f56bfb7
commit
65c76ca7ff
36 changed files with 2255 additions and 201 deletions
97
ui/src/components/flow/TextOrAudioInput.tsx
Normal file
97
ui/src/components/flow/TextOrAudioInput.tsx
Normal file
|
|
@ -0,0 +1,97 @@
|
|||
import { useId } from "react";

import type { RecordingResponseSchema } from "@/client/types.gen";
import { Label } from "@/components/ui/label";
import { RadioGroup, RadioGroupItem } from "@/components/ui/radio-group";
import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from "@/components/ui/select";
|
||||
|
||||
/** The two input modes the toggle can switch between. */
type TextOrAudioMode = 'text' | 'audio';

/** Props accepted by the TextOrAudioInput component. */
interface TextOrAudioInputProps {
    /** Currently selected mode; decides whether `children` or the recording dropdown is shown. */
    type: TextOrAudioMode;
    /** Invoked with the newly selected mode when the radio selection changes. */
    onTypeChange: (type: TextOrAudioMode) => void;
    /** Id of the currently selected recording, passed to the dropdown as its value. */
    recordingId: string;
    /** Invoked with the recording id picked in the dropdown. */
    onRecordingIdChange: (id: string) => void;
    /** Recordings offered in the dropdown; the component falls back to an empty list when omitted. */
    recordings?: RecordingResponseSchema[];
    /** Rendered when type === 'text' */
    children: React.ReactNode;
}
|
||||
|
||||
/**
 * Radio toggle between a caller-supplied text input and a recording dropdown.
 *
 * When `type` is `'text'` the `children` are rendered as-is; otherwise a
 * `RecordingSelect` bound to `recordingId` / `onRecordingIdChange` is shown.
 *
 * @param type - Currently selected mode.
 * @param onTypeChange - Called with the new mode when a radio option is chosen.
 * @param recordingId - Value handed to the recording dropdown.
 * @param onRecordingIdChange - Called with the id picked in the dropdown.
 * @param recordings - Options for the dropdown; defaults to an empty list.
 * @param children - Content rendered in text mode.
 */
export function TextOrAudioInput({
    type,
    onTypeChange,
    recordingId,
    onRecordingIdChange,
    recordings = [],
    children,
}: TextOrAudioInputProps) {
    // Per-instance ids: with fixed ids, two mounted instances would share the
    // same DOM ids and every <Label htmlFor> would resolve to the first one.
    const baseId = useId();
    const textOptionId = `${baseId}-text`;
    const audioOptionId = `${baseId}-audio`;

    // The radio group reports a plain string; narrow it instead of asserting
    // so only the two rendered options are ever forwarded to the caller.
    const handleTypeChange = (value: string) => {
        if (value === 'text' || value === 'audio') {
            onTypeChange(value);
        }
    };

    return (
        <>
            <RadioGroup
                value={type}
                onValueChange={handleTypeChange}
                className="flex items-center gap-4"
            >
                <div className="flex items-center gap-2">
                    <RadioGroupItem value="text" id={textOptionId} />
                    <Label htmlFor={textOptionId} className="font-normal cursor-pointer">Text</Label>
                </div>
                <div className="flex items-center gap-2">
                    <RadioGroupItem value="audio" id={audioOptionId} />
                    <Label htmlFor={audioOptionId} className="font-normal cursor-pointer">Audio</Label>
                </div>
            </RadioGroup>
            {type === 'text' ? (
                children
            ) : (
                <RecordingSelect
                    value={recordingId}
                    onChange={onRecordingIdChange}
                    recordings={recordings}
                />
            )}
        </>
    );
}
|
||||
|
||||
/** Props accepted by the RecordingSelect dropdown. */
interface RecordingSelectProps {
    /** Recordings listed as options; an empty list shows a disabled placeholder row. */
    recordings: RecordingResponseSchema[];
    /** Id of the selected recording, used as the dropdown value. */
    value: string;
    /** Invoked with the id of the recording the user picks. */
    onChange: (id: string) => void;
}
|
||||
|
||||
/**
 * Text shown for a recording in the dropdown: its `metadata.original_filename`
 * when that is a non-empty string, otherwise its `recording_id`.
 *
 * The metadata value is checked at runtime rather than asserted, so a
 * non-string value is never handed to React as a child.
 */
function recordingDisplayName(recording: RecordingResponseSchema): string {
    const filename: unknown = recording.metadata?.original_filename;
    return typeof filename === 'string' && filename !== '' ? filename : recording.recording_id;
}

/**
 * Dropdown to select a pre-recorded audio file.
 * Re-exported so callers that only need the dropdown (e.g. tool configs with
 * their own none/custom/audio radio) can use it directly.
 *
 * @param value - Id of the currently selected recording.
 * @param onChange - Called with the id of the recording the user picks.
 * @param recordings - Recordings to list; each option shows its display name
 *   followed by its transcript when one is present.
 */
export function RecordingSelect({ value, onChange, recordings }: RecordingSelectProps) {
    return (
        <div className="space-y-2">
            <Label className="text-xs text-muted-foreground">
                Select a pre-recorded audio file to play.
            </Label>
            <Select value={value} onValueChange={onChange}>
                <SelectTrigger className="w-full">
                    <SelectValue placeholder="Select a recording" />
                </SelectTrigger>
                <SelectContent>
                    {recordings.length === 0 ? (
                        // Placeholder row; disabled so it cannot be chosen as a recording id.
                        <SelectItem value="__empty__" disabled>
                            No recordings available
                        </SelectItem>
                    ) : (
                        recordings.map((r) => (
                            <SelectItem key={r.recording_id} value={r.recording_id}>
                                <span className="truncate">
                                    {recordingDisplayName(r)}
                                </span>
                                {r.transcript && (
                                    <span className="text-xs text-muted-foreground ml-2 truncate">
                                        — {r.transcript}
                                    </span>
                                )}
                            </SelectItem>
                        ))
                    )}
                </SelectContent>
            </Select>
        </div>
    );
}
|
||||
|
|
@ -4,6 +4,7 @@ import { useCallback, useEffect, useState } from 'react';
|
|||
|
||||
import { useWorkflow, useWorkflowOptional } from "@/app/workflow/[workflowId]/contexts/WorkflowContext";
|
||||
import { useWorkflowStore } from "@/app/workflow/[workflowId]/stores/workflowStore";
|
||||
import { TextOrAudioInput } from "@/components/flow/TextOrAudioInput";
|
||||
import { Button } from "@/components/ui/button";
|
||||
import { Dialog, DialogContent, DialogFooter, DialogHeader, DialogTitle } from "@/components/ui/dialog";
|
||||
import { Input } from "@/components/ui/input";
|
||||
|
|
@ -24,9 +25,12 @@ interface EdgeDetailsDialogProps {
|
|||
|
||||
const EdgeDetailsDialog = ({ open, onOpenChange, data, onSave }: EdgeDetailsDialogProps) => {
|
||||
const readOnly = useWorkflowOptional()?.readOnly ?? false;
|
||||
const { recordings } = useWorkflow();
|
||||
const [condition, setCondition] = useState(data?.condition ?? '');
|
||||
const [label, setLabel] = useState(data?.label ?? '');
|
||||
const [transitionSpeech, setTransitionSpeech] = useState(data?.transition_speech ?? '');
|
||||
const [transitionSpeechType, setTransitionSpeechType] = useState<'text' | 'audio'>(data?.transition_speech_type ?? 'text');
|
||||
const [transitionSpeechRecordingId, setTransitionSpeechRecordingId] = useState(data?.transition_speech_recording_id ?? '');
|
||||
|
||||
// Update form state when data changes (e.g., from undo/redo)
|
||||
useEffect(() => {
|
||||
|
|
@ -34,13 +38,21 @@ const EdgeDetailsDialog = ({ open, onOpenChange, data, onSave }: EdgeDetailsDial
|
|||
setCondition(data?.condition ?? '');
|
||||
setLabel(data?.label ?? '');
|
||||
setTransitionSpeech(data?.transition_speech ?? '');
|
||||
setTransitionSpeechType(data?.transition_speech_type ?? 'text');
|
||||
setTransitionSpeechRecordingId(data?.transition_speech_recording_id ?? '');
|
||||
}
|
||||
}, [data, open]);
|
||||
|
||||
const handleSave = useCallback(() => {
|
||||
onSave({ condition: condition, label: label, transition_speech: transitionSpeech || undefined });
|
||||
onSave({
|
||||
condition,
|
||||
label,
|
||||
transition_speech: transitionSpeechType === 'text' ? (transitionSpeech || undefined) : undefined,
|
||||
transition_speech_type: transitionSpeechType,
|
||||
transition_speech_recording_id: transitionSpeechType === 'audio' ? (transitionSpeechRecordingId || undefined) : undefined,
|
||||
});
|
||||
onOpenChange(false);
|
||||
}, [condition, label, transitionSpeech, onSave, onOpenChange]);
|
||||
}, [condition, label, transitionSpeech, transitionSpeechType, transitionSpeechRecordingId, onSave, onOpenChange]);
|
||||
|
||||
// Handle Cmd+S / Ctrl+S keyboard shortcut to save
|
||||
useEffect(() => {
|
||||
|
|
@ -99,18 +111,28 @@ const EdgeDetailsDialog = ({ open, onOpenChange, data, onSave }: EdgeDetailsDial
|
|||
<div className="grid gap-2">
|
||||
<Label>Transition Speech</Label>
|
||||
<Label className="text-xs text-muted-foreground">
|
||||
Optional text the assistant will speak right before transitioning to the node.
|
||||
This text will not be attached in Conversation Context. Use this as simple filler to reduce latency.
|
||||
Optional text or audio the assistant will play right before transitioning to the node.
|
||||
This will not be attached in Conversation Context. Use this as simple filler to reduce latency.
|
||||
</Label>
|
||||
<div className="flex items-start gap-2 rounded-md bg-amber-50 p-2 text-xs text-amber-700 border border-amber-200">
|
||||
<AlertCircle className="h-3.5 w-3.5 mt-0.5 shrink-0" />
|
||||
<span>This text is spoken as-is. For multilingual workflows, choose your phrasing carefully.</span>
|
||||
</div>
|
||||
<Textarea
|
||||
value={transitionSpeech}
|
||||
placeholder="e.g. Let me transfer you to our billing department..."
|
||||
onChange={(e) => setTransitionSpeech(e.target.value)}
|
||||
/>
|
||||
<TextOrAudioInput
|
||||
type={transitionSpeechType}
|
||||
onTypeChange={setTransitionSpeechType}
|
||||
recordingId={transitionSpeechRecordingId}
|
||||
onRecordingIdChange={setTransitionSpeechRecordingId}
|
||||
recordings={recordings ?? []}
|
||||
>
|
||||
<>
|
||||
<div className="flex items-start gap-2 rounded-md bg-amber-50 p-2 text-xs text-amber-700 border border-amber-200">
|
||||
<AlertCircle className="h-3.5 w-3.5 mt-0.5 shrink-0" />
|
||||
<span>This text is spoken as-is. For multilingual workflows, choose your phrasing carefully.</span>
|
||||
</div>
|
||||
<Textarea
|
||||
value={transitionSpeech}
|
||||
placeholder="e.g. Let me transfer you to our billing department..."
|
||||
onChange={(e) => setTransitionSpeech(e.target.value)}
|
||||
/>
|
||||
</>
|
||||
</TextOrAudioInput>
|
||||
</div>
|
||||
</div>
|
||||
<DialogFooter>
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@ import type { RecordingResponseSchema } from "@/client/types.gen";
|
|||
import { DocumentBadges } from "@/components/flow/DocumentBadges";
|
||||
import { DocumentSelector } from "@/components/flow/DocumentSelector";
|
||||
import { MentionTextarea } from "@/components/flow/MentionTextarea";
|
||||
import { TextOrAudioInput } from "@/components/flow/TextOrAudioInput";
|
||||
import { ToolBadges } from "@/components/flow/ToolBadges";
|
||||
import { ToolSelector } from "@/components/flow/ToolSelector";
|
||||
import { ExtractionVariable, FlowNodeData } from "@/components/flow/types";
|
||||
|
|
@ -26,8 +27,12 @@ import { useNodeHandlers } from "./common/useNodeHandlers";
|
|||
|
||||
interface StartCallEditFormProps {
|
||||
nodeData: FlowNodeData;
|
||||
greetingType: 'text' | 'audio';
|
||||
setGreetingType: (value: 'text' | 'audio') => void;
|
||||
greeting: string;
|
||||
setGreeting: (value: string) => void;
|
||||
greetingRecordingId: string;
|
||||
setGreetingRecordingId: (value: string) => void;
|
||||
prompt: string;
|
||||
setPrompt: (value: string) => void;
|
||||
name: string;
|
||||
|
|
@ -73,7 +78,9 @@ export const StartCall = memo(({ data, selected, id }: StartCallNodeProps) => {
|
|||
const { saveWorkflow, tools, documents, recordings } = useWorkflow();
|
||||
|
||||
// Form state
|
||||
const [greetingType, setGreetingType] = useState<'text' | 'audio'>(data.greeting_type ?? "text");
|
||||
const [greeting, setGreeting] = useState(data.greeting ?? "");
|
||||
const [greetingRecordingId, setGreetingRecordingId] = useState(data.greeting_recording_id ?? "");
|
||||
const [prompt, setPrompt] = useState(data.prompt ?? "");
|
||||
const [name, setName] = useState(data.name);
|
||||
const [allowInterrupt, setAllowInterrupt] = useState(data.allow_interrupt ?? true);
|
||||
|
|
@ -109,7 +116,9 @@ export const StartCall = memo(({ data, selected, id }: StartCallNodeProps) => {
|
|||
|
||||
handleSaveNodeData({
|
||||
...data,
|
||||
greeting: greeting || undefined,
|
||||
greeting_type: greetingType,
|
||||
greeting: greetingType === 'text' ? (greeting || undefined) : undefined,
|
||||
greeting_recording_id: greetingType === 'audio' ? (greetingRecordingId || undefined) : undefined,
|
||||
prompt,
|
||||
name,
|
||||
allow_interrupt: allowInterrupt,
|
||||
|
|
@ -132,7 +141,9 @@ export const StartCall = memo(({ data, selected, id }: StartCallNodeProps) => {
|
|||
// Reset form state when dialog opens
|
||||
const handleOpenChange = (newOpen: boolean) => {
|
||||
if (newOpen) {
|
||||
setGreetingType(data.greeting_type ?? "text");
|
||||
setGreeting(data.greeting ?? "");
|
||||
setGreetingRecordingId(data.greeting_recording_id ?? "");
|
||||
setPrompt(data.prompt ?? "");
|
||||
setName(data.name);
|
||||
setAllowInterrupt(data.allow_interrupt ?? true);
|
||||
|
|
@ -154,7 +165,9 @@ export const StartCall = memo(({ data, selected, id }: StartCallNodeProps) => {
|
|||
// Update form state when data changes (e.g., from undo/redo)
|
||||
useEffect(() => {
|
||||
if (open) {
|
||||
setGreetingType(data.greeting_type ?? "text");
|
||||
setGreeting(data.greeting ?? "");
|
||||
setGreetingRecordingId(data.greeting_recording_id ?? "");
|
||||
setPrompt(data.prompt ?? "");
|
||||
setName(data.name);
|
||||
setAllowInterrupt(data.allow_interrupt ?? true);
|
||||
|
|
@ -247,8 +260,12 @@ export const StartCall = memo(({ data, selected, id }: StartCallNodeProps) => {
|
|||
{open && (
|
||||
<StartCallEditForm
|
||||
nodeData={data}
|
||||
greetingType={greetingType}
|
||||
setGreetingType={setGreetingType}
|
||||
greeting={greeting}
|
||||
setGreeting={setGreeting}
|
||||
greetingRecordingId={greetingRecordingId}
|
||||
setGreetingRecordingId={setGreetingRecordingId}
|
||||
prompt={prompt}
|
||||
setPrompt={setPrompt}
|
||||
name={name}
|
||||
|
|
@ -288,8 +305,12 @@ export const StartCall = memo(({ data, selected, id }: StartCallNodeProps) => {
|
|||
});
|
||||
|
||||
const StartCallEditForm = ({
|
||||
greetingType,
|
||||
setGreetingType,
|
||||
greeting,
|
||||
setGreeting,
|
||||
greetingRecordingId,
|
||||
setGreetingRecordingId,
|
||||
prompt,
|
||||
setPrompt,
|
||||
name,
|
||||
|
|
@ -362,15 +383,22 @@ const StartCallEditForm = ({
|
|||
|
||||
<Label>Greeting</Label>
|
||||
<Label className="text-xs text-muted-foreground">
|
||||
Optional greeting message played via TTS when the call starts. If set, this will be spoken directly instead of generating a response from the LLM. Supports template variables like {"{{variable_name}}"}.
|
||||
Optional greeting played when the call starts. Choose between a text message (spoken via TTS) or a pre-recorded audio file.
|
||||
</Label>
|
||||
<MentionTextarea
|
||||
value={greeting}
|
||||
onChange={setGreeting}
|
||||
className="min-h-[60px] max-h-[200px] resize-none overflow-y-auto"
|
||||
placeholder="e.g. Hello {{first_name}}, this is Sarah calling from Acme Corp."
|
||||
<TextOrAudioInput
|
||||
type={greetingType}
|
||||
onTypeChange={setGreetingType}
|
||||
recordingId={greetingRecordingId}
|
||||
onRecordingIdChange={setGreetingRecordingId}
|
||||
recordings={recordings}
|
||||
/>
|
||||
>
|
||||
<Textarea
|
||||
value={greeting}
|
||||
onChange={(e) => setGreeting(e.target.value)}
|
||||
className="min-h-[60px] max-h-[200px] resize-none overflow-y-auto"
|
||||
placeholder="e.g. Hello {{first_name}}, this is Sarah calling from Acme Corp."
|
||||
/>
|
||||
</TextOrAudioInput>
|
||||
|
||||
<Label>Prompt</Label>
|
||||
<Label className="text-xs text-muted-foreground">
|
||||
|
|
|
|||
|
|
@ -24,6 +24,8 @@ export type FlowNodeData = {
|
|||
extraction_variables?: ExtractionVariable[];
|
||||
add_global_prompt?: boolean;
|
||||
greeting?: string;
|
||||
greeting_type?: 'text' | 'audio';
|
||||
greeting_recording_id?: string;
|
||||
wait_for_user_greeting?: boolean;
|
||||
detect_voicemail?: boolean;
|
||||
delayed_start?: boolean;
|
||||
|
|
@ -79,6 +81,8 @@ export type FlowEdgeData = {
|
|||
condition: string;
|
||||
label: string;
|
||||
transition_speech?: string;
|
||||
transition_speech_type?: 'text' | 'audio';
|
||||
transition_speech_recording_id?: string;
|
||||
invalid?: boolean;
|
||||
validationMessage?: string | null;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@
|
|||
|
||||
import type { Team } from "@stackframe/stack";
|
||||
import {
|
||||
AudioLines,
|
||||
Brain,
|
||||
ChevronLeft,
|
||||
ChevronRight,
|
||||
|
|
@ -135,6 +136,11 @@ export function AppSidebar() {
|
|||
url: "/files",
|
||||
icon: Database,
|
||||
},
|
||||
{
|
||||
title: "Recordings",
|
||||
url: "/recordings",
|
||||
icon: AudioLines,
|
||||
},
|
||||
// {
|
||||
// title: "Integrations",
|
||||
// url: "/integrations",
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue