mirror of
https://github.com/dograh-hq/dograh.git
synced 2026-07-01 08:59:46 +02:00
feat: better interrupt strategies (#479)
* chore: drain active calls before rolling updates
* Use provisional VAD interruption strategy
* feat: wire provisional VAD configuration
* chore: refactor user turn strategies
* chore: bump pipecat
This commit is contained in:
parent
962d5afa12
commit
6937e01b49
12 changed files with 645 additions and 193 deletions
|
|
@ -6,7 +6,15 @@ import { Input } from "@/components/ui/input";
|
|||
import { Label } from "@/components/ui/label";
|
||||
import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from "@/components/ui/select";
|
||||
import { Switch } from "@/components/ui/switch";
|
||||
import { AmbientNoiseConfiguration, TurnStopStrategy, WorkflowConfigurations } from "@/types/workflow-configurations";
|
||||
import {
|
||||
AmbientNoiseConfiguration,
|
||||
DEFAULT_PROVISIONAL_VAD_PAUSE_SECS,
|
||||
DEFAULT_TURN_START_MIN_WORDS,
|
||||
TURN_START_STRATEGY_OPTIONS,
|
||||
TurnStartStrategy,
|
||||
TurnStopStrategy,
|
||||
WorkflowConfigurations,
|
||||
} from "@/types/workflow-configurations";
|
||||
|
||||
interface ConfigurationsDialogProps {
|
||||
open: boolean;
|
||||
|
|
@ -41,6 +49,15 @@ export const ConfigurationsDialog = ({
|
|||
const [smartTurnStopSecs, setSmartTurnStopSecs] = useState<number>(
|
||||
workflowConfigurations?.smart_turn_stop_secs || 2 // Default 2 seconds
|
||||
);
|
||||
const [turnStartStrategy, setTurnStartStrategy] = useState<TurnStartStrategy>(
|
||||
workflowConfigurations?.turn_start_strategy || 'default'
|
||||
);
|
||||
const [turnStartMinWords, setTurnStartMinWords] = useState<number>(
|
||||
workflowConfigurations?.turn_start_min_words || DEFAULT_TURN_START_MIN_WORDS
|
||||
);
|
||||
const [provisionalVadPauseSecs, setProvisionalVadPauseSecs] = useState<number>(
|
||||
workflowConfigurations?.provisional_vad_pause_secs || DEFAULT_PROVISIONAL_VAD_PAUSE_SECS
|
||||
);
|
||||
const [turnStopStrategy, setTurnStopStrategy] = useState<TurnStopStrategy>(
|
||||
workflowConfigurations?.turn_stop_strategy || 'transcription'
|
||||
);
|
||||
|
|
@ -48,6 +65,9 @@ export const ConfigurationsDialog = ({
|
|||
workflowConfigurations?.context_compaction_enabled ?? false
|
||||
);
|
||||
const [isSaving, setIsSaving] = useState(false);
|
||||
const selectedTurnStartStrategy = TURN_START_STRATEGY_OPTIONS.find(
|
||||
(option) => option.value === turnStartStrategy
|
||||
);
|
||||
|
||||
const handleSave = async () => {
|
||||
setIsSaving(true);
|
||||
|
|
@ -57,6 +77,9 @@ export const ConfigurationsDialog = ({
|
|||
max_call_duration: maxCallDuration,
|
||||
max_user_idle_timeout: maxUserIdleTimeout,
|
||||
smart_turn_stop_secs: smartTurnStopSecs,
|
||||
turn_start_strategy: turnStartStrategy,
|
||||
turn_start_min_words: turnStartMinWords,
|
||||
provisional_vad_pause_secs: provisionalVadPauseSecs,
|
||||
turn_stop_strategy: turnStopStrategy,
|
||||
context_compaction_enabled: contextCompactionEnabled,
|
||||
}, name);
|
||||
|
|
@ -76,6 +99,9 @@ export const ConfigurationsDialog = ({
|
|||
setMaxCallDuration(workflowConfigurations?.max_call_duration || 600);
|
||||
setMaxUserIdleTimeout(workflowConfigurations?.max_user_idle_timeout || 10);
|
||||
setSmartTurnStopSecs(workflowConfigurations?.smart_turn_stop_secs || 2);
|
||||
setTurnStartStrategy(workflowConfigurations?.turn_start_strategy || 'default');
|
||||
setTurnStartMinWords(workflowConfigurations?.turn_start_min_words || DEFAULT_TURN_START_MIN_WORDS);
|
||||
setProvisionalVadPauseSecs(workflowConfigurations?.provisional_vad_pause_secs || DEFAULT_PROVISIONAL_VAD_PAUSE_SECS);
|
||||
setTurnStopStrategy(workflowConfigurations?.turn_stop_strategy || 'transcription');
|
||||
setContextCompactionEnabled(workflowConfigurations?.context_compaction_enabled ?? false);
|
||||
}
|
||||
|
|
@ -218,6 +244,80 @@ export const ConfigurationsDialog = ({
|
|||
</p>
|
||||
</div>
|
||||
)}
|
||||
|
||||
<div className="space-y-2">
|
||||
<Label htmlFor="turn_start_strategy" className="text-xs">
|
||||
Interruption Strategy
|
||||
</Label>
|
||||
<Select
|
||||
value={turnStartStrategy}
|
||||
onValueChange={(value: TurnStartStrategy) => setTurnStartStrategy(value)}
|
||||
>
|
||||
<SelectTrigger id="turn_start_strategy">
|
||||
<SelectValue placeholder="Select strategy" />
|
||||
</SelectTrigger>
|
||||
<SelectContent>
|
||||
{TURN_START_STRATEGY_OPTIONS.map((option) => (
|
||||
<SelectItem key={option.value} value={option.value}>
|
||||
{option.label}
|
||||
</SelectItem>
|
||||
))}
|
||||
</SelectContent>
|
||||
</Select>
|
||||
<p className="text-xs text-muted-foreground">
|
||||
{selectedTurnStartStrategy?.description}
|
||||
</p>
|
||||
</div>
|
||||
|
||||
{turnStartStrategy === 'min_words' && (
|
||||
<div className="space-y-2">
|
||||
<Label htmlFor="turn_start_min_words" className="text-xs">
|
||||
Minimum Words Before Interruption
|
||||
</Label>
|
||||
<Input
|
||||
id="turn_start_min_words"
|
||||
type="number"
|
||||
step="1"
|
||||
min="1"
|
||||
max="10"
|
||||
value={turnStartMinWords}
|
||||
onChange={(e) => {
|
||||
const value = parseInt(e.target.value);
|
||||
if (!isNaN(value) && value >= 1) {
|
||||
setTurnStartMinWords(value);
|
||||
}
|
||||
}}
|
||||
/>
|
||||
<p className="text-xs text-muted-foreground">
|
||||
Number of transcribed words needed to interrupt while the bot is speaking. Default: {DEFAULT_TURN_START_MIN_WORDS}
|
||||
</p>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{turnStartStrategy === 'provisional_vad' && (
|
||||
<div className="space-y-2">
|
||||
<Label htmlFor="provisional_vad_pause_secs" className="text-xs">
|
||||
Provisional Pause (seconds)
|
||||
</Label>
|
||||
<Input
|
||||
id="provisional_vad_pause_secs"
|
||||
type="number"
|
||||
step="0.1"
|
||||
min="0.1"
|
||||
max="5"
|
||||
value={provisionalVadPauseSecs}
|
||||
onChange={(e) => {
|
||||
const value = parseFloat(e.target.value);
|
||||
if (!isNaN(value) && value >= 0.1) {
|
||||
setProvisionalVadPauseSecs(value);
|
||||
}
|
||||
}}
|
||||
/>
|
||||
<p className="text-xs text-muted-foreground">
|
||||
Seconds to pause bot audio while waiting for transcript confirmation. Default: {DEFAULT_PROVISIONAL_VAD_PAUSE_SECS}
|
||||
</p>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
|
||||
{/* Context Management Section */}
|
||||
|
|
@ -306,4 +406,3 @@ export const ConfigurationsDialog = ({
|
|||
</Dialog>
|
||||
);
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -52,6 +52,7 @@ interface WorkflowRunResponse {
|
|||
|
||||
const RUN_SHELL_HEIGHT_CLASS = "h-[calc(100svh-49px)] min-h-[calc(100svh-49px)] max-h-[calc(100svh-49px)]";
|
||||
const WAVEFORM_BAR_COUNT = 96;
|
||||
type SplitTrackPlaybackMode = 'both' | 'user' | 'bot';
|
||||
|
||||
function formatDuration(seconds?: number | null) {
|
||||
if (seconds == null || Number.isNaN(seconds)) return 'N/A';
|
||||
|
|
@ -124,19 +125,38 @@ async function loadWaveformPeaks(url: string) {
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Reports the length of an audio element in seconds.
 *
 * @param audio - The element to inspect, or `null` when none is available.
 * @returns The element's `duration` when it is a finite number; otherwise 0
 *          (no element, or a `duration` of `NaN` / `Infinity`).
 */
function getAudioDuration(audio: HTMLAudioElement | null) {
  if (!audio) {
    return 0;
  }

  const length = audio.duration;
  return Number.isFinite(length) ? length : 0;
}
|
||||
|
||||
/**
 * Summarises a group of audio elements as one shared timeline.
 *
 * @param audios - The elements that take part in playback.
 * @returns `duration`: the longest duration among the elements (as reported by
 *          `getAudioDuration`), and `currentTime`: the furthest playback
 *          position among them. Both are 0 for an empty list.
 */
function getAudioTimelineState(audios: HTMLAudioElement[]) {
  let duration = 0;
  let currentTime = 0;

  // One pass keeps both running maxima, starting from the same floor of 0.
  for (const audio of audios) {
    duration = Math.max(duration, getAudioDuration(audio));
    currentTime = Math.max(currentTime, audio.currentTime);
  }

  return { duration, currentTime };
}
|
||||
|
||||
/**
 * Moves an audio element's playhead to `startTime`.
 *
 * The position is capped at the element's duration when that duration is
 * known (non-zero according to `getAudioDuration`); otherwise `startTime`
 * is applied as-is.
 *
 * @param audio - The element whose `currentTime` is assigned.
 * @param startTime - The requested playback position in seconds.
 */
function syncAudioCurrentTime(audio: HTMLAudioElement, startTime: number) {
  const trackLength = getAudioDuration(audio);
  // A length of 0 means "unknown", so there is nothing to clamp against.
  const upperBound = trackLength || startTime;

  audio.currentTime = Math.min(startTime, upperBound);
}
|
||||
|
||||
function WaveformLane({
|
||||
peaks,
|
||||
track,
|
||||
position,
|
||||
isActive,
|
||||
}: {
|
||||
peaks: number[] | null;
|
||||
track: 'user' | 'bot';
|
||||
position: 'top' | 'bottom';
|
||||
isActive: boolean;
|
||||
}) {
|
||||
return (
|
||||
<div
|
||||
className={cn(
|
||||
'absolute left-3 right-3 flex gap-0.5',
|
||||
isActive ? 'opacity-85' : 'opacity-25',
|
||||
position === 'top' ? 'top-5 h-12 items-end' : 'bottom-5 h-12 items-start'
|
||||
)}
|
||||
>
|
||||
|
|
@ -145,7 +165,7 @@ function WaveformLane({
|
|||
<span
|
||||
key={`${track}-${index}`}
|
||||
className={cn(
|
||||
'min-h-1 flex-1 rounded-full opacity-85',
|
||||
'min-h-1 flex-1 rounded-full',
|
||||
track === 'user' ? 'bg-sky-500' : 'bg-emerald-500'
|
||||
)}
|
||||
style={{ height: `${Math.round(peak * 100)}%` }}
|
||||
|
|
@ -178,6 +198,21 @@ function SplitTracksSection({
|
|||
const [isLoading, setIsLoading] = useState(false);
|
||||
const [isPlaying, setIsPlaying] = useState(false);
|
||||
const [progress, setProgress] = useState(0);
|
||||
const [playbackMode, setPlaybackMode] = useState<SplitTrackPlaybackMode>('both');
|
||||
|
||||
/**
 * Collects the mounted audio elements that take part in the given playback mode.
 *
 * The user element is included unless the mode is 'bot', the bot element unless
 * the mode is 'user'. Elements whose ref is not currently set are left out.
 * The user element, when present, comes first.
 */
const getPlaybackAudios = (mode: SplitTrackPlaybackMode) => {
  const userCandidate = mode === 'bot' ? null : userAudioRef.current;
  const botCandidate = mode === 'user' ? null : botAudioRef.current;

  return [userCandidate, botCandidate].filter(
    (audio): audio is HTMLAudioElement => Boolean(audio)
  );
};
|
||||
|
||||
useEffect(() => {
|
||||
let isActive = true;
|
||||
|
|
@ -190,6 +225,7 @@ function SplitTracksSection({
|
|||
setPeaks({ user: null, bot: null });
|
||||
setIsPlaying(false);
|
||||
setProgress(0);
|
||||
setPlaybackMode('both');
|
||||
setIsLoading(true);
|
||||
|
||||
async function loadTracks() {
|
||||
|
|
@ -234,12 +270,17 @@ function SplitTracksSection({
|
|||
|
||||
let frameId: number;
|
||||
const updateProgress = () => {
|
||||
const userAudio = userAudioRef.current;
|
||||
const botAudio = botAudioRef.current;
|
||||
const userDuration = Number.isFinite(userAudio?.duration) ? userAudio?.duration ?? 0 : 0;
|
||||
const botDuration = Number.isFinite(botAudio?.duration) ? botAudio?.duration ?? 0 : 0;
|
||||
const duration = Math.max(userDuration, botDuration);
|
||||
const currentTime = Math.max(userAudio?.currentTime ?? 0, botAudio?.currentTime ?? 0);
|
||||
const activeAudios: HTMLAudioElement[] = [];
|
||||
|
||||
if (playbackMode !== 'bot' && userAudioRef.current) {
|
||||
activeAudios.push(userAudioRef.current);
|
||||
}
|
||||
|
||||
if (playbackMode !== 'user' && botAudioRef.current) {
|
||||
activeAudios.push(botAudioRef.current);
|
||||
}
|
||||
|
||||
const { duration, currentTime } = getAudioTimelineState(activeAudios);
|
||||
|
||||
setProgress(duration > 0 ? Math.min(1, currentTime / duration) : 0);
|
||||
frameId = window.requestAnimationFrame(updateProgress);
|
||||
|
|
@ -247,7 +288,7 @@ function SplitTracksSection({
|
|||
|
||||
frameId = window.requestAnimationFrame(updateProgress);
|
||||
return () => window.cancelAnimationFrame(frameId);
|
||||
}, [isPlaying]);
|
||||
}, [isPlaying, playbackMode]);
|
||||
|
||||
const pauseTracks = () => {
|
||||
userAudioRef.current?.pause();
|
||||
|
|
@ -256,38 +297,68 @@ function SplitTracksSection({
|
|||
};
|
||||
|
||||
const handleTrackEnded = () => {
|
||||
const userAudio = userAudioRef.current;
|
||||
const botAudio = botAudioRef.current;
|
||||
const userDone = !userAudio || userAudio.ended;
|
||||
const botDone = !botAudio || botAudio.ended;
|
||||
const activeAudios = getPlaybackAudios(playbackMode);
|
||||
const activeTracksDone = activeAudios.length > 0 && activeAudios.every((audio) => audio.ended);
|
||||
|
||||
if (userDone && botDone) {
|
||||
if (activeTracksDone) {
|
||||
setIsPlaying(false);
|
||||
setProgress(1);
|
||||
}
|
||||
};
|
||||
|
||||
/**
 * Switches split-track playback to `nextMode`, keeping the playhead position.
 *
 * Steps:
 *  1. Read the current position from the tracks of the active mode.
 *  2. Pause both elements and seek the tracks of the next mode to that position
 *     (or to 0 when the position is within 0.1s of the end of the next tracks).
 *  3. Store the new mode and the matching progress fraction.
 *  4. If playback was running, resume it on the tracks of the next mode.
 *
 * @param nextMode - The playback mode to activate; a no-op when it is already active.
 */
const handlePlaybackModeChange = async (nextMode: SplitTrackPlaybackMode) => {
  if (nextMode === playbackMode) return;

  const { currentTime } = getAudioTimelineState(getPlaybackAudios(playbackMode));
  const nextAudios = getPlaybackAudios(nextMode);
  const { duration } = getAudioTimelineState(nextAudios);
  // Restart from the beginning when the playhead already sits at the end of the next tracks.
  const startTime = duration > 0 && currentTime >= duration - 0.1 ? 0 : currentTime;

  // Pause the elements directly rather than through pauseTracks() so the
  // isPlaying check below still decides whether playback resumes.
  userAudioRef.current?.pause();
  botAudioRef.current?.pause();
  nextAudios.forEach((audio) => syncAudioCurrentTime(audio, startTime));
  setPlaybackMode(nextMode);
  setProgress(duration > 0 ? Math.min(1, startTime / duration) : 0);

  if (!isPlaying) return;

  if (nextAudios.length === 0) {
    setIsPlaying(false);
    return;
  }

  try {
    await Promise.all(nextAudios.map((audio) => audio.play()));
    setIsPlaying(true);
  } catch (error) {
    // A pending play() promise rejects with AbortError when a later pause()
    // interrupts it, e.g. when the user switches modes again before playback
    // has started. That is not a failure: whoever paused owns the playback
    // state now, so pausing here would stop the tracks the newer switch
    // just started.
    if (error instanceof DOMException && error.name === 'AbortError') return;

    pauseTracks();
    console.error('Error switching split track playback:', error);
  }
};
|
||||
|
||||
/**
 * Click handler for the per-track buttons.
 *
 * Clicking the track that is already the sole active one returns to 'both';
 * clicking any other track makes that track the sole active one.
 */
const handleTrackButtonClick = (track: 'user' | 'bot') => {
  const alreadySoloed = playbackMode === track;

  // Fire-and-forget: handlePlaybackModeChange handles its own playback errors.
  void handlePlaybackModeChange(alreadySoloed ? 'both' : track);
};
|
||||
|
||||
const togglePlayback = async () => {
|
||||
const userAudio = userAudioRef.current;
|
||||
const botAudio = botAudioRef.current;
|
||||
if (!userAudio || !botAudio || !signedUrls.user || !signedUrls.bot) return;
|
||||
const playbackAudios = getPlaybackAudios(playbackMode);
|
||||
if (!canPlay || playbackAudios.length === 0) return;
|
||||
|
||||
if (isPlaying) {
|
||||
pauseTracks();
|
||||
return;
|
||||
}
|
||||
|
||||
const userDuration = Number.isFinite(userAudio.duration) ? userAudio.duration : 0;
|
||||
const botDuration = Number.isFinite(botAudio.duration) ? botAudio.duration : 0;
|
||||
const duration = Math.max(userDuration, botDuration);
|
||||
const currentTime = Math.max(userAudio.currentTime, botAudio.currentTime);
|
||||
const { duration, currentTime } = getAudioTimelineState(playbackAudios);
|
||||
const startTime = duration > 0 && currentTime >= duration - 0.1 ? 0 : currentTime;
|
||||
|
||||
userAudio.currentTime = Math.min(startTime, userDuration || startTime);
|
||||
botAudio.currentTime = Math.min(startTime, botDuration || startTime);
|
||||
userAudioRef.current?.pause();
|
||||
botAudioRef.current?.pause();
|
||||
playbackAudios.forEach((audio) => syncAudioCurrentTime(audio, startTime));
|
||||
|
||||
try {
|
||||
await Promise.all([userAudio.play(), botAudio.play()]);
|
||||
await Promise.all(playbackAudios.map((audio) => audio.play()));
|
||||
setIsPlaying(true);
|
||||
} catch (error) {
|
||||
pauseTracks();
|
||||
|
|
@ -295,8 +366,16 @@ function SplitTracksSection({
|
|||
}
|
||||
};
|
||||
|
||||
const canPlay = Boolean(signedUrls.user && signedUrls.bot);
|
||||
const canPlay =
|
||||
playbackMode === 'both'
|
||||
? Boolean(signedUrls.user && signedUrls.bot)
|
||||
: playbackMode === 'user'
|
||||
? Boolean(signedUrls.user)
|
||||
: Boolean(signedUrls.bot);
|
||||
const progressPercent = Math.round(progress * 1000) / 10;
|
||||
const userTrackActive = playbackMode !== 'bot';
|
||||
const botTrackActive = playbackMode !== 'user';
|
||||
const playbackTargetLabel = playbackMode === 'both' ? 'split tracks' : `${playbackMode} track`;
|
||||
|
||||
return (
|
||||
<Card className="border-border">
|
||||
|
|
@ -319,16 +398,42 @@ function SplitTracksSection({
|
|||
</CardHeader>
|
||||
<CardContent className="space-y-4">
|
||||
<div className="flex flex-wrap items-center justify-between gap-3">
|
||||
<div className="flex items-center gap-2">
|
||||
<span className="inline-flex items-center gap-1.5 text-sm font-medium text-sky-600">
|
||||
<div className="flex items-center gap-2" role="group" aria-label="Playback tracks">
|
||||
<Button
|
||||
type="button"
|
||||
variant="outline"
|
||||
size="sm"
|
||||
aria-pressed={userTrackActive}
|
||||
aria-label={playbackMode === 'user' ? 'Play both tracks' : 'Play user track only'}
|
||||
onClick={() => handleTrackButtonClick('user')}
|
||||
className={cn(
|
||||
'gap-1.5',
|
||||
userTrackActive
|
||||
? 'border-sky-200 bg-sky-50 text-sky-700 hover:bg-sky-100 dark:border-sky-900/60 dark:bg-sky-950/30 dark:text-sky-300'
|
||||
: 'text-muted-foreground opacity-60'
|
||||
)}
|
||||
>
|
||||
<UserRound className="h-4 w-4" />
|
||||
User
|
||||
</span>
|
||||
</Button>
|
||||
<span className="h-4 w-px bg-border" />
|
||||
<span className="inline-flex items-center gap-1.5 text-sm font-medium text-emerald-600">
|
||||
<Button
|
||||
type="button"
|
||||
variant="outline"
|
||||
size="sm"
|
||||
aria-pressed={botTrackActive}
|
||||
aria-label={playbackMode === 'bot' ? 'Play both tracks' : 'Play bot track only'}
|
||||
onClick={() => handleTrackButtonClick('bot')}
|
||||
className={cn(
|
||||
'gap-1.5',
|
||||
botTrackActive
|
||||
? 'border-emerald-200 bg-emerald-50 text-emerald-700 hover:bg-emerald-100 dark:border-emerald-900/60 dark:bg-emerald-950/30 dark:text-emerald-300'
|
||||
: 'text-muted-foreground opacity-60'
|
||||
)}
|
||||
>
|
||||
<Bot className="h-4 w-4" />
|
||||
Bot
|
||||
</span>
|
||||
</Button>
|
||||
</div>
|
||||
<div className="flex items-center gap-2">
|
||||
<Button
|
||||
|
|
@ -360,15 +465,15 @@ function SplitTracksSection({
|
|||
variant={isPlaying ? 'default' : 'outline'}
|
||||
onClick={togglePlayback}
|
||||
disabled={!canPlay}
|
||||
aria-label={isPlaying ? 'Pause split tracks' : 'Play split tracks'}
|
||||
aria-label={isPlaying ? `Pause ${playbackTargetLabel}` : `Play ${playbackTargetLabel}`}
|
||||
className="h-10 w-10 shrink-0"
|
||||
>
|
||||
{isPlaying ? <Pause className="h-4 w-4" /> : <Play className="h-4 w-4" />}
|
||||
</Button>
|
||||
<div className="relative h-36 min-w-0 flex-1 overflow-hidden rounded-lg border border-border/70 bg-background">
|
||||
<div className="absolute left-3 right-3 top-1/2 h-px bg-border/80" />
|
||||
<WaveformLane peaks={peaks.user} track="user" position="top" />
|
||||
<WaveformLane peaks={peaks.bot} track="bot" position="bottom" />
|
||||
<WaveformLane peaks={peaks.user} track="user" position="top" isActive={userTrackActive} />
|
||||
<WaveformLane peaks={peaks.bot} track="bot" position="bottom" isActive={botTrackActive} />
|
||||
{canPlay && (
|
||||
<div className="pointer-events-none absolute inset-x-3 inset-y-3">
|
||||
<div
|
||||
|
|
|
|||
|
|
@ -45,8 +45,12 @@ import { useAuth } from "@/lib/auth";
|
|||
import logger from "@/lib/logger";
|
||||
import {
|
||||
type AmbientNoiseConfiguration,
|
||||
DEFAULT_PROVISIONAL_VAD_PAUSE_SECS,
|
||||
DEFAULT_TURN_START_MIN_WORDS,
|
||||
DEFAULT_VOICEMAIL_DETECTION_CONFIGURATION,
|
||||
DEFAULT_WORKFLOW_CONFIGURATIONS,
|
||||
TURN_START_STRATEGY_OPTIONS,
|
||||
type TurnStartStrategy,
|
||||
type TurnStopStrategy,
|
||||
type VoicemailDetectionConfiguration,
|
||||
type WorkflowConfigurations,
|
||||
|
|
@ -280,6 +284,15 @@ function GeneralSection({
|
|||
const [maxCallDuration, setMaxCallDuration] = useState(workflowConfigurations.max_call_duration || 600);
|
||||
const [maxUserIdleTimeout, setMaxUserIdleTimeout] = useState(workflowConfigurations.max_user_idle_timeout || 10);
|
||||
const [smartTurnStopSecs, setSmartTurnStopSecs] = useState(workflowConfigurations.smart_turn_stop_secs || 2);
|
||||
const [turnStartStrategy, setTurnStartStrategy] = useState<TurnStartStrategy>(
|
||||
workflowConfigurations.turn_start_strategy || "default",
|
||||
);
|
||||
const [turnStartMinWords, setTurnStartMinWords] = useState(
|
||||
workflowConfigurations.turn_start_min_words || DEFAULT_TURN_START_MIN_WORDS,
|
||||
);
|
||||
const [provisionalVadPauseSecs, setProvisionalVadPauseSecs] = useState(
|
||||
workflowConfigurations.provisional_vad_pause_secs || DEFAULT_PROVISIONAL_VAD_PAUSE_SECS,
|
||||
);
|
||||
const [turnStopStrategy, setTurnStopStrategy] = useState<TurnStopStrategy>(
|
||||
workflowConfigurations.turn_stop_strategy || "transcription",
|
||||
);
|
||||
|
|
@ -291,6 +304,9 @@ function GeneralSection({
|
|||
const [audioUploadError, setAudioUploadError] = useState<string | null>(null);
|
||||
const ambientFileInputRef = useRef<HTMLInputElement>(null);
|
||||
const { playingId, toggle: togglePlayback } = useAudioPlayback();
|
||||
const selectedTurnStartStrategy = TURN_START_STRATEGY_OPTIONS.find(
|
||||
(option) => option.value === turnStartStrategy,
|
||||
);
|
||||
|
||||
const isDirty = useMemo(() => {
|
||||
const initAmbient = workflowConfigurations.ambient_noise_configuration || DEFAULT_AMBIENT_NOISE_CONFIG;
|
||||
|
|
@ -300,10 +316,13 @@ function GeneralSection({
|
|||
maxCallDuration !== (workflowConfigurations.max_call_duration || 600) ||
|
||||
maxUserIdleTimeout !== (workflowConfigurations.max_user_idle_timeout || 10) ||
|
||||
smartTurnStopSecs !== (workflowConfigurations.smart_turn_stop_secs || 2) ||
|
||||
turnStartStrategy !== (workflowConfigurations.turn_start_strategy || "default") ||
|
||||
turnStartMinWords !== (workflowConfigurations.turn_start_min_words || DEFAULT_TURN_START_MIN_WORDS) ||
|
||||
provisionalVadPauseSecs !== (workflowConfigurations.provisional_vad_pause_secs || DEFAULT_PROVISIONAL_VAD_PAUSE_SECS) ||
|
||||
turnStopStrategy !== (workflowConfigurations.turn_stop_strategy || "transcription") ||
|
||||
contextCompactionEnabled !== (workflowConfigurations.context_compaction_enabled ?? false)
|
||||
);
|
||||
}, [name, workflowName, ambientNoiseConfig, maxCallDuration, maxUserIdleTimeout, smartTurnStopSecs, turnStopStrategy, contextCompactionEnabled, workflowConfigurations]);
|
||||
}, [name, workflowName, ambientNoiseConfig, maxCallDuration, maxUserIdleTimeout, smartTurnStopSecs, turnStartStrategy, turnStartMinWords, provisionalVadPauseSecs, turnStopStrategy, contextCompactionEnabled, workflowConfigurations]);
|
||||
|
||||
useUnsavedChanges("general", isDirty);
|
||||
|
||||
|
|
@ -375,6 +394,9 @@ function GeneralSection({
|
|||
max_call_duration: maxCallDuration,
|
||||
max_user_idle_timeout: maxUserIdleTimeout,
|
||||
smart_turn_stop_secs: smartTurnStopSecs,
|
||||
turn_start_strategy: turnStartStrategy,
|
||||
turn_start_min_words: turnStartMinWords,
|
||||
provisional_vad_pause_secs: provisionalVadPauseSecs,
|
||||
turn_stop_strategy: turnStopStrategy,
|
||||
context_compaction_enabled: contextCompactionEnabled,
|
||||
},
|
||||
|
|
@ -589,6 +611,71 @@ function GeneralSection({
|
|||
</p>
|
||||
</div>
|
||||
)}
|
||||
<div className="space-y-2">
|
||||
<Label htmlFor="turn_start_strategy" className="text-xs">Interruption Strategy</Label>
|
||||
<Select
|
||||
value={turnStartStrategy}
|
||||
onValueChange={(value: TurnStartStrategy) => setTurnStartStrategy(value)}
|
||||
>
|
||||
<SelectTrigger id="turn_start_strategy">
|
||||
<SelectValue placeholder="Select strategy" />
|
||||
</SelectTrigger>
|
||||
<SelectContent>
|
||||
{TURN_START_STRATEGY_OPTIONS.map((option) => (
|
||||
<SelectItem key={option.value} value={option.value}>
|
||||
{option.label}
|
||||
</SelectItem>
|
||||
))}
|
||||
</SelectContent>
|
||||
</Select>
|
||||
<p className="text-xs text-muted-foreground">
|
||||
{selectedTurnStartStrategy?.description}
|
||||
</p>
|
||||
</div>
|
||||
{turnStartStrategy === "min_words" && (
|
||||
<div className="space-y-2">
|
||||
<Label htmlFor="turn_start_min_words" className="text-xs">
|
||||
Minimum Words Before Interruption
|
||||
</Label>
|
||||
<Input
|
||||
id="turn_start_min_words"
|
||||
type="number"
|
||||
step="1"
|
||||
min="1"
|
||||
max="10"
|
||||
value={turnStartMinWords}
|
||||
onChange={(e) => {
|
||||
const value = parseInt(e.target.value);
|
||||
if (!isNaN(value) && value >= 1) setTurnStartMinWords(value);
|
||||
}}
|
||||
/>
|
||||
<p className="text-xs text-muted-foreground">
|
||||
Number of transcribed words needed to interrupt while the bot is speaking. Default: {DEFAULT_TURN_START_MIN_WORDS}
|
||||
</p>
|
||||
</div>
|
||||
)}
|
||||
{turnStartStrategy === "provisional_vad" && (
|
||||
<div className="space-y-2">
|
||||
<Label htmlFor="provisional_vad_pause_secs" className="text-xs">
|
||||
Provisional Pause (seconds)
|
||||
</Label>
|
||||
<Input
|
||||
id="provisional_vad_pause_secs"
|
||||
type="number"
|
||||
step="0.1"
|
||||
min="0.1"
|
||||
max="5"
|
||||
value={provisionalVadPauseSecs}
|
||||
onChange={(e) => {
|
||||
const value = parseFloat(e.target.value);
|
||||
if (!isNaN(value) && value >= 0.1) setProvisionalVadPauseSecs(value);
|
||||
}}
|
||||
/>
|
||||
<p className="text-xs text-muted-foreground">
|
||||
Seconds to pause bot audio while waiting for transcript confirmation. Default: {DEFAULT_PROVISIONAL_VAD_PAUSE_SECS}
|
||||
</p>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
|
||||
<Separator />
|
||||
|
|
|
|||
|
|
@ -9,6 +9,31 @@ export interface AmbientNoiseConfiguration {
|
|||
}
|
||||
|
||||
/** Strategy used to detect the end of a user turn. */
export type TurnStopStrategy = 'transcription' | 'turn_analyzer';

/** Strategy used to detect the start of a user turn, i.e. an interruption. */
export type TurnStartStrategy = 'default' | 'min_words' | 'provisional_vad';

/** Default for `turn_start_min_words`. */
export const DEFAULT_TURN_START_MIN_WORDS = 3;

/** Default for `provisional_vad_pause_secs`, in seconds. */
export const DEFAULT_PROVISIONAL_VAD_PAUSE_SECS = 1.5;

/**
 * Selectable turn-start strategies, in display order.
 *
 * Each entry pairs a strategy value with the label and the explanatory
 * description shown for it.
 */
export const TURN_START_STRATEGY_OPTIONS: {
  value: TurnStartStrategy;
  label: string;
  description: string;
}[] = [
  {
    value: 'default',
    label: 'Default',
    description: 'Use the platform default: external STT turn signals when available, otherwise local VAD.',
  },
  {
    value: 'min_words',
    label: 'Minimum words',
    description: 'Wait for a minimum number of transcribed words before interrupting bot speech.',
  },
  {
    value: 'provisional_vad',
    label: 'Provisional VAD',
    description: 'Pause bot audio on voice activity, then confirm the interruption with transcription.',
  },
];
|
||||
|
||||
export interface VoicemailDetectionConfiguration {
|
||||
enabled: boolean;
|
||||
|
|
@ -61,6 +86,9 @@ export interface WorkflowConfigurations {
|
|||
max_call_duration: number; // Maximum call duration in seconds
|
||||
max_user_idle_timeout: number; // Maximum user idle time in seconds
|
||||
smart_turn_stop_secs: number; // Timeout in seconds for incomplete turn detection
|
||||
turn_start_strategy: TurnStartStrategy; // Strategy for detecting start of user turn/interruption
|
||||
turn_start_min_words: number; // Minimum transcribed words required for minimum-word interruptions
|
||||
provisional_vad_pause_secs: number; // Seconds to pause bot output while awaiting transcript confirmation
|
||||
turn_stop_strategy: TurnStopStrategy; // Strategy for detecting end of user turn
|
||||
dictionary?: string; // Comma-separated words for voice agent to listen for
|
||||
voicemail_detection?: VoicemailDetectionConfiguration;
|
||||
|
|
@ -78,6 +106,9 @@ export const DEFAULT_WORKFLOW_CONFIGURATIONS: WorkflowConfigurations = {
|
|||
max_call_duration: 600, // 10 minutes
|
||||
max_user_idle_timeout: 10, // 10 seconds
|
||||
smart_turn_stop_secs: 2, // 2 seconds
|
||||
turn_start_strategy: 'default', // Default to platform-chosen user turn start detection
|
||||
turn_start_min_words: DEFAULT_TURN_START_MIN_WORDS,
|
||||
provisional_vad_pause_secs: DEFAULT_PROVISIONAL_VAD_PAUSE_SECS,
|
||||
turn_stop_strategy: 'transcription', // Default to transcription-based detection
|
||||
dictionary: ''
|
||||
};
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue