feat: persist split user and bot audio

This commit is contained in:
Abhishek Kumar 2026-06-16 15:19:49 +05:30
parent dd3f2e7323
commit 3d1886c450
30 changed files with 1322 additions and 253 deletions

View file

@ -19,6 +19,16 @@ interface UseWebSocketRTCProps {
onNodeTransition?: (transition: ConversationNodeTransitionItem) => void;
}
/** Connection states published through the hook's `connectionStatus` state. */
type ConnectionStatus = 'idle' | 'connecting' | 'connected' | 'failed';
/** Options accepted by `cleanupConnection`; every field is optional. */
interface CleanupConnectionOptions {
/** Whether the teardown is a normal end of call. Defaults to `true`; the value is also stored as the new `isCompleted`. */
graceful?: boolean;
/** Status to publish after cleanup. Defaults to `'idle'` when graceful, otherwise `'failed'`. */
status?: ConnectionStatus;
/** Pass `false` to leave the WebSocket (and its ref) untouched; any other value closes it. */
closeWebSocket?: boolean;
/** Pass `false` to leave the peer connection untouched; any other value closes it. */
closePeerConnection?: boolean;
/** When `true`, the peer connection is closed after a 500 ms delay instead of immediately. Defaults to `false`. */
delayPeerClose?: boolean;
}
const HANDLED_SERVICE_ERROR_TYPES = new Set([
'quota_exceeded',
'insufficient_credits',
@ -27,7 +37,7 @@ const HANDLED_SERVICE_ERROR_TYPES = new Set([
]);
export const useWebSocketRTC = ({ workflowId, workflowRunId, accessToken, initialContextVariables, onNodeTransition }: UseWebSocketRTCProps) => {
const [connectionStatus, setConnectionStatus] = useState<'idle' | 'connecting' | 'connected' | 'failed'>('idle');
const [connectionStatus, setConnectionStatus] = useState<ConnectionStatus>('idle');
const [connectionActive, setConnectionActive] = useState(false);
const [isCompleted, setIsCompleted] = useState(false);
const [apiKeyModalOpen, setApiKeyModalOpen] = useState(false);
@ -62,11 +72,22 @@ export const useWebSocketRTC = ({ workflowId, workflowRunId, accessToken, initia
const wsRef = useRef<WebSocket | null>(null);
const timeStartRef = useRef<number | null>(null);
const onNodeTransitionRef = useRef(onNodeTransition);
const connectionActiveRef = useRef(connectionActive);
const isCompletedRef = useRef(isCompleted);
const gracefulDisconnectRef = useRef(false);
useEffect(() => {
onNodeTransitionRef.current = onNodeTransition;
}, [onNodeTransition]);
useEffect(() => {
connectionActiveRef.current = connectionActive;
}, [connectionActive]);
useEffect(() => {
isCompletedRef.current = isCompleted;
}, [isCompleted]);
// Generate a cryptographically secure unique ID
const generateSecureId = () => {
// Use Web Crypto API to generate random bytes
@ -95,6 +116,68 @@ export const useWebSocketRTC = ({ workflowId, workflowRunId, accessToken, initia
return `${wsUrl}/api/v1/ws/signaling/${workflowId}/${workflowRunId}?token=${accessToken}`;
}, [workflowId, workflowRunId, accessToken]);
/**
 * Stops every transceiver and outgoing track on `pc`, then closes it.
 *
 * @param pc - Peer connection to tear down; a `null` value is a no-op.
 * @param delayClose - When true, the final `close()` is deferred by 500 ms.
 */
const closePeerConnection = useCallback((pc: RTCPeerConnection | null, delayClose = false) => {
  if (!pc) {
    return;
  }

  // Older implementations may lack getTransceivers / transceiver.stop, hence the feature checks.
  const transceivers = pc.getTransceivers ? pc.getTransceivers() : [];
  for (const transceiver of transceivers) {
    if (!transceiver.stop) {
      continue;
    }
    try {
      transceiver.stop();
    } catch (stopError) {
      logger.debug('Failed to stop transceiver during cleanup:', stopError);
    }
  }

  // Release the local tracks attached to each sender.
  for (const { track } of pc.getSenders()) {
    track?.stop();
  }

  const finalizeClose = () => {
    // Only clear the shared ref if it still points at the connection being closed.
    if (pcRef.current === pc) {
      pcRef.current = null;
    }
    if (pc.signalingState !== 'closed') {
      pc.close();
    }
  };

  if (!delayClose) {
    finalizeClose();
    return;
  }
  setTimeout(finalizeClose, 500);
}, []);
/**
 * Single teardown path for the call: updates refs and React state, then closes
 * the WebSocket and the peer connection unless the options opt out.
 *
 * @param options - See `CleanupConnectionOptions`; an omitted object means a
 *   graceful cleanup that ends in the `'idle'` status.
 */
const cleanupConnection = useCallback((options: CleanupConnectionOptions = {}) => {
  const isGraceful = options.graceful ?? true;
  const nextStatus = options.status ?? (isGraceful ? 'idle' : 'failed');
  const shouldCloseSocket = options.closeWebSocket !== false;
  const shouldClosePeer = options.closePeerConnection !== false;

  // Refs are written synchronously so event handlers see the new values
  // without waiting for a re-render.
  gracefulDisconnectRef.current = isGraceful;
  connectionActiveRef.current = false;
  isCompletedRef.current = isGraceful;

  setConnectionActive(false);
  setIsCompleted(isGraceful);
  setConnectionStatus(nextStatus);

  if (shouldCloseSocket) {
    const socket = wsRef.current;
    if (socket) {
      const alreadyShuttingDown =
        socket.readyState === WebSocket.CLOSED || socket.readyState === WebSocket.CLOSING;
      if (!alreadyShuttingDown) {
        socket.close();
      }
    }
    wsRef.current = null;
  }

  if (shouldClosePeer) {
    closePeerConnection(pcRef.current, options.delayPeerClose ?? false);
  }
}, [closePeerConnection]);
const createPeerConnection = () => {
// Build ICE servers list
const iceServers: RTCIceServer[] = [];
@ -155,43 +238,36 @@ export const useWebSocketRTC = ({ workflowId, workflowRunId, accessToken, initia
}
});
pc.addEventListener('iceconnectionstatechange', () => {
logger.info(`ICE connection state changed: ${pc.iceConnectionState}`);
if (pc.iceConnectionState === 'connected' || pc.iceConnectionState === 'completed') {
const handlePeerStateChange = () => {
logger.info(`Peer connection state changed: ${pc.connectionState}; ICE: ${pc.iceConnectionState}`);
if (
pc.connectionState === 'connected' ||
pc.iceConnectionState === 'connected' ||
pc.iceConnectionState === 'completed'
) {
setConnectionStatus('connected');
} else if (pc.iceConnectionState === 'failed') {
setConnectionStatus('failed');
} else if (pc.iceConnectionState === 'disconnected') {
// Server-initiated disconnect - clean up gracefully
logger.info('Server initiated disconnect - cleaning up connection');
// Close WebSocket if still open
if (wsRef.current) {
wsRef.current.close();
wsRef.current = null;
}
// Mark as completed to trigger recording check
setConnectionActive(false);
setIsCompleted(true);
setConnectionStatus('idle');
// Clean up peer connection
if (pc.getTransceivers) {
pc.getTransceivers().forEach((transceiver) => {
if (transceiver.stop) {
transceiver.stop();
}
});
}
pc.getSenders().forEach((sender) => {
if (sender.track) {
sender.track.stop();
}
});
return;
}
});
if (pc.connectionState === 'failed' || pc.iceConnectionState === 'failed') {
cleanupConnection({ graceful: false, status: 'failed' });
return;
}
if (
pc.connectionState === 'closed' ||
pc.connectionState === 'disconnected' ||
pc.iceConnectionState === 'closed' ||
pc.iceConnectionState === 'disconnected'
) {
logger.info('Peer connection ended - cleaning up connection');
cleanupConnection({ graceful: true, status: 'idle' });
}
};
pc.addEventListener('iceconnectionstatechange', handlePeerStateChange);
pc.addEventListener('connectionstatechange', handlePeerStateChange);
pc.addEventListener('track', (evt) => {
if (evt.track.kind === 'audio' && audioRef.current) {
@ -221,11 +297,23 @@ export const useWebSocketRTC = ({ workflowId, workflowRunId, accessToken, initia
reject(error);
};
ws.onclose = () => {
ws.onclose = (event) => {
logger.info('WebSocket closed');
wsRef.current = null;
if (event.reason === 'call ended') {
cleanupConnection({
graceful: true,
status: 'idle',
closeWebSocket: false,
});
return;
}
// Don't set failed status if already completed (graceful disconnect)
if (connectionActive && !isCompleted) {
if (
connectionActiveRef.current &&
!isCompletedRef.current &&
!gracefulDisconnectRef.current
) {
setConnectionStatus('failed');
}
};
@ -245,6 +333,7 @@ export const useWebSocketRTC = ({ workflowId, workflowRunId, accessToken, initia
type: 'answer',
sdp: answer.sdp
});
connectionActiveRef.current = true;
setConnectionActive(true);
logger.info('Remote description set');
}
@ -281,25 +370,19 @@ export const useWebSocketRTC = ({ workflowId, workflowRunId, accessToken, initia
setApiKeyError(message.payload.message || 'Service quota exceeded');
setApiKeyModalOpen(true);
// Stop the connection gracefully
setConnectionStatus('failed');
setConnectionActive(false);
// Close WebSocket and peer connection
if (wsRef.current) {
wsRef.current.close();
wsRef.current = null;
}
if (pcRef.current) {
pcRef.current.close();
pcRef.current = null;
}
// Stop the connection and surface the handled service error.
cleanupConnection({ graceful: false, status: 'failed' });
} else {
// Log other errors as actual errors
logger.error('Server error:', message.payload);
}
break;
case 'call-ended':
logger.info('Call ended by server:', message.payload);
cleanupConnection({ graceful: true, status: 'idle' });
break;
case 'rtf-user-transcription': {
const transcription = message.payload;
@ -503,7 +586,7 @@ export const useWebSocketRTC = ({ workflowId, workflowRunId, accessToken, initia
}
};
});
}, [getWebSocketUrl, connectionActive, isCompleted]);
}, [getWebSocketUrl, cleanupConnection]);
const negotiate = async () => {
const pc = pcRef.current;
@ -552,7 +635,12 @@ export const useWebSocketRTC = ({ workflowId, workflowRunId, accessToken, initia
const start = async () => {
if (isStarting || !accessToken) return;
gracefulDisconnectRef.current = false;
connectionActiveRef.current = false;
isCompletedRef.current = false;
setIsStarting(true);
setConnectionActive(false);
setIsCompleted(false);
setConnectionStatus('connecting');
try {
@ -676,40 +764,7 @@ export const useWebSocketRTC = ({ workflowId, workflowRunId, accessToken, initia
};
const stop = () => {
setConnectionActive(false);
setIsCompleted(true);
setConnectionStatus('idle');
// Close WebSocket
if (wsRef.current) {
wsRef.current.close();
wsRef.current = null;
}
// Close peer connection
const pc = pcRef.current;
if (!pc) return;
if (pc.getTransceivers) {
pc.getTransceivers().forEach((transceiver) => {
if (transceiver.stop) {
transceiver.stop();
}
});
}
pc.getSenders().forEach((sender) => {
if (sender.track) {
sender.track.stop();
}
});
setTimeout(() => {
if (pcRef.current) {
pcRef.current.close();
pcRef.current = null;
}
}, 500);
cleanupConnection({ graceful: true, status: 'idle', delayPeerClose: true });
};
// Cleanup on unmount

View file

@ -1,6 +1,18 @@
'use client';
import { Check, Copy, ExternalLink, FileText, Video } from 'lucide-react';
import {
Bot,
Check,
Copy,
Download,
ExternalLink,
FileText,
Loader2,
Pause,
Play,
UserRound,
Video,
} from 'lucide-react';
import Link from 'next/link';
import { useParams } from 'next/navigation';
import posthog from 'posthog-js';
@ -18,13 +30,16 @@ import { PostHogEvent } from '@/constants/posthog-events';
import { WORKFLOW_RUN_MODES } from '@/constants/workflowRunModes';
import { useOnboarding } from '@/context/OnboardingContext';
import { useAuth } from '@/lib/auth';
import { downloadFile } from '@/lib/files';
import { downloadFile, getSignedUrl } from '@/lib/files';
import { cn } from '@/lib/utils';
interface WorkflowRunResponse {
mode: string;
is_completed: boolean;
transcript_url: string | null;
recording_url: string | null;
user_recording_url: string | null;
bot_recording_url: string | null;
cost_info: {
dograh_token_usage?: number | null;
call_duration_seconds?: number | null;
@ -36,6 +51,7 @@ interface WorkflowRunResponse {
}
const RUN_SHELL_HEIGHT_CLASS = "h-[calc(100svh-49px)] min-h-[calc(100svh-49px)] max-h-[calc(100svh-49px)]";
const WAVEFORM_BAR_COUNT = 96;
function formatDuration(seconds?: number | null) {
if (seconds == null || Number.isNaN(seconds)) return 'N/A';
@ -71,6 +87,309 @@ function MetricCard({ label, value }: { label: string; value: string }) {
);
}
/**
 * Reduces the first channel of a decoded audio buffer to `WAVEFORM_BAR_COUNT`
 * bar heights.
 *
 * Each bar is the RMS of its slice of samples, scaled by 5 and clamped to the
 * range [0.08, 1] so that silent stretches still render a minimal bar.
 *
 * @param audioBuffer - Decoded audio; only channel 0 is read.
 * @returns One height per bar, in the range 0.08..1.
 */
function buildWaveformPeaks(audioBuffer: AudioBuffer) {
  const samples = audioBuffer.getChannelData(0);
  const barWidth = Math.max(1, Math.floor(samples.length / WAVEFORM_BAR_COUNT));
  const heights: number[] = [];

  for (let bar = 0; bar < WAVEFORM_BAR_COUNT; bar += 1) {
    const from = bar * barWidth;
    const to = Math.min(from + barWidth, samples.length);

    // Accumulate the squared amplitude of this bar's slice.
    let energy = 0;
    let cursor = from;
    while (cursor < to) {
      energy += samples[cursor] * samples[cursor];
      cursor += 1;
    }

    // Bars past the end of a very short buffer have an empty slice; the
    // divisor floor of 1 keeps them at an RMS of 0.
    const rms = Math.sqrt(energy / Math.max(1, to - from));
    heights.push(Math.max(0.08, Math.min(1, rms * 5)));
  }

  return heights;
}
/**
 * Downloads an audio file and converts it into waveform bar heights.
 *
 * @param url - URL of the audio file to fetch and decode.
 * @returns The bar heights, or `null` when the browser exposes no
 *   `AudioContext` implementation.
 * @throws Error when the download returns a non-2xx status; also rejects when
 *   the network request or the audio decoding fails.
 */
async function loadWaveformPeaks(url: string) {
  const AudioContextConstructor =
    window.AudioContext ||
    (window as typeof window & { webkitAudioContext?: typeof AudioContext })
      .webkitAudioContext;

  // Check decoder support first so an unsupported browser does not download
  // the whole recording for nothing.
  if (!AudioContextConstructor) return null;

  const response = await fetch(url);
  if (!response.ok) {
    // Without this check the error body would be handed to decodeAudioData and
    // surface as a misleading decoding failure.
    throw new Error(`Failed to fetch audio for waveform (HTTP ${response.status})`);
  }
  const audioData = await response.arrayBuffer();

  const audioContext = new AudioContextConstructor();
  try {
    const decoded = await audioContext.decodeAudioData(audioData);
    return buildWaveformPeaks(decoded);
  } finally {
    // Always release the context, even when decoding throws.
    void audioContext.close();
  }
}
/**
 * One horizontal lane of waveform bars inside the split-track player.
 *
 * @param peaks - Bar heights in 0..1, or `null` to render a flat placeholder line.
 * @param track - Which speaker the lane belongs to; selects the bar colour.
 * @param position - `top` anchors bars to the lane's bottom edge, `bottom` to its top edge.
 */
function WaveformLane({
  peaks,
  track,
  position,
}: {
  peaks: number[] | null;
  track: 'user' | 'bot';
  position: 'top' | 'bottom';
}) {
  const laneClassName = cn(
    'absolute left-3 right-3 flex gap-0.5',
    position === 'top' ? 'top-5 h-12 items-end' : 'bottom-5 h-12 items-start'
  );

  // No waveform data yet: show a thin centre line instead of bars.
  if (!peaks) {
    return (
      <div className={laneClassName}>
        <div className="my-auto h-px w-full bg-border" />
      </div>
    );
  }

  const barClassName = cn(
    'min-h-1 flex-1 rounded-full opacity-85',
    track === 'user' ? 'bg-sky-500' : 'bg-emerald-500'
  );

  return (
    <div className={laneClassName}>
      {peaks.map((peak, index) => (
        <span
          key={`${track}-${index}`}
          className={barClassName}
          style={{ height: `${Math.round(peak * 100)}%` }}
        />
      ))}
    </div>
  );
}
/**
 * Card that plays the separately recorded user and bot audio tracks together
 * and draws a waveform lane for each.
 *
 * The storage keys are exchanged for signed URLs, both files are decoded into
 * waveform peaks, and two hidden `<audio>` elements are driven in parallel by
 * a single play/pause button. Each track can also be downloaded on its own.
 *
 * @param userRecordingUrl - Storage key/URL of the user-side recording.
 * @param botRecordingUrl - Storage key/URL of the bot-side recording.
 */
function SplitTracksSection({
  userRecordingUrl,
  botRecordingUrl,
}: {
  userRecordingUrl: string;
  botRecordingUrl: string;
}) {
  const userAudioRef = useRef<HTMLAudioElement | null>(null);
  const botAudioRef = useRef<HTMLAudioElement | null>(null);
  const [signedUrls, setSignedUrls] = useState<{ user: string | null; bot: string | null }>({
    user: null,
    bot: null,
  });
  const [peaks, setPeaks] = useState<{ user: number[] | null; bot: number[] | null }>({
    user: null,
    bot: null,
  });
  const [isLoading, setIsLoading] = useState(false);
  const [isPlaying, setIsPlaying] = useState(false);
  const [progress, setProgress] = useState(0);

  // Reset the player and (re)load signed URLs plus waveforms whenever the
  // recordings change.
  useEffect(() => {
    // Guards against state updates from a load that was superseded or unmounted.
    let isActive = true;
    const userAudio = userAudioRef.current;
    const botAudio = botAudioRef.current;

    userAudio?.pause();
    botAudio?.pause();
    setSignedUrls({ user: null, bot: null });
    setPeaks({ user: null, bot: null });
    setIsPlaying(false);
    setProgress(0);
    setIsLoading(true);

    async function loadTracks() {
      try {
        const [userUrl, botUrl] = await Promise.all([
          getSignedUrl(userRecordingUrl, true),
          getSignedUrl(botRecordingUrl, true),
        ]);
        if (!isActive) return;

        setSignedUrls({ user: userUrl, bot: botUrl });
        if (!userUrl || !botUrl) return;

        const [userPeaks, botPeaks] = await Promise.all([
          loadWaveformPeaks(userUrl),
          loadWaveformPeaks(botUrl),
        ]);
        if (isActive) {
          setPeaks({ user: userPeaks, bot: botPeaks });
        }
      } catch (error) {
        console.error('Error loading split track waveforms:', error);
      } finally {
        if (isActive) {
          setIsLoading(false);
        }
      }
    }

    void loadTracks();

    return () => {
      isActive = false;
      userAudio?.pause();
      botAudio?.pause();
    };
  }, [userRecordingUrl, botRecordingUrl]);

  // While playing, mirror the audio position into `progress` once per frame.
  useEffect(() => {
    if (!isPlaying) return;

    let frameId: number;
    const updateProgress = () => {
      const userAudio = userAudioRef.current;
      const botAudio = botAudioRef.current;
      const userDuration = Number.isFinite(userAudio?.duration) ? userAudio?.duration ?? 0 : 0;
      const botDuration = Number.isFinite(botAudio?.duration) ? botAudio?.duration ?? 0 : 0;
      // The longer track defines the timeline; the furthest playhead defines the position.
      const duration = Math.max(userDuration, botDuration);
      const currentTime = Math.max(userAudio?.currentTime ?? 0, botAudio?.currentTime ?? 0);
      setProgress(duration > 0 ? Math.min(1, currentTime / duration) : 0);
      frameId = window.requestAnimationFrame(updateProgress);
    };

    frameId = window.requestAnimationFrame(updateProgress);
    return () => window.cancelAnimationFrame(frameId);
  }, [isPlaying]);

  const pauseTracks = () => {
    userAudioRef.current?.pause();
    botAudioRef.current?.pause();
    setIsPlaying(false);
  };

  // Playback is only finished once both tracks have ended.
  const handleTrackEnded = () => {
    const userAudio = userAudioRef.current;
    const botAudio = botAudioRef.current;
    const userDone = !userAudio || userAudio.ended;
    const botDone = !botAudio || botAudio.ended;

    if (userDone && botDone) {
      setIsPlaying(false);
      setProgress(1);
    }
  };

  const togglePlayback = async () => {
    const userAudio = userAudioRef.current;
    const botAudio = botAudioRef.current;
    if (!userAudio || !botAudio || !signedUrls.user || !signedUrls.bot) return;

    if (isPlaying) {
      pauseTracks();
      return;
    }

    const userDuration = Number.isFinite(userAudio.duration) ? userAudio.duration : 0;
    const botDuration = Number.isFinite(botAudio.duration) ? botAudio.duration : 0;
    const duration = Math.max(userDuration, botDuration);
    const currentTime = Math.max(userAudio.currentTime, botAudio.currentTime);
    // Restart from the beginning when the playhead is (almost) at the end.
    const startTime = duration > 0 && currentTime >= duration - 0.1 ? 0 : currentTime;

    userAudio.currentTime = Math.min(startTime, userDuration || startTime);
    botAudio.currentTime = Math.min(startTime, botDuration || startTime);

    // Calling play() on an element that already sits at its end rewinds it to
    // 0, which would replay a shorter track from the start over the longer
    // one. Only start tracks that still have audio after `startTime`. A
    // duration of 0 means metadata is not known yet, so play optimistically.
    const hasAudioLeft = (trackDuration: number) =>
      trackDuration === 0 || startTime < trackDuration;

    try {
      const pendingPlays: Promise<void>[] = [];
      if (hasAudioLeft(userDuration)) pendingPlays.push(userAudio.play());
      if (hasAudioLeft(botDuration)) pendingPlays.push(botAudio.play());
      await Promise.all(pendingPlays);
      setIsPlaying(true);
    } catch (error) {
      pauseTracks();
      console.error('Error playing split tracks:', error);
    }
  };

  const canPlay = Boolean(signedUrls.user && signedUrls.bot);
  // Percentage with one decimal place for the playhead position.
  const progressPercent = Math.round(progress * 1000) / 10;

  return (
    <Card className="border-border">
      <audio
        ref={userAudioRef}
        src={signedUrls.user ?? undefined}
        preload="metadata"
        className="hidden"
        onEnded={handleTrackEnded}
      />
      <audio
        ref={botAudioRef}
        src={signedUrls.bot ?? undefined}
        preload="metadata"
        className="hidden"
        onEnded={handleTrackEnded}
      />
      <CardHeader className="pb-3">
        <CardTitle className="text-lg">Split Tracks</CardTitle>
      </CardHeader>
      <CardContent className="space-y-4">
        <div className="flex flex-wrap items-center justify-between gap-3">
          <div className="flex items-center gap-2">
            <span className="inline-flex items-center gap-1.5 text-sm font-medium text-sky-600">
              <UserRound className="h-4 w-4" />
              User
            </span>
            <span className="h-4 w-px bg-border" />
            <span className="inline-flex items-center gap-1.5 text-sm font-medium text-emerald-600">
              <Bot className="h-4 w-4" />
              Bot
            </span>
          </div>
          <div className="flex items-center gap-2">
            <Button
              type="button"
              variant="outline"
              size="sm"
              onClick={() => downloadFile(userRecordingUrl)}
              className="gap-2"
            >
              <Download className="h-4 w-4" />
              User
            </Button>
            <Button
              type="button"
              variant="outline"
              size="sm"
              onClick={() => downloadFile(botRecordingUrl)}
              className="gap-2"
            >
              <Download className="h-4 w-4" />
              Bot
            </Button>
          </div>
        </div>
        <div className="flex items-center gap-4">
          <Button
            type="button"
            size="icon"
            variant={isPlaying ? 'default' : 'outline'}
            onClick={togglePlayback}
            disabled={!canPlay}
            aria-label={isPlaying ? 'Pause split tracks' : 'Play split tracks'}
            className="h-10 w-10 shrink-0"
          >
            {isPlaying ? <Pause className="h-4 w-4" /> : <Play className="h-4 w-4" />}
          </Button>
          <div className="relative h-36 min-w-0 flex-1 overflow-hidden rounded-lg border border-border/70 bg-background">
            <div className="absolute left-3 right-3 top-1/2 h-px bg-border/80" />
            <WaveformLane peaks={peaks.user} track="user" position="top" />
            <WaveformLane peaks={peaks.bot} track="bot" position="bottom" />
            {canPlay && (
              <div className="pointer-events-none absolute inset-x-3 inset-y-3">
                <div
                  className="absolute top-0 bottom-0 w-px bg-foreground/50"
                  style={{ left: `${progressPercent}%` }}
                />
              </div>
            )}
            {isLoading && (
              <div className="absolute inset-0 flex items-center justify-center bg-background/70 text-xs text-muted-foreground">
                <Loader2 className="mr-2 h-3.5 w-3.5 animate-spin" />
                Loading
              </div>
            )}
          </div>
        </div>
      </CardContent>
    </Card>
  );
}
function RunMetricsSection({
costInfo,
logs,
@ -180,6 +499,8 @@ export default function WorkflowRunPage() {
is_completed: response.data?.is_completed ?? false,
transcript_url: response.data?.transcript_url ?? null,
recording_url: response.data?.recording_url ?? null,
user_recording_url: response.data?.user_recording_url ?? null,
bot_recording_url: response.data?.bot_recording_url ?? null,
cost_info: response.data?.cost_info ?? null,
initial_context: response.data?.initial_context as Record<string, string> | null ?? null,
gathered_context: response.data?.gathered_context as Record<string, string> | null ?? null,
@ -192,6 +513,7 @@ export default function WorkflowRunPage() {
run_id: Number(runId),
is_completed: runData.is_completed,
has_recording: !!runData.recording_url,
has_split_recordings: !!runData.user_recording_url && !!runData.bot_recording_url,
has_transcript: !!runData.transcript_url,
});
};
@ -201,6 +523,9 @@ export default function WorkflowRunPage() {
let returnValue = null;
const isTextChatRun = workflowRun?.mode === WORKFLOW_RUN_MODES.TEXTCHAT;
const showRunDetailsView = Boolean(workflowRun?.is_completed || isTextChatRun);
const userSplitRecordingUrl = workflowRun?.user_recording_url ?? null;
const botSplitRecordingUrl = workflowRun?.bot_recording_url ?? null;
const hasSplitTracks = Boolean(userSplitRecordingUrl && botSplitRecordingUrl);
if (isLoading) {
returnValue = (
@ -336,6 +661,13 @@ export default function WorkflowRunPage() {
gatheredContext={workflowRun?.gathered_context ?? null}
/>
{!isTextChatRun && hasSplitTracks && (
<SplitTracksSection
userRecordingUrl={userSplitRecordingUrl as string}
botRecordingUrl={botSplitRecordingUrl as string}
/>
)}
<div className="grid gap-6 md:grid-cols-2">
<ContextDisplay
title="Initial Context"

File diff suppressed because one or more lines are too long

View file

@ -1152,9 +1152,9 @@ export const getCampaignDefaultsApiV1OrganizationsCampaignDefaultsGet = <ThrowOn
* Return a short-lived signed URL for a file stored on S3 / MinIO.
*
* Access Control:
* * Keys that embed an organization ID (``{prefix}/{org_id}/...``) are
* authorized by matching the org_id against the requesting user's
* organization.
* * Known org-scoped keys (for example ``campaigns/{org_id}/...`` and
* ``knowledge_base/{org_id}/...``) are authorized by matching the org_id
* against the requesting user's organization.
* * Legacy keys (``recordings/{run_id}.wav``, ``transcripts/{run_id}.txt``)
* are authorized via the workflow run they belong to.
* * Superusers can request any key.
@ -1474,13 +1474,15 @@ export const initiateCallTestByWorkflowUuidApiV1PublicAgentTestWorkflowWorkflowU
*
* Args:
* token: The public access token (UUID format)
* artifact_type: Type of artifact - "recording" or "transcript"
* artifact_type: Type of artifact - "recording", "transcript",
* "user_recording", or "bot_recording"
* inline: If true, sets Content-Disposition to inline for browser preview
*
* Returns:
* RedirectResponse to the signed URL (302 redirect)
*
* Raises:
* HTTPException 400: If artifact type is unsupported
* HTTPException 404: If token is invalid or artifact not found
*/
export const downloadWorkflowArtifactApiV1PublicDownloadWorkflowTokenArtifactTypeGet = <ThrowOnError extends boolean = false>(options: Options<DownloadWorkflowArtifactApiV1PublicDownloadWorkflowTokenArtifactTypeGetData, ThrowOnError>) => (options.client ?? client).get<DownloadWorkflowArtifactApiV1PublicDownloadWorkflowTokenArtifactTypeGetResponses, DownloadWorkflowArtifactApiV1PublicDownloadWorkflowTokenArtifactTypeGetErrors, ThrowOnError>({ url: '/api/v1/public/download/workflow/{token}/{artifact_type}', ...options });

View file

@ -499,6 +499,8 @@ export type ByokPipelineAiModelConfiguration = {
} & AwsBedrockLlmConfiguration) | ({
provider: 'speaches';
} & SpeachesLlmConfiguration) | ({
provider: 'huggingface';
} & HuggingFaceLlmConfiguration) | ({
provider: 'minimax';
} & MiniMaxLlmConfiguration) | ({
provider: 'sarvam';
@ -551,6 +553,8 @@ export type ByokPipelineAiModelConfiguration = {
} & SarvamSttConfiguration) | ({
provider: 'speaches';
} & SpeachesSttConfiguration) | ({
provider: 'huggingface';
} & HuggingFaceSttConfiguration) | ({
provider: 'assemblyai';
} & AssemblyAisttConfiguration) | ({
provider: 'gladia';
@ -613,6 +617,8 @@ export type ByokRealtimeAiModelConfiguration = {
} & AwsBedrockLlmConfiguration) | ({
provider: 'speaches';
} & SpeachesLlmConfiguration) | ({
provider: 'huggingface';
} & HuggingFaceLlmConfiguration) | ({
provider: 'minimax';
} & MiniMaxLlmConfiguration) | ({
provider: 'sarvam';
@ -2916,6 +2922,80 @@ export type HttpApiToolDefinition = {
config: HttpApiConfig;
};
/**
* Hugging Face
*
* Hosted Hugging Face Inference Providers API for usage-based inference.
*
* LLM variant: intersected with `{ provider: 'huggingface' }` in the BYOK
* pipeline and realtime model configuration unions. Only `api_key` is required.
*/
export type HuggingFaceLlmConfiguration = {
/**
* Provider
*
* Literal discriminator; optional on this type.
*/
provider?: 'huggingface';
/**
* Api Key
*
* Either a single key or an array of keys.
*/
api_key: string | Array<string>;
/**
* Model
*
* Hugging Face chat-completion model identifier, optionally with provider suffix.
*/
model?: string;
/**
* Base Url
*
* Hugging Face OpenAI-compatible chat-completions router base URL.
*/
base_url?: string;
/**
* Bill To
*
* Optional Hugging Face organization or user to bill using X-HF-Bill-To.
*/
bill_to?: string | null;
};
/**
* Hugging Face
*
* Hosted Hugging Face Inference Providers API for usage-based inference.
*
* Speech-to-text variant: intersected with `{ provider: 'huggingface' }` in the
* BYOK pipeline model configuration union. Only `api_key` is required.
*/
export type HuggingFaceSttConfiguration = {
/**
* Provider
*
* Literal discriminator; optional on this type.
*/
provider?: 'huggingface';
/**
* Api Key
*
* Either a single key or an array of keys.
*/
api_key: string | Array<string>;
/**
* Model
*
* Hugging Face ASR model identifier served through Inference Providers.
*/
model?: string;
/**
* Base Url
*
* Hugging Face Inference Providers router base URL.
*/
base_url?: string;
/**
* Bill To
*
* Optional Hugging Face organization or user to bill using X-HF-Bill-To.
*/
bill_to?: string | null;
/**
* Return Timestamps
*
* Request timestamp chunks when supported by the selected provider/model.
*/
return_timestamps?: boolean;
};
/**
* ImpersonateRequest
*
@ -6360,6 +6440,14 @@ export type WorkflowRunResponseSchema = {
* Recording Url
*/
recording_url: string | null;
/**
* User Recording Url
*/
user_recording_url?: string | null;
/**
* Bot Recording Url
*/
bot_recording_url?: string | null;
/**
* Transcript Public Url
*/
@ -6368,6 +6456,14 @@ export type WorkflowRunResponseSchema = {
* Recording Public Url
*/
recording_public_url?: string | null;
/**
* User Recording Public Url
*/
user_recording_public_url?: string | null;
/**
* Bot Recording Public Url
*/
bot_recording_public_url?: string | null;
/**
* Public Access Token
*/
@ -6527,6 +6623,14 @@ export type WorkflowRunUsageResponse = {
* Transcript Url
*/
transcript_url?: string | null;
/**
* User Recording Url
*/
user_recording_url?: string | null;
/**
* Bot Recording Url
*/
bot_recording_url?: string | null;
/**
* Recording Public Url
*/
@ -6535,6 +6639,14 @@ export type WorkflowRunUsageResponse = {
* Transcript Public Url
*/
transcript_public_url?: string | null;
/**
* User Recording Public Url
*/
user_recording_public_url?: string | null;
/**
* Bot Recording Public Url
*/
bot_recording_public_url?: string | null;
/**
* Public Access Token
*/
@ -12331,7 +12443,7 @@ export type DownloadWorkflowArtifactApiV1PublicDownloadWorkflowTokenArtifactType
/**
* Artifact Type
*/
artifact_type: 'recording' | 'transcript';
artifact_type: string;
};
query?: {
/**

View file

@ -11,6 +11,7 @@ import {
type ServiceSegment,
} from "@/components/ServiceConfigurationForm";
import { Button } from "@/components/ui/button";
import { Checkbox } from "@/components/ui/checkbox";
import { Input } from "@/components/ui/input";
import { Label } from "@/components/ui/label";
import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from "@/components/ui/select";
@ -21,6 +22,7 @@ type ModelMode = "realtime" | "dograh" | "byok";
interface DograhDefaults {
voices: string[];
allow_custom_input?: boolean;
speeds: number[];
languages: string[];
defaults: {
@ -265,16 +267,21 @@ export function AIModelConfigurationV2Editor({
const [realtimeInitialConfig, setRealtimeInitialConfig] = useState<Record<string, unknown> | null>(null);
const [pipelineInitialConfig, setPipelineInitialConfig] = useState<Record<string, unknown> | null>(null);
const [isSavingDograh, setIsSavingDograh] = useState(false);
const [isCustomVoice, setIsCustomVoice] = useState(false);
const [error, setError] = useState<string | null>(null);
const allowCustomVoice = defaults.dograh.allow_custom_input ?? false;
useEffect(() => {
const rawConfiguration = asRecord(configuration);
const rawEffectiveConfiguration = asRecord(effectiveConfiguration);
setMode(preferredMode(rawConfiguration, rawEffectiveConfiguration));
setDograh(buildDograhState(defaults, rawConfiguration, rawEffectiveConfiguration));
const nextDograh = buildDograhState(defaults, rawConfiguration, rawEffectiveConfiguration);
setDograh(nextDograh);
setIsCustomVoice(allowCustomVoice && !defaults.dograh.voices.includes(nextDograh.voice));
setRealtimeInitialConfig(getByokInitialConfig(rawConfiguration, rawEffectiveConfiguration, true));
setPipelineInitialConfig(getByokInitialConfig(rawConfiguration, rawEffectiveConfiguration, false));
}, [configuration, defaults, effectiveConfiguration]);
}, [configuration, defaults, effectiveConfiguration, allowCustomVoice]);
const saveDograhConfiguration = async () => {
setIsSavingDograh(true);
@ -363,18 +370,44 @@ export function AIModelConfigurationV2Editor({
<div className="grid gap-4 sm:grid-cols-2">
<div className="space-y-2">
<Label>Voice</Label>
<Select value={dograh.voice} onValueChange={(voice) => setDograh({ ...dograh, voice })}>
<SelectTrigger className="w-full">
<SelectValue placeholder="Select voice" />
</SelectTrigger>
<SelectContent>
{defaults.dograh.voices.map((voice) => (
<SelectItem key={voice} value={voice}>
{voice}
</SelectItem>
))}
</SelectContent>
</Select>
{isCustomVoice ? (
<Input
placeholder="Enter voice"
value={dograh.voice}
onChange={(event) => setDograh({ ...dograh, voice: event.target.value })}
/>
) : (
<Select value={dograh.voice} onValueChange={(voice) => setDograh({ ...dograh, voice })}>
<SelectTrigger className="w-full">
<SelectValue placeholder="Select voice" />
</SelectTrigger>
<SelectContent>
{defaults.dograh.voices.map((voice) => (
<SelectItem key={voice} value={voice}>
{voice}
</SelectItem>
))}
</SelectContent>
</Select>
)}
{allowCustomVoice && (
<div className="flex items-center space-x-2">
<Checkbox
id="dograh-custom-voice"
checked={isCustomVoice}
onCheckedChange={(checked) => {
const custom = checked as boolean;
setIsCustomVoice(custom);
if (!custom) {
setDograh({ ...dograh, voice: defaults.dograh.defaults.voice });
}
}}
/>
<Label htmlFor="dograh-custom-voice" className="text-sm font-normal cursor-pointer">
Enter Custom Value
</Label>
</div>
)}
</div>
<div className="space-y-2">