mirror of
https://github.com/dograh-hq/dograh.git
synced 2026-06-25 08:48:13 +02:00
feat: add transcript panel during live call for better visibility (#116)
* chore: remove old signaling route * Show real time feedbacks
This commit is contained in:
parent
ad4cff73c8
commit
e7712474c1
15 changed files with 599 additions and 469 deletions
|
|
@ -9,6 +9,7 @@ import {
|
|||
ApiKeyErrorDialog,
|
||||
AudioControls,
|
||||
ConnectionStatus,
|
||||
RealtimeFeedbackPanel,
|
||||
WorkflowConfigErrorDialog
|
||||
} from "./components";
|
||||
import { useWebSocketRTC } from "./hooks";
|
||||
|
|
@ -40,7 +41,8 @@ const BrowserCall = ({ workflowId, workflowRunId, accessToken, initialContextVar
|
|||
start,
|
||||
stop,
|
||||
isStarting,
|
||||
getAudioInputDevices
|
||||
getAudioInputDevices,
|
||||
feedbackMessages,
|
||||
} = useWebSocketRTC({ workflowId, workflowRunId, accessToken, initialContextVariables });
|
||||
|
||||
// Poll for recording availability after call ends
|
||||
|
|
@ -93,44 +95,61 @@ const BrowserCall = ({ workflowId, workflowRunId, accessToken, initialContextVar
|
|||
|
||||
return (
|
||||
<>
|
||||
<Card className="w-full max-w-4xl mx-auto">
|
||||
<CardHeader>
|
||||
<CardTitle>Call Voice Agent</CardTitle>
|
||||
</CardHeader>
|
||||
<div className="flex h-full w-full">
|
||||
{/* Main content - 2/3 width when panel visible, full width otherwise */}
|
||||
<div className="w-2/3 h-full">
|
||||
<div className="flex justify-center items-center h-full px-8">
|
||||
<Card className="w-full max-w-xl">
|
||||
<CardHeader>
|
||||
<CardTitle>Call Voice Agent</CardTitle>
|
||||
</CardHeader>
|
||||
|
||||
<CardContent>
|
||||
{isCompleted && checkingForRecording ? (
|
||||
<div className="flex flex-col items-center justify-center space-y-4 p-8">
|
||||
<Loader2 className="h-8 w-8 animate-spin text-primary" />
|
||||
<div className="text-center space-y-2">
|
||||
<p className="text-foreground font-medium">Processing your call</p>
|
||||
<p className="text-sm text-muted-foreground">Fetching transcript and recording...</p>
|
||||
</div>
|
||||
</div>
|
||||
) : (
|
||||
<>
|
||||
<AudioControls
|
||||
audioInputs={audioInputs}
|
||||
selectedAudioInput={selectedAudioInput}
|
||||
setSelectedAudioInput={setSelectedAudioInput}
|
||||
isCompleted={isCompleted}
|
||||
connectionActive={connectionActive}
|
||||
permissionError={permissionError}
|
||||
start={start}
|
||||
stop={stop}
|
||||
isStarting={isStarting}
|
||||
getAudioInputDevices={getAudioInputDevices}
|
||||
/>
|
||||
<CardContent>
|
||||
{isCompleted && checkingForRecording ? (
|
||||
<div className="flex flex-col items-center justify-center space-y-4 p-8">
|
||||
<Loader2 className="h-8 w-8 animate-spin text-primary" />
|
||||
<div className="text-center space-y-2">
|
||||
<p className="text-foreground font-medium">Processing your call</p>
|
||||
<p className="text-sm text-muted-foreground">Fetching transcript and recording...</p>
|
||||
</div>
|
||||
</div>
|
||||
) : (
|
||||
<>
|
||||
<AudioControls
|
||||
audioInputs={audioInputs}
|
||||
selectedAudioInput={selectedAudioInput}
|
||||
setSelectedAudioInput={setSelectedAudioInput}
|
||||
isCompleted={isCompleted}
|
||||
connectionActive={connectionActive}
|
||||
permissionError={permissionError}
|
||||
start={start}
|
||||
stop={stop}
|
||||
isStarting={isStarting}
|
||||
getAudioInputDevices={getAudioInputDevices}
|
||||
/>
|
||||
|
||||
<ConnectionStatus
|
||||
connectionStatus={connectionStatus}
|
||||
/>
|
||||
</>
|
||||
)}
|
||||
</CardContent>
|
||||
<ConnectionStatus
|
||||
connectionStatus={connectionStatus}
|
||||
/>
|
||||
</>
|
||||
)}
|
||||
</CardContent>
|
||||
|
||||
<audio ref={audioRef} autoPlay playsInline className="hidden" />
|
||||
</Card>
|
||||
<audio ref={audioRef} autoPlay playsInline className="hidden" />
|
||||
</Card>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Show transcript panel */}
|
||||
<div className="w-1/3 h-full shrink-0">
|
||||
<RealtimeFeedbackPanel
|
||||
messages={feedbackMessages}
|
||||
isVisible={true}
|
||||
isCallActive={connectionActive}
|
||||
isCallCompleted={isCompleted}
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<ApiKeyErrorDialog
|
||||
open={apiKeyModalOpen}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,152 @@
|
|||
"use client";
|
||||
|
||||
import { Loader2, MessageSquare, Mic, MicOff, Wrench } from "lucide-react";
|
||||
import { useEffect, useRef } from "react";
|
||||
|
||||
import { cn } from "@/lib/utils";
|
||||
|
||||
import { FeedbackMessage } from "../hooks/useWebSocketRTC";
|
||||
|
||||
interface RealtimeFeedbackPanelProps {
|
||||
messages: FeedbackMessage[];
|
||||
isVisible: boolean;
|
||||
isCallActive: boolean;
|
||||
isCallCompleted: boolean;
|
||||
}
|
||||
|
||||
const MessageItem = ({ msg }: { msg: FeedbackMessage }) => {
|
||||
// Function call message - centered
|
||||
if (msg.type === 'function-call') {
|
||||
return (
|
||||
<div className="flex justify-center">
|
||||
<div className="px-3 py-1.5 rounded-full text-xs bg-amber-500/10 border border-amber-500/20 inline-flex items-center gap-2">
|
||||
{msg.status === 'running' ? (
|
||||
<Loader2 className="h-3 w-3 animate-spin text-amber-500" />
|
||||
) : (
|
||||
<Wrench className="h-3 w-3 text-amber-500" />
|
||||
)}
|
||||
<span className="font-mono text-amber-700 dark:text-amber-400">
|
||||
{msg.functionName}()
|
||||
</span>
|
||||
{msg.status === 'completed' && (
|
||||
<span className="text-muted-foreground">✓</span>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
const isUser = msg.type === 'user-transcription';
|
||||
|
||||
// User messages on right, bot messages on left
|
||||
return (
|
||||
<div className={cn(
|
||||
"flex",
|
||||
isUser ? "justify-end" : "justify-start"
|
||||
)}>
|
||||
<div
|
||||
className={cn(
|
||||
"max-w-[85%] px-3 py-2 rounded-2xl text-sm",
|
||||
isUser
|
||||
? "bg-primary text-primary-foreground rounded-br-md"
|
||||
: "bg-muted rounded-bl-md",
|
||||
!msg.final && "opacity-70"
|
||||
)}
|
||||
>
|
||||
<div className="whitespace-pre-wrap leading-relaxed">{msg.text}</div>
|
||||
{!msg.final && (
|
||||
<div className={cn(
|
||||
"text-[10px] mt-1 italic",
|
||||
isUser ? "text-primary-foreground/70" : "text-muted-foreground"
|
||||
)}>
|
||||
speaking...
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
};
|
||||
|
||||
export const RealtimeFeedbackPanel = ({
|
||||
messages,
|
||||
isVisible,
|
||||
isCallActive,
|
||||
isCallCompleted
|
||||
}: RealtimeFeedbackPanelProps) => {
|
||||
const scrollRef = useRef<HTMLDivElement>(null);
|
||||
|
||||
// Auto-scroll to bottom when new messages arrive
|
||||
useEffect(() => {
|
||||
if (scrollRef.current) {
|
||||
scrollRef.current.scrollTop = scrollRef.current.scrollHeight;
|
||||
}
|
||||
}, [messages]);
|
||||
|
||||
if (!isVisible) return null;
|
||||
|
||||
return (
|
||||
<div className="w-full h-full flex flex-col bg-background border-l border-border">
|
||||
{/* Header */}
|
||||
<div className="px-4 py-3 border-b border-border shrink-0">
|
||||
<div className="flex items-center justify-center gap-2">
|
||||
<MessageSquare className="h-4 w-4 text-muted-foreground shrink-0" />
|
||||
<span className="font-medium text-sm whitespace-nowrap">Live Transcript</span>
|
||||
<div className={cn(
|
||||
"flex items-center gap-1 text-xs px-2 py-0.5 rounded-full shrink-0",
|
||||
isCallActive
|
||||
? "bg-green-500/10 text-green-600 dark:text-green-400"
|
||||
: isCallCompleted
|
||||
? "bg-muted text-muted-foreground"
|
||||
: "bg-muted text-muted-foreground"
|
||||
)}>
|
||||
{isCallActive ? (
|
||||
<>
|
||||
<Mic className="h-3 w-3" />
|
||||
<span>Live</span>
|
||||
</>
|
||||
) : isCallCompleted ? (
|
||||
<>
|
||||
<MicOff className="h-3 w-3" />
|
||||
<span>Ended</span>
|
||||
</>
|
||||
) : (
|
||||
<>
|
||||
<MicOff className="h-3 w-3" />
|
||||
<span>Ready</span>
|
||||
</>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Messages */}
|
||||
<div ref={scrollRef} className="flex-1 overflow-y-auto">
|
||||
{messages.length === 0 ? (
|
||||
<div className="flex flex-col items-center justify-center h-full text-muted-foreground text-sm">
|
||||
<MessageSquare className="h-10 w-10 mb-4 opacity-30" />
|
||||
<p className="font-medium">No messages yet</p>
|
||||
<p className="text-xs mt-1 text-center px-4">
|
||||
{isCallActive
|
||||
? "Start speaking to see the transcript"
|
||||
: "Start the call to begin the conversation"
|
||||
}
|
||||
</p>
|
||||
</div>
|
||||
) : (
|
||||
<div className="space-y-3 p-4">
|
||||
{messages.map((msg) => (
|
||||
<MessageItem key={msg.id} msg={msg} />
|
||||
))}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
|
||||
{/* Footer with message count */}
|
||||
{messages.length > 0 && (
|
||||
<div className="px-4 py-2 border-t border-border text-xs text-muted-foreground shrink-0">
|
||||
{messages.filter(m => m.type !== 'function-call').length} messages
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
};
|
||||
|
|
@ -2,4 +2,5 @@ export * from './ApiKeyErrorDialog';
|
|||
export * from './AudioControls';
|
||||
export * from './ConnectionStatus';
|
||||
export * from './ContextDisplay';
|
||||
export * from './RealtimeFeedbackPanel';
|
||||
export * from './WorkflowConfigErrorDialog'
|
||||
|
|
|
|||
|
|
@ -1,3 +1,2 @@
|
|||
export * from './useDeviceInputs';
|
||||
export * from './useWebRTC';
|
||||
export * from './useWebSocketRTC';
|
||||
|
|
|
|||
|
|
@ -1,287 +0,0 @@
|
|||
import { useRef, useState } from "react";
|
||||
|
||||
import { offerApiV1PipecatRtcOfferPost, validateUserConfigurationsApiV1UserConfigurationsUserValidateGet, validateWorkflowApiV1WorkflowWorkflowIdValidatePost } from "@/client/sdk.gen";
|
||||
import { WorkflowValidationError } from "@/components/flow/types";
|
||||
import logger from '@/lib/logger';
|
||||
|
||||
import { sdpFilterCodec } from "../utils";
|
||||
import { useDeviceInputs } from "./useDeviceInputs";
|
||||
|
||||
interface UseWebRTCProps {
|
||||
workflowId: number;
|
||||
workflowRunId: number;
|
||||
accessToken: string | null;
|
||||
initialContextVariables?: Record<string, string> | null;
|
||||
}
|
||||
|
||||
export const useWebRTC = ({ workflowId, workflowRunId, accessToken, initialContextVariables }: UseWebRTCProps) => {
|
||||
const [connectionStatus, setConnectionStatus] = useState<'idle' | 'connecting' | 'connected' | 'failed'>('idle');
|
||||
const [connectionActive, setConnectionActive] = useState(false);
|
||||
const [isCompleted, setIsCompleted] = useState(false);
|
||||
const [apiKeyModalOpen, setApiKeyModalOpen] = useState(false);
|
||||
const [apiKeyError, setApiKeyError] = useState<string | null>(null);
|
||||
const [workflowConfigModalOpen, setWorkflowConfigModalOpen] = useState(false);
|
||||
const [workflowConfigError, setWorkflowConfigError] = useState<string | null>(null);
|
||||
const [isStarting, setIsStarting] = useState(false);
|
||||
// Use initial context variables directly, no UI for editing
|
||||
const initialContext = initialContextVariables || {};
|
||||
|
||||
const {
|
||||
audioInputs,
|
||||
selectedAudioInput,
|
||||
setSelectedAudioInput,
|
||||
permissionError,
|
||||
setPermissionError
|
||||
} = useDeviceInputs();
|
||||
|
||||
const useStun = true;
|
||||
const useAudio = true;
|
||||
const audioCodec = 'default';
|
||||
|
||||
const audioRef = useRef<HTMLAudioElement>(null);
|
||||
const pcRef = useRef<RTCPeerConnection | null>(null);
|
||||
const timeStartRef = useRef<number | null>(null);
|
||||
|
||||
// Generate a cryptographically secure unique ID
|
||||
const generateSecureId = () => {
|
||||
// Use Web Crypto API to generate random bytes
|
||||
const array = new Uint8Array(16);
|
||||
crypto.getRandomValues(array);
|
||||
// Convert to hex string
|
||||
return 'PC-' + Array.from(array)
|
||||
.map(b => b.toString(16).padStart(2, '0'))
|
||||
.join('');
|
||||
};
|
||||
|
||||
const pc_id = generateSecureId();
|
||||
|
||||
const createPeerConnection = () => {
|
||||
const config: RTCConfiguration = {
|
||||
iceServers: useStun ? [{ urls: ['stun:stun.l.google.com:19302'] }] : []
|
||||
};
|
||||
|
||||
const pc = new RTCPeerConnection(config);
|
||||
|
||||
pc.addEventListener('icegatheringstatechange', () => {
|
||||
logger.info(`ICE gathering state changed in createPeerConnection, ${pc.iceGatheringState}`);
|
||||
});
|
||||
|
||||
pc.addEventListener('iceconnectionstatechange', () => {
|
||||
logger.info(`ICE connection state changed: ${pc.iceConnectionState}`);
|
||||
if (pc.iceConnectionState === 'connected' || pc.iceConnectionState === 'completed') {
|
||||
setConnectionStatus('connected');
|
||||
} else if (pc.iceConnectionState === 'failed' || pc.iceConnectionState === 'disconnected') {
|
||||
setConnectionStatus('failed');
|
||||
}
|
||||
});
|
||||
|
||||
pc.addEventListener('track', (evt) => {
|
||||
if (evt.track.kind === 'audio' && audioRef.current) {
|
||||
audioRef.current.srcObject = evt.streams[0];
|
||||
}
|
||||
});
|
||||
|
||||
pcRef.current = pc;
|
||||
return pc;
|
||||
};
|
||||
|
||||
const negotiate = async () => {
|
||||
const pc = pcRef.current;
|
||||
if (!pc) return;
|
||||
|
||||
try {
|
||||
const offer = await pc.createOffer();
|
||||
await pc.setLocalDescription(offer);
|
||||
|
||||
await new Promise<void>((resolve) => {
|
||||
if (pc.iceGatheringState === 'complete') {
|
||||
resolve();
|
||||
} else {
|
||||
const checkState = () => {
|
||||
if (pc.iceGatheringState === 'complete') {
|
||||
logger.debug(`ICE gathering is complete in negotiate, ${pc.iceGatheringState}`);
|
||||
pc.removeEventListener('icegatheringstatechange', checkState);
|
||||
resolve();
|
||||
}
|
||||
};
|
||||
pc.addEventListener('icegatheringstatechange', checkState);
|
||||
}
|
||||
});
|
||||
|
||||
const localDescription = pc.localDescription;
|
||||
if (!localDescription) return;
|
||||
|
||||
let sdp = localDescription.sdp;
|
||||
|
||||
if (audioCodec !== 'default') {
|
||||
sdp = sdpFilterCodec('audio', audioCodec, sdp);
|
||||
}
|
||||
|
||||
if (!accessToken) return;
|
||||
|
||||
const response = await offerApiV1PipecatRtcOfferPost({
|
||||
headers: {
|
||||
'Authorization': `Bearer ${accessToken}`,
|
||||
},
|
||||
body: {
|
||||
sdp: sdp,
|
||||
type: 'offer',
|
||||
pc_id: pc_id,
|
||||
restart_pc: false,
|
||||
workflow_id: workflowId,
|
||||
workflow_run_id: workflowRunId,
|
||||
call_context_vars: initialContext
|
||||
}
|
||||
});
|
||||
|
||||
if (response && response.data) {
|
||||
const answerSdpText = typeof response.data === 'object' && 'sdp' in response.data
|
||||
? response.data.sdp as string
|
||||
: '';
|
||||
|
||||
await pc.setRemoteDescription({
|
||||
type: 'answer',
|
||||
sdp: answerSdpText
|
||||
});
|
||||
setConnectionActive(true);
|
||||
}
|
||||
} catch (e) {
|
||||
logger.error(`Negotiation failed: ${e}`);
|
||||
}
|
||||
};
|
||||
|
||||
const start = async () => {
|
||||
if (isStarting || !accessToken) return;
|
||||
setIsStarting(true);
|
||||
setConnectionStatus('connecting');
|
||||
try {
|
||||
const response = await validateUserConfigurationsApiV1UserConfigurationsUserValidateGet({
|
||||
headers: {
|
||||
'Authorization': `Bearer ${accessToken}`,
|
||||
},
|
||||
query: {
|
||||
validity_ttl_seconds: 86400
|
||||
},
|
||||
});
|
||||
if (response.error) {
|
||||
setApiKeyModalOpen(true);
|
||||
let msg = 'API Key Error';
|
||||
const detail = (response.error as unknown as { detail?: { errors: { model: string; message: string }[] } }).detail;
|
||||
if (Array.isArray(detail)) {
|
||||
msg = detail
|
||||
.map((e: { model: string; message: string }) => `${e.model}: ${e.message}`)
|
||||
.join('\n');
|
||||
}
|
||||
setApiKeyError(msg);
|
||||
return;
|
||||
}
|
||||
|
||||
// Then check workflow validation
|
||||
const workflowResponse = await validateWorkflowApiV1WorkflowWorkflowIdValidatePost({
|
||||
path: {
|
||||
workflow_id: workflowId,
|
||||
},
|
||||
headers: {
|
||||
'Authorization': `Bearer ${accessToken}`,
|
||||
},
|
||||
});
|
||||
|
||||
if (workflowResponse.error) {
|
||||
setWorkflowConfigModalOpen(true);
|
||||
let msg = 'Workflow validation failed';
|
||||
const errorDetail = workflowResponse.error as { detail?: { errors: WorkflowValidationError[] } };
|
||||
if (errorDetail?.detail?.errors) {
|
||||
msg = errorDetail.detail.errors
|
||||
.map(err => `${err.kind}: ${err.message}`)
|
||||
.join('\n');
|
||||
}
|
||||
setWorkflowConfigError(msg);
|
||||
return;
|
||||
}
|
||||
|
||||
timeStartRef.current = null;
|
||||
const pc = createPeerConnection();
|
||||
|
||||
const constraints: MediaStreamConstraints = {
|
||||
audio: false,
|
||||
};
|
||||
|
||||
if (useAudio) {
|
||||
const audioConstraints: MediaTrackConstraints = {};
|
||||
if (selectedAudioInput) {
|
||||
audioConstraints.deviceId = { exact: selectedAudioInput };
|
||||
}
|
||||
constraints.audio = Object.keys(audioConstraints).length ? audioConstraints : true;
|
||||
}
|
||||
|
||||
if (constraints.audio) {
|
||||
try {
|
||||
const stream = await navigator.mediaDevices.getUserMedia(constraints);
|
||||
stream.getTracks().forEach((track) => {
|
||||
pc.addTrack(track, stream);
|
||||
});
|
||||
await negotiate();
|
||||
} catch (err) {
|
||||
logger.error(`Could not acquire media: ${err}`);
|
||||
setPermissionError('Could not acquire media');
|
||||
setConnectionStatus('failed');
|
||||
}
|
||||
} else {
|
||||
await negotiate();
|
||||
}
|
||||
} finally {
|
||||
setIsStarting(false);
|
||||
}
|
||||
};
|
||||
|
||||
const stop = () => {
|
||||
setConnectionActive(false);
|
||||
setIsCompleted(true);
|
||||
setConnectionStatus('idle');
|
||||
|
||||
const pc = pcRef.current;
|
||||
if (!pc) return;
|
||||
|
||||
if (pc.getTransceivers) {
|
||||
pc.getTransceivers().forEach((transceiver) => {
|
||||
if (transceiver.stop) {
|
||||
transceiver.stop();
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
pc.getSenders().forEach((sender) => {
|
||||
if (sender.track) {
|
||||
sender.track.stop();
|
||||
}
|
||||
});
|
||||
|
||||
setTimeout(() => {
|
||||
if (pcRef.current) {
|
||||
pcRef.current.close();
|
||||
pcRef.current = null;
|
||||
}
|
||||
}, 500);
|
||||
};
|
||||
|
||||
return {
|
||||
audioRef,
|
||||
audioInputs,
|
||||
selectedAudioInput,
|
||||
setSelectedAudioInput,
|
||||
connectionActive,
|
||||
permissionError,
|
||||
isCompleted,
|
||||
apiKeyModalOpen,
|
||||
setApiKeyModalOpen,
|
||||
apiKeyError,
|
||||
workflowConfigError,
|
||||
workflowConfigModalOpen,
|
||||
setWorkflowConfigModalOpen,
|
||||
connectionStatus,
|
||||
start,
|
||||
stop,
|
||||
isStarting,
|
||||
initialContext
|
||||
};
|
||||
};
|
||||
|
|
@ -15,6 +15,16 @@ interface UseWebSocketRTCProps {
|
|||
initialContextVariables?: Record<string, string> | null;
|
||||
}
|
||||
|
||||
export interface FeedbackMessage {
|
||||
id: string;
|
||||
type: 'user-transcription' | 'bot-text' | 'function-call';
|
||||
text: string;
|
||||
final?: boolean;
|
||||
timestamp: string;
|
||||
functionName?: string;
|
||||
status?: 'running' | 'completed';
|
||||
}
|
||||
|
||||
export const useWebSocketRTC = ({ workflowId, workflowRunId, accessToken, initialContextVariables }: UseWebSocketRTCProps) => {
|
||||
const [connectionStatus, setConnectionStatus] = useState<'idle' | 'connecting' | 'connected' | 'failed'>('idle');
|
||||
const [connectionActive, setConnectionActive] = useState(false);
|
||||
|
|
@ -24,6 +34,7 @@ export const useWebSocketRTC = ({ workflowId, workflowRunId, accessToken, initia
|
|||
const [workflowConfigModalOpen, setWorkflowConfigModalOpen] = useState(false);
|
||||
const [workflowConfigError, setWorkflowConfigError] = useState<string | null>(null);
|
||||
const [isStarting, setIsStarting] = useState(false);
|
||||
const [feedbackMessages, setFeedbackMessages] = useState<FeedbackMessage[]>([]);
|
||||
const initialContext = initialContextVariables || {};
|
||||
|
||||
const {
|
||||
|
|
@ -271,6 +282,105 @@ export const useWebSocketRTC = ({ workflowId, workflowRunId, accessToken, initia
|
|||
}
|
||||
break;
|
||||
|
||||
case 'rtf-user-transcription': {
|
||||
const transcription = message.payload;
|
||||
setFeedbackMessages(prev => {
|
||||
// Mark last bot message as final (user started speaking)
|
||||
const withBotFinalized = prev.map((m, i) =>
|
||||
i === prev.length - 1 && m.type === 'bot-text' && !m.final
|
||||
? { ...m, final: true }
|
||||
: m
|
||||
);
|
||||
|
||||
// For interim transcriptions, replace the last interim
|
||||
if (!transcription.final) {
|
||||
const withoutLastInterim = withBotFinalized.filter(
|
||||
m => !(m.type === 'user-transcription' && !m.final)
|
||||
);
|
||||
return [...withoutLastInterim, {
|
||||
id: `user-${Date.now()}`,
|
||||
type: 'user-transcription',
|
||||
text: transcription.text,
|
||||
final: false,
|
||||
timestamp: new Date().toISOString(),
|
||||
}];
|
||||
}
|
||||
// For final transcriptions, replace interim with final
|
||||
const withoutInterim = withBotFinalized.filter(
|
||||
m => !(m.type === 'user-transcription' && !m.final)
|
||||
);
|
||||
return [...withoutInterim, {
|
||||
id: `user-${Date.now()}`,
|
||||
type: 'user-transcription',
|
||||
text: transcription.text,
|
||||
final: true,
|
||||
timestamp: new Date().toISOString(),
|
||||
}];
|
||||
});
|
||||
break;
|
||||
}
|
||||
|
||||
case 'rtf-bot-text': {
|
||||
// TTS text comes as sentences/phrases, concatenate with space
|
||||
setFeedbackMessages(prev => {
|
||||
const last = prev[prev.length - 1];
|
||||
if (last && last.type === 'bot-text' && !last.final) {
|
||||
// Append to existing bot message with space if needed
|
||||
const existingText = last.text;
|
||||
const newText = message.payload.text;
|
||||
// Add space between chunks if previous doesn't end with space
|
||||
// and new doesn't start with space or punctuation
|
||||
const needsSpace = existingText.length > 0 &&
|
||||
!existingText.endsWith(' ') &&
|
||||
!newText.startsWith(' ') &&
|
||||
!/^[.,!?;:]/.test(newText);
|
||||
return [
|
||||
...prev.slice(0, -1),
|
||||
{ ...last, text: existingText + (needsSpace ? ' ' : '') + newText }
|
||||
];
|
||||
}
|
||||
// Start new bot message
|
||||
return [...prev, {
|
||||
id: `bot-${Date.now()}`,
|
||||
type: 'bot-text',
|
||||
text: message.payload.text,
|
||||
final: false,
|
||||
timestamp: new Date().toISOString(),
|
||||
}];
|
||||
});
|
||||
break;
|
||||
}
|
||||
|
||||
case 'rtf-function-call-start': {
|
||||
const { function_name, tool_call_id } = message.payload;
|
||||
setFeedbackMessages(prev => {
|
||||
// Check if we already have this function call
|
||||
const existingId = `func-${tool_call_id}`;
|
||||
if (prev.some(msg => msg.id === existingId)) {
|
||||
return prev;
|
||||
}
|
||||
return [...prev, {
|
||||
id: existingId,
|
||||
type: 'function-call',
|
||||
text: function_name,
|
||||
functionName: function_name,
|
||||
status: 'running',
|
||||
timestamp: new Date().toISOString(),
|
||||
}];
|
||||
});
|
||||
break;
|
||||
}
|
||||
|
||||
case 'rtf-function-call-end': {
|
||||
const { tool_call_id, result } = message.payload;
|
||||
setFeedbackMessages(prev => prev.map(msg =>
|
||||
msg.id === `func-${tool_call_id}`
|
||||
? { ...msg, status: 'completed' as const, text: result || msg.text }
|
||||
: msg
|
||||
));
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
logger.warn('Unknown message type:', message.type);
|
||||
}
|
||||
|
|
@ -505,6 +615,7 @@ export const useWebSocketRTC = ({ workflowId, workflowRunId, accessToken, initia
|
|||
stop,
|
||||
isStarting,
|
||||
initialContext,
|
||||
getAudioInputDevices
|
||||
getAudioInputDevices,
|
||||
feedbackMessages,
|
||||
};
|
||||
};
|
||||
|
|
|
|||
File diff suppressed because one or more lines are too long
|
|
@ -483,18 +483,6 @@ export type PresignedUploadUrlResponse = {
|
|||
expires_in: number;
|
||||
};
|
||||
|
||||
export type RtcOfferRequest = {
|
||||
pc_id: string | null;
|
||||
sdp: string;
|
||||
type: string;
|
||||
workflow_id: number;
|
||||
workflow_run_id: number;
|
||||
restart_pc?: boolean;
|
||||
call_context_vars?: {
|
||||
[key: string]: unknown;
|
||||
} | null;
|
||||
};
|
||||
|
||||
export type S3SignedUrlResponse = {
|
||||
url: string;
|
||||
expires_in: number;
|
||||
|
|
@ -1260,37 +1248,6 @@ export type HandleInboundFallbackApiV1TelephonyInboundFallbackPostResponses = {
|
|||
200: unknown;
|
||||
};
|
||||
|
||||
export type OfferApiV1PipecatRtcOfferPostData = {
|
||||
body: RtcOfferRequest;
|
||||
headers?: {
|
||||
authorization?: string | null;
|
||||
'X-API-Key'?: string | null;
|
||||
};
|
||||
path?: never;
|
||||
query?: never;
|
||||
url: '/api/v1/pipecat/rtc-offer';
|
||||
};
|
||||
|
||||
export type OfferApiV1PipecatRtcOfferPostErrors = {
|
||||
/**
|
||||
* Not found
|
||||
*/
|
||||
404: unknown;
|
||||
/**
|
||||
* Validation Error
|
||||
*/
|
||||
422: HttpValidationError;
|
||||
};
|
||||
|
||||
export type OfferApiV1PipecatRtcOfferPostError = OfferApiV1PipecatRtcOfferPostErrors[keyof OfferApiV1PipecatRtcOfferPostErrors];
|
||||
|
||||
export type OfferApiV1PipecatRtcOfferPostResponses = {
|
||||
/**
|
||||
* Successful Response
|
||||
*/
|
||||
200: unknown;
|
||||
};
|
||||
|
||||
export type ImpersonateApiV1SuperuserImpersonatePostData = {
|
||||
body: ImpersonateRequest;
|
||||
headers?: {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue