feat: add transcript panel during live call for better visibility (#116)

* chore: remove old signaling route * Show real-time feedback
2026-07-22 11:51:04 +02:00 · 2026-01-13 22:48:18 +05:30 · 2026-01-13 22:48:18 +05:30 · e7712474c1
commit e7712474c1
parent ad4cff73c8
15 changed files with 599 additions and 469 deletions
--- a/api/app.py
+++ b/api/app.py
@ -20,7 +20,6 @@ if SENTRY_DSN and (
    print(f"Sentry initialized in environment: {ENVIRONMENT}")


-import asyncio
 from contextlib import asynccontextmanager
 from typing import Optional

@ -30,7 +29,6 @@ from fastapi.middleware.cors import CORSMiddleware
 from loguru import logger

 from api.routes.main import router as main_router
-from api.routes.rtc_offer import pcs_map
 from api.services.telephony.worker_event_subscriber import (
    WorkerEventSubscriber,
    setup_worker_subscriber,
@ -77,11 +75,6 @@ async def lifespan(app: FastAPI):
            # Fall back to immediate stop
            await worker_subscriber.stop()

-    # close all dangling pipecat connections
-    coros = [pc.close() for pc in pcs_map.values()]
-    await asyncio.gather(*coros)
-    pcs_map.clear()
-
    await redis.aclose()


--- a/api/routes/main.py
+++ b/api/routes/main.py
@ -10,7 +10,6 @@ from api.routes.organization_usage import router as organization_usage_router
 from api.routes.public_agent import router as public_agent_router
 from api.routes.public_embed import router as public_embed_router
 from api.routes.reports import router as reports_router
-from api.routes.rtc_offer import router as rtc_offer_router
 from api.routes.s3_signed_url import router as s3_router
 from api.routes.service_keys import router as service_keys_router
 from api.routes.superuser import router as superuser_router
@ -27,7 +26,6 @@ router = APIRouter(
 )

 router.include_router(telephony_router)
-router.include_router(rtc_offer_router)
 router.include_router(superuser_router)
 router.include_router(workflow_router)
 router.include_router(user_router)
--- a/api/routes/rtc_offer.py
+++ b/api/routes/rtc_offer.py
@ -1,77 +0,0 @@
-from typing import Dict
-
-from fastapi import APIRouter, BackgroundTasks, Depends
-from loguru import logger
-from pydantic import BaseModel
-
-from api.db.models import UserModel
-from api.services.auth.depends import get_user
-from api.services.pipecat.run_pipeline import run_pipeline_smallwebrtc
-from pipecat.transports.smallwebrtc.connection import SmallWebRTCConnection
-from pipecat.utils.context import set_current_run_id
-
-router = APIRouter(prefix="/pipecat")
-
-pcs_map: Dict[str, SmallWebRTCConnection] = {}
-ice_servers = ["stun:stun.l.google.com:19302"]
-
-
-class RTCOfferRequest(BaseModel):
-    pc_id: str | None
-    sdp: str
-    type: str
-    workflow_id: int
-    workflow_run_id: int
-    restart_pc: bool = False
-    call_context_vars: dict | None = None
-
-
-@router.post("/rtc-offer")
-async def offer(
-    request: RTCOfferRequest,
-    background_tasks: BackgroundTasks,
-    user: UserModel = Depends(get_user),
-):
-    pc_id = request.pc_id
-
-    if pc_id and pc_id in pcs_map:
-        # Ensure run_id context is available for logs even when reusing an existing PC.
-        set_current_run_id(request.workflow_run_id)
-
-        pipecat_connection = pcs_map[pc_id]
-        logger.info(f"Reusing existing connection for pc_id: {pc_id}")
-        await pipecat_connection.renegotiate(
-            sdp=request.sdp,
-            type=request.type,
-            restart_pc=request.restart_pc,
-        )
-    else:
-        # Set the run_id *before* creating the SmallWebRTCConnection so that all
-        # async tasks and event-handler coroutines spawned inside the
-        # constructor inherit the correct context variable value.  Otherwise the
-        # default ("NA") leaks into the log output produced by those tasks.
-        set_current_run_id(request.workflow_run_id)
-
-        pipecat_connection = SmallWebRTCConnection(ice_servers)
-        await pipecat_connection.initialize(sdp=request.sdp, type=request.type)
-
-        @pipecat_connection.event_handler("closed")
-        async def handle_disconnected(webrtc_connection: SmallWebRTCConnection):
-            logger.info(
-                f"In pipecat connection closed handler. Popping peer connection pc_id: {webrtc_connection.pc_id} from pcs_map"
-            )
-            pcs_map.pop(webrtc_connection.pc_id, None)
-
-        background_tasks.add_task(
-            run_pipeline_smallwebrtc,
-            pipecat_connection,
-            request.workflow_id,
-            request.workflow_run_id,
-            user.id,
-            request.call_context_vars or {},
-        )
-
-    answer = pipecat_connection.get_answer()
-    pcs_map[answer["pc_id"]] = pipecat_connection
-
-    return answer
--- a/api/routes/webrtc_signaling.py
+++ b/api/routes/webrtc_signaling.py
@ -18,11 +18,16 @@ from aiortc import RTCIceServer
 from aiortc.sdp import candidate_from_sdp
 from fastapi import APIRouter, Depends, WebSocket, WebSocketDisconnect
 from loguru import logger
+from starlette.websockets import WebSocketState

 from api.db import db_client
 from api.db.models import UserModel
 from api.services.auth.depends import get_user_ws
 from api.services.pipecat.run_pipeline import run_pipeline_smallwebrtc
+from api.services.pipecat.ws_sender_registry import (
+    register_ws_sender,
+    unregister_ws_sender,
+)
 from api.services.quota_service import check_dograh_quota
 from pipecat.transports.smallwebrtc.connection import SmallWebRTCConnection
 from pipecat.utils.context import set_current_run_id
@ -92,6 +97,9 @@ class SignalingManager:
            # Cleanup
            self._connections.pop(connection_id, None)

+            # Unregister WebSocket sender for real-time feedback
+            unregister_ws_sender(workflow_run_id)
+
            # Clean up all peer connections for this workflow run
            # Note: In a WebSocket-based signaling approach (vs HTTP PATCH),
            # we maintain our own connection map instead of relying on
@ -182,6 +190,13 @@ class SignalingManager:
            # Store peer connection using client's pc_id
            self._peer_connections[pc_id] = pc

+            # Register WebSocket sender for real-time feedback
+            async def ws_sender(message: dict):
+                if ws.application_state == WebSocketState.CONNECTED:
+                    await ws.send_json(message)
+
+            register_ws_sender(workflow_run_id, ws_sender)
+
            # Setup closed handler
            @pc.event_handler("closed")
            async def handle_disconnected(webrtc_connection: SmallWebRTCConnection):
--- a/api/services/pipecat/realtime_feedback_observer.py
+++ b/api/services/pipecat/realtime_feedback_observer.py
@ -0,0 +1,227 @@
+"""Real-time feedback observer for sending pipeline events to the frontend.
+
+This observer watches pipeline frames and sends relevant events (transcriptions,
+bot text) over WebSocket to provide real-time feedback in the UI.
+
+For frames with presentation timestamps (pts), like TTSTextFrame, we respect
+the timing by queuing them and sending at the appropriate time, similar to
+how base_output.py handles timed frames.
+"""
+
+import asyncio
+import time
+from typing import Awaitable, Callable, Optional, Set
+
+from loguru import logger
+
+from pipecat.frames.frames import (
+    CancelFrame,
+    EndFrame,
+    FunctionCallInProgressFrame,
+    FunctionCallResultFrame,
+    InterimTranscriptionFrame,
+    InterruptionFrame,
+    StopFrame,
+    TranscriptionFrame,
+    TTSTextFrame,
+)
+from pipecat.observers.base_observer import BaseObserver, FramePushed
+from pipecat.processors.frame_processor import FrameDirection
+from pipecat.utils.time import nanoseconds_to_seconds
+
+
+class RealtimeFeedbackObserver(BaseObserver):
+    """Observer that sends real-time transcription and bot response events via WebSocket.
+
+    Transcriptions and function-call events are forwarded as soon as they are
+    observed. Bot text frames that carry a pts (presentation timestamp) are
+    queued and sent at the matching offset from the first pts seen, to sync
+    with audio playback.
+    """
+
+    # Only these frame types are ever forwarded to the client. Filtering on
+    # them *before* de-duplication keeps `_frames_seen` from accumulating the
+    # id of every other frame that passes through the observer.
+    _FEEDBACK_FRAME_TYPES = (
+        InterimTranscriptionFrame,
+        TranscriptionFrame,
+        TTSTextFrame,
+        FunctionCallInProgressFrame,
+        FunctionCallResultFrame,
+    )
+
+    def __init__(
+        self,
+        ws_sender: Callable[[dict], Awaitable[None]],
+    ):
+        """
+        Args:
+            ws_sender: Async function to send messages over WebSocket.
+                       Expected signature: async def send(message: dict) -> None
+        """
+        super().__init__()
+        self._ws_sender = ws_sender
+        # Ids of forwardable frames already handled (a frame can be observed
+        # more than once). NOTE(review): confirm the actual type of frame.id.
+        self._frames_seen: Set[str] = set()
+
+        # Clock/timing for pts-based frames (similar to base_output.py)
+        self._clock_queue: Optional[asyncio.PriorityQueue] = None
+        self._clock_task: Optional[asyncio.Task] = None
+        # Monotonic clock reading taken when the first pts frame was seen.
+        # A monotonic clock is used so system clock adjustments cannot skew
+        # the delivery schedule.
+        self._clock_start_time: Optional[float] = None
+        self._pts_start_time: Optional[int] = None  # First pts value we saw
+
+    async def _ensure_clock_task(self):
+        """Create the clock queue and its consumer task if they don't exist."""
+        if self._clock_queue is None:
+            self._clock_queue = asyncio.PriorityQueue()
+            self._clock_task = asyncio.create_task(self._clock_task_handler())
+
+    async def _cancel_clock_task(self):
+        """Cancel the clock task and clear the queue.
+
+        Called on interruption and on pipeline termination to discard any
+        pending bot text that hasn't been sent yet.
+        """
+        if self._clock_task:
+            self._clock_task.cancel()
+            try:
+                await self._clock_task
+            except asyncio.CancelledError:
+                pass
+            self._clock_task = None
+        self._clock_queue = None
+        # Reset timing references so next bot response starts fresh
+        self._clock_start_time = None
+        self._pts_start_time = None
+
+    async def _handle_interruption(self):
+        """Handle interruption by clearing queued bot text.
+
+        The clock task is cancelled here, which drops every pending message.
+        It is not recreated immediately: the next bot text frame with a pts
+        creates a fresh queue and task via `_ensure_clock_task`.
+        """
+        await self._cancel_clock_task()
+
+    async def _clock_task_handler(self):
+        """Process timed frames from the queue, respecting their presentation timestamps.
+
+        Similar to base_output.py's _clock_task_handler, we wait until the
+        frame's pts time has arrived before sending.
+        """
+        # Bind the queue once: this task only ever serves the queue it was
+        # created for, even if the attribute is reset while it is running.
+        queue = self._clock_queue
+        while True:
+            try:
+                pts, _frame_id, message = await queue.get()
+
+                # Calculate when to send based on pts relative to our start time
+                if (
+                    self._clock_start_time is not None
+                    and self._pts_start_time is not None
+                ):
+                    # Target time = start time + (frame pts - start pts) in seconds
+                    target_time = self._clock_start_time + nanoseconds_to_seconds(
+                        pts - self._pts_start_time
+                    )
+                    current_time = time.monotonic()
+                    if target_time > current_time:
+                        await asyncio.sleep(target_time - current_time)
+
+                # Send the message
+                await self._send_message(message)
+                queue.task_done()
+            except asyncio.CancelledError:
+                break
+            except Exception as e:
+                logger.debug(f"Clock task error: {e}")
+
+    async def on_push_frame(self, data: FramePushed):
+        """Process frames and send relevant ones to the client.
+
+        Args:
+            data: The observed push event; only its `frame` and `direction`
+                are used.
+        """
+        frame = data.frame
+        frame_direction = data.direction
+
+        # Handle pipeline termination - stop clock task
+        if isinstance(frame, (EndFrame, CancelFrame, StopFrame)):
+            await self._cancel_clock_task()
+            return
+
+        # Handle interruptions - clear any queued bot text
+        if isinstance(frame, InterruptionFrame):
+            await self._handle_interruption()
+            return
+
+        # Ignore frames we never forward, so their ids are not tracked.
+        if not isinstance(frame, self._FEEDBACK_FRAME_TYPES):
+            return
+
+        # Skip already processed frames (frames can be observed multiple times)
+        if frame.id in self._frames_seen:
+            return
+        self._frames_seen.add(frame.id)
+
+        # Handle user transcriptions (interim and final share one payload shape)
+        if isinstance(frame, (InterimTranscriptionFrame, TranscriptionFrame)):
+            await self._send_message(
+                {
+                    "type": "rtf-user-transcription",
+                    "payload": {
+                        "text": frame.text,
+                        "final": not isinstance(frame, InterimTranscriptionFrame),
+                        "user_id": frame.user_id,
+                        "timestamp": frame.timestamp,
+                    },
+                }
+            )
+        # Handle bot TTS text - respect pts timing
+        elif isinstance(frame, TTSTextFrame):
+            message = {
+                "type": "rtf-bot-text",
+                "payload": {
+                    "text": frame.text,
+                },
+            }
+
+            # If frame has pts, queue it for timed delivery
+            if frame.pts:
+                # Initialize timing reference on first pts frame
+                if self._pts_start_time is None:
+                    self._pts_start_time = frame.pts
+                    self._clock_start_time = time.monotonic()
+
+                await self._ensure_clock_task()
+                # frame.id breaks ties between equal pts values so the
+                # message dicts themselves are never compared by the queue.
+                await self._clock_queue.put((frame.pts, frame.id, message))
+            else:
+                # No pts, send immediately
+                await self._send_message(message)
+        # Handle function call in progress
+        elif (
+            isinstance(frame, FunctionCallInProgressFrame)
+            and frame_direction == FrameDirection.DOWNSTREAM
+        ):
+            await self._send_message(
+                {
+                    "type": "rtf-function-call-start",
+                    "payload": {
+                        "function_name": frame.function_name,
+                        "tool_call_id": frame.tool_call_id,
+                    },
+                }
+            )
+        # Handle function call result
+        elif (
+            isinstance(frame, FunctionCallResultFrame)
+            and frame_direction == FrameDirection.DOWNSTREAM
+        ):
+            await self._send_message(
+                {
+                    "type": "rtf-function-call-end",
+                    "payload": {
+                        "function_name": frame.function_name,
+                        "tool_call_id": frame.tool_call_id,
+                        # Only a missing result maps to None; falsy results
+                        # such as 0, False or "" are still reported.
+                        "result": (
+                            str(frame.result) if frame.result is not None else None
+                        ),
+                    },
+                }
+            )
+
+    async def _send_message(self, message: dict):
+        """Send message via WebSocket, handling errors gracefully."""
+        try:
+            await self._ws_sender(message)
+        except Exception as e:
+            # Log but don't fail - feedback is non-critical
+            logger.debug(f"Failed to send real-time feedback message: {e}")
--- a/api/services/pipecat/run_pipeline.py
+++ b/api/services/pipecat/run_pipeline.py
@ -23,6 +23,7 @@ from api.services.pipecat.pipeline_engine_callbacks_processor import (
    PipelineEngineCallbacksProcessor,
 )
 from api.services.pipecat.pipeline_metrics_aggregator import PipelineMetricsAggregator
+from api.services.pipecat.realtime_feedback_observer import RealtimeFeedbackObserver
 from api.services.pipecat.service_factory import (
    create_llm_service,
    create_stt_service,
@ -38,6 +39,7 @@ from api.services.pipecat.transport_setup import (
    create_vonage_transport,
    create_webrtc_transport,
 )
+from api.services.pipecat.ws_sender_registry import get_ws_sender
 from api.services.telephony.stasis_rtp_connection import StasisRTPConnection
 from api.services.workflow.dto import ReactFlowDTO
 from api.services.workflow.pipecat_engine import PipecatEngine
@ -564,6 +566,12 @@ async def _run_pipeline(
    # Create pipeline task with audio configuration
    task = create_pipeline_task(pipeline, workflow_run_id, audio_config)

+    # Add real-time feedback observer if WebSocket sender is available
+    ws_sender = get_ws_sender(workflow_run_id)
+    if ws_sender:
+        feedback_observer = RealtimeFeedbackObserver(ws_sender=ws_sender)
+        task.add_observer(feedback_observer)
+
    # Now set the task on the engine
    engine.set_task(task)

--- a/api/services/pipecat/ws_sender_registry.py
+++ b/api/services/pipecat/ws_sender_registry.py
@ -0,0 +1,28 @@
+"""Registry to store WebSocket senders by workflow_run_id.
+
+This allows the pipeline observer to send messages back through
+the signaling WebSocket without passing the WebSocket directly.
+"""
+
+from typing import Awaitable, Callable, Dict, Optional
+
+# Module-level map from workflow_run_id to the async callable that sends a
+# message dict for that run. Entries are added by register_ws_sender and
+# removed by unregister_ws_sender.
+_ws_senders: Dict[int, Callable[[dict], Awaitable[None]]] = {}
+
+
+def register_ws_sender(
+    workflow_run_id: int, sender: Callable[[dict], Awaitable[None]]
+) -> None:
+    """Store `sender` as the WebSocket sender for `workflow_run_id`.
+
+    A sender already registered under the same id is replaced.
+    """
+    _ws_senders.update({workflow_run_id: sender})
+
+
+def unregister_ws_sender(workflow_run_id: int) -> None:
+    """Drop the sender registered for `workflow_run_id`; no-op if there is none."""
+    if workflow_run_id in _ws_senders:
+        del _ws_senders[workflow_run_id]
+
+
+def get_ws_sender(
+    workflow_run_id: int,
+) -> Optional[Callable[[dict], Awaitable[None]]]:
+    """Return the sender registered for `workflow_run_id`, or None if absent."""
+    try:
+        return _ws_senders[workflow_run_id]
+    except KeyError:
+        return None
--- a/ui/src/app/workflow/[workflowId]/run/[runId]/BrowserCall.tsx
+++ b/ui/src/app/workflow/[workflowId]/run/[runId]/BrowserCall.tsx
@ -9,6 +9,7 @@ import {
    ApiKeyErrorDialog,
    AudioControls,
    ConnectionStatus,
+    RealtimeFeedbackPanel,
    WorkflowConfigErrorDialog
 } from "./components";
 import { useWebSocketRTC } from "./hooks";
@ -40,7 +41,8 @@ const BrowserCall = ({ workflowId, workflowRunId, accessToken, initialContextVar
        start,
        stop,
        isStarting,
-        getAudioInputDevices
+        getAudioInputDevices,
+        feedbackMessages,
    } = useWebSocketRTC({ workflowId, workflowRunId, accessToken, initialContextVariables });

    // Poll for recording availability after call ends
@ -93,44 +95,61 @@ const BrowserCall = ({ workflowId, workflowRunId, accessToken, initialContextVar

    return (
        <>
-            <Card className="w-full max-w-4xl mx-auto">
-                <CardHeader>
-                    <CardTitle>Call Voice Agent</CardTitle>
-                </CardHeader>
+            <div className="flex h-full w-full">
+                {/* Main content - fixed 2/3 width; the transcript panel always takes the remaining 1/3 */}
+                <div className="w-2/3 h-full">
+                    <div className="flex justify-center items-center h-full px-8">
+                        <Card className="w-full max-w-xl">
+                            <CardHeader>
+                                <CardTitle>Call Voice Agent</CardTitle>
+                            </CardHeader>

-                <CardContent>
-                    {isCompleted && checkingForRecording ? (
-                        <div className="flex flex-col items-center justify-center space-y-4 p-8">
-                            <Loader2 className="h-8 w-8 animate-spin text-primary" />
-                            <div className="text-center space-y-2">
-                                <p className="text-foreground font-medium">Processing your call</p>
-                                <p className="text-sm text-muted-foreground">Fetching transcript and recording...</p>
-                            </div>
-                        </div>
-                    ) : (
-                        <>
-                            <AudioControls
-                                audioInputs={audioInputs}
-                                selectedAudioInput={selectedAudioInput}
-                                setSelectedAudioInput={setSelectedAudioInput}
-                                isCompleted={isCompleted}
-                                connectionActive={connectionActive}
-                                permissionError={permissionError}
-                                start={start}
-                                stop={stop}
-                                isStarting={isStarting}
-                                getAudioInputDevices={getAudioInputDevices}
-                            />
+                            <CardContent>
+                                {isCompleted && checkingForRecording ? (
+                                    <div className="flex flex-col items-center justify-center space-y-4 p-8">
+                                        <Loader2 className="h-8 w-8 animate-spin text-primary" />
+                                        <div className="text-center space-y-2">
+                                            <p className="text-foreground font-medium">Processing your call</p>
+                                            <p className="text-sm text-muted-foreground">Fetching transcript and recording...</p>
+                                        </div>
+                                    </div>
+                                ) : (
+                                    <>
+                                        <AudioControls
+                                            audioInputs={audioInputs}
+                                            selectedAudioInput={selectedAudioInput}
+                                            setSelectedAudioInput={setSelectedAudioInput}
+                                            isCompleted={isCompleted}
+                                            connectionActive={connectionActive}
+                                            permissionError={permissionError}
+                                            start={start}
+                                            stop={stop}
+                                            isStarting={isStarting}
+                                            getAudioInputDevices={getAudioInputDevices}
+                                        />

-                            <ConnectionStatus
-                                connectionStatus={connectionStatus}
-                            />
-                        </>
-                    )}
-                </CardContent>
+                                        <ConnectionStatus
+                                            connectionStatus={connectionStatus}
+                                        />
+                                    </>
+                                )}
+                            </CardContent>

-                <audio ref={audioRef} autoPlay playsInline className="hidden" />
-            </Card>
+                            <audio ref={audioRef} autoPlay playsInline className="hidden" />
+                        </Card>
+                    </div>
+                </div>
+
+                {/* Show transcript panel */}
+                <div className="w-1/3 h-full shrink-0">
+                    <RealtimeFeedbackPanel
+                        messages={feedbackMessages}
+                        isVisible={true}
+                        isCallActive={connectionActive}
+                        isCallCompleted={isCompleted}
+                    />
+                </div>
+            </div>

            <ApiKeyErrorDialog
                open={apiKeyModalOpen}
--- a/ui/src/app/workflow/[workflowId]/run/[runId]/components/RealtimeFeedbackPanel.tsx
+++ b/ui/src/app/workflow/[workflowId]/run/[runId]/components/RealtimeFeedbackPanel.tsx
@ -0,0 +1,152 @@
+"use client";
+
+import { Loader2, MessageSquare, Mic, MicOff, Wrench } from "lucide-react";
+import { useEffect, useRef } from "react";
+
+import { cn } from "@/lib/utils";
+
+import { FeedbackMessage } from "../hooks/useWebSocketRTC";
+
+interface RealtimeFeedbackPanelProps {
+    messages: FeedbackMessage[];
+    isVisible: boolean;
+    isCallActive: boolean;
+    isCallCompleted: boolean;
+}
+
+/**
+ * Renders a single entry of the live transcript.
+ *
+ * - `function-call` messages are drawn as a centered pill showing the function
+ *   name, with a spinner while `status` is 'running', a wrench icon otherwise,
+ *   and a check mark once `status` is 'completed'.
+ * - All other messages are drawn as chat bubbles: `user-transcription` on the
+ *   right, everything else on the left. Messages that are not `final` are
+ *   dimmed and labelled "speaking...".
+ */
+const MessageItem = ({ msg }: { msg: FeedbackMessage }) => {
+    // Function call message - centered
+    if (msg.type === 'function-call') {
+        return (
+            <div className="flex justify-center">
+                <div className="px-3 py-1.5 rounded-full text-xs bg-amber-500/10 border border-amber-500/20 inline-flex items-center gap-2">
+                    {msg.status === 'running' ? (
+                        <Loader2 className="h-3 w-3 animate-spin text-amber-500" />
+                    ) : (
+                        <Wrench className="h-3 w-3 text-amber-500" />
+                    )}
+                    <span className="font-mono text-amber-700 dark:text-amber-400">
+                        {msg.functionName}()
+                    </span>
+                    {msg.status === 'completed' && (
+                        <span className="text-muted-foreground">✓</span>
+                    )}
+                </div>
+            </div>
+        );
+    }
+
+    const isUser = msg.type === 'user-transcription';
+
+    // User messages on right, bot messages on left
+    return (
+        <div className={cn(
+            "flex",
+            isUser ? "justify-end" : "justify-start"
+        )}>
+            <div
+                className={cn(
+                    "max-w-[85%] px-3 py-2 rounded-2xl text-sm",
+                    isUser
+                        ? "bg-primary text-primary-foreground rounded-br-md"
+                        : "bg-muted rounded-bl-md",
+                    !msg.final && "opacity-70"
+                )}
+            >
+                <div className="whitespace-pre-wrap leading-relaxed">{msg.text}</div>
+                {!msg.final && (
+                    <div className={cn(
+                        "text-[10px] mt-1 italic",
+                        isUser ? "text-primary-foreground/70" : "text-muted-foreground"
+                    )}>
+                        speaking...
+                    </div>
+                )}
+            </div>
+        </div>
+    );
+};
+
+/**
+ * Side panel showing the live transcript of a call.
+ *
+ * Renders a header with a status badge (Live / Ended / Ready), the list of
+ * feedback messages (or an empty-state hint), and a footer with the number of
+ * non-function-call messages. Returns null when `isVisible` is false.
+ */
+export const RealtimeFeedbackPanel = ({
+    messages,
+    isVisible,
+    isCallActive,
+    isCallCompleted
+}: RealtimeFeedbackPanelProps) => {
+    const scrollRef = useRef<HTMLDivElement>(null);
+
+    // Auto-scroll to bottom when new messages arrive
+    useEffect(() => {
+        if (scrollRef.current) {
+            scrollRef.current.scrollTop = scrollRef.current.scrollHeight;
+        }
+    }, [messages]);
+
+    if (!isVisible) return null;
+
+    // Status badge content: an active call takes precedence over a completed
+    // one; anything else is shown as "Ready". Only the active state has its
+    // own colors, so the badge styling needs a single two-way choice.
+    const StatusIcon = isCallActive ? Mic : MicOff;
+    const statusLabel = isCallActive ? "Live" : isCallCompleted ? "Ended" : "Ready";
+
+    return (
+        <div className="w-full h-full flex flex-col bg-background border-l border-border">
+            {/* Header */}
+            <div className="px-4 py-3 border-b border-border shrink-0">
+                <div className="flex items-center justify-center gap-2">
+                    <MessageSquare className="h-4 w-4 text-muted-foreground shrink-0" />
+                    <span className="font-medium text-sm whitespace-nowrap">Live Transcript</span>
+                    <div className={cn(
+                        "flex items-center gap-1 text-xs px-2 py-0.5 rounded-full shrink-0",
+                        isCallActive
+                            ? "bg-green-500/10 text-green-600 dark:text-green-400"
+                            : "bg-muted text-muted-foreground"
+                    )}>
+                        <StatusIcon className="h-3 w-3" />
+                        <span>{statusLabel}</span>
+                    </div>
+                </div>
+            </div>
+
+            {/* Messages */}
+            <div ref={scrollRef} className="flex-1 overflow-y-auto">
+                {messages.length === 0 ? (
+                    <div className="flex flex-col items-center justify-center h-full text-muted-foreground text-sm">
+                        <MessageSquare className="h-10 w-10 mb-4 opacity-30" />
+                        <p className="font-medium">No messages yet</p>
+                        <p className="text-xs mt-1 text-center px-4">
+                            {isCallActive
+                                ? "Start speaking to see the transcript"
+                                : "Start the call to begin the conversation"
+                            }
+                        </p>
+                    </div>
+                ) : (
+                    <div className="space-y-3 p-4">
+                        {messages.map((msg) => (
+                            <MessageItem key={msg.id} msg={msg} />
+                        ))}
+                    </div>
+                )}
+            </div>
+
+            {/* Footer with message count */}
+            {messages.length > 0 && (
+                <div className="px-4 py-2 border-t border-border text-xs text-muted-foreground shrink-0">
+                    {messages.filter(m => m.type !== 'function-call').length} messages
+                </div>
+            )}
+        </div>
+    );
+};
--- a/ui/src/app/workflow/[workflowId]/run/[runId]/components/index.ts
+++ b/ui/src/app/workflow/[workflowId]/run/[runId]/components/index.ts
@ -2,4 +2,5 @@ export * from './ApiKeyErrorDialog';
 export * from './AudioControls';
 export * from './ConnectionStatus';
 export * from './ContextDisplay';
+export * from './RealtimeFeedbackPanel';
 export * from './WorkflowConfigErrorDialog'
--- a/ui/src/app/workflow/[workflowId]/run/[runId]/hooks/index.ts
+++ b/ui/src/app/workflow/[workflowId]/run/[runId]/hooks/index.ts
@ -1,3 +1,2 @@
 export * from './useDeviceInputs';
-export * from './useWebRTC';
 export * from './useWebSocketRTC';
--- a/ui/src/app/workflow/[workflowId]/run/[runId]/hooks/useWebRTC.tsx
+++ b/ui/src/app/workflow/[workflowId]/run/[runId]/hooks/useWebRTC.tsx
@ -1,287 +0,0 @@
-import { useRef, useState } from "react";
-
-import { offerApiV1PipecatRtcOfferPost, validateUserConfigurationsApiV1UserConfigurationsUserValidateGet, validateWorkflowApiV1WorkflowWorkflowIdValidatePost } from "@/client/sdk.gen";
-import { WorkflowValidationError } from "@/components/flow/types";
-import logger from '@/lib/logger';
-
-import { sdpFilterCodec } from "../utils";
-import { useDeviceInputs } from "./useDeviceInputs";
-
-interface UseWebRTCProps {
-    workflowId: number;
-    workflowRunId: number;
-    accessToken: string | null;
-    initialContextVariables?: Record<string, string> | null;
-}
-
-export const useWebRTC = ({ workflowId, workflowRunId, accessToken, initialContextVariables }: UseWebRTCProps) => {
-    const [connectionStatus, setConnectionStatus] = useState<'idle' | 'connecting' | 'connected' | 'failed'>('idle');
-    const [connectionActive, setConnectionActive] = useState(false);
-    const [isCompleted, setIsCompleted] = useState(false);
-    const [apiKeyModalOpen, setApiKeyModalOpen] = useState(false);
-    const [apiKeyError, setApiKeyError] = useState<string | null>(null);
-    const [workflowConfigModalOpen, setWorkflowConfigModalOpen] = useState(false);
-    const [workflowConfigError, setWorkflowConfigError] = useState<string | null>(null);
-    const [isStarting, setIsStarting] = useState(false);
-    // Use initial context variables directly, no UI for editing
-    const initialContext = initialContextVariables || {};
-
-    const {
-        audioInputs,
-        selectedAudioInput,
-        setSelectedAudioInput,
-        permissionError,
-        setPermissionError
-    } = useDeviceInputs();
-
-    const useStun = true;
-    const useAudio = true;
-    const audioCodec = 'default';
-
-    const audioRef = useRef<HTMLAudioElement>(null);
-    const pcRef = useRef<RTCPeerConnection | null>(null);
-    const timeStartRef = useRef<number | null>(null);
-
-    // Generate a cryptographically secure unique ID
-    const generateSecureId = () => {
-        // Use Web Crypto API to generate random bytes
-        const array = new Uint8Array(16);
-        crypto.getRandomValues(array);
-        // Convert to hex string
-        return 'PC-' + Array.from(array)
-            .map(b => b.toString(16).padStart(2, '0'))
-            .join('');
-    };
-
-    const pc_id = generateSecureId();
-
-    const createPeerConnection = () => {
-        const config: RTCConfiguration = {
-            iceServers: useStun ? [{ urls: ['stun:stun.l.google.com:19302'] }] : []
-        };
-
-        const pc = new RTCPeerConnection(config);
-
-        pc.addEventListener('icegatheringstatechange', () => {
-            logger.info(`ICE gathering state changed in createPeerConnection, ${pc.iceGatheringState}`);
-        });
-
-        pc.addEventListener('iceconnectionstatechange', () => {
-            logger.info(`ICE connection state changed: ${pc.iceConnectionState}`);
-            if (pc.iceConnectionState === 'connected' || pc.iceConnectionState === 'completed') {
-                setConnectionStatus('connected');
-            } else if (pc.iceConnectionState === 'failed' || pc.iceConnectionState === 'disconnected') {
-                setConnectionStatus('failed');
-            }
-        });
-
-        pc.addEventListener('track', (evt) => {
-            if (evt.track.kind === 'audio' && audioRef.current) {
-                audioRef.current.srcObject = evt.streams[0];
-            }
-        });
-
-        pcRef.current = pc;
-        return pc;
-    };
-
-    const negotiate = async () => {
-        const pc = pcRef.current;
-        if (!pc) return;
-
-        try {
-            const offer = await pc.createOffer();
-            await pc.setLocalDescription(offer);
-
-            await new Promise<void>((resolve) => {
-                if (pc.iceGatheringState === 'complete') {
-                    resolve();
-                } else {
-                    const checkState = () => {
-                        if (pc.iceGatheringState === 'complete') {
-                            logger.debug(`ICE gathering is complete in negotiate, ${pc.iceGatheringState}`);
-                            pc.removeEventListener('icegatheringstatechange', checkState);
-                            resolve();
-                        }
-                    };
-                    pc.addEventListener('icegatheringstatechange', checkState);
-                }
-            });
-
-            const localDescription = pc.localDescription;
-            if (!localDescription) return;
-
-            let sdp = localDescription.sdp;
-
-            if (audioCodec !== 'default') {
-                sdp = sdpFilterCodec('audio', audioCodec, sdp);
-            }
-
-            if (!accessToken) return;
-
-            const response = await offerApiV1PipecatRtcOfferPost({
-                headers: {
-                    'Authorization': `Bearer ${accessToken}`,
-                },
-                body: {
-                    sdp: sdp,
-                    type: 'offer',
-                    pc_id: pc_id,
-                    restart_pc: false,
-                    workflow_id: workflowId,
-                    workflow_run_id: workflowRunId,
-                    call_context_vars: initialContext
-                }
-            });
-
-            if (response && response.data) {
-                const answerSdpText = typeof response.data === 'object' && 'sdp' in response.data
-                    ? response.data.sdp as string
-                    : '';
-
-                await pc.setRemoteDescription({
-                    type: 'answer',
-                    sdp: answerSdpText
-                });
-                setConnectionActive(true);
-            }
-        } catch (e) {
-            logger.error(`Negotiation failed: ${e}`);
-        }
-    };
-
-    const start = async () => {
-        if (isStarting || !accessToken) return;
-        setIsStarting(true);
-        setConnectionStatus('connecting');
-        try {
-            const response = await validateUserConfigurationsApiV1UserConfigurationsUserValidateGet({
-                headers: {
-                    'Authorization': `Bearer ${accessToken}`,
-                },
-                query: {
-                    validity_ttl_seconds: 86400
-                },
-            });
-            if (response.error) {
-                setApiKeyModalOpen(true);
-                let msg = 'API Key Error';
-                const detail = (response.error as unknown as { detail?: { errors: { model: string; message: string }[] } }).detail;
-                if (Array.isArray(detail)) {
-                    msg = detail
-                        .map((e: { model: string; message: string }) => `${e.model}: ${e.message}`)
-                        .join('\n');
-                }
-                setApiKeyError(msg);
-                return;
-            }
-
-            // Then check workflow validation
-            const workflowResponse = await validateWorkflowApiV1WorkflowWorkflowIdValidatePost({
-                path: {
-                    workflow_id: workflowId,
-                },
-                headers: {
-                    'Authorization': `Bearer ${accessToken}`,
-                },
-            });
-
-            if (workflowResponse.error) {
-                setWorkflowConfigModalOpen(true);
-                let msg = 'Workflow validation failed';
-                const errorDetail = workflowResponse.error as { detail?: { errors: WorkflowValidationError[] } };
-                if (errorDetail?.detail?.errors) {
-                    msg = errorDetail.detail.errors
-                        .map(err => `${err.kind}: ${err.message}`)
-                        .join('\n');
-                }
-                setWorkflowConfigError(msg);
-                return;
-            }
-
-            timeStartRef.current = null;
-            const pc = createPeerConnection();
-
-            const constraints: MediaStreamConstraints = {
-                audio: false,
-            };
-
-            if (useAudio) {
-                const audioConstraints: MediaTrackConstraints = {};
-                if (selectedAudioInput) {
-                    audioConstraints.deviceId = { exact: selectedAudioInput };
-                }
-                constraints.audio = Object.keys(audioConstraints).length ? audioConstraints : true;
-            }
-
-            if (constraints.audio) {
-                try {
-                    const stream = await navigator.mediaDevices.getUserMedia(constraints);
-                    stream.getTracks().forEach((track) => {
-                        pc.addTrack(track, stream);
-                    });
-                    await negotiate();
-                } catch (err) {
-                    logger.error(`Could not acquire media: ${err}`);
-                    setPermissionError('Could not acquire media');
-                    setConnectionStatus('failed');
-                }
-            } else {
-                await negotiate();
-            }
-        } finally {
-            setIsStarting(false);
-        }
-    };
-
-    const stop = () => {
-        setConnectionActive(false);
-        setIsCompleted(true);
-        setConnectionStatus('idle');
-
-        const pc = pcRef.current;
-        if (!pc) return;
-
-        if (pc.getTransceivers) {
-            pc.getTransceivers().forEach((transceiver) => {
-                if (transceiver.stop) {
-                    transceiver.stop();
-                }
-            });
-        }
-
-        pc.getSenders().forEach((sender) => {
-            if (sender.track) {
-                sender.track.stop();
-            }
-        });
-
-        setTimeout(() => {
-            if (pcRef.current) {
-                pcRef.current.close();
-                pcRef.current = null;
-            }
-        }, 500);
-    };
-
-    return {
-        audioRef,
-        audioInputs,
-        selectedAudioInput,
-        setSelectedAudioInput,
-        connectionActive,
-        permissionError,
-        isCompleted,
-        apiKeyModalOpen,
-        setApiKeyModalOpen,
-        apiKeyError,
-        workflowConfigError,
-        workflowConfigModalOpen,
-        setWorkflowConfigModalOpen,
-        connectionStatus,
-        start,
-        stop,
-        isStarting,
-        initialContext
-    };
-};
--- a/ui/src/app/workflow/[workflowId]/run/[runId]/hooks/useWebSocketRTC.tsx
+++ b/ui/src/app/workflow/[workflowId]/run/[runId]/hooks/useWebSocketRTC.tsx
@ -15,6 +15,16 @@ interface UseWebSocketRTCProps {
    initialContextVariables?: Record<string, string> | null;
 }

+/**
+ * One entry in the live-call feedback list that `useWebSocketRTC` builds from
+ * incoming `rtf-*` messages and exposes as `feedbackMessages`.
+ */
+export interface FeedbackMessage {
+    /** Entry identifier; the hook generates `user-<ms>`, `bot-<ms>` or `func-<tool_call_id>`. */
+    id: string;
+    /** Which kind of event this entry represents. */
+    type: 'user-transcription' | 'bot-text' | 'function-call';
+    /**
+     * Display text: the user's transcribed speech, the bot text accumulated so
+     * far, or for function calls the function name, replaced by the result
+     * when the end message carries one.
+     */
+    text: string;
+    /**
+     * False while a user transcription is interim or a bot message can still
+     * receive more text; true once closed. The hook does not set it on
+     * function-call entries.
+     */
+    final?: boolean;
+    /** ISO 8601 string (`Date#toISOString`) taken when the entry object was created. */
+    timestamp: string;
+    /** Name of the invoked function; the hook sets it only on function-call entries. */
+    functionName?: string;
+    /** 'running' from the start message until the matching end message marks it 'completed'. */
+    status?: 'running' | 'completed';
+}
+
 export const useWebSocketRTC = ({ workflowId, workflowRunId, accessToken, initialContextVariables }: UseWebSocketRTCProps) => {
    const [connectionStatus, setConnectionStatus] = useState<'idle' | 'connecting' | 'connected' | 'failed'>('idle');
    const [connectionActive, setConnectionActive] = useState(false);
@ -24,6 +34,7 @@ export const useWebSocketRTC = ({ workflowId, workflowRunId, accessToken, initia
    const [workflowConfigModalOpen, setWorkflowConfigModalOpen] = useState(false);
    const [workflowConfigError, setWorkflowConfigError] = useState<string | null>(null);
    const [isStarting, setIsStarting] = useState(false);
+    const [feedbackMessages, setFeedbackMessages] = useState<FeedbackMessage[]>([]);
    const initialContext = initialContextVariables || {};

    const {
@ -271,6 +282,105 @@ export const useWebSocketRTC = ({ workflowId, workflowRunId, accessToken, initia
                            }
                            break;

+                        case 'rtf-user-transcription': {
+                            // Live transcription of the user's speech. Every update drops any
+                            // pending interim entry and appends the newest text, so interim
+                            // and final results share one code path and differ only in `final`.
+                            const transcription = message.payload;
+                            setFeedbackMessages(prev => {
+                                const isInterimUser = (m: FeedbackMessage) =>
+                                    m.type === 'user-transcription' && !m.final;
+
+                                // Reuse the pending interim entry's id so the entry keeps a
+                                // stable identity (ids are used as list keys) while the same
+                                // utterance is refined, instead of minting a new id per update.
+                                const pendingInterim = prev.find(isInterimUser);
+                                const entryId = pendingInterim
+                                    ? pendingInterim.id
+                                    : `user-${Date.now()}`;
+
+                                const lastIndex = prev.length - 1;
+                                const carriedOver = prev
+                                    // The user started speaking: close a trailing open bot
+                                    // message so later bot text starts a new entry.
+                                    .map((m, i) =>
+                                        i === lastIndex && m.type === 'bot-text' && !m.final
+                                            ? { ...m, final: true }
+                                            : m
+                                    )
+                                    // Remove the interim transcription being superseded.
+                                    .filter(m => !isInterimUser(m));
+
+                                return [...carriedOver, {
+                                    id: entryId,
+                                    type: 'user-transcription',
+                                    text: transcription.text,
+                                    final: Boolean(transcription.final),
+                                    timestamp: new Date().toISOString(),
+                                }];
+                            });
+                            break;
+                        }
+
+                        case 'rtf-bot-text': {
+                            // Bot text arrives in sentence/phrase chunks. Consecutive chunks
+                            // are merged into the trailing bot message while it is still open
+                            // (not final); otherwise a new bot message is started.
+                            setFeedbackMessages(prev => {
+                                const chunk = message.payload.text;
+                                const tail = prev[prev.length - 1];
+
+                                // No open bot message at the end of the list: start a new one.
+                                if (!tail || tail.type !== 'bot-text' || tail.final) {
+                                    return [...prev, {
+                                        id: `bot-${Date.now()}`,
+                                        type: 'bot-text',
+                                        text: chunk,
+                                        final: false,
+                                        timestamp: new Date().toISOString(),
+                                    }];
+                                }
+
+                                // Skip the separator when the existing text is empty or already
+                                // ends with a space, or when the chunk starts with a space or
+                                // punctuation.
+                                const skipSeparator = tail.text.length === 0 ||
+                                    tail.text.endsWith(' ') ||
+                                    chunk.startsWith(' ') ||
+                                    /^[.,!?;:]/.test(chunk);
+                                const separator = skipSeparator ? '' : ' ';
+                                const merged = { ...tail, text: `${tail.text}${separator}${chunk}` };
+                                return [...prev.slice(0, -1), merged];
+                            });
+                            break;
+                        }
+
+                        case 'rtf-function-call-start': {
+                            // A function call began: add a 'running' entry keyed by the tool
+                            // call id, unless an entry for this call is already present.
+                            const { function_name, tool_call_id } = message.payload;
+                            const entryId = `func-${tool_call_id}`;
+                            setFeedbackMessages(prev => {
+                                if (!prev.some(entry => entry.id === entryId)) {
+                                    return [...prev, {
+                                        id: entryId,
+                                        type: 'function-call',
+                                        functionName: function_name,
+                                        text: function_name,
+                                        status: 'running',
+                                        timestamp: new Date().toISOString(),
+                                    }];
+                                }
+                                // Duplicate start for a known call: leave the list unchanged.
+                                return prev;
+                            });
+                            break;
+                        }
+
+                        case 'rtf-function-call-end': {
+                            // A function call finished: mark the matching entry completed and
+                            // show its result in place of the function name.
+                            const { tool_call_id, result } = message.payload;
+                            const targetId = `func-${tool_call_id}`;
+                            // NOTE(review): `FeedbackMessage.text` is declared as a string but the
+                            // payload is used here without validation; coerce non-string results
+                            // so a structured result cannot end up in `text`. Confirm the
+                            // payload type against the backend.
+                            // Falsy results (missing/empty) keep the existing text, as before.
+                            let resultText = '';
+                            if (result) {
+                                resultText = typeof result === 'string'
+                                    ? result
+                                    : JSON.stringify(result);
+                            }
+                            setFeedbackMessages(prev => prev.map(msg =>
+                                msg.id === targetId
+                                    ? { ...msg, status: 'completed' as const, text: resultText || msg.text }
+                                    : msg
+                            ));
+                            break;
+                        }
+
                        default:
                            logger.warn('Unknown message type:', message.type);
                    }
@ -505,6 +615,7 @@ export const useWebSocketRTC = ({ workflowId, workflowRunId, accessToken, initia
        stop,
        isStarting,
        initialContext,
-        getAudioInputDevices
+        getAudioInputDevices,
+        feedbackMessages,
    };
 };
--- a/ui/src/client/sdk.gen.ts
+++ b/ui/src/client/sdk.gen.ts
--- a/ui/src/client/types.gen.ts
+++ b/ui/src/client/types.gen.ts
@ -483,18 +483,6 @@ export type PresignedUploadUrlResponse = {
    expires_in: number;
 };

-export type RtcOfferRequest = {
-    pc_id: string | null;
-    sdp: string;
-    type: string;
-    workflow_id: number;
-    workflow_run_id: number;
-    restart_pc?: boolean;
-    call_context_vars?: {
-        [key: string]: unknown;
-    } | null;
-};
-
 export type S3SignedUrlResponse = {
    url: string;
    expires_in: number;
@ -1260,37 +1248,6 @@ export type HandleInboundFallbackApiV1TelephonyInboundFallbackPostResponses = {
    200: unknown;
 };

-export type OfferApiV1PipecatRtcOfferPostData = {
-    body: RtcOfferRequest;
-    headers?: {
-        authorization?: string | null;
-        'X-API-Key'?: string | null;
-    };
-    path?: never;
-    query?: never;
-    url: '/api/v1/pipecat/rtc-offer';
-};
-
-export type OfferApiV1PipecatRtcOfferPostErrors = {
-    /**
-     * Not found
-     */
-    404: unknown;
-    /**
-     * Validation Error
-     */
-    422: HttpValidationError;
-};
-
-export type OfferApiV1PipecatRtcOfferPostError = OfferApiV1PipecatRtcOfferPostErrors[keyof OfferApiV1PipecatRtcOfferPostErrors];
-
-export type OfferApiV1PipecatRtcOfferPostResponses = {
-    /**
-     * Successful Response
-     */
-    200: unknown;
-};
-
 export type ImpersonateApiV1SuperuserImpersonatePostData = {
    body: ImpersonateRequest;
    headers?: {