2026-01-13 22:48:18 +05:30
|
|
|
"""Real-time feedback observer for sending pipeline events to the frontend.
|
|
|
|
|
|
|
|
|
|
This observer watches pipeline frames and sends relevant events (transcriptions,
|
2026-01-15 16:17:17 +05:30
|
|
|
bot text, function calls, TTFB metrics) over WebSocket to provide real-time
|
|
|
|
|
feedback in the UI.
|
2026-01-13 22:48:18 +05:30
|
|
|
|
2026-05-31 16:05:03 +05:30
|
|
|
For TTS text, we wait until the frame has passed through BaseOutputTransport.
|
|
|
|
|
That transport already applies presentation timestamp timing against audio
|
|
|
|
|
playback, so the UI text is emitted from the same clock as the spoken audio.
|
2026-01-15 16:17:17 +05:30
|
|
|
|
2026-02-21 14:21:39 +05:30
|
|
|
Streaming vs. persisted data:
|
|
|
|
|
- WebSocket receives all events in real-time (interim transcriptions, TTS text
|
|
|
|
|
chunks, function calls, metrics) for live UI feedback.
|
|
|
|
|
- The logs buffer only stores final complete transcripts per turn (via
|
|
|
|
|
register_turn_log_handlers hooking into aggregator events), function calls,
|
|
|
|
|
and metrics — not interim/streaming data.
|
|
|
|
|
|
2026-01-15 16:17:17 +05:30
|
|
|
Note: Node transition events are sent directly from PipecatEngine.set_node()
|
|
|
|
|
rather than being observed here, to ensure precise timing at the moment of
|
|
|
|
|
node changes.
|
2026-01-13 22:48:18 +05:30
|
|
|
"""
|
|
|
|
|
|
2026-05-02 17:37:48 +05:30
|
|
|
import json
|
2026-01-15 16:17:17 +05:30
|
|
|
from typing import TYPE_CHECKING, Awaitable, Callable, Optional, Set
|
2026-01-13 22:48:18 +05:30
|
|
|
|
|
|
|
|
from loguru import logger
|
|
|
|
|
|
2026-05-21 15:20:02 +05:30
|
|
|
from api.services.pipecat.realtime_feedback_events import (
|
|
|
|
|
build_bot_text_event,
|
|
|
|
|
build_function_call_end_event,
|
|
|
|
|
build_function_call_start_event,
|
|
|
|
|
build_pipeline_error_event,
|
|
|
|
|
build_ttfb_metric_event,
|
|
|
|
|
build_user_transcription_event,
|
|
|
|
|
)
|
|
|
|
|
|
2026-01-15 16:17:17 +05:30
|
|
|
if TYPE_CHECKING:
|
|
|
|
|
from api.services.pipecat.in_memory_buffers import InMemoryLogsBuffer
|
|
|
|
|
|
2026-01-13 22:48:18 +05:30
|
|
|
from pipecat.frames.frames import (
|
2026-03-21 13:55:34 +05:30
|
|
|
BotStartedSpeakingFrame,
|
|
|
|
|
BotStoppedSpeakingFrame,
|
2026-01-13 22:48:18 +05:30
|
|
|
CancelFrame,
|
|
|
|
|
EndFrame,
|
2026-03-05 14:51:39 +05:30
|
|
|
ErrorFrame,
|
2026-01-13 22:48:18 +05:30
|
|
|
FunctionCallInProgressFrame,
|
|
|
|
|
FunctionCallResultFrame,
|
|
|
|
|
InterimTranscriptionFrame,
|
|
|
|
|
InterruptionFrame,
|
2026-01-15 16:17:17 +05:30
|
|
|
MetricsFrame,
|
2026-01-13 22:48:18 +05:30
|
|
|
StopFrame,
|
|
|
|
|
TranscriptionFrame,
|
2026-04-21 07:56:16 +05:30
|
|
|
TTSSpeakFrame,
|
2026-03-21 13:55:34 +05:30
|
|
|
TTSTextFrame,
|
|
|
|
|
UserMuteStartedFrame,
|
|
|
|
|
UserMuteStoppedFrame,
|
2026-01-13 22:48:18 +05:30
|
|
|
)
|
2026-01-15 16:17:17 +05:30
|
|
|
from pipecat.metrics.metrics import TTFBMetricsData
|
2026-01-13 22:48:18 +05:30
|
|
|
from pipecat.observers.base_observer import BaseObserver, FramePushed
|
|
|
|
|
from pipecat.processors.frame_processor import FrameDirection
|
2026-05-31 16:05:03 +05:30
|
|
|
from pipecat.transports.base_output import BaseOutputTransport
|
2026-02-21 14:21:39 +05:30
|
|
|
from pipecat.utils.enums import RealtimeFeedbackType
|
2026-01-13 22:48:18 +05:30
|
|
|
|
|
|
|
|
|
|
|
|
|
class RealtimeFeedbackObserver(BaseObserver):
    """Observer that sends real-time events via WebSocket and persists selected events.

    WebSocket streaming (all events for live UI):
    - Bot speaking / user mute state changes
    - User transcriptions (interim and final)
    - Bot TTS text after output transport timing
    - Function calls (start/end)
    - TTFB metrics (LLM generation time only)
    - Pipeline errors

    Logs buffer persistence done by this observer (when a buffer is supplied):
    - Function calls, TTFB metrics and pipeline errors
    - TTSSpeakFrame / TTSTextFrame text explicitly flagged ``persist_to_logs``

    Complete per-turn user/assistant transcripts are NOT persisted here; they
    are stored by the handlers installed with ``register_turn_log_handlers``
    (on_user_turn_stopped / on_assistant_turn_stopped).

    Note: Node transitions are handled by PipecatEngine.set_node() callback.
    """

    def __init__(
        self,
        ws_sender: Callable[[dict], Awaitable[None]],
        logs_buffer: Optional["InMemoryLogsBuffer"] = None,
    ):
        """
        Args:
            ws_sender: Async function to send messages over WebSocket.
                Expected signature: async def send(message: dict) -> None
            logs_buffer: Optional InMemoryLogsBuffer to persist events for post-call analysis.
        """
        super().__init__()
        self._ws_sender = ws_sender
        self._logs_buffer = logs_buffer
        # Ids of frames that have already been handled, so a frame observed
        # at several points of the pipeline is only reported once.
        self._frames_seen: Set[str] = set()

    async def cleanup(self):
        """Clean up resources. Must be called when the observer is no longer needed.

        Currently a no-op: the observer holds no resources that need releasing.
        """

    async def on_push_frame(self, data: FramePushed):
        """Process frames and send relevant ones to the client.

        Args:
            data: The observed push; its ``frame``, ``direction`` and ``source``
                are read.
        """
        frame = data.frame
        frame_direction = data.direction
        source = data.source

        # Skip already processed frames (frames can be observed multiple times).
        if frame.id in self._frames_seen:
            return

        # ErrorFrames are accepted in either direction — push_error() emits them
        # UPSTREAM, and we still want to surface them to the UI. Upstream-only
        # transcription frames are accepted too: upstream Gemini Live emits user
        # transcripts toward the user aggregator, not downstream. Broadcast
        # transcription siblings are still handled only on the downstream copy to
        # avoid duplicate live UI messages.
        if frame_direction != FrameDirection.DOWNSTREAM:
            is_upstream_transcription = (
                isinstance(frame, (InterimTranscriptionFrame, TranscriptionFrame))
                and frame.broadcast_sibling_id is None
            )
            if not isinstance(frame, ErrorFrame) and not is_upstream_transcription:
                return

        # TTSTextFrame may be observed before the output transport has applied
        # its audio clock. Match RTVIObserver: leave the frame unmarked so the
        # transport-pushed copy can be handled with playback timing already done.
        if isinstance(frame, TTSTextFrame) and not isinstance(
            source, BaseOutputTransport
        ):
            return

        self._frames_seen.add(frame.id)

        logger.trace(f"{self} Received Frame: {frame} Direction: {frame_direction}")

        # Lifecycle / interruption frames carry nothing to report.
        if isinstance(frame, (EndFrame, CancelFrame, StopFrame, InterruptionFrame)):
            return

        # Bot speaking state - WS only (ephemeral state signals, not persisted)
        if isinstance(frame, BotStartedSpeakingFrame):
            await self._send_ws(
                {"type": RealtimeFeedbackType.BOT_STARTED_SPEAKING.value, "payload": {}}
            )
        elif isinstance(frame, BotStoppedSpeakingFrame):
            await self._send_ws(
                {"type": RealtimeFeedbackType.BOT_STOPPED_SPEAKING.value, "payload": {}}
            )
        # User mute state - WS only (ephemeral state signals, not persisted)
        elif isinstance(frame, UserMuteStartedFrame):
            await self._send_ws(
                {"type": RealtimeFeedbackType.USER_MUTE_STARTED.value, "payload": {}}
            )
        elif isinstance(frame, UserMuteStoppedFrame):
            await self._send_ws(
                {"type": RealtimeFeedbackType.USER_MUTE_STOPPED.value, "payload": {}}
            )
        # Handle user transcriptions (interim) - WebSocket only
        elif isinstance(frame, InterimTranscriptionFrame):
            await self._send_ws(
                build_user_transcription_event(
                    text=frame.text,
                    final=False,
                    user_id=frame.user_id,
                    timestamp=frame.timestamp,
                )
            )
        # Handle user transcriptions (final) - WebSocket only
        # Complete turn text is persisted via register_turn_log_handlers
        elif isinstance(frame, TranscriptionFrame):
            await self._send_ws(
                build_user_transcription_event(
                    text=frame.text,
                    final=True,
                    user_id=frame.user_id,
                    timestamp=frame.timestamp,
                )
            )
        # Handle engine-queued speech (transition/tool messages) marked for
        # log persistence. The downstream TTSTextFrame(s) from the TTS service
        # still stream to WS as normal; we persist the full utterance once here
        # to avoid word-level log entries from word-timestamp providers.
        elif isinstance(frame, TTSSpeakFrame):
            if getattr(frame, "persist_to_logs", False):
                await self._append_to_buffer(build_bot_text_event(text=frame.text))
        # Handle bot TTS text after output transport timing, WebSocket only.
        # Complete turn text is persisted via register_turn_log_handlers,
        # except for frames explicitly flagged persist_to_logs (e.g. recording
        # transcripts from play_audio) which bypass the aggregator path.
        elif isinstance(frame, TTSTextFrame):
            message = build_bot_text_event(text=frame.text)
            if getattr(frame, "persist_to_logs", False):
                await self._send_message(message)
            else:
                await self._send_ws(message)
        # Handle function call in progress
        elif (
            isinstance(frame, FunctionCallInProgressFrame)
            and frame_direction == FrameDirection.DOWNSTREAM
        ):
            await self._send_message(
                build_function_call_start_event(
                    function_name=frame.function_name,
                    tool_call_id=frame.tool_call_id,
                    arguments=dict(frame.arguments or {}),
                )
            )
        # Handle function call result
        elif (
            isinstance(frame, FunctionCallResultFrame)
            and frame_direction == FrameDirection.DOWNSTREAM
        ):
            await self._send_message(
                build_function_call_end_event(
                    function_name=frame.function_name,
                    tool_call_id=frame.tool_call_id,
                    result=frame.result,
                )
            )
        # Handle TTFB metrics - capture LLM generation time only
        elif isinstance(frame, MetricsFrame):
            for metric_data in frame.data:
                if not isinstance(metric_data, TTFBMetricsData):
                    continue
                # Only send TTFB if it's from an LLM processor
                if metric_data.processor and "LLM" in metric_data.processor:
                    await self._send_message(
                        build_ttfb_metric_event(
                            ttfb_seconds=metric_data.value,
                            processor=metric_data.processor,
                            model=metric_data.model,
                        )
                    )
        # Handle pipeline errors
        elif isinstance(frame, ErrorFrame):
            processor_name = str(frame.processor) if frame.processor else None
            extra_payload = self._exception_payload(frame.exception)
            await self._send_message(
                build_pipeline_error_event(
                    error=frame.error,
                    fatal=frame.fatal,
                    processor=processor_name,
                    extra_payload=extra_payload or None,
                )
            )

    @staticmethod
    def _exception_payload(exc) -> dict[str, object]:
        """Build the structured, JSON-safe error details for an exception.

        Surfaces structured fields when the underlying exception carries them
        (e.g. google.genai APIError: code=1008, status=None,
        message="Your project has been denied access...").

        Args:
            exc: The exception attached to an ErrorFrame, or None.

        Returns:
            An empty dict when ``exc`` is None; otherwise a dict with
            ``exception_type`` and ``exception_message`` plus any non-None
            ``code`` / ``status`` / ``message`` / ``details`` attributes.
        """
        if exc is None:
            return {}

        payload: dict[str, object] = {
            "exception_type": type(exc).__name__,
            "exception_message": str(exc),
        }
        for attr in ("code", "status", "message", "details"):
            value = getattr(exc, attr, None)
            if value is None:
                continue
            try:
                # Ensure the value is JSON-serializable; fall back
                # to str() for opaque objects (e.g. raw response).
                json.dumps(value)
            except (TypeError, ValueError):
                payload[attr] = str(value)
            else:
                payload[attr] = value
        return payload

    async def _send_ws(self, message: dict):
        """Send message via WebSocket only, handling errors gracefully.

        When a logs buffer with a current node id is available, ``node_id`` and
        ``node_name`` are added to a copy of the message before sending. Any
        exception is logged at debug level and swallowed.
        """
        if not self._ws_sender:
            return

        try:
            # Inject current node info from the logs buffer. Compare against
            # None explicitly: a buffer object that defines __len__/__bool__
            # would be falsy while empty and must still be consulted.
            buffer = self._logs_buffer
            if buffer is not None and buffer.current_node_id:
                message = {
                    **message,
                    "node_id": buffer.current_node_id,
                    "node_name": buffer.current_node_name,
                }
            await self._ws_sender(message)
        except Exception as e:
            logger.debug(f"Failed to send real-time feedback message: {e}")

    async def _send_message(self, message: dict):
        """Send message via WebSocket AND append to logs buffer."""
        await self._send_ws(message)
        await self._append_to_buffer(message)

    async def _append_to_buffer(self, message: dict):
        """Append message to logs buffer, handling errors gracefully.

        Does nothing when no logs buffer was supplied. Any exception from the
        buffer is logged at error level and swallowed.
        """
        # Explicit None check: an empty buffer that happens to be falsy must
        # still receive its first entry.
        if self._logs_buffer is None:
            return

        try:
            await self._logs_buffer.append(message)
        except Exception as e:
            logger.error(f"Failed to append to logs buffer: {e}")
|
2026-02-21 14:21:39 +05:30
|
|
|
|
|
|
|
|
|
|
|
|
|
def register_turn_log_handlers(
    logs_buffer: "InMemoryLogsBuffer",
    user_aggregator,
    assistant_aggregator,
):
    """Install turn-end listeners that store finished turns in the logs buffer.

    Two handlers are registered:

    - ``on_user_turn_stopped`` on ``user_aggregator``: calls
      ``logs_buffer.increment_turn()`` and appends a final user
      transcription event built from the turn message.
    - ``on_assistant_turn_stopped`` on ``assistant_aggregator``: appends a
      bot text event, but only when the turn message has content.

    Only ``logs_buffer`` is written to, so persistence does not depend on a
    WebSocket being available (works for both WebRTC and telephony calls).
    Failures while building or appending an event are logged and swallowed.

    Args:
        logs_buffer: Buffer receiving the per-turn events.
        user_aggregator: Object exposing ``event_handler(name)`` for user turns.
        assistant_aggregator: Object exposing ``event_handler(name)`` for
            assistant turns.
    """

    async def on_user_turn_stopped(aggregator, strategy, message):
        # The turn counter is advanced outside the guarded section.
        logs_buffer.increment_turn()
        try:
            user_event = build_user_transcription_event(
                text=message.content,
                final=True,
                timestamp=message.timestamp,
            )
            await logs_buffer.append(user_event)
        except Exception as e:
            logger.error(f"Failed to append user turn to logs buffer: {e}")

    async def on_assistant_turn_stopped(aggregator, message):
        # Nothing to record for an assistant turn without content.
        if not message.content:
            return
        try:
            bot_event = build_bot_text_event(
                text=message.content,
                timestamp=message.timestamp,
            )
            await logs_buffer.append(bot_event)
        except Exception as e:
            logger.error(f"Failed to append assistant turn to logs buffer: {e}")

    # Same effect as decorating each handler with @<aggregator>.event_handler(...).
    user_aggregator.event_handler("on_user_turn_stopped")(on_user_turn_stopped)
    assistant_aggregator.event_handler("on_assistant_turn_stopped")(
        on_assistant_turn_stopped
    )
|