feat: add chat based testing for voice agent (#308)

* feat: add backend foundations
* feat: add text chat UI
* chore: simplify the reload behaviour
* fix: fix upgrade banner to be triggered after package upload
* feat: simplify TesterPanel design
* chore: fix formatting and generate client
* chore: fix tracing for text chat mode
* fix: fix revert and edit CTA
* refactor: refactor TesterPanel into smaller components
* feat: enable runtime transition of nodes
* fix: fix review comments
2026-07-01 08:59:46 +02:00 · 2026-05-21 15:20:02 +05:30 · 2026-05-21 15:20:02 +05:30 · d97d1d72cd
commit d97d1d72cd
parent 67479e98fd
96 changed files with 7630 additions and 1684 deletions
--- a/api/services/pipecat/event_handlers.py
+++ b/api/services/pipecat/event_handlers.py
@ -7,7 +7,7 @@ from api.enums import PostHogEvent, WorkflowRunState
 from api.services.campaign.circuit_breaker import circuit_breaker
 from api.services.integrations import IntegrationRuntimeSession
 from api.services.pipecat.audio_config import AudioConfig
-from api.services.pipecat.audio_playback import play_audio, play_audio_loop
+from api.services.pipecat.audio_playback import play_audio_loop
 from api.services.pipecat.in_memory_buffers import (
    InMemoryAudioBuffer,
    InMemoryLogsBuffer,
@ -20,8 +20,6 @@ from api.tasks.arq import enqueue_job
 from api.tasks.function_names import FunctionNames
 from pipecat.frames.frames import (
    Frame,
-    LLMContextFrame,
-    TTSSpeakFrame,
 )
 from pipecat.pipeline.task import PipelineTask
 from pipecat.processors.audio.audio_buffer_processor import AudioBufferProcessor
@ -69,7 +67,6 @@ def register_event_handlers(
    pipeline_metrics_aggregator: PipelineMetricsAggregator,
    audio_config=AudioConfig,
    pre_call_fetch_task: asyncio.Task | None = None,
-    fetch_recording_audio=None,
    user_provider_id: str | None = None,
    integration_runtime_sessions: list[IntegrationRuntimeSession] | None = None,
 ):
@ -99,20 +96,11 @@ def register_event_handlers(
        "initial_response_triggered": False,
    }

-    async def queue_initial_llm_context():
-        # Queue LLMContextFrame after the VoicemailDetector since the detector
-        # gates LLMContextFrames until voicemail detection completes. We also
-        # don't want to trigger the Voicemail LLM with this initial frame.
-        await engine.llm.queue_frame(LLMContextFrame(engine.context))
-
    async def maybe_trigger_initial_response():
        """Start the conversation after both pipeline_started and client_connected events.

        If a pre-call fetch is in progress, plays a ringer while waiting for the
        response, then merges the result into the call context before proceeding.
-
-        If the start node has a greeting configured, play it directly via TTS.
-        Otherwise, trigger an LLM generation for the opening message.
        """
        if (
            ready_state["pipeline_started"]
@ -167,46 +155,11 @@ def register_event_handlers(
            # Set the start node now (after pre-call fetch data is merged)
            # so that render_template() has the complete _call_context_vars.
            await engine.set_node(engine.workflow.start_node_id)
-
-            greeting_info = engine.get_start_greeting()
-            if greeting_info:
-                greeting_type, greeting_value = greeting_info
-                if (
-                    greeting_type == "audio"
-                    and greeting_value
-                    and fetch_recording_audio
-                ):
-                    logger.debug(f"Playing audio greeting recording: {greeting_value}")
-                    result = await fetch_recording_audio(
-                        recording_pk=int(greeting_value)
-                    )
-                    if result:
-                        await play_audio(
-                            result.audio,
-                            sample_rate=audio_config.pipeline_sample_rate or 16000,
-                            queue_frame=transport.output().queue_frame,
-                            transcript=result.transcript,
-                            append_to_context=True,
-                        )
-                    else:
-                        logger.warning(
-                            f"Failed to fetch audio greeting {greeting_value}, "
-                            "falling back to LLM generation"
-                        )
-                        await queue_initial_llm_context()
-                else:
-                    logger.debug("Playing text greeting via TTS")
-                    # append_to_context=True so the assistant aggregator commits
-                    # the greeting to the LLM context once TTS finishes; without
-                    # it the LLM would re-greet on its first generation.
-                    await task.queue_frame(
-                        TTSSpeakFrame(greeting_value, append_to_context=True)
-                    )
-            else:
-                logger.debug(
-                    "Both pipeline_started and client_connected received - triggering initial LLM generation"
-                )
-                await queue_initial_llm_context()
+            await engine.queue_node_opening(
+                node_id=engine.workflow.start_node_id,
+                previous_node_id=None,
+                generate_if_no_greeting=True,
+            )

    @transport.event_handler("on_client_connected")
    async def on_client_connected(_transport, _participant):
--- a/api/services/pipecat/in_memory_buffers.py
+++ b/api/services/pipecat/in_memory_buffers.py
@ -6,6 +6,10 @@ from typing import List, Optional

 from loguru import logger

+from api.services.pipecat.realtime_feedback_events import (
+    realtime_feedback_event_sort_key,
+    stamp_realtime_feedback_event,
+)
 from api.utils.transcript import generate_transcript_text as _generate_transcript_text
 from pipecat.utils.enums import RealtimeFeedbackType

@ -98,16 +102,13 @@ class InMemoryLogsBuffer:

    async def append(self, event: dict):
        """Append a feedback event to the buffer with timestamp and current node."""
-        # Add timestamp, turn tracking, and current node
-        timestamped_event = {
-            **event,
-            "timestamp": datetime.now(UTC).isoformat(),
-            "turn": self._turn_counter,
-        }
-        if self._current_node_id:
-            timestamped_event["node_id"] = self._current_node_id
-        if self._current_node_name:
-            timestamped_event["node_name"] = self._current_node_name
+        timestamped_event = stamp_realtime_feedback_event(
+            event,
+            timestamp=datetime.now(UTC).isoformat(),
+            turn=self._turn_counter,
+            node_id=self._current_node_id,
+            node_name=self._current_node_name,
+        )
        self._events.append(timestamped_event)
        logger.trace(
            f"Appended event {event.get('type')} to logs buffer for workflow {self._workflow_run_id}"
@ -120,17 +121,12 @@ class InMemoryLogsBuffer:
            f"Incremented turn counter to {self._turn_counter} for workflow {self._workflow_run_id}"
        )

-    @staticmethod
-    def _event_sort_key(event: dict) -> str:
-        payload_ts = event.get("payload", {}).get("timestamp")
-        return payload_ts or event.get("timestamp", "")
-
    def _sorted_events(self) -> List[dict]:
        # Stable sort by the realtime (payload) timestamp when available, falling
        # back to the buffer-append timestamp. Python's sort is stable, so events
        # sharing a key retain their original insertion order — this keeps
        # consecutive bot-text chunks of a single turn contiguous.
-        return sorted(self._events, key=self._event_sort_key)
+        return sorted(self._events, key=realtime_feedback_event_sort_key)

    def get_events(self) -> List[dict]:
        """Get all events for final storage, ordered by realtime timestamp."""
--- a/api/services/pipecat/pipeline_builder.py
+++ b/api/services/pipecat/pipeline_builder.py
@ -152,8 +152,30 @@ def build_realtime_pipeline(
    return Pipeline(processors)


-def create_pipeline_task(pipeline, workflow_run_id, audio_config: AudioConfig = None):
-    """Create a pipeline task with appropriate parameters"""
+def create_pipeline_task(
+    pipeline,
+    workflow_run_id,
+    audio_config: AudioConfig = None,
+    *,
+    conversation_parent_context=None,
+    conversation_type: str = "voice",
+    additional_span_attributes: dict | None = None,
+):
+    """Create a pipeline task with appropriate parameters.
+
+    Args:
+        pipeline: The pipeline to run.
+        workflow_run_id: Run id, used as the conversation id.
+        audio_config: Optional audio configuration.
+        conversation_parent_context: Optional OTEL context carrying a fixed
+            trace id. When provided, the conversation span attaches to that
+            trace instead of starting a new root trace (used by text chat to
+            stitch every per-turn pipeline into one trace).
+        conversation_type: ``conversation.type`` span attribute value.
+        additional_span_attributes: Extra attributes set on the conversation
+            span (e.g. ``langfuse.trace.name`` to name a stitched trace that
+            has no real root span).
+    """
    # Set up pipeline params with audio configuration if provided
    pipeline_params = PipelineParams(
        enable_metrics=True,
@ -178,6 +200,9 @@ def create_pipeline_task(pipeline, workflow_run_id, audio_config: AudioConfig =
        enable_tracing=True,
        enable_rtvi=False,
        conversation_id=f"{workflow_run_id}",
+        conversation_parent_context=conversation_parent_context,
+        conversation_type=conversation_type,
+        additional_span_attributes=additional_span_attributes,
    )

    # Check if turn logging is enabled
--- a/api/services/pipecat/realtime_feedback_events.py
+++ b/api/services/pipecat/realtime_feedback_events.py
@ -0,0 +1,163 @@
+"""Shared helpers for building and ordering realtime feedback events."""
+
+from typing import Any
+
+from pipecat.utils.enums import RealtimeFeedbackType
+
+
+def build_node_transition_event(
+    *,
+    node_id: str | None,
+    node_name: str | None,
+    previous_node_id: str | None,
+    previous_node_name: str | None,
+    allow_interrupt: bool = False,
+) -> dict[str, Any]:
+    return {
+        "type": RealtimeFeedbackType.NODE_TRANSITION.value,
+        "payload": {
+            "node_id": node_id,
+            "node_name": node_name,
+            "previous_node_id": previous_node_id,
+            "previous_node_name": previous_node_name,
+            "allow_interrupt": allow_interrupt,
+        },
+    }
+
+
+def build_user_transcription_event(
+    *,
+    text: str,
+    final: bool,
+    timestamp: str | None = None,
+    user_id: str | None = None,
+) -> dict[str, Any]:
+    payload: dict[str, Any] = {
+        "text": text,
+        "final": final,
+    }
+    if timestamp is not None:
+        payload["timestamp"] = timestamp
+    if user_id is not None:
+        payload["user_id"] = user_id
+    return {
+        "type": RealtimeFeedbackType.USER_TRANSCRIPTION.value,
+        "payload": payload,
+    }
+
+
+def build_bot_text_event(
+    *,
+    text: str,
+    timestamp: str | None = None,
+) -> dict[str, Any]:
+    payload: dict[str, Any] = {"text": text}
+    if timestamp is not None:
+        payload["timestamp"] = timestamp
+    return {
+        "type": RealtimeFeedbackType.BOT_TEXT.value,
+        "payload": payload,
+    }
+
+
+def build_function_call_start_event(
+    *,
+    function_name: str | None,
+    tool_call_id: str | None,
+    arguments: dict[str, Any] | None = None,
+) -> dict[str, Any]:
+    payload: dict[str, Any] = {
+        "function_name": function_name,
+        "tool_call_id": tool_call_id,
+    }
+    if arguments is not None:
+        payload["arguments"] = arguments
+    return {
+        "type": RealtimeFeedbackType.FUNCTION_CALL_START.value,
+        "payload": payload,
+    }
+
+
+def serialize_realtime_feedback_tool_result(result: Any) -> str | None:
+    """Normalize function-call results to the string shape stored in logs."""
+    if result is None:
+        return None
+    return str(result)
+
+
+def build_function_call_end_event(
+    *,
+    function_name: str | None,
+    tool_call_id: str | None,
+    result: Any,
+) -> dict[str, Any]:
+    return {
+        "type": RealtimeFeedbackType.FUNCTION_CALL_END.value,
+        "payload": {
+            "function_name": function_name,
+            "tool_call_id": tool_call_id,
+            "result": serialize_realtime_feedback_tool_result(result),
+        },
+    }
+
+
+def build_ttfb_metric_event(
+    *,
+    ttfb_seconds: float,
+    processor: str | None,
+    model: str | None,
+) -> dict[str, Any]:
+    return {
+        "type": RealtimeFeedbackType.TTFB_METRIC.value,
+        "payload": {
+            "ttfb_seconds": ttfb_seconds,
+            "processor": processor,
+            "model": model,
+        },
+    }
+
+
+def build_pipeline_error_event(
+    *,
+    error: str,
+    fatal: bool,
+    processor: str | None = None,
+    extra_payload: dict[str, Any] | None = None,
+) -> dict[str, Any]:
+    payload: dict[str, Any] = {
+        "error": error,
+        "fatal": fatal,
+    }
+    if processor is not None:
+        payload["processor"] = processor
+    if extra_payload:
+        payload.update(extra_payload)
+    return {
+        "type": RealtimeFeedbackType.PIPELINE_ERROR.value,
+        "payload": payload,
+    }
+
+
+def stamp_realtime_feedback_event(
+    event: dict[str, Any],
+    *,
+    timestamp: str | None = None,
+    turn: int | None = None,
+    node_id: str | None = None,
+    node_name: str | None = None,
+) -> dict[str, Any]:
+    stamped = dict(event)
+    if timestamp is not None:
+        stamped["timestamp"] = timestamp
+    if turn is not None:
+        stamped["turn"] = turn
+    if node_id is not None:
+        stamped["node_id"] = node_id
+    if node_name is not None:
+        stamped["node_name"] = node_name
+    return stamped
+
+
+def realtime_feedback_event_sort_key(event: dict[str, Any]) -> str:
+    payload_timestamp = (event.get("payload") or {}).get("timestamp")
+    return payload_timestamp or event.get("timestamp") or ""
--- a/api/services/pipecat/realtime_feedback_observer.py
+++ b/api/services/pipecat/realtime_feedback_observer.py
@ -27,6 +27,15 @@ from typing import TYPE_CHECKING, Awaitable, Callable, Optional, Set

 from loguru import logger

+from api.services.pipecat.realtime_feedback_events import (
+    build_bot_text_event,
+    build_function_call_end_event,
+    build_function_call_start_event,
+    build_pipeline_error_event,
+    build_ttfb_metric_event,
+    build_user_transcription_event,
+)
+
 if TYPE_CHECKING:
    from api.services.pipecat.in_memory_buffers import InMemoryLogsBuffer

@ -211,29 +220,23 @@ class RealtimeFeedbackObserver(BaseObserver):
        # Handle user transcriptions (interim) - WebSocket only
        elif isinstance(frame, InterimTranscriptionFrame):
            await self._send_ws(
-                {
-                    "type": RealtimeFeedbackType.USER_TRANSCRIPTION.value,
-                    "payload": {
-                        "text": frame.text,
-                        "final": False,
-                        "user_id": frame.user_id,
-                        "timestamp": frame.timestamp,
-                    },
-                }
+                build_user_transcription_event(
+                    text=frame.text,
+                    final=False,
+                    user_id=frame.user_id,
+                    timestamp=frame.timestamp,
+                )
            )
        # Handle user transcriptions (final) - WebSocket only
        # Complete turn text is persisted via register_turn_handlers
        elif isinstance(frame, TranscriptionFrame):
            await self._send_ws(
-                {
-                    "type": RealtimeFeedbackType.USER_TRANSCRIPTION.value,
-                    "payload": {
-                        "text": frame.text,
-                        "final": True,
-                        "user_id": frame.user_id,
-                        "timestamp": frame.timestamp,
-                    },
-                }
+                build_user_transcription_event(
+                    text=frame.text,
+                    final=True,
+                    user_id=frame.user_id,
+                    timestamp=frame.timestamp,
+                )
            )
        # Handle engine-queued speech (transition/tool messages) marked for
        # log persistence. The downstream TTSTextFrame(s) from the TTS service
@ -241,23 +244,13 @@ class RealtimeFeedbackObserver(BaseObserver):
        # to avoid word-level log entries from word-timestamp providers.
        elif isinstance(frame, TTSSpeakFrame):
            if getattr(frame, "persist_to_logs", False):
-                await self._append_to_buffer(
-                    {
-                        "type": RealtimeFeedbackType.BOT_TEXT.value,
-                        "payload": {"text": frame.text},
-                    }
-                )
+                await self._append_to_buffer(build_bot_text_event(text=frame.text))
        # Handle bot TTS text - respect pts timing, WebSocket only
        # Complete turn text is persisted via register_turn_handlers,
        # except for frames explicitly flagged persist_to_logs (e.g. recording
        # transcripts from play_audio) which bypass the aggregator path.
        elif isinstance(frame, TTSTextFrame):
-            message = {
-                "type": RealtimeFeedbackType.BOT_TEXT.value,
-                "payload": {
-                    "text": frame.text,
-                },
-            }
+            message = build_bot_text_event(text=frame.text)

            # If frame has pts, queue it for timed delivery
            if frame.pts:
@ -280,13 +273,11 @@ class RealtimeFeedbackObserver(BaseObserver):
            and frame_direction == FrameDirection.DOWNSTREAM
        ):
            await self._send_message(
-                {
-                    "type": RealtimeFeedbackType.FUNCTION_CALL_START.value,
-                    "payload": {
-                        "function_name": frame.function_name,
-                        "tool_call_id": frame.tool_call_id,
-                    },
-                }
+                build_function_call_start_event(
+                    function_name=frame.function_name,
+                    tool_call_id=frame.tool_call_id,
+                    arguments=dict(frame.arguments or {}),
+                )
            )
        # Handle function call result
        elif (
@ -294,14 +285,11 @@ class RealtimeFeedbackObserver(BaseObserver):
            and frame_direction == FrameDirection.DOWNSTREAM
        ):
            await self._send_message(
-                {
-                    "type": RealtimeFeedbackType.FUNCTION_CALL_END.value,
-                    "payload": {
-                        "function_name": frame.function_name,
-                        "tool_call_id": frame.tool_call_id,
-                        "result": str(frame.result) if frame.result else None,
-                    },
-                }
+                build_function_call_end_event(
+                    function_name=frame.function_name,
+                    tool_call_id=frame.tool_call_id,
+                    result=frame.result,
+                )
            )
        # Handle TTFB metrics - capture LLM generation time only
        elif isinstance(frame, MetricsFrame):
@ -311,47 +299,42 @@ class RealtimeFeedbackObserver(BaseObserver):
                    # Only send TTFB if it's from an LLM processor
                    if metric_data.processor and "LLM" in metric_data.processor:
                        await self._send_message(
-                            {
-                                "type": RealtimeFeedbackType.TTFB_METRIC.value,
-                                "payload": {
-                                    "ttfb_seconds": metric_data.value,
-                                    "processor": metric_data.processor,
-                                    "model": metric_data.model,
-                                },
-                            }
+                            build_ttfb_metric_event(
+                                ttfb_seconds=metric_data.value,
+                                processor=metric_data.processor,
+                                model=metric_data.model,
+                            )
                        )
        # Handle pipeline errors
        elif isinstance(frame, ErrorFrame):
            processor_name = str(frame.processor) if frame.processor else None
-            payload = {
-                "error": frame.error,
-                "fatal": frame.fatal,
-                "processor": processor_name,
-            }
+            extra_payload: dict[str, object] = {}
            # Surface structured fields when the underlying exception carries
            # them (e.g. google.genai APIError: code=1008, status=None,
            # message="Your project has been denied access...").
            exc = frame.exception
            if exc is not None:
                exc_type = type(exc).__name__
-                payload["exception_type"] = exc_type
-                payload["exception_message"] = str(exc)
+                extra_payload["exception_type"] = exc_type
+                extra_payload["exception_message"] = str(exc)
                for attr in ("code", "status", "message", "details"):
                    value = getattr(exc, attr, None)
-                    if value is None or attr in payload:
+                    if value is None or attr in extra_payload:
                        continue
                    try:
                        # Ensure the value is JSON-serializable; fall back
                        # to str() for opaque objects (e.g. raw response).
                        json.dumps(value)
-                        payload[attr] = value
+                        extra_payload[attr] = value
                    except (TypeError, ValueError):
-                        payload[attr] = str(value)
+                        extra_payload[attr] = str(value)
            await self._send_message(
-                {
-                    "type": RealtimeFeedbackType.PIPELINE_ERROR.value,
-                    "payload": payload,
-                }
+                build_pipeline_error_event(
+                    error=frame.error,
+                    fatal=frame.fatal,
+                    processor=processor_name,
+                    extra_payload=extra_payload or None,
+                )
            )

    async def _send_ws(self, message: dict):
@ -401,14 +384,11 @@ def register_turn_log_handlers(
        logs_buffer.increment_turn()
        try:
            await logs_buffer.append(
-                {
-                    "type": RealtimeFeedbackType.USER_TRANSCRIPTION.value,
-                    "payload": {
-                        "text": message.content,
-                        "final": True,
-                        "timestamp": message.timestamp,
-                    },
-                }
+                build_user_transcription_event(
+                    text=message.content,
+                    final=True,
+                    timestamp=message.timestamp,
+                )
            )
        except Exception as e:
            logger.error(f"Failed to append user turn to logs buffer: {e}")
@ -418,13 +398,10 @@ def register_turn_log_handlers(
        if message.content:
            try:
                await logs_buffer.append(
-                    {
-                        "type": RealtimeFeedbackType.BOT_TEXT.value,
-                        "payload": {
-                            "text": message.content,
-                            "timestamp": message.timestamp,
-                        },
-                    }
+                    build_bot_text_event(
+                        text=message.content,
+                        timestamp=message.timestamp,
+                    )
                )
            except Exception as e:
                logger.error(f"Failed to append assistant turn to logs buffer: {e}")
--- a/api/services/pipecat/run_pipeline.py
+++ b/api/services/pipecat/run_pipeline.py
@ -28,6 +28,9 @@ from api.services.pipecat.pipeline_engine_callbacks_processor import (
 )
 from api.services.pipecat.pipeline_metrics_aggregator import PipelineMetricsAggregator
 from api.services.pipecat.pre_call_fetch import execute_pre_call_fetch
+from api.services.pipecat.realtime_feedback_events import (
+    build_node_transition_event,
+)
 from api.services.pipecat.realtime_feedback_observer import (
    RealtimeFeedbackObserver,
    register_turn_log_handlers,
@ -465,16 +468,13 @@ async def _run_pipeline(
        # Update current node on the buffer so subsequent events are tagged
        in_memory_logs_buffer.set_current_node(node_id, node_name)

-        message = {
-            "type": RealtimeFeedbackType.NODE_TRANSITION.value,
-            "payload": {
-                "node_id": node_id,
-                "node_name": node_name,
-                "previous_node_id": previous_node_id,
-                "previous_node_name": previous_node_name,
-                "allow_interrupt": allow_interrupt,
-            },
-        }
+        message = build_node_transition_event(
+            node_id=node_id,
+            node_name=node_name,
+            previous_node_id=previous_node_id,
+            previous_node_name=previous_node_name,
+            allow_interrupt=allow_interrupt,
+        )
        # Send via WebSocket if available
        if ws_sender:
            try:
@ -803,7 +803,6 @@ async def _run_pipeline(
        pipeline_metrics_aggregator=pipeline_metrics_aggregator,
        audio_config=audio_config,
        pre_call_fetch_task=pre_call_fetch_task,
-        fetch_recording_audio=fetch_audio,
        user_provider_id=user_provider_id,
        integration_runtime_sessions=integration_runtime_sessions,
    )
--- a/api/services/pipecat/tracing_config.py
+++ b/api/services/pipecat/tracing_config.py
@ -254,6 +254,44 @@ async def handle_langfuse_sync(event):
        unregister_org_langfuse_credentials(org_id)


+def build_remote_parent_context(trace_id: str | None):
+    """Build an OTEL context whose active span carries ``trace_id``.
+
+    Spans started under the returned context join the Langfuse trace identified
+    by ``trace_id`` (Langfuse groups observations by trace id). The parent span
+    id is a non-existent placeholder, so spans created under it attach at the
+    trace root rather than nesting under a real parent span.
+
+    This is the shared primitive behind both post-call QA tracing and text-chat
+    trace stitching. Returns the context, or ``None`` when tracing is
+    unavailable or ``trace_id`` is missing/invalid.
+    """
+    if not trace_id:
+        return None
+    if not ensure_tracing():
+        return None
+    try:
+        from opentelemetry.trace import (
+            NonRecordingSpan,
+            SpanContext,
+            TraceFlags,
+            set_span_in_context,
+        )
+
+        parent_span_context = SpanContext(
+            trace_id=int(trace_id, 16),
+            span_id=0x1,
+            is_remote=True,
+            trace_flags=TraceFlags(0x01),
+        )
+        return set_span_in_context(NonRecordingSpan(parent_span_context))
+    except Exception as e:
+        logger.warning(
+            f"Failed to build remote parent context for trace {trace_id}: {e}"
+        )
+        return None
+
+
 def get_trace_url(trace_id: str, org_id=None) -> str | None:
    """Build a Langfuse trace URL, using org-specific host when available."""
    if org_id is None: