feat: add chat based testing for voice agent (#308)

* feat: add backend foundations * feat: add text chat UI * chore: simplify the reload behaviour * fix: fix upgrade banner to be triggered after package upload * feat: simplify TesterPanel design * chore: fix formatting and generate client * chore: fix tracing for text chat mode * fix: fix revert and edit CTA * refactor: refactor TesterPanel into smaller components * feat: enable runtime transition of nodes * fix: fix review comments
2026-07-22 11:51:04 +02:00 · 2026-05-21 15:20:02 +05:30 · 2026-05-21 15:20:02 +05:30 · d97d1d72cd
commit d97d1d72cd
parent 67479e98fd
96 changed files with 7630 additions and 1684 deletions
--- a/api/services/pipecat/event_handlers.py
+++ b/api/services/pipecat/event_handlers.py
@ -7,7 +7,7 @@ from api.enums import PostHogEvent, WorkflowRunState
 from api.services.campaign.circuit_breaker import circuit_breaker
 from api.services.integrations import IntegrationRuntimeSession
 from api.services.pipecat.audio_config import AudioConfig
-from api.services.pipecat.audio_playback import play_audio, play_audio_loop
+from api.services.pipecat.audio_playback import play_audio_loop
 from api.services.pipecat.in_memory_buffers import (
    InMemoryAudioBuffer,
    InMemoryLogsBuffer,
@ -20,8 +20,6 @@ from api.tasks.arq import enqueue_job
 from api.tasks.function_names import FunctionNames
 from pipecat.frames.frames import (
    Frame,
-    LLMContextFrame,
-    TTSSpeakFrame,
 )
 from pipecat.pipeline.task import PipelineTask
 from pipecat.processors.audio.audio_buffer_processor import AudioBufferProcessor
@ -69,7 +67,6 @@ def register_event_handlers(
    pipeline_metrics_aggregator: PipelineMetricsAggregator,
    audio_config=AudioConfig,
    pre_call_fetch_task: asyncio.Task | None = None,
-    fetch_recording_audio=None,
    user_provider_id: str | None = None,
    integration_runtime_sessions: list[IntegrationRuntimeSession] | None = None,
 ):
@ -99,20 +96,11 @@ def register_event_handlers(
        "initial_response_triggered": False,
    }

-    async def queue_initial_llm_context():
-        # Queue LLMContextFrame after the VoicemailDetector since the detector
-        # gates LLMContextFrames until voicemail detection completes. We also
-        # don't want to trigger the Voicemail LLM with this initial frame.
-        await engine.llm.queue_frame(LLMContextFrame(engine.context))
-
    async def maybe_trigger_initial_response():
        """Start the conversation after both pipeline_started and client_connected events.

        If a pre-call fetch is in progress, plays a ringer while waiting for the
        response, then merges the result into the call context before proceeding.
-
-        If the start node has a greeting configured, play it directly via TTS.
-        Otherwise, trigger an LLM generation for the opening message.
        """
        if (
            ready_state["pipeline_started"]
@ -167,46 +155,11 @@ def register_event_handlers(
            # Set the start node now (after pre-call fetch data is merged)
            # so that render_template() has the complete _call_context_vars.
            await engine.set_node(engine.workflow.start_node_id)
-
-            greeting_info = engine.get_start_greeting()
-            if greeting_info:
-                greeting_type, greeting_value = greeting_info
-                if (
-                    greeting_type == "audio"
-                    and greeting_value
-                    and fetch_recording_audio
-                ):
-                    logger.debug(f"Playing audio greeting recording: {greeting_value}")
-                    result = await fetch_recording_audio(
-                        recording_pk=int(greeting_value)
-                    )
-                    if result:
-                        await play_audio(
-                            result.audio,
-                            sample_rate=audio_config.pipeline_sample_rate or 16000,
-                            queue_frame=transport.output().queue_frame,
-                            transcript=result.transcript,
-                            append_to_context=True,
-                        )
-                    else:
-                        logger.warning(
-                            f"Failed to fetch audio greeting {greeting_value}, "
-                            "falling back to LLM generation"
-                        )
-                        await queue_initial_llm_context()
-                else:
-                    logger.debug("Playing text greeting via TTS")
-                    # append_to_context=True so the assistant aggregator commits
-                    # the greeting to the LLM context once TTS finishes; without
-                    # it the LLM would re-greet on its first generation.
-                    await task.queue_frame(
-                        TTSSpeakFrame(greeting_value, append_to_context=True)
-                    )
-            else:
-                logger.debug(
-                    "Both pipeline_started and client_connected received - triggering initial LLM generation"
-                )
-                await queue_initial_llm_context()
+            await engine.queue_node_opening(
+                node_id=engine.workflow.start_node_id,
+                previous_node_id=None,
+                generate_if_no_greeting=True,
+            )

    @transport.event_handler("on_client_connected")
    async def on_client_connected(_transport, _participant):
--- a/api/services/pipecat/in_memory_buffers.py
+++ b/api/services/pipecat/in_memory_buffers.py
@ -6,6 +6,10 @@ from typing import List, Optional

 from loguru import logger

+from api.services.pipecat.realtime_feedback_events import (
+    realtime_feedback_event_sort_key,
+    stamp_realtime_feedback_event,
+)
 from api.utils.transcript import generate_transcript_text as _generate_transcript_text
 from pipecat.utils.enums import RealtimeFeedbackType

@ -98,16 +102,13 @@ class InMemoryLogsBuffer:

    async def append(self, event: dict):
        """Append a feedback event to the buffer with timestamp and current node."""
-        # Add timestamp, turn tracking, and current node
-        timestamped_event = {
-            **event,
-            "timestamp": datetime.now(UTC).isoformat(),
-            "turn": self._turn_counter,
-        }
-        if self._current_node_id:
-            timestamped_event["node_id"] = self._current_node_id
-        if self._current_node_name:
-            timestamped_event["node_name"] = self._current_node_name
+        timestamped_event = stamp_realtime_feedback_event(
+            event,
+            timestamp=datetime.now(UTC).isoformat(),
+            turn=self._turn_counter,
+            node_id=self._current_node_id,
+            node_name=self._current_node_name,
+        )
        self._events.append(timestamped_event)
        logger.trace(
            f"Appended event {event.get('type')} to logs buffer for workflow {self._workflow_run_id}"
@ -120,17 +121,12 @@ class InMemoryLogsBuffer:
            f"Incremented turn counter to {self._turn_counter} for workflow {self._workflow_run_id}"
        )

-    @staticmethod
-    def _event_sort_key(event: dict) -> str:
-        payload_ts = event.get("payload", {}).get("timestamp")
-        return payload_ts or event.get("timestamp", "")
-
    def _sorted_events(self) -> List[dict]:
        # Stable sort by the realtime (payload) timestamp when available, falling
        # back to the buffer-append timestamp. Python's sort is stable, so events
        # sharing a key retain their original insertion order — this keeps
        # consecutive bot-text chunks of a single turn contiguous.
-        return sorted(self._events, key=self._event_sort_key)
+        return sorted(self._events, key=realtime_feedback_event_sort_key)

    def get_events(self) -> List[dict]:
        """Get all events for final storage, ordered by realtime timestamp."""
--- a/api/services/pipecat/pipeline_builder.py
+++ b/api/services/pipecat/pipeline_builder.py
@ -152,8 +152,30 @@ def build_realtime_pipeline(
    return Pipeline(processors)


-def create_pipeline_task(pipeline, workflow_run_id, audio_config: AudioConfig = None):
-    """Create a pipeline task with appropriate parameters"""
+def create_pipeline_task(
+    pipeline,
+    workflow_run_id,
+    audio_config: AudioConfig = None,
+    *,
+    conversation_parent_context=None,
+    conversation_type: str = "voice",
+    additional_span_attributes: dict | None = None,
+):
+    """Create a pipeline task with appropriate parameters.
+
+    Args:
+        pipeline: The pipeline to run.
+        workflow_run_id: Run id, used as the conversation id.
+        audio_config: Optional audio configuration.
+        conversation_parent_context: Optional OTEL context carrying a fixed
+            trace id. When provided, the conversation span attaches to that
+            trace instead of starting a new root trace (used by text chat to
+            stitch every per-turn pipeline into one trace).
+        conversation_type: ``conversation.type`` span attribute value.
+        additional_span_attributes: Extra attributes set on the conversation
+            span (e.g. ``langfuse.trace.name`` to name a stitched trace that
+            has no real root span).
+    """
    # Set up pipeline params with audio configuration if provided
    pipeline_params = PipelineParams(
        enable_metrics=True,
@ -178,6 +200,9 @@ def create_pipeline_task(pipeline, workflow_run_id, audio_config: AudioConfig =
        enable_tracing=True,
        enable_rtvi=False,
        conversation_id=f"{workflow_run_id}",
+        conversation_parent_context=conversation_parent_context,
+        conversation_type=conversation_type,
+        additional_span_attributes=additional_span_attributes,
    )

    # Check if turn logging is enabled
--- a/api/services/pipecat/realtime_feedback_events.py
+++ b/api/services/pipecat/realtime_feedback_events.py
@ -0,0 +1,163 @@
+"""Shared helpers for building and ordering realtime feedback events."""
+
+from typing import Any
+
+from pipecat.utils.enums import RealtimeFeedbackType
+
+
+def build_node_transition_event(
+    *,
+    node_id: str | None,
+    node_name: str | None,
+    previous_node_id: str | None,
+    previous_node_name: str | None,
+    allow_interrupt: bool = False,
+) -> dict[str, Any]:
+    return {
+        "type": RealtimeFeedbackType.NODE_TRANSITION.value,
+        "payload": {
+            "node_id": node_id,
+            "node_name": node_name,
+            "previous_node_id": previous_node_id,
+            "previous_node_name": previous_node_name,
+            "allow_interrupt": allow_interrupt,
+        },
+    }
+
+
+def build_user_transcription_event(
+    *,
+    text: str,
+    final: bool,
+    timestamp: str | None = None,
+    user_id: str | None = None,
+) -> dict[str, Any]:
+    payload: dict[str, Any] = {
+        "text": text,
+        "final": final,
+    }
+    if timestamp is not None:
+        payload["timestamp"] = timestamp
+    if user_id is not None:
+        payload["user_id"] = user_id
+    return {
+        "type": RealtimeFeedbackType.USER_TRANSCRIPTION.value,
+        "payload": payload,
+    }
+
+
+def build_bot_text_event(
+    *,
+    text: str,
+    timestamp: str | None = None,
+) -> dict[str, Any]:
+    payload: dict[str, Any] = {"text": text}
+    if timestamp is not None:
+        payload["timestamp"] = timestamp
+    return {
+        "type": RealtimeFeedbackType.BOT_TEXT.value,
+        "payload": payload,
+    }
+
+
+def build_function_call_start_event(
+    *,
+    function_name: str | None,
+    tool_call_id: str | None,
+    arguments: dict[str, Any] | None = None,
+) -> dict[str, Any]:
+    payload: dict[str, Any] = {
+        "function_name": function_name,
+        "tool_call_id": tool_call_id,
+    }
+    if arguments is not None:
+        payload["arguments"] = arguments
+    return {
+        "type": RealtimeFeedbackType.FUNCTION_CALL_START.value,
+        "payload": payload,
+    }
+
+
+def serialize_realtime_feedback_tool_result(result: Any) -> str | None:
+    """Normalize function-call results to the string shape stored in logs."""
+    if result is None:
+        return None
+    return str(result)
+
+
+def build_function_call_end_event(
+    *,
+    function_name: str | None,
+    tool_call_id: str | None,
+    result: Any,
+) -> dict[str, Any]:
+    return {
+        "type": RealtimeFeedbackType.FUNCTION_CALL_END.value,
+        "payload": {
+            "function_name": function_name,
+            "tool_call_id": tool_call_id,
+            "result": serialize_realtime_feedback_tool_result(result),
+        },
+    }
+
+
+def build_ttfb_metric_event(
+    *,
+    ttfb_seconds: float,
+    processor: str | None,
+    model: str | None,
+) -> dict[str, Any]:
+    return {
+        "type": RealtimeFeedbackType.TTFB_METRIC.value,
+        "payload": {
+            "ttfb_seconds": ttfb_seconds,
+            "processor": processor,
+            "model": model,
+        },
+    }
+
+
+def build_pipeline_error_event(
+    *,
+    error: str,
+    fatal: bool,
+    processor: str | None = None,
+    extra_payload: dict[str, Any] | None = None,
+) -> dict[str, Any]:
+    payload: dict[str, Any] = {
+        "error": error,
+        "fatal": fatal,
+    }
+    if processor is not None:
+        payload["processor"] = processor
+    if extra_payload:
+        payload.update(extra_payload)
+    return {
+        "type": RealtimeFeedbackType.PIPELINE_ERROR.value,
+        "payload": payload,
+    }
+
+
+def stamp_realtime_feedback_event(
+    event: dict[str, Any],
+    *,
+    timestamp: str | None = None,
+    turn: int | None = None,
+    node_id: str | None = None,
+    node_name: str | None = None,
+) -> dict[str, Any]:
+    stamped = dict(event)
+    if timestamp is not None:
+        stamped["timestamp"] = timestamp
+    if turn is not None:
+        stamped["turn"] = turn
+    if node_id is not None:
+        stamped["node_id"] = node_id
+    if node_name is not None:
+        stamped["node_name"] = node_name
+    return stamped
+
+
+def realtime_feedback_event_sort_key(event: dict[str, Any]) -> str:
+    payload_timestamp = (event.get("payload") or {}).get("timestamp")
+    return payload_timestamp or event.get("timestamp") or ""
--- a/api/services/pipecat/realtime_feedback_observer.py
+++ b/api/services/pipecat/realtime_feedback_observer.py
@ -27,6 +27,15 @@ from typing import TYPE_CHECKING, Awaitable, Callable, Optional, Set

 from loguru import logger

+from api.services.pipecat.realtime_feedback_events import (
+    build_bot_text_event,
+    build_function_call_end_event,
+    build_function_call_start_event,
+    build_pipeline_error_event,
+    build_ttfb_metric_event,
+    build_user_transcription_event,
+)
+
 if TYPE_CHECKING:
    from api.services.pipecat.in_memory_buffers import InMemoryLogsBuffer

@ -211,29 +220,23 @@ class RealtimeFeedbackObserver(BaseObserver):
        # Handle user transcriptions (interim) - WebSocket only
        elif isinstance(frame, InterimTranscriptionFrame):
            await self._send_ws(
-                {
-                    "type": RealtimeFeedbackType.USER_TRANSCRIPTION.value,
-                    "payload": {
-                        "text": frame.text,
-                        "final": False,
-                        "user_id": frame.user_id,
-                        "timestamp": frame.timestamp,
-                    },
-                }
+                build_user_transcription_event(
+                    text=frame.text,
+                    final=False,
+                    user_id=frame.user_id,
+                    timestamp=frame.timestamp,
+                )
            )
        # Handle user transcriptions (final) - WebSocket only
        # Complete turn text is persisted via register_turn_handlers
        elif isinstance(frame, TranscriptionFrame):
            await self._send_ws(
-                {
-                    "type": RealtimeFeedbackType.USER_TRANSCRIPTION.value,
-                    "payload": {
-                        "text": frame.text,
-                        "final": True,
-                        "user_id": frame.user_id,
-                        "timestamp": frame.timestamp,
-                    },
-                }
+                build_user_transcription_event(
+                    text=frame.text,
+                    final=True,
+                    user_id=frame.user_id,
+                    timestamp=frame.timestamp,
+                )
            )
        # Handle engine-queued speech (transition/tool messages) marked for
        # log persistence. The downstream TTSTextFrame(s) from the TTS service
@ -241,23 +244,13 @@ class RealtimeFeedbackObserver(BaseObserver):
        # to avoid word-level log entries from word-timestamp providers.
        elif isinstance(frame, TTSSpeakFrame):
            if getattr(frame, "persist_to_logs", False):
-                await self._append_to_buffer(
-                    {
-                        "type": RealtimeFeedbackType.BOT_TEXT.value,
-                        "payload": {"text": frame.text},
-                    }
-                )
+                await self._append_to_buffer(build_bot_text_event(text=frame.text))
        # Handle bot TTS text - respect pts timing, WebSocket only
        # Complete turn text is persisted via register_turn_handlers,
        # except for frames explicitly flagged persist_to_logs (e.g. recording
        # transcripts from play_audio) which bypass the aggregator path.
        elif isinstance(frame, TTSTextFrame):
-            message = {
-                "type": RealtimeFeedbackType.BOT_TEXT.value,
-                "payload": {
-                    "text": frame.text,
-                },
-            }
+            message = build_bot_text_event(text=frame.text)

            # If frame has pts, queue it for timed delivery
            if frame.pts:
@ -280,13 +273,11 @@ class RealtimeFeedbackObserver(BaseObserver):
            and frame_direction == FrameDirection.DOWNSTREAM
        ):
            await self._send_message(
-                {
-                    "type": RealtimeFeedbackType.FUNCTION_CALL_START.value,
-                    "payload": {
-                        "function_name": frame.function_name,
-                        "tool_call_id": frame.tool_call_id,
-                    },
-                }
+                build_function_call_start_event(
+                    function_name=frame.function_name,
+                    tool_call_id=frame.tool_call_id,
+                    arguments=dict(frame.arguments or {}),
+                )
            )
        # Handle function call result
        elif (
@ -294,14 +285,11 @@ class RealtimeFeedbackObserver(BaseObserver):
            and frame_direction == FrameDirection.DOWNSTREAM
        ):
            await self._send_message(
-                {
-                    "type": RealtimeFeedbackType.FUNCTION_CALL_END.value,
-                    "payload": {
-                        "function_name": frame.function_name,
-                        "tool_call_id": frame.tool_call_id,
-                        "result": str(frame.result) if frame.result else None,
-                    },
-                }
+                build_function_call_end_event(
+                    function_name=frame.function_name,
+                    tool_call_id=frame.tool_call_id,
+                    result=frame.result,
+                )
            )
        # Handle TTFB metrics - capture LLM generation time only
        elif isinstance(frame, MetricsFrame):
@ -311,47 +299,42 @@ class RealtimeFeedbackObserver(BaseObserver):
                    # Only send TTFB if it's from an LLM processor
                    if metric_data.processor and "LLM" in metric_data.processor:
                        await self._send_message(
-                            {
-                                "type": RealtimeFeedbackType.TTFB_METRIC.value,
-                                "payload": {
-                                    "ttfb_seconds": metric_data.value,
-                                    "processor": metric_data.processor,
-                                    "model": metric_data.model,
-                                },
-                            }
+                            build_ttfb_metric_event(
+                                ttfb_seconds=metric_data.value,
+                                processor=metric_data.processor,
+                                model=metric_data.model,
+                            )
                        )
        # Handle pipeline errors
        elif isinstance(frame, ErrorFrame):
            processor_name = str(frame.processor) if frame.processor else None
-            payload = {
-                "error": frame.error,
-                "fatal": frame.fatal,
-                "processor": processor_name,
-            }
+            extra_payload: dict[str, object] = {}
            # Surface structured fields when the underlying exception carries
            # them (e.g. google.genai APIError: code=1008, status=None,
            # message="Your project has been denied access...").
            exc = frame.exception
            if exc is not None:
                exc_type = type(exc).__name__
-                payload["exception_type"] = exc_type
-                payload["exception_message"] = str(exc)
+                extra_payload["exception_type"] = exc_type
+                extra_payload["exception_message"] = str(exc)
                for attr in ("code", "status", "message", "details"):
                    value = getattr(exc, attr, None)
-                    if value is None or attr in payload:
+                    if value is None or attr in extra_payload:
                        continue
                    try:
                        # Ensure the value is JSON-serializable; fall back
                        # to str() for opaque objects (e.g. raw response).
                        json.dumps(value)
-                        payload[attr] = value
+                        extra_payload[attr] = value
                    except (TypeError, ValueError):
-                        payload[attr] = str(value)
+                        extra_payload[attr] = str(value)
            await self._send_message(
-                {
-                    "type": RealtimeFeedbackType.PIPELINE_ERROR.value,
-                    "payload": payload,
-                }
+                build_pipeline_error_event(
+                    error=frame.error,
+                    fatal=frame.fatal,
+                    processor=processor_name,
+                    extra_payload=extra_payload or None,
+                )
            )

    async def _send_ws(self, message: dict):
@ -401,14 +384,11 @@ def register_turn_log_handlers(
        logs_buffer.increment_turn()
        try:
            await logs_buffer.append(
-                {
-                    "type": RealtimeFeedbackType.USER_TRANSCRIPTION.value,
-                    "payload": {
-                        "text": message.content,
-                        "final": True,
-                        "timestamp": message.timestamp,
-                    },
-                }
+                build_user_transcription_event(
+                    text=message.content,
+                    final=True,
+                    timestamp=message.timestamp,
+                )
            )
        except Exception as e:
            logger.error(f"Failed to append user turn to logs buffer: {e}")
@ -418,13 +398,10 @@ def register_turn_log_handlers(
        if message.content:
            try:
                await logs_buffer.append(
-                    {
-                        "type": RealtimeFeedbackType.BOT_TEXT.value,
-                        "payload": {
-                            "text": message.content,
-                            "timestamp": message.timestamp,
-                        },
-                    }
+                    build_bot_text_event(
+                        text=message.content,
+                        timestamp=message.timestamp,
+                    )
                )
            except Exception as e:
                logger.error(f"Failed to append assistant turn to logs buffer: {e}")
--- a/api/services/pipecat/run_pipeline.py
+++ b/api/services/pipecat/run_pipeline.py
@ -28,6 +28,9 @@ from api.services.pipecat.pipeline_engine_callbacks_processor import (
 )
 from api.services.pipecat.pipeline_metrics_aggregator import PipelineMetricsAggregator
 from api.services.pipecat.pre_call_fetch import execute_pre_call_fetch
+from api.services.pipecat.realtime_feedback_events import (
+    build_node_transition_event,
+)
 from api.services.pipecat.realtime_feedback_observer import (
    RealtimeFeedbackObserver,
    register_turn_log_handlers,
@ -465,16 +468,13 @@ async def _run_pipeline(
        # Update current node on the buffer so subsequent events are tagged
        in_memory_logs_buffer.set_current_node(node_id, node_name)

-        message = {
-            "type": RealtimeFeedbackType.NODE_TRANSITION.value,
-            "payload": {
-                "node_id": node_id,
-                "node_name": node_name,
-                "previous_node_id": previous_node_id,
-                "previous_node_name": previous_node_name,
-                "allow_interrupt": allow_interrupt,
-            },
-        }
+        message = build_node_transition_event(
+            node_id=node_id,
+            node_name=node_name,
+            previous_node_id=previous_node_id,
+            previous_node_name=previous_node_name,
+            allow_interrupt=allow_interrupt,
+        )
        # Send via WebSocket if available
        if ws_sender:
            try:
@ -803,7 +803,6 @@ async def _run_pipeline(
        pipeline_metrics_aggregator=pipeline_metrics_aggregator,
        audio_config=audio_config,
        pre_call_fetch_task=pre_call_fetch_task,
-        fetch_recording_audio=fetch_audio,
        user_provider_id=user_provider_id,
        integration_runtime_sessions=integration_runtime_sessions,
    )
--- a/api/services/pipecat/tracing_config.py
+++ b/api/services/pipecat/tracing_config.py
@ -254,6 +254,44 @@ async def handle_langfuse_sync(event):
        unregister_org_langfuse_credentials(org_id)


+def build_remote_parent_context(trace_id: str | None):
+    """Build an OTEL context whose active span carries ``trace_id``.
+
+    Spans started under the returned context join the Langfuse trace identified
+    by ``trace_id`` (Langfuse groups observations by trace id). The parent span
+    id is a non-existent placeholder, so spans created under it attach at the
+    trace root rather than nesting under a real parent span.
+
+    This is the shared primitive behind both post-call QA tracing and text-chat
+    trace stitching. Returns the context, or ``None`` when tracing is
+    unavailable or ``trace_id`` is missing/invalid.
+    """
+    if not trace_id:
+        return None
+    if not ensure_tracing():
+        return None
+    try:
+        from opentelemetry.trace import (
+            NonRecordingSpan,
+            SpanContext,
+            TraceFlags,
+            set_span_in_context,
+        )
+
+        parent_span_context = SpanContext(
+            trace_id=int(trace_id, 16),
+            span_id=0x1,
+            is_remote=True,
+            trace_flags=TraceFlags(0x01),
+        )
+        return set_span_in_context(NonRecordingSpan(parent_span_context))
+    except Exception as e:
+        logger.warning(
+            f"Failed to build remote parent context for trace {trace_id}: {e}"
+        )
+        return None
+
+
 def get_trace_url(trace_id: str, org_id=None) -> str | None:
    """Build a Langfuse trace URL, using org-specific host when available."""
    if org_id is None:
--- a/api/services/pricing/workflow_run_cost.py
+++ b/api/services/pricing/workflow_run_cost.py
@ -1,3 +1,5 @@
+from decimal import Decimal
+
 from loguru import logger

 from api.db import db_client
@ -63,24 +65,31 @@ async def _update_organization_usage(
        )


-async def calculate_workflow_run_cost(workflow_run_id: int):
-    logger.debug("Calculating cost for workflow run")
+async def _get_pricing_organization(workflow_run):
+    workflow = getattr(workflow_run, "workflow", None)
+    organization_id = getattr(workflow, "organization_id", None)
+    if organization_id is None and workflow and workflow.user:
+        organization_id = workflow.user.selected_organization_id
+    if organization_id is None:
+        return None
+    return await db_client.get_organization_by_id(organization_id)

-    workflow_run = await db_client.get_workflow_run_by_id(workflow_run_id)
-    if not workflow_run:
-        logger.warning("Workflow run not found")
-        return

-    workflow_usage_info = workflow_run.usage_info
-    if not workflow_usage_info:
+async def _build_usage_cost_snapshot(
+    usage_info: dict | None,
+    *,
+    workflow_run=None,
+    include_telephony_cost: bool = False,
+    organization=None,
+    calculated_at: str | None = None,
+) -> dict | None:
+    if not usage_info:
        logger.warning("No usage info available for workflow run")
-        return
+        return None

-    try:
-        # Calculate cost breakdown
-        cost_breakdown = cost_calculator.calculate_total_cost(workflow_usage_info)
+    cost_breakdown = cost_calculator.calculate_total_cost(usage_info)

-        # Fetch telephony call cost
+    if include_telephony_cost and workflow_run is not None:
        try:
            telephony_cost = await _fetch_telephony_cost(workflow_run)
            if telephony_cost:
@ -95,61 +104,127 @@ async def calculate_workflow_run_cost(workflow_run_id: int):
            logger.error(f"Failed to fetch telephony call cost: {e}")
            # Don't fail the whole cost calculation if telephony API fails

-        # Store cost information back to the workflow run
-        # Convert USD to Dograh Tokens (1 cent = 1 token)
-        dograh_tokens = round(float(cost_breakdown["total"]) * 100, 2)
+    total_cost_usd = Decimal(str(cost_breakdown["total"]))
+    dograh_tokens = float(total_cost_usd * Decimal("100"))

-        # Get organization to check if it has USD pricing
-        org = None
-        charge_usd = None
-        if (
-            workflow_run.workflow
-            and workflow_run.workflow.user
-            and workflow_run.workflow.user.selected_organization_id
-        ):
-            org = await db_client.get_organization_by_id(
-                workflow_run.workflow.user.selected_organization_id
-            )
+    if organization is None and workflow_run is not None:
+        organization = await _get_pricing_organization(workflow_run)

-        # Calculate USD cost if organization has pricing configured
-        if org and org.price_per_second_usd:
-            duration_seconds = workflow_usage_info.get("call_duration_seconds", 0)
-            charge_usd = duration_seconds * org.price_per_second_usd
+    charge_usd = None
+    if organization and organization.price_per_second_usd:
+        duration_seconds = usage_info.get("call_duration_seconds", 0)
+        charge_usd = float(
+            Decimal(str(duration_seconds))
+            * Decimal(str(organization.price_per_second_usd))
+        )

-        cost_info = {
-            **workflow_run.cost_info,
-            "cost_breakdown": cost_breakdown,
-            "total_cost_usd": float(cost_breakdown["total"]),
-            "dograh_token_usage": dograh_tokens,
-            "calculated_at": workflow_run.created_at.isoformat(),
-            "call_duration_seconds": workflow_usage_info["call_duration_seconds"],
-        }
+    cost_info = {
+        "cost_breakdown": cost_breakdown,
+        "total_cost_usd": float(total_cost_usd),
+        "dograh_token_usage": dograh_tokens,
+        "calculated_at": calculated_at
+        or (workflow_run.created_at.isoformat() if workflow_run is not None else None),
+        "call_duration_seconds": usage_info.get("call_duration_seconds", 0),
+    }

-        # Add USD cost if available
-        if charge_usd is not None:
-            cost_info["charge_usd"] = charge_usd
-            cost_info["price_per_second_usd"] = org.price_per_second_usd
+    if charge_usd is not None:
+        cost_info["charge_usd"] = charge_usd
+        cost_info["price_per_second_usd"] = organization.price_per_second_usd

-        # Update workflow run with cost information
-        await db_client.update_workflow_run(run_id=workflow_run_id, cost_info=cost_info)
+    return cost_info

-        # Update organization usage if applicable
-        if org:
-            try:
-                duration_seconds = workflow_usage_info.get("call_duration_seconds", 0)
-                await _update_organization_usage(
-                    org, dograh_tokens, duration_seconds, charge_usd
-                )
-            except Exception as e:
+
+async def build_workflow_run_cost_info(workflow_run) -> dict | None:
+    cost_info = await _build_usage_cost_snapshot(
+        workflow_run.usage_info,
+        workflow_run=workflow_run,
+        include_telephony_cost=True,
+        calculated_at=workflow_run.created_at.isoformat(),
+    )
+    if cost_info is None:
+        return None
+    return {
+        **(workflow_run.cost_info or {}),
+        **cost_info,
+    }
+
+
+async def save_workflow_run_cost_info(
+    workflow_run_id: int, cost_info: dict | None
+) -> None:
+    if cost_info is None:
+        return
+    await db_client.update_workflow_run(run_id=workflow_run_id, cost_info=cost_info)
+
+
+async def apply_workflow_run_usage_to_organization(
+    workflow_run, cost_info: dict | None
+) -> None:
+    if cost_info is None:
+        return
+
+    org = await _get_pricing_organization(workflow_run)
+    if not org:
+        return
+
+    await _update_organization_usage(
+        org,
+        float(cost_info.get("dograh_token_usage") or 0),
+        float(cost_info.get("call_duration_seconds") or 0),
+        cost_info.get("charge_usd"),
+    )
+
+
+async def apply_usage_delta_to_organization(
+    workflow_run, usage_info: dict | None
+) -> dict | None:
+    org = await _get_pricing_organization(workflow_run)
+    if not org:
+        return None
+
+    cost_info = await _build_usage_cost_snapshot(usage_info, organization=org)
+    if cost_info is None:
+        return None
+
+    await _update_organization_usage(
+        org,
+        float(cost_info.get("dograh_token_usage") or 0),
+        float(cost_info.get("call_duration_seconds") or 0),
+        cost_info.get("charge_usd"),
+    )
+    return cost_info
+
+
+async def calculate_workflow_run_cost(workflow_run_id: int):
+    logger.debug("Calculating cost for workflow run")
+
+    workflow_run = await db_client.get_workflow_run_by_id(workflow_run_id)
+    if not workflow_run:
+        logger.warning("Workflow run not found")
+        return
+
+    try:
+        cost_info = await build_workflow_run_cost_info(workflow_run)
+        if cost_info is None:
+            return
+
+        await save_workflow_run_cost_info(workflow_run_id, cost_info)
+
+        try:
+            await apply_workflow_run_usage_to_organization(workflow_run, cost_info)
+        except Exception as e:
+            org = await _get_pricing_organization(workflow_run)
+            if org:
                logger.error(
                    f"Failed to update organization usage for org {org.id}: {e}"
                )
-                # Don't fail the whole task if usage update fails
+            else:
+                logger.error(f"Failed to update organization usage: {e}")
+            # Don't fail the whole cost calculation if usage update fails

        logger.info(
-            f"Calculated cost for workflow run: ${cost_breakdown['total']:.6f} USD ({dograh_tokens} Dograh Tokens)"
+            f"Calculated cost for workflow run: ${cost_info['total_cost_usd']:.6f} USD ({cost_info['dograh_token_usage']} Dograh Tokens)"
        )
-
    except Exception as e:
        logger.error(f"Error calculating cost for workflow run: {e}")
        raise
--- a/api/services/workflow/pipecat_engine.py
+++ b/api/services/workflow/pipecat_engine.py
@ -1,4 +1,4 @@
-from typing import TYPE_CHECKING, Awaitable, Callable, Dict, Optional, Union
+from typing import TYPE_CHECKING, Awaitable, Callable, Dict, Literal, Optional, Union

 from pipecat.adapters.schemas.tools_schema import ToolsSchema
 from pipecat.frames.frames import (
@ -7,6 +7,7 @@ from pipecat.frames.frames import (
    CancelFrame,
    EndFrame,
    FunctionCallResultProperties,
+    LLMContextFrame,
    TTSSpeakFrame,
 )
 from pipecat.pipeline.task import PipelineTask
@ -533,7 +534,7 @@ class PipecatEngine:
        )
        await self._update_llm_context(system_prompt, functions)

-    async def set_node(self, node_id: str):
+    async def set_node(self, node_id: str, emit_transition_event: bool = True):
        """
        Simplified set_node implementation according to v2 PRD.
        """
@ -556,7 +557,7 @@ class PipecatEngine:
            nodes_visited.append(node.name)

        # Send node transition event if callback is provided
-        if self._node_transition_callback:
+        if emit_transition_event and self._node_transition_callback:
            try:
                await self._node_transition_callback(
                    node_id,
@ -598,8 +599,8 @@ class PipecatEngine:
        # Setup LLM context with prompts and functions.
        await self._setup_llm_context(node)

-    def get_start_greeting(self) -> Optional[tuple[str, Optional[str]]]:
-        """Return the greeting info for the start node, or None if not configured.
+    def get_node_greeting(self, node_id: str) -> Optional[tuple[str, Optional[str]]]:
+        """Return the greeting info for a node, or None if not configured.

        Returns:
            A tuple of (greeting_type, value) where:
@ -607,20 +608,93 @@ class PipecatEngine:
            - ("audio", recording_id) for pre-recorded audio greetings
            Or None if no greeting is configured.
        """
-        start_node = self.workflow.nodes.get(self.workflow.start_node_id)
-        if not start_node:
+        node = self.workflow.nodes.get(node_id)
+        if not node:
            return None

-        greeting_type = start_node.greeting_type or "text"
+        greeting_type = node.greeting_type or "text"

-        if greeting_type == "audio" and start_node.greeting_recording_id:
-            return ("audio", start_node.greeting_recording_id)
+        if greeting_type == "audio" and node.greeting_recording_id:
+            return ("audio", node.greeting_recording_id)

-        if start_node.greeting:
-            return ("text", self._format_prompt(start_node.greeting))
+        if node.greeting:
+            return ("text", self._format_prompt(node.greeting))

        return None

+    def get_start_greeting(self) -> Optional[tuple[str, Optional[str]]]:
+        """Return the greeting info for the start node, or None if not configured."""
+        return self.get_node_greeting(self.workflow.start_node_id)
+
+    async def queue_node_opening(
+        self,
+        *,
+        node_id: str,
+        previous_node_id: Optional[str] = None,
+        generate_if_no_greeting: bool = False,
+    ) -> Literal["none", "greeting", "llm"]:
+        """Queue the opening behavior for a node.
+
+        This is the shared source of truth for how a node begins once the
+        engine is ready and the node has already been set on the context.
+
+        Returns:
+            "greeting" when a text/audio greeting was queued,
+            "llm" when an initial LLM generation was queued,
+            "none" when nothing was queued.
+        """
+        if previous_node_id != node_id:
+            greeting_info = self.get_node_greeting(node_id)
+            if greeting_info:
+                greeting_type, greeting_value = greeting_info
+                if (
+                    greeting_type == "audio"
+                    and greeting_value
+                    and self._fetch_recording_audio
+                    and self._transport_output is not None
+                ):
+                    logger.debug(f"Playing audio greeting recording: {greeting_value}")
+                    result = await self._fetch_recording_audio(
+                        recording_pk=int(greeting_value)
+                    )
+                    if result:
+                        await play_audio(
+                            result.audio,
+                            sample_rate=self._audio_config.pipeline_sample_rate
+                            if self._audio_config
+                            else 16000,
+                            queue_frame=self._transport_output.queue_frame,
+                            transcript=result.transcript,
+                            append_to_context=True,
+                        )
+                        return "greeting"
+                    logger.warning(
+                        f"Failed to fetch audio greeting {greeting_value}, "
+                        "falling back to LLM generation"
+                    )
+                elif greeting_value and self.task is not None:
+                    logger.debug("Playing text greeting via TTS")
+                    # append_to_context=True so the assistant aggregator commits
+                    # the greeting to the LLM context once TTS finishes; without
+                    # it the LLM would re-greet on its first generation.
+                    await self.task.queue_frame(
+                        TTSSpeakFrame(greeting_value, append_to_context=True)
+                    )
+                    return "greeting"
+
+        if (
+            generate_if_no_greeting
+            and self.llm is not None
+            and self.context is not None
+        ):
+            logger.debug("Queueing initial LLM generation for node opening")
+            # Queue after the voicemail detector in the live pipeline so the
+            # detector can gate initial generations when needed.
+            await self.llm.queue_frame(LLMContextFrame(self.context))
+            return "llm"
+
+        return "none"
+
    async def _handle_end_node(self, node: Node) -> None:
        """Handle end node execution."""
        # Setup LLM context with prompts and functions.
--- a/api/services/workflow/pipecat_engine_custom_tools.py
+++ b/api/services/workflow/pipecat_engine_custom_tools.py
@ -511,6 +511,17 @@ class CustomToolManager:
                workflow_run = await db_client.get_workflow_run_by_id(
                    self._engine._workflow_run_id
                )
+                if workflow_run.mode == WorkflowRunMode.TEXTCHAT.value:
+                    textchat_error_result = {
+                        "status": "failed",
+                        "message": "I'm sorry, but call transfers are not available in text chat tests.",
+                        "action": "transfer_failed",
+                        "reason": "textchat_not_supported",
+                    }
+                    await self._handle_transfer_result(
+                        textchat_error_result, function_call_params, properties
+                    )
+                    return
                if workflow_run.mode in [
                    WorkflowRunMode.WEBRTC.value,
                    WorkflowRunMode.SMALLWEBRTC.value,
--- a/api/services/workflow/qa/tracing.py
+++ b/api/services/workflow/qa/tracing.py
@ -6,7 +6,10 @@ import re
 from loguru import logger

 from api.db.models import WorkflowRunModel
-from api.services.pipecat.tracing_config import get_trace_url
+from api.services.pipecat.tracing_config import (
+    build_remote_parent_context,
+    get_trace_url,
+)


 def extract_trace_id(gathered_context: dict) -> str | None:
@ -33,36 +36,12 @@ def setup_langfuse_parent_context(workflow_run: WorkflowRunModel):

    Returns the parent context object, or None if tracing is unavailable.
    """
-    try:
-        from opentelemetry.trace import (
-            NonRecordingSpan,
-            SpanContext,
-            TraceFlags,
-            set_span_in_context,
-        )
-
-        from api.services.pipecat.tracing_config import ensure_tracing
-
-        if not ensure_tracing():
-            return None
-
-        gathered_context = workflow_run.gathered_context or {}
-        trace_id = extract_trace_id(gathered_context)
-        if not trace_id:
-            logger.debug("No trace_id found, skipping Langfuse tracing")
-            return None
-
-        parent_span_ctx = SpanContext(
-            trace_id=int(trace_id, 16),
-            span_id=0x1,
-            is_remote=True,
-            trace_flags=TraceFlags(0x01),
-        )
-        return set_span_in_context(NonRecordingSpan(parent_span_ctx))
-
-    except Exception as e:
-        logger.warning(f"Failed to set up Langfuse parent context: {e}")
+    gathered_context = workflow_run.gathered_context or {}
+    trace_id = extract_trace_id(gathered_context)
+    if not trace_id:
+        logger.debug("No trace_id found, skipping Langfuse tracing")
        return None
+    return build_remote_parent_context(trace_id)


 def add_qa_span_to_trace(
--- a/api/services/workflow/text_chat_logs.py
+++ b/api/services/workflow/text_chat_logs.py
@ -0,0 +1,144 @@
+"""Helpers for projecting text-chat session state into run-log snapshots."""
+
+from typing import Any
+
+from api.services.pipecat.realtime_feedback_events import (
+    build_bot_text_event,
+    build_function_call_end_event,
+    build_function_call_start_event,
+    build_node_transition_event,
+    build_pipeline_error_event,
+    build_user_transcription_event,
+    realtime_feedback_event_sort_key,
+    stamp_realtime_feedback_event,
+)
+
+
+def visible_text_chat_turns(session_data: dict[str, Any]) -> list[dict[str, Any]]:
+    """Return the active branch of turns for the current text-chat session.
+
+    After a rewind, `session_data["turns"]` may still contain future turns until
+    the next message is sent. Those turns are no longer part of the visible
+    branch, so callers that synthesize transcript/log views should trim at
+    `cursor_turn_id`.
+    """
+    turns = list(session_data.get("turns") or [])
+    cursor_turn_id = session_data.get("cursor_turn_id")
+    if cursor_turn_id is None:
+        return turns
+
+    for index, turn in enumerate(turns):
+        if turn.get("id") == cursor_turn_id:
+            return turns[: index + 1]
+
+    return turns
+
+
+def build_text_chat_realtime_feedback_events(
+    session_data: dict[str, Any],
+) -> list[dict[str, Any]]:
+    """Project text-chat session state into `workflow_runs.logs` event format.
+
+    `workflow_run_text_sessions` holds the authoritative rewindable conversation
+    state. Historical run pages and QA helpers read the normalized
+    `workflow_runs.logs.realtime_feedback_events` schema instead, so this helper
+    rebuilds that snapshot from the currently visible branch.
+    """
+    events: list[dict[str, Any]] = []
+    last_emitted_node_id: str | None = None
+
+    for turn_index, turn in enumerate(visible_text_chat_turns(session_data)):
+        turn_events = list(turn.get("events") or [])
+        for event in turn_events:
+            payload = dict(event.get("payload") or {})
+            event_type = event.get("type")
+            timestamp = event.get("created_at") or turn.get("created_at")
+
+            if event_type == "node_transition":
+                node_id = payload.get("node_id")
+                if node_id is not None and node_id == last_emitted_node_id:
+                    continue
+                snapshot_event = stamp_realtime_feedback_event(
+                    build_node_transition_event(
+                        node_id=node_id,
+                        node_name=payload.get("node_name"),
+                        previous_node_id=payload.get("previous_node_id"),
+                        previous_node_name=payload.get("previous_node_name"),
+                        allow_interrupt=bool(payload.get("allow_interrupt", False)),
+                    ),
+                    timestamp=timestamp,
+                    turn=turn_index,
+                    node_id=node_id,
+                    node_name=payload.get("node_name"),
+                )
+                if node_id is not None:
+                    last_emitted_node_id = node_id
+                events.append(snapshot_event)
+            elif event_type == "tool_call_started":
+                events.append(
+                    stamp_realtime_feedback_event(
+                        build_function_call_start_event(
+                            function_name=payload.get("function_name"),
+                            tool_call_id=payload.get("tool_call_id"),
+                            arguments=payload.get("arguments"),
+                        ),
+                        timestamp=timestamp,
+                        turn=turn_index,
+                    )
+                )
+            elif event_type == "tool_call_result":
+                events.append(
+                    stamp_realtime_feedback_event(
+                        build_function_call_end_event(
+                            function_name=payload.get("function_name"),
+                            tool_call_id=payload.get("tool_call_id"),
+                            result=payload.get("result"),
+                        ),
+                        timestamp=timestamp,
+                        turn=turn_index,
+                    )
+                )
+            elif event_type == "execution_error":
+                events.append(
+                    stamp_realtime_feedback_event(
+                        build_pipeline_error_event(
+                            error=payload.get("message", "Execution error"),
+                            fatal=True,
+                        ),
+                        timestamp=timestamp,
+                        turn=turn_index,
+                    )
+                )
+
+        user_message = turn.get("user_message") or {}
+        if user_message.get("text"):
+            message_timestamp = user_message.get("created_at") or turn.get("created_at")
+            events.append(
+                stamp_realtime_feedback_event(
+                    build_user_transcription_event(
+                        text=user_message["text"],
+                        final=True,
+                        timestamp=message_timestamp,
+                    ),
+                    timestamp=message_timestamp,
+                    turn=turn_index,
+                )
+            )
+
+        assistant_message = turn.get("assistant_message") or {}
+        if assistant_message.get("text"):
+            message_timestamp = assistant_message.get("created_at") or turn.get(
+                "created_at"
+            )
+            events.append(
+                stamp_realtime_feedback_event(
+                    build_bot_text_event(
+                        text=assistant_message["text"],
+                        timestamp=message_timestamp,
+                    ),
+                    timestamp=message_timestamp,
+                    turn=turn_index,
+                )
+            )
+
+    return sorted(events, key=realtime_feedback_event_sort_key)
--- a/api/services/workflow/text_chat_runner.py
+++ b/api/services/workflow/text_chat_runner.py
@ -0,0 +1,649 @@
+import asyncio
+import hashlib
+import time
+from dataclasses import dataclass, field
+from datetime import UTC, datetime
+from typing import Any
+
+from fastapi.encoders import jsonable_encoder
+from loguru import logger
+from pipecat.frames.frames import (
+    BotStoppedSpeakingFrame,
+    CancelFrame,
+    EndFrame,
+    FunctionCallInProgressFrame,
+    FunctionCallResultFrame,
+    LLMAssistantPushAggregationFrame,
+    LLMContextFrame,
+    LLMFullResponseEndFrame,
+    LLMFullResponseStartFrame,
+    TextFrame,
+    TTSSpeakFrame,
+    TTSStoppedFrame,
+)
+from pipecat.pipeline.pipeline import Pipeline
+from pipecat.pipeline.runner import PipelineRunner
+from pipecat.processors.aggregators.llm_context import LLMContext
+from pipecat.processors.aggregators.llm_response_universal import (
+    LLMAssistantAggregatorParams,
+    LLMContextAggregatorPair,
+)
+from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
+from pipecat.utils.run_context import set_current_org_id
+
+from api.db import db_client
+from api.enums import WorkflowRunMode, WorkflowRunState
+from api.services.configuration.resolve import resolve_effective_config
+from api.services.pipecat.audio_config import create_audio_config
+from api.services.pipecat.pipeline_builder import create_pipeline_task
+from api.services.pipecat.pipeline_metrics_aggregator import (
+    PipelineMetricsAggregator,
+)
+from api.services.pipecat.recording_audio_cache import create_recording_audio_fetcher
+from api.services.pipecat.service_factory import create_llm_service
+from api.services.pipecat.tracing_config import (
+    build_remote_parent_context,
+    get_trace_url,
+)
+from api.services.workflow.dto import ReactFlowDTO
+from api.services.workflow.pipecat_engine import PipecatEngine
+from api.services.workflow.workflow_graph import WorkflowGraph
+
+TEXT_CHAT_CHECKPOINT_VERSION = 1
+TEXT_CHAT_TURN_TIMEOUT_SECONDS = 60.0
+TEXT_CHAT_IDLE_SETTLE_SECONDS = 0.2
+TEXT_CHAT_INTERNAL_CANCEL_REASON = "text_chat_turn_complete"
+
+
+def text_chat_trace_id(workflow_run_id: int) -> str:
+    """Deterministic Langfuse trace id for a text-chat session.
+
+    Each turn runs in its own short-lived pipeline, so there is no single
+    long-running task to own the trace the way a voice call does. Deriving the
+    id from the run id means every turn re-creates the *same* trace id and all
+    per-turn spans land in one shared trace — without persisting extra state
+    across the otherwise stateless turn requests.
+    """
+    digest = hashlib.sha256(f"dograh-text-chat:{workflow_run_id}".encode()).hexdigest()
+    return digest[:32]
+
+
+def default_text_chat_checkpoint() -> dict[str, Any]:
+    return {
+        "version": TEXT_CHAT_CHECKPOINT_VERSION,
+        "anchor_turn_id": None,
+        "current_node_id": None,
+        "messages": [],
+        "gathered_context": {},
+        "tool_state": {},
+    }
+
+
+def normalize_text_chat_checkpoint(
+    checkpoint: dict[str, Any] | None,
+) -> dict[str, Any]:
+    normalized = {
+        **default_text_chat_checkpoint(),
+        **(checkpoint or {}),
+    }
+    normalized["messages"] = list(normalized.get("messages") or [])
+    normalized["gathered_context"] = dict(normalized.get("gathered_context") or {})
+    normalized["tool_state"] = dict(normalized.get("tool_state") or {})
+    return normalized
+
+
+@dataclass
+class TextChatTurnExecutionResult:
+    assistant_text: str | None
+    assistant_created_at: str
+    events: list[dict[str, Any]]
+    usage: dict[str, Any]
+    checkpoint: dict[str, Any]
+    gathered_context: dict[str, Any]
+    initial_context: dict[str, Any]
+    state: str
+    is_completed: bool
+
+
+@dataclass
+class _ResponseWindowState:
+    active_assistant_segments: int = 0
+    active_llm_completions: int = 0
+    pending_context_requests: int = 0
+    blocking_tool_call_ids: set[str] = field(default_factory=set)
+    outputs: list[str] = field(default_factory=list)
+
+    def note_direct_context_request(self) -> None:
+        self.pending_context_requests += 1
+
+    def note_upstream_context_request(self) -> None:
+        self.pending_context_requests += 1
+
+    def note_llm_start(self) -> None:
+        if self.pending_context_requests > 0:
+            self.pending_context_requests -= 1
+        self.active_llm_completions += 1
+
+    def note_llm_end(self) -> None:
+        if self.active_llm_completions > 0:
+            self.active_llm_completions -= 1
+
+    def note_assistant_turn_started(self) -> None:
+        self.active_assistant_segments += 1
+
+    def note_assistant_turn_stopped(self, content: str) -> None:
+        if self.active_assistant_segments > 0:
+            self.active_assistant_segments -= 1
+        normalized_content = content.strip()
+        if normalized_content:
+            self.outputs.append(normalized_content)
+
+    def note_function_call_in_progress(self, tool_call_id: str, blocking: bool) -> None:
+        if blocking:
+            self.blocking_tool_call_ids.add(tool_call_id)
+
+    def note_function_call_result(self, tool_call_id: str) -> None:
+        self.blocking_tool_call_ids.discard(tool_call_id)
+
+    @property
+    def has_blocking_tool_calls(self) -> bool:
+        return bool(self.blocking_tool_call_ids)
+
+    @property
+    def frontier_is_idle(self) -> bool:
+        return (
+            self.pending_context_requests == 0
+            and self.active_llm_completions == 0
+            and self.active_assistant_segments == 0
+            and not self.has_blocking_tool_calls
+        )
+
+
+class _TaskQueueProxy:
+    def __init__(self, queue_frame):
+        self.queue_frame = queue_frame
+
+
+class _TextChatCaptureProcessor(FrameProcessor):
+    def __init__(self, response_window: _ResponseWindowState) -> None:
+        super().__init__()
+        self.last_activity_at = time.monotonic()
+        self.activity_count = 0
+        self.events: list[dict[str, Any]] = []
+        self._response_window = response_window
+
+    def _touch(self) -> None:
+        self.last_activity_at = time.monotonic()
+        self.activity_count += 1
+
+    def _append_event(self, event_type: str, payload: dict[str, Any]) -> None:
+        self.events.append(
+            {
+                "type": event_type,
+                "created_at": datetime.now(UTC).isoformat(),
+                "payload": jsonable_encoder(payload),
+            }
+        )
+
+    async def process_frame(self, frame, direction: FrameDirection):
+        await super().process_frame(frame, direction)
+        self._touch()
+
+        if isinstance(frame, TTSSpeakFrame):
+            text_frame = TextFrame(frame.text)
+            text_frame.append_to_context = (
+                frame.append_to_context if frame.append_to_context is not None else True
+            )
+            await self.push_frame(text_frame, direction)
+            await self.push_frame(LLMAssistantPushAggregationFrame(), direction)
+            return
+
+        if isinstance(frame, LLMContextFrame) and direction == FrameDirection.UPSTREAM:
+            self._response_window.note_upstream_context_request()
+
+        if isinstance(frame, TTSStoppedFrame):
+            await self.push_frame(frame, direction)
+            await self.push_frame(LLMAssistantPushAggregationFrame(), direction)
+            return
+
+        if (
+            isinstance(frame, LLMFullResponseStartFrame)
+            and direction == FrameDirection.DOWNSTREAM
+        ):
+            self._response_window.note_llm_start()
+
+        if (
+            isinstance(frame, LLMFullResponseEndFrame)
+            and direction is FrameDirection.DOWNSTREAM
+        ):
+            self._response_window.note_llm_end()
+            await self.push_frame(frame, direction)
+            # Text chat has no TTS/output transport, so mixed text+tool responses
+            # would otherwise leave function calls waiting forever on a
+            # BotStoppedSpeakingFrame that never arrives.
+            await self.push_frame(BotStoppedSpeakingFrame(), FrameDirection.UPSTREAM)
+            return
+
+        if isinstance(frame, FunctionCallInProgressFrame):
+            self._response_window.note_function_call_in_progress(
+                tool_call_id=frame.tool_call_id,
+                blocking=frame.cancel_on_interruption,
+            )
+            self._append_event(
+                "tool_call_started",
+                {
+                    "function_name": frame.function_name,
+                    "tool_call_id": frame.tool_call_id,
+                    "arguments": dict(frame.arguments or {}),
+                },
+            )
+        elif isinstance(frame, FunctionCallResultFrame):
+            self._response_window.note_function_call_result(frame.tool_call_id)
+            self._append_event(
+                "tool_call_result",
+                {
+                    "function_name": frame.function_name,
+                    "tool_call_id": frame.tool_call_id,
+                    "result": frame.result,
+                },
+            )
+        elif isinstance(frame, EndFrame):
+            self._append_event("session_end", {"reason": frame.reason})
+        elif isinstance(frame, CancelFrame):
+            if frame.reason != TEXT_CHAT_INTERNAL_CANCEL_REASON:
+                self._append_event("session_cancelled", {"reason": frame.reason})
+
+        await self.push_frame(frame, direction)
+
+
+def _merge_usage_info(
+    existing: dict[str, Any] | None,
+    delta: dict[str, Any] | None,
+) -> dict[str, Any]:
+    merged = dict(existing or {})
+    delta = dict(delta or {})
+
+    merged_llm = dict(merged.get("llm") or {})
+    for key, value in (delta.get("llm") or {}).items():
+        current = dict(merged_llm.get(key) or {})
+        merged_llm[key] = {
+            "prompt_tokens": int(current.get("prompt_tokens") or 0)
+            + int(value.get("prompt_tokens") or 0),
+            "completion_tokens": int(current.get("completion_tokens") or 0)
+            + int(value.get("completion_tokens") or 0),
+            "total_tokens": int(current.get("total_tokens") or 0)
+            + int(value.get("total_tokens") or 0),
+            "cache_read_input_tokens": int(current.get("cache_read_input_tokens") or 0)
+            + int(value.get("cache_read_input_tokens") or 0),
+            "cache_creation_input_tokens": int(
+                current.get("cache_creation_input_tokens") or 0
+            )
+            + int(value.get("cache_creation_input_tokens") or 0),
+        }
+    merged["llm"] = merged_llm
+
+    for section in ("tts", "stt"):
+        merged_section = dict(merged.get(section) or {})
+        for key, value in (delta.get(section) or {}).items():
+            merged_section[key] = float(merged_section.get(key) or 0) + float(value)
+        merged[section] = merged_section
+
+    merged["call_duration_seconds"] = int(
+        merged.get("call_duration_seconds") or 0
+    ) + int(delta.get("call_duration_seconds") or 0)
+
+    return merged
+
+
+def merge_text_chat_usage_info(
+    existing: dict[str, Any] | None,
+    delta: dict[str, Any] | None,
+) -> dict[str, Any]:
+    return _merge_usage_info(existing, delta)
+
+
+def _resolve_checkpoint_for_pending_turn(
+    session_data: dict[str, Any],
+    checkpoint: dict[str, Any] | None,
+) -> dict[str, Any]:
+    turns = list(session_data.get("turns") or [])
+    if not turns:
+        return normalize_text_chat_checkpoint(checkpoint)
+
+    pending_turn = turns[-1]
+    if pending_turn.get("status") != "pending":
+        return normalize_text_chat_checkpoint(checkpoint)
+
+    for turn in reversed(turns[:-1]):
+        if turn.get("status") != "completed":
+            continue
+        stored_checkpoint = turn.get("checkpoint_after_turn")
+        if stored_checkpoint:
+            return normalize_text_chat_checkpoint(stored_checkpoint)
+        break
+
+    return normalize_text_chat_checkpoint(checkpoint)
+
+
+async def _wait_for_quiescence(
+    *,
+    capture_processor: _TextChatCaptureProcessor,
+    response_window: _ResponseWindowState,
+    runner_task: asyncio.Task,
+    activity_marker: int,
+    timeout_seconds: float = TEXT_CHAT_TURN_TIMEOUT_SECONDS,
+) -> None:
+    loop = asyncio.get_running_loop()
+    deadline = loop.time() + timeout_seconds
+
+    while loop.time() < deadline:
+        if runner_task.done():
+            await runner_task
+            return
+
+        if (
+            capture_processor.activity_count <= activity_marker
+            and response_window.frontier_is_idle
+        ):
+            await asyncio.sleep(0.05)
+            continue
+
+        if (
+            response_window.frontier_is_idle
+            and (time.monotonic() - capture_processor.last_activity_at)
+            >= TEXT_CHAT_IDLE_SETTLE_SECONDS
+        ):
+            return
+
+        await asyncio.sleep(0.05)
+
+    raise TimeoutError(
+        "Timed out waiting for text chat response window to settle "
+        f"(pending_context_requests={response_window.pending_context_requests}, "
+        f"active_llm_completions={response_window.active_llm_completions}, "
+        f"active_assistant_segments={response_window.active_assistant_segments}, "
+        f"blocking_tool_calls={sorted(response_window.blocking_tool_call_ids)})"
+    )
+
+
+async def execute_text_chat_pending_turn(
+    *,
+    workflow_run_id: int,
+    workflow_id: int,
+    session_data: dict[str, Any],
+    checkpoint: dict[str, Any] | None,
+) -> TextChatTurnExecutionResult:
+    turns = list(session_data.get("turns") or [])
+    if not turns or turns[-1].get("status") != "pending":
+        raise ValueError("Text chat session has no pending turn to execute")
+
+    pending_turn = turns[-1]
+    pending_user_message = (
+        ((pending_turn.get("user_message") or {}).get("text") or "").strip()
+        if pending_turn.get("user_message") is not None
+        else None
+    )
+
+    workflow_run, _ = await db_client.get_workflow_run_with_context(workflow_run_id)
+    if not workflow_run or workflow_run.workflow_id != workflow_id:
+        raise ValueError("Workflow run not found for text chat execution")
+    if workflow_run.definition is None:
+        raise ValueError("Workflow run is missing a pinned definition")
+    if workflow_run.workflow is None or workflow_run.workflow.user is None:
+        raise ValueError("Workflow run is missing workflow context")
+
+    workflow = await db_client.get_workflow(
+        workflow_id, organization_id=workflow_run.workflow.organization_id
+    )
+    if workflow is None:
+        raise ValueError("Workflow not found for text chat execution")
+
+    # Stamp the async context so OTEL spans are tagged with this org and routed
+    # to its Langfuse project (the voice paths do this in run_pipeline /
+    # webrtc_signaling; the text path previously skipped it, so its spans never
+    # reached org-specific exporters).
+    set_current_org_id(workflow.organization_id)
+
+    run_definition = workflow_run.definition
+    run_configs = run_definition.workflow_configurations or {}
+
+    user_config = await db_client.get_user_configurations(workflow_run.workflow.user.id)
+    user_config = resolve_effective_config(
+        user_config, run_configs.get("model_overrides")
+    )
+    if user_config.llm is None:
+        raise ValueError("Text chat requires an LLM configuration")
+
+    llm = create_llm_service(user_config)
+    inference_llm = llm
+
+    runtime_configuration = {
+        "llm_provider": user_config.llm.provider,
+        "llm_model": user_config.llm.model,
+    }
+    initial_context = {
+        **(workflow_run.initial_context or {}),
+        "runtime_configuration": runtime_configuration,
+    }
+
+    workflow_graph = WorkflowGraph(
+        ReactFlowDTO.model_validate(run_definition.workflow_json)
+    )
+    base_checkpoint = _resolve_checkpoint_for_pending_turn(session_data, checkpoint)
+
+    response_window = _ResponseWindowState()
+    capture_processor = _TextChatCaptureProcessor(response_window)
+    context = LLMContext()
+    context.set_messages(base_checkpoint["messages"])
+
+    node_transition_events = capture_processor.events
+
+    async def send_node_transition(
+        node_id: str,
+        node_name: str,
+        previous_node_id: str | None,
+        previous_node_name: str | None,
+        allow_interrupt: bool = False,
+    ) -> None:
+        node_transition_events.append(
+            {
+                "type": "node_transition",
+                "created_at": datetime.now(UTC).isoformat(),
+                "payload": {
+                    "node_id": node_id,
+                    "node_name": node_name,
+                    "previous_node_id": previous_node_id,
+                    "previous_node_name": previous_node_name,
+                    "allow_interrupt": allow_interrupt,
+                },
+            }
+        )
+
+    embeddings_api_key = None
+    embeddings_model = None
+    embeddings_base_url = None
+    if user_config.embeddings:
+        embeddings_api_key = user_config.embeddings.api_key
+        embeddings_model = user_config.embeddings.model
+        embeddings_base_url = getattr(user_config.embeddings, "base_url", None)
+
+    has_recordings = await db_client.has_active_recordings(workflow.organization_id)
+    context_compaction_enabled = (workflow.workflow_configurations or {}).get(
+        "context_compaction_enabled", False
+    )
+
+    engine = PipecatEngine(
+        llm=llm,
+        inference_llm=inference_llm,
+        context=context,
+        workflow=workflow_graph,
+        call_context_vars=initial_context,
+        workflow_run_id=workflow_run_id,
+        node_transition_callback=send_node_transition,
+        embeddings_api_key=embeddings_api_key,
+        embeddings_model=embeddings_model,
+        embeddings_base_url=embeddings_base_url,
+        has_recordings=has_recordings,
+        context_compaction_enabled=context_compaction_enabled,
+    )
+    engine._gathered_context = dict(base_checkpoint["gathered_context"])
+
+    assistant_params = LLMAssistantAggregatorParams()
+    context_aggregator = LLMContextAggregatorPair(
+        context, assistant_params=assistant_params
+    )
+    assistant_context_aggregator = context_aggregator.assistant()
+
+    @assistant_context_aggregator.event_handler("on_assistant_turn_started")
+    async def on_assistant_turn_started(_aggregator):
+        response_window.note_assistant_turn_started()
+
+    @assistant_context_aggregator.event_handler("on_assistant_turn_stopped")
+    async def on_assistant_turn_stopped(_aggregator, message):
+        response_window.note_assistant_turn_stopped(message.content or "")
+
+    # Text chat has no wire transport; reuse the neutral 16 kHz config shape
+    # from the browser pipeline so TTS/recording helpers still have sane defaults.
+    audio_config = create_audio_config(WorkflowRunMode.SMALLWEBRTC.value)
+    pipeline_metrics_aggregator = PipelineMetricsAggregator()
+
+    # Stitch every per-turn pipeline of this session into one Langfuse trace by
+    # handing each task the same remote parent context (derived from the run id).
+    trace_id = text_chat_trace_id(workflow_run_id)
+    conversation_parent_context = build_remote_parent_context(trace_id)
+    # The stitched trace has no real root span (each per-turn conversation span
+    # hangs off a synthetic remote parent), so Langfuse can't infer a name and
+    # shows "Unnamed trace". Name it explicitly via the conversation span.
+    trace_span_attributes = {
+        "langfuse.trace.name": workflow_run.name or f"text-chat-{workflow_run_id}"
+    }
+
+    pipeline = Pipeline(
+        [
+            llm,
+            capture_processor,
+            assistant_context_aggregator,
+            pipeline_metrics_aggregator,
+        ]
+    )
+    task = create_pipeline_task(
+        pipeline,
+        workflow_run_id,
+        audio_config,
+        conversation_parent_context=conversation_parent_context,
+        conversation_type="text",
+        additional_span_attributes=trace_span_attributes,
+    )
+    runner = PipelineRunner(handle_sigint=False, handle_sigterm=False)
+    runner_task = asyncio.create_task(runner.run(task))
+
+    engine.set_task(task)
+    engine.set_audio_config(audio_config)
+    engine.set_transport_output(_TaskQueueProxy(task.queue_frame))
+    engine.set_fetch_recording_audio(
+        create_recording_audio_fetcher(
+            organization_id=workflow.organization_id,
+            pipeline_sample_rate=audio_config.pipeline_sample_rate,
+        )
+    )
+
+    try:
+        await asyncio.wait_for(task._pipeline_start_event.wait(), timeout=5.0)
+
+        await engine.initialize()
+
+        current_node_id = base_checkpoint.get("current_node_id")
+        target_node_id = current_node_id or workflow_graph.start_node_id
+        await engine.set_node(
+            target_node_id,
+            emit_transition_event=current_node_id is None,
+        )
+
+        opening_marker = capture_processor.activity_count
+        opening_expects_llm = pending_user_message is None and (
+            current_node_id == target_node_id
+            or engine.get_node_greeting(target_node_id) is None
+        )
+        if opening_expects_llm:
+            response_window.note_direct_context_request()
+        opening_action = await engine.queue_node_opening(
+            node_id=target_node_id,
+            previous_node_id=current_node_id,
+            generate_if_no_greeting=pending_user_message is None,
+        )
+        if opening_action != "llm" and opening_expects_llm:
+            response_window.pending_context_requests = max(
+                0, response_window.pending_context_requests - 1
+            )
+        if opening_action != "none":
+            await _wait_for_quiescence(
+                capture_processor=capture_processor,
+                response_window=response_window,
+                runner_task=runner_task,
+                activity_marker=opening_marker,
+            )
+
+        if pending_user_message is not None:
+            context.add_message({"role": "user", "content": pending_user_message})
+            generation_marker = capture_processor.activity_count
+            response_window.note_direct_context_request()
+            await llm.queue_frame(LLMContextFrame(context))
+            await _wait_for_quiescence(
+                capture_processor=capture_processor,
+                response_window=response_window,
+                runner_task=runner_task,
+                activity_marker=generation_marker,
+            )
+    finally:
+        if not task.has_finished():
+            await task.cancel(reason=TEXT_CHAT_INTERNAL_CANCEL_REASON)
+        try:
+            await runner_task
+        except Exception:
+            logger.exception(
+                "Transportless text chat pipeline failed while closing run {}",
+                workflow_run_id,
+            )
+            await engine.cleanup()
+            raise
+        await engine.cleanup()
+
+    gathered_context = await engine.get_gathered_context()
+    assistant_text = (
+        "\n\n".join(part for part in response_window.outputs if part).strip()
+        if response_window.outputs
+        else None
+    )
+    assistant_created_at = datetime.now(UTC).isoformat()
+    usage = pipeline_metrics_aggregator.get_all_usage_metrics_serialized()
+    current_node = getattr(engine, "_current_node", None)
+
+    updated_checkpoint = {
+        "version": TEXT_CHAT_CHECKPOINT_VERSION,
+        "anchor_turn_id": pending_turn.get("id"),
+        "current_node_id": current_node.id if current_node else None,
+        "messages": jsonable_encoder(context.get_messages()),
+        "gathered_context": jsonable_encoder(gathered_context),
+        "tool_state": jsonable_encoder(base_checkpoint.get("tool_state") or {}),
+    }
+
+    encoded_gathered_context = jsonable_encoder(gathered_context)
+    trace_url = get_trace_url(trace_id, org_id=workflow.organization_id)
+    if trace_url:
+        encoded_gathered_context = {**encoded_gathered_context, "trace_url": trace_url}
+
+    return TextChatTurnExecutionResult(
+        assistant_text=assistant_text,
+        assistant_created_at=assistant_created_at,
+        events=jsonable_encoder(capture_processor.events),
+        usage=jsonable_encoder(usage),
+        checkpoint=updated_checkpoint,
+        gathered_context=encoded_gathered_context,
+        initial_context=jsonable_encoder(initial_context),
+        state=(
+            WorkflowRunState.COMPLETED.value
+            if engine.is_call_disposed()
+            else WorkflowRunState.RUNNING.value
+        ),
+        is_completed=engine.is_call_disposed(),
+    )
--- a/api/services/workflow/text_chat_session_service.py
+++ b/api/services/workflow/text_chat_session_service.py
@ -0,0 +1,411 @@
+"""Service helpers for text-chat session lifecycle orchestration."""
+
+from datetime import UTC, datetime
+from typing import Any
+from uuid import uuid4
+
+from loguru import logger
+
+from api.db import db_client
+from api.db.models import WorkflowRunTextSessionModel
+from api.db.workflow_run_text_session_client import (
+    WorkflowRunTextSessionRevisionConflictError,
+)
+from api.services.pricing.workflow_run_cost import (
+    apply_usage_delta_to_organization,
+    build_workflow_run_cost_info,
+)
+from api.services.workflow.text_chat_logs import (
+    build_text_chat_realtime_feedback_events,
+)
+from api.services.workflow.text_chat_runner import (
+    default_text_chat_checkpoint,
+    execute_text_chat_pending_turn,
+    merge_text_chat_usage_info,
+    normalize_text_chat_checkpoint,
+)
+
+TEXT_CHAT_SESSION_VERSION = 1
+
+
+class TextChatSessionRevisionConflictError(Exception):
+    def __init__(self, expected_revision: int, actual_revision: int):
+        self.expected_revision = expected_revision
+        self.actual_revision = actual_revision
+        super().__init__(
+            "Text chat session revision conflict: "
+            f"expected {expected_revision}, found {actual_revision}"
+        )
+
+
+class TextChatSessionExecutionError(Exception):
+    """Raised when the assistant turn fails to execute."""
+
+
+class TextChatPendingTurnLostError(Exception):
+    """Raised when the pending turn disappears before persistence completes."""
+
+
+class TextChatTurnNotFoundError(Exception):
+    """Raised when a requested rewind cursor does not exist in the session."""
+
+
+def default_text_chat_session_data() -> dict[str, Any]:
+    return {
+        "version": TEXT_CHAT_SESSION_VERSION,
+        "status": "idle",
+        "cursor_turn_id": None,
+        "turns": [],
+        "discarded_future": [],
+        "simulator": {
+            "enabled": False,
+            "config": {},
+        },
+    }
+
+
+def normalize_text_chat_session_data(
+    session_data: dict[str, Any] | None,
+) -> dict[str, Any]:
+    normalized = {
+        **default_text_chat_session_data(),
+        **(session_data or {}),
+    }
+    normalized["turns"] = list(normalized.get("turns") or [])
+    normalized["discarded_future"] = list(normalized.get("discarded_future") or [])
+    simulator = normalized.get("simulator") or {}
+    normalized["simulator"] = {
+        "enabled": bool(simulator.get("enabled", False)),
+        "config": dict(simulator.get("config") or {}),
+    }
+    return normalized
+
+
+async def initialize_text_chat_session(
+    *,
+    run_id: int,
+    text_session: WorkflowRunTextSessionModel,
+) -> WorkflowRunTextSessionModel:
+    session_data = normalize_text_chat_session_data(text_session.session_data)
+    checkpoint = normalize_text_chat_checkpoint(text_session.checkpoint)
+
+    session_data["turns"] = [build_pending_text_chat_turn(user_text=None)]
+    session_data["status"] = "pending_assistant_turn"
+    checkpoint["anchor_turn_id"] = latest_completed_text_chat_turn_id(
+        session_data["turns"]
+    )
+
+    try:
+        await db_client.update_workflow_run_text_session(
+            run_id,
+            session_data=session_data,
+            checkpoint=checkpoint,
+            expected_revision=text_session.revision,
+        )
+    except WorkflowRunTextSessionRevisionConflictError as e:
+        raise TextChatSessionRevisionConflictError(
+            expected_revision=e.expected_revision,
+            actual_revision=e.actual_revision,
+        ) from e
+
+    return await _reload_text_chat_session(run_id)
+
+
+async def append_text_chat_user_message(
+    *,
+    run_id: int,
+    text_session: WorkflowRunTextSessionModel,
+    user_text: str,
+    expected_revision: int | None,
+) -> WorkflowRunTextSessionModel:
+    session_data = normalize_text_chat_session_data(text_session.session_data)
+    checkpoint = normalize_text_chat_checkpoint(text_session.checkpoint)
+
+    active_turns, discarded_future = truncate_text_chat_future_turns(session_data)
+    active_turns.append(build_pending_text_chat_turn(user_text=user_text))
+
+    session_data["turns"] = active_turns
+    session_data["discarded_future"] = discarded_future
+    session_data["cursor_turn_id"] = None
+    session_data["status"] = "pending_assistant_turn"
+    checkpoint["anchor_turn_id"] = latest_completed_text_chat_turn_id(active_turns)
+
+    try:
+        await db_client.update_workflow_run_text_session(
+            run_id,
+            session_data=session_data,
+            checkpoint=checkpoint,
+            expected_revision=expected_revision,
+        )
+    except WorkflowRunTextSessionRevisionConflictError as e:
+        raise TextChatSessionRevisionConflictError(
+            expected_revision=e.expected_revision,
+            actual_revision=e.actual_revision,
+        ) from e
+
+    return await _reload_text_chat_session(run_id)
+
+
+async def rewind_text_chat_session_state(
+    *,
+    run_id: int,
+    text_session: WorkflowRunTextSessionModel,
+    cursor_turn_id: str | None,
+    expected_revision: int | None,
+) -> WorkflowRunTextSessionModel:
+    session_data = normalize_text_chat_session_data(text_session.session_data)
+    validate_text_chat_turn_cursor(session_data, cursor_turn_id)
+
+    session_data["cursor_turn_id"] = cursor_turn_id
+    session_data["status"] = "rewound" if cursor_turn_id else "idle"
+
+    try:
+        await db_client.update_workflow_run_text_session(
+            run_id,
+            session_data=session_data,
+            expected_revision=expected_revision,
+        )
+    except WorkflowRunTextSessionRevisionConflictError as e:
+        raise TextChatSessionRevisionConflictError(
+            expected_revision=e.expected_revision,
+            actual_revision=e.actual_revision,
+        ) from e
+
+    await db_client.update_workflow_run(
+        run_id,
+        logs={
+            "realtime_feedback_events": build_text_chat_realtime_feedback_events(
+                session_data
+            )
+        },
+    )
+
+    return await _reload_text_chat_session(run_id)
+
+
+async def execute_pending_text_chat_turn(
+    *,
+    workflow_id: int,
+    run_id: int,
+    text_session: WorkflowRunTextSessionModel,
+) -> WorkflowRunTextSessionModel:
+    """Execute the current pending assistant turn and persist its side effects."""
+    session_data = normalize_text_chat_session_data(text_session.session_data)
+    checkpoint = normalize_text_chat_checkpoint(text_session.checkpoint)
+
+    try:
+        execution = await execute_text_chat_pending_turn(
+            workflow_run_id=run_id,
+            workflow_id=workflow_id,
+            session_data=session_data,
+            checkpoint=checkpoint,
+        )
+    except Exception as e:
+        await _mark_pending_turn_failed(
+            run_id=run_id,
+            text_session=text_session,
+            error_message=str(e),
+        )
+        raise TextChatSessionExecutionError(
+            "Failed to execute text chat assistant turn"
+        ) from e
+
+    completed_session_data = normalize_text_chat_session_data(text_session.session_data)
+    completed_turns = list(completed_session_data.get("turns") or [])
+    if not completed_turns or completed_turns[-1].get("status") != "pending":
+        raise TextChatPendingTurnLostError(
+            "Text chat session lost its pending turn before completion"
+        )
+
+    completed_turns[-1]["status"] = "completed"
+    completed_turns[-1]["assistant_message"] = (
+        {
+            "text": execution.assistant_text,
+            "created_at": execution.assistant_created_at,
+        }
+        if execution.assistant_text
+        else None
+    )
+    completed_turns[-1]["events"] = execution.events
+    completed_turns[-1]["usage"] = execution.usage
+    completed_turns[-1]["checkpoint_after_turn"] = execution.checkpoint
+    completed_session_data["turns"] = completed_turns
+    completed_session_data["status"] = "idle"
+
+    try:
+        await db_client.update_workflow_run_text_session(
+            run_id,
+            session_data=completed_session_data,
+            checkpoint=execution.checkpoint,
+            expected_revision=text_session.revision,
+        )
+    except WorkflowRunTextSessionRevisionConflictError as e:
+        raise TextChatSessionRevisionConflictError(
+            expected_revision=e.expected_revision,
+            actual_revision=e.actual_revision,
+        ) from e
+
+    existing_usage_info = text_session.workflow_run.usage_info or {}
+    merged_usage_info = merge_text_chat_usage_info(existing_usage_info, execution.usage)
+    text_chat_logs = {
+        "realtime_feedback_events": build_text_chat_realtime_feedback_events(
+            completed_session_data
+        )
+    }
+    await db_client.update_workflow_run(
+        run_id,
+        initial_context=execution.initial_context,
+        usage_info=merged_usage_info,
+        gathered_context=execution.gathered_context,
+        logs=text_chat_logs,
+        state=execution.state,
+        is_completed=execution.is_completed,
+    )
+    workflow_run = await db_client.get_workflow_run_by_id(run_id)
+    if workflow_run:
+        try:
+            # Apply the per-turn delta so org usage tracks cumulative run cost
+            # without replaying the full session totals on every turn.
+            await apply_usage_delta_to_organization(workflow_run, execution.usage)
+        except Exception as e:
+            logger.error(
+                f"Failed to update organization usage for text chat run {run_id}: {e}"
+            )
+
+        cost_info = await build_workflow_run_cost_info(workflow_run)
+        if cost_info is not None:
+            await db_client.update_workflow_run(run_id, cost_info=cost_info)
+
+    return await _reload_text_chat_session(run_id)
+
+
+def validate_text_chat_turn_cursor(
+    session_data: dict[str, Any],
+    cursor_turn_id: str | None,
+) -> None:
+    if cursor_turn_id is None:
+        return
+    if not any(turn.get("id") == cursor_turn_id for turn in session_data["turns"]):
+        raise TextChatTurnNotFoundError("Turn not found in text chat session")
+
+
+def truncate_text_chat_future_turns(
+    session_data: dict[str, Any],
+) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]:
+    cursor_turn_id = session_data.get("cursor_turn_id")
+    turns = list(session_data.get("turns") or [])
+    discarded_future = list(session_data.get("discarded_future") or [])
+
+    if cursor_turn_id is None:
+        return turns, discarded_future
+
+    for index, turn in enumerate(turns):
+        if turn.get("id") == cursor_turn_id:
+            active_turns = turns[: index + 1]
+            future_turns = turns[index + 1 :]
+            if future_turns:
+                discarded_future.append(
+                    {
+                        "rewound_from_turn_id": cursor_turn_id,
+                        "discarded_at": datetime.now(UTC).isoformat(),
+                        "turns": future_turns,
+                    }
+                )
+            return active_turns, discarded_future
+
+    raise TextChatTurnNotFoundError("Turn not found in text chat session")
+
+
+def latest_completed_text_chat_turn_id(turns: list[dict[str, Any]]) -> str | None:
+    for turn in reversed(turns):
+        if turn.get("status") == "completed":
+            return turn.get("id")
+    return None
+
+
+def build_pending_text_chat_turn(*, user_text: str | None) -> dict[str, Any]:
+    now = datetime.now(UTC).isoformat()
+    return {
+        "id": f"turn_{uuid4().hex[:12]}",
+        "status": "pending",
+        "created_at": now,
+        "user_message": (
+            {
+                "text": user_text,
+                "created_at": now,
+            }
+            if user_text is not None
+            else None
+        ),
+        "assistant_message": None,
+        "events": [],
+        "usage": {},
+    }
+
+
+async def _mark_pending_turn_failed(
+    *,
+    run_id: int,
+    text_session: WorkflowRunTextSessionModel,
+    error_message: str,
+) -> None:
+    failed_session_data = normalize_text_chat_session_data(text_session.session_data)
+    failed_turns = list(failed_session_data.get("turns") or [])
+    if not failed_turns or failed_turns[-1].get("status") != "pending":
+        return
+
+    failed_turns[-1]["status"] = "failed"
+    failed_turns[-1]["events"] = [
+        *(failed_turns[-1].get("events") or []),
+        {
+            "type": "execution_error",
+            "created_at": datetime.now(UTC).isoformat(),
+            "payload": {"message": error_message},
+        },
+    ]
+    failed_session_data["turns"] = failed_turns
+    failed_session_data["status"] = "error"
+    try:
+        await db_client.update_workflow_run_text_session(
+            run_id,
+            session_data=failed_session_data,
+            expected_revision=text_session.revision,
+        )
+    except WorkflowRunTextSessionRevisionConflictError:
+        return
+
+
+async def _reload_text_chat_session(run_id: int) -> WorkflowRunTextSessionModel:
+    organization_id = await db_client.get_organization_id_by_workflow_run_id(run_id)
+    if organization_id is None:
+        raise TextChatSessionExecutionError(
+            "Workflow run organization not found after update"
+        )
+    updated_text_session = await db_client.get_workflow_run_text_session(
+        run_id,
+        organization_id=organization_id,
+    )
+    if updated_text_session is None:
+        raise TextChatSessionExecutionError("Text chat session not found after update")
+    return updated_text_session
+
+
+__all__ = [
+    "TEXT_CHAT_SESSION_VERSION",
+    "TextChatTurnNotFoundError",
+    "append_text_chat_user_message",
+    "build_pending_text_chat_turn",
+    "TextChatPendingTurnLostError",
+    "TextChatSessionExecutionError",
+    "TextChatSessionRevisionConflictError",
+    "default_text_chat_checkpoint",
+    "default_text_chat_session_data",
+    "execute_pending_text_chat_turn",
+    "initialize_text_chat_session",
+    "latest_completed_text_chat_turn_id",
+    "normalize_text_chat_checkpoint",
+    "normalize_text_chat_session_data",
+    "rewind_text_chat_session_state",
+    "truncate_text_chat_future_turns",
+    "validate_text_chat_turn_cursor",
+]