# dograh/api/services/pipecat/run_pipeline.py

import asyncio
from typing import Optional

from fastapi import HTTPException
from loguru import logger

from api.db import db_client
from api.enums import WorkflowRunMode
from api.services.configuration.registry import ServiceProviders
from api.services.integrations import (
    IntegrationRuntimeContext,
    create_runtime_sessions,
)
from api.services.pipecat.audio_config import AudioConfig, create_audio_config
from api.services.pipecat.event_handlers import (
    register_audio_data_handler,
    register_event_handlers,
)
from api.services.pipecat.in_memory_buffers import InMemoryLogsBuffer
from api.services.pipecat.pipeline_builder import (
    build_pipeline,
    build_realtime_pipeline,
    create_pipeline_components,
    create_pipeline_task,
)
from api.services.pipecat.pipeline_engine_callbacks_processor import (
    PipelineEngineCallbacksProcessor,
)
from api.services.pipecat.pipeline_metrics_aggregator import PipelineMetricsAggregator
from api.services.pipecat.pre_call_fetch import execute_pre_call_fetch
from api.services.pipecat.realtime_feedback_events import (
    build_node_transition_event,
)
from api.services.pipecat.realtime_feedback_observer import (
    RealtimeFeedbackObserver,
    register_turn_log_handlers,
)
from api.services.pipecat.recording_audio_cache import (
    create_recording_audio_fetcher,
    warm_recording_cache,
)
from api.services.pipecat.recording_router_processor import RecordingRouterProcessor
from api.services.pipecat.service_factory import (
    create_llm_service,
    create_llm_service_from_provider,
    create_realtime_llm_service,
    create_stt_service,
    create_tts_service,
)
from api.services.pipecat.tracing_config import (
    ensure_tracing,
)
from api.services.pipecat.transport_setup import create_webrtc_transport
from api.services.pipecat.worker_runner import run_pipeline_worker
from api.services.pipecat.ws_sender_registry import get_ws_sender
from api.services.telephony import registry as telephony_registry
from api.services.workflow.dto import ReactFlowDTO
from api.services.workflow.pipecat_engine import PipecatEngine
from api.services.workflow.workflow_graph import WorkflowGraph
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.audio.vad.vad_analyzer import VADParams
from pipecat.extensions.voicemail.voicemail_detector import VoicemailDetector
from pipecat.processors.aggregators.llm_response_universal import (
    LLMAssistantAggregatorParams,
    LLMContextAggregatorPair,
    LLMUserAggregatorParams,
)
from pipecat.transports.smallwebrtc.connection import SmallWebRTCConnection
from pipecat.turns.user_mute import (
    CallbackUserMuteStrategy,
    FunctionCallUserMuteStrategy,
    MuteUntilFirstBotCompleteUserMuteStrategy,
)
from pipecat.turns.user_start import (
    ExternalUserTurnStartStrategy,
    TranscriptionUserTurnStartStrategy,
)
from pipecat.turns.user_start.vad_user_turn_start_strategy import (
    VADUserTurnStartStrategy,
)
from pipecat.turns.user_stop import (
    ExternalUserTurnStopStrategy,
    SpeechTimeoutUserTurnStopStrategy,
    TurnAnalyzerUserTurnStopStrategy,
)
from pipecat.turns.user_turn_strategies import UserTurnStrategies
from pipecat.utils.enums import EndTaskReason, RealtimeFeedbackType
from pipecat.utils.run_context import set_current_org_id, set_current_run_id

# Set up tracing if enabled. This call sits at module top level, so it runs
# when this module is imported, before any pipeline entry point is invoked.
ensure_tracing()


def _create_realtime_user_turn_config(provider: str):
    """Pick the user-turn strategies and local VAD analyzer for a realtime provider.

    Args:
        provider: Realtime provider identifier, compared against
            ``ServiceProviders.*.value``.

    Returns:
        A ``(UserTurnStrategies, vad_analyzer)`` tuple. ``vad_analyzer`` is
        ``None`` for providers whose turn signals come from the service
        itself; otherwise it is a ``SileroVADAnalyzer`` with a 0.2s stop window.
    """
    is_gemini_live = provider in {
        ServiceProviders.GOOGLE_REALTIME.value,
        ServiceProviders.GOOGLE_VERTEX_REALTIME.value,
    }

    # OpenAI Realtime and Grok Voice Agent both emit speech-start/stop and
    # interruption signals server-side, so the aggregator follows those
    # external signals and no local VAD is run.
    if not is_gemini_live and (
        provider == ServiceProviders.OPENAI_REALTIME.value
        or provider == ServiceProviders.GROK_REALTIME.value
    ):
        externally_driven = UserTurnStrategies(
            start=[ExternalUserTurnStartStrategy()],
            stop=[ExternalUserTurnStopStrategy()],
        )
        return externally_driven, None

    # Remaining providers use local Silero VAD. For Gemini Live the provider's
    # server-side VAD owns barge-in, so local VAD only marks early user-turn
    # start / speaking state and must not trigger interruptions itself.
    if is_gemini_live:
        vad_start = VADUserTurnStartStrategy(enable_interruptions=False)
    else:
        vad_start = VADUserTurnStartStrategy()

    vad_driven = UserTurnStrategies(
        start=[vad_start],
        stop=[SpeechTimeoutUserTurnStopStrategy()],
    )
    return vad_driven, SileroVADAnalyzer(params=VADParams(stop_secs=0.2))


async def run_pipeline_telephony(
    websocket,
    *,
    provider_name: str,
    workflow_id: int,
    workflow_run_id: int,
    user_id: int,
    call_id: str,
    transport_kwargs: dict,
) -> None:
    """Run a pipeline for any telephony provider.

    Replaces the previous per-provider run_pipeline_<x> functions. The
    provider's transport factory and audio config are looked up from the
    registry, so adding a new provider requires no changes here.

    Args:
        websocket: The accepted WebSocket from the provider.
        provider_name: Stable identifier of the provider (registry key).
        workflow_id: Workflow being executed.
        workflow_run_id: Workflow run row.
        user_id: Owner of the workflow.
        call_id: Provider call identifier (stored in cost_info for billing).
        transport_kwargs: Provider-specific kwargs forwarded to the transport
            factory (e.g. stream_sid + call_sid for Twilio).

    Raises:
        HTTPException: 404 when the workflow cannot be loaded for this user.
        Exception: Any error raised while running the pipeline is logged with
            its traceback and re-raised.
    """
    logger.debug(f"Running {provider_name} pipeline for workflow_run {workflow_run_id}")
    set_current_run_id(workflow_run_id)

    await db_client.update_workflow_run(workflow_run_id, cost_info={"call_id": call_id})

    workflow = await db_client.get_workflow(workflow_id, user_id)
    if not workflow:
        # The transport factory below needs workflow.organization_id, so fail
        # with the same 404 that _run_pipeline uses instead of an
        # AttributeError on None.
        raise HTTPException(status_code=404, detail="Workflow not found")

    # Set org context early so work started by the transport sees it.
    set_current_org_id(workflow.organization_id)

    ambient_noise_config = (workflow.workflow_configurations or {}).get(
        "ambient_noise_configuration"
    )

    # The telephony config id is stamped on the workflow run when it's created
    # (test call, campaign dispatch, inbound). Transports use it to load creds
    # from the right config row. Falls back to None for legacy runs (transports
    # then resolve the org's default config).
    workflow_run = await db_client.get_workflow_run(workflow_run_id)
    telephony_configuration_id = None
    if workflow_run and workflow_run.initial_context:
        telephony_configuration_id = workflow_run.initial_context.get(
            "telephony_configuration_id"
        )

    # Resolve effective user config here so the transport can tune its
    # bot-stopped-speaking fallback based on is_realtime; pass the resolved
    # values into _run_pipeline so it doesn't fetch them again.
    from api.services.configuration.resolve import resolve_effective_config

    user_config = await db_client.get_user_configurations(user_id)
    run_configs = (
        (workflow_run.definition.workflow_configurations or {}) if workflow_run else {}
    )
    user_config = resolve_effective_config(
        user_config, run_configs.get("model_overrides")
    )
    is_realtime = bool(user_config.is_realtime and user_config.realtime is not None)

    spec = telephony_registry.get(provider_name)
    audio_config = create_audio_config(provider_name)

    transport = await spec.transport_factory(
        websocket,
        workflow_run_id,
        audio_config,
        workflow.organization_id,
        ambient_noise_config=ambient_noise_config,
        telephony_configuration_id=telephony_configuration_id,
        is_realtime=is_realtime,
        **transport_kwargs,
    )

    try:
        await _run_pipeline(
            transport,
            workflow_id,
            workflow_run_id,
            user_id,
            audio_config=audio_config,
            workflow_run=workflow_run,
            resolved_user_config=user_config,
        )
    except Exception as e:
        # logger.exception attaches the active traceback. Values are passed as
        # positional format arguments so braces inside the exception text are
        # never interpreted as format fields.
        logger.exception(
            "[run {}] Error in {} pipeline: {}",
            workflow_run_id,
            provider_name,
            e,
        )
        raise


async def run_pipeline_smallwebrtc(
    webrtc_connection: SmallWebRTCConnection,
    workflow_id: int,
    workflow_run_id: int,
    user_id: int,
    call_context_vars: dict | None = None,
    user_provider_id: str | None = None,
) -> None:
    """Run a pipeline for a WebRTC connection.

    Args:
        webrtc_connection: The SmallWebRTC connection to build the transport on.
        workflow_id: Workflow being executed.
        workflow_run_id: Workflow run row.
        user_id: Owner of the workflow.
        call_context_vars: Extra context variables merged over the run's
            initial context by ``_run_pipeline``. ``None`` means no extras.
        user_provider_id: Provider id forwarded to ``_run_pipeline``; when
            omitted, ``_run_pipeline`` looks it up from the user record.
    """
    logger.debug(
        f"Running pipeline for WebRTC connection with workflow_id: {workflow_id} and workflow_run_id: {workflow_run_id}"
    )
    set_current_run_id(workflow_run_id)

    # Get workflow to extract all pipeline configurations
    workflow = await db_client.get_workflow(workflow_id, user_id)

    # Set org context early so tasks created by the transport inherit it
    if workflow:
        set_current_org_id(workflow.organization_id)

    ambient_noise_config = None
    if workflow and workflow.workflow_configurations:
        ambient_noise_config = workflow.workflow_configurations.get(
            "ambient_noise_configuration"
        )

    # Create audio configuration for WebRTC
    audio_config = create_audio_config(WorkflowRunMode.SMALLWEBRTC.value)

    # Resolve workflow_run + effective user_config here so the transport can
    # tune its bot-stopped-speaking fallback based on is_realtime. _run_pipeline
    # reuses these via kwargs so we don't fetch twice.
    from api.services.configuration.resolve import resolve_effective_config

    workflow_run = await db_client.get_workflow_run(workflow_run_id, user_id)
    user_config = await db_client.get_user_configurations(user_id)
    run_configs = (
        (workflow_run.definition.workflow_configurations or {}) if workflow_run else {}
    )
    user_config = resolve_effective_config(
        user_config, run_configs.get("model_overrides")
    )
    is_realtime = bool(user_config.is_realtime and user_config.realtime is not None)

    transport = await create_webrtc_transport(
        webrtc_connection,
        workflow_run_id,
        audio_config,
        ambient_noise_config,
        is_realtime=is_realtime,
    )
    await _run_pipeline(
        transport,
        workflow_id,
        workflow_run_id,
        user_id,
        # Always hand over a dict so the callee never sees None.
        call_context_vars=call_context_vars or {},
        audio_config=audio_config,
        user_provider_id=user_provider_id,
        workflow_run=workflow_run,
        resolved_user_config=user_config,
    )


async def _run_pipeline(
    transport,
    workflow_id: int,
    workflow_run_id: int,
    user_id: int,
    call_context_vars: dict | None = None,
    audio_config: AudioConfig = None,
    user_provider_id: str | None = None,
    workflow_run=None,
    resolved_user_config=None,
) -> None:
    """
    Run the pipeline with the given transport and configuration

    Args:
        transport: The transport to use for the pipeline
        workflow_id: The ID of the workflow
        workflow_run_id: The ID of the workflow run
        user_id: The ID of the user
        call_context_vars: Extra context variables merged over the run's
            initial_context. ``None`` or empty means no extras.
        audio_config: Audio configuration used to build services and pipeline
            components. Both entry points in this module always pass it; the
            body reads ``audio_config.pipeline_sample_rate`` unconditionally.
        user_provider_id: Provider id used for event tracking. Looked up from
            the user record when not supplied.
        workflow_run: Pre-fetched workflow run row. Fetched here if None.
        resolved_user_config: User configuration with model_overrides already
            applied. Fetched and resolved here if None.

    Raises:
        HTTPException: 404 when the workflow run or workflow cannot be loaded,
            400 when the workflow run is already completed.
    """
    if workflow_run is None:
        workflow_run = await db_client.get_workflow_run(workflow_run_id, user_id)
    if workflow_run is None:
        raise HTTPException(status_code=404, detail="Workflow run not found")

    # If the workflow run is already completed, we don't need to run it again
    if workflow_run.is_completed:
        raise HTTPException(status_code=400, detail="Workflow run already completed")

    # initial_context may be unset on the run row; treat that as empty so the
    # dict merges below cannot fail on None.
    merged_call_context_vars = workflow_run.initial_context or {}
    # If there is some extra call_context_vars, fold them in. Persistence
    # happens once below, after runtime_configuration is also resolved.
    if call_context_vars:
        merged_call_context_vars = {**merged_call_context_vars, **call_context_vars}

    # Get workflow for metadata (name, organization_id, call_disposition_codes)
    workflow = await db_client.get_workflow(workflow_id, user_id)
    if not workflow:
        raise HTTPException(status_code=404, detail="Workflow not found")

    # Use the run's pinned definition for graph + configs (not the workflow's current)
    run_definition = workflow_run.definition
    run_workflow_json = run_definition.workflow_json
    run_configs = run_definition.workflow_configurations or {}

    # Extract configurations from the version's workflow_configurations,
    # falling back to defaults when a key is absent.
    max_call_duration_seconds = run_configs.get(
        "max_call_duration", 300
    )  # Default 5 minutes
    max_user_idle_timeout = run_configs.get(
        "max_user_idle_timeout", 10.0
    )  # Default 10 seconds
    smart_turn_stop_secs = run_configs.get(
        "smart_turn_stop_secs", 2.0
    )  # Default 2 seconds for incomplete turn timeout
    turn_stop_strategy = run_configs.get(
        "turn_stop_strategy", "transcription"
    )  # Default to transcription-based detection

    # Dictionary words for STT boosting: a comma-separated string in config.
    keyterms = None
    dictionary = run_configs.get("dictionary")
    if dictionary and isinstance(dictionary, str):
        keyterms = [term.strip() for term in dictionary.split(",") if term.strip()]

    # Resolve model overrides from the version onto global user config (skip
    # when the caller already resolved it).
    if resolved_user_config is None:
        from api.services.configuration.resolve import resolve_effective_config

        user_config = await db_client.get_user_configurations(user_id)
        user_config = resolve_effective_config(
            user_config, run_configs.get("model_overrides")
        )
    else:
        user_config = resolved_user_config

    # Detect realtime mode (speech-to-speech services like OpenAI Realtime, Gemini Live)
    is_realtime = bool(user_config.is_realtime and user_config.realtime is not None)

    # Create services based on user configuration
    if is_realtime:
        llm = create_realtime_llm_service(user_config, audio_config)
        stt = None
        tts = None
        # Realtime services don't implement run_inference, so create a
        # separate text LLM for variable extraction and other out-of-band
        # inference calls.
        inference_llm = create_llm_service(user_config)
    else:
        stt = create_stt_service(user_config, audio_config, keyterms=keyterms)
        tts = create_tts_service(user_config, audio_config)
        llm = create_llm_service(user_config)
        inference_llm = None

    # Stamp the providers/models actually resolved for this run onto
    # initial_context so they're available for post-call analytics
    # (model_overrides may have shifted them away from the org-level
    # user_config).
    if is_realtime:
        # llm_* refers to the side-channel text LLM (variable extraction,
        # voicemail detection); realtime_* is the speech-to-speech service.
        runtime_configuration = {
            "realtime_provider": user_config.realtime.provider,
            "realtime_model": user_config.realtime.model,
            "llm_provider": user_config.llm.provider,
            "llm_model": user_config.llm.model,
        }
    else:
        runtime_configuration = {
            "stt_provider": user_config.stt.provider,
            "stt_model": user_config.stt.model,
            "tts_provider": user_config.tts.provider,
            "tts_model": user_config.tts.model,
            "llm_provider": user_config.llm.provider,
            "llm_model": user_config.llm.model,
        }
    merged_call_context_vars = {
        **merged_call_context_vars,
        "runtime_configuration": runtime_configuration,
    }
    await db_client.update_workflow_run(
        workflow_run_id, initial_context=merged_call_context_vars
    )

    workflow_graph = WorkflowGraph(ReactFlowDTO.model_validate(run_workflow_json))

    # Pre-call fetch: fire early so it runs concurrently with remaining setup
    pre_call_fetch_task = None
    start_node = workflow_graph.nodes.get(workflow_graph.start_node_id)
    if (
        start_node
        and start_node.pre_call_fetch_enabled
        and start_node.pre_call_fetch_url
    ):
        logger.info(
            f"Pre-call fetch enabled for workflow run {workflow_run_id}, "
            f"firing request to {start_node.pre_call_fetch_url}"
        )
        pre_call_fetch_task = asyncio.create_task(
            execute_pre_call_fetch(
                url=start_node.pre_call_fetch_url,
                credential_uuid=start_node.pre_call_fetch_credential_uuid,
                call_context_vars=merged_call_context_vars,
                workflow_id=workflow_id,
                organization_id=workflow.organization_id,
            )
        )

    # Create in-memory logs buffer early so it can be used by engine callbacks
    in_memory_logs_buffer = InMemoryLogsBuffer(workflow_run_id)

    # Create node transition callback (always logs to buffer, optionally streams to WS)
    ws_sender = get_ws_sender(workflow_run_id)

    async def send_node_transition(
        node_id: str,
        node_name: str,
        previous_node_id: Optional[str],
        previous_node_name: Optional[str],
        allow_interrupt: bool = False,
    ) -> None:
        """Send node transition event to logs buffer and optionally via WebSocket."""
        # Update current node on the buffer so subsequent events are tagged
        in_memory_logs_buffer.set_current_node(node_id, node_name)

        message = build_node_transition_event(
            node_id=node_id,
            node_name=node_name,
            previous_node_id=previous_node_id,
            previous_node_name=previous_node_name,
            allow_interrupt=allow_interrupt,
        )
        # Send via WebSocket if available
        if ws_sender:
            try:
                await ws_sender({**message, "node_id": node_id, "node_name": node_name})
            except Exception as e:
                logger.debug(f"Failed to send node transition via WebSocket: {e}")

        # Always log to in-memory buffer (node_id/node_name injected by buffer's append)
        try:
            await in_memory_logs_buffer.append(message)
        except Exception as e:
            logger.error(f"Failed to append node transition to logs buffer: {e}")

    node_transition_callback = send_node_transition

    # Extract embeddings configuration from user config
    embeddings_api_key = None
    embeddings_model = None
    embeddings_base_url = None
    if user_config and user_config.embeddings:
        embeddings_api_key = user_config.embeddings.api_key
        embeddings_model = user_config.embeddings.model
        embeddings_base_url = getattr(user_config.embeddings, "base_url", None)

    # Check for active recordings (the lookup is keyed by organization id) so
    # the engine can include recording response mode instructions in all node
    # prompts.
    has_recordings = await db_client.has_active_recordings(workflow.organization_id)

    # NOTE(review): this flag is read from the workflow's current
    # configurations rather than the run's pinned run_configs used above —
    # confirm that is intentional.
    context_compaction_enabled = (workflow.workflow_configurations or {}).get(
        "context_compaction_enabled", False
    )
    # Context compaction doesn't apply in realtime mode: the speech-to-speech
    # service manages its own conversation state server-side.
    if is_realtime and context_compaction_enabled:
        logger.info("Disabling context_compaction_enabled for realtime workflow run")
        context_compaction_enabled = False

    engine = PipecatEngine(
        llm=llm,
        inference_llm=inference_llm,
        workflow=workflow_graph,
        call_context_vars=merged_call_context_vars,
        workflow_run_id=workflow_run_id,
        node_transition_callback=node_transition_callback,
        embeddings_api_key=embeddings_api_key,
        embeddings_model=embeddings_model,
        embeddings_base_url=embeddings_base_url,
        has_recordings=has_recordings,
        context_compaction_enabled=context_compaction_enabled,
    )

    # Create pipeline components
    audio_buffer, context = create_pipeline_components(audio_config)

    integration_runtime_sessions = create_runtime_sessions(
        IntegrationRuntimeContext(
            workflow_run_id=workflow_run_id,
            workflow_run=workflow_run,
            workflow_graph=workflow_graph,
            run_definition=run_definition,
            user_config=user_config,
            is_realtime=is_realtime,
            context_messages_provider=lambda: context.messages,
        )
    )

    # Set the context and audio_config on the engine after creation
    engine.set_context(context)
    engine.set_audio_config(audio_config)

    assistant_params = LLMAssistantAggregatorParams(
        correct_aggregation_callback=engine.create_aggregation_correction_callback(),
    )

    user_mute_strategies = [
        MuteUntilFirstBotCompleteUserMuteStrategy(),
        FunctionCallUserMuteStrategy(),
        CallbackUserMuteStrategy(should_mute_callback=engine.should_mute_user),
    ]

    # Configure turn strategies based on STT provider, model, and workflow configuration
    if is_realtime:
        # Realtime services still need user-turn tracking even when the model
        # itself owns speech generation and interruption behavior. The helper
        # also decides whether a local VAD analyzer is needed at all.
        user_turn_strategies, user_vad_analyzer = _create_realtime_user_turn_config(
            user_config.realtime.provider
        )
    else:
        # Only the non-realtime path uses this analyzer, so build it here
        # rather than constructing one that realtime mode would throw away.
        user_vad_analyzer = SileroVADAnalyzer(params=VADParams(stop_secs=0.2))

        # Deepgram Flux uses external turn detection (VAD + External start/stop)
        # Other models use configurable turn detection strategy
        is_deepgram_flux = (
            user_config.stt.provider == ServiceProviders.DEEPGRAM.value
            and user_config.stt.model == "flux-general-en"
        )

        if is_deepgram_flux:
            user_turn_strategies = UserTurnStrategies(
                start=[
                    VADUserTurnStartStrategy(),
                    ExternalUserTurnStartStrategy(enable_interruptions=True),
                ],
                stop=[ExternalUserTurnStopStrategy()],
            )
        elif turn_stop_strategy == "turn_analyzer":
            # Smart Turn Analyzer: best for longer responses with natural pauses
            smart_turn_params = SmartTurnParams(stop_secs=smart_turn_stop_secs)
            user_turn_strategies = UserTurnStrategies(
                start=[
                    VADUserTurnStartStrategy(),
                    TranscriptionUserTurnStartStrategy(),
                ],
                stop=[
                    TurnAnalyzerUserTurnStopStrategy(
                        turn_analyzer=LocalSmartTurnAnalyzerV3(params=smart_turn_params)
                    )
                ],
            )
        else:
            # Transcription-based (default): best for short 1-2 word responses
            user_turn_strategies = UserTurnStrategies(
                start=[
                    VADUserTurnStartStrategy(),
                    TranscriptionUserTurnStartStrategy(),
                ],
                stop=[SpeechTimeoutUserTurnStopStrategy()],
            )

    user_params = LLMUserAggregatorParams(
        user_turn_strategies=user_turn_strategies,
        user_mute_strategies=user_mute_strategies,
        user_idle_timeout=max_user_idle_timeout,
        vad_analyzer=user_vad_analyzer,
    )
    context_aggregator = LLMContextAggregatorPair(
        context, assistant_params=assistant_params, user_params=user_params
    )

    # Create the engine callbacks processor (max call duration, generation
    # start and LLM text frames are routed to the engine)
    pipeline_engine_callback_processor = PipelineEngineCallbacksProcessor(
        max_call_duration_seconds=max_call_duration_seconds,
        max_duration_end_task_callback=engine.create_max_duration_callback(),
        generation_started_callback=engine.create_generation_started_callback(),
        llm_text_frame_callback=engine.handle_llm_text_frame,
    )

    pipeline_metrics_aggregator = PipelineMetricsAggregator()

    user_context_aggregator = context_aggregator.user()
    assistant_context_aggregator = context_aggregator.assistant()

    # Register user idle event handlers
    user_idle_handler = engine.create_user_idle_handler()

    @user_context_aggregator.event_handler("on_user_turn_idle")
    async def on_user_turn_idle(aggregator):
        await user_idle_handler.handle_idle(aggregator)

    @user_context_aggregator.event_handler("on_user_turn_started")
    async def on_user_turn_started(aggregator, strategy):
        user_idle_handler.reset()

    voicemail_detector = None
    recording_router = None

    # Create recording audio fetcher (used by recording router, audio greetings,
    # and audio transition speech)
    fetch_audio = create_recording_audio_fetcher(
        organization_id=workflow.organization_id,
        pipeline_sample_rate=audio_config.pipeline_sample_rate,
    )
    engine.set_fetch_recording_audio(fetch_audio)

    # NOTE(review): like context_compaction_enabled, this is read from the
    # workflow's current configurations, not the run's pinned run_configs —
    # confirm that is intentional.
    voicemail_config = (workflow.workflow_configurations or {}).get(
        "voicemail_detection", {}
    )
    voicemail_enabled = voicemail_config.get("enabled", False)
    if voicemail_enabled and is_realtime:
        logger.info(
            f"Disabling voicemail detection for realtime workflow run {workflow_run_id}"
        )
    elif voicemail_enabled:
        logger.info(f"Voicemail detection enabled for workflow run {workflow_run_id}")
        # Create a separate LLM instance for the voicemail sub-pipeline
        # (can't share with main pipeline as it would mess up frame linking)
        if voicemail_config.get("use_workflow_llm", True):
            voicemail_llm = create_llm_service(user_config)
        else:
            voicemail_llm = create_llm_service_from_provider(
                provider=voicemail_config.get("provider", "openai"),
                model=voicemail_config.get("model", "gpt-4.1"),
                api_key=voicemail_config.get("api_key", ""),
            )

        long_speech_timeout = voicemail_config.get("long_speech_timeout", 8.0)
        custom_system_prompt = voicemail_config.get("system_prompt") or None

        voicemail_detector = VoicemailDetector(
            llm=voicemail_llm,
            long_speech_timeout=long_speech_timeout,
            custom_system_prompt=custom_system_prompt,
        )

        # Register event handler to end task when voicemail is detected
        @voicemail_detector.event_handler("on_voicemail_detected")
        async def _on_voicemail_detected(_processor):
            logger.info(f"Voicemail detected for workflow run {workflow_run_id}")
            await engine.end_call_with_reason(
                reason=EndTaskReason.VOICEMAIL_DETECTED.value,
                abort_immediately=True,
            )

    # Strong references to fire-and-forget tasks. The event loop only keeps
    # weak references to tasks, so an unreferenced task can be garbage
    # collected before it finishes. This set lives as long as this coroutine.
    background_tasks: set = set()

    # Recording router is only meaningful in non-realtime mode (it routes between
    # pre-recorded audio playback and dynamic TTS; realtime LLMs produce audio
    # directly).
    if not is_realtime and has_recordings:
        recording_router = RecordingRouterProcessor(
            audio_sample_rate=audio_config.pipeline_sample_rate,
            fetch_recording_audio=fetch_audio,
        )
        # Warm the recording cache in the background so audio is ready
        # before the first playback request.
        warm_cache_task = asyncio.create_task(
            warm_recording_cache(
                organization_id=workflow.organization_id,
                pipeline_sample_rate=audio_config.pipeline_sample_rate,
            )
        )
        background_tasks.add(warm_cache_task)
        warm_cache_task.add_done_callback(background_tasks.discard)

    # Build the pipeline
    if is_realtime:
        pipeline = build_realtime_pipeline(
            transport,
            llm,
            audio_buffer,
            user_context_aggregator,
            assistant_context_aggregator,
            pipeline_engine_callback_processor,
            pipeline_metrics_aggregator,
            voicemail_detector=voicemail_detector,
        )
    else:
        pipeline = build_pipeline(
            transport,
            stt,
            audio_buffer,
            llm,
            tts,
            user_context_aggregator,
            assistant_context_aggregator,
            pipeline_engine_callback_processor,
            pipeline_metrics_aggregator,
            voicemail_detector=voicemail_detector,
            recording_router=recording_router,
        )

    # Create pipeline task with audio configuration
    task = create_pipeline_task(pipeline, workflow_run_id, audio_config)

    for runtime_session in integration_runtime_sessions:
        runtime_session.attach(task)
        logger.info(
            "[integrations] attached runtime session '{}' for workflow run {}",
            runtime_session.name,
            workflow_run_id,
        )

    # Now set the task and transport output on the engine
    engine.set_task(task)
    engine.set_transport_output(transport.output())

    # Initialize the engine to set the initial context with
    # System Prompt and Tools
    await engine.initialize()

    # Add real-time feedback observer (always logs to buffer, streams to WS if available)
    feedback_observer = RealtimeFeedbackObserver(
        ws_sender=ws_sender,
        logs_buffer=in_memory_logs_buffer,
    )
    task.add_observer(feedback_observer)

    # Register latency observer to log user-to-bot response latency
    if task.user_bot_latency_observer:

        @task.user_bot_latency_observer.event_handler("on_latency_measured")
        async def on_latency_measured(observer, latency_seconds):
            message = {
                "type": RealtimeFeedbackType.LATENCY_MEASURED.value,
                "payload": {
                    "latency_seconds": latency_seconds,
                },
            }
            if ws_sender:
                try:
                    # Tag the WebSocket copy with the current node when known;
                    # the buffer copy is appended untagged below.
                    ws_message = message
                    if in_memory_logs_buffer.current_node_id:
                        ws_message = {
                            **message,
                            "node_id": in_memory_logs_buffer.current_node_id,
                            "node_name": in_memory_logs_buffer.current_node_name,
                        }
                    await ws_sender(ws_message)
                except Exception as e:
                    logger.debug(f"Failed to send latency via WebSocket: {e}")
            try:
                await in_memory_logs_buffer.append(message)
            except Exception as e:
                logger.error(f"Failed to append latency to logs buffer: {e}")

    # Register turn log handlers for all call types (WebRTC and telephony)
    register_turn_log_handlers(
        in_memory_logs_buffer, user_context_aggregator, assistant_context_aggregator
    )

    # Register event handlers — resolve provider_id for PostHog tracking
    if not user_provider_id:
        user_obj = await db_client.get_user_by_id(user_id)
        user_provider_id = str(user_obj.provider_id) if user_obj else None
    in_memory_audio_buffer = register_event_handlers(
        task,
        transport,
        workflow_run_id,
        engine=engine,
        audio_buffer=audio_buffer,
        in_memory_logs_buffer=in_memory_logs_buffer,
        pipeline_metrics_aggregator=pipeline_metrics_aggregator,
        audio_config=audio_config,
        pre_call_fetch_task=pre_call_fetch_task,
        user_provider_id=user_provider_id,
        integration_runtime_sessions=integration_runtime_sessions,
    )

    register_audio_data_handler(audio_buffer, workflow_run_id, in_memory_audio_buffer)

    try:
        # Run the pipeline
        await run_pipeline_worker(task)
        logger.info(f"Task completed for run {workflow_run_id}")
    except asyncio.CancelledError:
        logger.warning("Received CancelledError in _run_pipeline")
    finally:
        # Close MCP sessions here, not in engine.cleanup(). The anyio cancel
        # scopes opened by MCPClient.start() in engine.initialize() are
        # task-affine; this finally runs in the same task as initialize(),
        # whereas engine.cleanup() runs in a pipecat event-handler task.
        try:
            await engine.close_mcp_sessions()
        finally:
            # Clean up the observer even if closing MCP sessions raised.
            await feedback_observer.cleanup()
        logger.debug(f"Cleaned up context providers for workflow run {workflow_run_id}")