fix: changes to update pipecat version to 0.0.100 (#122)

* feat: add stt evals
* add smart turn as provider
* chore: remove deprecations
* chore: format files
* fix: remove deprecated UserIdleProcessor
* fix: remove deprecated TranscriptProcessor
* chore: update pipecat submodule
* feat: add evals visualisation
* fix: trigger llm generation on client connected and pipeline started
* chore: update pipecat
* chore: update pipecat submodule
* Add tests
* fix: slow loading of workflow page
* chore: update pipecat submodule
* Show version after release
* Fixes #99
* fix: provider check for websocket connection
* Fixes #107
* Fix #96
* chore: fix documentation
* fix: cloudonix campaign call error

---------

Co-authored-by: Sabiha Khan <sabihak89@gmail.com>
2026-06-28 08:49:42 +02:00 · 2026-01-23 18:53:59 +05:30 · 2026-01-23 18:53:59 +05:30 · 911c5ed416
commit 911c5ed416
parent a4367bd83b
104 changed files with 16919 additions and 597 deletions
--- a/api/services/campaign/call_dispatcher.py
+++ b/api/services/campaign/call_dispatcher.py
@ -170,13 +170,6 @@ class CampaignCallDispatcher:
            )
            raise ValueError(f"Workflow {campaign.workflow_id} not found")

-        # Merge context variables (queued_run context already includes retry info if applicable)
-        initial_context = {
-            **workflow.template_context_variables,
-            **queued_run.context_variables,
-            "campaign_id": campaign.id,
-        }
-
        # Extract phone number
        phone_number = queued_run.context_variables.get("phone_number")
        if not phone_number:
@ -186,13 +179,25 @@ class CampaignCallDispatcher:
            )
            raise ValueError(f"No phone number in queued run {queued_run.id}")

-        # Create workflow run with queued_run_id tracking
-        workflow_run_name = f"WR-CAMPAIGN-{campaign.id}-{queued_run.id}"
-
        # Get provider first to determine the mode
        provider = await self.get_telephony_provider(campaign.organization_id)
        workflow_run_mode = provider.PROVIDER_NAME
+        
+        logger.info(f"Provider name: {provider.PROVIDER_NAME}")
+        logger.info(f"Queued run context: {queued_run.context_variables}")

+        # Merge context variables (queued_run context already includes retry info if applicable)
+        initial_context = {
+            **workflow.template_context_variables,
+            **queued_run.context_variables,
+            "campaign_id": campaign.id,
+            "provider": provider.PROVIDER_NAME,
+        }
+        
+        logger.info(f"Final initial_context: {initial_context}")
+
+        # Create workflow run with queued_run_id tracking
+        workflow_run_name = f"WR-CAMPAIGN-{campaign.id}-{queued_run.id}"
        try:
            workflow_run = await db_client.create_workflow_run(
                name=workflow_run_name,
@ -243,6 +248,8 @@ class CampaignCallDispatcher:
                to_number=phone_number,
                webhook_url=webhook_url,
                workflow_run_id=workflow_run.id,
+                workflow_id=campaign.workflow_id,
+                user_id=campaign.created_by,
            )

            # Store provider type and metadata in gathered_context
--- a/api/services/configuration/registry.py
+++ b/api/services/configuration/registry.py
@ -300,7 +300,7 @@ TTSConfig = Annotated[
 ###################################################### STT ########################################################################


-DEEPGRAM_STT_MODELS = ["nova-2", "nova-3-general"]
+DEEPGRAM_STT_MODELS = ["nova-2", "nova-3-general", "flux-general-en"]
 DEEPGRAM_LANGUAGES = [
    "multi",
    "en",
--- a/api/services/looptalk/core/pipeline_builder.py
+++ b/api/services/looptalk/core/pipeline_builder.py
@ -103,7 +103,6 @@ class LoopTalkPipelineBuilder:

        # Set the context and audio_buffer after creation
        engine.set_context(context)
-        engine.set_audio_buffer(audio_buffer)

        context_aggregator = LLMContextAggregatorPair(context)

--- a/api/services/looptalk/internal_serializer.py
+++ b/api/services/looptalk/internal_serializer.py
@ -12,9 +12,8 @@ from pipecat.frames.frames import (
    Frame,
    InputAudioRawFrame,
    OutputAudioRawFrame,
-    StartFrame,
 )
-from pipecat.serializers.base_serializer import FrameSerializer, FrameSerializerType
+from pipecat.serializers.base_serializer import FrameSerializer


 class InternalFrameSerializer(FrameSerializer):
@ -24,15 +23,6 @@ class InternalFrameSerializer(FrameSerializer):
    preventing control frames from creating infinite loops.
    """

-    @property
-    def type(self) -> FrameSerializerType:
-        """Internal transport uses binary frames."""
-        return FrameSerializerType.BINARY
-
-    async def setup(self, frame: StartFrame):
-        """No setup required for internal transport."""
-        pass
-
    async def serialize(self, frame: Frame) -> bytes | None:
        """Only serialize audio frames for transmission between agents."""
        # Only pass audio frames between agents
--- a/api/services/pipecat/event_handlers.py
+++ b/api/services/pipecat/event_handlers.py
@ -22,16 +22,21 @@ from pipecat.pipeline.task import PipelineTask
 from pipecat.processors.audio.audio_buffer_processor import AudioBufferProcessor


-def register_transport_event_handlers(
+def register_event_handlers(
    task: PipelineTask,
    transport,
-    workflow_run_id,
+    workflow_run_id: int,
    engine: PipecatEngine,
    audio_buffer: AudioBufferProcessor,
+    in_memory_logs_buffer: InMemoryLogsBuffer,
+    pipeline_metrics_aggregator: PipelineMetricsAggregator,
    audio_config=AudioConfig,
 ):
-    """Register event handlers for transport events"""
+    """Register all event handlers for transport and task events.

+    Returns:
+        Tuple of (in_memory_audio_buffer, in_memory_transcript_buffer) for use by other handlers.
+    """
    # Initialize in-memory buffers with proper audio configuration
    sample_rate = audio_config.pipeline_sample_rate if audio_config else 16000
    num_channels = 1  # Pipeline audio is always mono
@ -48,13 +53,35 @@ def register_transport_event_handlers(
    )
    in_memory_transcript_buffer = InMemoryTranscriptBuffer(workflow_run_id)

+    # Track both events to ensure LLM is only triggered after both occur
+    ready_state = {
+        "pipeline_started": False,
+        "client_connected": False,
+        "llm_triggered": False,
+    }
+
+    async def maybe_trigger_llm():
+        """Trigger LLM only after both pipeline_started and client_connected events."""
+        if (
+            ready_state["pipeline_started"]
+            and ready_state["client_connected"]
+            and not ready_state["llm_triggered"]
+        ):
+            ready_state["llm_triggered"] = True
+            logger.debug(
+                "Both pipeline_started and client_connected received - triggering initial LLM generation"
+            )
+            await engine.llm.queue_frame(LLMContextFrame(engine.context))
+
    @transport.event_handler("on_client_connected")
-    async def on_client_connected(transport, participant):
-        logger.debug("In on_client_connected callback handler - initializing workflow")
+    async def on_client_connected(_transport, _participant):
+        logger.debug("In on_client_connected callback handler")
        await audio_buffer.start_recording()
+        ready_state["client_connected"] = True
+        await maybe_trigger_llm()

    @transport.event_handler("on_client_disconnected")
-    async def on_client_disconnected(transport, participant):
+    async def on_client_disconnected(_transport, _participant):
        call_disposed = engine.is_call_disposed()

        logger.debug(
@ -69,33 +96,16 @@ def register_transport_event_handlers(
        if not call_disposed:
            await task.cancel()

-    # Return the buffers so they can be passed to other handlers
-    return in_memory_audio_buffer, in_memory_transcript_buffer
-
-
-def register_task_event_handler(
-    workflow_run_id: int,
-    engine: PipecatEngine,
-    task: PipelineTask,
-    transport,
-    audio_buffer: AudioBufferProcessor,
-    in_memory_audio_buffer: InMemoryAudioBuffer,
-    in_memory_transcript_buffer: InMemoryTranscriptBuffer,
-    in_memory_logs_buffer: InMemoryLogsBuffer,
-    pipeline_metrics_aggregator: PipelineMetricsAggregator,
-):
    @task.event_handler("on_pipeline_started")
-    async def on_pipeline_started(task: PipelineTask, frame: Frame):
-        logger.debug(
-            "In on_pipeline_started callback handler - triggering initial LLM generation"
-        )
-        # Trigger initial LLM generation after pipeline has started
-        await engine.llm.queue_frame(LLMContextFrame(engine.context))
+    async def on_pipeline_started(_task: PipelineTask, _frame: Frame):
+        logger.debug("In on_pipeline_started callback handler")
+        ready_state["pipeline_started"] = True
+        await maybe_trigger_llm()

    @task.event_handler("on_pipeline_finished")
    async def on_pipeline_finished(
        task: PipelineTask,
-        frame: Frame,
+        _frame: Frame,
    ):
        logger.debug(f"In on_pipeline_finished callback handler")

@ -207,14 +217,13 @@ def register_task_event_handler(
        if workflow_run and workflow_run.campaign_id:
            await campaign_call_dispatcher.release_call_slot(workflow_run_id)

-        # Write buffers to temp files and enqueue S3 upload
+        # Write buffers to temp files and enqueue combined processing task
+        audio_temp_path = None
+        transcript_temp_path = None
+
        try:
-            # Only upload if buffers have content
            if not in_memory_audio_buffer.is_empty:
                audio_temp_path = await in_memory_audio_buffer.write_to_temp_file()
-                await enqueue_job(
-                    FunctionNames.UPLOAD_AUDIO_TO_S3, workflow_run_id, audio_temp_path
-                )
            else:
                logger.debug("Audio buffer is empty, skipping upload")

@ -222,11 +231,6 @@ def register_task_event_handler(
                transcript_temp_path = (
                    await in_memory_transcript_buffer.write_to_temp_file()
                )
-                await enqueue_job(
-                    FunctionNames.UPLOAD_TRANSCRIPT_TO_S3,
-                    workflow_run_id,
-                    transcript_temp_path,
-                )
            else:
                logger.debug("Transcript buffer is empty, skipping upload")

@ -234,10 +238,18 @@ def register_task_event_handler(
            logger.error(f"Error preparing buffers for S3 upload: {e}", exc_info=True)

        await enqueue_job(FunctionNames.CALCULATE_WORKFLOW_RUN_COST, workflow_run_id)
+
+        # Combined task: uploads artifacts then runs integrations sequentially
        await enqueue_job(
-            FunctionNames.RUN_INTEGRATIONS_POST_WORKFLOW_RUN, workflow_run_id
+            FunctionNames.PROCESS_WORKFLOW_COMPLETION,
+            workflow_run_id,
+            audio_temp_path,
+            transcript_temp_path,
        )

+    # Return the buffers so they can be passed to other handlers
+    return in_memory_audio_buffer, in_memory_transcript_buffer
+

 def register_audio_data_handler(
    audio_buffer: AudioBufferProcessor,
@ -260,18 +272,26 @@ def register_audio_data_handler(
            # Could implement overflow to disk here if needed


-def register_transcript_handler(
-    transcript, workflow_run_id, in_memory_buffer: InMemoryTranscriptBuffer
+def register_transcript_handlers(
+    user_aggregator,
+    assistant_aggregator,
+    workflow_run_id,
+    in_memory_buffer: InMemoryTranscriptBuffer,
 ):
-    """Register event handler for transcript updates"""
+    """Register event handlers for transcript updates on context aggregators.

-    @transcript.event_handler("on_transcript_update")
-    async def on_transcript_update(processor, frame):
-        transcript_text = ""
-        for msg in frame.messages:
-            timestamp = f"[{msg.timestamp}] " if msg.timestamp else ""
-            line = f"{timestamp}{msg.role}: {msg.content}\n"
-            transcript_text += line
+    Uses the on_user_turn_stopped and on_assistant_turn_stopped events to capture
+    transcripts as turns complete, following the event-based pattern.
+    """

-        # Use in-memory buffer
-        await in_memory_buffer.append(transcript_text)
+    @user_aggregator.event_handler("on_user_turn_stopped")
+    async def on_user_turn_stopped(aggregator, strategy, message):
+        timestamp = f"[{message.timestamp}] " if message.timestamp else ""
+        line = f"{timestamp}user: {message.content}\n"
+        await in_memory_buffer.append(line)
+
+    @assistant_aggregator.event_handler("on_assistant_turn_stopped")
+    async def on_assistant_turn_stopped(aggregator, message):
+        timestamp = f"[{message.timestamp}] " if message.timestamp else ""
+        line = f"{timestamp}assistant: {message.content}\n"
+        await in_memory_buffer.append(line)
--- a/api/services/pipecat/pipeline_builder.py
+++ b/api/services/pipecat/pipeline_builder.py
@ -1,5 +1,4 @@
 import os
-from typing import TYPE_CHECKING

 from loguru import logger

@ -11,14 +10,10 @@ from pipecat.pipeline.pipeline import Pipeline
 from pipecat.pipeline.task import PipelineParams, PipelineTask
 from pipecat.processors.aggregators.llm_context import LLMContext
 from pipecat.processors.audio.audio_buffer_processor import AudioBufferProcessor
-from pipecat.processors.transcript_processor import TranscriptProcessor
 from pipecat.utils.context import turn_var

-if TYPE_CHECKING:
-    from api.services.workflow.pipecat_engine import PipecatEngine

-
-def create_pipeline_components(audio_config: AudioConfig, engine: "PipecatEngine"):
+def create_pipeline_components(audio_config: AudioConfig):
    """Create and return the main pipeline components with proper audio configuration"""
    logger.info(f"Creating pipeline components with audio config: {audio_config}")

@ -28,28 +23,21 @@ def create_pipeline_components(audio_config: AudioConfig, engine: "PipecatEngine
        buffer_size=audio_config.buffer_size_bytes,
    )

-    transcript = TranscriptProcessor(
-        assistant_correct_aggregation_callback=engine.create_aggregation_correction_callback()
-    )
-
    context = LLMContext()

-    return audio_buffer, transcript, context
+    return audio_buffer, context


 def build_pipeline(
    transport,
    stt,
-    transcript,
    audio_buffer,
    llm,
    tts,
    user_context_aggregator,
    assistant_context_aggregator,
    pipeline_engine_callback_processor,
-    stt_mute_filter,
    pipeline_metrics_aggregator,
-    user_idle_disconnect,
    voicemail_detector=None,
 ):
    """Build the main pipeline with all components.
@ -63,7 +51,7 @@ def build_pipeline(
    # Build processors list with optional voicemail detection
    processors = [
        transport.input(),  # Transport user input
-        stt,  # STT (audio_passthrough=True by default, passes InputAudioRawFrame)
+        stt,
    ]

    # Insert voicemail detector after STT if enabled
@ -76,16 +64,12 @@ def build_pipeline(
    # Continue with the rest of the pipeline
    processors.extend(
        [
-            stt_mute_filter,  # STTMuteFilters don't let VAD related events pass through if muted
-            user_idle_disconnect,
-            transcript.user(),
            user_context_aggregator,
            llm,  # LLM
            pipeline_engine_callback_processor,
            tts,  # TTS
            transport.output(),  # Transport bot output
            audio_buffer,  # AudioBufferProcessor - records both input and output audio
-            transcript.assistant(),
            assistant_context_aggregator,  # Assistant spoken responses
            pipeline_metrics_aggregator,
        ]
@ -98,7 +82,6 @@ def create_pipeline_task(pipeline, workflow_run_id, audio_config: AudioConfig =
    """Create a pipeline task with appropriate parameters"""
    # Set up pipeline params with audio configuration if provided
    pipeline_params = PipelineParams(
-        allow_interruptions=True,
        enable_metrics=True,
        enable_usage_metrics=True,
        send_initial_empty_metrics=False,
@ -119,6 +102,7 @@ def create_pipeline_task(pipeline, workflow_run_id, audio_config: AudioConfig =
        pipeline,
        params=pipeline_params,
        enable_tracing=ENABLE_TRACING,
+        enable_rtvi=False,
        conversation_id=f"{workflow_run_id}",
    )

--- a/api/services/pipecat/run_pipeline.py
+++ b/api/services/pipecat/run_pipeline.py
@ -7,12 +7,12 @@ from loguru import logger
 from api.db import db_client
 from api.db.models import WorkflowModel
 from api.enums import WorkflowRunMode
+from api.services.configuration.registry import ServiceProviders
 from api.services.pipecat.audio_config import AudioConfig, create_audio_config
 from api.services.pipecat.event_handlers import (
    register_audio_data_handler,
-    register_task_event_handler,
-    register_transcript_handler,
-    register_transport_event_handlers,
+    register_event_handlers,
+    register_transcript_handlers,
 )
 from api.services.pipecat.in_memory_buffers import InMemoryLogsBuffer
 from api.services.pipecat.pipeline_builder import (
@ -46,20 +46,25 @@ from api.services.workflow.pipecat_engine import PipecatEngine
 from api.services.workflow.workflow import WorkflowGraph
 from pipecat.extensions.voicemail.voicemail_detector import VoicemailDetector
 from pipecat.pipeline.base_task import PipelineTaskParams
-from pipecat.processors.aggregators.llm_response import (
+from pipecat.processors.aggregators.llm_response_universal import (
    LLMAssistantAggregatorParams,
+    LLMContextAggregatorPair,
    LLMUserAggregatorParams,
 )
-from pipecat.processors.aggregators.llm_response_universal import (
-    LLMContextAggregatorPair,
-)
-from pipecat.processors.filters.stt_mute_filter import (
-    STTMuteConfig,
-    STTMuteFilter,
-    STTMuteStrategy,
-)
-from pipecat.processors.user_idle_processor import UserIdleProcessor
 from pipecat.transports.smallwebrtc.connection import SmallWebRTCConnection
+from pipecat.turns.user_mute import MuteUntilFirstBotCompleteUserMuteStrategy
+from pipecat.turns.user_start import (
+    ExternalUserTurnStartStrategy,
+    TranscriptionUserTurnStartStrategy,
+)
+from pipecat.turns.user_start.vad_user_turn_start_strategy import (
+    VADUserTurnStartStrategy,
+)
+from pipecat.turns.user_stop import (
+    ExternalUserTurnStopStrategy,
+    TranscriptionUserTurnStopStrategy,
+)
+from pipecat.turns.user_turn_strategies import UserTurnStrategies
 from pipecat.utils.context import set_current_run_id
 from pipecat.utils.enums import EndTaskReason
 from pipecat.utils.tracing.context_registry import ContextProviderRegistry
@ -517,12 +522,11 @@ async def _run_pipeline(
        embeddings_model=embeddings_model,
    )

-    # Create pipeline components with audio configuration and engine
-    audio_buffer, transcript, context = create_pipeline_components(audio_config, engine)
+    # Create pipeline components with audio configuration
+    audio_buffer, context = create_pipeline_components(audio_config)

    # Set the context and audio_buffer after creation
    engine.set_context(context)
-    engine.set_audio_buffer(audio_buffer)

    # Set Stasis connection for immediate transfers (if available)
    if stasis_connection:
@ -532,7 +536,31 @@ async def _run_pipeline(
        expect_stripped_words=True,
        correct_aggregation_callback=engine.create_aggregation_correction_callback(),
    )
-    user_params = LLMUserAggregatorParams(enable_emulated_vad_interruptions=True)
+
+    # Configure turn strategies based on STT provider and model
+    # Deepgram Flux uses external turn detection (VAD + External start/stop)
+    # Other models use transcription-based turn detection with smart turn analyzer
+    is_deepgram_flux = (
+        user_config.stt.provider == ServiceProviders.DEEPGRAM.value
+        and user_config.stt.model == "flux-general-en"
+    )
+
+    if is_deepgram_flux:
+        user_turn_strategies = UserTurnStrategies(
+            start=[VADUserTurnStartStrategy(), ExternalUserTurnStartStrategy()],
+            stop=[ExternalUserTurnStopStrategy()],
+        )
+    else:
+        user_turn_strategies = UserTurnStrategies(
+            start=[VADUserTurnStartStrategy(), TranscriptionUserTurnStartStrategy()],
+            stop=[TranscriptionUserTurnStopStrategy()],
+        )
+
+    user_params = LLMUserAggregatorParams(
+        user_turn_strategies=user_turn_strategies,
+        user_mute_strategies=[MuteUntilFirstBotCompleteUserMuteStrategy()],
+        user_idle_timeout=max_user_idle_timeout,
+    )
    context_aggregator = LLMContextAggregatorPair(
        context, assistant_params=assistant_params, user_params=user_params
    )
@ -547,25 +575,20 @@ async def _run_pipeline(

    pipeline_metrics_aggregator = PipelineMetricsAggregator()

-    # Create STT mute filter using the selected strategies and the engine's callback
-    stt_mute_filter = STTMuteFilter(
-        config=STTMuteConfig(
-            strategies={
-                STTMuteStrategy.MUTE_UNTIL_FIRST_BOT_COMPLETE,
-                STTMuteStrategy.CUSTOM,
-            },
-            should_mute_callback=engine.create_should_mute_callback(),
-        )
-    )
-
-    # Use engine's user idle callback with configured timeout
-    user_idle_disconnect = UserIdleProcessor(
-        callback=engine.create_user_idle_callback(), timeout=max_user_idle_timeout
-    )
-
    user_context_aggregator = context_aggregator.user()
    assistant_context_aggregator = context_aggregator.assistant()

+    # Register user idle event handlers
+    user_idle_handler = engine.create_user_idle_handler()
+
+    @user_context_aggregator.event_handler("on_user_turn_idle")
+    async def on_user_turn_idle(aggregator):
+        await user_idle_handler.handle_idle(aggregator)
+
+    @user_context_aggregator.event_handler("on_user_turn_started")
+    async def on_user_turn_started(aggregator, strategy):
+        user_idle_handler.reset()
+
    # Create voicemail detector if enabled in the workflow's start node
    voicemail_detector = None
    start_node = workflow_graph.nodes.get(workflow_graph.start_node_id)
@ -592,16 +615,13 @@ async def _run_pipeline(
    pipeline = build_pipeline(
        transport,
        stt,
-        transcript,
        audio_buffer,
        llm,
        tts,
        user_context_aggregator,
        assistant_context_aggregator,
        pipeline_engine_callback_processor,
-        stt_mute_filter,
        pipeline_metrics_aggregator,
-        user_idle_disconnect,
        voicemail_detector=voicemail_detector,
    )

@ -614,18 +634,6 @@ async def _run_pipeline(
    # Initialize the engine to set the initial context
    await engine.initialize()

-    # Register event handlers
-    in_memory_audio_buffer, in_memory_transcript_buffer = (
-        register_transport_event_handlers(
-            task,
-            transport,
-            workflow_run_id,
-            engine=engine,
-            audio_buffer=audio_buffer,
-            audio_config=audio_config,
-        )
-    )
-
    # Add real-time feedback observer if WebSocket sender is available
    # Note: ws_sender was already fetched earlier for node_transition_callback
    if ws_sender:
@ -635,21 +643,24 @@ async def _run_pipeline(
        )
        task.add_observer(feedback_observer)

-    register_task_event_handler(
-        workflow_run_id,
-        engine,
+    # Register event handlers
+    in_memory_audio_buffer, in_memory_transcript_buffer = register_event_handlers(
        task,
        transport,
-        audio_buffer,
-        in_memory_audio_buffer,
-        in_memory_transcript_buffer,
-        in_memory_logs_buffer,
-        pipeline_metrics_aggregator,
+        workflow_run_id,
+        engine=engine,
+        audio_buffer=audio_buffer,
+        in_memory_logs_buffer=in_memory_logs_buffer,
+        pipeline_metrics_aggregator=pipeline_metrics_aggregator,
+        audio_config=audio_config,
    )

    register_audio_data_handler(audio_buffer, workflow_run_id, in_memory_audio_buffer)
-    register_transcript_handler(
-        transcript, workflow_run_id, in_memory_transcript_buffer
+    register_transcript_handlers(
+        user_context_aggregator,
+        assistant_context_aggregator,
+        workflow_run_id,
+        in_memory_transcript_buffer,
    )

    try:
--- a/api/services/pipecat/service_factory.py
+++ b/api/services/pipecat/service_factory.py
@ -7,6 +7,7 @@ from api.constants import MPS_API_URL
 from api.services.configuration.registry import ServiceProviders
 from pipecat.services.azure.llm import AzureLLMService
 from pipecat.services.cartesia.stt import CartesiaSTTService
+from pipecat.services.deepgram.flux.stt import DeepgramFluxSTTService
 from pipecat.services.deepgram.stt import DeepgramSTTService, LiveOptions
 from pipecat.services.deepgram.tts import DeepgramTTSService
 from pipecat.services.dograh.llm import DograhLLMService
@ -34,6 +35,20 @@ def create_stt_service(user_config):
        f"Creating STT service: provider={user_config.stt.provider}, model={user_config.stt.model}"
    )
    if user_config.stt.provider == ServiceProviders.DEEPGRAM.value:
+        # Check if using Flux model (English-only, no language selection)
+        if user_config.stt.model == "flux-general-en":
+            logger.debug("Using DeepGram Flux Model")
+            return DeepgramFluxSTTService(
+                api_key=user_config.stt.api_key,
+                model=user_config.stt.model,
+                params=DeepgramFluxSTTService.InputParams(
+                    eot_timeout_ms=3000,
+                    eot_threshold=0.7,
+                ),
+                should_interrupt=False,  # Let UserAggregator take care of sending InterruptionFrame
+            )
+
+        # Models other than Flux
        # Use language from user config, defaulting to "multi" for multilingual support
        language = getattr(user_config.stt, "language", None) or "multi"
        live_options = LiveOptions(
@ -44,7 +59,9 @@ def create_stt_service(user_config):
        )
        logger.debug(f"Using DeepGram Model - {user_config.stt.model}")
        return DeepgramSTTService(
-            live_options=live_options, api_key=user_config.stt.api_key
+            live_options=live_options,
+            api_key=user_config.stt.api_key,
+            should_interrupt=False,  # Let UserAggregator take care of sending InterruptionFrame
        )
    elif user_config.stt.provider == ServiceProviders.OPENAI.value:
        return OpenAISTTService(
--- a/api/services/pipecat/transport_setup.py
+++ b/api/services/pipecat/transport_setup.py
@ -2,10 +2,9 @@ import os

 from fastapi import WebSocket

-from api.constants import APP_ROOT_DIR, ENABLE_RNNOISE, ENABLE_SMART_TURN
+from api.constants import APP_ROOT_DIR
 from api.db import db_client
 from api.enums import OrganizationConfigurationKey
-from api.services.looptalk.internal_transport import InternalTransport
 from api.services.pipecat.audio_config import AudioConfig
 from api.services.telephony.stasis_rtp_connection import StasisRTPConnection
 from api.services.telephony.stasis_rtp_serializer import StasisRTPFrameSerializer
@ -13,11 +12,8 @@ from api.services.telephony.stasis_rtp_transport import (
    StasisRTPTransport,
    StasisRTPTransportParams,
 )
-from pipecat.audio.filters.rnnoise_filter import RNNoiseFilter
 from pipecat.audio.mixers.silence_mixer import SilenceAudioMixer
 from pipecat.audio.mixers.soundfile_mixer import SoundfileMixer
-from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
-from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
 from pipecat.audio.vad.silero import SileroVADAnalyzer, VADParams
 from pipecat.serializers.twilio import TwilioFrameSerializer
 from pipecat.serializers.vobiz import VobizFrameSerializer
@ -35,19 +31,6 @@ librnnoise_path = os.path.normpath(
 )


-def create_turn_analyzer(workflow_run_id: int, audio_config: AudioConfig):
-    """Create a turn analyzer backed by the local Smart Turn HTTP service.
-
-    Args:
-        workflow_run_id: ID of the workflow run for turn analyzer context
-        audio_config: Audio configuration containing pipeline sample rate
-    """
-    if ENABLE_SMART_TURN:
-        return LocalSmartTurnAnalyzerV3(params=SmartTurnParams())
-
-    return None
-
-
 async def create_twilio_transport(
    websocket_client: WebSocket,
    stream_sid: str,
@ -78,8 +61,6 @@ async def create_twilio_transport(
            f"Incomplete Twilio configuration for organization {organization_id}"
        )

-    turn_analyzer = create_turn_analyzer(workflow_run_id, audio_config)
-
    serializer = TwilioFrameSerializer(
        stream_sid=stream_sid,
        call_sid=call_sid,
@ -119,11 +100,7 @@ async def create_twilio_transport(
                if ambient_noise_config and ambient_noise_config.get("enabled", False)
                else SilenceAudioMixer()
            ),
-            turn_analyzer=turn_analyzer,
            serializer=serializer,
-            audio_in_filter=RNNoiseFilter(library_path=librnnoise_path)
-            if ENABLE_RNNOISE
-            else None,
        ),
    )

@ -158,8 +135,6 @@ async def create_cloudonix_transport(
            f"Required: bearer_token, domain_id"
        )

-    turn_analyzer = create_turn_analyzer(workflow_run_id, audio_config)
-
    from pipecat.serializers.cloudonix import CloudonixFrameSerializer

    serializer = CloudonixFrameSerializer(
@ -202,11 +177,7 @@ async def create_cloudonix_transport(
                if ambient_noise_config and ambient_noise_config.get("enabled", False)
                else SilenceAudioMixer()
            ),
-            turn_analyzer=turn_analyzer,
            serializer=serializer,
-            audio_in_filter=RNNoiseFilter(library_path=librnnoise_path)
-            if ENABLE_RNNOISE
-            else None,
        ),
    )

@ -238,8 +209,6 @@ async def create_vonage_transport(
            f"Incomplete Vonage configuration for organization {organization_id}"
        )

-    turn_analyzer = create_turn_analyzer(workflow_run_id, audio_config)
-
    serializer = VonageFrameSerializer(
        call_uuid=call_uuid,
        application_id=application_id,
@ -283,11 +252,7 @@ async def create_vonage_transport(
                if ambient_noise_config and ambient_noise_config.get("enabled", False)
                else SilenceAudioMixer()
            ),
-            turn_analyzer=turn_analyzer,
            serializer=serializer,
-            audio_in_filter=RNNoiseFilter(library_path=librnnoise_path)
-            if ENABLE_RNNOISE
-            else None,
        ),
    )

@ -337,8 +302,6 @@ async def create_vobiz_transport(
        f"from_numbers={len(config.get('from_numbers', []))} numbers"
    )

-    turn_analyzer = create_turn_analyzer(workflow_run_id, audio_config)
-
    # Use VobizFrameSerializer for Vobiz WebSocket protocol
    serializer = VobizFrameSerializer(
        stream_id=stream_id,
@ -389,11 +352,7 @@ async def create_vobiz_transport(
                if ambient_noise_config and ambient_noise_config.get("enabled", False)
                else SilenceAudioMixer()
            ),
-            turn_analyzer=turn_analyzer,
            serializer=serializer,
-            audio_in_filter=RNNoiseFilter(library_path=librnnoise_path)
-            if ENABLE_RNNOISE
-            else None,
        ),
    )

@ -411,7 +370,6 @@ def create_webrtc_transport(
    ambient_noise_config: dict | None = None,
 ):
    """Create a transport for WebRTC connections"""
-    turn_analyzer = create_turn_analyzer(workflow_run_id, audio_config)

    return SmallWebRTCTransport(
        webrtc_connection=webrtc_connection,
@ -445,10 +403,6 @@ def create_webrtc_transport(
                if ambient_noise_config and ambient_noise_config.get("enabled", False)
                else SilenceAudioMixer()
            ),
-            turn_analyzer=turn_analyzer,
-            audio_in_filter=RNNoiseFilter(library_path=librnnoise_path)
-            if ENABLE_RNNOISE
-            else None,
        ),
    )

@ -461,7 +415,6 @@ def create_stasis_transport(
    ambient_noise_config: dict | None = None,
 ):
    """Create a transport for ARI connections"""
-    turn_analyzer = create_turn_analyzer(workflow_run_id, audio_config)

    serializer = StasisRTPFrameSerializer(
        StasisRTPFrameSerializer.InputParams(
@ -502,11 +455,7 @@ def create_stasis_transport(
                if ambient_noise_config and ambient_noise_config.get("enabled", False)
                else SilenceAudioMixer()
            ),
-            turn_analyzer=turn_analyzer,
            serializer=serializer,
-            audio_in_filter=RNNoiseFilter(library_path=librnnoise_path)
-            if ENABLE_RNNOISE
-            else None,
        ),
    )

@ -528,46 +477,44 @@ def create_internal_transport(
    Returns:
        InternalTransport instance configured with turn analyzer
    """
-    turn_analyzer = create_turn_analyzer(workflow_run_id, audio_config)
+    pass
+    # Commented out because importing looptalk through the regular import flow
+    # was causing issues. Maybe move this to looptalk/orchestrator.py.

    # Create and return the internal transport with latency
-    return InternalTransport(
-        params=TransportParams(
-            audio_out_enabled=True,
-            audio_out_sample_rate=audio_config.transport_out_sample_rate,
-            audio_out_channels=1,
-            audio_in_enabled=True,
-            audio_in_sample_rate=audio_config.transport_in_sample_rate,
-            audio_in_channels=1,
-            vad_analyzer=(
-                SileroVADAnalyzer(
-                    params=VADParams(
-                        confidence=vad_config.get("confidence", 0.7),
-                        start_secs=vad_config.get("start_seconds", 0.4),
-                        stop_secs=vad_config.get("stop_seconds", 0.8),
-                        min_volume=vad_config.get("minimum_volume", 0.6),
-                    )
-                )
-                if vad_config
-                else SileroVADAnalyzer()
-            ),
-            audio_out_mixer=(
-                SoundfileMixer(
-                    sound_files={
-                        "office": APP_ROOT_DIR
-                        / "assets"
-                        / f"office-ambience-{audio_config.transport_out_sample_rate}-mono.wav"
-                    },
-                    default_sound="office",
-                    volume=ambient_noise_config.get("volume", 0.3),
-                )
-                if ambient_noise_config and ambient_noise_config.get("enabled", False)
-                else SilenceAudioMixer()
-            ),
-            turn_analyzer=turn_analyzer,
-            audio_in_filter=RNNoiseFilter(library_path=librnnoise_path)
-            if ENABLE_RNNOISE
-            else None,
-        ),
-        latency_seconds=latency_seconds,
-    )
+    # return InternalTransport(
+    #     params=TransportParams(
+    #         audio_out_enabled=True,
+    #         audio_out_sample_rate=audio_config.transport_out_sample_rate,
+    #         audio_out_channels=1,
+    #         audio_in_enabled=True,
+    #         audio_in_sample_rate=audio_config.transport_in_sample_rate,
+    #         audio_in_channels=1,
+    #         vad_analyzer=(
+    #             SileroVADAnalyzer(
+    #                 params=VADParams(
+    #                     confidence=vad_config.get("confidence", 0.7),
+    #                     start_secs=vad_config.get("start_seconds", 0.4),
+    #                     stop_secs=vad_config.get("stop_seconds", 0.8),
+    #                     min_volume=vad_config.get("minimum_volume", 0.6),
+    #                 )
+    #             )
+    #             if vad_config
+    #             else SileroVADAnalyzer()
+    #         ),
+    #         audio_out_mixer=(
+    #             SoundfileMixer(
+    #                 sound_files={
+    #                     "office": APP_ROOT_DIR
+    #                     / "assets"
+    #                     / f"office-ambience-{audio_config.transport_out_sample_rate}-mono.wav"
+    #                 },
+    #                 default_sound="office",
+    #                 volume=ambient_noise_config.get("volume", 0.3),
+    #             )
+    #             if ambient_noise_config and ambient_noise_config.get("enabled", False)
+    #             else SilenceAudioMixer()
+    #         ),
+    #     ),
+    #     latency_seconds=latency_seconds,
+    # )
--- a/api/services/telephony/stasis_rtp_serializer.py
+++ b/api/services/telephony/stasis_rtp_serializer.py
@ -15,6 +15,8 @@ The serializer:
 from typing import Optional

 from loguru import logger
+from pydantic import BaseModel
+
 from pipecat.audio.utils import create_default_resampler, pcm_to_ulaw, ulaw_to_pcm
 from pipecat.frames.frames import (
    AudioRawFrame,
@ -22,8 +24,7 @@ from pipecat.frames.frames import (
    InputAudioRawFrame,
    StartFrame,
 )
-from pipecat.serializers.base_serializer import FrameSerializer, FrameSerializerType
-from pydantic import BaseModel
+from pipecat.serializers.base_serializer import FrameSerializer


 class StasisRTPFrameSerializer(FrameSerializer):
@ -59,11 +60,6 @@ class StasisRTPFrameSerializer(FrameSerializer):
        # Resampler shared between encode / decode paths
        self._resampler = create_default_resampler()

-    @property
-    def type(self) -> FrameSerializerType:
-        """Stasis uses raw bytes → BINARY."""
-        return FrameSerializerType.BINARY
-
    async def setup(self, frame: StartFrame):
        """Remember pipeline configuration."""
        self._sample_rate = self._params.sample_rate or frame.audio_in_sample_rate
--- a/api/services/workflow/pipecat_engine.py
+++ b/api/services/workflow/pipecat_engine.py
@ -19,7 +19,6 @@ from pipecat.utils.enums import EndTaskReason

 if TYPE_CHECKING:
    from api.services.telephony.stasis_rtp_connection import StasisRTPConnection
-    from pipecat.processors.audio.audio_buffer_processor import AudioBuffer
    from pipecat.services.anthropic.llm import AnthropicLLMService
    from pipecat.services.google.llm import GoogleLLMService
    from pipecat.services.openai.llm import OpenAILLMService
@ -64,7 +63,6 @@ class PipecatEngine:
        transport: Optional[BaseTransport] = None,
        workflow: WorkflowGraph,
        call_context_vars: dict,
-        audio_buffer: Optional["AudioBuffer"] = None,
        workflow_run_id: Optional[int] = None,
        node_transition_callback: Optional[
            Callable[[str, Optional[str]], Awaitable[None]]
@ -78,7 +76,6 @@ class PipecatEngine:
        self.transport = transport
        self.workflow = workflow
        self._call_context_vars = call_context_vars
-        self._audio_buffer = audio_buffer
        self._workflow_run_id = workflow_run_id
        self._node_transition_callback = node_transition_callback
        self._initialized = False
@ -204,6 +201,7 @@ class PipecatEngine:
            logger.info(f"Arguments: {function_call_params.arguments}")
            await self.set_node(transition_to_node)
            try:
+
                async def on_context_updated() -> None:
                    """
                    pipecat framework will run this function after the function call result has been updated in the context.
@ -215,6 +213,12 @@ class PipecatEngine:
                        self._current_node
                    )

+                    # Queue EndFrame if we just transitioned to EndNode
+                    if self._current_node.is_end:
+                        await self.send_end_task_frame(
+                            EndTaskReason.USER_QUALIFIED.value
+                        )
+
                result = {"status": "done"}

                properties = FunctionCallResultProperties(
@ -478,8 +482,6 @@ class PipecatEngine:
        if node.extraction_enabled and node.extraction_variables:
            await self._perform_variable_extraction_if_needed(node)

-        await self.send_end_task_frame(EndTaskReason.USER_QUALIFIED.value)
-
    async def _handle_agent_node(self, node: Node) -> None:
        """Handle agent node execution."""
        if node.is_static:
@ -680,12 +682,12 @@ class PipecatEngine:
        """
        return engine_callbacks.create_should_mute_callback(self)

-    def create_user_idle_callback(self):
+    def create_user_idle_handler(self):
        """
-        This callback is called when the user is idle for a certain duration.
-        We use this to either play the static text or end the call
+        Returns a UserIdleHandler that manages user-idle timeouts with state.
+        The handler tracks retry count and handles escalating prompts.
        """
-        return engine_callbacks.create_user_idle_callback(self)
+        return engine_callbacks.create_user_idle_handler(self)

    def create_max_duration_callback(self):
        """
@ -721,14 +723,6 @@ class PipecatEngine:
        """
        self.task = task

-    def set_audio_buffer(self, audio_buffer: "AudioBuffer") -> None:
-        """Set the audio buffer.
-
-        This allows setting the audio buffer after the engine has been created,
-        which is useful when the audio buffer needs to be created after the engine.
-        """
-        self._audio_buffer = audio_buffer
-
    def set_stasis_connection(
        self, connection: Optional["StasisRTPConnection"]
    ) -> None:
--- a/api/services/workflow/pipecat_engine_callbacks.py
+++ b/api/services/workflow/pipecat_engine_callbacks.py
@ -23,7 +23,6 @@ from pipecat.utils.enums import EndTaskReason

 if TYPE_CHECKING:
    from api.services.workflow.pipecat_engine import PipecatEngine
-    from pipecat.processors.user_idle_processor import UserIdleProcessor


 # ---------------------------------------------------------------------------
@ -57,33 +56,43 @@ def create_should_mute_callback(
 # ---------------------------------------------------------------------------


-def create_user_idle_callback(engine: "PipecatEngine"):
-    """Return a callback that handles user-idle timeouts."""
+class UserIdleHandler:
+    """Helper class to manage user idle retry logic with state."""

-    async def handle_user_idle(
-        user_idle: "UserIdleProcessor", retry_count: int
-    ) -> bool:
-        logger.debug(f"Handling user_idle, attempt: {retry_count}")
+    def __init__(self, engine: "PipecatEngine"):
+        self._engine = engine
+        self._retry_count = 0

-        if retry_count == 1:
+    def reset(self):
+        """Reset the retry count when user becomes active."""
+        self._retry_count = 0
+
+    async def handle_idle(self, aggregator):
+        """Handle user idle event with escalating prompts."""
+        self._retry_count += 1
+        logger.debug(f"Handling user_idle, attempt: {self._retry_count}")
+
+        if self._retry_count == 1:
            message = {
                "role": "system",
                "content": "The user has been quiet. Politely and briefly ask if they're still there in the language that the user has been speaking so far.",
            }
-            await user_idle.push_frame(LLMMessagesAppendFrame([message], run_llm=True))
-            return True
+            await aggregator.push_frame(LLMMessagesAppendFrame([message], run_llm=True))
+            return

        message = {
            "role": "system",
            "content": "The user has been quiet. We will be disconnecting the call now. Wish them a good day in the language that the user has been speaking so far.",
        }
-        await user_idle.push_frame(LLMMessagesAppendFrame([message], run_llm=True))
-        await engine.send_end_task_frame(
+        await aggregator.push_frame(LLMMessagesAppendFrame([message], run_llm=True))
+        await self._engine.send_end_task_frame(
            EndTaskReason.USER_IDLE_MAX_DURATION_EXCEEDED.value
        )
-        return False

-    return handle_user_idle
+
+def create_user_idle_handler(engine: "PipecatEngine") -> UserIdleHandler:
+    """Return a UserIdleHandler that manages user-idle timeouts with state."""
+    return UserIdleHandler(engine)


 # ---------------------------------------------------------------------------