feat: simplify pipecat engine execution

2026-06-19 08:28:10 +02:00 · 2025-11-15 17:22:15 +05:30 · 2025-11-15 17:22:15 +05:30 · cc05f363ff
commit cc05f363ff
parent 5e4aef346d
35 changed files with 545 additions and 1861 deletions
--- a/api/services/campaign/call_dispatcher.py
+++ b/api/services/campaign/call_dispatcher.py
@ -7,10 +7,10 @@ from loguru import logger

 from api.db import db_client
 from api.db.models import QueuedRunModel, WorkflowRunModel
-from api.enums import OrganizationConfigurationKey, WorkflowRunMode
+from api.enums import OrganizationConfigurationKey
 from api.services.campaign.rate_limiter import rate_limiter
-from api.services.telephony.factory import get_telephony_provider
 from api.services.telephony.base import TelephonyProvider
+from api.services.telephony.factory import get_telephony_provider
 from api.utils.tunnel import TunnelURLProvider


@ -238,7 +238,7 @@ class CampaignCallDispatcher:
                f"&campaign_id={campaign.id}"
                f"&organization_id={campaign.organization_id}"
            )
-            
+
            call_result = await provider.initiate_call(
                to_number=phone_number,
                webhook_url=webhook_url,
@ -255,7 +255,9 @@ class CampaignCallDispatcher:
            )

            # Update workflow run as failed
-            telephony_callback_logs = workflow_run.logs.get("telephony_status_callbacks", [])
+            telephony_callback_logs = workflow_run.logs.get(
+                "telephony_status_callbacks", []
+            )
            telephony_callback_log = {
                "status": "failed",
                "timestamp": datetime.now(UTC).isoformat(),
--- a/api/services/looptalk/core/pipeline_builder.py
+++ b/api/services/looptalk/core/pipeline_builder.py
@ -24,6 +24,9 @@ from api.services.workflow.dto import ReactFlowDTO
 from api.services.workflow.pipecat_engine import PipecatEngine
 from api.services.workflow.workflow import WorkflowGraph
 from pipecat.pipeline.pipeline import Pipeline
+from pipecat.processors.aggregators.llm_response_universal import (
+    LLMContextAggregatorPair,
+)
 from pipecat.processors.filters.stt_mute_filter import (
    STTMuteConfig,
    STTMuteFilter,
@ -83,7 +86,8 @@ class LoopTalkPipelineBuilder:
        audio_buffer, audio_synchronizer, transcript, context = (
            create_pipeline_components(audio_config)
        )
-        context_aggregator = llm.create_context_aggregator(context)
+
+        context_aggregator = LLMContextAggregatorPair(context)

        # Get workflow graph
        workflow_graph = WorkflowGraph(
@ -113,7 +117,6 @@ class LoopTalkPipelineBuilder:
        pipeline_engine_callback_processor = PipelineEngineCallbacksProcessor(
            max_call_duration_seconds=300,
            max_duration_end_task_callback=engine.create_max_duration_callback(),
-            llm_generated_text_callback=engine.create_llm_generated_text_callback(),
            generation_started_callback=engine.create_generation_started_callback(),
        )

--- a/api/services/looptalk/orchestrator.py
+++ b/api/services/looptalk/orchestrator.py
@ -272,14 +272,6 @@ class LoopTalkTestOrchestrator:

            await task.cancel()

-        # Connect the context aggregator events to engine
-        @assistant_context_aggregator.event_handler("on_push_aggregation")
-        async def on_assistant_aggregator_push_context(_aggregator):
-            logger.debug(
-                "Assistant aggregator push context – flushing pending transitions"
-            )
-            await engine.flush_pending_transitions()
-
        # Register custom audio and transcript handlers for LoopTalk
        await self._register_looptalk_handlers(
            audio_synchronizer, transcript, test_session_id, role
--- a/api/services/pipecat/engine_pre_aggregator_processor.py
+++ b/api/services/pipecat/engine_pre_aggregator_processor.py
@ -1,69 +0,0 @@
-"""Engine Pre-Aggregator Processor
-
-This processor sits before the user context aggregator in the pipeline and handles
-engine-specific callbacks for frames that need to be processed before aggregation.
-This ensures the engine can update context before the aggregator generates LLM frames.
-"""
-
-from typing import Awaitable, Callable, Optional
-
-from loguru import logger
-
-from api.services.pipecat.exceptions import VoicemailDetectedException
-from pipecat.frames.frames import (
-    Frame,
-    UserStartedSpeakingFrame,
-    UserStoppedSpeakingFrame,
-)
-from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
-
-
-class EnginePreAggregatorProcessor(FrameProcessor):
-    """
-    Processor that handles engine callbacks before user context aggregation.
-
-    This processor is positioned before the user context aggregator to ensure
-    the engine can update LLM context before aggregation occurs.
-    """
-
-    def __init__(
-        self,
-        user_started_speaking_callback: Optional[Callable[[], Awaitable[None]]] = None,
-        user_stopped_speaking_callback: Optional[Callable[[], Awaitable[None]]] = None,
-        **kwargs,
-    ):
-        super().__init__(**kwargs)
-        self._user_started_speaking_callback = user_started_speaking_callback
-        self._user_stopped_speaking_callback = user_stopped_speaking_callback
-
-    async def process_frame(self, frame: Frame, direction: FrameDirection):
-        await super().process_frame(frame, direction)
-
-        # Handle frames that need engine processing before aggregation
-        if isinstance(frame, UserStartedSpeakingFrame):
-            await self._handle_user_started_speaking()
-        elif isinstance(frame, UserStoppedSpeakingFrame):
-            try:
-                await self._handle_user_stopped_speaking()
-            except VoicemailDetectedException:
-                # We have detected voicemail, lets not
-                # forward the UserStoppedSpeakingFrame, so that
-                # we don't issue an llm call from user context
-                # aggregator
-                logger.debug("Voicemail detected, not pushing UserStoppedSpeakingFrame")
-                return
-
-        # Always push the frame downstream
-        await self.push_frame(frame, direction)
-
-    async def _handle_user_started_speaking(self):
-        """Handle UserStartedSpeakingFrame before aggregation."""
-        if self._user_started_speaking_callback:
-            # logger.debug("Engine pre-aggregator: User started speaking")
-            await self._user_started_speaking_callback()
-
-    async def _handle_user_stopped_speaking(self):
-        """Handle UserStoppedSpeakingFrame before aggregation."""
-        if self._user_stopped_speaking_callback:
-            # logger.debug("Engine pre-aggregator: User stopped speaking")
-            await self._user_stopped_speaking_callback()
--- a/api/services/pipecat/pipeline_builder.py
+++ b/api/services/pipecat/pipeline_builder.py
@ -9,7 +9,7 @@ from api.constants import (
 from api.services.pipecat.audio_config import AudioConfig
 from pipecat.pipeline.pipeline import Pipeline
 from pipecat.pipeline.task import PipelineParams, PipelineTask
-from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
+from pipecat.processors.aggregators.llm_context import LLMContext
 from pipecat.processors.audio.audio_buffer_processor import AudioBuffer
 from pipecat.processors.audio.audio_synchronizer import AudioSynchronizer
 from pipecat.processors.transcript_processor import TranscriptProcessor
@ -39,7 +39,7 @@ def create_pipeline_components(audio_config: AudioConfig, engine: "PipecatEngine
        assistant_correct_aggregation_callback=engine.create_aggregation_correction_callback()
    )

-    context = OpenAILLMContext()
+    context = LLMContext()

    return audio_buffer, audio_synchronizer, transcript, context

@ -58,7 +58,6 @@ def build_pipeline(
    stt_mute_filter,
    pipeline_metrics_aggregator,
    user_idle_disconnect,
-    engine_pre_aggregator_processor=None,
 ):
    """Build the main pipeline with all components"""
    # Register processors with synchronizer for merged audio
@ -69,16 +68,12 @@ def build_pipeline(
    processors = [
        transport.input(),  # Transport user input
        audio_buffer.input(),  # Record input audio (only processes InputAudioRawFrame)
-        stt_mute_filter,
        stt,  # STT can now have audio_passthrough=False
+        stt_mute_filter,  # STTMuteFilter doesn't let VAD-related events pass through while muted
        user_idle_disconnect,
        transcript.user(),
    ]

-    # Insert engine pre-aggregator processor if provided (before user aggregator)
-    if engine_pre_aggregator_processor:
-        processors.append(engine_pre_aggregator_processor)
-
    processors.extend(
        [
            user_context_aggregator,
--- a/api/services/pipecat/pipeline_engine_callbacks_processor.py
+++ b/api/services/pipecat/pipeline_engine_callbacks_processor.py
@ -7,7 +7,6 @@ from pipecat.frames.frames import (
    Frame,
    HeartbeatFrame,
    LLMFullResponseStartFrame,
-    LLMGeneratedTextFrame,
    LLMTextFrame,
    StartFrame,
    TTSSpeakFrame,
@ -26,7 +25,6 @@ class PipelineEngineCallbacksProcessor(FrameProcessor):
        self,
        max_call_duration_seconds: int = 300,
        max_duration_end_task_callback: Optional[Callable[[], Awaitable[None]]] = None,
-        llm_generated_text_callback: Optional[Callable[[], Awaitable[None]]] = None,
        generation_started_callback: Optional[Callable[[], Awaitable[None]]] = None,
        llm_text_frame_callback: Optional[Callable[[str], Awaitable[None]]] = None,
    ):
@ -34,7 +32,6 @@ class PipelineEngineCallbacksProcessor(FrameProcessor):
        self._start_time = None
        self._max_call_duration_seconds = max_call_duration_seconds
        self._max_duration_end_task_callback = max_duration_end_task_callback
-        self._llm_generated_text_callback = llm_generated_text_callback
        self._generation_started_callback = generation_started_callback
        self._llm_text_frame_callback = llm_text_frame_callback
        self._end_task_frame_pushed = False
@ -46,8 +43,6 @@ class PipelineEngineCallbacksProcessor(FrameProcessor):
            await self._start(frame)
        elif isinstance(frame, HeartbeatFrame):
            await self._check_call_duration()
-        elif isinstance(frame, LLMGeneratedTextFrame):
-            await self._generated_text_frame(frame)
        elif isinstance(frame, LLMFullResponseStartFrame):
            await self._generation_started()
        elif (
@ -74,11 +69,6 @@ class PipelineEngineCallbacksProcessor(FrameProcessor):
                        "Max call duration exceeded. Skipping EndTaskFrame since already sent"
                    )

-    async def _generated_text_frame(self, _: LLMGeneratedTextFrame):
-        """Handle LLMGeneratedTextFrame."""
-        if self._llm_generated_text_callback is not None:
-            await self._llm_generated_text_callback()
-
    async def _generation_started(self):
        if self._generation_started_callback:
            await self._generation_started_callback()
--- a/api/services/pipecat/run_pipeline.py
+++ b/api/services/pipecat/run_pipeline.py
@ -7,9 +7,6 @@ from api.db import db_client
 from api.db.models import WorkflowModel
 from api.enums import WorkflowRunMode
 from api.services.pipecat.audio_config import AudioConfig, create_audio_config
-from api.services.pipecat.engine_pre_aggregator_processor import (
-    EnginePreAggregatorProcessor,
-)
 from api.services.pipecat.event_handlers import (
    register_audio_data_handler,
    register_task_event_handler,
@ -43,6 +40,9 @@ from api.services.workflow.pipecat_engine import PipecatEngine
 from api.services.workflow.workflow import WorkflowGraph
 from pipecat.pipeline.runner import PipelineRunner
 from pipecat.processors.aggregators.llm_response import LLMAssistantAggregatorParams
+from pipecat.processors.aggregators.llm_response_universal import (
+    LLMContextAggregatorPair,
+)
 from pipecat.processors.filters.stt_mute_filter import (
    STTMuteConfig,
    STTMuteFilter,
@ -119,7 +119,7 @@ async def run_pipeline_vonage(
    user_id: int,
 ):
    """Run pipeline for Vonage WebSocket connections.
-    
+
    Vonage uses raw PCM audio over WebSocket instead of base64-encoded μ-law.
    The audio is transmitted as binary frames at 16kHz by default.
    """
@ -137,7 +137,9 @@ async def run_pipeline_vonage(
        if "vad_configuration" in workflow.workflow_configurations:
            vad_config = workflow.workflow_configurations["vad_configuration"]
        if "ambient_noise_configuration" in workflow.workflow_configurations:
-            ambient_noise_config = workflow.workflow_configurations["ambient_noise_configuration"]
+            ambient_noise_config = workflow.workflow_configurations[
+                "ambient_noise_configuration"
+            ]

    try:
        # Setup audio config for Vonage using the centralized config
@ -355,21 +357,14 @@ async def _run_pipeline(
        expect_stripped_words=True,
        correct_aggregation_callback=engine.create_aggregation_correction_callback(),
    )
-    context_aggregator = llm.create_context_aggregator(
+    context_aggregator = LLMContextAggregatorPair(
        context, assistant_params=assistant_params
    )

-    # Create engine pre-aggregator processor for speaking events
-    engine_pre_aggregator_processor = EnginePreAggregatorProcessor(
-        user_started_speaking_callback=engine.create_user_started_speaking_callback(),
-        user_stopped_speaking_callback=engine.create_user_stopped_speaking_callback(),
-    )
-
    # Create usage metrics aggregator with engine's callback
    pipeline_engine_callback_processor = PipelineEngineCallbacksProcessor(
        max_call_duration_seconds=max_call_duration_seconds,
        max_duration_end_task_callback=engine.create_max_duration_callback(),
-        llm_generated_text_callback=engine.create_llm_generated_text_callback(),
        generation_started_callback=engine.create_generation_started_callback(),
        llm_text_frame_callback=engine.handle_llm_text_frame,
        # Note: speaking event callbacks are now handled by pre-aggregator processor
@ -396,11 +391,6 @@ async def _run_pipeline(
    user_context_aggregator = context_aggregator.user()
    assistant_context_aggregator = context_aggregator.assistant()

-    @assistant_context_aggregator.event_handler("on_push_aggregation")
-    async def on_assistant_aggregator_push_context(_aggregator):
-        logger.debug("Assistant aggregator push context – flushing pending transitions")
-        await engine.flush_pending_transitions(source="context_push")
-
    # Build the pipeline with the STT mute filter and context controller
    pipeline = build_pipeline(
        transport,
@ -416,7 +406,6 @@ async def _run_pipeline(
        stt_mute_filter,
        pipeline_metrics_aggregator,
        user_idle_disconnect,
-        engine_pre_aggregator_processor=engine_pre_aggregator_processor,
    )

    # Create pipeline task with audio configuration
--- a/api/services/pipecat/transport_setup.py
+++ b/api/services/pipecat/transport_setup.py
@ -165,14 +165,15 @@ async def create_vonage_transport(

    # Use the factory to load config from database
    from api.services.telephony.factory import load_telephony_config
+
    config = await load_telephony_config(organization_id)
-    
+
    if config.get("provider") != "vonage":
        raise ValueError(f"Expected Vonage provider, got {config.get('provider')}")

    application_id = config.get("application_id")
    private_key = config.get("private_key")
-    
+
    if not application_id or not private_key:
        raise ValueError(
            f"Incomplete Vonage configuration for organization {organization_id}"
@ -186,8 +187,8 @@ async def create_vonage_transport(
        private_key=private_key,
        params=VonageFrameSerializer.InputParams(
            vonage_sample_rate=audio_config.transport_in_sample_rate,
-            sample_rate=audio_config.pipeline_sample_rate
-        )
+            sample_rate=audio_config.pipeline_sample_rate,
+        ),
    )

    # Important: Vonage uses binary WebSocket mode, not text
--- a/api/services/telephony/base.py
+++ b/api/services/telephony/base.py
@ -3,6 +3,7 @@ Base telephony provider interface for abstracting telephony services.
 This allows easy switching between different providers (Twilio, Vonage, etc.)
 while keeping business logic decoupled from specific implementations.
 """
+
 from abc import ABC, abstractmethod
 from dataclasses import dataclass, field
 from typing import TYPE_CHECKING, Any, Dict, List, Optional
@ -14,10 +15,15 @@ if TYPE_CHECKING:
@dataclass
 class CallInitiationResult:
    """Standardized response from initiate_call across all providers."""
-    call_id: str                          # Provider's call identifier (SID for Twilio, UUID for Vonage)
-    status: str                            # Initial status (e.g., "queued", "initiated", "started")
-    provider_metadata: Dict[str, Any] = field(default_factory=dict)  # Data that needs to be persisted
-    raw_response: Dict[str, Any] = field(default_factory=dict)       # Full provider response for debugging
+
+    call_id: str  # Provider's call identifier (SID for Twilio, UUID for Vonage)
+    status: str  # Initial status (e.g., "queued", "initiated", "started")
+    provider_metadata: Dict[str, Any] = field(
+        default_factory=dict
+    )  # Data that needs to be persisted
+    raw_response: Dict[str, Any] = field(
+        default_factory=dict
+    )  # Full provider response for debugging


 class TelephonyProvider(ABC):
@ -25,6 +31,7 @@ class TelephonyProvider(ABC):
    Abstract base class for telephony providers.
    All telephony providers must implement these core methods.
    """
+
    PROVIDER_NAME = None
    WEBHOOK_ENDPOINT = None

@ -38,13 +45,13 @@ class TelephonyProvider(ABC):
    ) -> CallInitiationResult:
        """
        Initiate an outbound call.
-        
+
        Args:
            to_number: The destination phone number
            webhook_url: The URL to receive call events
            workflow_run_id: Optional workflow run ID for tracking
            **kwargs: Provider-specific additional parameters
-            
+
        Returns:
            CallInitiationResult with standardized call details
        """
@ -54,10 +61,10 @@ class TelephonyProvider(ABC):
    async def get_call_status(self, call_id: str) -> Dict[str, Any]:
        """
        Get the current status of a call.
-        
+
        Args:
            call_id: The provider-specific call identifier
-            
+
        Returns:
            Dict containing call status information
        """
@ -67,7 +74,7 @@ class TelephonyProvider(ABC):
    async def get_available_phone_numbers(self) -> List[str]:
        """
        Get list of available phone numbers for this provider.
-        
+
        Returns:
            List of phone numbers that can be used for outbound calls
        """
@ -77,7 +84,7 @@ class TelephonyProvider(ABC):
    def validate_config(self) -> bool:
        """
        Validate that the provider is properly configured.
-        
+
        Returns:
            True if configuration is valid, False otherwise
        """
@ -89,12 +96,12 @@ class TelephonyProvider(ABC):
    ) -> bool:
        """
        Verify webhook signature for security.
-        
+
        Args:
            url: The webhook URL
            params: The webhook parameters
            signature: The signature to verify
-            
+
        Returns:
            True if signature is valid, False otherwise
        """
@ -106,12 +113,12 @@ class TelephonyProvider(ABC):
    ) -> str:
        """
        Generate the initial webhook response for starting a call session.
-        
+
        Args:
            workflow_id: The workflow ID
            user_id: The user ID
            workflow_run_id: The workflow run ID
-            
+
        Returns:
            Provider-specific response (e.g., TwiML for Twilio)
        """
@ -121,10 +128,10 @@ class TelephonyProvider(ABC):
    async def get_call_cost(self, call_id: str) -> Dict[str, Any]:
        """
        Get cost information for a completed call.
-        
+
        Args:
            call_id: Provider-specific call identifier (SID for Twilio, UUID for Vonage)
-            
+
        Returns:
            Dict containing:
                - cost_usd: The cost in USD as float
@ -138,10 +145,10 @@ class TelephonyProvider(ABC):
    def parse_status_callback(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Parse provider-specific status callback data into generic format.
-        
+
        Args:
            data: Raw callback data from the provider
-            
+
        Returns:
            Dict with standardized fields:
                - call_id: Provider's call identifier
@ -163,14 +170,14 @@ class TelephonyProvider(ABC):
    ) -> None:
        """
        Handle provider-specific WebSocket connection for real-time call audio.
-        
+
        This method encapsulates all provider-specific WebSocket handshake and
        message routing logic, keeping the main websocket endpoint clean.
-        
+
        Args:
            websocket: The WebSocket connection
            workflow_id: The workflow ID
            user_id: The user ID
            workflow_run_id: The workflow run ID
        """
-        pass
+        pass
--- a/api/services/telephony/factory.py
+++ b/api/services/telephony/factory.py
@ -3,8 +3,8 @@ Factory for creating telephony providers.
 Handles configuration loading from environment (OSS) or database (SaaS).
 The providers themselves don't know or care where config comes from.
 """
-import os
-from typing import Any, Dict, Optional
+
+from typing import Any, Dict

 from loguru import logger

@ -18,36 +18,36 @@ from api.services.telephony.providers.vonage_provider import VonageProvider
 async def load_telephony_config(organization_id: int) -> Dict[str, Any]:
    """
    Load telephony configuration from database.
-    
+
    Args:
        organization_id: Organization ID for database config
-    
+
    Returns:
        Configuration dictionary with provider type and credentials
-    
+
    Raises:
        ValueError: If no configuration found for the organization
    """
    if not organization_id:
        raise ValueError("Organization ID is required to load telephony configuration")
-    
+
    logger.debug(f"Loading telephony config from database for org {organization_id}")
-    
+
    config = await db_client.get_configuration(
        organization_id,
        OrganizationConfigurationKey.TELEPHONY_CONFIGURATION.value,
    )
-    
+
    if config and config.value:
        # Simple single-provider format
        provider = config.value.get("provider", "twilio")
-        
+
        if provider == "twilio":
            return {
                "provider": "twilio",
                "account_sid": config.value.get("account_sid"),
                "auth_token": config.value.get("auth_token"),
-                "from_numbers": config.value.get("from_numbers", [])
+                "from_numbers": config.value.get("from_numbers", []),
            }
        elif provider == "vonage":
            return {
@ -56,41 +56,41 @@ async def load_telephony_config(organization_id: int) -> Dict[str, Any]:
                "private_key": config.value.get("private_key"),
                "api_key": config.value.get("api_key"),
                "api_secret": config.value.get("api_secret"),
-                "from_numbers": config.value.get("from_numbers", [])
+                "from_numbers": config.value.get("from_numbers", []),
            }
        else:
            raise ValueError(f"Unknown provider in config: {provider}")
-    
-    raise ValueError(f"No telephony configuration found for organization {organization_id}")
+
+    raise ValueError(
+        f"No telephony configuration found for organization {organization_id}"
+    )


-async def get_telephony_provider(
-    organization_id: int
-) -> TelephonyProvider:
+async def get_telephony_provider(organization_id: int) -> TelephonyProvider:
    """
    Factory function to create telephony providers.
-    
+
    Args:
        organization_id: Organization ID (required)
-        
+
    Returns:
        Configured telephony provider instance
-        
+
    Raises:
        ValueError: If provider type is unknown or configuration is invalid
    """
    # Load configuration
    config = await load_telephony_config(organization_id)
-    
+
    provider_type = config.get("provider", "twilio")
    logger.info(f"Creating {provider_type} telephony provider")
-    
+
    # Create provider instance with configuration
    if provider_type == "twilio":
        return TwilioProvider(config)
-    
+
    elif provider_type == "vonage":
        return VonageProvider(config)
-    
+
    else:
        raise ValueError(f"Unknown telephony provider: {provider_type}")
--- a/api/services/telephony/providers/init.py
+++ b/api/services/telephony/providers/init.py
@ -1 +1 @@
-# Telephony provider implementations
+# Telephony provider implementations
--- a/api/services/telephony/providers/twilio_provider.py
+++ b/api/services/telephony/providers/twilio_provider.py
@ -1,6 +1,7 @@
 """
 Twilio implementation of the TelephonyProvider interface.
 """
+
 import json
 import random
 from typing import TYPE_CHECKING, Any, Dict, List, Optional
@ -9,9 +10,9 @@ import aiohttp
 from loguru import logger
 from twilio.request_validator import RequestValidator

+from api.enums import WorkflowRunMode
 from api.services.telephony.base import CallInitiationResult, TelephonyProvider
 from api.utils.tunnel import TunnelURLProvider
-from api.enums import WorkflowRunMode

 if TYPE_CHECKING:
    from fastapi import WebSocket
@ -22,14 +23,14 @@ class TwilioProvider(TelephonyProvider):
    Twilio implementation of TelephonyProvider.
    Accepts configuration and works the same regardless of OSS/SaaS mode.
    """
-    
+
    PROVIDER_NAME = WorkflowRunMode.TWILIO.value
    WEBHOOK_ENDPOINT = "twiml"

    def __init__(self, config: Dict[str, Any]):
        """
        Initialize TwilioProvider with configuration.
-        
+
        Args:
            config: Dictionary containing:
                - account_sid: Twilio Account SID
@ -39,11 +40,11 @@ class TwilioProvider(TelephonyProvider):
        self.account_sid = config.get("account_sid")
        self.auth_token = config.get("auth_token")
        self.from_numbers = config.get("from_numbers", [])
-        
+
        # Handle both single number (string) and multiple numbers (list)
        if isinstance(self.from_numbers, str):
            self.from_numbers = [self.from_numbers]
-        
+
        self.base_url = f"https://api.twilio.com/2010-04-01/Accounts/{self.account_sid}"

    async def initiate_call(
@ -58,32 +59,35 @@ class TwilioProvider(TelephonyProvider):
        """
        if not self.validate_config():
            raise ValueError("Twilio provider not properly configured")
-        
+
        endpoint = f"{self.base_url}/Calls.json"
-        
+
        # Select a random phone number
        from_number = random.choice(self.from_numbers)
        logger.info(f"Selected phone number {from_number} for outbound call")
-        
+
        # Prepare call data
-        data = {
-            "To": to_number,
-            "From": from_number,
-            "Url": webhook_url
-        }
-        
+        data = {"To": to_number, "From": from_number, "Url": webhook_url}
+
        # Add status callback if workflow_run_id provided
        if workflow_run_id:
            backend_endpoint = await TunnelURLProvider.get_tunnel_url()
            callback_url = f"https://{backend_endpoint}/api/v1/telephony/twilio/status-callback/{workflow_run_id}"
-            data.update({
-                "StatusCallback": callback_url,
-                "StatusCallbackEvent": ["initiated", "ringing", "answered", "completed"],
-                "StatusCallbackMethod": "POST"
-            })
-        
+            data.update(
+                {
+                    "StatusCallback": callback_url,
+                    "StatusCallbackEvent": [
+                        "initiated",
+                        "ringing",
+                        "answered",
+                        "completed",
+                    ],
+                    "StatusCallbackMethod": "POST",
+                }
+            )
+
        data.update(kwargs)
-        
+
        # Make the API request
        async with aiohttp.ClientSession() as session:
            auth = aiohttp.BasicAuth(self.account_sid, self.auth_token)
@ -91,14 +95,14 @@ class TwilioProvider(TelephonyProvider):
                if response.status != 201:
                    error_data = await response.json()
                    raise Exception(f"Failed to initiate call: {error_data}")
-                
+
                response_data = await response.json()
-                
+
                return CallInitiationResult(
                    call_id=response_data["sid"],
                    status=response_data.get("status", "queued"),
                    provider_metadata={},  # Twilio doesn't need to persist extra data
-                    raw_response=response_data
+                    raw_response=response_data,
                )

    async def get_call_status(self, call_id: str) -> Dict[str, Any]:
@ -107,16 +111,16 @@ class TwilioProvider(TelephonyProvider):
        """
        if not self.validate_config():
            raise ValueError("Twilio provider not properly configured")
-        
+
        endpoint = f"{self.base_url}/Calls/{call_id}.json"
-        
+
        async with aiohttp.ClientSession() as session:
            auth = aiohttp.BasicAuth(self.account_sid, self.auth_token)
            async with session.get(endpoint, auth=auth) as response:
                if response.status != 200:
                    error_data = await response.json()
                    raise Exception(f"Failed to get call status: {error_data}")
-                
+
                return await response.json()

    async def get_available_phone_numbers(self) -> List[str]:
@ -129,11 +133,7 @@ class TwilioProvider(TelephonyProvider):
        """
        Validate Twilio configuration.
        """
-        return bool(
-            self.account_sid and 
-            self.auth_token and 
-            self.from_numbers
-        )
+        return bool(self.account_sid and self.auth_token and self.from_numbers)

    async def verify_webhook_signature(
        self, url: str, params: Dict[str, Any], signature: str
@ -144,7 +144,7 @@ class TwilioProvider(TelephonyProvider):
        if not self.auth_token:
            logger.error("No auth token available for webhook signature verification")
            return False
-        
+
        validator = RequestValidator(self.auth_token)
        return validator.validate(url, params, signature)

@ -155,7 +155,7 @@ class TwilioProvider(TelephonyProvider):
        Generate TwiML response for starting a call session.
        """
        backend_endpoint = await TunnelURLProvider.get_tunnel_url()
-        
+
        twiml_content = f"""<?xml version="1.0" encoding="UTF-8"?>
 <Response>
    <Connect>
@ -168,15 +168,15 @@ class TwilioProvider(TelephonyProvider):
    async def get_call_cost(self, call_id: str) -> Dict[str, Any]:
        """
        Get cost information for a completed Twilio call.
-        
+
        Args:
            call_id: The Twilio Call SID
-            
+
        Returns:
            Dict containing cost information
        """
        endpoint = f"{self.base_url}/Calls/{call_id}.json"
-        
+
        try:
            async with aiohttp.ClientSession() as session:
                auth = aiohttp.BasicAuth(self.account_sid, self.auth_token)
@ -188,34 +188,29 @@ class TwilioProvider(TelephonyProvider):
                            "cost_usd": 0.0,
                            "duration": 0,
                            "status": "error",
-                            "error": str(error_data)
+                            "error": str(error_data),
                        }
-                    
+
                    call_data = await response.json()
-                    
+
                    # Twilio returns price as a negative string (e.g., "-0.0085")
                    price_str = call_data.get("price", "0")
                    cost_usd = abs(float(price_str)) if price_str else 0.0
-                    
+
                    # Duration is in seconds as a string
                    duration = int(call_data.get("duration", "0"))
-                    
+
                    return {
                        "cost_usd": cost_usd,
                        "duration": duration,
                        "status": call_data.get("status", "unknown"),
                        "price_unit": call_data.get("price_unit", "USD"),
-                        "raw_response": call_data
+                        "raw_response": call_data,
                    }
-                    
+
        except Exception as e:
            logger.error(f"Exception fetching Twilio call cost: {e}")
-            return {
-                "cost_usd": 0.0,
-                "duration": 0,
-                "status": "error",
-                "error": str(e)
-            }
+            return {"cost_usd": 0.0, "duration": 0, "status": "error", "error": str(e)}

    def parse_status_callback(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """
@ -228,7 +223,7 @@ class TwilioProvider(TelephonyProvider):
            "to_number": data.get("To"),
            "direction": data.get("Direction"),
            "duration": data.get("CallDuration") or data.get("Duration"),
-            "extra": data  # Include all original data
+            "extra": data,  # Include all original data
        }

    async def handle_websocket(
@ -240,36 +235,38 @@ class TwilioProvider(TelephonyProvider):
    ) -> None:
        """
        Handle Twilio-specific WebSocket connection.
-        
+
        Twilio sends:
        1. "connected" event first
        2. "start" event with streamSid and callSid
        3. Then audio messages
        """
        from api.services.pipecat.run_pipeline import run_pipeline_twilio
-        
+
        try:
            # Wait for "connected" event
            first_msg = await websocket.receive_text()
            msg = json.loads(first_msg)
-            
+
            if msg.get("event") != "connected":
                logger.error(f"Expected 'connected' event, got: {msg.get('event')}")
                await websocket.close(code=4400, reason="Expected connected event")
                return
-            
-            logger.debug(f"Twilio WebSocket connected for workflow_run {workflow_run_id}")
-            
+
+            logger.debug(
+                f"Twilio WebSocket connected for workflow_run {workflow_run_id}"
+            )
+
            # Wait for "start" event with stream details
            start_msg = await websocket.receive_text()
            logger.debug(f"Received start message: {start_msg}")
-            
+
            start_msg = json.loads(start_msg)
            if start_msg.get("event") != "start":
                logger.error("Expected 'start' event second")
                await websocket.close(code=4400, reason="Expected start event")
                return
-            
+
            # Extract Twilio-specific identifiers
            try:
                stream_sid = start_msg["start"]["streamSid"]
@ -278,12 +275,12 @@ class TwilioProvider(TelephonyProvider):
                logger.error("Missing streamSid or callSid in start message")
                await websocket.close(code=4400, reason="Missing stream identifiers")
                return
-            
+
            # Run the Twilio pipeline
            await run_pipeline_twilio(
                websocket, stream_sid, call_sid, workflow_id, workflow_run_id, user_id
            )
-            
+
        except Exception as e:
            logger.error(f"Error in Twilio WebSocket handler: {e}")
-            raise
+            raise
--- a/api/services/telephony/providers/vonage_provider.py
+++ b/api/services/telephony/providers/vonage_provider.py
@ -1,6 +1,7 @@
 """
 Vonage (Nexmo) implementation of the TelephonyProvider interface.
 """
+
 import json
 import random
 import time
@ -10,9 +11,9 @@ import aiohttp
 import jwt
 from loguru import logger

+from api.enums import WorkflowRunMode
 from api.services.telephony.base import CallInitiationResult, TelephonyProvider
 from api.utils.tunnel import TunnelURLProvider
-from api.enums import WorkflowRunMode

 if TYPE_CHECKING:
    from fastapi import WebSocket
@ -23,14 +24,14 @@ class VonageProvider(TelephonyProvider):
    Vonage implementation of TelephonyProvider.
    Uses JWT authentication and NCCO for call control.
    """
-    
+
    PROVIDER_NAME = WorkflowRunMode.VONAGE.value
    WEBHOOK_ENDPOINT = "ncco"
-    
+
    def __init__(self, config: Dict[str, Any]):
        """
        Initialize VonageProvider with configuration.
-        
+
        Args:
            config: Dictionary containing:
                - api_key: Vonage API Key
@ -44,25 +45,27 @@ class VonageProvider(TelephonyProvider):
        self.application_id = config.get("application_id")
        self.private_key = config.get("private_key")
        self.from_numbers = config.get("from_numbers", [])
-        
+
        # Handle both single number (string) and multiple numbers (list)
        if isinstance(self.from_numbers, str):
            self.from_numbers = [self.from_numbers]
-        
+
        self.base_url = "https://api.nexmo.com"

    def _generate_jwt(self) -> str:
        """Generate JWT token for Vonage API authentication."""
        if not self.application_id or not self.private_key:
-            raise ValueError("Application ID and private key required for JWT generation")
-        
+            raise ValueError(
+                "Application ID and private key required for JWT generation"
+            )
+
        claims = {
            "application_id": self.application_id,
            "iat": int(time.time()),
            "exp": int(time.time()) + 3600,
-            "jti": str(time.time())
+            "jti": str(time.time()),
        }
-        
+
        return jwt.encode(claims, self.private_key, algorithm="RS256")

    async def initiate_call(
@ -77,68 +80,57 @@ class VonageProvider(TelephonyProvider):
        """
        if not self.validate_config():
            raise ValueError("Vonage provider not properly configured")
-        
+
        endpoint = f"{self.base_url}/v1/calls"
-        
+
        # Select a random phone number
        from_number = random.choice(self.from_numbers)
        # Remove '+' prefix for Vonage
        from_number = from_number.replace("+", "")
        to_number = to_number.replace("+", "")
-        
+
        logger.info(f"Selected phone number {from_number} for outbound call")
-        
+
        # Prepare call data
        data = {
-            "to": [{
-                "type": "phone",
-                "number": to_number
-            }],
-            "from": {
-                "type": "phone",
-                "number": from_number
-            },
+            "to": [{"type": "phone", "number": to_number}],
+            "from": {"type": "phone", "number": from_number},
            "answer_url": [webhook_url],
-            "answer_method": "GET"
+            "answer_method": "GET",
        }
-        
+
        # Add event webhook if workflow_run_id provided
        if workflow_run_id:
            backend_endpoint = await TunnelURLProvider.get_tunnel_url()
            event_url = f"https://{backend_endpoint}/api/v1/telephony/vonage/events/{workflow_run_id}"
-            data.update({
-                "event_url": [event_url],
-                "event_method": "POST"
-            })
-        
+            data.update({"event_url": [event_url], "event_method": "POST"})
+
        data.update(kwargs)
-        
+
        # Generate JWT token
        token = self._generate_jwt()
        headers = {
            "Authorization": f"Bearer {token}",
-            "Content-Type": "application/json"
+            "Content-Type": "application/json",
        }
-        
+
        # Make the API request
        async with aiohttp.ClientSession() as session:
-            async with session.post(
-                endpoint, 
-                json=data,
-                headers=headers
-            ) as response:
+            async with session.post(endpoint, json=data, headers=headers) as response:
                response_data = await response.json()
-                
+
                if response.status != 201:
                    raise Exception(f"Failed to initiate call: {response_data}")
-                
+
                return CallInitiationResult(
                    call_id=response_data["uuid"],
                    status=response_data.get("status", "started"),
                    provider_metadata={
-                        "call_uuid": response_data["uuid"]  # Vonage needs UUID persisted for WebSocket
+                        "call_uuid": response_data[
+                            "uuid"
+                        ]  # Vonage needs UUID persisted for WebSocket
                    },
-                    raw_response=response_data
+                    raw_response=response_data,
                )

    async def get_call_status(self, call_id: str) -> Dict[str, Any]:
@ -147,21 +139,19 @@ class VonageProvider(TelephonyProvider):
        """
        if not self.validate_config():
            raise ValueError("Vonage provider not properly configured")
-        
+
        endpoint = f"{self.base_url}/v1/calls/{call_id}"
-        
+
        # Generate JWT token
        token = self._generate_jwt()
-        headers = {
-            "Authorization": f"Bearer {token}"
-        }
-        
+        headers = {"Authorization": f"Bearer {token}"}
+
        async with aiohttp.ClientSession() as session:
            async with session.get(endpoint, headers=headers) as response:
                if response.status != 200:
                    error_data = await response.json()
                    raise Exception(f"Failed to get call status: {error_data}")
-                
+
                return await response.json()

    async def get_available_phone_numbers(self) -> List[str]:
@ -174,11 +164,7 @@ class VonageProvider(TelephonyProvider):
        """
        Validate Vonage configuration.
        """
-        return bool(
-            self.application_id and 
-            self.private_key and 
-            self.from_numbers
-        )
+        return bool(self.application_id and self.private_key and self.from_numbers)

    async def verify_webhook_signature(
        self, url: str, params: Dict[str, Any], signature: str
@ -190,14 +176,14 @@ class VonageProvider(TelephonyProvider):
        if not self.api_secret:
            logger.error("No API secret available for webhook signature verification")
            return False
-        
+
        try:
            # Vonage sends JWT in Authorization header. Verify the JWT signature
            decoded = jwt.decode(
-                signature, 
-                self.api_secret, 
+                signature,
+                self.api_secret,
                algorithms=["HS256"],
-                options={"verify_signature": True}
+                options={"verify_signature": True},
            )
            return True
        except jwt.InvalidTokenError:
@ -211,43 +197,42 @@ class VonageProvider(TelephonyProvider):
        NCCO (Nexmo Call Control Objects) is JSON-based, unlike TwiML which is XML.
        """
        backend_endpoint = await TunnelURLProvider.get_tunnel_url()
-        
+
        # NCCO for WebSocket connection
        ncco = [
            {
                "action": "connect",
-                "endpoint": [{
-                    "type": "websocket",
-                    "uri": f"wss://{backend_endpoint}/api/v1/telephony/ws/{workflow_id}/{user_id}/{workflow_run_id}",
-                    "content-type": "audio/l16;rate=16000",  # 16kHz Linear PCM
-                    "headers": {}
-                }]
+                "endpoint": [
+                    {
+                        "type": "websocket",
+                        "uri": f"wss://{backend_endpoint}/api/v1/telephony/ws/{workflow_id}/{user_id}/{workflow_run_id}",
+                        "content-type": "audio/l16;rate=16000",  # 16kHz Linear PCM
+                        "headers": {},
+                    }
+                ],
            }
        ]
-        
+
        return json.dumps(ncco)

    def _get_auth_headers(self) -> Dict[str, str]:
        """Generate authorization headers for Vonage API."""
        token = self._generate_jwt()
-        return {
-            "Authorization": f"Bearer {token}",
-            "Content-Type": "application/json"
-        }
+        return {"Authorization": f"Bearer {token}", "Content-Type": "application/json"}

    async def get_call_cost(self, call_id: str) -> Dict[str, Any]:
        """
        Get cost information for a completed Vonage call.
-        
+
        Args:
            call_id: The Vonage Call UUID
-            
+
        Returns:
            Dict containing cost information
        """
        headers = self._get_auth_headers()
        endpoint = f"https://api.nexmo.com/v1/calls/{call_id}"
-        
+
        try:
            async with aiohttp.ClientSession() as session:
                async with session.get(endpoint, headers=headers) as response:
@ -258,39 +243,34 @@ class VonageProvider(TelephonyProvider):
                            "cost_usd": 0.0,
                            "duration": 0,
                            "status": "error",
-                            "error": str(error_data)
+                            "error": str(error_data),
                        }
-                    
+
                    call_data = await response.json()
-                    
+
                    # Vonage returns price and rate
                    # Price is the total cost, rate is the per-minute rate
                    price = float(call_data.get("price", 0))
                    cost_usd = price  # Vonage returns positive values
-                    
+
                    # Duration is in seconds
                    duration = int(call_data.get("duration", 0))
-                    
+
                    # Get the call status
                    status = call_data.get("status", "unknown")
-                    
+
                    return {
                        "cost_usd": cost_usd,
                        "duration": duration,
                        "status": status,
                        "price_unit": "USD",  # Vonage uses USD by default
                        "rate": call_data.get("rate", 0),  # Per-minute rate
-                        "raw_response": call_data
+                        "raw_response": call_data,
                    }
-                    
+
        except Exception as e:
            logger.error(f"Exception fetching Vonage call cost: {e}")
-            return {
-                "cost_usd": 0.0,
-                "duration": 0,
-                "status": "error",
-                "error": str(e)
-            }
+            return {"cost_usd": 0.0, "duration": 0, "status": "error", "error": str(e)}

    def parse_status_callback(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """
@ -300,14 +280,14 @@ class VonageProvider(TelephonyProvider):
        status_map = {
            "started": "initiated",
            "ringing": "ringing",
-            "answered": "answered", 
+            "answered": "answered",
            "complete": "completed",
            "failed": "failed",
            "busy": "busy",
            "timeout": "no-answer",
-            "rejected": "busy"
+            "rejected": "busy",
        }
-        
+
        return {
            "call_id": data.get("uuid", ""),
            "status": status_map.get(data.get("status", ""), data.get("status", "")),
@ -315,7 +295,7 @@ class VonageProvider(TelephonyProvider):
            "to_number": data.get("to"),
            "direction": data.get("direction"),
            "duration": data.get("duration"),
-            "extra": data  # Include all original data
+            "extra": data,  # Include all original data
        }

    async def handle_websocket(
@ -327,14 +307,14 @@ class VonageProvider(TelephonyProvider):
    ) -> None:
        """
        Handle Vonage-specific WebSocket connection.
-        
+
        Vonage can send:
        1. JSON metadata first (websocket:connected event)
        2. Or directly start with binary audio
        """
        from api.db import db_client
        from api.services.pipecat.run_pipeline import run_pipeline_vonage
-        
+
        try:
            # Get workflow run to extract call UUID
            workflow_run = await db_client.get_workflow_run(workflow_run_id)
@ -342,38 +322,48 @@ class VonageProvider(TelephonyProvider):
                logger.error(f"Workflow run {workflow_run_id} not found")
                await websocket.close(code=4404, reason="Workflow run not found")
                return
-            
+
            # Get workflow for organization info
            workflow = await db_client.get_workflow(workflow_id, user_id)
            if not workflow:
                logger.error(f"Workflow {workflow_id} not found")
                await websocket.close(code=4404, reason="Workflow not found")
                return
-            
+
            # Extract call UUID from workflow run context
-            call_uuid = workflow_run.gathered_context.get("call_uuid") if workflow_run.gathered_context else None
-            
+            call_uuid = (
+                workflow_run.gathered_context.get("call_uuid")
+                if workflow_run.gathered_context
+                else None
+            )
+
            if not call_uuid:
-                logger.error(f"No call UUID found for Vonage connection in workflow run {workflow_run_id}")
+                logger.error(
+                    f"No call UUID found for Vonage connection in workflow run {workflow_run_id}"
+                )
                await websocket.close(code=4400, reason="Missing call UUID")
                return
-            
-            logger.info(f"Vonage WebSocket connected for workflow_run {workflow_run_id}, call_uuid: {call_uuid}")
-            
+
+            logger.info(
+                f"Vonage WebSocket connected for workflow_run {workflow_run_id}, call_uuid: {call_uuid}"
+            )
+
            # Peek at first message to see if it's metadata or audio
            first_msg = await websocket.receive()
-            
+
            if "text" in first_msg:
                # JSON metadata - check if it's the connection event
                msg = json.loads(first_msg["text"])
                if msg.get("event") == "websocket:connected":
-                    logger.debug(f"Received Vonage connection confirmation for {workflow_run_id}")
+                    logger.debug(
+                        f"Received Vonage connection confirmation for {workflow_run_id}"
+                    )
                # Continue to pipeline regardless of message type
            elif "bytes" in first_msg:
                # Binary audio - Vonage started with audio immediately
                logger.debug(f"Vonage started with binary audio for {workflow_run_id}")
                # The pipeline will handle this first audio chunk
-            
+
            # Run the Vonage pipeline
            await run_pipeline_vonage(
                websocket,
@ -382,9 +372,9 @@ class VonageProvider(TelephonyProvider):
                workflow.organization_id,
                workflow_id,
                workflow_run_id,
-                user_id
+                user_id,
            )
-            
+
        except Exception as e:
            logger.error(f"Error in Vonage WebSocket handler: {e}")
-            raise
+            raise
--- a/api/services/telephony/stasis_rtp_transport.py
+++ b/api/services/telephony/stasis_rtp_transport.py
@ -22,9 +22,7 @@ from pipecat.frames.frames import (
 )
 from pipecat.serializers.base_serializer import FrameSerializer
 from pipecat.transports.base_input import BaseInputTransport
-from pipecat.transports.base_output import (
-    BaseOutputTransport
-)
+from pipecat.transports.base_output import BaseOutputTransport
 from pipecat.transports.base_transport import BaseTransport, TransportParams


--- a/api/services/workflow/pipecat_engine.py
+++ b/api/services/workflow/pipecat_engine.py
@ -14,14 +14,14 @@ from pipecat.frames.frames import (
    CancelFrame,
    EndFrame,
    FunctionCallResultProperties,
+    LLMContextFrame,
    LLMFullResponseEndFrame,
    LLMFullResponseStartFrame,
    TTSSpeakFrame,
 )
 from pipecat.pipeline.task import PipelineTask
-from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContextFrame
+from pipecat.processors.aggregators.llm_context import LLMContext
 from pipecat.services.llm_service import FunctionCallParams
-from pipecat.services.openai.llm import OpenAILLMContext
 from pipecat.transports.base_transport import BaseTransport
 from pipecat.utils.enums import EndTaskReason

@ -63,7 +63,7 @@ class PipecatEngine:
        *,
        task: Optional[PipelineTask] = None,
        llm: Optional["LLMService"] = None,
-        context: Optional[OpenAILLMContext] = None,
+        context: Optional[LLMContext] = None,
        tts: Optional[Any] = None,
        transport: Optional[BaseTransport] = None,
        workflow: WorkflowGraph,
@ -82,7 +82,6 @@ class PipecatEngine:
        self._workflow_run_id = workflow_run_id
        self._initialized = False
        self._client_disconnected = False
-        self._pending_function_calls = 0
        self._current_node: Optional[Node] = None
        self._gathered_context: dict = {}
        self._user_response_timeout_task: Optional[asyncio.Task] = None
@ -102,29 +101,9 @@ class PipecatEngine:
        self._voicemail_detector = None
        self._voicemail_detection_task: Optional[asyncio.Task] = None

-        # This transition is generated by the llm as part of tool call. This can
-        # also be accompanied with some content which can be played using TTS. If the
-        # bot is interrupted, we would cancel this transition (we do cancel this currently when
-        # the next generation starts in handle_generation_started callback handler.)
-        self._pending_generated_transition_after_context_push: Optional[
-            Callable[[], Awaitable[None]]
-        ] = None
-
-        # This is the transtion which is typically programmatic transition, and not goes as
-        # tool call to LLM. This is not interrupted by the user and is done on context push
-        self._pending_control_transition_after_context_push: Optional[
-            Callable[[], Awaitable[None]]
-        ] = None
-
-        # Flag to determine if the current llm generation has a text completion
-        self._defer_context_push: bool = False
-
        # Lazy loaded built-in function schemas
        self._builtin_function_schemas: Optional[list[dict]] = None

-        # Flag to control whether to queue context frame
-        self._queue_context_frame: bool = True
-
        # Track current LLM reference text for TTS aggregation correction
        self._current_llm_reference_text: str = ""

@ -211,23 +190,15 @@ class PipecatEngine:

    async def _create_transition_func(self, name: str, transition_to_node: str):
        async def transition_func(function_call_params: FunctionCallParams) -> None:
-            """Inner function that handles the actual tool invocation."""
+            """Inner function that handles the node change tool calls"""
            try:
-                # Track pending function call
-                self._pending_function_calls += 1
-                logger.debug(
-                    f"Function call pending: {function_call_params.function_name} (total: {self._pending_function_calls})"
-                )

-                # For edge functions, prevent LLM completion until transition (run_llm=False)
-                # For node functions, allow immediate completion (run_llm=True)
                async def on_context_updated() -> None:
                    """
-                    Framework will run this function after the function call result has been updated in the context.
+                    pipecat framework will run this function after the function call result has been updated in the context.
                    This way, when we do set_node from within this function, and go for LLM completion with updated
                    system prompts, the context is updated with function call result.
                    """
-                    self._pending_function_calls -= 1
                    # Perform variable extraction before transitioning to new node
                    await self._perform_variable_extraction_if_needed(
                        self._current_node
@ -241,41 +212,14 @@ class PipecatEngine:
                    on_context_updated=on_context_updated,
                )

-                async def _invoke_result_callback():
-                    """
-                    Functions are executed immediately when they come from LLM as part of text completion.
-                    But, if the LLM completion also has some text, we would want to not call the function if the user interrupts the speech.
-                    We would also not want the function to be added to context, so that the LLM can call the function again. Hence, we
-                    defer the function invocation until we receive on_context_updated callback, i.e the bot has finished speaking
-                    the text that was generated.
-                    """
-                    await function_call_params.result_callback(
-                        result, properties=properties
-                    )
-
-                if self._defer_context_push:
-                    """
-                    We set the flag to _defer_context_push when we receive text in the current generation from LLM. 
-                    This is set in the handle_llm_generated_text callback handler.
-                    """
-                    logger.debug(
-                        "Deferring transition function result until context push"
-                    )
-                    # Only one deferred transition should exist at any time.
-                    # Overwrite if one is somehow already set (unexpected).
-                    self._pending_generated_transition_after_context_push = (
-                        _invoke_result_callback
-                    )
-                else:
-                    """
-                    If there was no text in the current generation, and we only had function call,
-                    lets invoke the result callback, so that framework can call on_context_updated and
-                    we can do switch node.
-                    """
-                    await _invoke_result_callback()
+                # Call results callback from the pipecat framework
+                # so that a new llm generation can be triggered if
+                # required
+                await function_call_params.result_callback(
+                    result, properties=properties
+                )
            except Exception as e:
                logger.error(f"Error in transition function {name}: {str(e)}")
-                self._pending_function_calls = 0
                error_result = {"status": "error", "error": str(e)}
                await function_call_params.result_callback(error_result)

@ -362,27 +306,6 @@ class PipecatEngine:
            ]
        )

-    async def _setup_static_start_node_transition(self, node: Node) -> None:
-        """Set up the deferred transition for static start nodes."""
-        if not node.out_edges:
-            return
-
-        next_node_id = node.out_edges[0].target
-
-        if not node.wait_for_user_response:
-            # Normal static start node - transition immediately after context push
-            async def _deferred_static_transition():
-                try:
-                    await self.set_node(next_node_id)
-                except Exception as exc:
-                    logger.error(
-                        f"Error executing deferred static node transition to {next_node_id}: {exc}"
-                    )
-
-            self._pending_control_transition_after_context_push = (
-                _deferred_static_transition
-            )
-
    async def _perform_variable_extraction_if_needed(
        self, previous_node: Optional[Node]
    ) -> None:
@ -441,17 +364,7 @@ class PipecatEngine:
            functions,
        ) = await self._compose_system_message_functions_for_node(node)
        await self._update_llm_context(system_message, functions)
-
-        # Queue context frame if needed
-        if self._queue_context_frame:
-            await self.task.queue_frame(OpenAILLMContextFrame(self.context))
-        else:
-            logger.debug(
-                f"Not queueing context frame for node: {node.name} as _queue_context_frame is False"
-            )
-
-        # Reset _queue_context_frame as default behavior
-        self._queue_context_frame = True
+        await self.task.queue_frame(LLMContextFrame(self.context))

    async def set_node(self, node_id: str):
        """
@ -525,12 +438,7 @@ class PipecatEngine:
            await asyncio.sleep(delay_duration)

        if node.is_static:
-            # Queue TTS for static start node
-            formatted_prompt = self._format_prompt(node.prompt)
-            await self._queue_tts_response(formatted_prompt)
-
-            # Set up deferred transition for static start nodes
-            await self._setup_static_start_node_transition(node)
+            raise ValueError("Static nodes are not supported!")
        else:
            # Start generation for non-static start node
            await self._setup_llm_context_and_start_generation(node)
@ -538,66 +446,24 @@ class PipecatEngine:
    async def _handle_end_node(self, node: Node) -> None:
        """Handle end node execution."""
        if node.is_static:
-            # Queue TTS for static end node
-            formatted_prompt = self._format_prompt(node.prompt)
-            await self._queue_tts_response(formatted_prompt)
+            raise ValueError("Static nodes are not supported!")
        else:
-            # Start generation for non-static end node
            await self._setup_llm_context_and_start_generation(node)

        # If this end node has extraction enabled, perform extraction immediately
        if node.extraction_enabled and node.extraction_variables:
            await self._perform_variable_extraction_if_needed(node)

-        # TODO: Extract disposition code from extracted variables
-        # Defer send_end_task_frame using _pending_control_transition_after_context_push
-
-        # Decide the end-task reason dynamically depending on call_disposition.
-        async def _deferred_end_task():
-            # call_disposition is the disposition which is generated from
-            # llm call based on the conversation so far.
-            # TODO: Make this more generic based on configuration or llm prompting
-            disposition = self._gathered_context.get("call_disposition")
-            if disposition == "XFER":
-                reason = EndTaskReason.USER_QUALIFIED.value
-            else:
-                reason = EndTaskReason.USER_DISQUALIFIED.value
-            await self.send_end_task_frame(reason)
-
-        self._pending_control_transition_after_context_push = _deferred_end_task
+        await self.send_end_task_frame(EndTaskReason.USER_QUALIFIED.value)

    async def _handle_agent_node(self, node: Node) -> None:
        """Handle agent node execution."""
        if node.is_static:
-            # Queue TTS for static agent node
-            formatted_prompt = self._format_prompt(node.prompt)
-            await self._queue_tts_response(formatted_prompt)
-
-            # Set up deferred transition for static agent nodes
-            await self._setup_agent_node_transition(node)
+            raise ValueError("Static nodes are not supported!")
        else:
            # Set context and functions for non-static agent node
            await self._setup_llm_context_and_start_generation(node)

-    async def _setup_agent_node_transition(self, node: Node) -> None:
-        """Set up the deferred transition for static agent nodes."""
-        if not node.out_edges:
-            return
-
-        next_node_id = node.out_edges[0].target
-
-        async def _deferred_static_transition():
-            try:
-                await self.set_node(next_node_id)
-            except Exception as exc:
-                logger.error(
-                    f"Error executing deferred static node transition to {next_node_id}: {exc}"
-                )
-
-        self._pending_control_transition_after_context_push = (
-            _deferred_static_transition
-        )
-
    async def send_end_task_frame(
        self,
        reason: str,
@ -640,7 +506,7 @@ class PipecatEngine:
            # Store the mapped disconnect reason
            self._gathered_context["call_disposition"] = mapped_disposition

-        # TODO: Generalise this, currently tailored to Kapil's use case
+        # TODO: Generalise this; the address is assembled from hard-coded call-context keys
        self._gathered_context["address"] = ", ".join(
            [
                self._call_context_vars.get("address1", ""),
@ -759,55 +625,6 @@ class PipecatEngine:

        return system_message, functions

-    # ------------------------------------------------------------------
-    # Pending transition handling
-    # ------------------------------------------------------------------
-
-    async def flush_pending_transitions(self, *, source: str = "context_push"):
-        """Execute and clear any pending transitions.
-
-        Args:
-            source: Indicates the trigger that caused this flush:
-                - "context_push": the assistant context aggregator completed a push.
-        """
-
-        if source != "context_push":
-            raise ValueError("Invalid flush source – expected 'context_push'")
-
-        len_pending_functions = 0
-
-        if self._pending_generated_transition_after_context_push is not None:
-            len_pending_functions += 1
-        if self._pending_control_transition_after_context_push is not None:
-            len_pending_functions += 1
-
-        # Nothing to do
-        if len_pending_functions == 0:
-            return
-
-        logger.debug(
-            f"Flushing {len_pending_functions} pending transition(s) after {source.replace('_', ' ')}"
-        )
-
-        # Generated transition
-        if self._pending_generated_transition_after_context_push is not None:
-            pending_cb = self._pending_generated_transition_after_context_push
-            self._pending_generated_transition_after_context_push = None
-            try:
-                await pending_cb()
-            except Exception as exc:  # pragma: no cover
-                logger.error(f"Error executing deferred transition: {exc}")
-
-        # Control transition (context push)
-        if self._pending_control_transition_after_context_push is not None:
-            logger.debug("Executing control transition after context push")
-            static_cb = self._pending_control_transition_after_context_push
-            self._pending_control_transition_after_context_push = None
-            try:
-                await static_cb()
-            except Exception as exc:  # pragma: no cover
-                logger.error(f"Error executing deferred static node transition: {exc}")
-
    def create_should_mute_callback(self) -> Callable[[STTMuteFilter], Awaitable[bool]]:
        """
        This callback is called by STTMuteFilter to determine if the STT should be muted.
@ -828,15 +645,6 @@ class PipecatEngine:
        """
        return engine_callbacks.create_max_duration_callback(self)

-    def create_llm_generated_text_callback(self):
-        """
-        This callback is called when some text is generated by the LLM.
-        We use this to defer the result_callback of the node transition functions if
-        there is set_node called along with some text generated. This way, we will
-        have the context sent in the next generation from new node.
-        """
-        return engine_callbacks.create_llm_generated_text_callback(self)
-
    def create_generation_started_callback(self):
        """
        This callback is called when a new generation starts.
@ -844,26 +652,12 @@ class PipecatEngine:
        """
        return engine_callbacks.create_generation_started_callback(self)

-    def create_user_stopped_speaking_callback(self):
-        """
-        This callback is called when the user stops speaking.
-        We use this to handle transitions when wait_for_user_response is enabled.
-        """
-        return engine_callbacks.create_user_stopped_speaking_callback(self)
-
-    def create_user_started_speaking_callback(self):
-        """
-        This callback is called when the user starts speaking.
-        We use this to handle wait_for_user_greeting functionality.
-        """
-        return engine_callbacks.create_user_started_speaking_callback(self)
-
    def create_aggregation_correction_callback(self) -> Callable[[str], str]:
        """Create a callback that corrects corrupted aggregation using reference text."""
        return engine_callbacks.create_aggregation_correction_callback(self)

-    def set_context(self, context: OpenAILLMContext) -> None:
-        """Set the OpenAI LLM context.
+    def set_context(self, context: LLMContext) -> None:
+        """Set the LLM context.

        This allows setting the context after the engine has been created,
        which is useful when the context needs to be created after the engine.
--- a/api/services/workflow/pipecat_engine_callbacks.py
+++ b/api/services/workflow/pipecat_engine_callbacks.py
@ -14,6 +14,7 @@ import re
 from typing import TYPE_CHECKING, Awaitable, Callable

 from loguru import logger
+
 from pipecat.frames.frames import (
    LLMFullResponseEndFrame,
    LLMFullResponseStartFrame,
@ -23,9 +24,8 @@ from pipecat.processors.filters.stt_mute_filter import STTMuteFilter
 from pipecat.utils.enums import EndTaskReason

 if TYPE_CHECKING:
-    from pipecat.processors.user_idle_processor import UserIdleProcessor
-
    from api.services.workflow.pipecat_engine import PipecatEngine
+    from pipecat.processors.user_idle_processor import UserIdleProcessor


 # ---------------------------------------------------------------------------
@ -114,23 +114,6 @@ def create_max_duration_callback(engine: "PipecatEngine"):
    return handle_max_duration


-# ---------------------------------------------------------------------------
-# LLM-generated-text handling
-# ---------------------------------------------------------------------------
-
-
-def create_llm_generated_text_callback(engine: "PipecatEngine"):
-    """Return a callback invoked when the LLM emits text (not only tool calls)."""
-
-    async def handle_llm_generated_text():  # noqa: D401
-        logger.debug(
-            "Generation has text content in current response - deferring context push from set_node"
-        )
-        engine._defer_context_push = True
-
-    return handle_llm_generated_text
-
-
 # ---------------------------------------------------------------------------
 # Generation-started handling
 # ---------------------------------------------------------------------------
@ -140,96 +123,13 @@ def create_generation_started_callback(engine: "PipecatEngine"):
    """Return a callback that resets flags at the start of each LLM generation."""

    async def handle_generation_started():  # noqa: D401
-        logger.debug("LLM generation started - resetting defer flags and tool counters")
-        engine._defer_context_push = False
-        engine._pending_function_calls = 0
-        engine._pending_generated_transition_after_context_push = None
+        logger.debug("LLM generation started in callback processor")
        # Clear reference text from previous generation
        engine._current_llm_reference_text = ""

    return handle_generation_started


-# ---------------------------------------------------------------------------
-# User-stopped-speaking handling
-# ---------------------------------------------------------------------------
-
-
-def create_user_stopped_speaking_callback(engine: "PipecatEngine"):
-    """Return a callback that handles when the user stops speaking.
-
-    According to simplified flow:
-    - For start nodes with wait_for_user_response=True:
-      - Cancel timeout task if still active
-      - Transition to next node with _queue_context_frame=False
-    """
-
-    async def handle_user_stopped_speaking():
-        # Only handle if current node is a start node with wait_for_user_response
-        if (
-            engine._current_node
-            and engine._current_node.is_start
-            and engine._current_node.wait_for_user_response
-            and engine._current_node.out_edges
-        ):
-            # Cancel timeout task if it's still active
-            if (
-                engine._user_response_timeout_task
-                and not engine._user_response_timeout_task.done()
-            ):
-                logger.debug("Cancelling user response timeout - user responded")
-                engine._user_response_timeout_task.cancel()
-                engine._user_response_timeout_task = None
-
-            # Transition to next node
-            next_node_id = engine._current_node.out_edges[0].target
-            logger.debug(
-                f"User stopped speaking after wait_for_user_response - transitioning to: {next_node_id}"
-            )
-
-            # Set flag to not queue context frame since
-            # it will be pushed by user context aggregator
-            # we are just setting the context with next node's
-            # functions and prompts
-            engine._queue_context_frame = False
-
-            # Transition to next node
-            await engine.set_node(next_node_id)
-
-    return handle_user_stopped_speaking
-
-
-# ---------------------------------------------------------------------------
-# User-started-speaking handling
-# ---------------------------------------------------------------------------
-
-
-def create_user_started_speaking_callback(engine: "PipecatEngine"):
-    """Return a callback that handles when the user starts speaking.
-
-    According to simplified flow:
-    - For start nodes with wait_for_user_response=True:
-      - Cancel the timeout timer if it exists (but don't set to None)
-    """
-
-    async def handle_user_started_speaking():
-        # Only handle if current node is a start node with wait_for_user_response
-        if (
-            engine._current_node
-            and engine._current_node.is_start
-            and engine._current_node.wait_for_user_response
-            and engine._user_response_timeout_task
-            and not engine._user_response_timeout_task.done()
-        ):
-            logger.debug(
-                "User started speaking during wait_for_user_response - cancelling timeout timer"
-            )
-            engine._user_response_timeout_task.cancel()
-            # Don't set to None here - let user_stopped_speaking handle the transition
-
-    return handle_user_started_speaking
-
-
 def create_aggregation_correction_callback(engine: "PipecatEngine"):
    """Create a callback that uses engine's reference text to correct corrupted aggregation."""

--- a/api/services/workflow/pipecat_engine_utils.py
+++ b/api/services/workflow/pipecat_engine_utils.py
@ -2,16 +2,10 @@ from __future__ import annotations

 from typing import Any, Dict, List

-from google.genai.types import (
-    Content,
-    Part,
-)
+from api.utils.template_renderer import render_template
 from pipecat.adapters.schemas.function_schema import FunctionSchema
 from pipecat.adapters.schemas.tools_schema import ToolsSchema
-from pipecat.services.google.llm import GoogleLLMContext
-from pipecat.services.openai.llm import OpenAILLMContext
-
-from api.utils.template_renderer import render_template
+from pipecat.processors.aggregators.llm_context import LLMContext

 __all__ = [
    "get_function_schema",
@ -44,7 +38,7 @@ def get_function_schema(


 def update_llm_context(
-    context: OpenAILLMContext,
+    context: LLMContext,
    system_message: Dict[str, Any],
    functions: List[FunctionSchema],
 ) -> None:
@ -59,21 +53,6 @@ def update_llm_context(
    # associated with the current LLM service can convert them to the correct
    # provider-specific representation when required.
    tools_schema = ToolsSchema(standard_tools=functions)
-
-    if isinstance(context, GoogleLLMContext):
-        context.system_message = system_message["content"]
-
-        if functions:
-            # Lets only call set_tools if we have functions, else Gemini will
-            # throw an exception
-            context.set_tools(tools_schema)
-
-        if context.messages[-1].role != "user":
-            # Google expects the last message should end with user message
-            context.add_message(Content(role="user", parts=[Part(text="...")]))
-        return
-
-    # In case of OpenAILLMContext, replace the system message with incoming system message
    previous_interactions = context.messages

    # Filter out old system messages but keep user/assistant/function content.
--- a/api/services/workflow/pipecat_engine_variable_extractor.py
+++ b/api/services/workflow/pipecat_engine_variable_extractor.py
@ -7,11 +7,11 @@ from typing import TYPE_CHECKING, Any, List
 from loguru import logger
 from openai import AsyncOpenAI
 from opentelemetry import trace
-from pipecat.services.openai.llm import OpenAILLMContext
-from pipecat.utils.tracing.service_attributes import add_llm_span_attributes

 from api.services.pipecat.tracing_config import is_tracing_enabled
 from api.services.workflow.dto import ExtractionVariableDTO
+from pipecat.processors.aggregators.llm_context import LLMContext
+from pipecat.utils.tracing.service_attributes import add_llm_span_attributes

 if TYPE_CHECKING:
    from api.services.workflow.pipecat_engine import PipecatEngine
@ -139,7 +139,7 @@ class VariableExtractionManager:
            f"{conversation_history}"
        )

-        extraction_context = OpenAILLMContext()
+        extraction_context = LLMContext()
        extraction_messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
@ -171,7 +171,7 @@ class VariableExtractionManager:
                    service_name="OpenAILLMService",
                    model=self._model,
                    operation_name="variable_extraction",
-                    messages=json.dumps(extraction_messages),
+                    messages=extraction_messages,
                    output=llm_response,
                    stream=False,
                    parameters={"temperature": 0.0, "response_format": "json_object"},
--- a/api/services/workflow/workflow.py
+++ b/api/services/workflow/workflow.py
@ -44,8 +44,6 @@ class Node:
        self.extraction_prompt = data.extraction_prompt
        self.extraction_variables = data.extraction_variables
        self.add_global_prompt = data.add_global_prompt
-        self.wait_for_user_response = data.wait_for_user_response
-        self.wait_for_user_response_timeout = data.wait_for_user_response_timeout
        self.detect_voicemail = data.detect_voicemail
        self.delayed_start = data.delayed_start
        self.delayed_start_duration = data.delayed_start_duration