From 4640f69f9bee0d5094dab2ca5a5794ed007e1395 Mon Sep 17 00:00:00 2001 From: Abhishek Kumar Date: Wed, 10 Dec 2025 09:07:03 +0700 Subject: [PATCH] feat: add smart turn v3 --- api/services/pipecat/transport_setup.py | 29 ++----------------- .../workflow/pipecat_engine_callbacks.py | 8 ----- pipecat | 2 +- scripts/setup_pipecat.sh | 2 +- 4 files changed, 4 insertions(+), 37 deletions(-) diff --git a/api/services/pipecat/transport_setup.py b/api/services/pipecat/transport_setup.py index 574ddc3..4137cce 100644 --- a/api/services/pipecat/transport_setup.py +++ b/api/services/pipecat/transport_setup.py @@ -7,9 +7,6 @@ from api.db import db_client from api.enums import OrganizationConfigurationKey from api.services.looptalk.internal_transport import InternalTransport from api.services.pipecat.audio_config import AudioConfig -from api.services.smart_turn.websocket_smart_turn import ( - WebSocketSmartTurnAnalyzer, -) from api.services.telephony.stasis_rtp_connection import StasisRTPConnection from api.services.telephony.stasis_rtp_serializer import StasisRTPFrameSerializer from api.services.telephony.stasis_rtp_transport import ( @@ -20,6 +17,7 @@ from pipecat.audio.filters.rnnoise_filter import RNNoiseFilter from pipecat.audio.mixers.silence_mixer import SilenceAudioMixer from pipecat.audio.mixers.soundfile_mixer import SoundfileMixer from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams +from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer, VADParams from pipecat.serializers.plivo import PlivoFrameSerializer from pipecat.serializers.twilio import TwilioFrameSerializer @@ -45,30 +43,7 @@ def create_turn_analyzer(workflow_run_id: int, audio_config: AudioConfig): audio_config: Audio configuration containing pipeline sample rate """ if ENABLE_SMART_TURN: - service_url = os.getenv( - "SMART_TURN_WS_SERVICE_ENDPOINT", "ws://localhost:8010/ws" - ) - - # Prepare optional authentication headers for Smart Turn service - secret_key = os.getenv("SMART_TURN_HTTP_SERVICE_KEY") - headers = {"X-API-Key": secret_key} if secret_key else None - - return WebSocketSmartTurnAnalyzer( - url=service_url, - headers=headers, - sample_rate=audio_config.pipeline_sample_rate, - params=SmartTurnParams( - stop_secs=1.5, # send turn complete if silent for stop_secs seconds - pre_speech_ms=0, # send speech segments before speech was detected by VAD - max_duration_secs=5, # max duration of speech to be sent to the end of turn analyzer - # we don't want to _clear except when we have end of turn prediction as 1 from last run - # else if we have speaking -> queit -> trigger end of turn -> clear() and then - # we have speak -> queit, we may end up sending a very small segment of speech - # to end of turn model, which is not good - use_only_last_vad_segment=False, - ), - service_context=workflow_run_id, - ) + return LocalSmartTurnAnalyzerV3(params=SmartTurnParams()) return None diff --git a/api/services/workflow/pipecat_engine_callbacks.py b/api/services/workflow/pipecat_engine_callbacks.py index d4ba2a4..a7d6b51 100644 --- a/api/services/workflow/pipecat_engine_callbacks.py +++ b/api/services/workflow/pipecat_engine_callbacks.py @@ -67,14 +67,6 @@ def create_user_idle_callback(engine: "PipecatEngine"): ) -> bool: logger.debug(f"Handling user_idle, attempt: {retry_count}") - # Check if we're on a StartNode - if yes, directly disconnect - if engine._current_node and engine._current_node.is_start: - logger.debug("User idle on StartNode - disconnecting immediately") - await engine.send_end_task_frame( - EndTaskReason.USER_IDLE_MAX_DURATION_EXCEEDED.value - ) - return False - if retry_count == 1: # Simulate an LLM generation, so that we can have the LLM context # updated with the new message diff --git a/pipecat b/pipecat index 22faa3c..3987090 160000 --- a/pipecat +++ b/pipecat @@ -1 +1 @@ -Subproject commit 22faa3ca1c0c7d3e3dfaf461b29c69b835f6d9e9 +Subproject commit 3987090bfb3e1e4a0341f875c93ddee69a740d60 diff --git a/scripts/setup_pipecat.sh b/scripts/setup_pipecat.sh index d86c25c..03fde25 100755 --- a/scripts/setup_pipecat.sh +++ b/scripts/setup_pipecat.sh @@ -16,7 +16,7 @@ git submodule update --init --recursive # Install pipecat in editable mode with all extras echo "Installing pipecat dependencies..." -pip install -e ./pipecat[cartesia,deepgram,openai,elevenlabs,groq,google,azure,soundfile,silero,webrtc] +pip install -e ./pipecat[cartesia,deepgram,openai,elevenlabs,groq,google,azure,soundfile,silero,webrtc,local-smart-turn-v3] # Install other requirements echo "Installing dograh API requirements..."