fix: changes to update pipecat version to 0.0.100 (#122)

* feat: add stt evals

* add smart turn as provider

* chore: remove deprecations

* chore: format files

* fix: remove deprecated UserIdleProcessor

* fix: remove deprecated TranscriptProcessor

* chore: update pipecat submodule

* feat: add evals visualisation

* fix: trigger llm generation on client connected and pipeline started

* chore: update pipecat

* chore: update pipecat submodule

* Add tests

* fix: slow loading of workflow page

* chore: update pipecat submodule

* Show version after release

* Fixes #99

* fix: provider check for websocket connection

* Fixes #107

* Fix #96

* chore: fix documentation

* fix: cloudonix campaign call error

---------

Co-authored-by: Sabiha Khan <sabihak89@gmail.com>
This commit is contained in:
Abhishek 2026-01-23 18:53:59 +05:30 committed by GitHub
parent a4367bd83b
commit 911c5ed416
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
104 changed files with 16919 additions and 597 deletions

View file

@ -170,13 +170,6 @@ class CampaignCallDispatcher:
)
raise ValueError(f"Workflow {campaign.workflow_id} not found")
# Merge context variables (queued_run context already includes retry info if applicable)
initial_context = {
**workflow.template_context_variables,
**queued_run.context_variables,
"campaign_id": campaign.id,
}
# Extract phone number
phone_number = queued_run.context_variables.get("phone_number")
if not phone_number:
@ -186,13 +179,25 @@ class CampaignCallDispatcher:
)
raise ValueError(f"No phone number in queued run {queued_run.id}")
# Create workflow run with queued_run_id tracking
workflow_run_name = f"WR-CAMPAIGN-{campaign.id}-{queued_run.id}"
# Get provider first to determine the mode
provider = await self.get_telephony_provider(campaign.organization_id)
workflow_run_mode = provider.PROVIDER_NAME
logger.info(f"Provider name: {provider.PROVIDER_NAME}")
logger.info(f"Queued run context: {queued_run.context_variables}")
# Merge context variables (queued_run context already includes retry info if applicable)
initial_context = {
**workflow.template_context_variables,
**queued_run.context_variables,
"campaign_id": campaign.id,
"provider": provider.PROVIDER_NAME,
}
logger.info(f"Final initial_context: {initial_context}")
# Create workflow run with queued_run_id tracking
workflow_run_name = f"WR-CAMPAIGN-{campaign.id}-{queued_run.id}"
try:
workflow_run = await db_client.create_workflow_run(
name=workflow_run_name,
@ -243,6 +248,8 @@ class CampaignCallDispatcher:
to_number=phone_number,
webhook_url=webhook_url,
workflow_run_id=workflow_run.id,
workflow_id=campaign.workflow_id,
user_id=campaign.created_by,
)
# Store provider type and metadata in gathered_context

View file

@ -300,7 +300,7 @@ TTSConfig = Annotated[
###################################################### STT ########################################################################
DEEPGRAM_STT_MODELS = ["nova-2", "nova-3-general"]
DEEPGRAM_STT_MODELS = ["nova-2", "nova-3-general", "flux-general-en"]
DEEPGRAM_LANGUAGES = [
"multi",
"en",

View file

@ -103,7 +103,6 @@ class LoopTalkPipelineBuilder:
# Set the context and audio_buffer after creation
engine.set_context(context)
engine.set_audio_buffer(audio_buffer)
context_aggregator = LLMContextAggregatorPair(context)

View file

@ -12,9 +12,8 @@ from pipecat.frames.frames import (
Frame,
InputAudioRawFrame,
OutputAudioRawFrame,
StartFrame,
)
from pipecat.serializers.base_serializer import FrameSerializer, FrameSerializerType
from pipecat.serializers.base_serializer import FrameSerializer
class InternalFrameSerializer(FrameSerializer):
@ -24,15 +23,6 @@ class InternalFrameSerializer(FrameSerializer):
preventing control frames from creating infinite loops.
"""
@property
def type(self) -> FrameSerializerType:
"""Internal transport uses binary frames."""
return FrameSerializerType.BINARY
async def setup(self, frame: StartFrame):
"""No setup required for internal transport."""
pass
async def serialize(self, frame: Frame) -> bytes | None:
"""Only serialize audio frames for transmission between agents."""
# Only pass audio frames between agents

View file

@ -22,16 +22,21 @@ from pipecat.pipeline.task import PipelineTask
from pipecat.processors.audio.audio_buffer_processor import AudioBufferProcessor
def register_transport_event_handlers(
def register_event_handlers(
task: PipelineTask,
transport,
workflow_run_id,
workflow_run_id: int,
engine: PipecatEngine,
audio_buffer: AudioBufferProcessor,
in_memory_logs_buffer: InMemoryLogsBuffer,
pipeline_metrics_aggregator: PipelineMetricsAggregator,
audio_config=AudioConfig,
):
"""Register event handlers for transport events"""
"""Register all event handlers for transport and task events.
Returns:
Tuple of (in_memory_audio_buffer, in_memory_transcript_buffer) for use by other handlers.
"""
# Initialize in-memory buffers with proper audio configuration
sample_rate = audio_config.pipeline_sample_rate if audio_config else 16000
num_channels = 1 # Pipeline audio is always mono
@ -48,13 +53,35 @@ def register_transport_event_handlers(
)
in_memory_transcript_buffer = InMemoryTranscriptBuffer(workflow_run_id)
# Track both events to ensure LLM is only triggered after both occur
ready_state = {
"pipeline_started": False,
"client_connected": False,
"llm_triggered": False,
}
async def maybe_trigger_llm():
"""Trigger LLM only after both pipeline_started and client_connected events."""
if (
ready_state["pipeline_started"]
and ready_state["client_connected"]
and not ready_state["llm_triggered"]
):
ready_state["llm_triggered"] = True
logger.debug(
"Both pipeline_started and client_connected received - triggering initial LLM generation"
)
await engine.llm.queue_frame(LLMContextFrame(engine.context))
@transport.event_handler("on_client_connected")
async def on_client_connected(transport, participant):
logger.debug("In on_client_connected callback handler - initializing workflow")
async def on_client_connected(_transport, _participant):
logger.debug("In on_client_connected callback handler")
await audio_buffer.start_recording()
ready_state["client_connected"] = True
await maybe_trigger_llm()
@transport.event_handler("on_client_disconnected")
async def on_client_disconnected(transport, participant):
async def on_client_disconnected(_transport, _participant):
call_disposed = engine.is_call_disposed()
logger.debug(
@ -69,33 +96,16 @@ def register_transport_event_handlers(
if not call_disposed:
await task.cancel()
# Return the buffers so they can be passed to other handlers
return in_memory_audio_buffer, in_memory_transcript_buffer
def register_task_event_handler(
workflow_run_id: int,
engine: PipecatEngine,
task: PipelineTask,
transport,
audio_buffer: AudioBufferProcessor,
in_memory_audio_buffer: InMemoryAudioBuffer,
in_memory_transcript_buffer: InMemoryTranscriptBuffer,
in_memory_logs_buffer: InMemoryLogsBuffer,
pipeline_metrics_aggregator: PipelineMetricsAggregator,
):
@task.event_handler("on_pipeline_started")
async def on_pipeline_started(task: PipelineTask, frame: Frame):
logger.debug(
"In on_pipeline_started callback handler - triggering initial LLM generation"
)
# Trigger initial LLM generation after pipeline has started
await engine.llm.queue_frame(LLMContextFrame(engine.context))
async def on_pipeline_started(_task: PipelineTask, _frame: Frame):
logger.debug("In on_pipeline_started callback handler")
ready_state["pipeline_started"] = True
await maybe_trigger_llm()
@task.event_handler("on_pipeline_finished")
async def on_pipeline_finished(
task: PipelineTask,
frame: Frame,
_frame: Frame,
):
logger.debug(f"In on_pipeline_finished callback handler")
@ -207,14 +217,13 @@ def register_task_event_handler(
if workflow_run and workflow_run.campaign_id:
await campaign_call_dispatcher.release_call_slot(workflow_run_id)
# Write buffers to temp files and enqueue S3 upload
# Write buffers to temp files and enqueue combined processing task
audio_temp_path = None
transcript_temp_path = None
try:
# Only upload if buffers have content
if not in_memory_audio_buffer.is_empty:
audio_temp_path = await in_memory_audio_buffer.write_to_temp_file()
await enqueue_job(
FunctionNames.UPLOAD_AUDIO_TO_S3, workflow_run_id, audio_temp_path
)
else:
logger.debug("Audio buffer is empty, skipping upload")
@ -222,11 +231,6 @@ def register_task_event_handler(
transcript_temp_path = (
await in_memory_transcript_buffer.write_to_temp_file()
)
await enqueue_job(
FunctionNames.UPLOAD_TRANSCRIPT_TO_S3,
workflow_run_id,
transcript_temp_path,
)
else:
logger.debug("Transcript buffer is empty, skipping upload")
@ -234,10 +238,18 @@ def register_task_event_handler(
logger.error(f"Error preparing buffers for S3 upload: {e}", exc_info=True)
await enqueue_job(FunctionNames.CALCULATE_WORKFLOW_RUN_COST, workflow_run_id)
# Combined task: uploads artifacts then runs integrations sequentially
await enqueue_job(
FunctionNames.RUN_INTEGRATIONS_POST_WORKFLOW_RUN, workflow_run_id
FunctionNames.PROCESS_WORKFLOW_COMPLETION,
workflow_run_id,
audio_temp_path,
transcript_temp_path,
)
# Return the buffers so they can be passed to other handlers
return in_memory_audio_buffer, in_memory_transcript_buffer
def register_audio_data_handler(
audio_buffer: AudioBufferProcessor,
@ -260,18 +272,26 @@ def register_audio_data_handler(
# Could implement overflow to disk here if needed
def register_transcript_handler(
transcript, workflow_run_id, in_memory_buffer: InMemoryTranscriptBuffer
def register_transcript_handlers(
user_aggregator,
assistant_aggregator,
workflow_run_id,
in_memory_buffer: InMemoryTranscriptBuffer,
):
"""Register event handler for transcript updates"""
"""Register event handlers for transcript updates on context aggregators.
@transcript.event_handler("on_transcript_update")
async def on_transcript_update(processor, frame):
transcript_text = ""
for msg in frame.messages:
timestamp = f"[{msg.timestamp}] " if msg.timestamp else ""
line = f"{timestamp}{msg.role}: {msg.content}\n"
transcript_text += line
Uses the on_user_turn_stopped and on_assistant_turn_stopped events to capture
transcripts as turns complete, following the event-based pattern.
"""
# Use in-memory buffer
await in_memory_buffer.append(transcript_text)
@user_aggregator.event_handler("on_user_turn_stopped")
async def on_user_turn_stopped(aggregator, strategy, message):
timestamp = f"[{message.timestamp}] " if message.timestamp else ""
line = f"{timestamp}user: {message.content}\n"
await in_memory_buffer.append(line)
@assistant_aggregator.event_handler("on_assistant_turn_stopped")
async def on_assistant_turn_stopped(aggregator, message):
timestamp = f"[{message.timestamp}] " if message.timestamp else ""
line = f"{timestamp}assistant: {message.content}\n"
await in_memory_buffer.append(line)

View file

@ -1,5 +1,4 @@
import os
from typing import TYPE_CHECKING
from loguru import logger
@ -11,14 +10,10 @@ from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.llm_context import LLMContext
from pipecat.processors.audio.audio_buffer_processor import AudioBufferProcessor
from pipecat.processors.transcript_processor import TranscriptProcessor
from pipecat.utils.context import turn_var
if TYPE_CHECKING:
from api.services.workflow.pipecat_engine import PipecatEngine
def create_pipeline_components(audio_config: AudioConfig, engine: "PipecatEngine"):
def create_pipeline_components(audio_config: AudioConfig):
"""Create and return the main pipeline components with proper audio configuration"""
logger.info(f"Creating pipeline components with audio config: {audio_config}")
@ -28,28 +23,21 @@ def create_pipeline_components(audio_config: AudioConfig, engine: "PipecatEngine
buffer_size=audio_config.buffer_size_bytes,
)
transcript = TranscriptProcessor(
assistant_correct_aggregation_callback=engine.create_aggregation_correction_callback()
)
context = LLMContext()
return audio_buffer, transcript, context
return audio_buffer, context
def build_pipeline(
transport,
stt,
transcript,
audio_buffer,
llm,
tts,
user_context_aggregator,
assistant_context_aggregator,
pipeline_engine_callback_processor,
stt_mute_filter,
pipeline_metrics_aggregator,
user_idle_disconnect,
voicemail_detector=None,
):
"""Build the main pipeline with all components.
@ -63,7 +51,7 @@ def build_pipeline(
# Build processors list with optional voicemail detection
processors = [
transport.input(), # Transport user input
stt, # STT (audio_passthrough=True by default, passes InputAudioRawFrame)
stt,
]
# Insert voicemail detector after STT if enabled
@ -76,16 +64,12 @@ def build_pipeline(
# Continue with the rest of the pipeline
processors.extend(
[
stt_mute_filter, # STTMuteFilters don't let VAD related events pass through if muted
user_idle_disconnect,
transcript.user(),
user_context_aggregator,
llm, # LLM
pipeline_engine_callback_processor,
tts, # TTS
transport.output(), # Transport bot output
audio_buffer, # AudioBufferProcessor - records both input and output audio
transcript.assistant(),
assistant_context_aggregator, # Assistant spoken responses
pipeline_metrics_aggregator,
]
@ -98,7 +82,6 @@ def create_pipeline_task(pipeline, workflow_run_id, audio_config: AudioConfig =
"""Create a pipeline task with appropriate parameters"""
# Set up pipeline params with audio configuration if provided
pipeline_params = PipelineParams(
allow_interruptions=True,
enable_metrics=True,
enable_usage_metrics=True,
send_initial_empty_metrics=False,
@ -119,6 +102,7 @@ def create_pipeline_task(pipeline, workflow_run_id, audio_config: AudioConfig =
pipeline,
params=pipeline_params,
enable_tracing=ENABLE_TRACING,
enable_rtvi=False,
conversation_id=f"{workflow_run_id}",
)

View file

@ -7,12 +7,12 @@ from loguru import logger
from api.db import db_client
from api.db.models import WorkflowModel
from api.enums import WorkflowRunMode
from api.services.configuration.registry import ServiceProviders
from api.services.pipecat.audio_config import AudioConfig, create_audio_config
from api.services.pipecat.event_handlers import (
register_audio_data_handler,
register_task_event_handler,
register_transcript_handler,
register_transport_event_handlers,
register_event_handlers,
register_transcript_handlers,
)
from api.services.pipecat.in_memory_buffers import InMemoryLogsBuffer
from api.services.pipecat.pipeline_builder import (
@ -46,20 +46,25 @@ from api.services.workflow.pipecat_engine import PipecatEngine
from api.services.workflow.workflow import WorkflowGraph
from pipecat.extensions.voicemail.voicemail_detector import VoicemailDetector
from pipecat.pipeline.base_task import PipelineTaskParams
from pipecat.processors.aggregators.llm_response import (
from pipecat.processors.aggregators.llm_response_universal import (
LLMAssistantAggregatorParams,
LLMContextAggregatorPair,
LLMUserAggregatorParams,
)
from pipecat.processors.aggregators.llm_response_universal import (
LLMContextAggregatorPair,
)
from pipecat.processors.filters.stt_mute_filter import (
STTMuteConfig,
STTMuteFilter,
STTMuteStrategy,
)
from pipecat.processors.user_idle_processor import UserIdleProcessor
from pipecat.transports.smallwebrtc.connection import SmallWebRTCConnection
from pipecat.turns.user_mute import MuteUntilFirstBotCompleteUserMuteStrategy
from pipecat.turns.user_start import (
ExternalUserTurnStartStrategy,
TranscriptionUserTurnStartStrategy,
)
from pipecat.turns.user_start.vad_user_turn_start_strategy import (
VADUserTurnStartStrategy,
)
from pipecat.turns.user_stop import (
ExternalUserTurnStopStrategy,
TranscriptionUserTurnStopStrategy,
)
from pipecat.turns.user_turn_strategies import UserTurnStrategies
from pipecat.utils.context import set_current_run_id
from pipecat.utils.enums import EndTaskReason
from pipecat.utils.tracing.context_registry import ContextProviderRegistry
@ -517,12 +522,11 @@ async def _run_pipeline(
embeddings_model=embeddings_model,
)
# Create pipeline components with audio configuration and engine
audio_buffer, transcript, context = create_pipeline_components(audio_config, engine)
# Create pipeline components with audio configuration
audio_buffer, context = create_pipeline_components(audio_config)
# Set the context and audio_buffer after creation
engine.set_context(context)
engine.set_audio_buffer(audio_buffer)
# Set Stasis connection for immediate transfers (if available)
if stasis_connection:
@ -532,7 +536,31 @@ async def _run_pipeline(
expect_stripped_words=True,
correct_aggregation_callback=engine.create_aggregation_correction_callback(),
)
user_params = LLMUserAggregatorParams(enable_emulated_vad_interruptions=True)
# Configure turn strategies based on STT provider and model
# Deepgram Flux uses external turn detection (VAD + External start/stop)
# Other models use transcription-based turn detection with smart turn analyzer
is_deepgram_flux = (
user_config.stt.provider == ServiceProviders.DEEPGRAM.value
and user_config.stt.model == "flux-general-en"
)
if is_deepgram_flux:
user_turn_strategies = UserTurnStrategies(
start=[VADUserTurnStartStrategy(), ExternalUserTurnStartStrategy()],
stop=[ExternalUserTurnStopStrategy()],
)
else:
user_turn_strategies = UserTurnStrategies(
start=[VADUserTurnStartStrategy(), TranscriptionUserTurnStartStrategy()],
stop=[TranscriptionUserTurnStopStrategy()],
)
user_params = LLMUserAggregatorParams(
user_turn_strategies=user_turn_strategies,
user_mute_strategies=[MuteUntilFirstBotCompleteUserMuteStrategy()],
user_idle_timeout=max_user_idle_timeout,
)
context_aggregator = LLMContextAggregatorPair(
context, assistant_params=assistant_params, user_params=user_params
)
@ -547,25 +575,20 @@ async def _run_pipeline(
pipeline_metrics_aggregator = PipelineMetricsAggregator()
# Create STT mute filter using the selected strategies and the engine's callback
stt_mute_filter = STTMuteFilter(
config=STTMuteConfig(
strategies={
STTMuteStrategy.MUTE_UNTIL_FIRST_BOT_COMPLETE,
STTMuteStrategy.CUSTOM,
},
should_mute_callback=engine.create_should_mute_callback(),
)
)
# Use engine's user idle callback with configured timeout
user_idle_disconnect = UserIdleProcessor(
callback=engine.create_user_idle_callback(), timeout=max_user_idle_timeout
)
user_context_aggregator = context_aggregator.user()
assistant_context_aggregator = context_aggregator.assistant()
# Register user idle event handlers
user_idle_handler = engine.create_user_idle_handler()
@user_context_aggregator.event_handler("on_user_turn_idle")
async def on_user_turn_idle(aggregator):
await user_idle_handler.handle_idle(aggregator)
@user_context_aggregator.event_handler("on_user_turn_started")
async def on_user_turn_started(aggregator, strategy):
user_idle_handler.reset()
# Create voicemail detector if enabled in the workflow's start node
voicemail_detector = None
start_node = workflow_graph.nodes.get(workflow_graph.start_node_id)
@ -592,16 +615,13 @@ async def _run_pipeline(
pipeline = build_pipeline(
transport,
stt,
transcript,
audio_buffer,
llm,
tts,
user_context_aggregator,
assistant_context_aggregator,
pipeline_engine_callback_processor,
stt_mute_filter,
pipeline_metrics_aggregator,
user_idle_disconnect,
voicemail_detector=voicemail_detector,
)
@ -614,18 +634,6 @@ async def _run_pipeline(
# Initialize the engine to set the initial context
await engine.initialize()
# Register event handlers
in_memory_audio_buffer, in_memory_transcript_buffer = (
register_transport_event_handlers(
task,
transport,
workflow_run_id,
engine=engine,
audio_buffer=audio_buffer,
audio_config=audio_config,
)
)
# Add real-time feedback observer if WebSocket sender is available
# Note: ws_sender was already fetched earlier for node_transition_callback
if ws_sender:
@ -635,21 +643,24 @@ async def _run_pipeline(
)
task.add_observer(feedback_observer)
register_task_event_handler(
workflow_run_id,
engine,
# Register event handlers
in_memory_audio_buffer, in_memory_transcript_buffer = register_event_handlers(
task,
transport,
audio_buffer,
in_memory_audio_buffer,
in_memory_transcript_buffer,
in_memory_logs_buffer,
pipeline_metrics_aggregator,
workflow_run_id,
engine=engine,
audio_buffer=audio_buffer,
in_memory_logs_buffer=in_memory_logs_buffer,
pipeline_metrics_aggregator=pipeline_metrics_aggregator,
audio_config=audio_config,
)
register_audio_data_handler(audio_buffer, workflow_run_id, in_memory_audio_buffer)
register_transcript_handler(
transcript, workflow_run_id, in_memory_transcript_buffer
register_transcript_handlers(
user_context_aggregator,
assistant_context_aggregator,
workflow_run_id,
in_memory_transcript_buffer,
)
try:

View file

@ -7,6 +7,7 @@ from api.constants import MPS_API_URL
from api.services.configuration.registry import ServiceProviders
from pipecat.services.azure.llm import AzureLLMService
from pipecat.services.cartesia.stt import CartesiaSTTService
from pipecat.services.deepgram.flux.stt import DeepgramFluxSTTService
from pipecat.services.deepgram.stt import DeepgramSTTService, LiveOptions
from pipecat.services.deepgram.tts import DeepgramTTSService
from pipecat.services.dograh.llm import DograhLLMService
@ -34,6 +35,20 @@ def create_stt_service(user_config):
f"Creating STT service: provider={user_config.stt.provider}, model={user_config.stt.model}"
)
if user_config.stt.provider == ServiceProviders.DEEPGRAM.value:
# Check if using Flux model (English-only, no language selection)
if user_config.stt.model == "flux-general-en":
logger.debug("Using DeepGram Flux Model")
return DeepgramFluxSTTService(
api_key=user_config.stt.api_key,
model=user_config.stt.model,
params=DeepgramFluxSTTService.InputParams(
eot_timeout_ms=3000,
eot_threshold=0.7,
),
should_interrupt=False, # Let UserAggregator take care of sending InterruptionFrame
)
# All Deepgram models other than Flux
# Use language from user config, defaulting to "multi" for multilingual support
language = getattr(user_config.stt, "language", None) or "multi"
live_options = LiveOptions(
@ -44,7 +59,9 @@ def create_stt_service(user_config):
)
logger.debug(f"Using DeepGram Model - {user_config.stt.model}")
return DeepgramSTTService(
live_options=live_options, api_key=user_config.stt.api_key
live_options=live_options,
api_key=user_config.stt.api_key,
should_interrupt=False, # Let UserAggregator take care of sending InterruptionFrame
)
elif user_config.stt.provider == ServiceProviders.OPENAI.value:
return OpenAISTTService(

View file

@ -2,10 +2,9 @@ import os
from fastapi import WebSocket
from api.constants import APP_ROOT_DIR, ENABLE_RNNOISE, ENABLE_SMART_TURN
from api.constants import APP_ROOT_DIR
from api.db import db_client
from api.enums import OrganizationConfigurationKey
from api.services.looptalk.internal_transport import InternalTransport
from api.services.pipecat.audio_config import AudioConfig
from api.services.telephony.stasis_rtp_connection import StasisRTPConnection
from api.services.telephony.stasis_rtp_serializer import StasisRTPFrameSerializer
@ -13,11 +12,8 @@ from api.services.telephony.stasis_rtp_transport import (
StasisRTPTransport,
StasisRTPTransportParams,
)
from pipecat.audio.filters.rnnoise_filter import RNNoiseFilter
from pipecat.audio.mixers.silence_mixer import SilenceAudioMixer
from pipecat.audio.mixers.soundfile_mixer import SoundfileMixer
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
from pipecat.audio.vad.silero import SileroVADAnalyzer, VADParams
from pipecat.serializers.twilio import TwilioFrameSerializer
from pipecat.serializers.vobiz import VobizFrameSerializer
@ -35,19 +31,6 @@ librnnoise_path = os.path.normpath(
)
def create_turn_analyzer(workflow_run_id: int, audio_config: AudioConfig):
"""Create a turn analyzer backed by the local Smart Turn HTTP service.
Args:
workflow_run_id: ID of the workflow run for turn analyzer context
audio_config: Audio configuration containing pipeline sample rate
"""
if ENABLE_SMART_TURN:
return LocalSmartTurnAnalyzerV3(params=SmartTurnParams())
return None
async def create_twilio_transport(
websocket_client: WebSocket,
stream_sid: str,
@ -78,8 +61,6 @@ async def create_twilio_transport(
f"Incomplete Twilio configuration for organization {organization_id}"
)
turn_analyzer = create_turn_analyzer(workflow_run_id, audio_config)
serializer = TwilioFrameSerializer(
stream_sid=stream_sid,
call_sid=call_sid,
@ -119,11 +100,7 @@ async def create_twilio_transport(
if ambient_noise_config and ambient_noise_config.get("enabled", False)
else SilenceAudioMixer()
),
turn_analyzer=turn_analyzer,
serializer=serializer,
audio_in_filter=RNNoiseFilter(library_path=librnnoise_path)
if ENABLE_RNNOISE
else None,
),
)
@ -158,8 +135,6 @@ async def create_cloudonix_transport(
f"Required: bearer_token, domain_id"
)
turn_analyzer = create_turn_analyzer(workflow_run_id, audio_config)
from pipecat.serializers.cloudonix import CloudonixFrameSerializer
serializer = CloudonixFrameSerializer(
@ -202,11 +177,7 @@ async def create_cloudonix_transport(
if ambient_noise_config and ambient_noise_config.get("enabled", False)
else SilenceAudioMixer()
),
turn_analyzer=turn_analyzer,
serializer=serializer,
audio_in_filter=RNNoiseFilter(library_path=librnnoise_path)
if ENABLE_RNNOISE
else None,
),
)
@ -238,8 +209,6 @@ async def create_vonage_transport(
f"Incomplete Vonage configuration for organization {organization_id}"
)
turn_analyzer = create_turn_analyzer(workflow_run_id, audio_config)
serializer = VonageFrameSerializer(
call_uuid=call_uuid,
application_id=application_id,
@ -283,11 +252,7 @@ async def create_vonage_transport(
if ambient_noise_config and ambient_noise_config.get("enabled", False)
else SilenceAudioMixer()
),
turn_analyzer=turn_analyzer,
serializer=serializer,
audio_in_filter=RNNoiseFilter(library_path=librnnoise_path)
if ENABLE_RNNOISE
else None,
),
)
@ -337,8 +302,6 @@ async def create_vobiz_transport(
f"from_numbers={len(config.get('from_numbers', []))} numbers"
)
turn_analyzer = create_turn_analyzer(workflow_run_id, audio_config)
# Use VobizFrameSerializer for Vobiz WebSocket protocol
serializer = VobizFrameSerializer(
stream_id=stream_id,
@ -389,11 +352,7 @@ async def create_vobiz_transport(
if ambient_noise_config and ambient_noise_config.get("enabled", False)
else SilenceAudioMixer()
),
turn_analyzer=turn_analyzer,
serializer=serializer,
audio_in_filter=RNNoiseFilter(library_path=librnnoise_path)
if ENABLE_RNNOISE
else None,
),
)
@ -411,7 +370,6 @@ def create_webrtc_transport(
ambient_noise_config: dict | None = None,
):
"""Create a transport for WebRTC connections"""
turn_analyzer = create_turn_analyzer(workflow_run_id, audio_config)
return SmallWebRTCTransport(
webrtc_connection=webrtc_connection,
@ -445,10 +403,6 @@ def create_webrtc_transport(
if ambient_noise_config and ambient_noise_config.get("enabled", False)
else SilenceAudioMixer()
),
turn_analyzer=turn_analyzer,
audio_in_filter=RNNoiseFilter(library_path=librnnoise_path)
if ENABLE_RNNOISE
else None,
),
)
@ -461,7 +415,6 @@ def create_stasis_transport(
ambient_noise_config: dict | None = None,
):
"""Create a transport for ARI connections"""
turn_analyzer = create_turn_analyzer(workflow_run_id, audio_config)
serializer = StasisRTPFrameSerializer(
StasisRTPFrameSerializer.InputParams(
@ -502,11 +455,7 @@ def create_stasis_transport(
if ambient_noise_config and ambient_noise_config.get("enabled", False)
else SilenceAudioMixer()
),
turn_analyzer=turn_analyzer,
serializer=serializer,
audio_in_filter=RNNoiseFilter(library_path=librnnoise_path)
if ENABLE_RNNOISE
else None,
),
)
@ -528,46 +477,44 @@ def create_internal_transport(
Returns:
InternalTransport instance configured with turn analyzer
"""
turn_analyzer = create_turn_analyzer(workflow_run_id, audio_config)
pass
# Commented out because pulling looptalk into the regular import flow
# was causing issues. Consider moving this to looptalk/orchestrator.py.
# Create and return the internal transport with latency
return InternalTransport(
params=TransportParams(
audio_out_enabled=True,
audio_out_sample_rate=audio_config.transport_out_sample_rate,
audio_out_channels=1,
audio_in_enabled=True,
audio_in_sample_rate=audio_config.transport_in_sample_rate,
audio_in_channels=1,
vad_analyzer=(
SileroVADAnalyzer(
params=VADParams(
confidence=vad_config.get("confidence", 0.7),
start_secs=vad_config.get("start_seconds", 0.4),
stop_secs=vad_config.get("stop_seconds", 0.8),
min_volume=vad_config.get("minimum_volume", 0.6),
)
)
if vad_config
else SileroVADAnalyzer()
),
audio_out_mixer=(
SoundfileMixer(
sound_files={
"office": APP_ROOT_DIR
/ "assets"
/ f"office-ambience-{audio_config.transport_out_sample_rate}-mono.wav"
},
default_sound="office",
volume=ambient_noise_config.get("volume", 0.3),
)
if ambient_noise_config and ambient_noise_config.get("enabled", False)
else SilenceAudioMixer()
),
turn_analyzer=turn_analyzer,
audio_in_filter=RNNoiseFilter(library_path=librnnoise_path)
if ENABLE_RNNOISE
else None,
),
latency_seconds=latency_seconds,
)
# return InternalTransport(
# params=TransportParams(
# audio_out_enabled=True,
# audio_out_sample_rate=audio_config.transport_out_sample_rate,
# audio_out_channels=1,
# audio_in_enabled=True,
# audio_in_sample_rate=audio_config.transport_in_sample_rate,
# audio_in_channels=1,
# vad_analyzer=(
# SileroVADAnalyzer(
# params=VADParams(
# confidence=vad_config.get("confidence", 0.7),
# start_secs=vad_config.get("start_seconds", 0.4),
# stop_secs=vad_config.get("stop_seconds", 0.8),
# min_volume=vad_config.get("minimum_volume", 0.6),
# )
# )
# if vad_config
# else SileroVADAnalyzer()
# ),
# audio_out_mixer=(
# SoundfileMixer(
# sound_files={
# "office": APP_ROOT_DIR
# / "assets"
# / f"office-ambience-{audio_config.transport_out_sample_rate}-mono.wav"
# },
# default_sound="office",
# volume=ambient_noise_config.get("volume", 0.3),
# )
# if ambient_noise_config and ambient_noise_config.get("enabled", False)
# else SilenceAudioMixer()
# ),
# ),
# latency_seconds=latency_seconds,
# )

View file

@ -15,6 +15,8 @@ The serializer:
from typing import Optional
from loguru import logger
from pydantic import BaseModel
from pipecat.audio.utils import create_default_resampler, pcm_to_ulaw, ulaw_to_pcm
from pipecat.frames.frames import (
AudioRawFrame,
@ -22,8 +24,7 @@ from pipecat.frames.frames import (
InputAudioRawFrame,
StartFrame,
)
from pipecat.serializers.base_serializer import FrameSerializer, FrameSerializerType
from pydantic import BaseModel
from pipecat.serializers.base_serializer import FrameSerializer
class StasisRTPFrameSerializer(FrameSerializer):
@ -59,11 +60,6 @@ class StasisRTPFrameSerializer(FrameSerializer):
# Resampler shared between encode / decode paths
self._resampler = create_default_resampler()
@property
def type(self) -> FrameSerializerType:
"""Stasis uses raw bytes → BINARY."""
return FrameSerializerType.BINARY
async def setup(self, frame: StartFrame):
"""Remember pipeline configuration."""
self._sample_rate = self._params.sample_rate or frame.audio_in_sample_rate

View file

@ -19,7 +19,6 @@ from pipecat.utils.enums import EndTaskReason
if TYPE_CHECKING:
from api.services.telephony.stasis_rtp_connection import StasisRTPConnection
from pipecat.processors.audio.audio_buffer_processor import AudioBuffer
from pipecat.services.anthropic.llm import AnthropicLLMService
from pipecat.services.google.llm import GoogleLLMService
from pipecat.services.openai.llm import OpenAILLMService
@ -64,7 +63,6 @@ class PipecatEngine:
transport: Optional[BaseTransport] = None,
workflow: WorkflowGraph,
call_context_vars: dict,
audio_buffer: Optional["AudioBuffer"] = None,
workflow_run_id: Optional[int] = None,
node_transition_callback: Optional[
Callable[[str, Optional[str]], Awaitable[None]]
@ -78,7 +76,6 @@ class PipecatEngine:
self.transport = transport
self.workflow = workflow
self._call_context_vars = call_context_vars
self._audio_buffer = audio_buffer
self._workflow_run_id = workflow_run_id
self._node_transition_callback = node_transition_callback
self._initialized = False
@ -204,6 +201,7 @@ class PipecatEngine:
logger.info(f"Arguments: {function_call_params.arguments}")
await self.set_node(transition_to_node)
try:
async def on_context_updated() -> None:
"""
pipecat framework will run this function after the function call result has been updated in the context.
@ -215,6 +213,12 @@ class PipecatEngine:
self._current_node
)
# Queue EndFrame if we just transitioned to EndNode
if self._current_node.is_end:
await self.send_end_task_frame(
EndTaskReason.USER_QUALIFIED.value
)
result = {"status": "done"}
properties = FunctionCallResultProperties(
@ -478,8 +482,6 @@ class PipecatEngine:
if node.extraction_enabled and node.extraction_variables:
await self._perform_variable_extraction_if_needed(node)
await self.send_end_task_frame(EndTaskReason.USER_QUALIFIED.value)
async def _handle_agent_node(self, node: Node) -> None:
"""Handle agent node execution."""
if node.is_static:
@ -680,12 +682,12 @@ class PipecatEngine:
"""
return engine_callbacks.create_should_mute_callback(self)
def create_user_idle_handler(self):
    """Build the user-idle handler bound to this engine.

    Returns:
        The object produced by ``engine_callbacks.create_user_idle_handler``
        for this engine: a stateful handler that manages user-idle timeouts,
        tracking the retry count and escalating its prompts.
    """
    # Delegation keeps the idle-handling policy in engine_callbacks; the
    # engine only supplies itself as the context the handler acts on.
    return engine_callbacks.create_user_idle_handler(self)
def create_max_duration_callback(self):
"""
@ -721,14 +723,6 @@ class PipecatEngine:
"""
self.task = task
def set_audio_buffer(self, audio_buffer: "AudioBuffer") -> None:
    """Set the audio buffer.

    This allows setting the audio buffer after the engine has been created,
    which is useful when the audio buffer needs to be created after the engine.

    Args:
        audio_buffer: The buffer to store on the engine; it replaces any
            previously stored value.
    """
    self._audio_buffer = audio_buffer
def set_stasis_connection(
self, connection: Optional["StasisRTPConnection"]
) -> None:

View file

@ -23,7 +23,6 @@ from pipecat.utils.enums import EndTaskReason
if TYPE_CHECKING:
from api.services.workflow.pipecat_engine import PipecatEngine
from pipecat.processors.user_idle_processor import UserIdleProcessor
# ---------------------------------------------------------------------------
@ -57,33 +56,43 @@ def create_should_mute_callback(
# ---------------------------------------------------------------------------
class UserIdleHandler:
    """Helper class to manage user idle retry logic with state.

    The handler counts how many idle events it has seen since the last
    ``reset()``. The first idle event asks the LLM to check whether the user
    is still there; any later idle event asks the LLM to say goodbye and then
    requests the end of the task.
    """

    def __init__(self, engine: "PipecatEngine"):
        """Bind the handler to ``engine`` and start with a zero retry count."""
        self._engine = engine
        self._retry_count = 0

    def reset(self) -> None:
        """Reset the retry count when user becomes active."""
        self._retry_count = 0

    async def handle_idle(self, aggregator) -> None:
        """Handle user idle event with escalating prompts.

        Args:
            aggregator: Object whose ``push_frame`` is used to queue the
                prompt frame. NOTE(review): presumably the user context
                aggregator that raised the idle event — confirm at the
                registration site.
        """
        self._retry_count += 1
        logger.debug(f"Handling user_idle, attempt: {self._retry_count}")

        if self._retry_count == 1:
            # First idle event: only nudge the user, keep the call alive.
            message = {
                "role": "system",
                "content": "The user has been quiet. Politely and briefly ask if they're still there in the language that the user has been speaking so far.",
            }
            await aggregator.push_frame(LLMMessagesAppendFrame([message], run_llm=True))
            return

        # Any further idle event: say goodbye, then ask the engine to end the task.
        message = {
            "role": "system",
            "content": "The user has been quiet. We will be disconnecting the call now. Wish them a good day in the language that the user has been speaking so far.",
        }
        await aggregator.push_frame(LLMMessagesAppendFrame([message], run_llm=True))
        await self._engine.send_end_task_frame(
            EndTaskReason.USER_IDLE_MAX_DURATION_EXCEEDED.value
        )
def create_user_idle_handler(engine: "PipecatEngine") -> UserIdleHandler:
    """Return a UserIdleHandler that manages user-idle timeouts with state.

    Args:
        engine: The engine the handler is bound to.

    Returns:
        A new ``UserIdleHandler`` constructed with ``engine``.
    """
    return UserIdleHandler(engine)
# ---------------------------------------------------------------------------