mirror of
https://github.com/dograh-hq/dograh.git
synced 2026-06-28 08:49:42 +02:00
fix: changes to update pipecat version to 0.0.100 (#122)
* feat: add stt evals
* add smart turn as provider
* chore: remove deprecations
* chore: format files
* fix: remove deprecated UserIdleProcessor
* fix: remove deprecated TranscriptProcessor
* chore: update pipecat submodule
* feat: add evals visualisation
* fix: trigger llm generation on client connected and pipeline started
* chore: update pipecat
* chore: update pipecat submodule
* Add tests
* fix: slow loading of workflow page
* chore: update pipecat submodule
* Show version after release
* Fixes #99
* fix: provider check for websocket connection
* Fixes #107
* Fix #96
* chore: fix documentation
* fix: cloudonix campaign call error

---------

Co-authored-by: Sabiha Khan <sabihak89@gmail.com>
This commit is contained in:
parent
a4367bd83b
commit
911c5ed416
104 changed files with 16919 additions and 597 deletions
|
|
@ -170,13 +170,6 @@ class CampaignCallDispatcher:
|
|||
)
|
||||
raise ValueError(f"Workflow {campaign.workflow_id} not found")
|
||||
|
||||
# Merge context variables (queued_run context already includes retry info if applicable)
|
||||
initial_context = {
|
||||
**workflow.template_context_variables,
|
||||
**queued_run.context_variables,
|
||||
"campaign_id": campaign.id,
|
||||
}
|
||||
|
||||
# Extract phone number
|
||||
phone_number = queued_run.context_variables.get("phone_number")
|
||||
if not phone_number:
|
||||
|
|
@ -186,13 +179,25 @@ class CampaignCallDispatcher:
|
|||
)
|
||||
raise ValueError(f"No phone number in queued run {queued_run.id}")
|
||||
|
||||
# Create workflow run with queued_run_id tracking
|
||||
workflow_run_name = f"WR-CAMPAIGN-{campaign.id}-{queued_run.id}"
|
||||
|
||||
# Get provider first to determine the mode
|
||||
provider = await self.get_telephony_provider(campaign.organization_id)
|
||||
workflow_run_mode = provider.PROVIDER_NAME
|
||||
|
||||
logger.info(f"Provider name: {provider.PROVIDER_NAME}")
|
||||
logger.info(f"Queued run context: {queued_run.context_variables}")
|
||||
|
||||
# Merge context variables (queued_run context already includes retry info if applicable)
|
||||
initial_context = {
|
||||
**workflow.template_context_variables,
|
||||
**queued_run.context_variables,
|
||||
"campaign_id": campaign.id,
|
||||
"provider": provider.PROVIDER_NAME,
|
||||
}
|
||||
|
||||
logger.info(f"Final initial_context: {initial_context}")
|
||||
|
||||
# Create workflow run with queued_run_id tracking
|
||||
workflow_run_name = f"WR-CAMPAIGN-{campaign.id}-{queued_run.id}"
|
||||
try:
|
||||
workflow_run = await db_client.create_workflow_run(
|
||||
name=workflow_run_name,
|
||||
|
|
@ -243,6 +248,8 @@ class CampaignCallDispatcher:
|
|||
to_number=phone_number,
|
||||
webhook_url=webhook_url,
|
||||
workflow_run_id=workflow_run.id,
|
||||
workflow_id=campaign.workflow_id,
|
||||
user_id=campaign.created_by,
|
||||
)
|
||||
|
||||
# Store provider type and metadata in gathered_context
|
||||
|
|
|
|||
|
|
@ -300,7 +300,7 @@ TTSConfig = Annotated[
|
|||
###################################################### STT ########################################################################
|
||||
|
||||
|
||||
DEEPGRAM_STT_MODELS = ["nova-2", "nova-3-general"]
|
||||
DEEPGRAM_STT_MODELS = ["nova-2", "nova-3-general", "flux-general-en"]
|
||||
DEEPGRAM_LANGUAGES = [
|
||||
"multi",
|
||||
"en",
|
||||
|
|
|
|||
|
|
@ -103,7 +103,6 @@ class LoopTalkPipelineBuilder:
|
|||
|
||||
# Set the context and audio_buffer after creation
|
||||
engine.set_context(context)
|
||||
engine.set_audio_buffer(audio_buffer)
|
||||
|
||||
context_aggregator = LLMContextAggregatorPair(context)
|
||||
|
||||
|
|
|
|||
|
|
@ -12,9 +12,8 @@ from pipecat.frames.frames import (
|
|||
Frame,
|
||||
InputAudioRawFrame,
|
||||
OutputAudioRawFrame,
|
||||
StartFrame,
|
||||
)
|
||||
from pipecat.serializers.base_serializer import FrameSerializer, FrameSerializerType
|
||||
from pipecat.serializers.base_serializer import FrameSerializer
|
||||
|
||||
|
||||
class InternalFrameSerializer(FrameSerializer):
|
||||
|
|
@ -24,15 +23,6 @@ class InternalFrameSerializer(FrameSerializer):
|
|||
preventing control frames from creating infinite loops.
|
||||
"""
|
||||
|
||||
@property
|
||||
def type(self) -> FrameSerializerType:
|
||||
"""Internal transport uses binary frames."""
|
||||
return FrameSerializerType.BINARY
|
||||
|
||||
async def setup(self, frame: StartFrame):
|
||||
"""No setup required for internal transport."""
|
||||
pass
|
||||
|
||||
async def serialize(self, frame: Frame) -> bytes | None:
|
||||
"""Only serialize audio frames for transmission between agents."""
|
||||
# Only pass audio frames between agents
|
||||
|
|
|
|||
|
|
@ -22,16 +22,21 @@ from pipecat.pipeline.task import PipelineTask
|
|||
from pipecat.processors.audio.audio_buffer_processor import AudioBufferProcessor
|
||||
|
||||
|
||||
def register_transport_event_handlers(
|
||||
def register_event_handlers(
|
||||
task: PipelineTask,
|
||||
transport,
|
||||
workflow_run_id,
|
||||
workflow_run_id: int,
|
||||
engine: PipecatEngine,
|
||||
audio_buffer: AudioBufferProcessor,
|
||||
in_memory_logs_buffer: InMemoryLogsBuffer,
|
||||
pipeline_metrics_aggregator: PipelineMetricsAggregator,
|
||||
audio_config=AudioConfig,
|
||||
):
|
||||
"""Register event handlers for transport events"""
|
||||
"""Register all event handlers for transport and task events.
|
||||
|
||||
Returns:
|
||||
Tuple of (in_memory_audio_buffer, in_memory_transcript_buffer) for use by other handlers.
|
||||
"""
|
||||
# Initialize in-memory buffers with proper audio configuration
|
||||
sample_rate = audio_config.pipeline_sample_rate if audio_config else 16000
|
||||
num_channels = 1 # Pipeline audio is always mono
|
||||
|
|
@ -48,13 +53,35 @@ def register_transport_event_handlers(
|
|||
)
|
||||
in_memory_transcript_buffer = InMemoryTranscriptBuffer(workflow_run_id)
|
||||
|
||||
# Track both events to ensure LLM is only triggered after both occur
|
||||
ready_state = {
|
||||
"pipeline_started": False,
|
||||
"client_connected": False,
|
||||
"llm_triggered": False,
|
||||
}
|
||||
|
||||
async def maybe_trigger_llm():
|
||||
"""Trigger LLM only after both pipeline_started and client_connected events."""
|
||||
if (
|
||||
ready_state["pipeline_started"]
|
||||
and ready_state["client_connected"]
|
||||
and not ready_state["llm_triggered"]
|
||||
):
|
||||
ready_state["llm_triggered"] = True
|
||||
logger.debug(
|
||||
"Both pipeline_started and client_connected received - triggering initial LLM generation"
|
||||
)
|
||||
await engine.llm.queue_frame(LLMContextFrame(engine.context))
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, participant):
|
||||
logger.debug("In on_client_connected callback handler - initializing workflow")
|
||||
async def on_client_connected(_transport, _participant):
|
||||
logger.debug("In on_client_connected callback handler")
|
||||
await audio_buffer.start_recording()
|
||||
ready_state["client_connected"] = True
|
||||
await maybe_trigger_llm()
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, participant):
|
||||
async def on_client_disconnected(_transport, _participant):
|
||||
call_disposed = engine.is_call_disposed()
|
||||
|
||||
logger.debug(
|
||||
|
|
@ -69,33 +96,16 @@ def register_transport_event_handlers(
|
|||
if not call_disposed:
|
||||
await task.cancel()
|
||||
|
||||
# Return the buffers so they can be passed to other handlers
|
||||
return in_memory_audio_buffer, in_memory_transcript_buffer
|
||||
|
||||
|
||||
def register_task_event_handler(
|
||||
workflow_run_id: int,
|
||||
engine: PipecatEngine,
|
||||
task: PipelineTask,
|
||||
transport,
|
||||
audio_buffer: AudioBufferProcessor,
|
||||
in_memory_audio_buffer: InMemoryAudioBuffer,
|
||||
in_memory_transcript_buffer: InMemoryTranscriptBuffer,
|
||||
in_memory_logs_buffer: InMemoryLogsBuffer,
|
||||
pipeline_metrics_aggregator: PipelineMetricsAggregator,
|
||||
):
|
||||
@task.event_handler("on_pipeline_started")
|
||||
async def on_pipeline_started(task: PipelineTask, frame: Frame):
|
||||
logger.debug(
|
||||
"In on_pipeline_started callback handler - triggering initial LLM generation"
|
||||
)
|
||||
# Trigger initial LLM generation after pipeline has started
|
||||
await engine.llm.queue_frame(LLMContextFrame(engine.context))
|
||||
async def on_pipeline_started(_task: PipelineTask, _frame: Frame):
|
||||
logger.debug("In on_pipeline_started callback handler")
|
||||
ready_state["pipeline_started"] = True
|
||||
await maybe_trigger_llm()
|
||||
|
||||
@task.event_handler("on_pipeline_finished")
|
||||
async def on_pipeline_finished(
|
||||
task: PipelineTask,
|
||||
frame: Frame,
|
||||
_frame: Frame,
|
||||
):
|
||||
logger.debug(f"In on_pipeline_finished callback handler")
|
||||
|
||||
|
|
@ -207,14 +217,13 @@ def register_task_event_handler(
|
|||
if workflow_run and workflow_run.campaign_id:
|
||||
await campaign_call_dispatcher.release_call_slot(workflow_run_id)
|
||||
|
||||
# Write buffers to temp files and enqueue S3 upload
|
||||
# Write buffers to temp files and enqueue combined processing task
|
||||
audio_temp_path = None
|
||||
transcript_temp_path = None
|
||||
|
||||
try:
|
||||
# Only upload if buffers have content
|
||||
if not in_memory_audio_buffer.is_empty:
|
||||
audio_temp_path = await in_memory_audio_buffer.write_to_temp_file()
|
||||
await enqueue_job(
|
||||
FunctionNames.UPLOAD_AUDIO_TO_S3, workflow_run_id, audio_temp_path
|
||||
)
|
||||
else:
|
||||
logger.debug("Audio buffer is empty, skipping upload")
|
||||
|
||||
|
|
@ -222,11 +231,6 @@ def register_task_event_handler(
|
|||
transcript_temp_path = (
|
||||
await in_memory_transcript_buffer.write_to_temp_file()
|
||||
)
|
||||
await enqueue_job(
|
||||
FunctionNames.UPLOAD_TRANSCRIPT_TO_S3,
|
||||
workflow_run_id,
|
||||
transcript_temp_path,
|
||||
)
|
||||
else:
|
||||
logger.debug("Transcript buffer is empty, skipping upload")
|
||||
|
||||
|
|
@ -234,10 +238,18 @@ def register_task_event_handler(
|
|||
logger.error(f"Error preparing buffers for S3 upload: {e}", exc_info=True)
|
||||
|
||||
await enqueue_job(FunctionNames.CALCULATE_WORKFLOW_RUN_COST, workflow_run_id)
|
||||
|
||||
# Combined task: uploads artifacts then runs integrations sequentially
|
||||
await enqueue_job(
|
||||
FunctionNames.RUN_INTEGRATIONS_POST_WORKFLOW_RUN, workflow_run_id
|
||||
FunctionNames.PROCESS_WORKFLOW_COMPLETION,
|
||||
workflow_run_id,
|
||||
audio_temp_path,
|
||||
transcript_temp_path,
|
||||
)
|
||||
|
||||
# Return the buffers so they can be passed to other handlers
|
||||
return in_memory_audio_buffer, in_memory_transcript_buffer
|
||||
|
||||
|
||||
def register_audio_data_handler(
|
||||
audio_buffer: AudioBufferProcessor,
|
||||
|
|
@ -260,18 +272,26 @@ def register_audio_data_handler(
|
|||
# Could implement overflow to disk here if needed
|
||||
|
||||
|
||||
def register_transcript_handler(
|
||||
transcript, workflow_run_id, in_memory_buffer: InMemoryTranscriptBuffer
|
||||
def register_transcript_handlers(
|
||||
user_aggregator,
|
||||
assistant_aggregator,
|
||||
workflow_run_id,
|
||||
in_memory_buffer: InMemoryTranscriptBuffer,
|
||||
):
|
||||
"""Register event handler for transcript updates"""
|
||||
"""Register event handlers for transcript updates on context aggregators.
|
||||
|
||||
@transcript.event_handler("on_transcript_update")
|
||||
async def on_transcript_update(processor, frame):
|
||||
transcript_text = ""
|
||||
for msg in frame.messages:
|
||||
timestamp = f"[{msg.timestamp}] " if msg.timestamp else ""
|
||||
line = f"{timestamp}{msg.role}: {msg.content}\n"
|
||||
transcript_text += line
|
||||
Uses the on_user_turn_stopped and on_assistant_turn_stopped events to capture
|
||||
transcripts as turns complete, following the event-based pattern.
|
||||
"""
|
||||
|
||||
# Use in-memory buffer
|
||||
await in_memory_buffer.append(transcript_text)
|
||||
@user_aggregator.event_handler("on_user_turn_stopped")
|
||||
async def on_user_turn_stopped(aggregator, strategy, message):
|
||||
timestamp = f"[{message.timestamp}] " if message.timestamp else ""
|
||||
line = f"{timestamp}user: {message.content}\n"
|
||||
await in_memory_buffer.append(line)
|
||||
|
||||
@assistant_aggregator.event_handler("on_assistant_turn_stopped")
|
||||
async def on_assistant_turn_stopped(aggregator, message):
|
||||
timestamp = f"[{message.timestamp}] " if message.timestamp else ""
|
||||
line = f"{timestamp}assistant: {message.content}\n"
|
||||
await in_memory_buffer.append(line)
|
||||
|
|
|
|||
|
|
@ -1,5 +1,4 @@
|
|||
import os
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from loguru import logger
|
||||
|
||||
|
|
@ -11,14 +10,10 @@ from pipecat.pipeline.pipeline import Pipeline
|
|||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.audio.audio_buffer_processor import AudioBufferProcessor
|
||||
from pipecat.processors.transcript_processor import TranscriptProcessor
|
||||
from pipecat.utils.context import turn_var
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from api.services.workflow.pipecat_engine import PipecatEngine
|
||||
|
||||
|
||||
def create_pipeline_components(audio_config: AudioConfig, engine: "PipecatEngine"):
|
||||
def create_pipeline_components(audio_config: AudioConfig):
|
||||
"""Create and return the main pipeline components with proper audio configuration"""
|
||||
logger.info(f"Creating pipeline components with audio config: {audio_config}")
|
||||
|
||||
|
|
@ -28,28 +23,21 @@ def create_pipeline_components(audio_config: AudioConfig, engine: "PipecatEngine
|
|||
buffer_size=audio_config.buffer_size_bytes,
|
||||
)
|
||||
|
||||
transcript = TranscriptProcessor(
|
||||
assistant_correct_aggregation_callback=engine.create_aggregation_correction_callback()
|
||||
)
|
||||
|
||||
context = LLMContext()
|
||||
|
||||
return audio_buffer, transcript, context
|
||||
return audio_buffer, context
|
||||
|
||||
|
||||
def build_pipeline(
|
||||
transport,
|
||||
stt,
|
||||
transcript,
|
||||
audio_buffer,
|
||||
llm,
|
||||
tts,
|
||||
user_context_aggregator,
|
||||
assistant_context_aggregator,
|
||||
pipeline_engine_callback_processor,
|
||||
stt_mute_filter,
|
||||
pipeline_metrics_aggregator,
|
||||
user_idle_disconnect,
|
||||
voicemail_detector=None,
|
||||
):
|
||||
"""Build the main pipeline with all components.
|
||||
|
|
@ -63,7 +51,7 @@ def build_pipeline(
|
|||
# Build processors list with optional voicemail detection
|
||||
processors = [
|
||||
transport.input(), # Transport user input
|
||||
stt, # STT (audio_passthrough=True by default, passes InputAudioRawFrame)
|
||||
stt,
|
||||
]
|
||||
|
||||
# Insert voicemail detector after STT if enabled
|
||||
|
|
@ -76,16 +64,12 @@ def build_pipeline(
|
|||
# Continue with the rest of the pipeline
|
||||
processors.extend(
|
||||
[
|
||||
stt_mute_filter, # STTMuteFilters don't let VAD related events pass through if muted
|
||||
user_idle_disconnect,
|
||||
transcript.user(),
|
||||
user_context_aggregator,
|
||||
llm, # LLM
|
||||
pipeline_engine_callback_processor,
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
audio_buffer, # AudioBufferProcessor - records both input and output audio
|
||||
transcript.assistant(),
|
||||
assistant_context_aggregator, # Assistant spoken responses
|
||||
pipeline_metrics_aggregator,
|
||||
]
|
||||
|
|
@ -98,7 +82,6 @@ def create_pipeline_task(pipeline, workflow_run_id, audio_config: AudioConfig =
|
|||
"""Create a pipeline task with appropriate parameters"""
|
||||
# Set up pipeline params with audio configuration if provided
|
||||
pipeline_params = PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
send_initial_empty_metrics=False,
|
||||
|
|
@ -119,6 +102,7 @@ def create_pipeline_task(pipeline, workflow_run_id, audio_config: AudioConfig =
|
|||
pipeline,
|
||||
params=pipeline_params,
|
||||
enable_tracing=ENABLE_TRACING,
|
||||
enable_rtvi=False,
|
||||
conversation_id=f"{workflow_run_id}",
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -7,12 +7,12 @@ from loguru import logger
|
|||
from api.db import db_client
|
||||
from api.db.models import WorkflowModel
|
||||
from api.enums import WorkflowRunMode
|
||||
from api.services.configuration.registry import ServiceProviders
|
||||
from api.services.pipecat.audio_config import AudioConfig, create_audio_config
|
||||
from api.services.pipecat.event_handlers import (
|
||||
register_audio_data_handler,
|
||||
register_task_event_handler,
|
||||
register_transcript_handler,
|
||||
register_transport_event_handlers,
|
||||
register_event_handlers,
|
||||
register_transcript_handlers,
|
||||
)
|
||||
from api.services.pipecat.in_memory_buffers import InMemoryLogsBuffer
|
||||
from api.services.pipecat.pipeline_builder import (
|
||||
|
|
@ -46,20 +46,25 @@ from api.services.workflow.pipecat_engine import PipecatEngine
|
|||
from api.services.workflow.workflow import WorkflowGraph
|
||||
from pipecat.extensions.voicemail.voicemail_detector import VoicemailDetector
|
||||
from pipecat.pipeline.base_task import PipelineTaskParams
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
from pipecat.processors.aggregators.llm_response_universal import (
|
||||
LLMAssistantAggregatorParams,
|
||||
LLMContextAggregatorPair,
|
||||
LLMUserAggregatorParams,
|
||||
)
|
||||
from pipecat.processors.aggregators.llm_response_universal import (
|
||||
LLMContextAggregatorPair,
|
||||
)
|
||||
from pipecat.processors.filters.stt_mute_filter import (
|
||||
STTMuteConfig,
|
||||
STTMuteFilter,
|
||||
STTMuteStrategy,
|
||||
)
|
||||
from pipecat.processors.user_idle_processor import UserIdleProcessor
|
||||
from pipecat.transports.smallwebrtc.connection import SmallWebRTCConnection
|
||||
from pipecat.turns.user_mute import MuteUntilFirstBotCompleteUserMuteStrategy
|
||||
from pipecat.turns.user_start import (
|
||||
ExternalUserTurnStartStrategy,
|
||||
TranscriptionUserTurnStartStrategy,
|
||||
)
|
||||
from pipecat.turns.user_start.vad_user_turn_start_strategy import (
|
||||
VADUserTurnStartStrategy,
|
||||
)
|
||||
from pipecat.turns.user_stop import (
|
||||
ExternalUserTurnStopStrategy,
|
||||
TranscriptionUserTurnStopStrategy,
|
||||
)
|
||||
from pipecat.turns.user_turn_strategies import UserTurnStrategies
|
||||
from pipecat.utils.context import set_current_run_id
|
||||
from pipecat.utils.enums import EndTaskReason
|
||||
from pipecat.utils.tracing.context_registry import ContextProviderRegistry
|
||||
|
|
@ -517,12 +522,11 @@ async def _run_pipeline(
|
|||
embeddings_model=embeddings_model,
|
||||
)
|
||||
|
||||
# Create pipeline components with audio configuration and engine
|
||||
audio_buffer, transcript, context = create_pipeline_components(audio_config, engine)
|
||||
# Create pipeline components with audio configuration
|
||||
audio_buffer, context = create_pipeline_components(audio_config)
|
||||
|
||||
# Set the context and audio_buffer after creation
|
||||
engine.set_context(context)
|
||||
engine.set_audio_buffer(audio_buffer)
|
||||
|
||||
# Set Stasis connection for immediate transfers (if available)
|
||||
if stasis_connection:
|
||||
|
|
@ -532,7 +536,31 @@ async def _run_pipeline(
|
|||
expect_stripped_words=True,
|
||||
correct_aggregation_callback=engine.create_aggregation_correction_callback(),
|
||||
)
|
||||
user_params = LLMUserAggregatorParams(enable_emulated_vad_interruptions=True)
|
||||
|
||||
# Configure turn strategies based on STT provider and model
|
||||
# Deepgram Flux uses external turn detection (VAD + External start/stop)
|
||||
# Other models use transcription-based turn detection with smart turn analyzer
|
||||
is_deepgram_flux = (
|
||||
user_config.stt.provider == ServiceProviders.DEEPGRAM.value
|
||||
and user_config.stt.model == "flux-general-en"
|
||||
)
|
||||
|
||||
if is_deepgram_flux:
|
||||
user_turn_strategies = UserTurnStrategies(
|
||||
start=[VADUserTurnStartStrategy(), ExternalUserTurnStartStrategy()],
|
||||
stop=[ExternalUserTurnStopStrategy()],
|
||||
)
|
||||
else:
|
||||
user_turn_strategies = UserTurnStrategies(
|
||||
start=[VADUserTurnStartStrategy(), TranscriptionUserTurnStartStrategy()],
|
||||
stop=[TranscriptionUserTurnStopStrategy()],
|
||||
)
|
||||
|
||||
user_params = LLMUserAggregatorParams(
|
||||
user_turn_strategies=user_turn_strategies,
|
||||
user_mute_strategies=[MuteUntilFirstBotCompleteUserMuteStrategy()],
|
||||
user_idle_timeout=max_user_idle_timeout,
|
||||
)
|
||||
context_aggregator = LLMContextAggregatorPair(
|
||||
context, assistant_params=assistant_params, user_params=user_params
|
||||
)
|
||||
|
|
@ -547,25 +575,20 @@ async def _run_pipeline(
|
|||
|
||||
pipeline_metrics_aggregator = PipelineMetricsAggregator()
|
||||
|
||||
# Create STT mute filter using the selected strategies and the engine's callback
|
||||
stt_mute_filter = STTMuteFilter(
|
||||
config=STTMuteConfig(
|
||||
strategies={
|
||||
STTMuteStrategy.MUTE_UNTIL_FIRST_BOT_COMPLETE,
|
||||
STTMuteStrategy.CUSTOM,
|
||||
},
|
||||
should_mute_callback=engine.create_should_mute_callback(),
|
||||
)
|
||||
)
|
||||
|
||||
# Use engine's user idle callback with configured timeout
|
||||
user_idle_disconnect = UserIdleProcessor(
|
||||
callback=engine.create_user_idle_callback(), timeout=max_user_idle_timeout
|
||||
)
|
||||
|
||||
user_context_aggregator = context_aggregator.user()
|
||||
assistant_context_aggregator = context_aggregator.assistant()
|
||||
|
||||
# Register user idle event handlers
|
||||
user_idle_handler = engine.create_user_idle_handler()
|
||||
|
||||
@user_context_aggregator.event_handler("on_user_turn_idle")
|
||||
async def on_user_turn_idle(aggregator):
|
||||
await user_idle_handler.handle_idle(aggregator)
|
||||
|
||||
@user_context_aggregator.event_handler("on_user_turn_started")
|
||||
async def on_user_turn_started(aggregator, strategy):
|
||||
user_idle_handler.reset()
|
||||
|
||||
# Create voicemail detector if enabled in the workflow's start node
|
||||
voicemail_detector = None
|
||||
start_node = workflow_graph.nodes.get(workflow_graph.start_node_id)
|
||||
|
|
@ -592,16 +615,13 @@ async def _run_pipeline(
|
|||
pipeline = build_pipeline(
|
||||
transport,
|
||||
stt,
|
||||
transcript,
|
||||
audio_buffer,
|
||||
llm,
|
||||
tts,
|
||||
user_context_aggregator,
|
||||
assistant_context_aggregator,
|
||||
pipeline_engine_callback_processor,
|
||||
stt_mute_filter,
|
||||
pipeline_metrics_aggregator,
|
||||
user_idle_disconnect,
|
||||
voicemail_detector=voicemail_detector,
|
||||
)
|
||||
|
||||
|
|
@ -614,18 +634,6 @@ async def _run_pipeline(
|
|||
# Initialize the engine to set the initial context
|
||||
await engine.initialize()
|
||||
|
||||
# Register event handlers
|
||||
in_memory_audio_buffer, in_memory_transcript_buffer = (
|
||||
register_transport_event_handlers(
|
||||
task,
|
||||
transport,
|
||||
workflow_run_id,
|
||||
engine=engine,
|
||||
audio_buffer=audio_buffer,
|
||||
audio_config=audio_config,
|
||||
)
|
||||
)
|
||||
|
||||
# Add real-time feedback observer if WebSocket sender is available
|
||||
# Note: ws_sender was already fetched earlier for node_transition_callback
|
||||
if ws_sender:
|
||||
|
|
@ -635,21 +643,24 @@ async def _run_pipeline(
|
|||
)
|
||||
task.add_observer(feedback_observer)
|
||||
|
||||
register_task_event_handler(
|
||||
workflow_run_id,
|
||||
engine,
|
||||
# Register event handlers
|
||||
in_memory_audio_buffer, in_memory_transcript_buffer = register_event_handlers(
|
||||
task,
|
||||
transport,
|
||||
audio_buffer,
|
||||
in_memory_audio_buffer,
|
||||
in_memory_transcript_buffer,
|
||||
in_memory_logs_buffer,
|
||||
pipeline_metrics_aggregator,
|
||||
workflow_run_id,
|
||||
engine=engine,
|
||||
audio_buffer=audio_buffer,
|
||||
in_memory_logs_buffer=in_memory_logs_buffer,
|
||||
pipeline_metrics_aggregator=pipeline_metrics_aggregator,
|
||||
audio_config=audio_config,
|
||||
)
|
||||
|
||||
register_audio_data_handler(audio_buffer, workflow_run_id, in_memory_audio_buffer)
|
||||
register_transcript_handler(
|
||||
transcript, workflow_run_id, in_memory_transcript_buffer
|
||||
register_transcript_handlers(
|
||||
user_context_aggregator,
|
||||
assistant_context_aggregator,
|
||||
workflow_run_id,
|
||||
in_memory_transcript_buffer,
|
||||
)
|
||||
|
||||
try:
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ from api.constants import MPS_API_URL
|
|||
from api.services.configuration.registry import ServiceProviders
|
||||
from pipecat.services.azure.llm import AzureLLMService
|
||||
from pipecat.services.cartesia.stt import CartesiaSTTService
|
||||
from pipecat.services.deepgram.flux.stt import DeepgramFluxSTTService
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService, LiveOptions
|
||||
from pipecat.services.deepgram.tts import DeepgramTTSService
|
||||
from pipecat.services.dograh.llm import DograhLLMService
|
||||
|
|
@ -34,6 +35,20 @@ def create_stt_service(user_config):
|
|||
f"Creating STT service: provider={user_config.stt.provider}, model={user_config.stt.model}"
|
||||
)
|
||||
if user_config.stt.provider == ServiceProviders.DEEPGRAM.value:
|
||||
# Check if using Flux model (English-only, no language selection)
|
||||
if user_config.stt.model == "flux-general-en":
|
||||
logger.debug("Using DeepGram Flux Model")
|
||||
return DeepgramFluxSTTService(
|
||||
api_key=user_config.stt.api_key,
|
||||
model=user_config.stt.model,
|
||||
params=DeepgramFluxSTTService.InputParams(
|
||||
eot_timeout_ms=3000,
|
||||
eot_threshold=0.7,
|
||||
),
|
||||
should_interrupt=False, # Let UserAggregator take care of sending InterruptionFrame
|
||||
)
|
||||
|
||||
# Other models than flux
|
||||
# Use language from user config, defaulting to "multi" for multilingual support
|
||||
language = getattr(user_config.stt, "language", None) or "multi"
|
||||
live_options = LiveOptions(
|
||||
|
|
@ -44,7 +59,9 @@ def create_stt_service(user_config):
|
|||
)
|
||||
logger.debug(f"Using DeepGram Model - {user_config.stt.model}")
|
||||
return DeepgramSTTService(
|
||||
live_options=live_options, api_key=user_config.stt.api_key
|
||||
live_options=live_options,
|
||||
api_key=user_config.stt.api_key,
|
||||
should_interrupt=False, # Let UserAggregator take care of sending InterruptionFrame
|
||||
)
|
||||
elif user_config.stt.provider == ServiceProviders.OPENAI.value:
|
||||
return OpenAISTTService(
|
||||
|
|
|
|||
|
|
@ -2,10 +2,9 @@ import os
|
|||
|
||||
from fastapi import WebSocket
|
||||
|
||||
from api.constants import APP_ROOT_DIR, ENABLE_RNNOISE, ENABLE_SMART_TURN
|
||||
from api.constants import APP_ROOT_DIR
|
||||
from api.db import db_client
|
||||
from api.enums import OrganizationConfigurationKey
|
||||
from api.services.looptalk.internal_transport import InternalTransport
|
||||
from api.services.pipecat.audio_config import AudioConfig
|
||||
from api.services.telephony.stasis_rtp_connection import StasisRTPConnection
|
||||
from api.services.telephony.stasis_rtp_serializer import StasisRTPFrameSerializer
|
||||
|
|
@ -13,11 +12,8 @@ from api.services.telephony.stasis_rtp_transport import (
|
|||
StasisRTPTransport,
|
||||
StasisRTPTransportParams,
|
||||
)
|
||||
from pipecat.audio.filters.rnnoise_filter import RNNoiseFilter
|
||||
from pipecat.audio.mixers.silence_mixer import SilenceAudioMixer
|
||||
from pipecat.audio.mixers.soundfile_mixer import SoundfileMixer
|
||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer, VADParams
|
||||
from pipecat.serializers.twilio import TwilioFrameSerializer
|
||||
from pipecat.serializers.vobiz import VobizFrameSerializer
|
||||
|
|
@ -35,19 +31,6 @@ librnnoise_path = os.path.normpath(
|
|||
)
|
||||
|
||||
|
||||
def create_turn_analyzer(workflow_run_id: int, audio_config: AudioConfig):
|
||||
"""Create a turn analyzer backed by the local Smart Turn HTTP service.
|
||||
|
||||
Args:
|
||||
workflow_run_id: ID of the workflow run for turn analyzer context
|
||||
audio_config: Audio configuration containing pipeline sample rate
|
||||
"""
|
||||
if ENABLE_SMART_TURN:
|
||||
return LocalSmartTurnAnalyzerV3(params=SmartTurnParams())
|
||||
|
||||
return None
|
||||
|
||||
|
||||
async def create_twilio_transport(
|
||||
websocket_client: WebSocket,
|
||||
stream_sid: str,
|
||||
|
|
@ -78,8 +61,6 @@ async def create_twilio_transport(
|
|||
f"Incomplete Twilio configuration for organization {organization_id}"
|
||||
)
|
||||
|
||||
turn_analyzer = create_turn_analyzer(workflow_run_id, audio_config)
|
||||
|
||||
serializer = TwilioFrameSerializer(
|
||||
stream_sid=stream_sid,
|
||||
call_sid=call_sid,
|
||||
|
|
@ -119,11 +100,7 @@ async def create_twilio_transport(
|
|||
if ambient_noise_config and ambient_noise_config.get("enabled", False)
|
||||
else SilenceAudioMixer()
|
||||
),
|
||||
turn_analyzer=turn_analyzer,
|
||||
serializer=serializer,
|
||||
audio_in_filter=RNNoiseFilter(library_path=librnnoise_path)
|
||||
if ENABLE_RNNOISE
|
||||
else None,
|
||||
),
|
||||
)
|
||||
|
||||
|
|
@ -158,8 +135,6 @@ async def create_cloudonix_transport(
|
|||
f"Required: bearer_token, domain_id"
|
||||
)
|
||||
|
||||
turn_analyzer = create_turn_analyzer(workflow_run_id, audio_config)
|
||||
|
||||
from pipecat.serializers.cloudonix import CloudonixFrameSerializer
|
||||
|
||||
serializer = CloudonixFrameSerializer(
|
||||
|
|
@ -202,11 +177,7 @@ async def create_cloudonix_transport(
|
|||
if ambient_noise_config and ambient_noise_config.get("enabled", False)
|
||||
else SilenceAudioMixer()
|
||||
),
|
||||
turn_analyzer=turn_analyzer,
|
||||
serializer=serializer,
|
||||
audio_in_filter=RNNoiseFilter(library_path=librnnoise_path)
|
||||
if ENABLE_RNNOISE
|
||||
else None,
|
||||
),
|
||||
)
|
||||
|
||||
|
|
@ -238,8 +209,6 @@ async def create_vonage_transport(
|
|||
f"Incomplete Vonage configuration for organization {organization_id}"
|
||||
)
|
||||
|
||||
turn_analyzer = create_turn_analyzer(workflow_run_id, audio_config)
|
||||
|
||||
serializer = VonageFrameSerializer(
|
||||
call_uuid=call_uuid,
|
||||
application_id=application_id,
|
||||
|
|
@ -283,11 +252,7 @@ async def create_vonage_transport(
|
|||
if ambient_noise_config and ambient_noise_config.get("enabled", False)
|
||||
else SilenceAudioMixer()
|
||||
),
|
||||
turn_analyzer=turn_analyzer,
|
||||
serializer=serializer,
|
||||
audio_in_filter=RNNoiseFilter(library_path=librnnoise_path)
|
||||
if ENABLE_RNNOISE
|
||||
else None,
|
||||
),
|
||||
)
|
||||
|
||||
|
|
@ -337,8 +302,6 @@ async def create_vobiz_transport(
|
|||
f"from_numbers={len(config.get('from_numbers', []))} numbers"
|
||||
)
|
||||
|
||||
turn_analyzer = create_turn_analyzer(workflow_run_id, audio_config)
|
||||
|
||||
# Use VobizFrameSerializer for Vobiz WebSocket protocol
|
||||
serializer = VobizFrameSerializer(
|
||||
stream_id=stream_id,
|
||||
|
|
@ -389,11 +352,7 @@ async def create_vobiz_transport(
|
|||
if ambient_noise_config and ambient_noise_config.get("enabled", False)
|
||||
else SilenceAudioMixer()
|
||||
),
|
||||
turn_analyzer=turn_analyzer,
|
||||
serializer=serializer,
|
||||
audio_in_filter=RNNoiseFilter(library_path=librnnoise_path)
|
||||
if ENABLE_RNNOISE
|
||||
else None,
|
||||
),
|
||||
)
|
||||
|
||||
|
|
@ -411,7 +370,6 @@ def create_webrtc_transport(
|
|||
ambient_noise_config: dict | None = None,
|
||||
):
|
||||
"""Create a transport for WebRTC connections"""
|
||||
turn_analyzer = create_turn_analyzer(workflow_run_id, audio_config)
|
||||
|
||||
return SmallWebRTCTransport(
|
||||
webrtc_connection=webrtc_connection,
|
||||
|
|
@ -445,10 +403,6 @@ def create_webrtc_transport(
|
|||
if ambient_noise_config and ambient_noise_config.get("enabled", False)
|
||||
else SilenceAudioMixer()
|
||||
),
|
||||
turn_analyzer=turn_analyzer,
|
||||
audio_in_filter=RNNoiseFilter(library_path=librnnoise_path)
|
||||
if ENABLE_RNNOISE
|
||||
else None,
|
||||
),
|
||||
)
|
||||
|
||||
|
|
@ -461,7 +415,6 @@ def create_stasis_transport(
|
|||
ambient_noise_config: dict | None = None,
|
||||
):
|
||||
"""Create a transport for ARI connections"""
|
||||
turn_analyzer = create_turn_analyzer(workflow_run_id, audio_config)
|
||||
|
||||
serializer = StasisRTPFrameSerializer(
|
||||
StasisRTPFrameSerializer.InputParams(
|
||||
|
|
@ -502,11 +455,7 @@ def create_stasis_transport(
|
|||
if ambient_noise_config and ambient_noise_config.get("enabled", False)
|
||||
else SilenceAudioMixer()
|
||||
),
|
||||
turn_analyzer=turn_analyzer,
|
||||
serializer=serializer,
|
||||
audio_in_filter=RNNoiseFilter(library_path=librnnoise_path)
|
||||
if ENABLE_RNNOISE
|
||||
else None,
|
||||
),
|
||||
)
|
||||
|
||||
|
|
@ -528,46 +477,44 @@ def create_internal_transport(
|
|||
Returns:
|
||||
InternalTransport instance configured with turn analyzer
|
||||
"""
|
||||
turn_analyzer = create_turn_analyzer(workflow_run_id, audio_config)
|
||||
pass
|
||||
# Commented out because pulling looptalk into the regular import flow
|
||||
# was causing issues. Maybe move this to looptalk/orchestrator.py
|
||||
|
||||
# Create and return the internal transport with latency
|
||||
return InternalTransport(
|
||||
params=TransportParams(
|
||||
audio_out_enabled=True,
|
||||
audio_out_sample_rate=audio_config.transport_out_sample_rate,
|
||||
audio_out_channels=1,
|
||||
audio_in_enabled=True,
|
||||
audio_in_sample_rate=audio_config.transport_in_sample_rate,
|
||||
audio_in_channels=1,
|
||||
vad_analyzer=(
|
||||
SileroVADAnalyzer(
|
||||
params=VADParams(
|
||||
confidence=vad_config.get("confidence", 0.7),
|
||||
start_secs=vad_config.get("start_seconds", 0.4),
|
||||
stop_secs=vad_config.get("stop_seconds", 0.8),
|
||||
min_volume=vad_config.get("minimum_volume", 0.6),
|
||||
)
|
||||
)
|
||||
if vad_config
|
||||
else SileroVADAnalyzer()
|
||||
),
|
||||
audio_out_mixer=(
|
||||
SoundfileMixer(
|
||||
sound_files={
|
||||
"office": APP_ROOT_DIR
|
||||
/ "assets"
|
||||
/ f"office-ambience-{audio_config.transport_out_sample_rate}-mono.wav"
|
||||
},
|
||||
default_sound="office",
|
||||
volume=ambient_noise_config.get("volume", 0.3),
|
||||
)
|
||||
if ambient_noise_config and ambient_noise_config.get("enabled", False)
|
||||
else SilenceAudioMixer()
|
||||
),
|
||||
turn_analyzer=turn_analyzer,
|
||||
audio_in_filter=RNNoiseFilter(library_path=librnnoise_path)
|
||||
if ENABLE_RNNOISE
|
||||
else None,
|
||||
),
|
||||
latency_seconds=latency_seconds,
|
||||
)
|
||||
# return InternalTransport(
|
||||
# params=TransportParams(
|
||||
# audio_out_enabled=True,
|
||||
# audio_out_sample_rate=audio_config.transport_out_sample_rate,
|
||||
# audio_out_channels=1,
|
||||
# audio_in_enabled=True,
|
||||
# audio_in_sample_rate=audio_config.transport_in_sample_rate,
|
||||
# audio_in_channels=1,
|
||||
# vad_analyzer=(
|
||||
# SileroVADAnalyzer(
|
||||
# params=VADParams(
|
||||
# confidence=vad_config.get("confidence", 0.7),
|
||||
# start_secs=vad_config.get("start_seconds", 0.4),
|
||||
# stop_secs=vad_config.get("stop_seconds", 0.8),
|
||||
# min_volume=vad_config.get("minimum_volume", 0.6),
|
||||
# )
|
||||
# )
|
||||
# if vad_config
|
||||
# else SileroVADAnalyzer()
|
||||
# ),
|
||||
# audio_out_mixer=(
|
||||
# SoundfileMixer(
|
||||
# sound_files={
|
||||
# "office": APP_ROOT_DIR
|
||||
# / "assets"
|
||||
# / f"office-ambience-{audio_config.transport_out_sample_rate}-mono.wav"
|
||||
# },
|
||||
# default_sound="office",
|
||||
# volume=ambient_noise_config.get("volume", 0.3),
|
||||
# )
|
||||
# if ambient_noise_config and ambient_noise_config.get("enabled", False)
|
||||
# else SilenceAudioMixer()
|
||||
# ),
|
||||
# ),
|
||||
# latency_seconds=latency_seconds,
|
||||
# )
|
||||
|
|
|
|||
|
|
@ -15,6 +15,8 @@ The serializer:
|
|||
from typing import Optional
|
||||
|
||||
from loguru import logger
|
||||
from pydantic import BaseModel
|
||||
|
||||
from pipecat.audio.utils import create_default_resampler, pcm_to_ulaw, ulaw_to_pcm
|
||||
from pipecat.frames.frames import (
|
||||
AudioRawFrame,
|
||||
|
|
@ -22,8 +24,7 @@ from pipecat.frames.frames import (
|
|||
InputAudioRawFrame,
|
||||
StartFrame,
|
||||
)
|
||||
from pipecat.serializers.base_serializer import FrameSerializer, FrameSerializerType
|
||||
from pydantic import BaseModel
|
||||
from pipecat.serializers.base_serializer import FrameSerializer
|
||||
|
||||
|
||||
class StasisRTPFrameSerializer(FrameSerializer):
|
||||
|
|
@ -59,11 +60,6 @@ class StasisRTPFrameSerializer(FrameSerializer):
|
|||
# Resampler shared between encode / decode paths
|
||||
self._resampler = create_default_resampler()
|
||||
|
||||
@property
|
||||
def type(self) -> FrameSerializerType:
|
||||
"""Stasis uses raw bytes → BINARY."""
|
||||
return FrameSerializerType.BINARY
|
||||
|
||||
async def setup(self, frame: StartFrame):
|
||||
"""Remember pipeline configuration."""
|
||||
self._sample_rate = self._params.sample_rate or frame.audio_in_sample_rate
|
||||
|
|
|
|||
|
|
@ -19,7 +19,6 @@ from pipecat.utils.enums import EndTaskReason
|
|||
|
||||
if TYPE_CHECKING:
|
||||
from api.services.telephony.stasis_rtp_connection import StasisRTPConnection
|
||||
from pipecat.processors.audio.audio_buffer_processor import AudioBuffer
|
||||
from pipecat.services.anthropic.llm import AnthropicLLMService
|
||||
from pipecat.services.google.llm import GoogleLLMService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
|
|
@ -64,7 +63,6 @@ class PipecatEngine:
|
|||
transport: Optional[BaseTransport] = None,
|
||||
workflow: WorkflowGraph,
|
||||
call_context_vars: dict,
|
||||
audio_buffer: Optional["AudioBuffer"] = None,
|
||||
workflow_run_id: Optional[int] = None,
|
||||
node_transition_callback: Optional[
|
||||
Callable[[str, Optional[str]], Awaitable[None]]
|
||||
|
|
@ -78,7 +76,6 @@ class PipecatEngine:
|
|||
self.transport = transport
|
||||
self.workflow = workflow
|
||||
self._call_context_vars = call_context_vars
|
||||
self._audio_buffer = audio_buffer
|
||||
self._workflow_run_id = workflow_run_id
|
||||
self._node_transition_callback = node_transition_callback
|
||||
self._initialized = False
|
||||
|
|
@ -204,6 +201,7 @@ class PipecatEngine:
|
|||
logger.info(f"Arguments: {function_call_params.arguments}")
|
||||
await self.set_node(transition_to_node)
|
||||
try:
|
||||
|
||||
async def on_context_updated() -> None:
|
||||
"""
|
||||
pipecat framework will run this function after the function call result has been updated in the context.
|
||||
|
|
@ -215,6 +213,12 @@ class PipecatEngine:
|
|||
self._current_node
|
||||
)
|
||||
|
||||
# Queue EndFrame if we just transitioned to EndNode
|
||||
if self._current_node.is_end:
|
||||
await self.send_end_task_frame(
|
||||
EndTaskReason.USER_QUALIFIED.value
|
||||
)
|
||||
|
||||
result = {"status": "done"}
|
||||
|
||||
properties = FunctionCallResultProperties(
|
||||
|
|
@ -478,8 +482,6 @@ class PipecatEngine:
|
|||
if node.extraction_enabled and node.extraction_variables:
|
||||
await self._perform_variable_extraction_if_needed(node)
|
||||
|
||||
await self.send_end_task_frame(EndTaskReason.USER_QUALIFIED.value)
|
||||
|
||||
async def _handle_agent_node(self, node: Node) -> None:
|
||||
"""Handle agent node execution."""
|
||||
if node.is_static:
|
||||
|
|
@ -680,12 +682,12 @@ class PipecatEngine:
|
|||
"""
|
||||
return engine_callbacks.create_should_mute_callback(self)
|
||||
|
||||
def create_user_idle_callback(self):
|
||||
def create_user_idle_handler(self):
|
||||
"""
|
||||
This callback is called when the user is idle for a certain duration.
|
||||
We use this to either play the static text or end the call
|
||||
Returns a UserIdleHandler that manages user-idle timeouts with state.
|
||||
The handler tracks retry count and handles escalating prompts.
|
||||
"""
|
||||
return engine_callbacks.create_user_idle_callback(self)
|
||||
return engine_callbacks.create_user_idle_handler(self)
|
||||
|
||||
def create_max_duration_callback(self):
|
||||
"""
|
||||
|
|
@ -721,14 +723,6 @@ class PipecatEngine:
|
|||
"""
|
||||
self.task = task
|
||||
|
||||
def set_audio_buffer(self, audio_buffer: "AudioBuffer") -> None:
|
||||
"""Set the audio buffer.
|
||||
|
||||
This allows setting the audio buffer after the engine has been created,
|
||||
which is useful when the audio buffer needs to be created after the engine.
|
||||
"""
|
||||
self._audio_buffer = audio_buffer
|
||||
|
||||
def set_stasis_connection(
|
||||
self, connection: Optional["StasisRTPConnection"]
|
||||
) -> None:
|
||||
|
|
|
|||
|
|
@ -23,7 +23,6 @@ from pipecat.utils.enums import EndTaskReason
|
|||
|
||||
if TYPE_CHECKING:
|
||||
from api.services.workflow.pipecat_engine import PipecatEngine
|
||||
from pipecat.processors.user_idle_processor import UserIdleProcessor
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
@ -57,33 +56,43 @@ def create_should_mute_callback(
|
|||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def create_user_idle_callback(engine: "PipecatEngine"):
|
||||
"""Return a callback that handles user-idle timeouts."""
|
||||
class UserIdleHandler:
|
||||
"""Helper class to manage user idle retry logic with state."""
|
||||
|
||||
async def handle_user_idle(
|
||||
user_idle: "UserIdleProcessor", retry_count: int
|
||||
) -> bool:
|
||||
logger.debug(f"Handling user_idle, attempt: {retry_count}")
|
||||
def __init__(self, engine: "PipecatEngine"):
|
||||
self._engine = engine
|
||||
self._retry_count = 0
|
||||
|
||||
if retry_count == 1:
|
||||
def reset(self):
|
||||
"""Reset the retry count when user becomes active."""
|
||||
self._retry_count = 0
|
||||
|
||||
async def handle_idle(self, aggregator):
|
||||
"""Handle user idle event with escalating prompts."""
|
||||
self._retry_count += 1
|
||||
logger.debug(f"Handling user_idle, attempt: {self._retry_count}")
|
||||
|
||||
if self._retry_count == 1:
|
||||
message = {
|
||||
"role": "system",
|
||||
"content": "The user has been quiet. Politely and briefly ask if they're still there in the language that the user has been speaking so far.",
|
||||
}
|
||||
await user_idle.push_frame(LLMMessagesAppendFrame([message], run_llm=True))
|
||||
return True
|
||||
await aggregator.push_frame(LLMMessagesAppendFrame([message], run_llm=True))
|
||||
return
|
||||
|
||||
message = {
|
||||
"role": "system",
|
||||
"content": "The user has been quiet. We will be disconnecting the call now. Wish them a good day in the language that the user has been speaking so far.",
|
||||
}
|
||||
await user_idle.push_frame(LLMMessagesAppendFrame([message], run_llm=True))
|
||||
await engine.send_end_task_frame(
|
||||
await aggregator.push_frame(LLMMessagesAppendFrame([message], run_llm=True))
|
||||
await self._engine.send_end_task_frame(
|
||||
EndTaskReason.USER_IDLE_MAX_DURATION_EXCEEDED.value
|
||||
)
|
||||
return False
|
||||
|
||||
return handle_user_idle
|
||||
|
||||
def create_user_idle_handler(engine: "PipecatEngine") -> UserIdleHandler:
|
||||
"""Return a UserIdleHandler that manages user-idle timeouts with state."""
|
||||
return UserIdleHandler(engine)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue