mirror of
https://github.com/dograh-hq/dograh.git
synced 2026-06-10 08:05:22 +02:00
feat: add hold music
This commit is contained in:
parent
c990af2a16
commit
f77a2afca6
11 changed files with 372 additions and 20 deletions
BIN
api/assets/transfer_hold_ring_16000.wav
Normal file
BIN
api/assets/transfer_hold_ring_16000.wav
Normal file
Binary file not shown.
BIN
api/assets/transfer_hold_ring_8000.wav
Normal file
BIN
api/assets/transfer_hold_ring_8000.wav
Normal file
Binary file not shown.
|
|
@ -72,9 +72,26 @@ class EndCallToolDefinition(BaseModel):
|
|||
config: EndCallConfig = Field(description="End Call configuration")
|
||||
|
||||
|
||||
class TransferCallConfig(BaseModel):
    """Configuration for Transfer Call tools."""

    # Required: the number the call is transferred to.
    transfer_number: str = Field(description="Number to transfer the call to")
    # Optional: message played before the transfer; None when not configured.
    transfer_message: Optional[str] = Field(
        description="Message to play before transferring the call",
        default=None,
    )
|
||||
|
||||
|
||||
class TransferCallToolDefinition(BaseModel):
    """Tool definition for Transfer Call tools."""

    # Version of this definition's schema; defaults to 1 when omitted.
    schema_version: int = Field(description="Schema version", default=1)
    # Fixed literal tag identifying this definition as a transfer-call tool.
    type: Literal["transfer_call"] = Field(description="Tool type")
    # Transfer-specific settings (see TransferCallConfig).
    config: TransferCallConfig = Field(description="Transfer Call configuration")
|
||||
|
||||
|
||||
# Union type for tool definitions - Pydantic will discriminate based on 'type' field
|
||||
ToolDefinition = Annotated[
|
||||
Union[HttpApiToolDefinition, EndCallToolDefinition],
|
||||
Union[HttpApiToolDefinition, EndCallToolDefinition, TransferCallToolDefinition],
|
||||
Field(discriminator="type"),
|
||||
]
|
||||
|
||||
|
|
|
|||
|
|
@ -548,6 +548,7 @@ async def _run_pipeline(
|
|||
node_transition_callback=node_transition_callback,
|
||||
embeddings_api_key=embeddings_api_key,
|
||||
embeddings_model=embeddings_model,
|
||||
audio_out_sample_rate=audio_config.transport_out_sample_rate,
|
||||
)
|
||||
|
||||
# Create pipeline components with audio configuration
|
||||
|
|
|
|||
|
|
@ -70,6 +70,7 @@ class PipecatEngine:
|
|||
] = None,
|
||||
embeddings_api_key: Optional[str] = None,
|
||||
embeddings_model: Optional[str] = None,
|
||||
audio_out_sample_rate: int = 16000,
|
||||
):
|
||||
self.task = task
|
||||
self.llm = llm
|
||||
|
|
@ -111,6 +112,9 @@ class PipecatEngine:
|
|||
self._embeddings_api_key: Optional[str] = embeddings_api_key
|
||||
self._embeddings_model: Optional[str] = embeddings_model
|
||||
|
||||
# Output audio sample rate for playback (8000 or 16000)
|
||||
self._audio_out_sample_rate: int = audio_out_sample_rate
|
||||
|
||||
async def _get_organization_id(self) -> Optional[int]:
|
||||
"""Get and cache the organization ID from workflow run."""
|
||||
if self._custom_tool_manager:
|
||||
|
|
@ -697,10 +701,14 @@ class PipecatEngine:
|
|||
connection: The StasisRTPConnection instance, or None for non-Stasis transports
|
||||
"""
|
||||
self._stasis_connection = connection
|
||||
if connection:
|
||||
logger.debug(
|
||||
f"Stasis connection set for immediate transfers: {connection.channel_id}"
|
||||
)
|
||||
|
||||
def mute_pipeline(self) -> None:
    """Flag the pipeline as muted so no further LLM generations happen.

    Intended to be invoked ahead of final messages (for example a transfer
    announcement) so that later user input is not processed. This method
    only sets the ``_mute_pipeline`` flag; honouring it is done elsewhere
    in the engine.
    """
    self._mute_pipeline = True
|
||||
|
||||
async def handle_llm_text_frame(self, text: str):
|
||||
"""Accumulate LLM text frames to build reference text."""
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ during workflow execution.
|
|||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
from typing import TYPE_CHECKING, Any, Optional
|
||||
|
||||
from loguru import logger
|
||||
|
|
@ -24,8 +25,13 @@ from api.services.workflow.transfer_event_protocol import (
|
|||
TransferEventType,
|
||||
wait_for_transfer_signal,
|
||||
)
|
||||
from api.utils.hold_audio import get_hold_audio_duration_ms, load_hold_audio
|
||||
from pipecat.adapters.schemas.function_schema import FunctionSchema
|
||||
from pipecat.frames.frames import FunctionCallResultProperties, TTSSpeakFrame
|
||||
from pipecat.frames.frames import (
|
||||
FunctionCallResultProperties,
|
||||
OutputAudioRawFrame,
|
||||
TTSSpeakFrame,
|
||||
)
|
||||
from pipecat.services.llm_service import FunctionCallParams
|
||||
from pipecat.utils.enums import EndTaskReason
|
||||
|
||||
|
|
@ -249,11 +255,48 @@ class CustomToolManager:
|
|||
Async handler function for the transfer call tool
|
||||
"""
|
||||
|
||||
async def play_hold_music_loop(stop_event: asyncio.Event) -> None:
    """Play hold music in a loop until stop_event is set.

    The complete clip for the engine's output sample rate is queued as a
    single ``OutputAudioRawFrame``. The coroutine then waits for one clip
    duration, or for ``stop_event``, before queueing the clip again.

    Args:
        stop_event: Event the caller sets to end the loop.

    Any exception raised while loading or queueing the audio is caught and
    logged here; it is not propagated to the caller.
    """
    sample_rate = self._engine._audio_out_sample_rate
    try:
        hold_audio = load_hold_audio(sample_rate)
        duration_secs = get_hold_audio_duration_ms(sample_rate) / 1000.0

        if duration_secs <= 0:
            # With a zero timeout, wait_for() below would time out at once
            # and this loop would spin, queueing frames without pause.
            logger.warning(
                f"Hold audio at {sample_rate}Hz is empty; skipping hold music"
            )
            return

        logger.info(
            f"Starting hold music loop at {sample_rate}Hz, "
            f"duration={duration_secs:.2f}s per loop"
        )

        while not stop_event.is_set():
            # Queue one full pass of the hold audio.
            await self._engine.task.queue_frame(
                OutputAudioRawFrame(
                    audio=hold_audio,
                    sample_rate=sample_rate,
                    num_channels=1,
                )
            )

            # Sleep for one clip length, waking early if asked to stop.
            try:
                await asyncio.wait_for(stop_event.wait(), timeout=duration_secs)
            except asyncio.TimeoutError:
                continue  # Clip length elapsed; queue it again
            break  # Stop event was set

        logger.info("Hold music loop stopped")

    except Exception as e:
        logger.error(f"Error playing hold music: {e}")
|
||||
|
||||
async def transfer_call_handler(
|
||||
function_call_params: FunctionCallParams,
|
||||
) -> None:
|
||||
logger.info(f"Transfer Call Tool EXECUTED: {function_name}")
|
||||
|
||||
stop_hold_music = asyncio.Event()
|
||||
hold_music_task: Optional[asyncio.Task] = None
|
||||
|
||||
try:
|
||||
# Get the transfer call configuration
|
||||
config = tool.definition.get("config", {})
|
||||
|
|
@ -269,6 +312,9 @@ class CustomToolManager:
|
|||
|
||||
logger.info(f"Initiating transfer to: {transfer_number}")
|
||||
|
||||
# Mute pipeline before playing transfer message
|
||||
self._engine.mute_pipeline()
|
||||
|
||||
# Play transfer message if configured
|
||||
if transfer_message:
|
||||
logger.info(f"Playing transfer message: {transfer_message}")
|
||||
|
|
@ -278,6 +324,11 @@ class CustomToolManager:
|
|||
self._engine._gathered_context["transfer_requested"] = True
|
||||
self._engine._gathered_context["transfer_number"] = transfer_number
|
||||
|
||||
# Start playing hold music in the background
|
||||
hold_music_task = asyncio.create_task(
|
||||
play_hold_music_loop(stop_hold_music)
|
||||
)
|
||||
|
||||
# Wait for external signal to proceed with transfer (30s timeout)
|
||||
workflow_run_id = self._engine._workflow_run_id
|
||||
logger.info(
|
||||
|
|
@ -286,9 +337,12 @@ class CustomToolManager:
|
|||
|
||||
transfer_event = await wait_for_transfer_signal(
|
||||
workflow_run_id=workflow_run_id,
|
||||
timeout_seconds=30.0,
|
||||
timeout_seconds=8.0,
|
||||
)
|
||||
|
||||
# Stop hold music
|
||||
stop_hold_music.set()
|
||||
|
||||
if transfer_event is None:
|
||||
# Timeout - transfer failed
|
||||
logger.warning("Transfer signal timed out")
|
||||
|
|
@ -329,8 +383,16 @@ class CustomToolManager:
|
|||
f"Transfer call tool '{function_name}' execution failed: {e}"
|
||||
)
|
||||
await function_call_params.result_callback(
|
||||
{"status": "error", "error": str(e)},
|
||||
properties=properties,
|
||||
{"status": "error", "error": str(e)}
|
||||
)
|
||||
finally:
|
||||
# Ensure hold music is stopped
|
||||
stop_hold_music.set()
|
||||
if hold_music_task and not hold_music_task.done():
|
||||
hold_music_task.cancel()
|
||||
try:
|
||||
await hold_music_task
|
||||
except asyncio.CancelledError:
|
||||
pass
|
||||
|
||||
return transfer_call_handler
|
||||
|
|
|
|||
|
|
@ -102,6 +102,7 @@ async def run_pipeline_with_tool_calls(
|
|||
workflow=workflow,
|
||||
call_context_vars={"customer_name": "Test User"},
|
||||
workflow_run_id=1,
|
||||
audio_out_sample_rate=16000,
|
||||
)
|
||||
|
||||
# Create the pipeline with the mock LLM and TTS
|
||||
|
|
@ -371,6 +372,8 @@ class TestPipecatEngineToolCalls:
|
|||
|
||||
# Callback to send transfer signal while handler is waiting
|
||||
async def send_signal(engine: PipecatEngine):
|
||||
# Wait a bit to allow hold music to play
|
||||
await asyncio.sleep(0.5)
|
||||
# Send the transfer signal to unblock the waiting handler
|
||||
await send_transfer_signal(
|
||||
workflow_run_id=engine._workflow_run_id,
|
||||
|
|
|
|||
80
api/utils/hold_audio.py
Normal file
80
api/utils/hold_audio.py
Normal file
|
|
@ -0,0 +1,80 @@
|
|||
"""Utility for loading and playing hold audio files."""
|
||||
|
||||
from typing import Dict
|
||||
|
||||
import soundfile as sf
|
||||
from loguru import logger
|
||||
|
||||
from api.constants import APP_ROOT_DIR
|
||||
|
||||
# Cache for loaded audio data
|
||||
_audio_cache: Dict[str, bytes] = {}
|
||||
|
||||
|
||||
def load_hold_audio(sample_rate: int) -> bytes:
    """Load hold audio file as raw PCM bytes for the given sample rate.

    The decoded bytes are cached per sample rate in the module-level
    ``_audio_cache``, so the file is read from disk at most once per rate.

    Args:
        sample_rate: The sample rate to load (8000 or 16000)

    Returns:
        Raw PCM audio bytes (16-bit signed, mono)

    Raises:
        FileNotFoundError: If the audio file doesn't exist
        ValueError: If sample rate is not supported
    """
    if sample_rate not in (8000, 16000):
        raise ValueError(
            f"Unsupported sample rate: {sample_rate}. Must be 8000 or 16000"
        )

    cache_key = f"hold_ring_{sample_rate}"

    if cache_key in _audio_cache:
        return _audio_cache[cache_key]

    # Construct path to the audio file
    assets_dir = APP_ROOT_DIR / "assets"
    audio_file = assets_dir / f"transfer_hold_ring_{sample_rate}.wav"

    if not audio_file.exists():
        raise FileNotFoundError(f"Hold audio file not found: {audio_file}")

    # Load the audio file as 16-bit signed samples
    audio_data, file_sample_rate = sf.read(str(audio_file), dtype="int16")

    if audio_data.ndim > 1:
        # soundfile returns a (frames, channels) array for multi-channel
        # files. Serialising that directly would interleave the channels,
        # break the documented mono contract and make any duration derived
        # from the byte length wrong, so keep only the first channel.
        logger.warning(
            f"Hold audio file {audio_file.name} has "
            f"{audio_data.shape[1]} channels; using the first channel only"
        )
        audio_data = audio_data[:, 0]

    if file_sample_rate != sample_rate:
        # Best effort: the samples are still returned unchanged.
        logger.warning(
            f"Audio file sample rate ({file_sample_rate}) doesn't match "
            f"requested rate ({sample_rate})"
        )

    # Convert to bytes
    audio_bytes = audio_data.tobytes()

    # Cache for future use
    _audio_cache[cache_key] = audio_bytes

    logger.debug(
        f"Loaded hold audio: {audio_file.name}, "
        f"duration={len(audio_data) / sample_rate:.2f}s"
    )

    return audio_bytes
|
||||
|
||||
|
||||
def get_hold_audio_duration_ms(sample_rate: int) -> int:
    """Get the duration of the hold audio in milliseconds.

    Args:
        sample_rate: The sample rate (8000 or 16000)

    Returns:
        Duration in milliseconds, rounded down to a whole millisecond

    Raises:
        FileNotFoundError: Propagated from load_hold_audio
        ValueError: Propagated from load_hold_audio for unsupported rates
    """
    audio_bytes = load_hold_audio(sample_rate)
    # 2 bytes per sample (16-bit PCM)
    num_samples = len(audio_bytes) // 2
    # Integer arithmetic gives an exact floor. The float form
    # int(n / rate * 1000) can land just below a whole number
    # (8040 samples at 8000 Hz -> 1004.999...) and lose a millisecond.
    return num_samples * 1000 // sample_rate
|
||||
Loading…
Add table
Add a link
Reference in a new issue