feat: add hold music

2026-06-10 08:05:22 +02:00 · 2026-02-05 21:02:02 +05:30 · 2026-02-05 21:02:02 +05:30 · f77a2afca6
commit f77a2afca6
parent c990af2a16
11 changed files with 372 additions and 20 deletions
--- a/api/assets/transfer_hold_ring_16000.wav
+++ b/api/assets/transfer_hold_ring_16000.wav
--- a/api/assets/transfer_hold_ring_8000.wav
+++ b/api/assets/transfer_hold_ring_8000.wav
--- a/api/routes/tool.py
+++ b/api/routes/tool.py
@ -72,9 +72,26 @@ class EndCallToolDefinition(BaseModel):
    config: EndCallConfig = Field(description="End Call configuration")


+class TransferCallConfig(BaseModel):
+    """Configuration for Transfer Call tools."""
+
+    # Required destination. Declared as a plain string; no format validation
+    # is attached to the field in this model.
+    transfer_number: str = Field(description="Number to transfer the call to")
+    # Optional announcement; None when the caller omits it.
+    transfer_message: Optional[str] = Field(
+        default=None, description="Message to play before transferring the call"
+    )
+
+
+class TransferCallToolDefinition(BaseModel):
+    """Tool definition for Transfer Call tools."""
+
+    # Falls back to 1 when the payload does not carry a version.
+    schema_version: int = Field(default=1, description="Schema version")
+    # Fixed tag value; the ToolDefinition union discriminates on this field.
+    type: Literal["transfer_call"] = Field(description="Tool type")
+    # Transfer-specific settings (destination number, optional message).
+    config: TransferCallConfig = Field(description="Transfer Call configuration")
+
+
 # Union type for tool definitions - Pydantic will discriminate based on 'type' field
 ToolDefinition = Annotated[
-    Union[HttpApiToolDefinition, EndCallToolDefinition],
+    Union[HttpApiToolDefinition, EndCallToolDefinition, TransferCallToolDefinition],
    Field(discriminator="type"),
 ]

--- a/api/services/pipecat/run_pipeline.py
+++ b/api/services/pipecat/run_pipeline.py
@ -548,6 +548,7 @@ async def _run_pipeline(
        node_transition_callback=node_transition_callback,
        embeddings_api_key=embeddings_api_key,
        embeddings_model=embeddings_model,
+        audio_out_sample_rate=audio_config.transport_out_sample_rate,
    )

    # Create pipeline components with audio configuration
--- a/api/services/workflow/pipecat_engine.py
+++ b/api/services/workflow/pipecat_engine.py
@ -70,6 +70,7 @@ class PipecatEngine:
        ] = None,
        embeddings_api_key: Optional[str] = None,
        embeddings_model: Optional[str] = None,
+        audio_out_sample_rate: int = 16000,
    ):
        self.task = task
        self.llm = llm
@ -111,6 +112,9 @@ class PipecatEngine:
        self._embeddings_api_key: Optional[str] = embeddings_api_key
        self._embeddings_model: Optional[str] = embeddings_model

+        # Output audio sample rate for playback (8000 or 16000)
+        self._audio_out_sample_rate: int = audio_out_sample_rate
+
    async def _get_organization_id(self) -> Optional[int]:
        """Get and cache the organization ID from workflow run."""
        if self._custom_tool_manager:
@ -697,10 +701,14 @@ class PipecatEngine:
            connection: The StasisRTPConnection instance, or None for non-Stasis transports
        """
        self._stasis_connection = connection
-        if connection:
-            logger.debug(
-                f"Stasis connection set for immediate transfers: {connection.channel_id}"
-            )
+
+    def mute_pipeline(self) -> None:
+        """Mute the pipeline to prevent further LLM generations.
+
+        Sets ``self._mute_pipeline`` to ``True``; nothing in this method
+        resets it.
+
+        Call this before playing final messages (like transfer announcements)
+        to ensure the pipeline doesn't process any more user input.
+        """
+        self._mute_pipeline = True

    async def handle_llm_text_frame(self, text: str):
        """Accumulate LLM text frames to build reference text."""
--- a/api/services/workflow/pipecat_engine_custom_tools.py
+++ b/api/services/workflow/pipecat_engine_custom_tools.py
@ -6,6 +6,7 @@ during workflow execution.

 from __future__ import annotations

+import asyncio
 from typing import TYPE_CHECKING, Any, Optional

 from loguru import logger
@ -24,8 +25,13 @@ from api.services.workflow.transfer_event_protocol import (
    TransferEventType,
    wait_for_transfer_signal,
 )
+from api.utils.hold_audio import get_hold_audio_duration_ms, load_hold_audio
 from pipecat.adapters.schemas.function_schema import FunctionSchema
-from pipecat.frames.frames import FunctionCallResultProperties, TTSSpeakFrame
+from pipecat.frames.frames import (
+    FunctionCallResultProperties,
+    OutputAudioRawFrame,
+    TTSSpeakFrame,
+)
 from pipecat.services.llm_service import FunctionCallParams
 from pipecat.utils.enums import EndTaskReason

@ -249,11 +255,48 @@ class CustomToolManager:
            Async handler function for the transfer call tool
        """

+        async def play_hold_music_loop(stop_event: asyncio.Event) -> None:
+            """Play hold music in a loop until stop_event is set.
+
+            The clip for the engine's output sample rate is queued as a single
+            ``OutputAudioRawFrame`` and queued again each time one clip length
+            of wall-clock time elapses without ``stop_event`` being set.
+
+            Args:
+                stop_event: Set by the caller to end the loop.
+
+            Exceptions raised while loading or queueing the audio are logged
+            and swallowed here, so they do not surface to whoever awaits the
+            task.
+            """
+            sample_rate = self._engine._audio_out_sample_rate
+            try:
+                hold_audio = load_hold_audio(sample_rate)
+                duration_ms = get_hold_audio_duration_ms(sample_rate)
+                duration_secs = duration_ms / 1000.0
+
+                if duration_secs <= 0:
+                    # An empty (or sub-millisecond) clip would make the wait
+                    # below time out immediately, turning this into a tight
+                    # loop that floods the task queue with frames.
+                    logger.warning(
+                        f"Hold audio at {sample_rate}Hz is empty, "
+                        "skipping hold music"
+                    )
+                    return
+
+                logger.info(
+                    f"Starting hold music loop at {sample_rate}Hz, "
+                    f"duration={duration_secs:.2f}s per loop"
+                )
+
+                while not stop_event.is_set():
+                    # Queue the hold audio frame (a new frame object per pass)
+                    frame = OutputAudioRawFrame(
+                        audio=hold_audio,
+                        sample_rate=sample_rate,
+                        num_channels=1,
+                    )
+                    await self._engine.task.queue_frame(frame)
+
+                    # Wait one clip length, waking early if asked to stop
+                    try:
+                        await asyncio.wait_for(stop_event.wait(), timeout=duration_secs)
+                        break  # Stop event was set
+                    except asyncio.TimeoutError:
+                        pass  # One clip length elapsed; queue the clip again
+
+                logger.info("Hold music loop stopped")
+
+            except Exception as e:
+                # logger.exception keeps the traceback alongside the message
+                logger.exception(f"Error playing hold music: {e}")
+
        async def transfer_call_handler(
            function_call_params: FunctionCallParams,
        ) -> None:
            logger.info(f"Transfer Call Tool EXECUTED: {function_name}")

+            stop_hold_music = asyncio.Event()
+            hold_music_task: Optional[asyncio.Task] = None
+
            try:
                # Get the transfer call configuration
                config = tool.definition.get("config", {})
@ -269,6 +312,9 @@ class CustomToolManager:

                logger.info(f"Initiating transfer to: {transfer_number}")

+                # Mute pipeline before playing transfer message
+                self._engine.mute_pipeline()
+
                # Play transfer message if configured
                if transfer_message:
                    logger.info(f"Playing transfer message: {transfer_message}")
@ -278,6 +324,11 @@ class CustomToolManager:
                self._engine._gathered_context["transfer_requested"] = True
                self._engine._gathered_context["transfer_number"] = transfer_number

+                # Start playing hold music in the background
+                hold_music_task = asyncio.create_task(
+                    play_hold_music_loop(stop_hold_music)
+                )
+
                # Wait for external signal to proceed with transfer (30s timeout)
                workflow_run_id = self._engine._workflow_run_id
                logger.info(
@ -286,9 +337,12 @@ class CustomToolManager:

                transfer_event = await wait_for_transfer_signal(
                    workflow_run_id=workflow_run_id,
-                    timeout_seconds=30.0,
+                    timeout_seconds=8.0,
                )

+                # Stop hold music
+                stop_hold_music.set()
+
                if transfer_event is None:
                    # Timeout - transfer failed
                    logger.warning("Transfer signal timed out")
@ -329,8 +383,16 @@ class CustomToolManager:
                    f"Transfer call tool '{function_name}' execution failed: {e}"
                )
                await function_call_params.result_callback(
-                    {"status": "error", "error": str(e)},
-                    properties=properties,
+                    {"status": "error", "error": str(e)}
                )
+            finally:
+                # Ensure hold music is stopped
+                stop_hold_music.set()
+                if hold_music_task and not hold_music_task.done():
+                    hold_music_task.cancel()
+                    try:
+                        await hold_music_task
+                    except asyncio.CancelledError:
+                        pass

        return transfer_call_handler
--- a/api/tests/test_pipecat_engine_tool_calls.py
+++ b/api/tests/test_pipecat_engine_tool_calls.py
@ -102,6 +102,7 @@ async def run_pipeline_with_tool_calls(
        workflow=workflow,
        call_context_vars={"customer_name": "Test User"},
        workflow_run_id=1,
+        audio_out_sample_rate=16000,
    )

    # Create the pipeline with the mock LLM and TTS
@ -371,6 +372,8 @@ class TestPipecatEngineToolCalls:

        # Callback to send transfer signal while handler is waiting
        async def send_signal(engine: PipecatEngine):
+            # Wait a bit to allow hold music to play
+            await asyncio.sleep(0.5)
            # Send the transfer signal to unblock the waiting handler
            await send_transfer_signal(
                workflow_run_id=engine._workflow_run_id,
--- a/api/utils/hold_audio.py
+++ b/api/utils/hold_audio.py
@ -0,0 +1,80 @@
+"""Utility for loading and playing hold audio files."""
+
+from typing import Dict
+
+import soundfile as sf
+from loguru import logger
+
+from api.constants import APP_ROOT_DIR
+
+# Cache for loaded audio data
+_audio_cache: Dict[str, bytes] = {}
+
+
+def load_hold_audio(sample_rate: int) -> bytes:
+    """Return the hold-ring clip for ``sample_rate`` as raw PCM bytes.
+
+    The WAV asset is read with ``dtype="int16"`` the first time a given rate
+    is requested; the resulting bytes are kept in the module-level cache and
+    handed back directly on later calls.
+
+    Args:
+        sample_rate: Requested rate; only 8000 and 16000 are accepted.
+
+    Returns:
+        The decoded samples serialized with ``tobytes()`` (16-bit signed;
+        mono is assumed from the asset and is not checked here).
+
+    Raises:
+        ValueError: If ``sample_rate`` is neither 8000 nor 16000.
+        FileNotFoundError: If the matching asset file is missing.
+    """
+    if sample_rate not in (8000, 16000):
+        raise ValueError(
+            f"Unsupported sample rate: {sample_rate}. Must be 8000 or 16000"
+        )
+
+    key = f"hold_ring_{sample_rate}"
+    cached = _audio_cache.get(key)
+    if cached is not None:
+        return cached
+
+    wav_path = APP_ROOT_DIR / "assets" / f"transfer_hold_ring_{sample_rate}.wav"
+    if not wav_path.exists():
+        raise FileNotFoundError(f"Hold audio file not found: {wav_path}")
+
+    samples, actual_rate = sf.read(str(wav_path), dtype="int16")
+    if actual_rate != sample_rate:
+        # A mismatch is only reported; the data is still returned unchanged.
+        logger.warning(
+            f"Audio file sample rate ({actual_rate}) doesn't match "
+            f"requested rate ({sample_rate})"
+        )
+
+    pcm = samples.tobytes()
+    _audio_cache[key] = pcm
+
+    clip_seconds = len(samples) / sample_rate
+    logger.debug(f"Loaded hold audio: {wav_path.name}, duration={clip_seconds:.2f}s")
+    return pcm
+
+
+def get_hold_audio_duration_ms(sample_rate: int) -> int:
+    """Compute how long the hold clip lasts, in whole milliseconds.
+
+    Args:
+        sample_rate: Rate of the clip to measure (8000 or 16000).
+
+    Returns:
+        Clip length in milliseconds, truncated to an int.
+    """
+    bytes_per_sample = 2  # 16-bit PCM
+    sample_count = len(load_hold_audio(sample_rate)) // bytes_per_sample
+    return int((sample_count / sample_rate) * 1000)