feat: add hold music

This commit is contained in:
Abhishek Kumar 2026-02-05 21:02:02 +05:30
parent c990af2a16
commit f77a2afca6
11 changed files with 372 additions and 20 deletions

Binary file not shown.

Binary file not shown.

View file

@ -72,9 +72,26 @@ class EndCallToolDefinition(BaseModel):
config: EndCallConfig = Field(description="End Call configuration")
class TransferCallConfig(BaseModel):
    """Configuration for Transfer Call tools."""

    # NOTE: the class docstring and the Field descriptions below are mirrored
    # verbatim in the generated TypeScript client types, so treat them as part
    # of the public API text rather than free-form comments.

    # Required: the number the call is transferred to.
    transfer_number: str = Field(description="Number to transfer the call to")

    # Optional announcement; stays None when no message is configured.
    transfer_message: Optional[str] = Field(
        None,
        description="Message to play before transferring the call",
    )
class TransferCallToolDefinition(BaseModel):
    """Tool definition for Transfer Call tools."""

    # Defaults to 1 when the payload omits it.
    schema_version: int = Field(1, description="Schema version")

    # Fixed literal: this is the value the `type`-discriminated ToolDefinition
    # union matches on to select this model.
    type: Literal["transfer_call"] = Field(description="Tool type")

    # Transfer-specific settings (destination number, optional message).
    config: TransferCallConfig = Field(description="Transfer Call configuration")
# Union type for tool definitions - Pydantic will discriminate based on 'type' field
ToolDefinition = Annotated[
Union[HttpApiToolDefinition, EndCallToolDefinition],
Union[HttpApiToolDefinition, EndCallToolDefinition, TransferCallToolDefinition],
Field(discriminator="type"),
]

View file

@ -548,6 +548,7 @@ async def _run_pipeline(
node_transition_callback=node_transition_callback,
embeddings_api_key=embeddings_api_key,
embeddings_model=embeddings_model,
audio_out_sample_rate=audio_config.transport_out_sample_rate,
)
# Create pipeline components with audio configuration

View file

@ -70,6 +70,7 @@ class PipecatEngine:
] = None,
embeddings_api_key: Optional[str] = None,
embeddings_model: Optional[str] = None,
audio_out_sample_rate: int = 16000,
):
self.task = task
self.llm = llm
@ -111,6 +112,9 @@ class PipecatEngine:
self._embeddings_api_key: Optional[str] = embeddings_api_key
self._embeddings_model: Optional[str] = embeddings_model
# Output audio sample rate for playback (8000 or 16000)
self._audio_out_sample_rate: int = audio_out_sample_rate
async def _get_organization_id(self) -> Optional[int]:
"""Get and cache the organization ID from workflow run."""
if self._custom_tool_manager:
@ -697,10 +701,14 @@ class PipecatEngine:
connection: The StasisRTPConnection instance, or None for non-Stasis transports
"""
self._stasis_connection = connection
if connection:
logger.debug(
f"Stasis connection set for immediate transfers: {connection.channel_id}"
)
def mute_pipeline(self) -> None:
    """Flag the pipeline as muted.

    Sets ``self._mute_pipeline`` to ``True`` and returns nothing. Intended
    to be called right before a final message (such as a transfer
    announcement) is played, so that later user input does not trigger
    further LLM generations. The flag is only written here; it is read
    elsewhere in the engine.
    """
    self._mute_pipeline = True
async def handle_llm_text_frame(self, text: str):
"""Accumulate LLM text frames to build reference text."""

View file

@ -6,6 +6,7 @@ during workflow execution.
from __future__ import annotations
import asyncio
from typing import TYPE_CHECKING, Any, Optional
from loguru import logger
@ -24,8 +25,13 @@ from api.services.workflow.transfer_event_protocol import (
TransferEventType,
wait_for_transfer_signal,
)
from api.utils.hold_audio import get_hold_audio_duration_ms, load_hold_audio
from pipecat.adapters.schemas.function_schema import FunctionSchema
from pipecat.frames.frames import FunctionCallResultProperties, TTSSpeakFrame
from pipecat.frames.frames import (
FunctionCallResultProperties,
OutputAudioRawFrame,
TTSSpeakFrame,
)
from pipecat.services.llm_service import FunctionCallParams
from pipecat.utils.enums import EndTaskReason
@ -249,11 +255,48 @@ class CustomToolManager:
Async handler function for the transfer call tool
"""
async def play_hold_music_loop(stop_event: asyncio.Event) -> None:
    """Play the hold audio clip on repeat until ``stop_event`` is set.

    Each iteration queues the whole clip as a single ``OutputAudioRawFrame``
    on the engine task, then waits for either the clip's duration to elapse
    or ``stop_event`` to be set, whichever comes first.

    Playback is best-effort: any ``Exception`` (missing asset, unsupported
    sample rate, queueing failure) is logged and swallowed so that a hold
    music problem never aborts the transfer itself. Task cancellation is
    not swallowed and propagates to the caller.

    Args:
        stop_event: Set by the caller to end the loop.
    """
    sample_rate = self._engine._audio_out_sample_rate

    try:
        hold_audio = load_hold_audio(sample_rate)
        duration_secs = get_hold_audio_duration_ms(sample_rate) / 1000.0

        # A clip with no measurable duration would make every wait time out
        # immediately, turning the loop into a busy spin that floods the
        # task queue with frames. Skip playback instead.
        if duration_secs <= 0:
            logger.warning(
                f"Hold audio at {sample_rate}Hz has no playable duration; "
                f"skipping hold music"
            )
            return

        logger.info(
            f"Starting hold music loop at {sample_rate}Hz, "
            f"duration={duration_secs:.2f}s per loop"
        )

        while not stop_event.is_set():
            # Queue one full pass of the clip.
            frame = OutputAudioRawFrame(
                audio=hold_audio,
                sample_rate=sample_rate,
                num_channels=1,
            )
            await self._engine.task.queue_frame(frame)

            # Sleep for one clip length, waking early if asked to stop.
            try:
                await asyncio.wait_for(stop_event.wait(), timeout=duration_secs)
            except asyncio.TimeoutError:
                continue  # Clip finished without a stop request: queue it again
            break  # Stop event was set

        logger.info("Hold music loop stopped")
    except Exception as e:
        # logger.exception keeps the traceback, which logger.error dropped.
        logger.exception(f"Error playing hold music: {e}")
async def transfer_call_handler(
function_call_params: FunctionCallParams,
) -> None:
logger.info(f"Transfer Call Tool EXECUTED: {function_name}")
stop_hold_music = asyncio.Event()
hold_music_task: Optional[asyncio.Task] = None
try:
# Get the transfer call configuration
config = tool.definition.get("config", {})
@ -269,6 +312,9 @@ class CustomToolManager:
logger.info(f"Initiating transfer to: {transfer_number}")
# Mute pipeline before playing transfer message
self._engine.mute_pipeline()
# Play transfer message if configured
if transfer_message:
logger.info(f"Playing transfer message: {transfer_message}")
@ -278,6 +324,11 @@ class CustomToolManager:
self._engine._gathered_context["transfer_requested"] = True
self._engine._gathered_context["transfer_number"] = transfer_number
# Start playing hold music in the background
hold_music_task = asyncio.create_task(
play_hold_music_loop(stop_hold_music)
)
# Wait for external signal to proceed with transfer (bounded by timeout_seconds below)
workflow_run_id = self._engine._workflow_run_id
logger.info(
@ -286,9 +337,12 @@ class CustomToolManager:
transfer_event = await wait_for_transfer_signal(
workflow_run_id=workflow_run_id,
timeout_seconds=30.0,
timeout_seconds=8.0,
)
# Stop hold music
stop_hold_music.set()
if transfer_event is None:
# Timeout - transfer failed
logger.warning("Transfer signal timed out")
@ -329,8 +383,16 @@ class CustomToolManager:
f"Transfer call tool '{function_name}' execution failed: {e}"
)
await function_call_params.result_callback(
{"status": "error", "error": str(e)},
properties=properties,
{"status": "error", "error": str(e)}
)
finally:
# Ensure hold music is stopped
stop_hold_music.set()
if hold_music_task and not hold_music_task.done():
hold_music_task.cancel()
try:
await hold_music_task
except asyncio.CancelledError:
pass
return transfer_call_handler

View file

@ -102,6 +102,7 @@ async def run_pipeline_with_tool_calls(
workflow=workflow,
call_context_vars={"customer_name": "Test User"},
workflow_run_id=1,
audio_out_sample_rate=16000,
)
# Create the pipeline with the mock LLM and TTS
@ -371,6 +372,8 @@ class TestPipecatEngineToolCalls:
# Callback to send transfer signal while handler is waiting
async def send_signal(engine: PipecatEngine):
# Wait a bit to allow hold music to play
await asyncio.sleep(0.5)
# Send the transfer signal to unblock the waiting handler
await send_transfer_signal(
workflow_run_id=engine._workflow_run_id,

80
api/utils/hold_audio.py Normal file
View file

@ -0,0 +1,80 @@
"""Utility for loading and playing hold audio files."""
from typing import Dict
import soundfile as sf
from loguru import logger
from api.constants import APP_ROOT_DIR
# Cache for loaded audio data
_audio_cache: Dict[str, bytes] = {}
def load_hold_audio(sample_rate: int) -> bytes:
    """Load hold audio file as raw PCM bytes for the given sample rate.

    The clip is read from ``APP_ROOT_DIR / "assets"`` using the file name
    ``transfer_hold_ring_<sample_rate>.wav`` and the resulting bytes are
    stored in the module-level ``_audio_cache`` so each rate is read from
    disk at most once.

    If the file's own sample rate differs from the requested one, a warning
    is logged and the data is returned unchanged (no resampling is done).
    If the file has more than one channel, only the first channel is kept
    so that the returned bytes really are mono.

    Args:
        sample_rate: The sample rate to load (8000 or 16000)

    Returns:
        Raw PCM audio bytes (16-bit signed, mono)

    Raises:
        ValueError: If sample rate is not supported
        FileNotFoundError: If the audio file doesn't exist
    """
    if sample_rate not in (8000, 16000):
        raise ValueError(
            f"Unsupported sample rate: {sample_rate}. Must be 8000 or 16000"
        )

    cache_key = f"hold_ring_{sample_rate}"
    cached = _audio_cache.get(cache_key)
    if cached is not None:
        return cached

    # Construct path to the audio file
    audio_file = APP_ROOT_DIR / "assets" / f"transfer_hold_ring_{sample_rate}.wav"
    if not audio_file.exists():
        raise FileNotFoundError(f"Hold audio file not found: {audio_file}")

    # Load the audio file as 16-bit signed samples
    audio_data, file_sample_rate = sf.read(str(audio_file), dtype="int16")

    if file_sample_rate != sample_rate:
        logger.warning(
            f"Audio file sample rate ({file_sample_rate}) doesn't match "
            f"requested rate ({sample_rate})"
        )

    # soundfile returns a (frames, channels) array for multi-channel files.
    # Serialising that directly would interleave the channels, and a consumer
    # treating the bytes as mono would play them garbled and at half speed.
    if audio_data.ndim > 1:
        logger.warning(
            f"Hold audio file {audio_file.name} has {audio_data.shape[1]} "
            f"channels; using the first channel only"
        )
        audio_data = audio_data[:, 0]

    # Convert to bytes and cache for future use
    audio_bytes = audio_data.tobytes()
    _audio_cache[cache_key] = audio_bytes

    logger.debug(
        f"Loaded hold audio: {audio_file.name}, "
        f"duration={len(audio_data) / sample_rate:.2f}s"
    )

    return audio_bytes
def get_hold_audio_duration_ms(sample_rate: int) -> int:
    """Return the length of the hold audio clip in whole milliseconds.

    The clip is obtained through ``load_hold_audio``, so the same
    ``ValueError`` / ``FileNotFoundError`` conditions apply. The result is
    truncated towards zero, not rounded.

    Args:
        sample_rate: The sample rate (8000 or 16000)

    Returns:
        Duration in milliseconds
    """
    bytes_per_sample = 2  # 16-bit PCM
    sample_count = len(load_hold_audio(sample_rate)) // bytes_per_sample
    seconds = sample_count / sample_rate
    return int(seconds * 1000)

@ -1 +1 @@
Subproject commit 866bf1c5685e7fadf2af012d8769ebbc35297db0
Subproject commit e618bb98dfde6224ef9f4e15769580790719b269

File diff suppressed because one or more lines are too long

View file

@ -258,7 +258,9 @@ export type CreateToolRequest = {
type?: 'http_api';
} & HttpApiToolDefinition) | ({
type?: 'end_call';
} & EndCallToolDefinition);
} & EndCallToolDefinition) | ({
type?: 'transfer_call';
} & TransferCallToolDefinition);
};
export type CreateWorkflowRequest = {
@ -705,10 +707,6 @@ export type ProcessDocumentRequestSchema = {
* S3 key of the uploaded file
*/
s3_key: string;
/**
* Embedding service to use for processing. Options: 'openai' (default, 1536-dim, requires API key) or 'sentence_transformer' (free, 384-dim)
*/
embedding_service?: 'sentence_transformer' | 'openai';
};
export type RetryConfigRequest = {
@ -860,6 +858,46 @@ export type ToolResponse = {
created_by?: CreatedByResponse | null;
};
/**
* Configuration for Transfer Call tools.
*/
export type TransferCallConfig = {
/**
* Number to transfer the call to
*/
transfer_number: string;
/**
* Message to play before transferring the call
*/
transfer_message?: string | null;
};
/**
* Tool definition for Transfer Call tools.
*/
export type TransferCallToolDefinition = {
/**
* Schema version
*/
schema_version?: number;
/**
* Tool type
*/
type: 'transfer_call';
/**
* Transfer Call configuration
*/
config: TransferCallConfig;
};
/**
* Request to send a transfer signal.
*/
export type TransferSignalRequest = {
action?: string;
message?: string | null;
};
/**
* Request model for triggering a call via API
*/
@ -948,7 +986,9 @@ export type UpdateToolRequest = {
type?: 'http_api';
} & HttpApiToolDefinition) | ({
type?: 'end_call';
} & EndCallToolDefinition)) | null;
} & EndCallToolDefinition) | ({
type?: 'transfer_call';
} & TransferCallToolDefinition)) | null;
status?: string | null;
};
@ -1530,6 +1570,35 @@ export type HandleCloudonixCdrApiV1TelephonyCloudonixCdrPostResponses = {
200: unknown;
};
export type SendTransferSignalEndpointApiV1TelephonyTransferSignalWorkflowRunIdPostData = {
body: TransferSignalRequest;
path: {
workflow_run_id: number;
};
query?: never;
url: '/api/v1/telephony/transfer-signal/{workflow_run_id}';
};
export type SendTransferSignalEndpointApiV1TelephonyTransferSignalWorkflowRunIdPostErrors = {
/**
* Not found
*/
404: unknown;
/**
* Validation Error
*/
422: HttpValidationError;
};
export type SendTransferSignalEndpointApiV1TelephonyTransferSignalWorkflowRunIdPostError = SendTransferSignalEndpointApiV1TelephonyTransferSignalWorkflowRunIdPostErrors[keyof SendTransferSignalEndpointApiV1TelephonyTransferSignalWorkflowRunIdPostErrors];
export type SendTransferSignalEndpointApiV1TelephonyTransferSignalWorkflowRunIdPostResponses = {
/**
* Successful Response
*/
200: unknown;
};
export type ImpersonateApiV1SuperuserImpersonatePostData = {
body: ImpersonateRequest;
headers?: {
@ -4354,6 +4423,66 @@ export type OptionsConfigApiV1PublicEmbedConfigTokenOptionsResponses = {
200: unknown;
};
export type GetPublicTurnCredentialsApiV1PublicEmbedTurnCredentialsSessionTokenGetData = {
body?: never;
path: {
session_token: string;
};
query?: never;
url: '/api/v1/public/embed/turn-credentials/{session_token}';
};
export type GetPublicTurnCredentialsApiV1PublicEmbedTurnCredentialsSessionTokenGetErrors = {
/**
* Not found
*/
404: unknown;
/**
* Validation Error
*/
422: HttpValidationError;
};
export type GetPublicTurnCredentialsApiV1PublicEmbedTurnCredentialsSessionTokenGetError = GetPublicTurnCredentialsApiV1PublicEmbedTurnCredentialsSessionTokenGetErrors[keyof GetPublicTurnCredentialsApiV1PublicEmbedTurnCredentialsSessionTokenGetErrors];
export type GetPublicTurnCredentialsApiV1PublicEmbedTurnCredentialsSessionTokenGetResponses = {
/**
* Successful Response
*/
200: TurnCredentialsResponse;
};
export type GetPublicTurnCredentialsApiV1PublicEmbedTurnCredentialsSessionTokenGetResponse = GetPublicTurnCredentialsApiV1PublicEmbedTurnCredentialsSessionTokenGetResponses[keyof GetPublicTurnCredentialsApiV1PublicEmbedTurnCredentialsSessionTokenGetResponses];
export type OptionsTurnCredentialsApiV1PublicEmbedTurnCredentialsSessionTokenOptionsData = {
body?: never;
path: {
session_token: string;
};
query?: never;
url: '/api/v1/public/embed/turn-credentials/{session_token}';
};
export type OptionsTurnCredentialsApiV1PublicEmbedTurnCredentialsSessionTokenOptionsErrors = {
/**
* Not found
*/
404: unknown;
/**
* Validation Error
*/
422: HttpValidationError;
};
export type OptionsTurnCredentialsApiV1PublicEmbedTurnCredentialsSessionTokenOptionsError = OptionsTurnCredentialsApiV1PublicEmbedTurnCredentialsSessionTokenOptionsErrors[keyof OptionsTurnCredentialsApiV1PublicEmbedTurnCredentialsSessionTokenOptionsErrors];
export type OptionsTurnCredentialsApiV1PublicEmbedTurnCredentialsSessionTokenOptionsResponses = {
/**
* Successful Response
*/
200: unknown;
};
export type InitiateCallApiV1PublicAgentUuidPostData = {
body: TriggerCallRequest;
headers: {