mirror of
https://github.com/dograh-hq/dograh.git
synced 2026-06-07 07:55:16 +02:00
feat: add hold music
This commit is contained in:
parent
c990af2a16
commit
f77a2afca6
11 changed files with 372 additions and 20 deletions
BIN
api/assets/transfer_hold_ring_16000.wav
Normal file
BIN
api/assets/transfer_hold_ring_16000.wav
Normal file
Binary file not shown.
BIN
api/assets/transfer_hold_ring_8000.wav
Normal file
BIN
api/assets/transfer_hold_ring_8000.wav
Normal file
Binary file not shown.
|
|
@ -72,9 +72,26 @@ class EndCallToolDefinition(BaseModel):
|
|||
config: EndCallConfig = Field(description="End Call configuration")
|
||||
|
||||
|
||||
class TransferCallConfig(BaseModel):
    """Settings carried in the ``config`` field of a transfer-call tool."""

    # Required: destination number the call is transferred to.
    transfer_number: str = Field(description="Number to transfer the call to")

    # Optional: message played to the caller before the transfer starts.
    transfer_message: Optional[str] = Field(
        description="Message to play before transferring the call",
        default=None,
    )
|
||||
|
||||
|
||||
class TransferCallToolDefinition(BaseModel):
    """Definition payload for a tool whose ``type`` is ``"transfer_call"``."""

    # Version of this definition schema; defaults to 1.
    schema_version: int = Field(description="Schema version", default=1)

    # Literal tag identifying this definition as a transfer-call tool.
    type: Literal["transfer_call"] = Field(description="Tool type")

    # Transfer-specific settings (destination number, optional message).
    config: TransferCallConfig = Field(description="Transfer Call configuration")
|
||||
|
||||
|
||||
# Union type for tool definitions - Pydantic will discriminate based on 'type' field
|
||||
ToolDefinition = Annotated[
|
||||
Union[HttpApiToolDefinition, EndCallToolDefinition],
|
||||
Union[HttpApiToolDefinition, EndCallToolDefinition, TransferCallToolDefinition],
|
||||
Field(discriminator="type"),
|
||||
]
|
||||
|
||||
|
|
|
|||
|
|
@ -548,6 +548,7 @@ async def _run_pipeline(
|
|||
node_transition_callback=node_transition_callback,
|
||||
embeddings_api_key=embeddings_api_key,
|
||||
embeddings_model=embeddings_model,
|
||||
audio_out_sample_rate=audio_config.transport_out_sample_rate,
|
||||
)
|
||||
|
||||
# Create pipeline components with audio configuration
|
||||
|
|
|
|||
|
|
@ -70,6 +70,7 @@ class PipecatEngine:
|
|||
] = None,
|
||||
embeddings_api_key: Optional[str] = None,
|
||||
embeddings_model: Optional[str] = None,
|
||||
audio_out_sample_rate: int = 16000,
|
||||
):
|
||||
self.task = task
|
||||
self.llm = llm
|
||||
|
|
@ -111,6 +112,9 @@ class PipecatEngine:
|
|||
self._embeddings_api_key: Optional[str] = embeddings_api_key
|
||||
self._embeddings_model: Optional[str] = embeddings_model
|
||||
|
||||
# Output audio sample rate for playback (8000 or 16000)
|
||||
self._audio_out_sample_rate: int = audio_out_sample_rate
|
||||
|
||||
async def _get_organization_id(self) -> Optional[int]:
|
||||
"""Get and cache the organization ID from workflow run."""
|
||||
if self._custom_tool_manager:
|
||||
|
|
@ -697,10 +701,14 @@ class PipecatEngine:
|
|||
connection: The StasisRTPConnection instance, or None for non-Stasis transports
|
||||
"""
|
||||
self._stasis_connection = connection
|
||||
if connection:
|
||||
logger.debug(
|
||||
f"Stasis connection set for immediate transfers: {connection.channel_id}"
|
||||
)
|
||||
|
||||
def mute_pipeline(self) -> None:
    """Flag the pipeline as muted so no further LLM generations happen.

    Intended to be invoked right before a final message (for example a
    transfer announcement) is played, so that later user input is not
    processed by the pipeline.
    """
    # Only the flag is set here.
    self._mute_pipeline = True
|
||||
|
||||
async def handle_llm_text_frame(self, text: str):
|
||||
"""Accumulate LLM text frames to build reference text."""
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ during workflow execution.
|
|||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
from typing import TYPE_CHECKING, Any, Optional
|
||||
|
||||
from loguru import logger
|
||||
|
|
@ -24,8 +25,13 @@ from api.services.workflow.transfer_event_protocol import (
|
|||
TransferEventType,
|
||||
wait_for_transfer_signal,
|
||||
)
|
||||
from api.utils.hold_audio import get_hold_audio_duration_ms, load_hold_audio
|
||||
from pipecat.adapters.schemas.function_schema import FunctionSchema
|
||||
from pipecat.frames.frames import FunctionCallResultProperties, TTSSpeakFrame
|
||||
from pipecat.frames.frames import (
|
||||
FunctionCallResultProperties,
|
||||
OutputAudioRawFrame,
|
||||
TTSSpeakFrame,
|
||||
)
|
||||
from pipecat.services.llm_service import FunctionCallParams
|
||||
from pipecat.utils.enums import EndTaskReason
|
||||
|
||||
|
|
@ -249,11 +255,48 @@ class CustomToolManager:
|
|||
Async handler function for the transfer call tool
|
||||
"""
|
||||
|
||||
async def play_hold_music_loop(stop_event: asyncio.Event) -> None:
    """Queue the hold audio clip repeatedly until ``stop_event`` is set.

    The clip matching the engine's output sample rate is loaded once. Each
    iteration queues it on the engine task as a single mono
    ``OutputAudioRawFrame`` and then waits for one clip length, or until
    ``stop_event`` fires, before queueing it again.

    Any failure (unsupported rate, missing file, queueing error) is logged
    and swallowed so that a hold-music problem never aborts the transfer
    itself.

    Args:
        stop_event: Set by the caller to end the loop.
    """
    sample_rate = self._engine._audio_out_sample_rate
    try:
        hold_audio = load_hold_audio(sample_rate)
        duration_secs = get_hold_audio_duration_ms(sample_rate) / 1000.0

        if not hold_audio or duration_secs <= 0:
            # A zero-length clip would make the wait below time out
            # immediately, turning this loop into a busy spin that floods
            # the task with empty frames.
            logger.warning(
                f"Hold audio at {sample_rate}Hz is empty; skipping hold music"
            )
            return

        logger.info(
            f"Starting hold music loop at {sample_rate}Hz, "
            f"duration={duration_secs:.2f}s per loop"
        )

        while not stop_event.is_set():
            # Queue one full pass of the clip.
            await self._engine.task.queue_frame(
                OutputAudioRawFrame(
                    audio=hold_audio,
                    sample_rate=sample_rate,
                    num_channels=1,
                )
            )

            # Sleep for one clip length, waking early if asked to stop.
            try:
                await asyncio.wait_for(stop_event.wait(), timeout=duration_secs)
            except asyncio.TimeoutError:
                continue  # Clip length elapsed without a stop: queue it again
            break  # Stop event was set

        logger.info("Hold music loop stopped")

    except Exception as e:
        # Best effort: keep the transfer going, but record the traceback.
        logger.exception(f"Error playing hold music: {e}")
|
||||
|
||||
async def transfer_call_handler(
|
||||
function_call_params: FunctionCallParams,
|
||||
) -> None:
|
||||
logger.info(f"Transfer Call Tool EXECUTED: {function_name}")
|
||||
|
||||
stop_hold_music = asyncio.Event()
|
||||
hold_music_task: Optional[asyncio.Task] = None
|
||||
|
||||
try:
|
||||
# Get the transfer call configuration
|
||||
config = tool.definition.get("config", {})
|
||||
|
|
@ -269,6 +312,9 @@ class CustomToolManager:
|
|||
|
||||
logger.info(f"Initiating transfer to: {transfer_number}")
|
||||
|
||||
# Mute pipeline before playing transfer message
|
||||
self._engine.mute_pipeline()
|
||||
|
||||
# Play transfer message if configured
|
||||
if transfer_message:
|
||||
logger.info(f"Playing transfer message: {transfer_message}")
|
||||
|
|
@ -278,6 +324,11 @@ class CustomToolManager:
|
|||
self._engine._gathered_context["transfer_requested"] = True
|
||||
self._engine._gathered_context["transfer_number"] = transfer_number
|
||||
|
||||
# Start playing hold music in the background
|
||||
hold_music_task = asyncio.create_task(
|
||||
play_hold_music_loop(stop_hold_music)
|
||||
)
|
||||
|
||||
# Wait for external signal to proceed with transfer (30s timeout)
|
||||
workflow_run_id = self._engine._workflow_run_id
|
||||
logger.info(
|
||||
|
|
@ -286,9 +337,12 @@ class CustomToolManager:
|
|||
|
||||
transfer_event = await wait_for_transfer_signal(
|
||||
workflow_run_id=workflow_run_id,
|
||||
timeout_seconds=30.0,
|
||||
timeout_seconds=8.0,
|
||||
)
|
||||
|
||||
# Stop hold music
|
||||
stop_hold_music.set()
|
||||
|
||||
if transfer_event is None:
|
||||
# Timeout - transfer failed
|
||||
logger.warning("Transfer signal timed out")
|
||||
|
|
@ -329,8 +383,16 @@ class CustomToolManager:
|
|||
f"Transfer call tool '{function_name}' execution failed: {e}"
|
||||
)
|
||||
await function_call_params.result_callback(
|
||||
{"status": "error", "error": str(e)},
|
||||
properties=properties,
|
||||
{"status": "error", "error": str(e)}
|
||||
)
|
||||
finally:
|
||||
# Ensure hold music is stopped
|
||||
stop_hold_music.set()
|
||||
if hold_music_task and not hold_music_task.done():
|
||||
hold_music_task.cancel()
|
||||
try:
|
||||
await hold_music_task
|
||||
except asyncio.CancelledError:
|
||||
pass
|
||||
|
||||
return transfer_call_handler
|
||||
|
|
|
|||
|
|
@ -102,6 +102,7 @@ async def run_pipeline_with_tool_calls(
|
|||
workflow=workflow,
|
||||
call_context_vars={"customer_name": "Test User"},
|
||||
workflow_run_id=1,
|
||||
audio_out_sample_rate=16000,
|
||||
)
|
||||
|
||||
# Create the pipeline with the mock LLM and TTS
|
||||
|
|
@ -371,6 +372,8 @@ class TestPipecatEngineToolCalls:
|
|||
|
||||
# Callback to send transfer signal while handler is waiting
|
||||
async def send_signal(engine: PipecatEngine):
|
||||
# Wait a bit to allow hold music to play
|
||||
await asyncio.sleep(0.5)
|
||||
# Send the transfer signal to unblock the waiting handler
|
||||
await send_transfer_signal(
|
||||
workflow_run_id=engine._workflow_run_id,
|
||||
|
|
|
|||
80
api/utils/hold_audio.py
Normal file
80
api/utils/hold_audio.py
Normal file
|
|
@ -0,0 +1,80 @@
|
|||
"""Utility for loading and playing hold audio files."""
|
||||
|
||||
from typing import Dict
|
||||
|
||||
import soundfile as sf
|
||||
from loguru import logger
|
||||
|
||||
from api.constants import APP_ROOT_DIR
|
||||
|
||||
# Cache for loaded audio data
|
||||
_audio_cache: Dict[str, bytes] = {}
|
||||
|
||||
|
||||
def load_hold_audio(sample_rate: int) -> bytes:
    """Load the hold audio clip as raw PCM bytes for the given sample rate.

    The clip is read from ``assets/transfer_hold_ring_<rate>.wav`` under
    ``APP_ROOT_DIR`` as 16-bit samples. The resulting bytes are stored in
    the module-level cache, so later calls for the same rate do not touch
    the disk.

    Args:
        sample_rate: The sample rate to load (8000 or 16000)

    Returns:
        Raw PCM audio bytes (16-bit signed, mono)

    Raises:
        FileNotFoundError: If the audio file doesn't exist
        ValueError: If sample rate is not supported
    """
    if sample_rate not in (8000, 16000):
        raise ValueError(
            f"Unsupported sample rate: {sample_rate}. Must be 8000 or 16000"
        )

    cache_key = f"hold_ring_{sample_rate}"
    cached = _audio_cache.get(cache_key)
    if cached is not None:
        return cached

    # Construct path to the audio file
    audio_file = APP_ROOT_DIR / "assets" / f"transfer_hold_ring_{sample_rate}.wav"
    if not audio_file.exists():
        raise FileNotFoundError(f"Hold audio file not found: {audio_file}")

    # Load the audio file as int16 samples
    audio_data, file_sample_rate = sf.read(str(audio_file), dtype="int16")

    if audio_data.ndim > 1:
        # Multi-channel files come back as (frames, channels). Serialising
        # that directly would yield interleaved samples, while the result is
        # documented and consumed as mono. Keep only the first channel.
        logger.warning(
            f"Audio file {audio_file.name} has {audio_data.shape[1]} channels; "
            f"using the first channel only"
        )
        audio_data = audio_data[:, 0]

    if file_sample_rate != sample_rate:
        # Deliberately not fatal: the clip is still returned as loaded.
        logger.warning(
            f"Audio file sample rate ({file_sample_rate}) doesn't match "
            f"requested rate ({sample_rate})"
        )

    # Convert to bytes and cache for future use
    audio_bytes = audio_data.tobytes()
    _audio_cache[cache_key] = audio_bytes

    logger.debug(
        f"Loaded hold audio: {audio_file.name}, "
        f"duration={len(audio_data) / sample_rate:.2f}s"
    )

    return audio_bytes
|
||||
|
||||
|
||||
def get_hold_audio_duration_ms(sample_rate: int) -> int:
    """Return how long the hold audio clip lasts, in whole milliseconds.

    The length is derived from the byte count returned by
    ``load_hold_audio``, counting two bytes per sample.

    Args:
        sample_rate: The sample rate (8000 or 16000)

    Returns:
        Duration in milliseconds
    """
    bytes_per_sample = 2  # 16-bit PCM
    sample_count = len(load_hold_audio(sample_rate)) // bytes_per_sample
    return int((sample_count / sample_rate) * 1000)
|
||||
2
pipecat
2
pipecat
|
|
@ -1 +1 @@
|
|||
Subproject commit 866bf1c5685e7fadf2af012d8769ebbc35297db0
|
||||
Subproject commit e618bb98dfde6224ef9f4e15769580790719b269
|
||||
File diff suppressed because one or more lines are too long
|
|
@ -258,7 +258,9 @@ export type CreateToolRequest = {
|
|||
type?: 'http_api';
|
||||
} & HttpApiToolDefinition) | ({
|
||||
type?: 'end_call';
|
||||
} & EndCallToolDefinition);
|
||||
} & EndCallToolDefinition) | ({
|
||||
type?: 'transfer_call';
|
||||
} & TransferCallToolDefinition);
|
||||
};
|
||||
|
||||
export type CreateWorkflowRequest = {
|
||||
|
|
@ -705,10 +707,6 @@ export type ProcessDocumentRequestSchema = {
|
|||
* S3 key of the uploaded file
|
||||
*/
|
||||
s3_key: string;
|
||||
/**
|
||||
* Embedding service to use for processing. Options: 'openai' (default, 1536-dim, requires API key) or 'sentence_transformer' (free, 384-dim)
|
||||
*/
|
||||
embedding_service?: 'sentence_transformer' | 'openai';
|
||||
};
|
||||
|
||||
export type RetryConfigRequest = {
|
||||
|
|
@ -860,6 +858,46 @@ export type ToolResponse = {
|
|||
created_by?: CreatedByResponse | null;
|
||||
};
|
||||
|
||||
/**
|
||||
* Configuration for Transfer Call tools.
|
||||
*/
|
||||
export type TransferCallConfig = {
|
||||
/**
|
||||
* Number to transfer the call to
|
||||
*/
|
||||
transfer_number: string;
|
||||
/**
|
||||
* Message to play before transferring the call
|
||||
*/
|
||||
transfer_message?: string | null;
|
||||
};
|
||||
|
||||
/**
|
||||
* Tool definition for Transfer Call tools.
|
||||
*/
|
||||
export type TransferCallToolDefinition = {
|
||||
/**
|
||||
* Schema version
|
||||
*/
|
||||
schema_version?: number;
|
||||
/**
|
||||
* Tool type
|
||||
*/
|
||||
type: 'transfer_call';
|
||||
/**
|
||||
* Transfer Call configuration
|
||||
*/
|
||||
config: TransferCallConfig;
|
||||
};
|
||||
|
||||
/**
|
||||
* Request to send a transfer signal.
|
||||
*/
|
||||
export type TransferSignalRequest = {
|
||||
action?: string;
|
||||
message?: string | null;
|
||||
};
|
||||
|
||||
/**
|
||||
* Request model for triggering a call via API
|
||||
*/
|
||||
|
|
@ -948,7 +986,9 @@ export type UpdateToolRequest = {
|
|||
type?: 'http_api';
|
||||
} & HttpApiToolDefinition) | ({
|
||||
type?: 'end_call';
|
||||
} & EndCallToolDefinition)) | null;
|
||||
} & EndCallToolDefinition) | ({
|
||||
type?: 'transfer_call';
|
||||
} & TransferCallToolDefinition)) | null;
|
||||
status?: string | null;
|
||||
};
|
||||
|
||||
|
|
@ -1530,6 +1570,35 @@ export type HandleCloudonixCdrApiV1TelephonyCloudonixCdrPostResponses = {
|
|||
200: unknown;
|
||||
};
|
||||
|
||||
export type SendTransferSignalEndpointApiV1TelephonyTransferSignalWorkflowRunIdPostData = {
|
||||
body: TransferSignalRequest;
|
||||
path: {
|
||||
workflow_run_id: number;
|
||||
};
|
||||
query?: never;
|
||||
url: '/api/v1/telephony/transfer-signal/{workflow_run_id}';
|
||||
};
|
||||
|
||||
export type SendTransferSignalEndpointApiV1TelephonyTransferSignalWorkflowRunIdPostErrors = {
|
||||
/**
|
||||
* Not found
|
||||
*/
|
||||
404: unknown;
|
||||
/**
|
||||
* Validation Error
|
||||
*/
|
||||
422: HttpValidationError;
|
||||
};
|
||||
|
||||
export type SendTransferSignalEndpointApiV1TelephonyTransferSignalWorkflowRunIdPostError = SendTransferSignalEndpointApiV1TelephonyTransferSignalWorkflowRunIdPostErrors[keyof SendTransferSignalEndpointApiV1TelephonyTransferSignalWorkflowRunIdPostErrors];
|
||||
|
||||
export type SendTransferSignalEndpointApiV1TelephonyTransferSignalWorkflowRunIdPostResponses = {
|
||||
/**
|
||||
* Successful Response
|
||||
*/
|
||||
200: unknown;
|
||||
};
|
||||
|
||||
export type ImpersonateApiV1SuperuserImpersonatePostData = {
|
||||
body: ImpersonateRequest;
|
||||
headers?: {
|
||||
|
|
@ -4354,6 +4423,66 @@ export type OptionsConfigApiV1PublicEmbedConfigTokenOptionsResponses = {
|
|||
200: unknown;
|
||||
};
|
||||
|
||||
export type GetPublicTurnCredentialsApiV1PublicEmbedTurnCredentialsSessionTokenGetData = {
|
||||
body?: never;
|
||||
path: {
|
||||
session_token: string;
|
||||
};
|
||||
query?: never;
|
||||
url: '/api/v1/public/embed/turn-credentials/{session_token}';
|
||||
};
|
||||
|
||||
export type GetPublicTurnCredentialsApiV1PublicEmbedTurnCredentialsSessionTokenGetErrors = {
|
||||
/**
|
||||
* Not found
|
||||
*/
|
||||
404: unknown;
|
||||
/**
|
||||
* Validation Error
|
||||
*/
|
||||
422: HttpValidationError;
|
||||
};
|
||||
|
||||
export type GetPublicTurnCredentialsApiV1PublicEmbedTurnCredentialsSessionTokenGetError = GetPublicTurnCredentialsApiV1PublicEmbedTurnCredentialsSessionTokenGetErrors[keyof GetPublicTurnCredentialsApiV1PublicEmbedTurnCredentialsSessionTokenGetErrors];
|
||||
|
||||
export type GetPublicTurnCredentialsApiV1PublicEmbedTurnCredentialsSessionTokenGetResponses = {
|
||||
/**
|
||||
* Successful Response
|
||||
*/
|
||||
200: TurnCredentialsResponse;
|
||||
};
|
||||
|
||||
export type GetPublicTurnCredentialsApiV1PublicEmbedTurnCredentialsSessionTokenGetResponse = GetPublicTurnCredentialsApiV1PublicEmbedTurnCredentialsSessionTokenGetResponses[keyof GetPublicTurnCredentialsApiV1PublicEmbedTurnCredentialsSessionTokenGetResponses];
|
||||
|
||||
export type OptionsTurnCredentialsApiV1PublicEmbedTurnCredentialsSessionTokenOptionsData = {
|
||||
body?: never;
|
||||
path: {
|
||||
session_token: string;
|
||||
};
|
||||
query?: never;
|
||||
url: '/api/v1/public/embed/turn-credentials/{session_token}';
|
||||
};
|
||||
|
||||
export type OptionsTurnCredentialsApiV1PublicEmbedTurnCredentialsSessionTokenOptionsErrors = {
|
||||
/**
|
||||
* Not found
|
||||
*/
|
||||
404: unknown;
|
||||
/**
|
||||
* Validation Error
|
||||
*/
|
||||
422: HttpValidationError;
|
||||
};
|
||||
|
||||
export type OptionsTurnCredentialsApiV1PublicEmbedTurnCredentialsSessionTokenOptionsError = OptionsTurnCredentialsApiV1PublicEmbedTurnCredentialsSessionTokenOptionsErrors[keyof OptionsTurnCredentialsApiV1PublicEmbedTurnCredentialsSessionTokenOptionsErrors];
|
||||
|
||||
export type OptionsTurnCredentialsApiV1PublicEmbedTurnCredentialsSessionTokenOptionsResponses = {
|
||||
/**
|
||||
* Successful Response
|
||||
*/
|
||||
200: unknown;
|
||||
};
|
||||
|
||||
export type InitiateCallApiV1PublicAgentUuidPostData = {
|
||||
body: TriggerCallRequest;
|
||||
headers: {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue