feat: add vonage telephony (#35)

* refactor: telephony integration

* feat: add vonage telephony
This commit is contained in:
Sabiha Khan 2025-10-27 15:29:57 +05:30 committed by Sabiha Khan
parent 6503d806c5
commit 4cfdc3d420
39 changed files with 3382 additions and 335 deletions

View file

@ -80,7 +80,7 @@ def create_audio_config(transport_type: str) -> AudioConfig:
"""Create audio configuration based on transport type.
Args:
transport_type: Type of transport ("webrtc", "twilio", "stasis")
transport_type: Type of transport ("webrtc", "twilio", "vonage", "stasis")
Returns:
AudioConfig instance with appropriate settings
@ -93,6 +93,15 @@ def create_audio_config(transport_type: str) -> AudioConfig:
pipeline_sample_rate=8000, # Keep at 8kHz to avoid resampling
buffer_size_seconds=1.0,
)
elif transport_type == WorkflowRunMode.VONAGE.value:
# Vonage uses 16kHz Linear PCM
return AudioConfig(
transport_in_sample_rate=16000,
transport_out_sample_rate=16000,
vad_sample_rate=16000, # Use matching VAD rate
pipeline_sample_rate=16000, # Keep at 16kHz to avoid resampling
buffer_size_seconds=1.0,
)
elif transport_type in [
WorkflowRunMode.WEBRTC.value,
WorkflowRunMode.SMALLWEBRTC.value,

View file

@ -4,6 +4,7 @@ from fastapi import HTTPException, WebSocket
from loguru import logger
from api.db import db_client
from api.db.models import WorkflowModel
from api.enums import WorkflowRunMode
from api.services.pipecat.audio_config import AudioConfig, create_audio_config
from api.services.pipecat.engine_pre_aggregator_processor import (
@ -33,6 +34,7 @@ from api.services.pipecat.tracing_config import setup_pipeline_tracing
from api.services.pipecat.transport_setup import (
create_stasis_transport,
create_twilio_transport,
create_vonage_transport,
create_webrtc_transport,
)
from api.services.telephony.stasis_rtp_connection import StasisRTPConnection
@ -70,7 +72,7 @@ async def run_pipeline_twilio(
set_current_run_id(workflow_run_id)
# Store Twilio call SID in cost_info for later cost calculation
cost_info = {"twilio_call_sid": call_sid}
cost_info = {"twilio_call_sid": call_sid, "provider": "twilio"}
await db_client.update_workflow_run(workflow_run_id, cost_info=cost_info)
# Get workflow to extract all pipeline configurations
@ -107,6 +109,69 @@ async def run_pipeline_twilio(
)
async def run_pipeline_vonage(
websocket_client,
call_uuid: str,
workflow: WorkflowModel,
organization_id: int,
workflow_id: int,
workflow_run_id: int,
user_id: int,
):
"""Run pipeline for Vonage WebSocket connections.
Vonage uses raw PCM audio over WebSocket instead of base64-encoded μ-law.
The audio is transmitted as binary frames at 16kHz by default.
"""
logger.info(f"Starting Vonage pipeline for workflow run {workflow_run_id}")
set_current_run_id(workflow_run_id)
# Store Vonage call UUID in cost_info for later cost calculation
cost_info = {"vonage_call_uuid": call_uuid, "provider": "vonage"}
await db_client.update_workflow_run(workflow_run_id, cost_info=cost_info)
# Extract VAD and ambient noise config from workflow
vad_config = None
ambient_noise_config = None
if workflow and workflow.workflow_configurations:
if "vad_configuration" in workflow.workflow_configurations:
vad_config = workflow.workflow_configurations["vad_configuration"]
if "ambient_noise_configuration" in workflow.workflow_configurations:
ambient_noise_config = workflow.workflow_configurations["ambient_noise_configuration"]
try:
# Setup audio config for Vonage using the centralized config
audio_config = create_audio_config(WorkflowRunMode.VONAGE.value)
# Create Vonage transport
transport = await create_vonage_transport(
websocket_client,
call_uuid,
workflow_run_id,
audio_config,
organization_id,
vad_config,
ambient_noise_config,
)
# No special handshake needed for Vonage
# Audio streaming starts immediately
# Run the pipeline (same as Twilio/WebRTC)
await _run_pipeline(
transport,
workflow_id,
workflow_run_id,
user_id,
call_context_vars={},
audio_config=audio_config,
)
except Exception as e:
logger.error(f"Error in Vonage pipeline: {e}")
raise
async def run_pipeline_smallwebrtc(
webrtc_connection: SmallWebRTCConnection,
workflow_id: int,

View file

@ -22,6 +22,7 @@ from pipecat.audio.mixers.soundfile_mixer import SoundfileMixer
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
from pipecat.audio.vad.silero import SileroVADAnalyzer, VADParams
from pipecat.serializers.twilio import TwilioFrameSerializer
from pipecat.serializers.vonage import VonageFrameSerializer
from pipecat.transports.base_transport import TransportParams
from pipecat.transports.smallwebrtc.connection import SmallWebRTCConnection
from pipecat.transports.smallwebrtc.transport import SmallWebRTCTransport
@ -85,7 +86,7 @@ async def create_twilio_transport(
# Fetch Twilio credentials from organization config
config = await db_client.get_configuration(
organization_id, OrganizationConfigurationKey.TWILIO_CONFIGURATION.value
organization_id, OrganizationConfigurationKey.TELEPHONY_CONFIGURATION.value
)
if not config or not config.value:
@ -151,6 +152,86 @@ async def create_twilio_transport(
)
async def create_vonage_transport(
websocket_client,
call_uuid: str,
workflow_run_id: int,
audio_config: AudioConfig,
organization_id: int,
vad_config: dict | None = None,
ambient_noise_config: dict | None = None,
):
"""Create a transport for Vonage connections"""
# Use the factory to load config from database
from api.services.telephony.factory import load_telephony_config
config = await load_telephony_config(organization_id)
if config.get("provider") != "vonage":
raise ValueError(f"Expected Vonage provider, got {config.get('provider')}")
application_id = config.get("application_id")
private_key = config.get("private_key")
if not application_id or not private_key:
raise ValueError(
f"Incomplete Vonage configuration for organization {organization_id}"
)
turn_analyzer = create_turn_analyzer(workflow_run_id, audio_config)
serializer = VonageFrameSerializer(
call_uuid=call_uuid,
application_id=application_id,
private_key=private_key,
params=VonageFrameSerializer.InputParams(
vonage_sample_rate=audio_config.transport_in_sample_rate,
sample_rate=audio_config.pipeline_sample_rate
)
)
# Important: Vonage uses binary WebSocket mode, not text
return FastAPIWebsocketTransport(
websocket=websocket_client,
params=FastAPIWebsocketParams(
audio_in_enabled=True,
audio_out_enabled=True,
audio_in_sample_rate=audio_config.transport_in_sample_rate,
audio_out_sample_rate=audio_config.transport_out_sample_rate,
vad_analyzer=(
SileroVADAnalyzer(
params=VADParams(
confidence=vad_config.get("confidence", 0.7),
start_secs=vad_config.get("start_seconds", 0.4),
stop_secs=vad_config.get("stop_seconds", 0.8),
min_volume=vad_config.get("minimum_volume", 0.6),
)
)
if vad_config
else SileroVADAnalyzer()
),
audio_out_mixer=(
SoundfileMixer(
sound_files={
"office": APP_ROOT_DIR
/ "assets"
/ f"office-ambience-{audio_config.transport_out_sample_rate}-mono.wav"
},
default_sound="office",
volume=ambient_noise_config.get("volume", 0.3),
)
if ambient_noise_config and ambient_noise_config.get("enabled", False)
else SilenceAudioMixer()
),
turn_analyzer=turn_analyzer,
serializer=serializer,
audio_in_filter=RNNoiseFilter(library_path=librnnoise_path)
if ENABLE_RNNOISE
else None,
),
)
def create_webrtc_transport(
webrtc_connection: SmallWebRTCConnection,
workflow_run_id: int,