mirror of
https://github.com/dograh-hq/dograh.git
synced 2026-07-01 08:59:46 +02:00
feat: add vonage telephony (#35)
* refactor: telephony integration * feat: add vonage telephony
This commit is contained in:
parent
6503d806c5
commit
4cfdc3d420
39 changed files with 3382 additions and 335 deletions
|
|
@ -80,7 +80,7 @@ def create_audio_config(transport_type: str) -> AudioConfig:
|
|||
"""Create audio configuration based on transport type.
|
||||
|
||||
Args:
|
||||
transport_type: Type of transport ("webrtc", "twilio", "stasis")
|
||||
transport_type: Type of transport ("webrtc", "twilio", "vonage", "stasis")
|
||||
|
||||
Returns:
|
||||
AudioConfig instance with appropriate settings
|
||||
|
|
@ -93,6 +93,15 @@ def create_audio_config(transport_type: str) -> AudioConfig:
|
|||
pipeline_sample_rate=8000, # Keep at 8kHz to avoid resampling
|
||||
buffer_size_seconds=1.0,
|
||||
)
|
||||
elif transport_type == WorkflowRunMode.VONAGE.value:
|
||||
# Vonage uses 16kHz Linear PCM
|
||||
return AudioConfig(
|
||||
transport_in_sample_rate=16000,
|
||||
transport_out_sample_rate=16000,
|
||||
vad_sample_rate=16000, # Use matching VAD rate
|
||||
pipeline_sample_rate=16000, # Keep at 16kHz to avoid resampling
|
||||
buffer_size_seconds=1.0,
|
||||
)
|
||||
elif transport_type in [
|
||||
WorkflowRunMode.WEBRTC.value,
|
||||
WorkflowRunMode.SMALLWEBRTC.value,
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ from fastapi import HTTPException, WebSocket
|
|||
from loguru import logger
|
||||
|
||||
from api.db import db_client
|
||||
from api.db.models import WorkflowModel
|
||||
from api.enums import WorkflowRunMode
|
||||
from api.services.pipecat.audio_config import AudioConfig, create_audio_config
|
||||
from api.services.pipecat.engine_pre_aggregator_processor import (
|
||||
|
|
@ -33,6 +34,7 @@ from api.services.pipecat.tracing_config import setup_pipeline_tracing
|
|||
from api.services.pipecat.transport_setup import (
|
||||
create_stasis_transport,
|
||||
create_twilio_transport,
|
||||
create_vonage_transport,
|
||||
create_webrtc_transport,
|
||||
)
|
||||
from api.services.telephony.stasis_rtp_connection import StasisRTPConnection
|
||||
|
|
@ -70,7 +72,7 @@ async def run_pipeline_twilio(
|
|||
set_current_run_id(workflow_run_id)
|
||||
|
||||
# Store Twilio call SID in cost_info for later cost calculation
|
||||
cost_info = {"twilio_call_sid": call_sid}
|
||||
cost_info = {"twilio_call_sid": call_sid, "provider": "twilio"}
|
||||
await db_client.update_workflow_run(workflow_run_id, cost_info=cost_info)
|
||||
|
||||
# Get workflow to extract all pipeline configurations
|
||||
|
|
@ -107,6 +109,69 @@ async def run_pipeline_twilio(
|
|||
)
|
||||
|
||||
|
||||
async def run_pipeline_vonage(
    websocket_client,
    call_uuid: str,
    workflow: WorkflowModel,
    organization_id: int,
    workflow_id: int,
    workflow_run_id: int,
    user_id: int,
):
    """Run a workflow pipeline over a Vonage WebSocket connection.

    Unlike the base64-encoded μ-law stream used elsewhere, Vonage sends raw
    PCM audio as binary WebSocket frames (16kHz by default), so the audio
    configuration is taken from ``create_audio_config`` for the Vonage mode.

    Args:
        websocket_client: WebSocket handed to the Vonage transport.
        call_uuid: Vonage call UUID; stored in the run's ``cost_info``.
        workflow: Workflow whose ``workflow_configurations`` may carry
            ``vad_configuration`` and ``ambient_noise_configuration``.
        organization_id: Organization used to look up telephony settings.
        workflow_id: Identifier forwarded to ``_run_pipeline``.
        workflow_run_id: Identifier of the run being executed.
        user_id: Identifier forwarded to ``_run_pipeline``.

    Raises:
        Exception: Anything raised while building the transport or running
            the pipeline is logged and re-raised unchanged.
    """
    logger.info(f"Starting Vonage pipeline for workflow run {workflow_run_id}")
    set_current_run_id(workflow_run_id)

    # Persist the call UUID and provider so cost can be calculated later.
    await db_client.update_workflow_run(
        workflow_run_id,
        cost_info={"vonage_call_uuid": call_uuid, "provider": "vonage"},
    )

    # Optional per-workflow overrides; both stay None when not configured.
    vad_config = None
    ambient_noise_config = None
    configurations = workflow.workflow_configurations if workflow else None
    if configurations:
        if "vad_configuration" in configurations:
            vad_config = configurations["vad_configuration"]
        if "ambient_noise_configuration" in configurations:
            ambient_noise_config = configurations["ambient_noise_configuration"]

    try:
        # Centralized audio settings for the Vonage run mode.
        audio_config = create_audio_config(WorkflowRunMode.VONAGE.value)

        transport = await create_vonage_transport(
            websocket_client=websocket_client,
            call_uuid=call_uuid,
            workflow_run_id=workflow_run_id,
            audio_config=audio_config,
            organization_id=organization_id,
            vad_config=vad_config,
            ambient_noise_config=ambient_noise_config,
        )

        # Vonage needs no handshake: audio streaming starts immediately,
        # so the shared pipeline runner is invoked right away.
        await _run_pipeline(
            transport,
            workflow_id,
            workflow_run_id,
            user_id,
            call_context_vars={},
            audio_config=audio_config,
        )
    except Exception as e:
        logger.error(f"Error in Vonage pipeline: {e}")
        raise
|
||||
|
||||
|
||||
async def run_pipeline_smallwebrtc(
|
||||
webrtc_connection: SmallWebRTCConnection,
|
||||
workflow_id: int,
|
||||
|
|
|
|||
|
|
@ -22,6 +22,7 @@ from pipecat.audio.mixers.soundfile_mixer import SoundfileMixer
|
|||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer, VADParams
|
||||
from pipecat.serializers.twilio import TwilioFrameSerializer
|
||||
from pipecat.serializers.vonage import VonageFrameSerializer
|
||||
from pipecat.transports.base_transport import TransportParams
|
||||
from pipecat.transports.smallwebrtc.connection import SmallWebRTCConnection
|
||||
from pipecat.transports.smallwebrtc.transport import SmallWebRTCTransport
|
||||
|
|
@ -85,7 +86,7 @@ async def create_twilio_transport(
|
|||
|
||||
# Fetch Twilio credentials from organization config
|
||||
config = await db_client.get_configuration(
|
||||
organization_id, OrganizationConfigurationKey.TWILIO_CONFIGURATION.value
|
||||
organization_id, OrganizationConfigurationKey.TELEPHONY_CONFIGURATION.value
|
||||
)
|
||||
|
||||
if not config or not config.value:
|
||||
|
|
@ -151,6 +152,86 @@ async def create_twilio_transport(
|
|||
)
|
||||
|
||||
|
||||
async def create_vonage_transport(
    websocket_client,
    call_uuid: str,
    workflow_run_id: int,
    audio_config: AudioConfig,
    organization_id: int,
    vad_config: dict | None = None,
    ambient_noise_config: dict | None = None,
):
    """Build the WebSocket transport used for a Vonage call.

    Args:
        websocket_client: WebSocket the transport reads from and writes to.
        call_uuid: Vonage call UUID passed to the frame serializer.
        workflow_run_id: Run identifier used when creating the turn analyzer.
        audio_config: Sample rates for the transport, serializer and mixer.
        organization_id: Organization whose telephony configuration is loaded.
        vad_config: Optional VAD overrides (``confidence``, ``start_seconds``,
            ``stop_seconds``, ``minimum_volume``); defaults apply when falsy.
        ambient_noise_config: Optional ambient-noise settings; the office
            ambience mixer is used only when ``enabled`` is truthy.

    Returns:
        A ``FastAPIWebsocketTransport`` wired with the Vonage serializer.

    Raises:
        ValueError: If the configured provider is not ``"vonage"``, or the
            ``application_id`` / ``private_key`` is missing.
    """
    # Function-scope import kept from the original implementation.
    # NOTE(review): presumably avoids a circular import — confirm.
    from api.services.telephony.factory import load_telephony_config

    # Telephony settings come from the database via the factory helper.
    config = await load_telephony_config(organization_id)

    if config.get("provider") != "vonage":
        raise ValueError(f"Expected Vonage provider, got {config.get('provider')}")

    application_id = config.get("application_id")
    private_key = config.get("private_key")
    if not (application_id and private_key):
        raise ValueError(
            f"Incomplete Vonage configuration for organization {organization_id}"
        )

    turn_analyzer = create_turn_analyzer(workflow_run_id, audio_config)

    serializer_params = VonageFrameSerializer.InputParams(
        vonage_sample_rate=audio_config.transport_in_sample_rate,
        sample_rate=audio_config.pipeline_sample_rate,
    )
    serializer = VonageFrameSerializer(
        call_uuid=call_uuid,
        application_id=application_id,
        private_key=private_key,
        params=serializer_params,
    )

    # Voice-activity detection: workflow overrides when given, else defaults.
    if vad_config:
        vad_analyzer = SileroVADAnalyzer(
            params=VADParams(
                confidence=vad_config.get("confidence", 0.7),
                start_secs=vad_config.get("start_seconds", 0.4),
                stop_secs=vad_config.get("stop_seconds", 0.8),
                min_volume=vad_config.get("minimum_volume", 0.6),
            )
        )
    else:
        vad_analyzer = SileroVADAnalyzer()

    # Output mixer: office ambience at the outbound sample rate when enabled,
    # otherwise the silence mixer.
    if ambient_noise_config and ambient_noise_config.get("enabled", False):
        ambience_path = (
            APP_ROOT_DIR
            / "assets"
            / f"office-ambience-{audio_config.transport_out_sample_rate}-mono.wav"
        )
        audio_out_mixer = SoundfileMixer(
            sound_files={"office": ambience_path},
            default_sound="office",
            volume=ambient_noise_config.get("volume", 0.3),
        )
    else:
        audio_out_mixer = SilenceAudioMixer()

    # Noise suppression on inbound audio is gated by ENABLE_RNNOISE.
    if ENABLE_RNNOISE:
        audio_in_filter = RNNoiseFilter(library_path=librnnoise_path)
    else:
        audio_in_filter = None

    # Important: Vonage uses binary WebSocket mode, not text
    transport_params = FastAPIWebsocketParams(
        audio_in_enabled=True,
        audio_out_enabled=True,
        audio_in_sample_rate=audio_config.transport_in_sample_rate,
        audio_out_sample_rate=audio_config.transport_out_sample_rate,
        vad_analyzer=vad_analyzer,
        audio_out_mixer=audio_out_mixer,
        turn_analyzer=turn_analyzer,
        serializer=serializer,
        audio_in_filter=audio_in_filter,
    )
    return FastAPIWebsocketTransport(
        websocket=websocket_client,
        params=transport_params,
    )
|
||||
|
||||
|
||||
def create_webrtc_transport(
|
||||
webrtc_connection: SmallWebRTCConnection,
|
||||
workflow_run_id: int,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue