feat: add vonage telephony (#35)

* refactor: telephony integration * feat: add vonage telephony
2026-07-01 08:59:46 +02:00 · 2025-10-27 15:29:57 +05:30 · 2025-10-27 15:29:57 +05:30 · 4cfdc3d420
commit 4cfdc3d420
parent 6503d806c5
39 changed files with 3382 additions and 335 deletions
--- a/api/services/pipecat/audio_config.py
+++ b/api/services/pipecat/audio_config.py
@ -80,7 +80,7 @@ def create_audio_config(transport_type: str) -> AudioConfig:
    """Create audio configuration based on transport type.

    Args:
-        transport_type: Type of transport ("webrtc", "twilio", "stasis")
+        transport_type: Type of transport ("webrtc", "twilio", "vonage", "stasis")

    Returns:
        AudioConfig instance with appropriate settings
@ -93,6 +93,15 @@ def create_audio_config(transport_type: str) -> AudioConfig:
            pipeline_sample_rate=8000,  # Keep at 8kHz to avoid resampling
            buffer_size_seconds=1.0,
        )
+    elif transport_type == WorkflowRunMode.VONAGE.value:
+        # Vonage uses 16kHz Linear PCM
+        return AudioConfig(
+            transport_in_sample_rate=16000,
+            transport_out_sample_rate=16000,
+            vad_sample_rate=16000,  # Use matching VAD rate
+            pipeline_sample_rate=16000,  # Keep at 16kHz to avoid resampling
+            buffer_size_seconds=1.0,
+        )
    elif transport_type in [
        WorkflowRunMode.WEBRTC.value,
        WorkflowRunMode.SMALLWEBRTC.value,
--- a/api/services/pipecat/run_pipeline.py
+++ b/api/services/pipecat/run_pipeline.py
@ -4,6 +4,7 @@ from fastapi import HTTPException, WebSocket
 from loguru import logger

 from api.db import db_client
+from api.db.models import WorkflowModel
 from api.enums import WorkflowRunMode
 from api.services.pipecat.audio_config import AudioConfig, create_audio_config
 from api.services.pipecat.engine_pre_aggregator_processor import (
@ -33,6 +34,7 @@ from api.services.pipecat.tracing_config import setup_pipeline_tracing
 from api.services.pipecat.transport_setup import (
    create_stasis_transport,
    create_twilio_transport,
+    create_vonage_transport,
    create_webrtc_transport,
 )
 from api.services.telephony.stasis_rtp_connection import StasisRTPConnection
@ -70,7 +72,7 @@ async def run_pipeline_twilio(
    set_current_run_id(workflow_run_id)

    # Store Twilio call SID in cost_info for later cost calculation
-    cost_info = {"twilio_call_sid": call_sid}
+    cost_info = {"twilio_call_sid": call_sid, "provider": "twilio"}
    await db_client.update_workflow_run(workflow_run_id, cost_info=cost_info)

    # Get workflow to extract all pipeline configurations
@ -107,6 +109,69 @@ async def run_pipeline_twilio(
    )


+async def run_pipeline_vonage(
+    websocket_client,
+    call_uuid: str,
+    workflow: WorkflowModel,
+    organization_id: int,
+    workflow_id: int,
+    workflow_run_id: int,
+    user_id: int,
+):
+    """Run the pipeline for a Vonage WebSocket connection.
+
+    Vonage uses raw PCM audio over WebSocket instead of base64-encoded μ-law.
+    The audio is transmitted as binary frames at 16kHz by default.
+
+    Args:
+        websocket_client: WebSocket connection handed to the Vonage transport.
+        call_uuid: Vonage call UUID; stored in the run's ``cost_info``.
+        workflow: Workflow whose ``workflow_configurations`` may carry
+            ``vad_configuration`` and ``ambient_noise_configuration``.
+        organization_id: Organization ID forwarded to
+            ``create_vonage_transport``.
+        workflow_id: ID of the workflow to run.
+        workflow_run_id: ID of the workflow run being executed.
+        user_id: ID of the user, forwarded to the pipeline runner.
+
+    Raises:
+        Exception: Any error raised while creating the transport or running
+            the pipeline is logged (with traceback) and re-raised.
+    """
+    logger.info(f"Starting Vonage pipeline for workflow run {workflow_run_id}")
+    set_current_run_id(workflow_run_id)
+
+    # Store Vonage call UUID in cost_info for later cost calculation
+    cost_info = {"vonage_call_uuid": call_uuid, "provider": "vonage"}
+    await db_client.update_workflow_run(workflow_run_id, cost_info=cost_info)
+
+    # Optional per-workflow overrides; both stay None when not configured
+    workflow_configurations = (
+        workflow.workflow_configurations if workflow else None
+    ) or {}
+    vad_config = workflow_configurations.get("vad_configuration")
+    ambient_noise_config = workflow_configurations.get(
+        "ambient_noise_configuration"
+    )
+
+    try:
+        # Setup audio config for Vonage using the centralized config
+        audio_config = create_audio_config(WorkflowRunMode.VONAGE.value)
+
+        # Create Vonage transport
+        transport = await create_vonage_transport(
+            websocket_client,
+            call_uuid,
+            workflow_run_id,
+            audio_config,
+            organization_id,
+            vad_config,
+            ambient_noise_config,
+        )
+
+        # No special handshake needed for Vonage
+        # Audio streaming starts immediately
+
+        # Run the pipeline (same as Twilio/WebRTC)
+        await _run_pipeline(
+            transport,
+            workflow_id,
+            workflow_run_id,
+            user_id,
+            call_context_vars={},
+            audio_config=audio_config,
+        )
+
+    except Exception as e:
+        # logger.exception keeps the traceback, which logger.error would drop
+        logger.exception(f"Error in Vonage pipeline: {e}")
+        raise
+
+
 async def run_pipeline_smallwebrtc(
    webrtc_connection: SmallWebRTCConnection,
    workflow_id: int,
--- a/api/services/pipecat/transport_setup.py
+++ b/api/services/pipecat/transport_setup.py
@ -22,6 +22,7 @@ from pipecat.audio.mixers.soundfile_mixer import SoundfileMixer
 from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
 from pipecat.audio.vad.silero import SileroVADAnalyzer, VADParams
 from pipecat.serializers.twilio import TwilioFrameSerializer
+from pipecat.serializers.vonage import VonageFrameSerializer
 from pipecat.transports.base_transport import TransportParams
 from pipecat.transports.smallwebrtc.connection import SmallWebRTCConnection
 from pipecat.transports.smallwebrtc.transport import SmallWebRTCTransport
@ -85,7 +86,7 @@ async def create_twilio_transport(

    # Fetch Twilio credentials from organization config
    config = await db_client.get_configuration(
-        organization_id, OrganizationConfigurationKey.TWILIO_CONFIGURATION.value
+        organization_id, OrganizationConfigurationKey.TELEPHONY_CONFIGURATION.value
    )

    if not config or not config.value:
@ -151,6 +152,86 @@ async def create_twilio_transport(
    )


+async def create_vonage_transport(
+    websocket_client,
+    call_uuid: str,
+    workflow_run_id: int,
+    audio_config: AudioConfig,
+    organization_id: int,
+    vad_config: dict | None = None,
+    ambient_noise_config: dict | None = None,
+):
+    """Build the FastAPI WebSocket transport used for a Vonage call.
+
+    Args:
+        websocket_client: WebSocket connection the transport wraps.
+        call_uuid: Vonage call UUID passed to the frame serializer.
+        workflow_run_id: Workflow run ID used to create the turn analyzer.
+        audio_config: Sample-rate settings for transport, pipeline and mixer.
+        organization_id: Organization whose telephony configuration is loaded.
+        vad_config: Optional VAD overrides (``confidence``, ``start_seconds``,
+            ``stop_seconds``, ``minimum_volume``); defaults apply when falsy.
+        ambient_noise_config: Optional ambient-noise settings (``enabled``,
+            ``volume``); a silence mixer is used unless ``enabled`` is truthy.
+
+    Returns:
+        A ``FastAPIWebsocketTransport`` configured with the Vonage serializer.
+
+    Raises:
+        ValueError: If the loaded provider is not ``"vonage"``, or if
+            ``application_id`` / ``private_key`` is missing from the config.
+    """
+    # Use the factory to load config from database
+    from api.services.telephony.factory import load_telephony_config
+
+    config = await load_telephony_config(organization_id)
+
+    if config.get("provider") != "vonage":
+        raise ValueError(f"Expected Vonage provider, got {config.get('provider')}")
+
+    application_id = config.get("application_id")
+    private_key = config.get("private_key")
+    if not (application_id and private_key):
+        raise ValueError(
+            f"Incomplete Vonage configuration for organization {organization_id}"
+        )
+
+    turn_analyzer = create_turn_analyzer(workflow_run_id, audio_config)
+
+    serializer_params = VonageFrameSerializer.InputParams(
+        vonage_sample_rate=audio_config.transport_in_sample_rate,
+        sample_rate=audio_config.pipeline_sample_rate,
+    )
+    serializer = VonageFrameSerializer(
+        call_uuid=call_uuid,
+        application_id=application_id,
+        private_key=private_key,
+        params=serializer_params,
+    )
+
+    # VAD: apply workflow overrides when given, otherwise analyzer defaults
+    if vad_config:
+        vad_params = VADParams(
+            confidence=vad_config.get("confidence", 0.7),
+            start_secs=vad_config.get("start_seconds", 0.4),
+            stop_secs=vad_config.get("stop_seconds", 0.8),
+            min_volume=vad_config.get("minimum_volume", 0.6),
+        )
+        vad_analyzer = SileroVADAnalyzer(params=vad_params)
+    else:
+        vad_analyzer = SileroVADAnalyzer()
+
+    # Output mixer: office ambience only when explicitly enabled
+    if ambient_noise_config and ambient_noise_config.get("enabled", False):
+        ambience_file = (
+            APP_ROOT_DIR
+            / "assets"
+            / f"office-ambience-{audio_config.transport_out_sample_rate}-mono.wav"
+        )
+        out_mixer = SoundfileMixer(
+            sound_files={"office": ambience_file},
+            default_sound="office",
+            volume=ambient_noise_config.get("volume", 0.3),
+        )
+    else:
+        out_mixer = SilenceAudioMixer()
+
+    # Input noise filter is optional and controlled by ENABLE_RNNOISE
+    if ENABLE_RNNOISE:
+        in_filter = RNNoiseFilter(library_path=librnnoise_path)
+    else:
+        in_filter = None
+
+    # Important: Vonage uses binary WebSocket mode, not text
+    transport_params = FastAPIWebsocketParams(
+        audio_in_enabled=True,
+        audio_out_enabled=True,
+        audio_in_sample_rate=audio_config.transport_in_sample_rate,
+        audio_out_sample_rate=audio_config.transport_out_sample_rate,
+        vad_analyzer=vad_analyzer,
+        audio_out_mixer=out_mixer,
+        turn_analyzer=turn_analyzer,
+        serializer=serializer,
+        audio_in_filter=in_filter,
+    )
+    return FastAPIWebsocketTransport(
+        websocket=websocket_client,
+        params=transport_params,
+    )
+
+
 def create_webrtc_transport(
    webrtc_connection: SmallWebRTCConnection,
    workflow_run_id: int,