From 5ee68f58bb976709cf9b8b64182311792fd32797 Mon Sep 17 00:00:00 2001 From: Sabiha Khan Date: Wed, 29 Apr 2026 19:05:06 +0530 Subject: [PATCH] fix: honor Telnyx's per-call codec in bidirectional stream MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Telnyx's bidirectional stream uses different codecs per direction: - Dograh → Telnyx: what we declare via `stream_bidirectional_codec` - Telnyx → Dograh: whatever the PSTN leg negotiated (PCMA for UK, Europe, India termination; PCMU for US), announced as `media_format.encoding` on the WebSocket start message. We hardcoded both directions on the serializer to PCMU, so any call whose PSTN leg used PCMA arrived as A-law bytes that we decoded through a μ-law table → static for the entire call. `handle_websocket` now extracts `media_format.encoding` from the start message and threads it through `transport_kwargs`. The serializer uses it for the Telnyx → Dograh direction; the Dograh → Telnyx direction stays pinned to PCMU to match the unchanged `stream_bidirectional_codec` in the dial/answer payloads. Note: pipecat's `TelnyxFrameSerializer` names its params from the call's POV, not Dograh's — `inbound_encoding` is what we send into the call, `outbound_encoding` is what we receive. Easy to mix up. --- api/services/telephony/providers/telnyx/provider.py | 12 ++++++++++-- api/services/telephony/providers/telnyx/transport.py | 9 +++++++-- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/api/services/telephony/providers/telnyx/provider.py b/api/services/telephony/providers/telnyx/provider.py index d73c5ac..dcbc515 100644 --- a/api/services/telephony/providers/telnyx/provider.py +++ b/api/services/telephony/providers/telnyx/provider.py @@ -82,6 +82,9 @@ class TelnyxProvider(TelephonyProvider): f"{backend_endpoint}/api/v1/telephony/telnyx/events/{workflow_run_id}" ) + # stream_bidirectional_codec controls only the Dograh → Telnyx direction. 
+ # The Telnyx → Dograh direction follows the PSTN leg and is announced via + # media_format.encoding in the WebSocket start message. payload = { "connection_id": self.connection_id, "to": to_number, @@ -268,11 +271,15 @@ class TelnyxProvider(TelephonyProvider): await websocket.close(code=4400, reason="Expected start event") return - # Extract Telnyx-specific identifiers + # media_format.encoding is the codec Telnyx delivers on the + # inbound direction (Telnyx → Dograh); the outbound direction is + # pinned to PCMU separately via stream_bidirectional_codec. try: stream_id = start_data.get("stream_id", "") start_info = start_data.get("start", {}) call_control_id = start_info.get("call_control_id", "") + media_format = start_info.get("media_format") or {} + encoding = media_format.get("encoding") or "PCMU" except (KeyError, AttributeError): logger.error("Missing stream_id or call_control_id in start message") await websocket.close(code=4400, reason="Missing stream identifiers") @@ -288,7 +295,7 @@ class TelnyxProvider(TelephonyProvider): logger.info( f"Telnyx stream started: stream_id={stream_id}, " - f"call_control_id={call_control_id}" + f"call_control_id={call_control_id}, encoding={encoding}" ) await run_pipeline_telephony( @@ -301,6 +308,7 @@ class TelnyxProvider(TelephonyProvider): transport_kwargs={ "stream_id": stream_id, "call_control_id": call_control_id, + "encoding": encoding, }, ) diff --git a/api/services/telephony/providers/telnyx/transport.py b/api/services/telephony/providers/telnyx/transport.py index f41cc2a..fb603f5 100644 --- a/api/services/telephony/providers/telnyx/transport.py +++ b/api/services/telephony/providers/telnyx/transport.py @@ -24,6 +24,7 @@ async def create_transport( telephony_configuration_id: int | None = None, stream_id: str, call_control_id: str, + encoding: str = "PCMU", ): """Create a transport for Telnyx connections.""" config = await load_credentials_for_transport( @@ -36,12 +37,16 @@ async def create_transport( 
f"Incomplete Telnyx configuration for organization {organization_id}" ) + # Pipecat's TelnyxFrameSerializer names its params from the call's POV, + # not Dograh's: ``inbound_encoding`` is what we *send into the call* + # (Dograh → Telnyx), and ``outbound_encoding`` is what we *receive out of + # the call* (Telnyx → Dograh). serializer = TelnyxFrameSerializer( stream_id=stream_id, call_control_id=call_control_id, api_key=api_key, - outbound_encoding="PCMU", - inbound_encoding="PCMU", + inbound_encoding="PCMU", # Dograh → Telnyx; matches stream_bidirectional_codec + outbound_encoding=encoding, # Telnyx → Dograh; from media_format.encoding ) mixer = await build_audio_out_mixer(