dograh/api/services/telephony/providers/telnyx/transport.py
Sabiha Khan 085ab0a7ae
fix: honor Telnyx's per-call codec in bidirectional stream (#256)
Telnyx's bidirectional stream uses different codecs per direction:
  - Dograh → Telnyx: what we declare via `stream_bidirectional_codec`
  - Telnyx → Dograh: whatever the PSTN leg negotiated (PCMA for UK,
    Europe, India termination; PCMU for US), announced as
    `media_format.encoding` on the WebSocket start message.

We hardcoded both directions on the serializer to PCMU, so any call
whose PSTN leg used PCMA arrived as A-law bytes that we decoded
through a μ-law table → static for the entire call.

`handle_websocket` now extracts `media_format.encoding` from the
start message and threads it through `transport_kwargs`. The
serializer uses it for the Telnyx → Dograh direction; the
Dograh → Telnyx direction stays pinned to PCMU to match the
unchanged `stream_bidirectional_codec` in the dial/answer payloads.

Note: pipecat's `TelnyxFrameSerializer` names its params from the
call's POV, not Dograh's — `inbound_encoding` is what we send into
the call, `outbound_encoding` is what we receive. Easy to mix up.
2026-04-29 19:20:52 +05:30

66 lines
2.2 KiB
Python

"""Telnyx transport factory."""
from fastapi import WebSocket
from api.services.pipecat.audio_config import AudioConfig
from api.services.pipecat.audio_mixer import build_audio_out_mixer
from api.services.telephony.factory import load_credentials_for_transport
from pipecat.transports.websocket.fastapi import (
FastAPIWebsocketParams,
FastAPIWebsocketTransport,
)
from .serializers import TelnyxFrameSerializer
async def create_transport(
    websocket: WebSocket,
    workflow_run_id: int,
    audio_config: AudioConfig,
    organization_id: int,
    *,
    vad_config: dict | None = None,
    ambient_noise_config: dict | None = None,
    telephony_configuration_id: int | None = None,
    stream_id: str,
    call_control_id: str,
    encoding: str = "PCMU",
):
    """Build the WebSocket transport for a Telnyx media stream.

    Loads the organization's Telnyx credentials, wires up a
    ``TelnyxFrameSerializer`` and an output mixer, and returns a
    ``FastAPIWebsocketTransport`` bound to ``websocket``.

    Args:
        websocket: The FastAPI WebSocket carrying the media stream.
        workflow_run_id: Accepted as part of the factory signature; not
            read by this function.
        audio_config: Supplies the transport input/output sample rates.
        organization_id: Organization whose Telnyx credentials are loaded.
        vad_config: Accepted as part of the factory signature; not read
            by this function.
        ambient_noise_config: Forwarded to ``build_audio_out_mixer``.
        telephony_configuration_id: Forwarded to the credential loader.
        stream_id: Telnyx stream identifier handed to the serializer.
        call_control_id: Telnyx call-control identifier handed to the
            serializer.
        encoding: Codec of the audio arriving from Telnyx (the
            ``media_format.encoding`` of the stream); defaults to
            ``"PCMU"``.

    Returns:
        A ``FastAPIWebsocketTransport`` with audio input and output
        enabled.

    Raises:
        ValueError: If the loaded configuration has no usable ``api_key``.
    """
    credentials = await load_credentials_for_transport(
        organization_id, telephony_configuration_id, expected_provider="telnyx"
    )
    if not (api_key := credentials.get("api_key")):
        raise ValueError(
            f"Incomplete Telnyx configuration for organization {organization_id}"
        )

    # Direction naming trap: the serializer labels its encodings from the
    # call's point of view rather than Dograh's. ``inbound_encoding`` is
    # audio going *into* the call (Dograh → Telnyx) and stays on PCMU to
    # match ``stream_bidirectional_codec``; ``outbound_encoding`` is audio
    # coming *out of* the call (Telnyx → Dograh) and follows the codec
    # reported in ``media_format.encoding``.
    frame_serializer = TelnyxFrameSerializer(
        stream_id=stream_id,
        call_control_id=call_control_id,
        api_key=api_key,
        inbound_encoding="PCMU",
        outbound_encoding=encoding,
    )

    out_mixer = await build_audio_out_mixer(
        audio_config.transport_out_sample_rate, ambient_noise_config
    )

    transport_params = FastAPIWebsocketParams(
        audio_in_enabled=True,
        audio_out_enabled=True,
        audio_in_sample_rate=audio_config.transport_in_sample_rate,
        audio_out_sample_rate=audio_config.transport_out_sample_rate,
        audio_out_mixer=out_mixer,
        serializer=frame_serializer,
    )
    return FastAPIWebsocketTransport(websocket=websocket, params=transport_params)