Merge branch 'main' into feat/call-tags

2026-06-13 08:15:21 +02:00 · 2026-02-16 13:07:23 +05:30 · 2026-02-16 13:07:23 +05:30 · ea0967fd9c
commit ea0967fd9c
parent 15809e03a4 5d14d17ceb
41 changed files with 480 additions and 317 deletions
--- a/.release-please-manifest.json
+++ b/.release-please-manifest.json
@ -1,3 +1,3 @@
 {
-    ".": "1.12.0"
+    ".": "1.13.0"
 }
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -1,5 +1,23 @@
 # Changelog

+## [1.13.0](https://github.com/dograh-hq/dograh/compare/dograh-v1.12.0...dograh-v1.13.0) (2026-02-13)
+
+
+### Features
+
+* add languages for deepgram and dograh ([5256010](https://github.com/dograh-hq/dograh/commit/525601088accb763aa710ff6e0fb2a6fdf5acec2))
+* add openrouter support ([4c936ae](https://github.com/dograh-hq/dograh/commit/4c936ae57d14ea3aa40ea427b20b341776a9be9f))
+* add sarvam v3 voices ([a75bc72](https://github.com/dograh-hq/dograh/commit/a75bc72cb59537098b3b605a012a2ef8a3f0fe6a))
+* limit campaign concurrency to number of CLIs ([3cdede0](https://github.com/dograh-hq/dograh/commit/3cdede0f45de5d719c17797635464742ad2eac86))
+
+
+### Bug Fixes
+
+* add vad_analyzer in user aggregator ([6711dcb](https://github.com/dograh-hq/dograh/commit/6711dcb3ea7b7f78d7db5794fefc87380b8b751b))
+* fix cloudonix call hangup ([#154](https://github.com/dograh-hq/dograh/issues/154)) ([b9ddd30](https://github.com/dograh-hq/dograh/commit/b9ddd308134056db4d24a37702d151c9ee45e72d))
+* fixes aggregation in elevenlabs TTS ([#153](https://github.com/dograh-hq/dograh/issues/153)) ([e156524](https://github.com/dograh-hq/dograh/commit/e1565246fa6ecc5b73a663e9251a92d0796d19d3))
+* send sample rate to STT services ([7a10202](https://github.com/dograh-hq/dograh/commit/7a102026fbe90bed89455919ed1f9912cbab634b))
+
 ## [1.12.0](https://github.com/dograh-hq/dograh/compare/dograh-v1.11.2...dograh-v1.12.0) (2026-02-05)


--- a/api/logging_config.py
+++ b/api/logging_config.py
@ -7,7 +7,7 @@ import loguru
 from api.constants import SERIALIZE_LOG_OUTPUT
 from api.enums import Environment
 from api.utils.worker import get_worker_id, is_worker_process
-from pipecat.utils.context import run_id_var, turn_var
+from pipecat.utils.run_context import run_id_var, turn_var

 ENVIRONMENT = os.getenv("ENVIRONMENT", Environment.LOCAL.value)
 ENABLE_TURN_LOGGING = os.getenv("ENABLE_TURN_LOGGING", "false").lower() == "true"
--- a/api/pyproject.toml
+++ b/api/pyproject.toml
@ -1,5 +1,5 @@
 [project]
 name = "dograh-api"
-version = "1.12.0"
+version = "1.13.0"
 description = "Backend API for Dograh voice AI platform"
 requires-python = ">=3.12"
--- a/api/routes/integration.py
+++ b/api/routes/integration.py
@ -1,3 +1,7 @@
+"""
+Route for 3rd party integrations. Currently being backed by nango.
+"""
+
 from dataclasses import dataclass
 from typing import Any, Dict, List, Optional, TypedDict

--- a/api/routes/stasis_rtp.py
+++ b/api/routes/stasis_rtp.py
@ -1,12 +1,12 @@
 import random

 from loguru import logger
-from pipecat.utils.context import set_current_run_id

 from api.db import db_client
 from api.enums import WorkflowRunMode
 from api.services.pipecat.run_pipeline import run_pipeline_ari_stasis
 from api.services.telephony.stasis_rtp_connection import StasisRTPConnection
+from pipecat.utils.run_context import set_current_run_id


 async def on_stasis_call(call: StasisRTPConnection, call_context_vars: dict):
--- a/api/routes/telephony.py
+++ b/api/routes/telephony.py
@ -37,7 +37,7 @@ from api.utils.telephony_helper import (
    numbers_match,
    parse_webhook_request,
 )
-from pipecat.utils.context import set_current_run_id
+from pipecat.utils.run_context import set_current_run_id

 router = APIRouter(prefix="/telephony")

--- a/api/routes/webrtc_signaling.py
+++ b/api/routes/webrtc_signaling.py
@ -22,7 +22,7 @@ from typing import Dict, List, Optional

 from aiortc import RTCIceServer
 from aiortc.sdp import candidate_from_sdp
-from fastapi import APIRouter, Depends, WebSocket, WebSocketDisconnect
+from fastapi import APIRouter, Depends, HTTPException, WebSocket, WebSocketDisconnect
 from loguru import logger
 from starlette.websockets import WebSocketState

@ -44,7 +44,7 @@ from api.services.pipecat.ws_sender_registry import (
 )
 from api.services.quota_service import check_dograh_quota
 from pipecat.transports.smallwebrtc.connection import SmallWebRTCConnection
-from pipecat.utils.context import set_current_run_id
+from pipecat.utils.run_context import set_current_run_id

 router = APIRouter(prefix="/ws")

@ -390,6 +390,11 @@ async def signaling_websocket(
    user: UserModel = Depends(get_user_ws),
 ):
    """WebSocket endpoint for WebRTC signaling with ICE trickling."""
+    workflow_run = await db_client.get_workflow_run(workflow_run_id, user.id)
+    if not workflow_run:
+        logger.warning(f"workflow run {workflow_run_id} not found for user {user.id}")
+        raise HTTPException(status_code=400, detail="Bad workflow_run_id")
+
    await signaling_manager.handle_websocket(
        websocket, workflow_id, workflow_run_id, user
    )
--- a/api/services/configuration/registry.py
+++ b/api/services/configuration/registry.py
@ -278,7 +278,48 @@ class DograhTTSService(BaseTTSConfiguration):


 SARVAM_TTS_MODELS = ["bulbul:v2", "bulbul:v3"]
-SARVAM_VOICES = ["anushka", "manisha", "vidya", "arya", "abhilash", "karun", "hitesh"]
+SARVAM_V2_VOICES = ["anushka", "manisha", "vidya", "arya", "abhilash", "karun", "hitesh"]
+SARVAM_V3_VOICES = [
+    "shubh",
+    "aditya",
+    "ritu",
+    "priya",
+    "neha",
+    "rahul",
+    "pooja",
+    "rohan",
+    "simran",
+    "kavya",
+    "amit",
+    "dev",
+    "ishita",
+    "shreya",
+    "ratan",
+    "varun",
+    "manan",
+    "sumit",
+    "roopa",
+    "kabir",
+    "aayan",
+    "ashutosh",
+    "advait",
+    "amelia",
+    "sophia",
+    "anand",
+    "tanya",
+    "tarun",
+    "sunny",
+    "mani",
+    "gokul",
+    "vijay",
+    "shruti",
+    "suhani",
+    "mohit",
+    "kavitha",
+    "rehan",
+    "soham",
+    "rupali",
+]
 SARVAM_LANGUAGES = [
    "bn-IN",
    "en-IN",
@ -301,7 +342,16 @@ class SarvamTTSConfiguration(BaseTTSConfiguration):
    model: str = Field(
        default="bulbul:v2", json_schema_extra={"examples": SARVAM_TTS_MODELS}
    )
-    voice: str = Field(default="anushka", json_schema_extra={"examples": SARVAM_VOICES})
+    voice: str = Field(
+        default="anushka",
+        json_schema_extra={
+            "examples": SARVAM_V2_VOICES,
+            "model_options": {
+                "bulbul:v2": SARVAM_V2_VOICES,
+                "bulbul:v3": SARVAM_V3_VOICES,
+            },
+        },
+    )
    language: str = Field(
        default="hi-IN", json_schema_extra={"examples": SARVAM_LANGUAGES}
    )
@ -322,39 +372,89 @@ TTSConfig = Annotated[
 ###################################################### STT ########################################################################


-DEEPGRAM_STT_MODELS = ["nova-2", "nova-3-general", "flux-general-en"]
+DEEPGRAM_STT_MODELS = ["nova-3-general", "flux-general-en"]
 DEEPGRAM_LANGUAGES = [
    "multi",
+    "ar",
+    "ar-AE",
+    "ar-SA",
+    "ar-QA",
+    "ar-KW",
+    "ar-SY",
+    "ar-LB",
+    "ar-PS",
+    "ar-JO",
+    "ar-EG",
+    "ar-SD",
+    "ar-TD",
+    "ar-MA",
+    "ar-DZ",
+    "ar-TN",
+    "ar-IQ",
+    "ar-IR",
+    "be",
+    "bn",
+    "bs",
+    "bg",
+    "ca",
+    "cs",
+    "da",
+    "da-DK",
+    "de",
+    "de-CH",
+    "el",
    "en",
    "en-US",
-    "en-GB",
    "en-AU",
+    "en-GB",
    "en-IN",
+    "en-NZ",
    "es",
    "es-419",
+    "et",
+    "fa",
+    "fi",
    "fr",
    "fr-CA",
-    "de",
+    "he",
+    "hi",
+    "hr",
+    "hu",
+    "id",
    "it",
+    "ja",
+    "kn",
+    "ko",
+    "ko-KR",
+    "lt",
+    "lv",
+    "mk",
+    "mr",
+    "ms",
+    "nl",
+    "nl-BE",
+    "no",
+    "pl",
    "pt",
    "pt-BR",
-    "nl",
-    "hi",
-    "ja",
-    "ko",
-    "zh-CN",
-    "zh-TW",
+    "pt-PT",
+    "ro",
    "ru",
-    "pl",
+    "sk",
+    "sl",
+    "sr",
+    "sv",
+    "sv-SE",
+    "ta",
+    "te",
+    "th",
+    "tl",
    "tr",
    "uk",
+    "ur",
    "vi",
-    "sv",
-    "da",
-    "no",
-    "fi",
-    "id",
-    "th",
+    "zh-CN",
+    "zh-TW",
 ]


@ -365,7 +465,14 @@ class DeepgramSTTConfiguration(BaseSTTConfiguration):
        default="nova-3-general", json_schema_extra={"examples": DEEPGRAM_STT_MODELS}
    )
    language: str = Field(
-        default="multi", json_schema_extra={"examples": DEEPGRAM_LANGUAGES}
+        default="multi",
+        json_schema_extra={
+            "examples": DEEPGRAM_LANGUAGES,
+            "model_options": {
+                "nova-3-general": DEEPGRAM_LANGUAGES,
+                "flux-general-en": ["en"],
+            },
+        },
    )
    api_key: str

@ -390,39 +497,7 @@ class OpenAISTTConfiguration(BaseSTTConfiguration):

 # Dograh STT Service
 DOGRAH_STT_MODELS = ["default"]
-DOGRAH_STT_LANGUAGES = [
-    "multi",
-    "en",
-    "en-US",
-    "en-GB",
-    "en-AU",
-    "en-IN",
-    "es",
-    "es-419",
-    "fr",
-    "fr-CA",
-    "de",
-    "it",
-    "pt",
-    "pt-BR",
-    "nl",
-    "hi",
-    "ja",
-    "ko",
-    "zh-CN",
-    "zh-TW",
-    "ru",
-    "pl",
-    "tr",
-    "uk",
-    "vi",
-    "sv",
-    "da",
-    "no",
-    "fi",
-    "id",
-    "th",
-]
+DOGRAH_STT_LANGUAGES = DEEPGRAM_LANGUAGES


@register_stt
--- a/api/services/looptalk/orchestrator.py
+++ b/api/services/looptalk/orchestrator.py
@ -14,7 +14,7 @@ from api.services.looptalk.internal_transport import (
 )
 from api.services.pipecat.transport_setup import create_internal_transport
 from pipecat.pipeline.task import PipelineTask
-from pipecat.utils.context import set_current_run_id
+from pipecat.utils.run_context import set_current_run_id

 from .core.pipeline_builder import LoopTalkPipelineBuilder
 from .core.recording_manager import RecordingManager
--- a/api/services/pipecat/pipeline_builder.py
+++ b/api/services/pipecat/pipeline_builder.py
@ -10,7 +10,7 @@ from pipecat.pipeline.pipeline import Pipeline
 from pipecat.pipeline.task import PipelineParams, PipelineTask
 from pipecat.processors.aggregators.llm_context import LLMContext
 from pipecat.processors.audio.audio_buffer_processor import AudioBufferProcessor
-from pipecat.utils.context import turn_var
+from pipecat.utils.run_context import turn_var


 def create_pipeline_components(audio_config: AudioConfig):
--- a/api/services/pipecat/run_pipeline.py
+++ b/api/services/pipecat/run_pipeline.py
@ -62,6 +62,7 @@ from pipecat.turns.user_mute import (
    MuteUntilFirstBotCompleteUserMuteStrategy,
 )
 from pipecat.turns.user_start import (
+    ExternalUserTurnStartStrategy,
    TranscriptionUserTurnStartStrategy,
 )
 from pipecat.turns.user_start.vad_user_turn_start_strategy import (
@ -69,12 +70,12 @@ from pipecat.turns.user_start.vad_user_turn_start_strategy import (
 )
 from pipecat.turns.user_stop import (
    ExternalUserTurnStopStrategy,
-    TranscriptionUserTurnStopStrategy,
+    SpeechTimeoutUserTurnStopStrategy,
    TurnAnalyzerUserTurnStopStrategy,
 )
 from pipecat.turns.user_turn_strategies import UserTurnStrategies
-from pipecat.utils.context import set_current_run_id
 from pipecat.utils.enums import EndTaskReason
+from pipecat.utils.run_context import set_current_run_id
 from pipecat.utils.tracing.context_registry import ContextProviderRegistry

 # Setup tracing if enabled
@ -265,7 +266,6 @@ async def run_pipeline_vobiz(
 async def run_pipeline_cloudonix(
    websocket_client: WebSocket,
    stream_sid: str,
-    call_sid: str,
    workflow_id: int,
    workflow_run_id: int,
    user_id: int,
@ -274,10 +274,15 @@ async def run_pipeline_cloudonix(
    logger.debug(
        f"Running pipeline for Cloudonix connection with workflow_id: {workflow_id} and workflow_run_id: {workflow_run_id}"
    )
-    set_current_run_id(workflow_run_id)
+
+    workflow_run = await db_client.get_workflow_run_by_id(workflow_run_id)
+    call_id = workflow_run.gathered_context.get("call_id")
+    if not call_id:
+        logger.warning("call_id not found in gathered_context")
+        raise Exception()

    # Store call ID in cost_info for later cost calculation (provider-agnostic)
-    cost_info = {"call_id": call_sid}
+    cost_info = {"call_id": call_id}
    await db_client.update_workflow_run(workflow_run_id, cost_info=cost_info)

    # Get workflow to extract all pipeline configurations
@ -292,26 +297,18 @@ async def run_pipeline_cloudonix(
                "ambient_noise_configuration"
            ]

-    # Retrieve session_token from workflow_run gathered_context
-    workflow_run = await db_client.get_workflow_run(workflow_run_id)
-    session_token = None
-    if workflow_run and workflow_run.gathered_context:
-        session_token = workflow_run.gathered_context.get("session_token")
-        logger.debug(f"Retrieved session_token from workflow_run: {session_token}")
-
    # Create audio configuration for Cloudonix
    audio_config = create_audio_config(WorkflowRunMode.CLOUDONIX.value)

    transport = await create_cloudonix_transport(
        websocket_client,
+        call_id,
        stream_sid,
-        call_sid,
        workflow_run_id,
        audio_config,
        workflow.organization_id,
        vad_config,
        ambient_noise_config,
-        session_token,
    )
    await _run_pipeline(
        transport,
@ -580,7 +577,10 @@ async def _run_pipeline(

    if is_deepgram_flux:
        user_turn_strategies = UserTurnStrategies(
-            start=[VADUserTurnStartStrategy(), TranscriptionUserTurnStartStrategy()],
+            start=[
+                VADUserTurnStartStrategy(),
+                ExternalUserTurnStartStrategy(enable_interruptions=True),
+            ],
            stop=[ExternalUserTurnStopStrategy()],
        )
    elif turn_stop_strategy == "turn_analyzer":
@ -598,7 +598,7 @@ async def _run_pipeline(
        # Transcription-based (default): best for short 1-2 word responses
        user_turn_strategies = UserTurnStrategies(
            start=[VADUserTurnStartStrategy(), TranscriptionUserTurnStartStrategy()],
-            stop=[TranscriptionUserTurnStopStrategy()],
+            stop=[SpeechTimeoutUserTurnStopStrategy()],
        )

    # Create user mute strategies
--- a/api/services/pipecat/service_factory.py
+++ b/api/services/pipecat/service_factory.py
@ -30,7 +30,9 @@ if TYPE_CHECKING:
    from api.services.pipecat.audio_config import AudioConfig


-def create_stt_service(user_config, audio_config: "AudioConfig", keyterms: list[str] | None = None):
+def create_stt_service(
+    user_config, audio_config: "AudioConfig", keyterms: list[str] | None = None
+):
    """Create and return appropriate STT service based on user configuration

    Args:
@ -53,7 +55,7 @@ def create_stt_service(user_config, audio_config: "AudioConfig", keyterms: list[
                    keyterm=keyterms or [],
                ),
                should_interrupt=False,  # Let UserAggregator take care of sending InterruptionFrame
-                sample_rate=audio_config.transport_in_sample_rate
+                sample_rate=audio_config.transport_in_sample_rate,
            )

        # Other models than flux
@ -64,21 +66,24 @@ def create_stt_service(user_config, audio_config: "AudioConfig", keyterms: list[
            profanity_filter=False,
            endpointing=100,
            model=user_config.stt.model,
-            keyterm=keyterms or []
+            keyterm=keyterms or [],
        )
        logger.debug(f"Using DeepGram Model - {user_config.stt.model}")
        return DeepgramSTTService(
            live_options=live_options,
            api_key=user_config.stt.api_key,
            should_interrupt=False,  # Let UserAggregator take care of sending InterruptionFrame
-            sample_rate=audio_config.transport_in_sample_rate
+            sample_rate=audio_config.transport_in_sample_rate,
        )
    elif user_config.stt.provider == ServiceProviders.OPENAI.value:
        return OpenAISTTService(
            api_key=user_config.stt.api_key, model=user_config.stt.model
        )
    elif user_config.stt.provider == ServiceProviders.CARTESIA.value:
-        return CartesiaSTTService(api_key=user_config.stt.api_key, sample_rate=audio_config.transport_in_sample_rate)
+        return CartesiaSTTService(
+            api_key=user_config.stt.api_key,
+            sample_rate=audio_config.transport_in_sample_rate,
+        )
    elif user_config.stt.provider == ServiceProviders.DOGRAH.value:
        base_url = MPS_API_URL.replace("http://", "ws://").replace("https://", "wss://")
        language = getattr(user_config.stt, "language", None) or "multi"
@ -88,7 +93,7 @@ def create_stt_service(user_config, audio_config: "AudioConfig", keyterms: list[
            model=user_config.stt.model,
            language=language,
            keyterms=keyterms,
-            sample_rate=audio_config.transport_in_sample_rate
+            sample_rate=audio_config.transport_in_sample_rate,
        )
    elif user_config.stt.provider == ServiceProviders.SARVAM.value:
        # Map Sarvam language code to pipecat Language enum
@ -112,7 +117,7 @@ def create_stt_service(user_config, audio_config: "AudioConfig", keyterms: list[
            api_key=user_config.stt.api_key,
            model=user_config.stt.model,
            params=SarvamSTTService.InputParams(language=pipecat_language),
-            sample_rate=audio_config.transport_in_sample_rate
+            sample_rate=audio_config.transport_in_sample_rate,
        )
    elif user_config.stt.provider == ServiceProviders.SPEECHMATICS.value:
        from pipecat.services.speechmatics.stt import (
@ -138,7 +143,7 @@ def create_stt_service(user_config, audio_config: "AudioConfig", keyterms: list[
                operating_point=operating_point,
                additional_vocab=additional_vocab,
            ),
-            sample_rate=audio_config.transport_in_sample_rate
+            sample_rate=audio_config.transport_in_sample_rate,
        )
    else:
        raise HTTPException(
--- a/api/services/pipecat/transport_setup.py
+++ b/api/services/pipecat/transport_setup.py
@ -94,14 +94,13 @@ async def create_twilio_transport(

 async def create_cloudonix_transport(
    websocket_client: WebSocket,
+    call_id: str,
    stream_sid: str,
-    call_sid: str,
    workflow_run_id: int,
    audio_config: AudioConfig,
    organization_id: int,
    vad_config: dict | None = None,
    ambient_noise_config: dict | None = None,
-    session_token: str | None = None,
 ):
    """Create a transport for Cloudonix connections"""

@ -125,11 +124,10 @@ async def create_cloudonix_transport(
    from pipecat.serializers.cloudonix import CloudonixFrameSerializer

    serializer = CloudonixFrameSerializer(
+        call_id=call_id,
        stream_sid=stream_sid,
-        call_sid=call_sid,
        domain_id=domain_id,
        bearer_token=bearer_token,
-        session_token=session_token,
    )

    return FastAPIWebsocketTransport(
--- a/api/services/pipecat/turn_context.py
+++ b/api/services/pipecat/turn_context.py
@ -8,7 +8,7 @@ propagate through asyncio.create_task() calls.
 import asyncio
 from typing import Dict, Optional

-from pipecat.utils.context import turn_var
+from pipecat.utils.run_context import turn_var


 class TurnContextManager:
--- a/api/services/telephony/providers/cloudonix_provider.py
+++ b/api/services/telephony/providers/cloudonix_provider.py
@ -395,10 +395,6 @@ class CloudonixProvider(TelephonyProvider):
                await websocket.close(code=4400, reason="Expected connected event")
                return

-            logger.debug(
-                f"Cloudonix WebSocket connected for workflow_run {workflow_run_id}"
-            )
-
            # Wait for "start" event with stream details
            start_msg = await websocket.receive_text()
            logger.debug(f"Received start message: {start_msg}")
@ -418,9 +414,14 @@ class CloudonixProvider(TelephonyProvider):
                await websocket.close(code=4400, reason="Missing stream identifiers")
                return

+            logger.debug(
+                f"Cloudonix WebSocket connected for workflow_run {workflow_run_id} "
+                f"stream_sid: {stream_sid} call_sid: {call_sid}"
+            )
+
            # Run the Cloudonix pipeline
            await run_pipeline_cloudonix(
-                websocket, stream_sid, call_sid, workflow_id, workflow_run_id, user_id
+                websocket, stream_sid, workflow_id, workflow_run_id, user_id
            )

        except Exception as e:
--- a/api/services/telephony/providers/twilio_provider.py
+++ b/api/services/telephony/providers/twilio_provider.py
@ -110,7 +110,7 @@ class TwilioProvider(TelephonyProvider):
                return CallInitiationResult(
                    call_id=response_data["sid"],
                    status=response_data.get("status", "queued"),
-                    provider_metadata={},  # Twilio doesn't need to persist extra data
+                    provider_metadata={"call_id": response_data["sid"]},
                    raw_response=response_data,
                )

--- a/api/services/telephony/providers/vobiz_provider.py
+++ b/api/services/telephony/providers/vobiz_provider.py
@ -150,7 +150,7 @@ class VobizProvider(TelephonyProvider):
                return CallInitiationResult(
                    call_id=call_id,
                    status="queued",  # Vobiz returns "message": "call fired"
-                    provider_metadata={},
+                    provider_metadata={"call_id": call_id},
                    raw_response=response_data,
                )

--- a/api/services/telephony/providers/vonage_provider.py
+++ b/api/services/telephony/providers/vonage_provider.py
@ -138,10 +138,8 @@ class VonageProvider(TelephonyProvider):
                    call_id=response_data["uuid"],
                    status=response_data.get("status", "started"),
                    provider_metadata={
-                        "call_uuid": response_data[
-                            "uuid"
-                        ]  # Vonage needs UUID persisted for WebSocket
-                    },
+                        "call_uuid": response_data["uuid"]
+                    },  # Vonage needs UUID persisted for WebSocket
                    raw_response=response_data,
                )

--- a/api/services/telephony/worker_event_subscriber.py
+++ b/api/services/telephony/worker_event_subscriber.py
@ -23,7 +23,7 @@ from api.services.telephony.stasis_event_protocol import (
    parse_event,
 )
 from api.services.telephony.stasis_rtp_connection import StasisRTPConnection
-from pipecat.utils.context import set_current_run_id
+from pipecat.utils.run_context import set_current_run_id


 class WorkerEventSubscriber:
--- a/api/services/workflow/pipecat_engine_callbacks.py
+++ b/api/services/workflow/pipecat_engine_callbacks.py
@ -116,6 +116,10 @@ def create_aggregation_correction_callback(engine: "PipecatEngine"):
        if corrupted in ref or len(alnum_ref) < len(alnum_corr) or len(alnum_corr) < 10:
            return corrupted

+        logger.debug(
+            f"In correct_corrupted_aggregation: ref: {ref} corrupted: {corrupted}"
+        )
+
        # 2) Find where in `ref` we should start aligning.
        #    We take the first N (N=10) characters of `corrupted`
        #    and look for all their occurrences in `ref`.
--- a/api/tasks/run_integrations.py
+++ b/api/tasks/run_integrations.py
@ -10,7 +10,7 @@ from api.db import db_client
 from api.db.models import WorkflowRunModel
 from api.utils.credential_auth import build_auth_header
 from api.utils.template_renderer import render_template
-from pipecat.utils.context import set_current_run_id
+from pipecat.utils.run_context import set_current_run_id


 async def run_integrations_post_workflow_run(_ctx, workflow_run_id: int):
--- a/api/tasks/s3_upload.py
+++ b/api/tasks/s3_upload.py
@ -6,7 +6,7 @@ from loguru import logger
 from api.db import db_client
 from api.services.storage import get_current_storage_backend, storage_fs
 from api.tasks.run_integrations import run_integrations_post_workflow_run
-from pipecat.utils.context import set_current_run_id
+from pipecat.utils.run_context import set_current_run_id


 async def upload_voicemail_audio_to_s3(
--- a/api/tasks/workflow_run_cost.py
+++ b/api/tasks/workflow_run_cost.py
@ -4,7 +4,62 @@ from api.db import db_client
 from api.enums import WorkflowRunMode
 from api.services.pricing.cost_calculator import cost_calculator
 from api.services.telephony.factory import get_telephony_provider
-from pipecat.utils.context import set_current_run_id
+from pipecat.utils.run_context import set_current_run_id
+
+
+async def _fetch_telephony_cost(workflow_run) -> dict | None:
+    """Fetch telephony call cost. Returns a dict with cost_usd and provider_name, or None."""
+    if (
+        workflow_run.mode
+        not in [WorkflowRunMode.TWILIO.value, WorkflowRunMode.VONAGE.value]
+        or not workflow_run.cost_info
+    ):
+        return None
+
+    call_id = workflow_run.cost_info.get("call_id")
+    if not call_id:
+        logger.warning(f"call_id not found in cost_info")
+        return None
+
+    provider_name = workflow_run.mode.lower() if workflow_run.mode else ""
+
+    workflow = await db_client.get_workflow_by_id(workflow_run.workflow_id)
+    if not workflow:
+        logger.warning("Workflow not found for workflow run")
+        raise Exception("Workflow not found")
+
+    provider = await get_telephony_provider(workflow.organization_id)
+    call_cost_info = await provider.get_call_cost(call_id)
+
+    if call_cost_info.get("status") == "error":
+        logger.error(
+            f"Failed to fetch {provider_name} call cost: {call_cost_info.get('error')}"
+        )
+        return None
+
+    cost_usd = call_cost_info.get("cost_usd", 0.0)
+    logger.info(
+        f"{provider_name.title()} call cost: ${cost_usd:.6f} USD for call {call_id}"
+    )
+    return {"cost_usd": cost_usd, "provider_name": provider_name}
+
+
+async def _update_organization_usage(
+    org, dograh_tokens: float, duration_seconds: float, charge_usd: float | None
+) -> None:
+    """Update organization usage after a workflow run."""
+    org_id = org.id
+    await db_client.update_usage_after_run(
+        org_id, dograh_tokens, duration_seconds, charge_usd
+    )
+    if charge_usd is not None:
+        logger.info(
+            f"Updated organization usage with ${charge_usd:.2f} USD ({dograh_tokens} Dograh Tokens) and {duration_seconds}s duration for org {org_id}"
+        )
+    else:
+        logger.info(
+            f"Updated organization usage with {dograh_tokens} Dograh Tokens and {duration_seconds}s duration for org {org_id}"
+        )


 async def calculate_workflow_run_cost(ctx, workflow_run_id: int):
@ -26,62 +81,20 @@ async def calculate_workflow_run_cost(ctx, workflow_run_id: int):
        # Calculate cost breakdown
        cost_breakdown = cost_calculator.calculate_total_cost(workflow_usage_info)

-        # Fetch telephony call cost for both Twilio and Vonage
-        telephony_cost_usd = 0.0
-        if (
-            workflow_run.mode
-            in [WorkflowRunMode.TWILIO.value, WorkflowRunMode.VONAGE.value]
-            and workflow_run.cost_info
-        ):
-            # Get the call ID (provider-agnostic approach with backward compatibility)
-            call_id = workflow_run.cost_info.get("call_id")
-
-            # Fallback to legacy provider-specific fields if needed
-            if not call_id:
-                if workflow_run.mode == WorkflowRunMode.TWILIO.value:
-                    call_id = workflow_run.cost_info.get("twilio_call_sid")
-                elif workflow_run.mode == WorkflowRunMode.VONAGE.value:
-                    call_id = workflow_run.cost_info.get("vonage_call_uuid")
-
-            # Provider name is derived from workflow run mode
-            provider_name = workflow_run.mode.lower() if workflow_run.mode else ""
-
-            if call_id:
-                try:
-                    # Get workflow to access organization_id
-                    workflow = await db_client.get_workflow_by_id(
-                        workflow_run.workflow_id
-                    )
-                    if not workflow:
-                        logger.warning("Workflow not found for workflow run")
-                        raise Exception("Workflow not found")
-
-                    # Use telephony provider abstraction
-                    provider = await get_telephony_provider(workflow.organization_id)
-                    call_cost_info = await provider.get_call_cost(call_id)
-
-                    if call_cost_info.get("status") != "error":
-                        telephony_cost_usd = call_cost_info.get("cost_usd", 0.0)
-                        cost_breakdown["telephony_call"] = telephony_cost_usd
-                        cost_breakdown[f"{provider_name}_call"] = (
-                            telephony_cost_usd  # Keep backward compatibility
-                        )
-
-                        # Add telephony cost to the total
-                        cost_breakdown["total"] = (
-                            float(cost_breakdown["total"]) + telephony_cost_usd
-                        )
-                        logger.info(
-                            f"{provider_name.title()} call cost: ${telephony_cost_usd:.6f} USD for call {call_id}"
-                        )
-                    else:
-                        logger.error(
-                            f"Failed to fetch {provider_name} call cost: {call_cost_info.get('error')}"
-                        )
-
-                except Exception as e:
-                    logger.error(f"Failed to fetch telephony call cost: {e}")
-                    # Don't fail the whole cost calculation if telephony API fails
+        # Fetch telephony call cost
+        try:
+            telephony_cost = await _fetch_telephony_cost(workflow_run)
+            if telephony_cost:
+                telephony_cost_usd = telephony_cost["cost_usd"]
+                provider_name = telephony_cost["provider_name"]
+                cost_breakdown["telephony_call"] = telephony_cost_usd
+                cost_breakdown[f"{provider_name}_call"] = telephony_cost_usd
+                cost_breakdown["total"] = (
+                    float(cost_breakdown["total"]) + telephony_cost_usd
+                )
+        except Exception as e:
+            logger.error(f"Failed to fetch telephony call cost: {e}")
+            # Don't fail the whole cost calculation if telephony API fails

        # Store cost information back to the workflow run
        # We'll add the cost breakdown to the workflow run
@ -106,6 +119,7 @@ async def calculate_workflow_run_cost(ctx, workflow_run_id: int):
            charge_usd = duration_seconds * org.price_per_second_usd

        cost_info = {
+            **workflow_run.cost_info,
            "cost_breakdown": cost_breakdown,
            "total_cost_usd": float(cost_breakdown["total"]),
            "dograh_token_usage": dograh_tokens,
@ -118,42 +132,19 @@ async def calculate_workflow_run_cost(ctx, workflow_run_id: int):
            cost_info["charge_usd"] = charge_usd
            cost_info["price_per_second_usd"] = org.price_per_second_usd

-        # Preserve call ID (provider-agnostic with backward compatibility)
-        if workflow_run.cost_info:
-            # Preserve generic call_id if it exists
-            if "call_id" in workflow_run.cost_info:
-                cost_info["call_id"] = workflow_run.cost_info["call_id"]
-            # Also preserve legacy fields for backward compatibility
-            elif "twilio_call_sid" in workflow_run.cost_info:
-                cost_info["twilio_call_sid"] = workflow_run.cost_info["twilio_call_sid"]
-            elif "vonage_call_uuid" in workflow_run.cost_info:
-                cost_info["vonage_call_uuid"] = workflow_run.cost_info[
-                    "vonage_call_uuid"
-                ]
-
        # Update workflow run with cost information
        await db_client.update_workflow_run(run_id=workflow_run_id, cost_info=cost_info)

        # Update organization usage if applicable
        if org:
-            org_id = org.id
            try:
                duration_seconds = workflow_usage_info.get("call_duration_seconds", 0)
-                # Pass USD amount if organization has pricing
-                await db_client.update_usage_after_run(
-                    org_id, dograh_tokens, duration_seconds, charge_usd
+                await _update_organization_usage(
+                    org, dograh_tokens, duration_seconds, charge_usd
                )
-                if charge_usd is not None:
-                    logger.info(
-                        f"Updated organization usage with ${charge_usd:.2f} USD ({dograh_tokens} Dograh Tokens) and {duration_seconds}s duration for org {org_id}"
-                    )
-                else:
-                    logger.info(
-                        f"Updated organization usage with {dograh_tokens} Dograh Tokens and {duration_seconds}s duration for org {org_id}"
-                    )
            except Exception as e:
                logger.error(
-                    f"Failed to update organization usage for org {org_id}: {e}"
+                    f"Failed to update organization usage for org {org.id}: {e}"
                )
                # Don't fail the whole task if usage update fails

--- a/api/tests/test_pipecat_engine_context_update.py
+++ b/api/tests/test_pipecat_engine_context_update.py
@ -31,8 +31,8 @@ from pipecat.pipeline.pipeline import Pipeline
 from pipecat.pipeline.runner import PipelineRunner
 from pipecat.pipeline.task import PipelineParams, PipelineTask
 from pipecat.processors.aggregators.llm_context import LLMContext
-from pipecat.processors.aggregators.llm_response import LLMAssistantAggregatorParams
 from pipecat.processors.aggregators.llm_response_universal import (
+    LLMAssistantAggregatorParams,
    LLMContextAggregatorPair,
 )
 from pipecat.tests import MockLLMService, MockTTSService
--- a/api/tests/test_pipecat_engine_end_call.py
+++ b/api/tests/test_pipecat_engine_end_call.py
@ -46,8 +46,8 @@ from pipecat.pipeline.pipeline import Pipeline
 from pipecat.pipeline.runner import PipelineRunner
 from pipecat.pipeline.task import PipelineParams, PipelineTask
 from pipecat.processors.aggregators.llm_context import LLMContext
-from pipecat.processors.aggregators.llm_response import LLMAssistantAggregatorParams
 from pipecat.processors.aggregators.llm_response_universal import (
+    LLMAssistantAggregatorParams,
    LLMContextAggregatorPair,
    LLMUserAggregatorParams,
 )
--- a/api/tests/test_pipecat_engine_node_switch_with_user_speech.py
+++ b/api/tests/test_pipecat_engine_node_switch_with_user_speech.py
@ -30,8 +30,8 @@ from pipecat.pipeline.pipeline import Pipeline
 from pipecat.pipeline.runner import PipelineRunner
 from pipecat.pipeline.task import PipelineParams, PipelineTask
 from pipecat.processors.aggregators.llm_context import LLMContext
-from pipecat.processors.aggregators.llm_response import LLMAssistantAggregatorParams
 from pipecat.processors.aggregators.llm_response_universal import (
+    LLMAssistantAggregatorParams,
    LLMContextAggregatorPair,
    LLMUserAggregatorParams,
 )
--- a/api/tests/test_pipecat_engine_tool_calls.py
+++ b/api/tests/test_pipecat_engine_tool_calls.py
@ -18,8 +18,8 @@ from pipecat.pipeline.pipeline import Pipeline
 from pipecat.pipeline.runner import PipelineRunner
 from pipecat.pipeline.task import PipelineParams, PipelineTask
 from pipecat.processors.aggregators.llm_context import LLMContext
-from pipecat.processors.aggregators.llm_response import LLMAssistantAggregatorParams
 from pipecat.processors.aggregators.llm_response_universal import (
+    LLMAssistantAggregatorParams,
    LLMContextAggregatorPair,
 )
 from pipecat.tests import MockLLMService, MockTTSService
--- a/api/tests/test_pipecat_engine_variable_extraction.py
+++ b/api/tests/test_pipecat_engine_variable_extraction.py
@ -27,8 +27,8 @@ from pipecat.pipeline.pipeline import Pipeline
 from pipecat.pipeline.runner import PipelineRunner
 from pipecat.pipeline.task import PipelineParams, PipelineTask
 from pipecat.processors.aggregators.llm_context import LLMContext
-from pipecat.processors.aggregators.llm_response import LLMAssistantAggregatorParams
 from pipecat.processors.aggregators.llm_response_universal import (
+    LLMAssistantAggregatorParams,
    LLMContextAggregatorPair,
 )
 from pipecat.tests import MockLLMService, MockTTSService
--- a/api/tests/test_tts_endframe_with_audio_write_failure.py
+++ b/api/tests/test_tts_endframe_with_audio_write_failure.py
@ -43,8 +43,8 @@ from pipecat.pipeline.pipeline import Pipeline
 from pipecat.pipeline.runner import PipelineRunner
 from pipecat.pipeline.task import PipelineParams, PipelineTask
 from pipecat.processors.aggregators.llm_context import LLMContext
-from pipecat.processors.aggregators.llm_response import LLMAssistantAggregatorParams
 from pipecat.processors.aggregators.llm_response_universal import (
+    LLMAssistantAggregatorParams,
    LLMContextAggregatorPair,
    LLMUserAggregatorParams,
 )
--- a/api/tests/test_user_idle_handler.py
+++ b/api/tests/test_user_idle_handler.py
@ -29,8 +29,8 @@ from pipecat.pipeline.pipeline import Pipeline
 from pipecat.pipeline.runner import PipelineRunner
 from pipecat.pipeline.task import PipelineParams, PipelineTask
 from pipecat.processors.aggregators.llm_context import LLMContext
-from pipecat.processors.aggregators.llm_response import LLMAssistantAggregatorParams
 from pipecat.processors.aggregators.llm_response_universal import (
+    LLMAssistantAggregatorParams,
    LLMContextAggregatorPair,
    LLMUserAggregatorParams,
 )
--- a/api/tests/test_user_muting_during_bot_speech.py
+++ b/api/tests/test_user_muting_during_bot_speech.py
@ -34,8 +34,8 @@ from pipecat.pipeline.pipeline import Pipeline
 from pipecat.pipeline.runner import PipelineRunner
 from pipecat.pipeline.task import PipelineParams, PipelineTask
 from pipecat.processors.aggregators.llm_context import LLMContext
-from pipecat.processors.aggregators.llm_response import LLMAssistantAggregatorParams
 from pipecat.processors.aggregators.llm_response_universal import (
+    LLMAssistantAggregatorParams,
    LLMContextAggregatorPair,
    LLMUserAggregator,
    LLMUserAggregatorParams,
--- a/api/tests/test_voicemail_detector.py
+++ b/api/tests/test_voicemail_detector.py
@ -21,8 +21,8 @@ from pipecat.pipeline.pipeline import Pipeline
 from pipecat.pipeline.runner import PipelineRunner
 from pipecat.pipeline.task import PipelineParams, PipelineTask
 from pipecat.processors.aggregators.llm_context import LLMContext
-from pipecat.processors.aggregators.llm_response import LLMAssistantAggregatorParams
 from pipecat.processors.aggregators.llm_response_universal import (
+    LLMAssistantAggregatorParams,
    LLMContextAggregatorPair,
    LLMUserAggregatorParams,
 )
--- a/2
+++ b/2
@ -1 +1 @@
-Subproject commit d67983b3b165f945a93e5ce594f47781a96bff9b
+Subproject commit e180bd3c2abc3cebbdf5e2d7955d9928cca5d219
--- a/ui/package.json
+++ b/ui/package.json
@ -1,6 +1,6 @@
 {
  "name": "ui",
-  "version": "1.12.0",
+  "version": "1.13.0",
  "private": true,
  "scripts": {
    "dev": "NODE_OPTIONS='--enable-source-maps' next dev --turbopack",
--- a/ui/src/app/superadmin/runs/page.tsx
+++ b/ui/src/app/superadmin/runs/page.tsx
@ -7,7 +7,7 @@ import { useCallback, useEffect, useState } from "react";

 import { getWorkflowRunsApiV1SuperuserWorkflowRunsGet, setAdminCommentApiV1SuperuserWorkflowRunsRunIdCommentPost } from '@/client/sdk.gen';
 import { FilterBuilder } from "@/components/filters/FilterBuilder";
-import { MediaPreviewButtons, MediaPreviewDialog } from '@/components/MediaPreviewDialog';
+import { MediaPreviewButton, MediaPreviewDialog } from '@/components/MediaPreviewDialog';
 import { Badge } from "@/components/ui/badge";
 import { Button } from "@/components/ui/button";
 import { Card, CardContent, CardDescription, CardHeader, CardTitle } from "@/components/ui/card";
@ -515,12 +515,11 @@ export default function RunsPage() {
                                                    </TableCell>
                                                    <TableCell>
                                                        <div className="flex space-x-2">
-                                                            <MediaPreviewButtons
+                                                            <MediaPreviewButton
                                                                recordingUrl={run.recording_url}
                                                                transcriptUrl={run.transcript_url}
                                                                runId={run.id}
-                                                                onOpenAudio={mediaPreview.openAudioModal}
-                                                                onOpenTranscript={mediaPreview.openTranscriptModal}
+                                                                onOpenPreview={mediaPreview.openPreview}
                                                                onSelect={setSelectedRowId}
                                                            />
                                                            <Button
--- a/ui/src/app/usage/page.tsx
+++ b/ui/src/app/usage/page.tsx
@ -9,7 +9,7 @@ import { getCurrentPeriodUsageApiV1OrganizationsUsageCurrentPeriodGet, getDailyU
 import type { CurrentUsageResponse, DailyUsageBreakdownResponse,UsageHistoryResponse, WorkflowRunUsageResponse } from '@/client/types.gen';
 import { DailyUsageTable } from '@/components/DailyUsageTable';
 import { FilterBuilder } from '@/components/filters/FilterBuilder';
-import { MediaPreviewButtons, MediaPreviewDialog } from '@/components/MediaPreviewDialog';
+import { MediaPreviewButton, MediaPreviewDialog } from '@/components/MediaPreviewDialog';
 import { Badge } from '@/components/ui/badge';
 import { Button } from '@/components/ui/button';
 import { Card, CardContent, CardDescription, CardHeader, CardTitle } from '@/components/ui/card';
@ -551,12 +551,11 @@ export default function UsagePage() {
                                                        }
                                                    </TableCell>
                                                    <TableCell>
-                                                        <MediaPreviewButtons
+                                                        <MediaPreviewButton
                                                            recordingUrl={run.recording_url}
                                                            transcriptUrl={run.transcript_url}
                                                            runId={run.id}
-                                                            onOpenAudio={mediaPreview.openAudioModal}
-                                                            onOpenTranscript={mediaPreview.openTranscriptModal}
+                                                            onOpenPreview={mediaPreview.openPreview}
                                                        />
                                                    </TableCell>
                                                </TableRow>
--- a/ui/src/app/workflow/[workflowId]/run/[runId]/page.tsx
+++ b/ui/src/app/workflow/[workflowId]/run/[runId]/page.tsx
@ -9,7 +9,7 @@ import BrowserCall from '@/app/workflow/[workflowId]/run/[runId]/BrowserCall';
 import { RealtimeFeedback, WorkflowRunLogs } from '@/app/workflow/[workflowId]/run/[runId]/components/RealtimeFeedback';
 import WorkflowLayout from '@/app/workflow/WorkflowLayout';
 import { getWorkflowRunApiV1WorkflowWorkflowIdRunsRunIdGet } from '@/client/sdk.gen';
-import { MediaPreviewButtons, MediaPreviewDialog } from '@/components/MediaPreviewDialog';
+import { MediaPreviewButton, MediaPreviewDialog } from '@/components/MediaPreviewDialog';
 import { OnboardingTooltip } from '@/components/onboarding/OnboardingTooltip';
 import { Button } from '@/components/ui/button';
 import { Card, CardContent, CardFooter, CardHeader, CardTitle } from '@/components/ui/card';
@ -92,7 +92,7 @@ export default function WorkflowRunPage() {
        }
    }, [auth]);

-    const { openAudioModal, openTranscriptModal, dialog } = MediaPreviewDialog({ accessToken });
+    const { openPreview, dialog } = MediaPreviewDialog({ accessToken });

    useEffect(() => {
        const fetchWorkflowRun = async () => {
@ -187,12 +187,11 @@ export default function WorkflowRunPage() {
                            <div className="flex flex-wrap gap-4">
                                <div className="flex items-center gap-2">
                                    <span className="text-sm text-muted-foreground">Preview:</span>
-                                    <MediaPreviewButtons
+                                    <MediaPreviewButton
                                        recordingUrl={workflowRun?.recording_url}
                                        transcriptUrl={workflowRun?.transcript_url}
                                        runId={Number(params.runId)}
-                                        onOpenAudio={openAudioModal}
-                                        onOpenTranscript={openTranscriptModal}
+                                        onOpenPreview={openPreview}
                                    />
                                </div>
                                <div className="flex items-center gap-2 border-l border-border pl-4">
--- a/ui/src/components/MediaPreviewDialog.tsx
+++ b/ui/src/components/MediaPreviewDialog.tsx
@ -1,6 +1,6 @@
 'use client';

-import { FileText, Loader2, Video } from 'lucide-react';
+import { Headphones, Loader2 } from 'lucide-react';
 import { useCallback, useState } from 'react';

 import { Button } from '@/components/ui/button';
@ -20,65 +20,56 @@ interface MediaPreviewDialogProps {

 export function MediaPreviewDialog({ accessToken }: MediaPreviewDialogProps) {
    const [isOpen, setIsOpen] = useState(false);
-    const [mediaType, setMediaType] = useState<'audio' | 'transcript' | null>(null);
-    const [mediaSignedUrl, setMediaSignedUrl] = useState<string | null>(null);
+    const [audioSignedUrl, setAudioSignedUrl] = useState<string | null>(null);
    const [transcriptContent, setTranscriptContent] = useState<string | null>(null);
    const [selectedRunId, setSelectedRunId] = useState<number | null>(null);
-    const [mediaDownloadKey, setMediaDownloadKey] = useState<string | null>(null);
+    const [recordingKey, setRecordingKey] = useState<string | null>(null);
+    const [transcriptKey, setTranscriptKey] = useState<string | null>(null);
    const [mediaLoading, setMediaLoading] = useState(false);

-    const openAudioModal = useCallback(
-        async (fileKey: string | null, runId: number) => {
-            if (!fileKey || !accessToken) return;
-            setMediaLoading(true);
-            const signed = await getSignedUrl(fileKey, accessToken);
-            if (signed) {
-                setMediaType('audio');
-                setMediaSignedUrl(signed);
-                setMediaDownloadKey(fileKey);
-                setSelectedRunId(runId);
-                setIsOpen(true);
-            }
-            setMediaLoading(false);
-        },
-        [accessToken],
-    );
-
-    const openTranscriptModal = useCallback(
-        async (fileKey: string | null, runId: number) => {
-            if (!fileKey || !accessToken) return;
+    const openPreview = useCallback(
+        async (recordingUrl: string | null, transcriptUrl: string | null, runId: number) => {
+            if (!accessToken || (!recordingUrl && !transcriptUrl)) return;
            setMediaLoading(true);
+            setAudioSignedUrl(null);
            setTranscriptContent(null);
-            const signed = await getSignedUrl(fileKey, accessToken, true);
-            if (signed) {
-                setMediaType('transcript');
-                setMediaSignedUrl(signed);
-                setMediaDownloadKey(fileKey);
-                setSelectedRunId(runId);
-                setIsOpen(true);
-                // Fetch transcript content with proper UTF-8 encoding
+            setRecordingKey(recordingUrl);
+            setTranscriptKey(transcriptUrl);
+            setSelectedRunId(runId);
+            setIsOpen(true);
+
+            const [audioResult, transcriptResult] = await Promise.all([
+                recordingUrl ? getSignedUrl(recordingUrl, accessToken) : null,
+                transcriptUrl ? getSignedUrl(transcriptUrl, accessToken, true) : null,
+            ]);
+
+            if (audioResult) {
+                setAudioSignedUrl(audioResult);
+            }
+
+            if (transcriptResult) {
                try {
-                    const response = await fetch(signed);
+                    const response = await fetch(transcriptResult);
                    const text = await response.text();
                    setTranscriptContent(text);
                } catch (error) {
                    console.error('Error fetching transcript:', error);
                }
            }
+
            setMediaLoading(false);
        },
        [accessToken],
    );

    return {
-        openAudioModal,
-        openTranscriptModal,
+        openPreview,
        dialog: (
            <Dialog open={isOpen} onOpenChange={setIsOpen}>
                <DialogContent className="sm:max-w-2xl">
                    <DialogHeader>
                        <DialogTitle>
-                            {mediaType === 'audio' ? 'Recording Preview' : 'Transcript Preview'}
+                            Run Preview
                            {selectedRunId && ` - Run #${selectedRunId}`}
                        </DialogTitle>
                    </DialogHeader>
@ -90,23 +81,38 @@ export function MediaPreviewDialog({ accessToken }: MediaPreviewDialogProps) {
                        </div>
                    )}

-                    {!mediaLoading && mediaType === 'audio' && mediaSignedUrl && (
-                        <audio src={mediaSignedUrl} controls autoPlay className="w-full mt-4" />
+                    {!mediaLoading && audioSignedUrl && (
+                        <audio src={audioSignedUrl} controls autoPlay className="w-full mt-4" />
                    )}

-                    {!mediaLoading && mediaType === 'transcript' && transcriptContent && (
+                    {!mediaLoading && transcriptContent && (
                        <pre className="w-full h-[60vh] overflow-auto border rounded-md mt-4 p-4 bg-muted text-sm whitespace-pre-wrap font-mono">
                            {transcriptContent}
                        </pre>
                    )}

+                    {!mediaLoading && !audioSignedUrl && !transcriptContent && (
+                        <div className="flex items-center justify-center py-8 text-muted-foreground">
+                            No recording or transcript available.
+                        </div>
+                    )}
+
                    <DialogFooter className="pt-4">
                        <DialogClose asChild>
                            <Button variant="secondary">Close</Button>
                        </DialogClose>
-                        {mediaDownloadKey && accessToken && (
-                            <Button onClick={() => downloadFile(mediaDownloadKey, accessToken)}>Download</Button>
-                        )}
+                        <div className="flex gap-2">
+                            {recordingKey && accessToken && (
+                                <Button variant="outline" onClick={() => downloadFile(recordingKey, accessToken)}>
+                                    Download Recording
+                                </Button>
+                            )}
+                            {transcriptKey && accessToken && (
+                                <Button variant="outline" onClick={() => downloadFile(transcriptKey, accessToken)}>
+                                    Download Transcript
+                                </Button>
+                            )}
+                        </div>
                    </DialogFooter>
                </DialogContent>
            </Dialog>
@ -114,53 +120,35 @@ export function MediaPreviewDialog({ accessToken }: MediaPreviewDialogProps) {
    };
 }

-interface MediaPreviewButtonsProps {
+interface MediaPreviewButtonProps {
    recordingUrl: string | null | undefined;
    transcriptUrl: string | null | undefined;
    runId: number;
-    onOpenAudio: (fileKey: string | null, runId: number) => void;
-    onOpenTranscript: (fileKey: string | null, runId: number) => void;
+    onOpenPreview: (recordingUrl: string | null, transcriptUrl: string | null, runId: number) => void;
    onSelect?: (runId: number) => void;
 }

-export function MediaPreviewButtons({
+export function MediaPreviewButton({
    recordingUrl,
    transcriptUrl,
    runId,
-    onOpenAudio,
-    onOpenTranscript,
+    onOpenPreview,
    onSelect,
-}: MediaPreviewButtonsProps) {
-    const handleOpenAudio = () => {
-        onSelect?.(runId);
-        onOpenAudio(recordingUrl ?? null, runId);
-    };
+}: MediaPreviewButtonProps) {
+    if (!recordingUrl && !transcriptUrl) return null;

-    const handleOpenTranscript = () => {
+    const handleOpen = () => {
        onSelect?.(runId);
-        onOpenTranscript(transcriptUrl ?? null, runId);
+        onOpenPreview(recordingUrl ?? null, transcriptUrl ?? null, runId);
    };

    return (
-        <div className="flex space-x-2">
-            {recordingUrl && (
-                <Button
-                    variant="outline"
-                    size="icon"
-                    onClick={handleOpenAudio}
-                >
-                    <Video className="h-4 w-4" />
-                </Button>
-            )}
-            {transcriptUrl && (
-                <Button
-                    variant="outline"
-                    size="icon"
-                    onClick={handleOpenTranscript}
-                >
-                    <FileText className="h-4 w-4" />
-                </Button>
-            )}
-        </div>
+        <Button
+            variant="outline"
+            size="icon"
+            onClick={handleOpen}
+        >
+            <Headphones className="h-4 w-4" />
+        </Button>
    );
 }
--- a/ui/src/components/ServiceConfiguration.tsx
+++ b/ui/src/components/ServiceConfiguration.tsx
@ -21,6 +21,7 @@ interface SchemaProperty {
    default?: string | number | boolean;
    enum?: string[];
    examples?: string[];
+    model_options?: Record<string, string[]>;
    $ref?: string;
    description?: string;
    format?: string;
@ -46,38 +47,89 @@ const TAB_CONFIG: { key: ServiceSegment; label: string }[] = [

 // Display names for language codes (Deepgram + Sarvam)
 const LANGUAGE_DISPLAY_NAMES: Record<string, string> = {
-    // Deepgram languages
    "multi": "Multilingual (Auto-detect)",
+    // Arabic
+    "ar": "Arabic",
+    "ar-AE": "Arabic (UAE)",
+    "ar-SA": "Arabic (Saudi Arabia)",
+    "ar-QA": "Arabic (Qatar)",
+    "ar-KW": "Arabic (Kuwait)",
+    "ar-SY": "Arabic (Syria)",
+    "ar-LB": "Arabic (Lebanon)",
+    "ar-PS": "Arabic (Palestine)",
+    "ar-JO": "Arabic (Jordan)",
+    "ar-EG": "Arabic (Egypt)",
+    "ar-SD": "Arabic (Sudan)",
+    "ar-TD": "Arabic (Chad)",
+    "ar-MA": "Arabic (Morocco)",
+    "ar-DZ": "Arabic (Algeria)",
+    "ar-TN": "Arabic (Tunisia)",
+    "ar-IQ": "Arabic (Iraq)",
+    "ar-IR": "Arabic (Iran)",
+    // Other languages
+    "be": "Belarusian",
+    "bn": "Bengali",
+    "bs": "Bosnian",
+    "bg": "Bulgarian",
+    "ca": "Catalan",
+    "cs": "Czech",
+    "da": "Danish",
+    "da-DK": "Danish (Denmark)",
+    "de": "German",
+    "de-CH": "German (Switzerland)",
+    "el": "Greek",
    "en": "English",
    "en-US": "English (US)",
-    "en-GB": "English (UK)",
    "en-AU": "English (Australia)",
+    "en-GB": "English (UK)",
    "en-IN": "English (India)",
+    "en-NZ": "English (New Zealand)",
    "es": "Spanish",
    "es-419": "Spanish (Latin America)",
+    "et": "Estonian",
+    "fa": "Persian",
+    "fi": "Finnish",
    "fr": "French",
    "fr-CA": "French (Canada)",
-    "de": "German",
+    "he": "Hebrew",
+    "hi": "Hindi",
+    "hr": "Croatian",
+    "hu": "Hungarian",
+    "id": "Indonesian",
    "it": "Italian",
+    "ja": "Japanese",
+    "kn": "Kannada",
+    "ko": "Korean",
+    "ko-KR": "Korean (South Korea)",
+    "lt": "Lithuanian",
+    "lv": "Latvian",
+    "mk": "Macedonian",
+    "mr": "Marathi",
+    "ms": "Malay",
+    "nl": "Dutch",
+    "nl-BE": "Flemish",
+    "no": "Norwegian",
+    "pl": "Polish",
    "pt": "Portuguese",
    "pt-BR": "Portuguese (Brazil)",
-    "nl": "Dutch",
-    "hi": "Hindi",
-    "ja": "Japanese",
-    "ko": "Korean",
-    "zh-CN": "Chinese (Simplified)",
-    "zh-TW": "Chinese (Traditional)",
+    "pt-PT": "Portuguese (Portugal)",
+    "ro": "Romanian",
    "ru": "Russian",
-    "pl": "Polish",
+    "sk": "Slovak",
+    "sl": "Slovenian",
+    "sr": "Serbian",
+    "sv": "Swedish",
+    "sv-SE": "Swedish (Sweden)",
+    "ta": "Tamil",
+    "te": "Telugu",
+    "th": "Thai",
+    "tl": "Tagalog",
    "tr": "Turkish",
    "uk": "Ukrainian",
+    "ur": "Urdu",
    "vi": "Vietnamese",
-    "sv": "Swedish",
-    "da": "Danish",
-    "no": "Norwegian",
-    "fi": "Finnish",
-    "id": "Indonesian",
-    "th": "Thai",
+    "zh-CN": "Chinese (Simplified)",
+    "zh-TW": "Chinese (Traditional)",
    // Sarvam Indian languages
    "bn-IN": "Bengali",
    "gu-IN": "Gujarati",
@ -212,6 +264,34 @@ export default function ServiceConfiguration() {
        }
    }, [schemas, serviceProviders.llm, userConfig?.llm?.model, hasCheckedManualMode]);

+    // Reset voice when TTS model changes if the provider has model-dependent voice options
+    const ttsModel = watch("tts_model");
+    useEffect(() => {
+        const voiceSchema = schemas?.tts?.[serviceProviders.tts]?.properties?.voice;
+        const modelOptions = voiceSchema?.model_options;
+        if (!modelOptions || !ttsModel) return;
+
+        const validVoices = modelOptions[ttsModel as string];
+        const currentVoice = getValues("tts_voice") as string;
+        if (validVoices && currentVoice && !validVoices.includes(currentVoice)) {
+            setValue("tts_voice", validVoices[0], { shouldDirty: true });
+        }
+    }, [ttsModel, serviceProviders.tts, setValue, getValues, schemas]);
+
+    // Reset language when STT model changes if the provider has model-dependent language options
+    const sttModel = watch("stt_model");
+    useEffect(() => {
+        const languageSchema = schemas?.stt?.[serviceProviders.stt]?.properties?.language;
+        const modelOptions = languageSchema?.model_options;
+        if (!modelOptions || !sttModel) return;
+
+        const validLanguages = modelOptions[sttModel as string];
+        const currentLanguage = getValues("stt_language") as string;
+        if (validLanguages && currentLanguage && !validLanguages.includes(currentLanguage)) {
+            setValue("stt_language", validLanguages[0], { shouldDirty: true });
+        }
+    }, [sttModel, serviceProviders.stt, setValue, getValues, schemas]);
+
    const handleProviderChange = (service: ServiceSegment, providerName: string) => {
        if (!providerName) {
            return;
@ -325,15 +405,6 @@ export default function ServiceConfiguration() {
            field => field !== "provider" && field !== "api_key"
        );

-        // For Deepgram STT, hide language field when flux-general-en model is selected
-        // Flux model is English-only and doesn't support language selection
-        if (service === "stt" && currentProvider === "deepgram") {
-            const currentModel = watch("stt_model") as string;
-            if (currentModel === "flux-general-en") {
-                return fields.filter(field => field !== "language");
-            }
-        }
-
        return fields;
    };

@ -516,7 +587,16 @@ export default function ServiceConfiguration() {
        }

        // Handle fields with enum or examples (dropdown options)
-        const dropdownOptions = actualSchema?.enum || actualSchema?.examples;
+        let dropdownOptions = actualSchema?.enum || actualSchema?.examples;
+
+        // Use model-dependent options when available (e.g., Sarvam voices per model)
+        if (actualSchema?.model_options) {
+            const modelValue = watch(`${service}_model`) as string;
+            if (modelValue && actualSchema.model_options[modelValue]) {
+                dropdownOptions = actualSchema.model_options[modelValue];
+            }
+        }
+
        if (dropdownOptions && dropdownOptions.length > 0) {
            // Use friendly display names for language and voice fields
            const getDisplayName = (value: string) => {
@ -524,7 +604,7 @@ export default function ServiceConfiguration() {
                    return LANGUAGE_DISPLAY_NAMES[value] || value;
                }
                if (field === "voice") {
-                    return VOICE_DISPLAY_NAMES[value] || value;
+                    return VOICE_DISPLAY_NAMES[value] || value.charAt(0).toUpperCase() + value.slice(1);
                }
                return value;
            };
--- a/ui/src/components/workflow-runs/WorkflowRunsTable.tsx
+++ b/ui/src/components/workflow-runs/WorkflowRunsTable.tsx
@ -5,7 +5,7 @@ import { useState } from "react";

 import { WorkflowRunResponseSchema } from "@/client/types.gen";
 import { FilterBuilder } from "@/components/filters/FilterBuilder";
-import { MediaPreviewButtons, MediaPreviewDialog } from "@/components/MediaPreviewDialog";
+import { MediaPreviewButton, MediaPreviewDialog } from "@/components/MediaPreviewDialog";
 import { Badge } from "@/components/ui/badge";
 import { Button } from "@/components/ui/button";
 import { Card, CardContent, CardDescription, CardHeader, CardTitle } from "@/components/ui/card";
@ -211,12 +211,11 @@ export function WorkflowRunsTable({
                                            </TableCell>
                                            <TableCell>
                                                <div className="flex space-x-2" onClick={(e) => e.stopPropagation()}>
-                                                    <MediaPreviewButtons
+                                                    <MediaPreviewButton
                                                        recordingUrl={run.recording_url}
                                                        transcriptUrl={run.transcript_url}
                                                        runId={run.id}
-                                                        onOpenAudio={mediaPreview.openAudioModal}
-                                                        onOpenTranscript={mediaPreview.openTranscriptModal}
+                                                        onOpenPreview={mediaPreview.openPreview}
                                                        onSelect={setSelectedRowId}
                                                    />
                                                    <Button