diff --git a/.release-please-manifest.json b/.release-please-manifest.json index 3e217c42..2baa8f54 100644 --- a/.release-please-manifest.json +++ b/.release-please-manifest.json @@ -1,3 +1,3 @@ { - ".": "1.12.0" + ".": "1.13.0" } \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index 4be6fa49..cd715c62 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,23 @@ # Changelog +## [1.13.0](https://github.com/dograh-hq/dograh/compare/dograh-v1.12.0...dograh-v1.13.0) (2026-02-13) + + +### Features + +* add languages for deepgram and dograh ([5256010](https://github.com/dograh-hq/dograh/commit/525601088accb763aa710ff6e0fb2a6fdf5acec2)) +* add openrouter support ([4c936ae](https://github.com/dograh-hq/dograh/commit/4c936ae57d14ea3aa40ea427b20b341776a9be9f)) +* add sarvam v3 voices ([a75bc72](https://github.com/dograh-hq/dograh/commit/a75bc72cb59537098b3b605a012a2ef8a3f0fe6a)) +* limit campaign concurrency to number of CLIs ([3cdede0](https://github.com/dograh-hq/dograh/commit/3cdede0f45de5d719c17797635464742ad2eac86)) + + +### Bug Fixes + +* add vad_analyzer in user aggregator ([6711dcb](https://github.com/dograh-hq/dograh/commit/6711dcb3ea7b7f78d7db5794fefc87380b8b751b)) +* fix cloudonix call hangup ([#154](https://github.com/dograh-hq/dograh/issues/154)) ([b9ddd30](https://github.com/dograh-hq/dograh/commit/b9ddd308134056db4d24a37702d151c9ee45e72d)) +* fixes aggregation in elevenlabs TTS ([#153](https://github.com/dograh-hq/dograh/issues/153)) ([e156524](https://github.com/dograh-hq/dograh/commit/e1565246fa6ecc5b73a663e9251a92d0796d19d3)) +* send sample rate to STT services ([7a10202](https://github.com/dograh-hq/dograh/commit/7a102026fbe90bed89455919ed1f9912cbab634b)) + ## [1.12.0](https://github.com/dograh-hq/dograh/compare/dograh-v1.11.2...dograh-v1.12.0) (2026-02-05) diff --git a/api/logging_config.py b/api/logging_config.py index 37f5ea3e..dc44b691 100644 --- a/api/logging_config.py +++ b/api/logging_config.py @@ -7,7 +7,7 @@ import loguru from api.constants import SERIALIZE_LOG_OUTPUT from api.enums import Environment from api.utils.worker import get_worker_id, is_worker_process -from pipecat.utils.context import run_id_var, turn_var +from pipecat.utils.run_context import run_id_var, turn_var ENVIRONMENT = os.getenv("ENVIRONMENT", Environment.LOCAL.value) ENABLE_TURN_LOGGING = os.getenv("ENABLE_TURN_LOGGING", "false").lower() == "true" diff --git a/api/pyproject.toml b/api/pyproject.toml index 59cbafd7..6bf079ef 100644 --- a/api/pyproject.toml +++ b/api/pyproject.toml @@ -1,5 +1,5 @@ [project] name = "dograh-api" -version = "1.12.0" +version = "1.13.0" description = "Backend API for Dograh voice AI platform" requires-python = ">=3.12" diff --git a/api/routes/integration.py b/api/routes/integration.py index 71eb7276..ae4d98cf 100644 --- a/api/routes/integration.py +++ b/api/routes/integration.py @@ -1,3 +1,7 @@ +""" +Route for 3rd party integrations. Currently being backed by nango. +""" + from dataclasses import dataclass from typing import Any, Dict, List, Optional, TypedDict diff --git a/api/routes/stasis_rtp.py b/api/routes/stasis_rtp.py index c18682ac..0fede33f 100644 --- a/api/routes/stasis_rtp.py +++ b/api/routes/stasis_rtp.py @@ -1,12 +1,12 @@ import random from loguru import logger -from pipecat.utils.context import set_current_run_id from api.db import db_client from api.enums import WorkflowRunMode from api.services.pipecat.run_pipeline import run_pipeline_ari_stasis from api.services.telephony.stasis_rtp_connection import StasisRTPConnection +from pipecat.utils.run_context import set_current_run_id async def on_stasis_call(call: StasisRTPConnection, call_context_vars: dict): diff --git a/api/routes/telephony.py b/api/routes/telephony.py index 0ed6c87f..e93dd60b 100644 --- a/api/routes/telephony.py +++ b/api/routes/telephony.py @@ -37,7 +37,7 @@ from api.utils.telephony_helper import ( numbers_match, parse_webhook_request, ) -from pipecat.utils.context import set_current_run_id +from pipecat.utils.run_context import set_current_run_id router = APIRouter(prefix="/telephony") diff --git a/api/routes/webrtc_signaling.py b/api/routes/webrtc_signaling.py index f50180da..2dadd83d 100644 --- a/api/routes/webrtc_signaling.py +++ b/api/routes/webrtc_signaling.py @@ -22,7 +22,7 @@ from typing import Dict, List, Optional from aiortc import RTCIceServer from aiortc.sdp import candidate_from_sdp -from fastapi import APIRouter, Depends, WebSocket, WebSocketDisconnect +from fastapi import APIRouter, Depends, HTTPException, WebSocket, WebSocketDisconnect from loguru import logger from starlette.websockets import WebSocketState @@ -44,7 +44,7 @@ from api.services.pipecat.ws_sender_registry import ( ) from api.services.quota_service import check_dograh_quota from pipecat.transports.smallwebrtc.connection import SmallWebRTCConnection -from pipecat.utils.context import set_current_run_id +from pipecat.utils.run_context import set_current_run_id router = APIRouter(prefix="/ws") @@ -390,6 +390,11 @@ async def signaling_websocket( user: UserModel = Depends(get_user_ws), ): """WebSocket endpoint for WebRTC signaling with ICE trickling.""" + workflow_run = await db_client.get_workflow_run(workflow_run_id, user.id) + if not workflow_run: + logger.warning(f"workflow run {workflow_run_id} not found for user {user.id}") + raise HTTPException(status_code=400, detail="Bad workflow_run_id") + await signaling_manager.handle_websocket( websocket, workflow_id, workflow_run_id, user ) diff --git a/api/services/configuration/registry.py b/api/services/configuration/registry.py index 4443b5a5..c9e36756 100644 --- a/api/services/configuration/registry.py +++ b/api/services/configuration/registry.py @@ -278,7 +278,48 @@ class DograhTTSService(BaseTTSConfiguration): SARVAM_TTS_MODELS = ["bulbul:v2", "bulbul:v3"] -SARVAM_VOICES = ["anushka", "manisha", "vidya", "arya", "abhilash", "karun", "hitesh"] +SARVAM_V2_VOICES = ["anushka", "manisha", "vidya", "arya", "abhilash", "karun", "hitesh"] +SARVAM_V3_VOICES = [ + "shubh", + "aditya", + "ritu", + "priya", + "neha", + "rahul", + "pooja", + "rohan", + "simran", + "kavya", + "amit", + "dev", + "ishita", + "shreya", + "ratan", + "varun", + "manan", + "sumit", + "roopa", + "kabir", + "aayan", + "ashutosh", + "advait", + "amelia", + "sophia", + "anand", + "tanya", + "tarun", + "sunny", + "mani", + "gokul", + "vijay", + "shruti", + "suhani", + "mohit", + "kavitha", + "rehan", + "soham", + "rupali", +] SARVAM_LANGUAGES = [ "bn-IN", "en-IN", @@ -301,7 +342,16 @@ class SarvamTTSConfiguration(BaseTTSConfiguration): model: str = Field( default="bulbul:v2", json_schema_extra={"examples": SARVAM_TTS_MODELS} ) - voice: str = Field(default="anushka", json_schema_extra={"examples": SARVAM_VOICES}) + voice: str = Field( + default="anushka", + json_schema_extra={ + "examples": SARVAM_V2_VOICES, + "model_options": { + "bulbul:v2": SARVAM_V2_VOICES, + "bulbul:v3": SARVAM_V3_VOICES, + }, + }, + ) language: str = Field( default="hi-IN", json_schema_extra={"examples": SARVAM_LANGUAGES} ) @@ -322,39 +372,89 @@ TTSConfig = Annotated[ ###################################################### STT ######################################################################## -DEEPGRAM_STT_MODELS = ["nova-2", "nova-3-general", "flux-general-en"] +DEEPGRAM_STT_MODELS = ["nova-3-general", "flux-general-en"] DEEPGRAM_LANGUAGES = [ "multi", + "ar", + "ar-AE", + "ar-SA", + "ar-QA", + "ar-KW", + "ar-SY", + "ar-LB", + "ar-PS", + "ar-JO", + "ar-EG", + "ar-SD", + "ar-TD", + "ar-MA", + "ar-DZ", + "ar-TN", + "ar-IQ", + "ar-IR", + "be", + "bn", + "bs", + "bg", + "ca", + "cs", + "da", + "da-DK", + "de", + "de-CH", + "el", "en", "en-US", - "en-GB", "en-AU", + "en-GB", "en-IN", + "en-NZ", "es", "es-419", + "et", + "fa", + "fi", "fr", "fr-CA", - "de", + "he", + "hi", + "hr", + "hu", + "id", "it", + "ja", + "kn", + "ko", + "ko-KR", + "lt", + "lv", + "mk", + "mr", + "ms", + "nl", + "nl-BE", + "no", + "pl", "pt", "pt-BR", - "nl", - "hi", - "ja", - "ko", - "zh-CN", - "zh-TW", + "pt-PT", + "ro", "ru", - "pl", + "sk", + "sl", + "sr", + "sv", + "sv-SE", + "ta", + "te", + "th", + "tl", "tr", "uk", + "ur", "vi", - "sv", - "da", - "no", - "fi", - "id", - "th", + "zh-CN", + "zh-TW", ] @@ -365,7 +465,14 @@ class DeepgramSTTConfiguration(BaseSTTConfiguration): default="nova-3-general", json_schema_extra={"examples": DEEPGRAM_STT_MODELS} ) language: str = Field( - default="multi", json_schema_extra={"examples": DEEPGRAM_LANGUAGES} + default="multi", + json_schema_extra={ + "examples": DEEPGRAM_LANGUAGES, + "model_options": { + "nova-3-general": DEEPGRAM_LANGUAGES, + "flux-general-en": ["en"], + }, + }, ) api_key: str @@ -390,39 +497,7 @@ class OpenAISTTConfiguration(BaseSTTConfiguration): # Dograh STT Service DOGRAH_STT_MODELS = ["default"] -DOGRAH_STT_LANGUAGES = [ - "multi", - "en", - "en-US", - "en-GB", - "en-AU", - "en-IN", - "es", - "es-419", - "fr", - "fr-CA", - "de", - "it", - "pt", - "pt-BR", - "nl", - "hi", - "ja", - "ko", - "zh-CN", - "zh-TW", - "ru", - "pl", - "tr", - "uk", - "vi", - "sv", - "da", - "no", - "fi", - "id", - "th", -] +DOGRAH_STT_LANGUAGES = DEEPGRAM_LANGUAGES @register_stt diff --git a/api/services/looptalk/orchestrator.py b/api/services/looptalk/orchestrator.py index 917774a2..d4c969ad 100644 --- a/api/services/looptalk/orchestrator.py +++ b/api/services/looptalk/orchestrator.py @@ -14,7 +14,7 @@ from api.services.looptalk.internal_transport import ( ) from api.services.pipecat.transport_setup import create_internal_transport from pipecat.pipeline.task import PipelineTask -from pipecat.utils.context import set_current_run_id +from pipecat.utils.run_context import set_current_run_id from .core.pipeline_builder import LoopTalkPipelineBuilder from .core.recording_manager import RecordingManager diff --git a/api/services/pipecat/pipeline_builder.py b/api/services/pipecat/pipeline_builder.py index 1706a2eb..00807706 100644 --- a/api/services/pipecat/pipeline_builder.py +++ b/api/services/pipecat/pipeline_builder.py @@ -10,7 +10,7 @@ from pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.task import PipelineParams, PipelineTask from pipecat.processors.aggregators.llm_context import LLMContext from pipecat.processors.audio.audio_buffer_processor import AudioBufferProcessor -from pipecat.utils.context import turn_var +from pipecat.utils.run_context import turn_var def create_pipeline_components(audio_config: AudioConfig): diff --git a/api/services/pipecat/run_pipeline.py b/api/services/pipecat/run_pipeline.py index 484d5112..f0107e98 100644 --- a/api/services/pipecat/run_pipeline.py +++ b/api/services/pipecat/run_pipeline.py @@ -62,6 +62,7 @@ from pipecat.turns.user_mute import ( MuteUntilFirstBotCompleteUserMuteStrategy, ) from pipecat.turns.user_start import ( + ExternalUserTurnStartStrategy, TranscriptionUserTurnStartStrategy, ) from pipecat.turns.user_start.vad_user_turn_start_strategy import ( @@ -69,12 +70,12 @@ from pipecat.turns.user_start.vad_user_turn_start_strategy import ( ) from pipecat.turns.user_stop import ( ExternalUserTurnStopStrategy, - TranscriptionUserTurnStopStrategy, + SpeechTimeoutUserTurnStopStrategy, TurnAnalyzerUserTurnStopStrategy, ) from pipecat.turns.user_turn_strategies import UserTurnStrategies -from pipecat.utils.context import set_current_run_id from pipecat.utils.enums import EndTaskReason +from pipecat.utils.run_context import set_current_run_id from pipecat.utils.tracing.context_registry import ContextProviderRegistry # Setup tracing if enabled @@ -265,7 +266,6 @@ async def run_pipeline_vobiz( async def run_pipeline_cloudonix( websocket_client: WebSocket, stream_sid: str, - call_sid: str, workflow_id: int, workflow_run_id: int, user_id: int, @@ -274,10 +274,15 @@ async def run_pipeline_cloudonix( logger.debug( f"Running pipeline for Cloudonix connection with workflow_id: {workflow_id} and workflow_run_id: {workflow_run_id}" ) - set_current_run_id(workflow_run_id) + + workflow_run = await db_client.get_workflow_run_by_id(workflow_run_id) + call_id = workflow_run.gathered_context.get("call_id") + if not call_id: + logger.warning("call_id not found in gathered_context") + raise Exception() # Store call ID in cost_info for later cost calculation (provider-agnostic) - cost_info = {"call_id": call_sid} + cost_info = {"call_id": call_id} await db_client.update_workflow_run(workflow_run_id, cost_info=cost_info) # Get workflow to extract all pipeline configurations @@ -292,26 +297,18 @@ async def run_pipeline_cloudonix( "ambient_noise_configuration" ] - # Retrieve session_token from workflow_run gathered_context - workflow_run = await db_client.get_workflow_run(workflow_run_id) - session_token = None - if workflow_run and workflow_run.gathered_context: - session_token = workflow_run.gathered_context.get("session_token") - logger.debug(f"Retrieved session_token from workflow_run: {session_token}") - # Create audio configuration for Cloudonix audio_config = create_audio_config(WorkflowRunMode.CLOUDONIX.value) transport = await create_cloudonix_transport( websocket_client, + call_id, stream_sid, - call_sid, workflow_run_id, audio_config, workflow.organization_id, vad_config, ambient_noise_config, - session_token, ) await _run_pipeline( transport, @@ -580,7 +577,10 @@ async def _run_pipeline( if is_deepgram_flux: user_turn_strategies = UserTurnStrategies( - start=[VADUserTurnStartStrategy(), TranscriptionUserTurnStartStrategy()], + start=[ + VADUserTurnStartStrategy(), + ExternalUserTurnStartStrategy(enable_interruptions=True), + ], stop=[ExternalUserTurnStopStrategy()], ) elif turn_stop_strategy == "turn_analyzer": @@ -598,7 +598,7 @@ async def _run_pipeline( # Transcription-based (default): best for short 1-2 word responses user_turn_strategies = UserTurnStrategies( start=[VADUserTurnStartStrategy(), TranscriptionUserTurnStartStrategy()], - stop=[TranscriptionUserTurnStopStrategy()], + stop=[SpeechTimeoutUserTurnStopStrategy()], ) # Create user mute strategies diff --git a/api/services/pipecat/service_factory.py b/api/services/pipecat/service_factory.py index 4fb3b677..a3462521 100644 --- a/api/services/pipecat/service_factory.py +++ b/api/services/pipecat/service_factory.py @@ -30,7 +30,9 @@ if TYPE_CHECKING: from api.services.pipecat.audio_config import AudioConfig -def create_stt_service(user_config, audio_config: "AudioConfig", keyterms: list[str] | None = None): +def create_stt_service( + user_config, audio_config: "AudioConfig", keyterms: list[str] | None = None +): """Create and return appropriate STT service based on user configuration Args: @@ -53,7 +55,7 @@ def create_stt_service(user_config, audio_config: "AudioConfig", keyterms: list[ keyterm=keyterms or [], ), should_interrupt=False, # Let UserAggregator take care of sending InterruptionFrame - sample_rate=audio_config.transport_in_sample_rate + sample_rate=audio_config.transport_in_sample_rate, ) # Other models than flux @@ -64,21 +66,24 @@ def create_stt_service(user_config, audio_config: "AudioConfig", keyterms: list[ profanity_filter=False, endpointing=100, model=user_config.stt.model, - keyterm=keyterms or [] + keyterm=keyterms or [], ) logger.debug(f"Using DeepGram Model - {user_config.stt.model}") return DeepgramSTTService( live_options=live_options, api_key=user_config.stt.api_key, should_interrupt=False, # Let UserAggregator take care of sending InterruptionFrame - sample_rate=audio_config.transport_in_sample_rate + sample_rate=audio_config.transport_in_sample_rate, ) elif user_config.stt.provider == ServiceProviders.OPENAI.value: return OpenAISTTService( api_key=user_config.stt.api_key, model=user_config.stt.model ) elif user_config.stt.provider == ServiceProviders.CARTESIA.value: - return CartesiaSTTService(api_key=user_config.stt.api_key, sample_rate=audio_config.transport_in_sample_rate) + return CartesiaSTTService( + api_key=user_config.stt.api_key, + sample_rate=audio_config.transport_in_sample_rate, + ) elif user_config.stt.provider == ServiceProviders.DOGRAH.value: base_url = MPS_API_URL.replace("http://", "ws://").replace("https://", "wss://") language = getattr(user_config.stt, "language", None) or "multi" @@ -88,7 +93,7 @@ def create_stt_service(user_config, audio_config: "AudioConfig", keyterms: list[ model=user_config.stt.model, language=language, keyterms=keyterms, - sample_rate=audio_config.transport_in_sample_rate + sample_rate=audio_config.transport_in_sample_rate, ) elif user_config.stt.provider == ServiceProviders.SARVAM.value: # Map Sarvam language code to pipecat Language enum @@ -112,7 +117,7 @@ def create_stt_service(user_config, audio_config: "AudioConfig", keyterms: list[ api_key=user_config.stt.api_key, model=user_config.stt.model, params=SarvamSTTService.InputParams(language=pipecat_language), - sample_rate=audio_config.transport_in_sample_rate + sample_rate=audio_config.transport_in_sample_rate, ) elif user_config.stt.provider == ServiceProviders.SPEECHMATICS.value: from pipecat.services.speechmatics.stt import ( @@ -138,7 +143,7 @@ def create_stt_service(user_config, audio_config: "AudioConfig", keyterms: list[ operating_point=operating_point, additional_vocab=additional_vocab, ), - sample_rate=audio_config.transport_in_sample_rate + sample_rate=audio_config.transport_in_sample_rate, ) else: raise HTTPException( diff --git a/api/services/pipecat/transport_setup.py b/api/services/pipecat/transport_setup.py index 02900623..6cee7fb2 100644 --- a/api/services/pipecat/transport_setup.py +++ b/api/services/pipecat/transport_setup.py @@ -94,14 +94,13 @@ async def create_twilio_transport( async def create_cloudonix_transport( websocket_client: WebSocket, + call_id: str, stream_sid: str, - call_sid: str, workflow_run_id: int, audio_config: AudioConfig, organization_id: int, vad_config: dict | None = None, ambient_noise_config: dict | None = None, - session_token: str | None = None, ): """Create a transport for Cloudonix connections""" @@ -125,11 +124,10 @@ async def create_cloudonix_transport( from pipecat.serializers.cloudonix import CloudonixFrameSerializer serializer = CloudonixFrameSerializer( + call_id=call_id, stream_sid=stream_sid, - call_sid=call_sid, domain_id=domain_id, bearer_token=bearer_token, - session_token=session_token, ) return FastAPIWebsocketTransport( diff --git a/api/services/pipecat/turn_context.py b/api/services/pipecat/turn_context.py index 9d3683d2..05e6e4d8 100644 --- a/api/services/pipecat/turn_context.py +++ b/api/services/pipecat/turn_context.py @@ -8,7 +8,7 @@ propagate through asyncio.create_task() calls. import asyncio from typing import Dict, Optional -from pipecat.utils.context import turn_var +from pipecat.utils.run_context import turn_var class TurnContextManager: diff --git a/api/services/telephony/providers/cloudonix_provider.py b/api/services/telephony/providers/cloudonix_provider.py index 8449499d..c6b2a556 100644 --- a/api/services/telephony/providers/cloudonix_provider.py +++ b/api/services/telephony/providers/cloudonix_provider.py @@ -395,10 +395,6 @@ class CloudonixProvider(TelephonyProvider): await websocket.close(code=4400, reason="Expected connected event") return - logger.debug( - f"Cloudonix WebSocket connected for workflow_run {workflow_run_id}" - ) - # Wait for "start" event with stream details start_msg = await websocket.receive_text() logger.debug(f"Received start message: {start_msg}") @@ -418,9 +414,14 @@ class CloudonixProvider(TelephonyProvider): await websocket.close(code=4400, reason="Missing stream identifiers") return + logger.debug( + f"Cloudonix WebSocket connected for workflow_run {workflow_run_id} " + f"stream_sid: {stream_sid} call_sid: {call_sid}" + ) + # Run the Cloudonix pipeline await run_pipeline_cloudonix( - websocket, stream_sid, call_sid, workflow_id, workflow_run_id, user_id + websocket, stream_sid, workflow_id, workflow_run_id, user_id ) except Exception as e: diff --git a/api/services/telephony/providers/twilio_provider.py b/api/services/telephony/providers/twilio_provider.py index 713e282b..3c020b06 100644 --- a/api/services/telephony/providers/twilio_provider.py +++ b/api/services/telephony/providers/twilio_provider.py @@ -110,7 +110,7 @@ class TwilioProvider(TelephonyProvider): return CallInitiationResult( call_id=response_data["sid"], status=response_data.get("status", "queued"), - provider_metadata={}, # Twilio doesn't need to persist extra data + provider_metadata={"call_id": response_data["sid"]}, raw_response=response_data, ) diff --git a/api/services/telephony/providers/vobiz_provider.py b/api/services/telephony/providers/vobiz_provider.py index ddaf9c26..0666d858 100644 --- a/api/services/telephony/providers/vobiz_provider.py +++ b/api/services/telephony/providers/vobiz_provider.py @@ -150,7 +150,7 @@ class VobizProvider(TelephonyProvider): return CallInitiationResult( call_id=call_id, status="queued", # Vobiz returns "message": "call fired" - provider_metadata={}, + provider_metadata={"call_id": call_id}, raw_response=response_data, ) diff --git a/api/services/telephony/providers/vonage_provider.py b/api/services/telephony/providers/vonage_provider.py index 315f04d5..ee25d787 100644 --- a/api/services/telephony/providers/vonage_provider.py +++ b/api/services/telephony/providers/vonage_provider.py @@ -138,10 +138,8 @@ class VonageProvider(TelephonyProvider): call_id=response_data["uuid"], status=response_data.get("status", "started"), provider_metadata={ - "call_uuid": response_data[ - "uuid" - ] # Vonage needs UUID persisted for WebSocket - }, + "call_uuid": response_data["uuid"] + }, # Vonage needs UUID persisted for WebSocket raw_response=response_data, ) diff --git a/api/services/telephony/worker_event_subscriber.py b/api/services/telephony/worker_event_subscriber.py index 42afb393..61263725 100644 --- a/api/services/telephony/worker_event_subscriber.py +++ b/api/services/telephony/worker_event_subscriber.py @@ -23,7 +23,7 @@ from api.services.telephony.stasis_event_protocol import ( parse_event, ) from api.services.telephony.stasis_rtp_connection import StasisRTPConnection -from pipecat.utils.context import set_current_run_id +from pipecat.utils.run_context import set_current_run_id class WorkerEventSubscriber: diff --git a/api/services/workflow/pipecat_engine_callbacks.py b/api/services/workflow/pipecat_engine_callbacks.py index a422734b..684514f7 100644 --- a/api/services/workflow/pipecat_engine_callbacks.py +++ b/api/services/workflow/pipecat_engine_callbacks.py @@ -116,6 +116,10 @@ def create_aggregation_correction_callback(engine: "PipecatEngine"): if corrupted in ref or len(alnum_ref) < len(alnum_corr) or len(alnum_corr) < 10: return corrupted + logger.debug( + f"In correct_corrupted_aggregation: ref: {ref} corrupted: {corrupted}" + ) + # 2) Find where in `ref` we should start aligning. # We take the first N (N=10) characters of `corrupted` # and look for all their occurrences in `ref`. diff --git a/api/tasks/run_integrations.py b/api/tasks/run_integrations.py index fb38e5c8..b622f386 100644 --- a/api/tasks/run_integrations.py +++ b/api/tasks/run_integrations.py @@ -10,7 +10,7 @@ from api.db import db_client from api.db.models import WorkflowRunModel from api.utils.credential_auth import build_auth_header from api.utils.template_renderer import render_template -from pipecat.utils.context import set_current_run_id +from pipecat.utils.run_context import set_current_run_id async def run_integrations_post_workflow_run(_ctx, workflow_run_id: int): diff --git a/api/tasks/s3_upload.py b/api/tasks/s3_upload.py index d1c55a6d..f1b821d7 100644 --- a/api/tasks/s3_upload.py +++ b/api/tasks/s3_upload.py @@ -6,7 +6,7 @@ from loguru import logger from api.db import db_client from api.services.storage import get_current_storage_backend, storage_fs from api.tasks.run_integrations import run_integrations_post_workflow_run -from pipecat.utils.context import set_current_run_id +from pipecat.utils.run_context import set_current_run_id async def upload_voicemail_audio_to_s3( diff --git a/api/tasks/workflow_run_cost.py b/api/tasks/workflow_run_cost.py index 25400fca..176e3a8b 100644 --- a/api/tasks/workflow_run_cost.py +++ b/api/tasks/workflow_run_cost.py @@ -4,7 +4,62 @@ from api.db import db_client from api.enums import WorkflowRunMode from api.services.pricing.cost_calculator import cost_calculator from api.services.telephony.factory import get_telephony_provider -from pipecat.utils.context import set_current_run_id +from pipecat.utils.run_context import set_current_run_id + + +async def _fetch_telephony_cost(workflow_run) -> dict | None: + """Fetch telephony call cost. Returns a dict with cost_usd and provider_name, or None.""" + if ( + workflow_run.mode + not in [WorkflowRunMode.TWILIO.value, WorkflowRunMode.VONAGE.value] + or not workflow_run.cost_info + ): + return None + + call_id = workflow_run.cost_info.get("call_id") + if not call_id: + logger.warning(f"call_id not found in cost_info") + return None + + provider_name = workflow_run.mode.lower() if workflow_run.mode else "" + + workflow = await db_client.get_workflow_by_id(workflow_run.workflow_id) + if not workflow: + logger.warning("Workflow not found for workflow run") + raise Exception("Workflow not found") + + provider = await get_telephony_provider(workflow.organization_id) + call_cost_info = await provider.get_call_cost(call_id) + + if call_cost_info.get("status") == "error": + logger.error( + f"Failed to fetch {provider_name} call cost: {call_cost_info.get('error')}" + ) + return None + + cost_usd = call_cost_info.get("cost_usd", 0.0) + logger.info( + f"{provider_name.title()} call cost: ${cost_usd:.6f} USD for call {call_id}" + ) + return {"cost_usd": cost_usd, "provider_name": provider_name} + + +async def _update_organization_usage( + org, dograh_tokens: float, duration_seconds: float, charge_usd: float | None +) -> None: + """Update organization usage after a workflow run.""" + org_id = org.id + await db_client.update_usage_after_run( + org_id, dograh_tokens, duration_seconds, charge_usd + ) + if charge_usd is not None: + logger.info( + f"Updated organization usage with ${charge_usd:.2f} USD ({dograh_tokens} Dograh Tokens) and {duration_seconds}s duration for org {org_id}" + ) + else: + logger.info( + f"Updated organization usage with {dograh_tokens} Dograh Tokens and {duration_seconds}s duration for org {org_id}" + ) async def calculate_workflow_run_cost(ctx, workflow_run_id: int): @@ -26,62 +81,20 @@ async def calculate_workflow_run_cost(ctx, workflow_run_id: int): # Calculate cost breakdown cost_breakdown = cost_calculator.calculate_total_cost(workflow_usage_info) - # Fetch telephony call cost for both Twilio and Vonage - telephony_cost_usd = 0.0 - if ( - workflow_run.mode - in [WorkflowRunMode.TWILIO.value, WorkflowRunMode.VONAGE.value] - and workflow_run.cost_info - ): - # Get the call ID (provider-agnostic approach with backward compatibility) - call_id = workflow_run.cost_info.get("call_id") - - # Fallback to legacy provider-specific fields if needed - if not call_id: - if workflow_run.mode == WorkflowRunMode.TWILIO.value: - call_id = workflow_run.cost_info.get("twilio_call_sid") - elif workflow_run.mode == WorkflowRunMode.VONAGE.value: - call_id = workflow_run.cost_info.get("vonage_call_uuid") - - # Provider name is derived from workflow run mode - provider_name = workflow_run.mode.lower() if workflow_run.mode else "" - - if call_id: - try: - # Get workflow to access organization_id - workflow = await db_client.get_workflow_by_id( - workflow_run.workflow_id - ) - if not workflow: - logger.warning("Workflow not found for workflow run") - raise Exception("Workflow not found") - - # Use telephony provider abstraction - provider = await get_telephony_provider(workflow.organization_id) - call_cost_info = await provider.get_call_cost(call_id) - - if call_cost_info.get("status") != "error": - telephony_cost_usd = call_cost_info.get("cost_usd", 0.0) - cost_breakdown["telephony_call"] = telephony_cost_usd - cost_breakdown[f"{provider_name}_call"] = ( - telephony_cost_usd # Keep backward compatibility - ) - - # Add telephony cost to the total - cost_breakdown["total"] = ( - float(cost_breakdown["total"]) + telephony_cost_usd - ) - logger.info( - f"{provider_name.title()} call cost: ${telephony_cost_usd:.6f} USD for call {call_id}" - ) - else: - logger.error( - f"Failed to fetch {provider_name} call cost: {call_cost_info.get('error')}" - ) - - except Exception as e: - logger.error(f"Failed to fetch telephony call cost: {e}") - # Don't fail the whole cost calculation if telephony API fails + # Fetch telephony call cost + try: + telephony_cost = await _fetch_telephony_cost(workflow_run) + if telephony_cost: + telephony_cost_usd = telephony_cost["cost_usd"] + provider_name = telephony_cost["provider_name"] + cost_breakdown["telephony_call"] = telephony_cost_usd + cost_breakdown[f"{provider_name}_call"] = telephony_cost_usd + cost_breakdown["total"] = ( + float(cost_breakdown["total"]) + telephony_cost_usd + ) + except Exception as e: + logger.error(f"Failed to fetch telephony call cost: {e}") + # Don't fail the whole cost calculation if telephony API fails # Store cost information back to the workflow run # We'll add the cost breakdown to the workflow run @@ -106,6 +119,7 @@ async def calculate_workflow_run_cost(ctx, workflow_run_id: int): charge_usd = duration_seconds * org.price_per_second_usd cost_info = { + **workflow_run.cost_info, "cost_breakdown": cost_breakdown, "total_cost_usd": float(cost_breakdown["total"]), "dograh_token_usage": dograh_tokens, @@ -118,42 +132,19 @@ async def calculate_workflow_run_cost(ctx, workflow_run_id: int): cost_info["charge_usd"] = charge_usd cost_info["price_per_second_usd"] = org.price_per_second_usd - # Preserve call ID (provider-agnostic with backward compatibility) - if workflow_run.cost_info: - # Preserve generic call_id if it exists - if "call_id" in workflow_run.cost_info: - cost_info["call_id"] = workflow_run.cost_info["call_id"] - # Also preserve legacy fields for backward compatibility - elif "twilio_call_sid" in workflow_run.cost_info: - cost_info["twilio_call_sid"] = workflow_run.cost_info["twilio_call_sid"] - elif "vonage_call_uuid" in workflow_run.cost_info: - cost_info["vonage_call_uuid"] = workflow_run.cost_info[ - "vonage_call_uuid" - ] - # Update workflow run with cost information await db_client.update_workflow_run(run_id=workflow_run_id, cost_info=cost_info) # Update organization usage if applicable if org: - org_id = org.id try: duration_seconds = workflow_usage_info.get("call_duration_seconds", 0) - # Pass USD amount if organization has pricing - await db_client.update_usage_after_run( - org_id, dograh_tokens, duration_seconds, charge_usd + await _update_organization_usage( + org, dograh_tokens, duration_seconds, charge_usd ) - if charge_usd is not None: - logger.info( - f"Updated organization usage with ${charge_usd:.2f} USD ({dograh_tokens} Dograh Tokens) and {duration_seconds}s duration for org {org_id}" - ) - else: - logger.info( - f"Updated organization usage with {dograh_tokens} Dograh Tokens and {duration_seconds}s duration for org {org_id}" - ) except Exception as e: logger.error( - f"Failed to update organization usage for org {org_id}: {e}" + f"Failed to update organization usage for org {org.id}: {e}" ) # Don't fail the whole task if usage update fails diff --git a/api/tests/test_pipecat_engine_context_update.py b/api/tests/test_pipecat_engine_context_update.py index d40be6c9..cb99cdf0 100644 --- a/api/tests/test_pipecat_engine_context_update.py +++ b/api/tests/test_pipecat_engine_context_update.py @@ -31,8 +31,8 @@ from pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.runner import PipelineRunner from pipecat.pipeline.task import PipelineParams, PipelineTask from pipecat.processors.aggregators.llm_context import LLMContext -from pipecat.processors.aggregators.llm_response import LLMAssistantAggregatorParams from pipecat.processors.aggregators.llm_response_universal import ( + LLMAssistantAggregatorParams, LLMContextAggregatorPair, ) from pipecat.tests import MockLLMService, MockTTSService diff --git a/api/tests/test_pipecat_engine_end_call.py b/api/tests/test_pipecat_engine_end_call.py index 46fa2486..78bb0203 100644 --- a/api/tests/test_pipecat_engine_end_call.py +++ b/api/tests/test_pipecat_engine_end_call.py @@ -46,8 +46,8 @@ from pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.runner import PipelineRunner from pipecat.pipeline.task import PipelineParams, PipelineTask from pipecat.processors.aggregators.llm_context import LLMContext -from pipecat.processors.aggregators.llm_response import LLMAssistantAggregatorParams from pipecat.processors.aggregators.llm_response_universal import ( + LLMAssistantAggregatorParams, LLMContextAggregatorPair, LLMUserAggregatorParams, ) diff --git a/api/tests/test_pipecat_engine_node_switch_with_user_speech.py b/api/tests/test_pipecat_engine_node_switch_with_user_speech.py index e65e036e..efad8fd5 100644 --- a/api/tests/test_pipecat_engine_node_switch_with_user_speech.py +++ b/api/tests/test_pipecat_engine_node_switch_with_user_speech.py @@ -30,8 +30,8 @@ from pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.runner import PipelineRunner from pipecat.pipeline.task import PipelineParams, PipelineTask from pipecat.processors.aggregators.llm_context import LLMContext -from pipecat.processors.aggregators.llm_response import LLMAssistantAggregatorParams from pipecat.processors.aggregators.llm_response_universal import ( + LLMAssistantAggregatorParams, LLMContextAggregatorPair, LLMUserAggregatorParams, ) diff --git a/api/tests/test_pipecat_engine_tool_calls.py b/api/tests/test_pipecat_engine_tool_calls.py index 7938df84..43a05c44 100644 --- a/api/tests/test_pipecat_engine_tool_calls.py +++ b/api/tests/test_pipecat_engine_tool_calls.py @@ -18,8 +18,8 @@ from pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.runner import PipelineRunner from pipecat.pipeline.task import PipelineParams, PipelineTask from pipecat.processors.aggregators.llm_context import LLMContext -from pipecat.processors.aggregators.llm_response import LLMAssistantAggregatorParams from pipecat.processors.aggregators.llm_response_universal import ( + LLMAssistantAggregatorParams, LLMContextAggregatorPair, ) from pipecat.tests import MockLLMService, MockTTSService diff --git a/api/tests/test_pipecat_engine_variable_extraction.py b/api/tests/test_pipecat_engine_variable_extraction.py index 31593792..8a516142 100644 --- a/api/tests/test_pipecat_engine_variable_extraction.py +++ b/api/tests/test_pipecat_engine_variable_extraction.py @@ -27,8 +27,8 @@ from pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.runner import PipelineRunner from pipecat.pipeline.task import PipelineParams, PipelineTask from pipecat.processors.aggregators.llm_context import LLMContext -from pipecat.processors.aggregators.llm_response import LLMAssistantAggregatorParams from pipecat.processors.aggregators.llm_response_universal import ( + LLMAssistantAggregatorParams, LLMContextAggregatorPair, ) from pipecat.tests import MockLLMService, MockTTSService diff --git a/api/tests/test_tts_endframe_with_audio_write_failure.py b/api/tests/test_tts_endframe_with_audio_write_failure.py index 73509641..fa0dba6f 100644 --- a/api/tests/test_tts_endframe_with_audio_write_failure.py +++ b/api/tests/test_tts_endframe_with_audio_write_failure.py @@ -43,8 +43,8 @@ from pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.runner import PipelineRunner from pipecat.pipeline.task import PipelineParams, PipelineTask from pipecat.processors.aggregators.llm_context import LLMContext -from pipecat.processors.aggregators.llm_response import LLMAssistantAggregatorParams from pipecat.processors.aggregators.llm_response_universal import ( + LLMAssistantAggregatorParams, LLMContextAggregatorPair, LLMUserAggregatorParams, ) diff --git a/api/tests/test_user_idle_handler.py b/api/tests/test_user_idle_handler.py index 3b01df09..4548ca79 100644 --- a/api/tests/test_user_idle_handler.py +++ b/api/tests/test_user_idle_handler.py @@ -29,8 +29,8 @@ from pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.runner import PipelineRunner from pipecat.pipeline.task import PipelineParams, PipelineTask from pipecat.processors.aggregators.llm_context import LLMContext -from pipecat.processors.aggregators.llm_response import LLMAssistantAggregatorParams from pipecat.processors.aggregators.llm_response_universal import ( + LLMAssistantAggregatorParams, LLMContextAggregatorPair, LLMUserAggregatorParams, ) diff --git a/api/tests/test_user_muting_during_bot_speech.py b/api/tests/test_user_muting_during_bot_speech.py index b127e68e..6fd6b781 100644 --- a/api/tests/test_user_muting_during_bot_speech.py +++ b/api/tests/test_user_muting_during_bot_speech.py @@ -34,8 +34,8 @@ from pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.runner import PipelineRunner from pipecat.pipeline.task import PipelineParams, PipelineTask from pipecat.processors.aggregators.llm_context import LLMContext -from pipecat.processors.aggregators.llm_response import LLMAssistantAggregatorParams from pipecat.processors.aggregators.llm_response_universal import ( + LLMAssistantAggregatorParams, LLMContextAggregatorPair, LLMUserAggregator, LLMUserAggregatorParams, diff --git a/api/tests/test_voicemail_detector.py b/api/tests/test_voicemail_detector.py index d0681c26..64511ee3 100644 --- a/api/tests/test_voicemail_detector.py +++ b/api/tests/test_voicemail_detector.py @@ -21,8 +21,8 @@ from pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.runner import PipelineRunner from pipecat.pipeline.task import PipelineParams, PipelineTask from pipecat.processors.aggregators.llm_context import LLMContext -from pipecat.processors.aggregators.llm_response import LLMAssistantAggregatorParams from pipecat.processors.aggregators.llm_response_universal import ( + LLMAssistantAggregatorParams, LLMContextAggregatorPair, LLMUserAggregatorParams, ) diff --git a/pipecat b/pipecat index d67983b3..e180bd3c 160000 --- a/pipecat +++ b/pipecat @@ -1 +1 @@ -Subproject commit d67983b3b165f945a93e5ce594f47781a96bff9b +Subproject commit e180bd3c2abc3cebbdf5e2d7955d9928cca5d219 diff --git a/ui/package.json b/ui/package.json index e3912f39..ee196708 100644 --- a/ui/package.json +++ b/ui/package.json @@ -1,6 +1,6 @@ { "name": "ui", - "version": "1.12.0", + "version": "1.13.0", "private": true, "scripts": { "dev": "NODE_OPTIONS='--enable-source-maps' next dev --turbopack", diff --git a/ui/src/app/superadmin/runs/page.tsx b/ui/src/app/superadmin/runs/page.tsx index d20cb944..c58961f9 100644 --- a/ui/src/app/superadmin/runs/page.tsx +++ b/ui/src/app/superadmin/runs/page.tsx @@ -7,7 +7,7 @@ import { useCallback, useEffect, useState } from "react"; import { getWorkflowRunsApiV1SuperuserWorkflowRunsGet, setAdminCommentApiV1SuperuserWorkflowRunsRunIdCommentPost } from '@/client/sdk.gen'; import { FilterBuilder } from "@/components/filters/FilterBuilder"; -import { MediaPreviewButtons, MediaPreviewDialog } from '@/components/MediaPreviewDialog'; +import { MediaPreviewButton, MediaPreviewDialog } from '@/components/MediaPreviewDialog'; import { Badge } from "@/components/ui/badge"; import { Button } from "@/components/ui/button"; import { Card, CardContent, CardDescription, CardHeader, CardTitle } from "@/components/ui/card"; @@ -515,12 +515,11 @@ export default function RunsPage() {
- - {mediaDownloadKey && accessToken && ( - - )} +
+ {recordingKey && accessToken && ( + + )} + {transcriptKey && accessToken && ( + + )} +
@@ -114,53 +120,35 @@ export function MediaPreviewDialog({ accessToken }: MediaPreviewDialogProps) { }; } -interface MediaPreviewButtonsProps { +interface MediaPreviewButtonProps { recordingUrl: string | null | undefined; transcriptUrl: string | null | undefined; runId: number; - onOpenAudio: (fileKey: string | null, runId: number) => void; - onOpenTranscript: (fileKey: string | null, runId: number) => void; + onOpenPreview: (recordingUrl: string | null, transcriptUrl: string | null, runId: number) => void; onSelect?: (runId: number) => void; } -export function MediaPreviewButtons({ +export function MediaPreviewButton({ recordingUrl, transcriptUrl, runId, - onOpenAudio, - onOpenTranscript, + onOpenPreview, onSelect, -}: MediaPreviewButtonsProps) { - const handleOpenAudio = () => { - onSelect?.(runId); - onOpenAudio(recordingUrl ?? null, runId); - }; +}: MediaPreviewButtonProps) { + if (!recordingUrl && !transcriptUrl) return null; - const handleOpenTranscript = () => { + const handleOpen = () => { onSelect?.(runId); - onOpenTranscript(transcriptUrl ?? null, runId); + onOpenPreview(recordingUrl ?? null, transcriptUrl ?? null, runId); }; return ( -
- {recordingUrl && ( - - )} - {transcriptUrl && ( - - )} -
+ ); } diff --git a/ui/src/components/ServiceConfiguration.tsx b/ui/src/components/ServiceConfiguration.tsx index 637917c3..f52be717 100644 --- a/ui/src/components/ServiceConfiguration.tsx +++ b/ui/src/components/ServiceConfiguration.tsx @@ -21,6 +21,7 @@ interface SchemaProperty { default?: string | number | boolean; enum?: string[]; examples?: string[]; + model_options?: Record; $ref?: string; description?: string; format?: string; @@ -46,38 +47,89 @@ const TAB_CONFIG: { key: ServiceSegment; label: string }[] = [ // Display names for language codes (Deepgram + Sarvam) const LANGUAGE_DISPLAY_NAMES: Record = { - // Deepgram languages "multi": "Multilingual (Auto-detect)", + // Arabic + "ar": "Arabic", + "ar-AE": "Arabic (UAE)", + "ar-SA": "Arabic (Saudi Arabia)", + "ar-QA": "Arabic (Qatar)", + "ar-KW": "Arabic (Kuwait)", + "ar-SY": "Arabic (Syria)", + "ar-LB": "Arabic (Lebanon)", + "ar-PS": "Arabic (Palestine)", + "ar-JO": "Arabic (Jordan)", + "ar-EG": "Arabic (Egypt)", + "ar-SD": "Arabic (Sudan)", + "ar-TD": "Arabic (Chad)", + "ar-MA": "Arabic (Morocco)", + "ar-DZ": "Arabic (Algeria)", + "ar-TN": "Arabic (Tunisia)", + "ar-IQ": "Arabic (Iraq)", + "ar-IR": "Arabic (Iran)", + // Other languages + "be": "Belarusian", + "bn": "Bengali", + "bs": "Bosnian", + "bg": "Bulgarian", + "ca": "Catalan", + "cs": "Czech", + "da": "Danish", + "da-DK": "Danish (Denmark)", + "de": "German", + "de-CH": "German (Switzerland)", + "el": "Greek", "en": "English", "en-US": "English (US)", - "en-GB": "English (UK)", "en-AU": "English (Australia)", + "en-GB": "English (UK)", "en-IN": "English (India)", + "en-NZ": "English (New Zealand)", "es": "Spanish", "es-419": "Spanish (Latin America)", + "et": "Estonian", + "fa": "Persian", + "fi": "Finnish", "fr": "French", "fr-CA": "French (Canada)", - "de": "German", + "he": "Hebrew", + "hi": "Hindi", + "hr": "Croatian", + "hu": "Hungarian", + "id": "Indonesian", "it": "Italian", + "ja": "Japanese", + "kn": "Kannada", + "ko": "Korean", + "ko-KR": "Korean (South Korea)", + "lt": "Lithuanian", + "lv": "Latvian", + "mk": "Macedonian", + "mr": "Marathi", + "ms": "Malay", + "nl": "Dutch", + "nl-BE": "Flemish", + "no": "Norwegian", + "pl": "Polish", "pt": "Portuguese", "pt-BR": "Portuguese (Brazil)", - "nl": "Dutch", - "hi": "Hindi", - "ja": "Japanese", - "ko": "Korean", - "zh-CN": "Chinese (Simplified)", - "zh-TW": "Chinese (Traditional)", + "pt-PT": "Portuguese (Portugal)", + "ro": "Romanian", "ru": "Russian", - "pl": "Polish", + "sk": "Slovak", + "sl": "Slovenian", + "sr": "Serbian", + "sv": "Swedish", + "sv-SE": "Swedish (Sweden)", + "ta": "Tamil", + "te": "Telugu", + "th": "Thai", + "tl": "Tagalog", "tr": "Turkish", "uk": "Ukrainian", + "ur": "Urdu", "vi": "Vietnamese", - "sv": "Swedish", - "da": "Danish", - "no": "Norwegian", - "fi": "Finnish", - "id": "Indonesian", - "th": "Thai", + "zh-CN": "Chinese (Simplified)", + "zh-TW": "Chinese (Traditional)", // Sarvam Indian languages "bn-IN": "Bengali", "gu-IN": "Gujarati", @@ -212,6 +264,34 @@ export default function ServiceConfiguration() { } }, [schemas, serviceProviders.llm, userConfig?.llm?.model, hasCheckedManualMode]); + // Reset voice when TTS model changes if the provider has model-dependent voice options + const ttsModel = watch("tts_model"); + useEffect(() => { + const voiceSchema = schemas?.tts?.[serviceProviders.tts]?.properties?.voice; + const modelOptions = voiceSchema?.model_options; + if (!modelOptions || !ttsModel) return; + + const validVoices = modelOptions[ttsModel as string]; + const currentVoice = getValues("tts_voice") as string; + if (validVoices && currentVoice && !validVoices.includes(currentVoice)) { + setValue("tts_voice", validVoices[0], { shouldDirty: true }); + } + }, [ttsModel, serviceProviders.tts, setValue, getValues, schemas]); + + // Reset language when STT model changes if the provider has model-dependent language options + const sttModel = watch("stt_model"); + useEffect(() => { + const languageSchema = schemas?.stt?.[serviceProviders.stt]?.properties?.language; + const modelOptions = languageSchema?.model_options; + if (!modelOptions || !sttModel) return; + + const validLanguages = modelOptions[sttModel as string]; + const currentLanguage = getValues("stt_language") as string; + if (validLanguages && currentLanguage && !validLanguages.includes(currentLanguage)) { + setValue("stt_language", validLanguages[0], { shouldDirty: true }); + } + }, [sttModel, serviceProviders.stt, setValue, getValues, schemas]); + const handleProviderChange = (service: ServiceSegment, providerName: string) => { if (!providerName) { return; @@ -325,15 +405,6 @@ export default function ServiceConfiguration() { field => field !== "provider" && field !== "api_key" ); - // For Deepgram STT, hide language field when flux-general-en model is selected - // Flux model is English-only and doesn't support language selection - if (service === "stt" && currentProvider === "deepgram") { - const currentModel = watch("stt_model") as string; - if (currentModel === "flux-general-en") { - return fields.filter(field => field !== "language"); - } - } - return fields; }; @@ -516,7 +587,16 @@ export default function ServiceConfiguration() { } // Handle fields with enum or examples (dropdown options) - const dropdownOptions = actualSchema?.enum || actualSchema?.examples; + let dropdownOptions = actualSchema?.enum || actualSchema?.examples; + + // Use model-dependent options when available (e.g., Sarvam voices per model) + if (actualSchema?.model_options) { + const modelValue = watch(`${service}_model`) as string; + if (modelValue && actualSchema.model_options[modelValue]) { + dropdownOptions = actualSchema.model_options[modelValue]; + } + } + if (dropdownOptions && dropdownOptions.length > 0) { // Use friendly display names for language and voice fields const getDisplayName = (value: string) => { @@ -524,7 +604,7 @@ export default function ServiceConfiguration() { return LANGUAGE_DISPLAY_NAMES[value] || value; } if (field === "voice") { - return VOICE_DISPLAY_NAMES[value] || value; + return VOICE_DISPLAY_NAMES[value] || value.charAt(0).toUpperCase() + value.slice(1); } return value; }; diff --git a/ui/src/components/workflow-runs/WorkflowRunsTable.tsx b/ui/src/components/workflow-runs/WorkflowRunsTable.tsx index 7c082a10..7f5b752e 100644 --- a/ui/src/components/workflow-runs/WorkflowRunsTable.tsx +++ b/ui/src/components/workflow-runs/WorkflowRunsTable.tsx @@ -5,7 +5,7 @@ import { useState } from "react"; import { WorkflowRunResponseSchema } from "@/client/types.gen"; import { FilterBuilder } from "@/components/filters/FilterBuilder"; -import { MediaPreviewButtons, MediaPreviewDialog } from "@/components/MediaPreviewDialog"; +import { MediaPreviewButton, MediaPreviewDialog } from "@/components/MediaPreviewDialog"; import { Badge } from "@/components/ui/badge"; import { Button } from "@/components/ui/button"; import { Card, CardContent, CardDescription, CardHeader, CardTitle } from "@/components/ui/card"; @@ -211,12 +211,11 @@ export function WorkflowRunsTable({
e.stopPropagation()}> -