feat: support flux for dograh multi

This commit is contained in:
Abhishek Kumar 2026-06-24 12:45:07 +05:30
parent 0956157029
commit ca598933ef
7 changed files with 176 additions and 11 deletions

View file

@ -6,7 +6,6 @@ from loguru import logger
from api.db import db_client
from api.enums import WorkflowRunMode
from api.services.configuration.options import DEEPGRAM_FLUX_MODELS
from api.services.configuration.registry import ServiceProviders
from api.services.integrations import (
IntegrationRuntimeContext,
@ -47,6 +46,7 @@ from api.services.pipecat.service_factory import (
create_realtime_llm_service,
create_stt_service,
create_tts_service,
stt_uses_flux_turns,
)
from api.services.pipecat.tracing_config import (
ensure_tracing,
@ -626,14 +626,10 @@ async def _run_pipeline(
user_config.realtime.provider
)
else:
# Deepgram Flux uses external turn detection (VAD + External start/stop)
# Other models use configurable turn detection strategy
is_deepgram_flux = (
user_config.stt.provider == ServiceProviders.DEEPGRAM.value
and user_config.stt.model in DEEPGRAM_FLUX_MODELS
)
if is_deepgram_flux:
# Deepgram Flux and the supported Dograh-managed Flux languages emit their
# own turn boundaries, so the aggregator follows those external signals.
# Other models use configurable turn detection.
if stt_uses_flux_turns(user_config):
user_turn_strategies = UserTurnStrategies(
start=[
VADUserTurnStartStrategy(),

View file

@ -6,7 +6,10 @@ from fastapi import HTTPException
from loguru import logger
from api.constants import MPS_API_URL
from api.services.configuration.options import DEEPGRAM_FLUX_MODELS
from api.services.configuration.options import (
DEEPGRAM_FLUX_MODELS,
DEEPGRAM_FLUX_MULTILINGUAL_LANGUAGE_OPTIONS,
)
from api.services.configuration.registry import ServiceProviders
from api.services.pipecat.minimax_tts import MiniMaxOwnedSessionTTSService
from api.utils.url_security import validate_user_configured_service_url
@ -27,6 +30,7 @@ from pipecat.services.deepgram.flux.stt import (
)
from pipecat.services.deepgram.stt import DeepgramSTTService, DeepgramSTTSettings
from pipecat.services.deepgram.tts import DeepgramTTSService, DeepgramTTSSettings
from pipecat.services.dograh.flux.stt import DograhFluxSTTService
from pipecat.services.dograh.llm import DograhLLMService
from pipecat.services.dograh.stt import DograhSTTService, DograhSTTSettings
from pipecat.services.dograh.tts import DograhTTSService, DograhTTSSettings
@ -94,6 +98,19 @@ DEEPGRAM_FLUX_LANGUAGE_HINTS = {
}
def dograh_stt_uses_flux_language(language: str | None) -> bool:
    """Return whether ``language`` is one of the Flux multilingual options.

    A falsy ``language`` (``None`` or an empty string) is treated as
    ``"multi"`` before the membership check against
    ``DEEPGRAM_FLUX_MULTILINGUAL_LANGUAGE_OPTIONS``.
    """
    effective_language = language if language else "multi"
    return effective_language in DEEPGRAM_FLUX_MULTILINGUAL_LANGUAGE_OPTIONS
def stt_uses_flux_turns(user_config) -> bool:
    """Return whether the configured STT setup is a Flux one.

    - Deepgram provider: true when ``user_config.stt.model`` is in
      ``DEEPGRAM_FLUX_MODELS``.
    - Dograh provider: true when the configured language (or ``None`` if the
      STT config has no ``language`` attribute) passes
      ``dograh_stt_uses_flux_language``.
    - Any other provider: false.
    """
    stt_config = user_config.stt
    provider = stt_config.provider

    if provider == ServiceProviders.DEEPGRAM.value:
        uses_flux = stt_config.model in DEEPGRAM_FLUX_MODELS
    elif provider == ServiceProviders.DOGRAH.value:
        # The language attribute may be absent on the STT config; the helper
        # handles the None case itself.
        configured_language = getattr(stt_config, "language", None)
        uses_flux = dograh_stt_uses_flux_language(configured_language)
    else:
        uses_flux = False
    return uses_flux
def _validate_runtime_service_url(url: str, field_name: str) -> None:
try:
validate_user_configured_service_url(
@ -193,6 +210,29 @@ def create_stt_service(
elif user_config.stt.provider == ServiceProviders.DOGRAH.value:
base_url = MPS_API_URL.replace("http://", "ws://").replace("https://", "wss://")
language = getattr(user_config.stt, "language", None) or "multi"
if dograh_stt_uses_flux_language(language):
# Dograh's Flux proxy only supports multilingual auto-detect and the
# same language hint subset as Deepgram Flux multilingual.
settings_kwargs = {
"model": "flux-general-multi",
"eot_timeout_ms": 3000,
"eot_threshold": 0.7,
"eager_eot_threshold": 0.5,
"keyterm": keyterms or [],
}
language_hint = DEEPGRAM_FLUX_LANGUAGE_HINTS.get(language)
if language_hint:
settings_kwargs["language_hints"] = [language_hint]
return DograhFluxSTTService(
base_url=base_url,
api_key=user_config.stt.api_key,
correlation_id=correlation_id,
settings=DeepgramFluxSTTSettings(**settings_kwargs),
should_interrupt=False, # external turn strategies own interruption
sample_rate=audio_config.transport_in_sample_rate,
)
return DograhSTTService(
base_url=base_url,
api_key=user_config.stt.api_key,