dograh/api/services/pipecat/service_factory.py

from typing import TYPE_CHECKING
from urllib.parse import urlencode, urlparse, urlunparse

import aiohttp
from fastapi import HTTPException
from loguru import logger

from api.constants import MPS_API_URL
from api.services.configuration.registry import ServiceProviders
from api.services.pipecat.minimax_tts import MiniMaxOwnedSessionTTSService
from api.utils.url_security import validate_user_configured_service_url
from pipecat.services.assemblyai.stt import AssemblyAISTTService, AssemblyAISTTSettings
from pipecat.services.aws.llm import AWSBedrockLLMService, AWSBedrockLLMSettings
from pipecat.services.azure.llm import AzureLLMService, AzureLLMSettings
from pipecat.services.azure.stt import AzureSTTService, AzureSTTSettings
from pipecat.services.azure.tts import AzureTTSService, AzureTTSSettings
from pipecat.services.cartesia.stt import CartesiaSTTService
from pipecat.services.cartesia.tts import (
    CartesiaTTSService,
    CartesiaTTSSettings,
    GenerationConfig,
)
from pipecat.services.deepgram.flux.stt import (
    DeepgramFluxSTTService,
    DeepgramFluxSTTSettings,
)
from pipecat.services.deepgram.stt import DeepgramSTTService, DeepgramSTTSettings
from pipecat.services.deepgram.tts import DeepgramTTSService, DeepgramTTSSettings
from pipecat.services.dograh.llm import DograhLLMService
from pipecat.services.dograh.stt import DograhSTTService, DograhSTTSettings
from pipecat.services.dograh.tts import DograhTTSService, DograhTTSSettings
from pipecat.services.elevenlabs.tts import ElevenLabsTTSService, ElevenLabsTTSSettings
from pipecat.services.gladia.stt import GladiaSTTService, GladiaSTTSettings
from pipecat.services.google.llm import GoogleLLMService, GoogleLLMSettings
from pipecat.services.google.stt import GoogleSTTService, GoogleSTTSettings
from pipecat.services.google.tts import GoogleTTSService, GoogleTTSSettings
from pipecat.services.google.vertex.llm import (
    GoogleVertexLLMService,
    GoogleVertexLLMSettings,
)
from pipecat.services.groq.llm import GroqLLMService, GroqLLMSettings
from pipecat.services.minimax.llm import MiniMaxLLMService
from pipecat.services.minimax.tts import MiniMaxTTSSettings
from pipecat.services.openai.base_llm import OpenAILLMSettings
from pipecat.services.openai.llm import OpenAILLMService
from pipecat.services.openai.stt import (
    OpenAISTTService,
    OpenAISTTSettings,
)
from pipecat.services.openai.tts import OpenAITTSService, OpenAITTSSettings
from pipecat.services.openrouter.llm import OpenRouterLLMService, OpenRouterLLMSettings
from pipecat.services.rime.tts import RimeTTSService, RimeTTSSettings
from pipecat.services.sarvam.llm import SarvamLLMService, SarvamLLMSettings
from pipecat.services.sarvam.stt import SarvamSTTService, SarvamSTTSettings
from pipecat.services.sarvam.tts import SarvamTTSService, SarvamTTSSettings
from pipecat.services.speaches.llm import SpeachesLLMService, SpeachesLLMSettings
from pipecat.services.speaches.stt import SpeachesSTTService, SpeachesSTTSettings
from pipecat.services.speaches.tts import SpeachesTTSService, SpeachesTTSSettings
from pipecat.services.speechmatics.stt import (
    SpeechmaticsSTTService,
    SpeechmaticsSTTSettings,
)
from pipecat.transcriptions.language import Language
from pipecat.utils.text.xml_function_tag_filter import XMLFunctionTagFilter

if TYPE_CHECKING:
    from api.services.pipecat.audio_config import AudioConfig


def _validate_runtime_service_url(url: str, field_name: str) -> None:
    """Validate a user-configured service URL, reporting failures as HTTP 400.

    Delegates to ``validate_user_configured_service_url`` and converts the
    ``ValueError`` it raises into an ``HTTPException`` carrying the
    validator's message.

    Args:
        url: URL taken from the user's service configuration.
        field_name: Name of the configuration field, forwarded to the validator.

    Raises:
        HTTPException: Status 400 when the validator raises ``ValueError``.
    """
    try:
        validate_user_configured_service_url(url, field_name=field_name)
    except ValueError as exc:
        # Chain the original error so the validator's traceback is preserved.
        raise HTTPException(status_code=400, detail=str(exc)) from exc


def create_stt_service(
    user_config, audio_config: "AudioConfig", keyterms: list[str] | None = None
):
    """Build the speech-to-text service selected by ``user_config.stt``.

    Args:
        user_config: User configuration; its ``stt`` attribute supplies the
            provider, model, credentials and optional per-provider fields.
        audio_config: Audio configuration; ``transport_in_sample_rate`` is
            forwarded as the sample rate to the providers that take one.
        keyterms: Optional list of terms to boost during recognition. Passed
            to Deepgram, Dograh, AssemblyAI and Speechmatics; the remaining
            branches do not use it.

    Returns:
        The constructed STT service instance for the configured provider.

    Raises:
        HTTPException: Status 400 when the provider is not recognised or a
            user-supplied ``base_url`` fails validation.
    """
    stt_config = user_config.stt
    provider = stt_config.provider
    model = stt_config.model
    logger.info(f"Creating STT service: provider={provider}, model={model}")

    if provider == ServiceProviders.DEEPGRAM.value:
        if model == "flux-general-en":
            # Flux is English-only, so no language is selected for it.
            flux_settings = DeepgramFluxSTTSettings(
                model=model,
                eot_timeout_ms=3000,
                eot_threshold=0.7,
                eager_eot_threshold=0.5,
                keyterm=keyterms or [],
            )
            return DeepgramFluxSTTService(
                api_key=stt_config.api_key,
                settings=flux_settings,
                # UserAggregator is responsible for sending InterruptionFrame.
                should_interrupt=False,
                sample_rate=audio_config.transport_in_sample_rate,
            )

        # Every non-Flux model: honour the configured language and fall back
        # to "multi" for multilingual support.
        deepgram_language = getattr(stt_config, "language", None) or "multi"
        logger.debug(f"Using DeepGram Model - {model}")
        deepgram_settings = DeepgramSTTSettings(
            language=deepgram_language,
            profanity_filter=False,
            endpointing=100,
            model=model,
            keyterm=keyterms or [],
        )
        return DeepgramSTTService(
            api_key=stt_config.api_key,
            settings=deepgram_settings,
            # UserAggregator is responsible for sending InterruptionFrame.
            should_interrupt=False,
            sample_rate=audio_config.transport_in_sample_rate,
        )

    if provider == ServiceProviders.OPENAI.value:
        extra_kwargs = {}
        custom_base_url = getattr(stt_config, "base_url", None)
        if custom_base_url:
            # Only a user-supplied endpoint needs validating and forwarding.
            _validate_runtime_service_url(custom_base_url, "base_url")
            extra_kwargs["base_url"] = custom_base_url
        return OpenAISTTService(
            api_key=stt_config.api_key,
            settings=OpenAISTTSettings(model=model),
            **extra_kwargs,
        )

    if provider == ServiceProviders.GOOGLE.value:
        google_language = getattr(stt_config, "language", None) or "en-US"
        google_location = getattr(stt_config, "location", None) or "global"
        google_credentials = getattr(stt_config, "credentials", None)

        google_settings = {"model": model}
        try:
            google_settings["languages"] = [Language(google_language)]
        except ValueError:
            # Not a member of the Language enum: pass the raw code instead.
            google_settings["language_codes"] = [google_language]

        return GoogleSTTService(
            credentials=google_credentials,
            location=google_location,
            settings=GoogleSTTSettings(**google_settings),
            sample_rate=audio_config.transport_in_sample_rate,
        )

    if provider == ServiceProviders.CARTESIA.value:
        return CartesiaSTTService(
            api_key=stt_config.api_key,
            sample_rate=audio_config.transport_in_sample_rate,
        )

    if provider == ServiceProviders.DOGRAH.value:
        # The configured MPS URL uses an HTTP scheme; swap in the WS scheme.
        ws_base_url = MPS_API_URL.replace("http://", "ws://").replace(
            "https://", "wss://"
        )
        dograh_language = getattr(stt_config, "language", None) or "multi"
        return DograhSTTService(
            base_url=ws_base_url,
            api_key=stt_config.api_key,
            settings=DograhSTTSettings(model=model, language=dograh_language),
            keyterms=keyterms,
            sample_rate=audio_config.transport_in_sample_rate,
        )

    if provider == ServiceProviders.SARVAM.value:
        sarvam_code = getattr(stt_config, "language", None)
        sarvam_languages = {
            "bn-IN": Language.BN_IN,
            "gu-IN": Language.GU_IN,
            "hi-IN": Language.HI_IN,
            "kn-IN": Language.KN_IN,
            "ml-IN": Language.ML_IN,
            "mr-IN": Language.MR_IN,
            "ta-IN": Language.TA_IN,
            "te-IN": Language.TE_IN,
            "pa-IN": Language.PA_IN,
            "od-IN": Language.OR_IN,
            "en-IN": Language.EN_IN,
            "as-IN": Language.AS_IN,
            "ur-IN": Language.UR_IN,
            "kok-IN": Language.KOK_IN,
            "mai-IN": Language.MAI_IN,
            "sd-IN": Language.SD_IN,
        }
        if not sarvam_code or sarvam_code == "unknown":
            sarvam_language = None
        else:
            # Unmapped BCP-47 codes pass through; Sarvam accepts them per https://docs.sarvam.ai/api-reference-docs/speech-to-text/transcribe
            sarvam_language = sarvam_languages.get(sarvam_code, sarvam_code)
        return SarvamSTTService(
            api_key=stt_config.api_key,
            settings=SarvamSTTSettings(model=model, language=sarvam_language),
            sample_rate=audio_config.transport_in_sample_rate,
        )

    if provider == ServiceProviders.SPEACHES.value:
        speaches_language = getattr(stt_config, "language", None)
        _validate_runtime_service_url(stt_config.base_url, "base_url")
        return SpeachesSTTService(
            base_url=stt_config.base_url,
            # A placeholder key is sent when none is configured.
            api_key=stt_config.api_key or "none",
            settings=SpeachesSTTSettings(model=model, language=speaches_language),
            sample_rate=audio_config.transport_in_sample_rate,
        )

    if provider == ServiceProviders.ASSEMBLYAI.value:
        assembly_settings = {
            "model": model,
            "language": getattr(stt_config, "language", None),
        }
        if keyterms:
            assembly_settings["keyterms_prompt"] = keyterms
        return AssemblyAISTTService(
            api_key=stt_config.api_key,
            settings=AssemblyAISTTSettings(**assembly_settings),
            sample_rate=audio_config.transport_in_sample_rate,
        )

    if provider == ServiceProviders.GLADIA.value:
        from pipecat.services.gladia.config import LanguageConfig

        gladia_language = getattr(stt_config, "language", None) or "en"
        gladia_settings = GladiaSTTSettings(
            model=model,
            language_config=LanguageConfig(
                languages=[gladia_language], code_switching=False
            ),
        )
        return GladiaSTTService(
            api_key=stt_config.api_key,
            settings=gladia_settings,
            sample_rate=audio_config.transport_in_sample_rate,
        )

    if provider == ServiceProviders.SPEECHMATICS.value:
        from pipecat.services.speechmatics.stt import (
            AdditionalVocabEntry,
            OperatingPoint,
        )

        speechmatics_language = getattr(stt_config, "language", None) or "en"
        # The model field selects the operating point: "enhanced" or standard.
        if model == "enhanced":
            operating_point = OperatingPoint.ENHANCED
        else:
            operating_point = OperatingPoint.STANDARD
        # Speechmatics takes keyterms as AdditionalVocabEntry objects.
        vocab_entries = (
            [AdditionalVocabEntry(content=term) for term in keyterms]
            if keyterms
            else []
        )
        return SpeechmaticsSTTService(
            api_key=stt_config.api_key,
            settings=SpeechmaticsSTTSettings(
                language=speechmatics_language,
                operating_point=operating_point,
                additional_vocab=vocab_entries,
            ),
            sample_rate=audio_config.transport_in_sample_rate,
        )

    if provider == ServiceProviders.AZURE_SPEECH.value:
        azure_code = getattr(stt_config, "language", None) or "en-US"
        azure_region = getattr(stt_config, "region", None) or "eastus"
        try:
            azure_language = Language(azure_code)
        except ValueError:
            # Not a member of the Language enum: pass the raw code instead.
            azure_language = azure_code
        return AzureSTTService(
            api_key=stt_config.api_key,
            region=azure_region,
            settings=AzureSTTSettings(language=azure_language),
            sample_rate=audio_config.transport_in_sample_rate,
        )

    raise HTTPException(
        status_code=400, detail=f"Invalid STT provider {provider}"
    )


def create_tts_service(user_config, audio_config: "AudioConfig"):
    """Create and return the TTS service selected by ``user_config.tts``.

    Every service is given an ``XMLFunctionTagFilter`` text filter so function
    call tags are not spoken, and skips the ``recording_router`` and
    ``recording`` aggregator types.

    Args:
        user_config: User configuration containing TTS settings (provider,
            model, voice, credentials and optional per-provider fields).
        audio_config: Audio configuration. Currently not read by this
            function.

    Returns:
        The constructed TTS service instance for the configured provider.

    Raises:
        HTTPException: Status 400 when the provider is unknown, a
            user-supplied ``base_url`` fails validation, MiniMax is selected
            without a ``group_id``, or the Camb voice is not a numeric ID.
    """
    logger.info(
        f"Creating TTS service: provider={user_config.tts.provider}, model={user_config.tts.model}"
    )
    # Create function call filter to prevent TTS from speaking function call tags
    xml_function_tag_filter = XMLFunctionTagFilter()
    if user_config.tts.provider == ServiceProviders.DEEPGRAM.value:
        return DeepgramTTSService(
            api_key=user_config.tts.api_key,
            settings=DeepgramTTSSettings(voice=user_config.tts.voice),
            text_filters=[xml_function_tag_filter],
            skip_aggregator_types=["recording_router", "recording"],
            silence_time_s=1.0,
        )
    elif user_config.tts.provider == ServiceProviders.OPENAI.value:
        kwargs = {}
        base_url = getattr(user_config.tts, "base_url", None)
        if base_url:
            # Only a user-supplied endpoint needs validating and forwarding.
            _validate_runtime_service_url(base_url, "base_url")
            kwargs["base_url"] = base_url
        return OpenAITTSService(
            api_key=user_config.tts.api_key,
            settings=OpenAITTSSettings(model=user_config.tts.model),
            text_filters=[xml_function_tag_filter],
            skip_aggregator_types=["recording_router", "recording"],
            silence_time_s=1.0,
            **kwargs,
        )
    elif user_config.tts.provider == ServiceProviders.GOOGLE.value:
        model = getattr(user_config.tts, "model", None) or "chirp_3_hd"
        language = getattr(user_config.tts, "language", None) or "en-US"
        voice = getattr(user_config.tts, "voice", None) or "en-US-Chirp3-HD-Charon"
        speed = getattr(user_config.tts, "speed", None)
        # Empty values are normalised to None.
        location = getattr(user_config.tts, "location", None) or None
        credentials = getattr(user_config.tts, "credentials", None)

        settings_kwargs = {
            "model": model,
            "voice": voice,
            "language": language,
        }
        # Only send a speaking rate when it differs from the 1.0 default.
        if speed is not None and speed != 1.0:
            settings_kwargs["speaking_rate"] = speed

        return GoogleTTSService(
            credentials=credentials,
            location=location,
            settings=GoogleTTSSettings(**settings_kwargs),
            text_filters=[xml_function_tag_filter],
            skip_aggregator_types=["recording_router", "recording"],
            silence_time_s=1.0,
        )
    elif user_config.tts.provider == ServiceProviders.ELEVENLABS.value:
        # Backward compatible with older configuration "Name - voice_id"
        try:
            voice_id = user_config.tts.voice.split(" - ")[1]
        except IndexError:
            voice_id = user_config.tts.voice
        # ElevenLabs TTS uses WebSocket. Users configure base_url with an HTTP
        # scheme (matching ElevenLabs documentation, e.g.
        # https://api.eu.residency.elevenlabs.io); rewrite it to the WS scheme.
        _validate_runtime_service_url(user_config.tts.base_url, "base_url")
        elevenlabs_url = user_config.tts.base_url.replace("https://", "wss://").replace(
            "http://", "ws://"
        )
        return ElevenLabsTTSService(
            reconnect_on_error=False,
            api_key=user_config.tts.api_key,
            url=elevenlabs_url,
            settings=ElevenLabsTTSSettings(
                voice=voice_id,
                model=user_config.tts.model,
                stability=0.8,
                speed=user_config.tts.speed,
                similarity_boost=0.75,
            ),
            text_filters=[xml_function_tag_filter],
            skip_aggregator_types=["recording_router", "recording"],
            silence_time_s=1.0,
        )
    elif user_config.tts.provider == ServiceProviders.CARTESIA.value:
        speed = getattr(user_config.tts, "speed", None)
        volume = getattr(user_config.tts, "volume", None)
        # Only non-default speed/volume values go into the generation config.
        gen_config_kwargs = {}
        if speed and speed != 1.0:
            gen_config_kwargs["speed"] = speed
        if volume and volume != 1.0:
            gen_config_kwargs["volume"] = volume
        generation_config = (
            GenerationConfig(**gen_config_kwargs) if gen_config_kwargs else None
        )
        return CartesiaTTSService(
            api_key=user_config.tts.api_key,
            settings=CartesiaTTSSettings(
                voice=user_config.tts.voice,
                model=user_config.tts.model,
                **(
                    {"generation_config": generation_config}
                    if generation_config
                    else {}
                ),
            ),
            text_filters=[xml_function_tag_filter],
            skip_aggregator_types=["recording_router", "recording"],
            silence_time_s=1.0,
        )
    elif user_config.tts.provider == ServiceProviders.DOGRAH.value:
        # Convert HTTP URL to WebSocket URL for TTS
        base_url = MPS_API_URL.replace("http://", "ws://").replace("https://", "wss://")
        return DograhTTSService(
            base_url=base_url,
            api_key=user_config.tts.api_key,
            settings=DograhTTSSettings(
                model=user_config.tts.model,
                voice=user_config.tts.voice,
                speed=user_config.tts.speed,
            ),
            text_filters=[xml_function_tag_filter],
            skip_aggregator_types=["recording_router", "recording"],
            silence_time_s=1.0,
        )
    elif user_config.tts.provider == ServiceProviders.CAMB.value:
        from pipecat.services.camb.tts import CambTTSService

        raw_voice = getattr(user_config.tts, "voice", None) or "147320"
        try:
            voice_id = int(raw_voice)
        except (TypeError, ValueError) as e:
            # The voice is converted to an integer ID; report a value that
            # cannot be converted as a client configuration error rather
            # than letting the conversion error escape unhandled.
            raise HTTPException(
                status_code=400,
                detail=f"Invalid Camb voice ID {raw_voice!r}. It must be a numeric voice ID.",
            ) from e
        language = getattr(user_config.tts, "language", None) or "en-us"
        tts = CambTTSService(
            api_key=user_config.tts.api_key,
            voice_id=voice_id,
            model=user_config.tts.model,
            text_filters=[xml_function_tag_filter],
            skip_aggregator_types=["recording_router", "recording"],
        )
        # Set language directly as BCP-47 code (bypasses Language enum conversion)
        tts._settings.language = language
        return tts
    elif user_config.tts.provider == ServiceProviders.SPEACHES.value:
        _validate_runtime_service_url(user_config.tts.base_url, "base_url")
        return SpeachesTTSService(
            base_url=user_config.tts.base_url,
            # A placeholder key is sent when none is configured.
            api_key=user_config.tts.api_key or "none",
            settings=SpeachesTTSSettings(
                model=user_config.tts.model,
                voice=user_config.tts.voice,
                speed=user_config.tts.speed,
            ),
            text_filters=[xml_function_tag_filter],
            skip_aggregator_types=["recording_router", "recording"],
            silence_time_s=1.0,
        )
    elif user_config.tts.provider == ServiceProviders.RIME.value:
        speed = getattr(user_config.tts, "speed", None)
        language_code = getattr(user_config.tts, "language", None) or "en"
        rime_language_mapping = {
            "en": Language.EN,
            "de": Language.DE,
            "fr": Language.FR,
            "es": Language.ES,
            "hi": Language.HI,
        }
        # Codes outside the mapping fall back to English.
        pipecat_language = rime_language_mapping.get(language_code, Language.EN)
        settings_kwargs = {
            "voice": user_config.tts.voice,
            "model": user_config.tts.model,
            "language": pipecat_language,
        }
        if speed and speed != 1.0:
            settings_kwargs["speedAlpha"] = speed
        return RimeTTSService(
            api_key=user_config.tts.api_key,
            settings=RimeTTSSettings(**settings_kwargs),
            text_filters=[xml_function_tag_filter],
            skip_aggregator_types=["recording_router", "recording"],
            silence_time_s=1.0,
        )
    elif user_config.tts.provider == ServiceProviders.SARVAM.value:
        # Map Sarvam language code to pipecat Language enum for TTS
        language_mapping = {
            "bn-IN": Language.BN,
            "en-IN": Language.EN,
            "gu-IN": Language.GU,
            "hi-IN": Language.HI,
            "kn-IN": Language.KN,
            "ml-IN": Language.ML,
            "mr-IN": Language.MR,
            "od-IN": Language.OR,
            "pa-IN": Language.PA,
            "ta-IN": Language.TA,
            "te-IN": Language.TE,
        }
        language = getattr(user_config.tts, "language", None)
        # Missing or unmapped codes fall back to Hindi.
        pipecat_language = language_mapping.get(language, Language.HI)

        voice = getattr(user_config.tts, "voice", None) or "anushka"
        return SarvamTTSService(
            api_key=user_config.tts.api_key,
            settings=SarvamTTSSettings(
                model=user_config.tts.model,
                voice=voice,
                language=pipecat_language,
            ),
            text_filters=[xml_function_tag_filter],
            skip_aggregator_types=["recording_router", "recording"],
            silence_time_s=1.0,
        )
    elif user_config.tts.provider == ServiceProviders.MINIMAX.value:
        group_id = getattr(user_config.tts, "group_id", None)
        if not group_id:
            raise HTTPException(
                status_code=400,
                detail="MiniMax TTS requires a group_id. Configure it in your TTS settings.",
            )
        voice = getattr(user_config.tts, "voice", None) or "English_Graceful_Lady"
        speed = getattr(user_config.tts, "speed", None) or 1.0

        # Pipecat appends "?GroupId=..." to base_url as-is, so /t2a_v2 must
        # already be in the path.
        base_url = (
            getattr(user_config.tts, "base_url", None)
            or "https://api.minimax.io/v1/t2a_v2"
        ).rstrip("/")
        if not base_url.endswith("/t2a_v2"):
            base_url = f"{base_url}/t2a_v2"
        _validate_runtime_service_url(base_url, "base_url")

        # NOTE(review): the session is handed to the service, which presumably
        # owns and closes it (per its name) - confirm in minimax_tts.
        session = aiohttp.ClientSession()
        return MiniMaxOwnedSessionTTSService(
            api_key=user_config.tts.api_key,
            group_id=group_id,
            base_url=base_url,
            aiohttp_session=session,
            settings=MiniMaxTTSSettings(
                model=user_config.tts.model,
                voice=voice,
                speed=speed,
            ),
            text_filters=[xml_function_tag_filter],
            skip_aggregator_types=["recording_router", "recording"],
            silence_time_s=1.0,
        )
    elif user_config.tts.provider == ServiceProviders.AZURE_SPEECH.value:
        region = getattr(user_config.tts, "region", None) or "eastus"
        voice = getattr(user_config.tts, "voice", None) or "en-US-AriaNeural"
        language = getattr(user_config.tts, "language", None) or "en-US"
        speed = getattr(user_config.tts, "speed", None) or 1.0
        # Map speed multiplier (0.5–2.0) to Azure SSML rate string (e.g. "1.25")
        rate = str(speed) if speed != 1.0 else None
        settings_kwargs: dict = {
            "voice": voice,
            "language": language,
        }
        if rate:
            settings_kwargs["rate"] = rate
        return AzureTTSService(
            api_key=user_config.tts.api_key,
            region=region,
            settings=AzureTTSSettings(**settings_kwargs),
            text_filters=[xml_function_tag_filter],
            skip_aggregator_types=["recording_router", "recording"],
            silence_time_s=1.0,
        )
    else:
        raise HTTPException(
            status_code=400, detail=f"Invalid TTS provider {user_config.tts.provider}"
        )


def create_llm_service_from_provider(
    provider: str,
    model: str,
    api_key: str | None,
    *,
    base_url: str | None = None,
    endpoint: str | None = None,
    aws_access_key: str | None = None,
    aws_secret_key: str | None = None,
    aws_region: str | None = None,
    project_id: str | None = None,
    location: str | None = None,
    credentials: str | None = None,
    temperature: float | None = None,
):
    """Build an LLM service from an explicit provider, model and API key.

    Also used by create_llm_service, which extracts these from user_config.

    Args:
        provider: Provider identifier, compared against ``ServiceProviders``
            values.
        model: Model name forwarded to the provider's settings.
        api_key: API key for providers that take one.
        base_url: Optional endpoint override. Validated before use for
            OpenAI, OpenRouter, Speaches and MiniMax; the latter two fall
            back to a built-in default when it is not given.
        endpoint: Azure endpoint; validated when provided.
        aws_access_key: AWS Bedrock access key.
        aws_secret_key: AWS Bedrock secret key.
        aws_region: AWS Bedrock region.
        project_id: Google Vertex project ID.
        location: Google Vertex location; defaults to ``"us-east4"``.
        credentials: Google Vertex credentials.
        temperature: Sampling temperature. Only MiniMax (default 1.0) and
            Sarvam (default 0.5) read it; the other branches use a fixed
            0.1 or pass no temperature at all.

    Returns:
        The constructed LLM service instance for ``provider``.

    Raises:
        HTTPException: Status 400 when ``provider`` is not recognised or a
            supplied URL fails validation.
    """
    logger.info(f"Creating LLM service: provider={provider}, model={model}")

    def _optional_base_url_kwargs() -> dict:
        # Forward base_url only when the caller supplied one, after validation.
        if not base_url:
            return {}
        _validate_runtime_service_url(base_url, "base_url")
        return {"base_url": base_url}

    if provider == ServiceProviders.OPENAI.value:
        extra_kwargs = _optional_base_url_kwargs()
        if "gpt-5" in model:
            # gpt-5 models get reasoning/verbosity extras and no temperature.
            openai_settings = OpenAILLMSettings(
                model=model,
                extra={"reasoning_effort": "minimal", "verbosity": "low"},
            )
        else:
            openai_settings = OpenAILLMSettings(model=model, temperature=0.1)
        return OpenAILLMService(
            api_key=api_key,
            settings=openai_settings,
            **extra_kwargs,
        )

    if provider == ServiceProviders.GROQ.value:
        groq_settings = GroqLLMSettings(model=model, temperature=0.1)
        return GroqLLMService(api_key=api_key, settings=groq_settings)

    if provider == ServiceProviders.OPENROUTER.value:
        extra_kwargs = _optional_base_url_kwargs()
        return OpenRouterLLMService(
            api_key=api_key,
            settings=OpenRouterLLMSettings(model=model, temperature=0.1),
            **extra_kwargs,
        )

    if provider == ServiceProviders.GOOGLE.value:
        google_settings = GoogleLLMSettings(model=model, temperature=0.1)
        return GoogleLLMService(api_key=api_key, settings=google_settings)

    if provider == ServiceProviders.GOOGLE_VERTEX.value:
        return GoogleVertexLLMService(
            credentials=credentials,
            project_id=project_id,
            location=location or "us-east4",
            settings=GoogleVertexLLMSettings(model=model, temperature=0.1),
        )

    if provider == ServiceProviders.AZURE.value:
        if endpoint:
            _validate_runtime_service_url(endpoint, "endpoint")
        return AzureLLMService(
            api_key=api_key,
            endpoint=endpoint,
            settings=AzureLLMSettings(model=model, temperature=0.1),
        )

    if provider == ServiceProviders.DOGRAH.value:
        return DograhLLMService(
            base_url=f"{MPS_API_URL}/api/v1/llm",
            api_key=api_key,
            settings=OpenAILLMSettings(model=model),
        )

    if provider == ServiceProviders.AWS_BEDROCK.value:
        return AWSBedrockLLMService(
            aws_access_key=aws_access_key,
            aws_secret_key=aws_secret_key,
            aws_region=aws_region,
            settings=AWSBedrockLLMSettings(model=model),
        )

    if provider == ServiceProviders.SPEACHES.value:
        speaches_url = base_url or "http://localhost:11434/v1"
        _validate_runtime_service_url(speaches_url, "base_url")
        return SpeachesLLMService(
            base_url=speaches_url,
            # A placeholder key is sent when none is configured.
            api_key=api_key or "none",
            settings=SpeachesLLMSettings(model=model),
        )

    if provider == ServiceProviders.MINIMAX.value:
        minimax_url = base_url or "https://api.minimax.io/v1"
        _validate_runtime_service_url(minimax_url, "base_url")
        minimax_temperature = 1.0 if temperature is None else temperature
        return MiniMaxLLMService(
            api_key=api_key,
            base_url=minimax_url,
            settings=MiniMaxLLMService.Settings(
                model=model,
                temperature=minimax_temperature,
            ),
        )

    if provider == ServiceProviders.SARVAM.value:
        sarvam_temperature = 0.5 if temperature is None else temperature
        return SarvamLLMService(
            api_key=api_key,
            settings=SarvamLLMSettings(
                model=model,
                temperature=sarvam_temperature,
            ),
        )

    raise HTTPException(status_code=400, detail=f"Invalid LLM provider {provider}")


def _openai_style_realtime_session_properties(voice):
    """Build the session properties shared by the OpenAI and Azure realtime branches.

    Enables input audio transcription and sets the output voice, falling back
    to "alloy" when ``voice`` is empty or None.
    """
    # Imported lazily, like the other provider-specific imports used by
    # create_realtime_llm_service.
    from pipecat.services.openai.realtime.events import (
        AudioConfiguration,
        AudioInput,
        AudioOutput,
        InputAudioTranscription,
        SessionProperties,
    )

    return SessionProperties(
        audio=AudioConfiguration(
            input=AudioInput(
                transcription=InputAudioTranscription(),
            ),
            output=AudioOutput(
                voice=voice or "alloy",
            ),
        ),
    )


def create_realtime_llm_service(user_config, audio_config: "AudioConfig"):
    """Create a realtime (speech-to-speech) LLM service that handles STT+LLM+TTS.

    These services bypass separate STT/TTS and handle audio directly via
    a bidirectional WebSocket connection. Reads from user_config.realtime.

    Args:
        user_config: User configuration. ``user_config.realtime`` must expose
            ``provider``, ``model`` and ``api_key``; ``voice``, ``language``
            and the provider-specific fields (``project_id``, ``location``,
            ``credentials``, ``endpoint``, ``api_version``) are optional and
            default to None when absent.
        audio_config: Audio configuration. Not read by this function.

    Returns:
        The realtime LLM service instance for the configured provider.

    Raises:
        HTTPException: With status 400 when the provider is not a supported
            realtime provider, when Azure Realtime is configured without an
            endpoint, or when the Azure endpoint is rejected by URL
            validation or has no host component.
    """
    realtime_config = user_config.realtime
    provider = realtime_config.provider
    model = realtime_config.model
    api_key = realtime_config.api_key
    voice = getattr(realtime_config, "voice", None)
    language = getattr(realtime_config, "language", None)

    logger.info(
        f"Creating realtime LLM service: provider={provider}, model={model}, voice={voice}, language={language}"
    )

    if provider == ServiceProviders.OPENAI_REALTIME.value:
        from api.services.pipecat.realtime.openai_realtime import (
            DograhOpenAIRealtimeLLMService,
        )

        return DograhOpenAIRealtimeLLMService(
            api_key=api_key,
            settings=DograhOpenAIRealtimeLLMService.Settings(
                model=model,
                session_properties=_openai_style_realtime_session_properties(voice),
            ),
        )
    elif provider == ServiceProviders.GROK_REALTIME.value:
        from api.services.pipecat.realtime.grok_realtime import (
            DograhGrokRealtimeLLMService,
        )
        from pipecat.services.xai.realtime.events import SessionProperties

        return DograhGrokRealtimeLLMService(
            api_key=api_key,
            settings=DograhGrokRealtimeLLMService.Settings(
                model=model,
                session_properties=SessionProperties(
                    voice=voice or "Ara",
                ),
            ),
        )
    elif provider == ServiceProviders.ULTRAVOX_REALTIME.value:
        from api.services.pipecat.realtime.ultravox_realtime import (
            DograhUltravoxOneShotInputParams,
            DograhUltravoxRealtimeLLMService,
        )

        # No default voice here: the configured value (possibly None) is
        # passed through unchanged.
        return DograhUltravoxRealtimeLLMService(
            params=DograhUltravoxOneShotInputParams(
                api_key=api_key,
                model=model,
                voice=voice,
                output_medium="voice",
            ),
            settings=DograhUltravoxRealtimeLLMService.Settings(
                model=model,
                output_medium="voice",
            ),
        )
    elif provider == ServiceProviders.GOOGLE_REALTIME.value:
        from api.services.pipecat.realtime.gemini_live import (
            DograhGeminiLiveLLMService,
        )

        # Gemini Live enables input/output audio transcription by default
        # in its _connect() method — no need to configure it explicitly.
        settings_kwargs = {
            "model": model,
            "voice": voice or "Puck",
        }
        # Only forward a language when one is configured.
        if language:
            settings_kwargs["language"] = language
        return DograhGeminiLiveLLMService(
            api_key=api_key,
            settings=DograhGeminiLiveLLMService.Settings(**settings_kwargs),
        )
    elif provider == ServiceProviders.GOOGLE_VERTEX_REALTIME.value:
        from api.services.pipecat.realtime.gemini_live_vertex import (
            DograhGeminiLiveVertexLLMService,
        )

        project_id = getattr(realtime_config, "project_id", None)
        location = getattr(realtime_config, "location", None) or "us-east4"
        credentials = getattr(realtime_config, "credentials", None)

        settings_kwargs = {
            "model": model,
            "voice": voice or "Charon",
        }
        # Only forward a language when one is configured.
        if language:
            settings_kwargs["language"] = language
        return DograhGeminiLiveVertexLLMService(
            credentials=credentials,
            project_id=project_id,
            location=location,
            settings=DograhGeminiLiveVertexLLMService.Settings(**settings_kwargs),
        )
    elif provider == ServiceProviders.AZURE_REALTIME.value:
        from api.services.pipecat.realtime.azure_realtime import (
            DograhAzureRealtimeLLMService,
        )

        endpoint = getattr(realtime_config, "endpoint", None) or ""
        if not endpoint:
            raise HTTPException(
                status_code=400,
                detail="Azure Realtime requires an endpoint.",
            )
        _validate_runtime_service_url(endpoint, "endpoint")
        api_version = (
            getattr(realtime_config, "api_version", None) or "2025-04-01-preview"
        )
        # Construct the Azure Realtime WebSocket URL
        # https://<resource>.openai.azure.com/openai/realtime?api-version=<ver>&deployment=<model>
        parsed_endpoint = urlparse(endpoint)
        if not parsed_endpoint.netloc:
            # A scheme-less endpoint (e.g. "myres.openai.azure.com") parses
            # with an empty netloc, which would produce the unusable URL
            # "wss:///openai/realtime?...". Fail with a clear client error.
            raise HTTPException(
                status_code=400,
                detail=(
                    "Azure Realtime endpoint must be a full URL including a "
                    "scheme and host (e.g. https://<resource>.openai.azure.com)."
                ),
            )
        # Only the host part of the endpoint is reused; scheme, path and
        # query are replaced.
        wss_url = urlunparse(
            (
                "wss",
                parsed_endpoint.netloc,
                "/openai/realtime",
                "",
                urlencode({"api-version": api_version, "deployment": model}),
                "",
            )
        )
        return DograhAzureRealtimeLLMService(
            api_key=api_key,
            base_url=wss_url,
            settings=DograhAzureRealtimeLLMService.Settings(
                model=model,
                session_properties=_openai_style_realtime_session_properties(voice),
            ),
        )
    else:
        raise HTTPException(
            status_code=400, detail=f"Invalid realtime LLM provider {provider}"
        )


def create_llm_service(user_config):
    """Build the LLM service described by ``user_config.llm``.

    Reads ``provider``, ``model`` and ``api_key`` from the LLM configuration,
    gathers the provider-specific settings listed below, and delegates the
    actual construction to ``create_llm_service_from_provider``. Providers
    that are not listed are forwarded without extra keyword arguments.
    """
    llm_config = user_config.llm
    provider = llm_config.provider
    model = llm_config.model
    api_key = llm_config.api_key

    # (provider value, configuration attributes forwarded as same-named kwargs).
    # Scanned in order with ``==`` so matching works exactly like an
    # if/elif chain over the provider values.
    extra_fields_by_provider = (
        (ServiceProviders.OPENAI.value, ("base_url",)),
        (ServiceProviders.OPENROUTER.value, ("base_url",)),
        (ServiceProviders.AZURE.value, ("endpoint",)),
        (ServiceProviders.SPEACHES.value, ("base_url",)),
        (
            ServiceProviders.AWS_BEDROCK.value,
            ("aws_access_key", "aws_secret_key", "aws_region"),
        ),
        (
            ServiceProviders.GOOGLE_VERTEX.value,
            ("project_id", "location", "credentials"),
        ),
        (ServiceProviders.MINIMAX.value, ("base_url", "temperature")),
        (ServiceProviders.SARVAM.value, ("temperature",)),
    )

    extra_kwargs = {}
    for provider_value, field_names in extra_fields_by_provider:
        if provider == provider_value:
            extra_kwargs = {
                field_name: getattr(llm_config, field_name)
                for field_name in field_names
            }
            break

    return create_llm_service_from_provider(provider, model, api_key, **extra_kwargs)
-												Initial Commit 🚀 🚀

											
										
										
											2025-09-09 14:37:32 +05:30
+								from typing import TYPE_CHECKING
-												feat: add Azure AI multi-provider support (TTS, STT, Embeddings, Realtime) (#381)

* feat: add Azure AI multi-provider support (TTS, STT, Embeddings, Realtime)

Enables Azure AI services across all model layers so users with Azure
credits can consolidate billing on a single provider.

- Voice (TTS): AzureSpeechTTSConfiguration via azure_speech provider
- Transcriber (STT): AzureSpeechSTTConfiguration via azure_speech provider
- Embedding: AzureOpenAIEmbeddingsConfiguration via azure provider
- Realtime: AzureRealtimeLLMConfiguration via azure_realtime provider

New files:
- api/services/pipecat/realtime/azure_realtime.py
- api/services/gen_ai/embedding/azure_openai_service.py
- api/tests/test_azure_speech_service_factory.py

The UI picks up all four providers automatically from the schema —
no frontend changes required.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* fix: add validation for URL and params

---------

Co-authored-by: Vishal Dhateria <vishal@finela.ai>
Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
Co-authored-by: Abhishek Kumar <abhishek@a6k.me>
											
										
										
											2026-06-02 12:50:00 +05:30
+								from urllib.parse import urlencode, urlparse, urlunparse
-												Initial Commit 🚀 🚀

											
										
										
											2025-09-09 14:37:32 +05:30
-												feat: add MiniMax provider support (Chat + TTS) (#309)

* feat: add MiniMax provider support (Chat + TTS)

- Add MiniMax LLM provider using OpenAI-compatible API
  - Models: MiniMax-M2.7, MiniMax-M2.7-highspeed
  - Default base URL: https://api.minimax.io/v1
  - Uses MINIMAX_API_KEY for authentication
- Add MiniMax TTS provider using Pipecat's MiniMaxHttpTTSService
  - Models: speech-2.8-hd (default), speech-2.8-turbo
  - 6 built-in voices
  - Requires group_id configuration
- Add unit tests for both providers

* fix(minimax): validator, temperature, session cleanup, reasoning filter
  - check_validity.py: wire MiniMax into _validator_map and enforce
    group_id at save time. Without this, saving a config with a valid
    key was rejected.
  - registry.py: surface temperature on the LLM config (gt=0; MiniMax
    rejects 0) and base_url on the TTS config
  - service_factory.py:
    * Plumb temperature through create_llm_service
    * Normalize TTS base_url to include /t2a_v2 — pipecat appends only
      ?GroupId=... to the URL.
    * Use the new MiniMaxLLMService (from pipecat) to strip
      <think>...</think> reasoning that MiniMax-M2.7 emits inline in
      delta.content (otherwise it leaks straight to TTS).
    * Use MiniMaxOwnedSessionTTSService so the per-instance aiohttp
      session gets closed in cleanup() instead of leaking sockets/FDs.
  - minimax_tts.py: small wrapper around MiniMaxHttpTTSService that owns
    the session it was handed (pipecat's caller-owns-session API
    conflicts with the factory's per-instance pattern).
  - pipecat submodule: bumps to a commit that adds MiniMaxLLMService — a
    thin OpenAILLMService subclass with the streaming <think> filter
    (mirrors NvidiaLLMService's pattern for NIM reasoning models).
  - Tests updated/added for all of the above.

  Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

---------

Co-authored-by: octo-patch <octo-patch@github.com>
Co-authored-by: Sabiha Khan <sabihak89@gmail.com>
											
										
										
											2026-05-22 15:39:41 +08:00
+								import aiohttp
-												Initial Commit 🚀 🚀

											
										
										
											2025-09-09 14:37:32 +05:30
+								from fastapi import HTTPException
-												chore: render initial and gathered context

											
										
										
											2025-12-31 22:02:50 +05:30
+								from loguru import logger
-												Initial Commit 🚀 🚀

											
										
										
											2025-09-09 14:37:32 +05:30
 								from api.constants import MPS_API_URL
 								from api.services.configuration.registry import ServiceProviders
-												feat: add MiniMax provider support (Chat + TTS) (#309)

* feat: add MiniMax provider support (Chat + TTS)

- Add MiniMax LLM provider using OpenAI-compatible API
  - Models: MiniMax-M2.7, MiniMax-M2.7-highspeed
  - Default base URL: https://api.minimax.io/v1
  - Uses MINIMAX_API_KEY for authentication
- Add MiniMax TTS provider using Pipecat's MiniMaxHttpTTSService
  - Models: speech-2.8-hd (default), speech-2.8-turbo
  - 6 built-in voices
  - Requires group_id configuration
- Add unit tests for both providers

* fix(minimax): validator, temperature, session cleanup, reasoning filter
  - check_validity.py: wire MiniMax into _validator_map and enforce
    group_id at save time. Without this, saving a config with a valid
    key was rejected.
  - registry.py: surface temperature on the LLM config (gt=0; MiniMax
    rejects 0) and base_url on the TTS config
  - service_factory.py:
    * Plumb temperature through create_llm_service
    * Normalize TTS base_url to include /t2a_v2 — pipecat appends only
      ?GroupId=... to the URL.
    * Use the new MiniMaxLLMService (from pipecat) to strip
      <think>...</think> reasoning that MiniMax-M2.7 emits inline in
      delta.content (otherwise it leaks straight to TTS).
    * Use MiniMaxOwnedSessionTTSService so the per-instance aiohttp
      session gets closed in cleanup() instead of leaking sockets/FDs.
  - minimax_tts.py: small wrapper around MiniMaxHttpTTSService that owns
    the session it was handed (pipecat's caller-owns-session API
    conflicts with the factory's per-instance pattern).
  - pipecat submodule: bumps to a commit that adds MiniMaxLLMService — a
    thin OpenAILLMService subclass with the streaming <think> filter
    (mirrors NvidiaLLMService's pattern for NIM reasoning models).
  - Tests updated/added for all of the above.

  Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

---------

Co-authored-by: octo-patch <octo-patch@github.com>
Co-authored-by: Sabiha Khan <sabihak89@gmail.com>
											
										
										
											2026-05-22 15:39:41 +08:00
+								from api.services.pipecat.minimax_tts import MiniMaxOwnedSessionTTSService
-												feat: allow overriding base URL of OpenAI models (#368)

* Add OpenAI-compatible API option in model configuration

Backend-only cherry-pick from 20617db37a8417e4ee4f64efb6063fc5cd4aea98.

* Potential fix for pull request finding

Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>

* fix: harden the base url settings in SaaS mode

---------

Co-authored-by: Chris Briddock <briddockchristopher@gmail.com>
Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>
											
										
										
											2026-05-27 13:07:45 +05:30
+								from api.utils.url_security import validate_user_configured_service_url
-												feat: add Assembly AI STT

											
										
										
											2026-04-03 07:10:37 +05:30
+								from pipecat.services.assemblyai.stt import AssemblyAISTTService, AssemblyAISTTSettings
-												feat: add AWS Bedrock support

											
										
										
											2026-03-19 15:06:59 +05:30
+								from pipecat.services.aws.llm import AWSBedrockLLMService, AWSBedrockLLMSettings
-												feat: add hybrid text + recording functionality in agents (#191)

* feat: add recording feature in agents

* chore: pin pipecat version

* feat: show usage in UI

* chore: update pipecat
											
										
										
											2026-03-16 15:04:08 +05:30
+								from pipecat.services.azure.llm import AzureLLMService, AzureLLMSettings
-												feat: add Azure AI multi-provider support (TTS, STT, Embeddings, Realtime) (#381)

* feat: add Azure AI multi-provider support (TTS, STT, Embeddings, Realtime)

Enables Azure AI services across all model layers so users with Azure
credits can consolidate billing on a single provider.

- Voice (TTS): AzureSpeechTTSConfiguration via azure_speech provider
- Transcriber (STT): AzureSpeechSTTConfiguration via azure_speech provider
- Embedding: AzureOpenAIEmbeddingsConfiguration via azure provider
- Realtime: AzureRealtimeLLMConfiguration via azure_realtime provider

New files:
- api/services/pipecat/realtime/azure_realtime.py
- api/services/gen_ai/embedding/azure_openai_service.py
- api/tests/test_azure_speech_service_factory.py

The UI picks up all four providers automatically from the schema —
no frontend changes required.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* fix: add validation for URL and params

---------

Co-authored-by: Vishal Dhateria <vishal@finela.ai>
Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
Co-authored-by: Abhishek Kumar <abhishek@a6k.me>
											
										
										
											2026-06-02 12:50:00 +05:30
+								from pipecat.services.azure.stt import AzureSTTService, AzureSTTSettings
 								from pipecat.services.azure.tts import AzureTTSService, AzureTTSSettings
-												Initial Commit 🚀 🚀

											
										
										
											2025-09-09 14:37:32 +05:30
+								from pipecat.services.cartesia.stt import CartesiaSTTService
-												feat: add support for self hosted llm models

											
										
										
											2026-03-24 17:50:45 +05:30
+								from pipecat.services.cartesia.tts import (
 								    CartesiaTTSService,
 								    CartesiaTTSSettings,
 								    GenerationConfig,
 								)
-												feat: add hybrid text + recording functionality in agents (#191)

* feat: add recording feature in agents

* chore: pin pipecat version

* feat: show usage in UI

* chore: update pipecat
											
										
										
											2026-03-16 15:04:08 +05:30
+								from pipecat.services.deepgram.flux.stt import (
 								    DeepgramFluxSTTService,
 								    DeepgramFluxSTTSettings,
 								)
 								from pipecat.services.deepgram.stt import DeepgramSTTService, DeepgramSTTSettings
 								from pipecat.services.deepgram.tts import DeepgramTTSService, DeepgramTTSSettings
-												Initial Commit 🚀 🚀

											
										
										
											2025-09-09 14:37:32 +05:30
+								from pipecat.services.dograh.llm import DograhLLMService
-												feat: add hybrid text + recording functionality in agents (#191)

* feat: add recording feature in agents

* chore: pin pipecat version

* feat: show usage in UI

* chore: update pipecat
											
										
										
											2026-03-16 15:04:08 +05:30
+								from pipecat.services.dograh.stt import DograhSTTService, DograhSTTSettings
 								from pipecat.services.dograh.tts import DograhTTSService, DograhTTSSettings
 								from pipecat.services.elevenlabs.tts import ElevenLabsTTSService, ElevenLabsTTSSettings
-												feat: add gladia stt support

											
										
										
											2026-04-04 14:47:48 +05:30
+								from pipecat.services.gladia.stt import GladiaSTTService, GladiaSTTSettings
-												feat: add hybrid text + recording functionality in agents (#191)

* feat: add recording feature in agents

* chore: pin pipecat version

* feat: show usage in UI

* chore: update pipecat
											
										
										
											2026-03-16 15:04:08 +05:30
+								from pipecat.services.google.llm import GoogleLLMService, GoogleLLMSettings
-												feat: add google stt and tts. add folders to organize agents

											
										
										
											2026-05-22 14:36:50 +05:30
+								from pipecat.services.google.stt import GoogleSTTService, GoogleSTTSettings
 								from pipecat.services.google.tts import GoogleTTSService, GoogleTTSSettings
-												feat: add xai grok as realtime model

											
										
										
											2026-05-22 18:04:59 +05:30
+								from pipecat.services.google.vertex.llm import (
 								    GoogleVertexLLMService,
 								    GoogleVertexLLMSettings,
 								)
-												feat: add hybrid text + recording functionality in agents (#191)

* feat: add recording feature in agents

* chore: pin pipecat version

* feat: show usage in UI

* chore: update pipecat
											
										
										
											2026-03-16 15:04:08 +05:30
+								from pipecat.services.groq.llm import GroqLLMService, GroqLLMSettings
-												feat: add xai grok as realtime model

											
										
										
											2026-05-22 18:04:59 +05:30
+								from pipecat.services.minimax.llm import MiniMaxLLMService
 								from pipecat.services.minimax.tts import MiniMaxTTSSettings
-												feat: add hybrid text + recording functionality in agents (#191)

* feat: add recording feature in agents

* chore: pin pipecat version

* feat: show usage in UI

* chore: update pipecat
											
										
										
											2026-03-16 15:04:08 +05:30
+								from pipecat.services.openai.base_llm import OpenAILLMSettings
-												Initial Commit 🚀 🚀

											
										
										
											2025-09-09 14:37:32 +05:30
+								from pipecat.services.openai.llm import OpenAILLMService
-												feat: add gemini live and speaches integration (#220)

* feat: add speaches models

* feat: add gemini realtime and speaches integration

- Add gemini realtime support
- Add speaches support for locally hosted LLMs

* chore: bump pipecat

* feat: add language option

* fix: add skip aggregator types to tts settings

* fix: make API key optional for realtime
											
										
										
											2026-03-31 21:42:03 +05:30
+								from pipecat.services.openai.stt import (
 								    OpenAISTTService,
 								    OpenAISTTSettings,
 								)
-												feat: add hybrid text + recording functionality in agents (#191)

* feat: add recording feature in agents

* chore: pin pipecat version

* feat: show usage in UI

* chore: update pipecat
											
										
										
											2026-03-16 15:04:08 +05:30
+								from pipecat.services.openai.tts import OpenAITTSService, OpenAITTSSettings
 								from pipecat.services.openrouter.llm import OpenRouterLLMService, OpenRouterLLMSettings
-												feat: add Rime TTS

											
										
										
											2026-04-07 14:05:47 +05:30
+								from pipecat.services.rime.tts import RimeTTSService, RimeTTSSettings
-												Add Sarvam LLM, update Sarvam STT models, expose usage_info on run detail (#351)

* Add Sarvam LLM provider, update Sarvam STT models, expose usage_info on run detail.
Depends on pipecat PR dograh-hq/pipecat#43 for STT string language support.
Submodule bump will follow after that merges.

* test: cover Sarvam STT language mapping; link Sarvam docs

---------

Co-authored-by: Sabiha Khan <sabihak89@gmail.com>
											
										
										
											2026-06-01 10:29:31 +05:30
+								from pipecat.services.sarvam.llm import SarvamLLMService, SarvamLLMSettings
-												feat: add hybrid text + recording functionality in agents (#191)

* feat: add recording feature in agents

* chore: pin pipecat version

* feat: show usage in UI

* chore: update pipecat
											
										
										
											2026-03-16 15:04:08 +05:30
+								from pipecat.services.sarvam.stt import SarvamSTTService, SarvamSTTSettings
 								from pipecat.services.sarvam.tts import SarvamTTSService, SarvamTTSSettings
-												feat: add gemini live and speaches integration (#220)

* feat: add speaches models

* feat: add gemini realtime and speaches integration

- Add gemini realtime support
- Add speaches support for locally hosted LLMs

* chore: bump pipecat

* feat: add language option

* fix: add skip aggregator types to tts settings

* fix: make API key optional for realtime
											
										
										
											2026-03-31 21:42:03 +05:30
+								from pipecat.services.speaches.llm import SpeachesLLMService, SpeachesLLMSettings
 								from pipecat.services.speaches.stt import SpeachesSTTService, SpeachesSTTSettings
 								from pipecat.services.speaches.tts import SpeachesTTSService, SpeachesTTSSettings
-												feat: add hybrid text + recording functionality in agents (#191)

* feat: add recording feature in agents

* chore: pin pipecat version

* feat: show usage in UI

* chore: update pipecat
											
										
										
											2026-03-16 15:04:08 +05:30
+								from pipecat.services.speechmatics.stt import (
 								    SpeechmaticsSTTService,
 								    SpeechmaticsSTTSettings,
 								)
-												feat: add voice selectors in elevenlabs (#88)


											
										
										
											2025-12-25 15:05:53 +05:30
+								from pipecat.transcriptions.language import Language
-												fix: add text filter for tts and logs for filter (#74)


											
										
										
											2025-12-09 16:24:24 +05:30
+								from pipecat.utils.text.xml_function_tag_filter import XMLFunctionTagFilter
-												Initial Commit 🚀 🚀

											
										
										
											2025-09-09 14:37:32 +05:30
 								if TYPE_CHECKING:
 								    from api.services.pipecat.audio_config import AudioConfig
-												feat: allow overriding base URL of OpenAI models (#368)

* Add OpenAI-compatible API option in model configuration

Backend-only cherry-pick from 20617db37a8417e4ee4f64efb6063fc5cd4aea98.

* Potential fix for pull request finding

Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>

* fix: harden the base url settings in SaaS mode

---------

Co-authored-by: Chris Briddock <briddockchristopher@gmail.com>
Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>
											
										
										
											2026-05-27 13:07:45 +05:30
+								def _validate_runtime_service_url(url: str, field_name: str) -> None:
 								    try:
 								        validate_user_configured_service_url(
 								            url,
 								            field_name=field_name,
 								        )
 								    except ValueError as e:
 								        raise HTTPException(status_code=400, detail=str(e)) from e
-												chore: update pipecat submodule

											
										
										
											2026-02-11 14:15:19 +05:30
+								def create_stt_service(
 								    user_config, audio_config: "AudioConfig", keyterms: list[str] | None = None
 								):
-												feat: add dictionary support for STT boosting in voice agents (#136)

* feat: add dictionary support for voice agents

Also fixes #132

* chore: add keyterms in evals
											
										
										
											2026-01-29 11:20:07 +05:30
+								    """Create and return appropriate STT service based on user configuration
 								    Args:
 								        user_config: User configuration containing STT settings
 								        keyterms: Optional list of keyterms for speech recognition boosting (Deepgram only)
 								    """
-												chore: minor fixes

											
										
										
											2026-01-13 14:55:48 +05:30
+								    logger.info(
 								        f"Creating STT service: provider={user_config.stt.provider}, model={user_config.stt.model}"
 								    )
-												Initial Commit 🚀 🚀

											
										
										
											2025-09-09 14:37:32 +05:30
+								    if user_config.stt.provider == ServiceProviders.DEEPGRAM.value:
-												fix: changes to update pipecat version to 0.0.100 (#122)

* feat: add stt evals

* add smart turn as provider

* chore: remove deprecations

* chore: format files

* fix: remove deprecated UserIdleProcessor

* fix: remove deprecated TranscriptProcessor

* chore: update pipecat submodule

* feat: add evals visualisation

* fix: trigger llm generation on client connected and pipeline started

* chore: update pipecat

* chore: update pipecat submodule

* Add tests

* fix: slow loading of workflow page

* chore: update pipecat submodule

* Show version after release

* Fixes #99

* fix: provider check for websocket connection

* Fixes #107

* Fix #96

* chore: fix documentation

* fix: cloudonix campaign call error

---------

Co-authored-by: Sabiha Khan <sabihak89@gmail.com>
											
										
										
											2026-01-23 18:53:59 +05:30
+								        # Check if using Flux model (English-only, no language selection)
 								        if user_config.stt.model == "flux-general-en":
 								            return DeepgramFluxSTTService(
 								                api_key=user_config.stt.api_key,
-												feat: add hybrid text + recording functionality in agents (#191)

* feat: add recording feature in agents

* chore: pin pipecat version

* feat: show usage in UI

* chore: update pipecat
											
										
										
											2026-03-16 15:04:08 +05:30
+								                settings=DeepgramFluxSTTSettings(
 								                    model=user_config.stt.model,
-												fix: changes to update pipecat version to 0.0.100 (#122)

* feat: add stt evals

* add smart turn as provider

* chore: remove deprecations

* chore: format files

* fix: remove deprecated UserIdleProcessor

* fix: remove deprecated TranscriptProcessor

* chore: update pipecat submodule

* feat: add evals visualisation

* fix: trigger llm generation on client connected and pipeline started

* chore: update pipecat

* chore: update pipecat submodule

* Add tests

* fix: slow loading of workflow page

* chore: update pipecat submodule

* Show version after release

* Fixes #99

* fix: provider check for websocket connection

* Fixes #107

* Fix #96

* chore: fix documentation

* fix: cloudonix campaign call error

---------

Co-authored-by: Sabiha Khan <sabihak89@gmail.com>
											
										
										
											2026-01-23 18:53:59 +05:30
+								                    eot_timeout_ms=3000,
 								                    eot_threshold=0.7,
-												feat: add early voicemail detection

											
										
										
											2026-03-07 12:41:24 +05:30
+								                    eager_eot_threshold=0.5,
-												feat: add dictionary support for STT boosting in voice agents (#136)

* feat: add dictionary support for voice agents

Also fixes #132

* chore: add keyterms in evals
											
										
										
											2026-01-29 11:20:07 +05:30
+								                    keyterm=keyterms or [],
-												fix: changes to update pipecat version to 0.0.100 (#122)

* feat: add stt evals

* add smart turn as provider

* chore: remove deprecations

* chore: format files

* fix: remove deprecated UserIdleProcessor

* fix: remove deprecated TranscriptProcessor

* chore: update pipecat submodule

* feat: add evals visualisation

* fix: trigger llm generation on client connected and pipeline started

* chore: update pipecat

* chore: update pipecat submodule

* Add tests

* fix: slow loading of workflow page

* chore: update pipecat submodule

* Show version after release

* Fixes #99

* fix: provider check for websocket connection

* Fixes #107

* Fix #96

* chore: fix documentation

* fix: cloudonix campaign call error

---------

Co-authored-by: Sabiha Khan <sabihak89@gmail.com>
											
										
										
											2026-01-23 18:53:59 +05:30
+								                ),
 								                should_interrupt=False,  # Let UserAggregator take care of sending InterruptionFrame
-												chore: update pipecat submodule

											
										
										
											2026-02-11 14:15:19 +05:30
+								                sample_rate=audio_config.transport_in_sample_rate,
-												fix: changes to update pipecat version to 0.0.100 (#122)

* feat: add stt evals

* add smart turn as provider

* chore: remove deprecations

* chore: format files

* fix: remove deprecated UserIdleProcessor

* fix: remove deprecated TranscriptProcessor

* chore: update pipecat submodule

* feat: add evals visualisation

* fix: trigger llm generation on client connected and pipeline started

* chore: update pipecat

* chore: update pipecat submodule

* Add tests

* fix: slow loading of workflow page

* chore: update pipecat submodule

* Show version after release

* Fixes #99

* fix: provider check for websocket connection

* Fixes #107

* Fix #96

* chore: fix documentation

* fix: cloudonix campaign call error

---------

Co-authored-by: Sabiha Khan <sabihak89@gmail.com>
											
										
										
											2026-01-23 18:53:59 +05:30
+								            )
 								        # Other models than flux
-												feat: add voice selectors in elevenlabs (#88)


											
										
										
											2025-12-25 15:05:53 +05:30
+								        # Use language from user config, defaulting to "multi" for multilingual support
-												fix: change type definition from enum to str for consistency

											
										
										
											2025-12-26 16:00:02 +05:30
+								        language = getattr(user_config.stt, "language", None) or "multi"
-												chore: render initial and gathered context

											
										
										
											2025-12-31 22:02:50 +05:30
+								        logger.debug(f"Using DeepGram Model - {user_config.stt.model}")
-												Initial Commit 🚀 🚀

											
										
										
											2025-09-09 14:37:32 +05:30
+								        return DeepgramSTTService(
-												fix: changes to update pipecat version to 0.0.100 (#122)

* feat: add stt evals

* add smart turn as provider

* chore: remove deprecations

* chore: format files

* fix: remove deprecated UserIdleProcessor

* fix: remove deprecated TranscriptProcessor

* chore: update pipecat submodule

* feat: add evals visualisation

* fix: trigger llm generation on client connected and pipeline started

* chore: update pipecat

* chore: update pipecat submodule

* Add tests

* fix: slow loading of workflow page

* chore: update pipecat submodule

* Show version after release

* Fixes #99

* fix: provider check for websocket connection

* Fixes #107

* Fix #96

* chore: fix documentation

* fix: cloudonix campaign call error

---------

Co-authored-by: Sabiha Khan <sabihak89@gmail.com>
											
										
										
											2026-01-23 18:53:59 +05:30
+								            api_key=user_config.stt.api_key,
-												feat: add hybrid text + recording functionality in agents (#191)

* feat: add recording feature in agents

* chore: pin pipecat version

* feat: show usage in UI

* chore: update pipecat
											
										
										
											2026-03-16 15:04:08 +05:30
+								            settings=DeepgramSTTSettings(
 								                language=language,
 								                profanity_filter=False,
 								                endpointing=100,
 								                model=user_config.stt.model,
 								                keyterm=keyterms or [],
 								            ),
-												fix: changes to update pipecat version to 0.0.100 (#122)

* feat: add stt evals

* add smart turn as provider

* chore: remove deprecations

* chore: format files

* fix: remove deprecated UserIdleProcessor

* fix: remove deprecated TranscriptProcessor

* chore: update pipecat submodule

* feat: add evals visualisation

* fix: trigger llm generation on client connected and pipeline started

* chore: update pipecat

* chore: update pipecat submodule

* Add tests

* fix: slow loading of workflow page

* chore: update pipecat submodule

* Show version after release

* Fixes #99

* fix: provider check for websocket connection

* Fixes #107

* Fix #96

* chore: fix documentation

* fix: cloudonix campaign call error

---------

Co-authored-by: Sabiha Khan <sabihak89@gmail.com>
											
										
										
											2026-01-23 18:53:59 +05:30
+								            should_interrupt=False,  # Let UserAggregator take care of sending InterruptionFrame
-												chore: update pipecat submodule

											
										
										
											2026-02-11 14:15:19 +05:30
+								            sample_rate=audio_config.transport_in_sample_rate,
-												Initial Commit 🚀 🚀

											
										
										
											2025-09-09 14:37:32 +05:30
+								        )
 								    elif user_config.stt.provider == ServiceProviders.OPENAI.value:
-												feat: allow overriding base URL of OpenAI STT and TTS (#377)

Mirrors the LLM treatment from #368 for the OpenAI STT and OpenAI TTS
providers. Users running OpenAI-compatible self-hosted services (vLLM,
Speaches, llama.cpp, custom proxies) can now point Dograh at them via
the OpenAI provider with `base_url`, instead of being forced onto the
Speaches provider as a workaround.

Changes:

* `registry.py` — add `base_url` field (default `https://api.openai.com/v1`)
  to `OpenAISTTConfiguration` and `OpenAITTSService`, identical in shape
  to the existing `OpenAILLMService.base_url` from #368.

* `service_factory.py` — in the OPENAI branches of `create_stt_service`
  and `create_tts_service`, lift `base_url` off the user config, run it
  through `_validate_runtime_service_url`, and forward it as a kwarg to
  `OpenAISTTService` / `OpenAITTSService` (both already accept it). Same
  pattern as the LLM branch.

* `test_user_configured_service_url_security.py` — adds four runtime
  validation tests covering private-IP rejection and localhost rejection
  in SaaS mode for both STT and TTS. Existing OSS-mode permissiveness
  is unchanged (DEPLOYMENT_MODE=oss skips the validator, as before).

No schema migration needed — Pydantic populates the default; existing
configurations without `base_url` continue to talk to api.openai.com.

`check_validity.py` requires no edits because the per-service validation
loop already iterates `("base_url", "endpoint")` via `getattr`, and the
`_check_openai_api_key` dispatcher already routes OPENAI providers
through the base_url-aware code path (introduced in #368) for STT and
TTS too.

Tests pass locally:

    pytest api/tests/test_user_configured_service_url_security.py
    23 passed in 4.80s   (19 existing + 4 new)

Co-authored-by: developer603 <developer603@users.noreply.github.com>
											
										
										
											2026-06-02 12:06:58 +05:30
+								        kwargs = {}
 								        base_url = getattr(user_config.stt, "base_url", None)
 								        if base_url:
 								            _validate_runtime_service_url(base_url, "base_url")
 								            kwargs["base_url"] = base_url
-												Initial Commit 🚀 🚀

											
										
										
											2025-09-09 14:37:32 +05:30
+								        return OpenAISTTService(
-												feat: add hybrid text + recording functionality in agents (#191)

* feat: add recording feature in agents

* chore: pin pipecat version

* feat: show usage in UI

* chore: update pipecat
											
										
										
											2026-03-16 15:04:08 +05:30
+								            api_key=user_config.stt.api_key,
 								            settings=OpenAISTTSettings(model=user_config.stt.model),
-												feat: allow overriding base URL of OpenAI STT and TTS (#377)

Mirrors the LLM treatment from #368 for the OpenAI STT and OpenAI TTS
providers. Users running OpenAI-compatible self-hosted services (vLLM,
Speaches, llama.cpp, custom proxies) can now point Dograh at them via
the OpenAI provider with `base_url`, instead of being forced onto the
Speaches provider as a workaround.

Changes:

* `registry.py` — add `base_url` field (default `https://api.openai.com/v1`)
  to `OpenAISTTConfiguration` and `OpenAITTSService`, identical in shape
  to the existing `OpenAILLMService.base_url` from #368.

* `service_factory.py` — in the OPENAI branches of `create_stt_service`
  and `create_tts_service`, lift `base_url` off the user config, run it
  through `_validate_runtime_service_url`, and forward it as a kwarg to
  `OpenAISTTService` / `OpenAITTSService` (both already accept it). Same
  pattern as the LLM branch.

* `test_user_configured_service_url_security.py` — adds four runtime
  validation tests covering private-IP rejection and localhost rejection
  in SaaS mode for both STT and TTS. Existing OSS-mode permissiveness
  is unchanged (DEPLOYMENT_MODE=oss skips the validator, as before).

No schema migration needed — Pydantic populates the default; existing
configurations without `base_url` continue to talk to api.openai.com.

`check_validity.py` requires no edits because the per-service validation
loop already iterates `("base_url", "endpoint")` via `getattr`, and the
`_check_openai_api_key` dispatcher already routes OPENAI providers
through the base_url-aware code path (introduced in #368) for STT and
TTS too.

Tests pass locally:

    pytest api/tests/test_user_configured_service_url_security.py
    23 passed in 4.80s   (19 existing + 4 new)

Co-authored-by: developer603 <developer603@users.noreply.github.com>
											
										
										
											2026-06-02 12:06:58 +05:30
+								            **kwargs,
-												Initial Commit 🚀 🚀

											
										
										
											2025-09-09 14:37:32 +05:30
+								        )
-												feat: add google stt and tts. add folders to organize agents

											
										
										
											2026-05-22 14:36:50 +05:30
+								    elif user_config.stt.provider == ServiceProviders.GOOGLE.value:
 								        language = getattr(user_config.stt, "language", None) or "en-US"
 								        location = getattr(user_config.stt, "location", None) or "global"
 								        credentials = getattr(user_config.stt, "credentials", None)
 								        settings_kwargs = {"model": user_config.stt.model}
 								        try:
 								            settings_kwargs["languages"] = [Language(language)]
 								        except ValueError:
 								            settings_kwargs["language_codes"] = [language]
 								        return GoogleSTTService(
 								            credentials=credentials,
 								            location=location,
 								            settings=GoogleSTTSettings(**settings_kwargs),
 								            sample_rate=audio_config.transport_in_sample_rate,
 								        )
-												Initial Commit 🚀 🚀

											
										
										
											2025-09-09 14:37:32 +05:30
+								    elif user_config.stt.provider == ServiceProviders.CARTESIA.value:
-												chore: update pipecat submodule

											
										
										
											2026-02-11 14:15:19 +05:30
+								        return CartesiaSTTService(
 								            api_key=user_config.stt.api_key,
 								            sample_rate=audio_config.transport_in_sample_rate,
 								        )
-												Initial Commit 🚀 🚀

											
										
										
											2025-09-09 14:37:32 +05:30
+								    elif user_config.stt.provider == ServiceProviders.DOGRAH.value:
 								        base_url = MPS_API_URL.replace("http://", "ws://").replace("https://", "wss://")
-												feat: add voices in Dograh configuration

											
										
										
											2026-01-19 14:52:54 +05:30
+								        language = getattr(user_config.stt, "language", None) or "multi"
-												Initial Commit 🚀 🚀

											
										
										
											2025-09-09 14:37:32 +05:30
+								        return DograhSTTService(
 								            base_url=base_url,
 								            api_key=user_config.stt.api_key,
-												feat: add hybrid text + recording functionality in agents (#191)

* feat: add recording feature in agents

* chore: pin pipecat version

* feat: show usage in UI

* chore: update pipecat
											
										
										
											2026-03-16 15:04:08 +05:30
+								            settings=DograhSTTSettings(
 								                model=user_config.stt.model,
 								                language=language,
 								            ),
-												feat: add dictionary support for STT boosting in voice agents (#136)

* feat: add dictionary support for voice agents

Also fixes #132

* chore: add keyterms in evals
											
										
										
											2026-01-29 11:20:07 +05:30
+								            keyterms=keyterms,
-												chore: update pipecat submodule

											
										
										
											2026-02-11 14:15:19 +05:30
+								            sample_rate=audio_config.transport_in_sample_rate,
-												Initial Commit 🚀 🚀

											
										
										
											2025-09-09 14:37:32 +05:30
+								        )
-												feat: add voice selectors in elevenlabs (#88)


											
										
										
											2025-12-25 15:05:53 +05:30
+								    elif user_config.stt.provider == ServiceProviders.SARVAM.value:
-												Add Sarvam LLM, update Sarvam STT models, expose usage_info on run detail (#351)

* Add Sarvam LLM provider, update Sarvam STT models, expose usage_info on run detail.
Depends on pipecat PR dograh-hq/pipecat#43 for STT string language support.
Submodule bump will follow after that merges.

* test: cover Sarvam STT language mapping; link Sarvam docs

---------

Co-authored-by: Sabiha Khan <sabihak89@gmail.com>
											
										
										
											2026-06-01 10:29:31 +05:30
+								        language = getattr(user_config.stt, "language", None)
-												feat: add voice selectors in elevenlabs (#88)


											
										
										
											2025-12-25 15:05:53 +05:30
+								        language_mapping = {
 								            "bn-IN": Language.BN_IN,
 								            "gu-IN": Language.GU_IN,
 								            "hi-IN": Language.HI_IN,
 								            "kn-IN": Language.KN_IN,
 								            "ml-IN": Language.ML_IN,
 								            "mr-IN": Language.MR_IN,
 								            "ta-IN": Language.TA_IN,
 								            "te-IN": Language.TE_IN,
 								            "pa-IN": Language.PA_IN,
 								            "od-IN": Language.OR_IN,
 								            "en-IN": Language.EN_IN,
 								            "as-IN": Language.AS_IN,
-												Add Sarvam LLM, update Sarvam STT models, expose usage_info on run detail (#351)

* Add Sarvam LLM provider, update Sarvam STT models, expose usage_info on run detail.
Depends on pipecat PR dograh-hq/pipecat#43 for STT string language support.
Submodule bump will follow after that merges.

* test: cover Sarvam STT language mapping; link Sarvam docs

---------

Co-authored-by: Sabiha Khan <sabihak89@gmail.com>
											
										
										
											2026-06-01 10:29:31 +05:30
+								            "ur-IN": Language.UR_IN,
 								            "kok-IN": Language.KOK_IN,
 								            "mai-IN": Language.MAI_IN,
 								            "sd-IN": Language.SD_IN,
-												feat: add voice selectors in elevenlabs (#88)


											
										
										
											2025-12-25 15:05:53 +05:30
+								        }
-												Add Sarvam LLM, update Sarvam STT models, expose usage_info on run detail (#351)

* Add Sarvam LLM provider, update Sarvam STT models, expose usage_info on run detail.
Depends on pipecat PR dograh-hq/pipecat#43 for STT string language support.
Submodule bump will follow after that merges.

* test: cover Sarvam STT language mapping; link Sarvam docs

---------

Co-authored-by: Sabiha Khan <sabihak89@gmail.com>
											
										
										
											2026-06-01 10:29:31 +05:30
+								        if not language or language == "unknown":
 								            pipecat_language = None
 								        elif language in language_mapping:
 								            pipecat_language = language_mapping[language]
 								        else:
 								            # Unmapped BCP-47 codes pass through; Sarvam accepts them per https://docs.sarvam.ai/api-reference-docs/speech-to-text/transcribe
 								            pipecat_language = language
-												feat: add voice selectors in elevenlabs (#88)


											
										
										
											2025-12-25 15:05:53 +05:30
+								        return SarvamSTTService(
 								            api_key=user_config.stt.api_key,
-												feat: add hybrid text + recording functionality in agents (#191)

* feat: add recording feature in agents

* chore: pin pipecat version

* feat: show usage in UI

* chore: update pipecat
											
										
										
											2026-03-16 15:04:08 +05:30
+								            settings=SarvamSTTSettings(
 								                model=user_config.stt.model,
 								                language=pipecat_language,
 								            ),
-												chore: update pipecat submodule

											
										
										
											2026-02-11 14:15:19 +05:30
+								            sample_rate=audio_config.transport_in_sample_rate,
-												fix: migrate from custom audio recorder to native AudioBuffer (#115)

* fix: update to pipecat VM Detector

* fix: refactor to remove audio synchronizer

* feat: add speechmatics as STT
											
										
										
											2026-01-08 18:03:26 +05:30
+								        )
-												feat: add gemini live and speaches integration (#220)

* feat: add speaches models

* feat: add gemini realtime and speaches integration

- Add gemini realtime support
- Add speaches support for locally hosted LLMs

* chore: bump pipecat

* feat: add language option

* fix: add skip aggregator types to tts settings

* fix: make API key optional for realtime
											
										
										
											2026-03-31 21:42:03 +05:30
+								    elif user_config.stt.provider == ServiceProviders.SPEACHES.value:
-												fix: Speaches STT service wiring

* Fix Speaches STT service wiring

* chore: bump pipecat submodule

---------

Co-authored-by: drascom <drascom@drascoms-MacBook-Pro.local>
Co-authored-by: Abhishek Kumar <abhishek@a6k.me>
											
										
										
											2026-04-06 09:41:58 +01:00
+								        language = getattr(user_config.stt, "language", None)
-												feat: allow overriding base URL of OpenAI models (#368)

* Add OpenAI-compatible API option in model configuration

Backend-only cherry-pick from 20617db37a8417e4ee4f64efb6063fc5cd4aea98.

* Potential fix for pull request finding

Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>

* fix: harden the base url settings in SaaS mode

---------

Co-authored-by: Chris Briddock <briddockchristopher@gmail.com>
Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>
											
										
										
											2026-05-27 13:07:45 +05:30
+								        _validate_runtime_service_url(user_config.stt.base_url, "base_url")
-												feat: add gemini live and speaches integration (#220)

* feat: add speaches models

* feat: add gemini realtime and speaches integration

- Add gemini realtime support
- Add speaches support for locally hosted LLMs

* chore: bump pipecat

* feat: add language option

* fix: add skip aggregator types to tts settings

* fix: make API key optional for realtime
											
										
										
											2026-03-31 21:42:03 +05:30
+								        return SpeachesSTTService(
-												fix: Speaches STT service wiring

* Fix Speaches STT service wiring

* chore: bump pipecat submodule

---------

Co-authored-by: drascom <drascom@drascoms-MacBook-Pro.local>
Co-authored-by: Abhishek Kumar <abhishek@a6k.me>
											
										
										
											2026-04-06 09:41:58 +01:00
+								            base_url=user_config.stt.base_url,
-												feat: add gemini live and speaches integration (#220)

* feat: add speaches models

* feat: add gemini realtime and speaches integration

- Add gemini realtime support
- Add speaches support for locally hosted LLMs

* chore: bump pipecat

* feat: add language option

* fix: add skip aggregator types to tts settings

* fix: make API key optional for realtime
											
										
										
											2026-03-31 21:42:03 +05:30
+								            api_key=user_config.stt.api_key or "none",
 								            settings=SpeachesSTTSettings(
 								                model=user_config.stt.model,
 								                language=language,
 								            ),
 								            sample_rate=audio_config.transport_in_sample_rate,
 								        )
-												feat: add Assembly AI STT

											
										
										
											2026-04-03 07:10:37 +05:30
+								    elif user_config.stt.provider == ServiceProviders.ASSEMBLYAI.value:
 								        language = getattr(user_config.stt, "language", None)
-												feat: enable context summarization

											
										
										
											2026-04-03 13:39:02 +05:30
+								        settings_kwargs = {"model": user_config.stt.model, "language": language}
-												feat: add Assembly AI STT

											
										
										
											2026-04-03 07:10:37 +05:30
+								        if keyterms:
 								            settings_kwargs["keyterms_prompt"] = keyterms
 								        return AssemblyAISTTService(
 								            api_key=user_config.stt.api_key,
 								            settings=AssemblyAISTTSettings(**settings_kwargs),
 								            sample_rate=audio_config.transport_in_sample_rate,
 								        )
-												feat: add gladia stt support

											
										
										
											2026-04-04 14:47:48 +05:30
+								    elif user_config.stt.provider == ServiceProviders.GLADIA.value:
 								        from pipecat.services.gladia.config import LanguageConfig
 								        language = getattr(user_config.stt, "language", None) or "en"
 								        settings_kwargs = {
 								            "model": user_config.stt.model,
 								            "language_config": LanguageConfig(
 								                languages=[language], code_switching=False
 								            ),
 								        }
 								        return GladiaSTTService(
 								            api_key=user_config.stt.api_key,
 								            settings=GladiaSTTSettings(**settings_kwargs),
 								            sample_rate=audio_config.transport_in_sample_rate,
 								        )
-												fix: migrate from custom audio recorder to native AudioBuffer (#115)

* fix: update to pipecat VM Detector

* fix: refactor to remove audio synchronizer

* feat: add speechmatics as STT
											
										
										
											2026-01-08 18:03:26 +05:30
+								    elif user_config.stt.provider == ServiceProviders.SPEECHMATICS.value:
-												feat: add dictionary support for STT boosting in voice agents (#136)

* feat: add dictionary support for voice agents

Also fixes #132

* chore: add keyterms in evals
											
										
										
											2026-01-29 11:20:07 +05:30
+								        from pipecat.services.speechmatics.stt import (
 								            AdditionalVocabEntry,
 								            OperatingPoint,
 								        )
-												fix: migrate from custom audio recorder to native AudioBuffer (#115)

* fix: update to pipecat VM Detector

* fix: refactor to remove audio synchronizer

* feat: add speechmatics as STT
											
										
										
											2026-01-08 18:03:26 +05:30
 								        language = getattr(user_config.stt, "language", None) or "en"
 								        # Map model field to operating point (standard or enhanced)
 								        operating_point = (
 								            OperatingPoint.ENHANCED
 								            if user_config.stt.model == "enhanced"
 								            else OperatingPoint.STANDARD
 								        )
-												feat: add dictionary support for STT boosting in voice agents (#136)

* feat: add dictionary support for voice agents

Also fixes #132

* chore: add keyterms in evals
											
										
										
											2026-01-29 11:20:07 +05:30
+								        # Convert keyterms to AdditionalVocabEntry objects for Speechmatics
 								        additional_vocab = []
 								        if keyterms:
 								            additional_vocab = [AdditionalVocabEntry(content=term) for term in keyterms]
-												fix: migrate from custom audio recorder to native AudioBuffer (#115)

* fix: update to pipecat VM Detector

* fix: refactor to remove audio synchronizer

* feat: add speechmatics as STT
											
										
										
											2026-01-08 18:03:26 +05:30
+								        return SpeechmaticsSTTService(
 								            api_key=user_config.stt.api_key,
-												feat: add hybrid text + recording functionality in agents (#191)

* feat: add recording feature in agents

* chore: pin pipecat version

* feat: show usage in UI

* chore: update pipecat
											
										
										
											2026-03-16 15:04:08 +05:30
+								            settings=SpeechmaticsSTTSettings(
-												fix: migrate from custom audio recorder to native AudioBuffer (#115)

* fix: update to pipecat VM Detector

* fix: refactor to remove audio synchronizer

* feat: add speechmatics as STT
											
										
										
											2026-01-08 18:03:26 +05:30
+								                language=language,
 								                operating_point=operating_point,
-												feat: add dictionary support for STT boosting in voice agents (#136)

* feat: add dictionary support for voice agents

Also fixes #132

* chore: add keyterms in evals
											
										
										
											2026-01-29 11:20:07 +05:30
+								                additional_vocab=additional_vocab,
-												fix: migrate from custom audio recorder to native AudioBuffer (#115)

* fix: update to pipecat VM Detector

* fix: refactor to remove audio synchronizer

* feat: add speechmatics as STT
											
										
										
											2026-01-08 18:03:26 +05:30
+								            ),
-												chore: update pipecat submodule

											
										
										
											2026-02-11 14:15:19 +05:30
+								            sample_rate=audio_config.transport_in_sample_rate,
-												feat: add voice selectors in elevenlabs (#88)


											
										
										
											2025-12-25 15:05:53 +05:30
+								        )
-												feat: add Azure AI multi-provider support (TTS, STT, Embeddings, Realtime) (#381)

* feat: add Azure AI multi-provider support (TTS, STT, Embeddings, Realtime)

Enables Azure AI services across all model layers so users with Azure
credits can consolidate billing on a single provider.

- Voice (TTS): AzureSpeechTTSConfiguration via azure_speech provider
- Transcriber (STT): AzureSpeechSTTConfiguration via azure_speech provider
- Embedding: AzureOpenAIEmbeddingsConfiguration via azure provider
- Realtime: AzureRealtimeLLMConfiguration via azure_realtime provider

New files:
- api/services/pipecat/realtime/azure_realtime.py
- api/services/gen_ai/embedding/azure_openai_service.py
- api/tests/test_azure_speech_service_factory.py

The UI picks up all four providers automatically from the schema —
no frontend changes required.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* fix: add validation for URL and params

---------

Co-authored-by: Vishal Dhateria <vishal@finela.ai>
Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
Co-authored-by: Abhishek Kumar <abhishek@a6k.me>
											
										
										
											2026-06-02 12:50:00 +05:30
+								    elif user_config.stt.provider == ServiceProviders.AZURE_SPEECH.value:
 								        from pipecat.transcriptions.language import Language as PipecatLanguage
 								        language_code = getattr(user_config.stt, "language", None) or "en-US"
 								        region = getattr(user_config.stt, "region", None) or "eastus"
 								        try:
 								            pipecat_language = PipecatLanguage(language_code)
 								        except ValueError:
 								            pipecat_language = language_code
 								        return AzureSTTService(
 								            api_key=user_config.stt.api_key,
 								            region=region,
 								            settings=AzureSTTSettings(language=pipecat_language),
 								            sample_rate=audio_config.transport_in_sample_rate,
 								        )
-												Initial Commit 🚀 🚀

											
										
										
											2025-09-09 14:37:32 +05:30
+								    else:
 								        raise HTTPException(
 								            status_code=400, detail=f"Invalid STT provider {user_config.stt.provider}"
 								        )
 								def create_tts_service(user_config, audio_config: "AudioConfig"):
 								    """Create and return appropriate TTS service based on user configuration
 								    Args:
 								        user_config: User configuration containing TTS settings
-												feat: add asterisk ARI websocket interface (#159)

* chore: remove old files

* feat: ari outbound dialing

* feat: add websocket configuration for ARI

* feat: handling inbound calls

* delete ext channel from redis on stasis end

* fix: add lock in workflow run update, refactor _handle_stasis_start

* chore: update submodule

---------

Co-authored-by: Sabiha Khan <sabihak89@gmail.com>
											
										
										
											2026-02-17 19:32:03 +05:30
+								        transport_type: Type of transport (e.g., 'twilio', 'webrtc')
-												Initial Commit 🚀 🚀

											
										
										
											2025-09-09 14:37:32 +05:30
+								    """
-												chore: minor fixes

											
										
										
											2026-01-13 14:55:48 +05:30
+								    logger.info(
 								        f"Creating TTS service: provider={user_config.tts.provider}, model={user_config.tts.model}"
 								    )
-												fix: add text filter for tts and logs for filter (#74)


											
										
										
											2025-12-09 16:24:24 +05:30
+								    # Create function call filter to prevent TTS from speaking function call tags
 								    xml_function_tag_filter = XMLFunctionTagFilter()
-												Initial Commit 🚀 🚀

											
										
										
											2025-09-09 14:37:32 +05:30
+								    if user_config.tts.provider == ServiceProviders.DEEPGRAM.value:
 								        return DeepgramTTSService(
-												fix: prevent pipeline freezes when sending endframe (#77)

* fix: don't cancel task if call is already ending

* Update pipecat
											
										
										
											2025-12-10 08:22:37 +07:00
+								            api_key=user_config.tts.api_key,
-												feat: add hybrid text + recording functionality in agents (#191)

* feat: add recording feature in agents

* chore: pin pipecat version

* feat: show usage in UI

* chore: update pipecat
											
										
										
											2026-03-16 15:04:08 +05:30
+								            settings=DeepgramTTSSettings(voice=user_config.tts.voice),
-												fix: prevent pipeline freezes when sending endframe (#77)

* fix: dont cancel task if call is already ending

* Update pipecat
											
										
										
											2025-12-10 08:22:37 +07:00
+								            text_filters=[xml_function_tag_filter],
-												feat: allow recordings in tool transitions

											
										
										
											2026-04-10 16:18:01 +05:30
+								            skip_aggregator_types=["recording_router", "recording"],
-												feat: add hybrid text + recording functionality in agents (#191)

* feat: add recording feature in agents

* chore: pin pipecat version

* feat: show usage in UI

* chore: update pipecat
											
										
										
											2026-03-16 15:04:08 +05:30
+								            silence_time_s=1.0,
-												Initial Commit 🚀 🚀

											
										
										
											2025-09-09 14:37:32 +05:30
+								        )
 								    elif user_config.tts.provider == ServiceProviders.OPENAI.value:
-												feat: allow overriding base URL of OpenAI STT and TTS (#377)

Mirrors the LLM treatment from #368 for the OpenAI STT and OpenAI TTS
providers. Users running OpenAI-compatible self-hosted services (vLLM,
Speaches, llama.cpp, custom proxies) can now point Dograh at them via
the OpenAI provider with `base_url`, instead of being forced onto the
Speaches provider as a workaround.

Changes:

* `registry.py` — add `base_url` field (default `https://api.openai.com/v1`)
  to `OpenAISTTConfiguration` and `OpenAITTSService`, identical in shape
  to the existing `OpenAILLMService.base_url` from #368.

* `service_factory.py` — in the OPENAI branches of `create_stt_service`
  and `create_tts_service`, lift `base_url` off the user config, run it
  through `_validate_runtime_service_url`, and forward it as a kwarg to
  `OpenAISTTService` / `OpenAITTSService` (both already accept it). Same
  pattern as the LLM branch.

* `test_user_configured_service_url_security.py` — adds four runtime
  validation tests covering private-IP rejection and localhost rejection
  in SaaS mode for both STT and TTS. Existing OSS-mode permissiveness
  is unchanged (DEPLOYMENT_MODE=oss skips the validator, as before).

No schema migration needed — Pydantic populates the default; existing
configurations without `base_url` continue to talk to api.openai.com.

`check_validity.py` requires no edits because the per-service validation
loop already iterates `("base_url", "endpoint")` via `getattr`, and the
`_check_openai_api_key` dispatcher already routes OPENAI providers
through the base_url-aware code path (introduced in #368) for STT and
TTS too.

Tests pass locally:

    pytest api/tests/test_user_configured_service_url_security.py
    23 passed in 4.80s   (19 existing + 4 new)

Co-authored-by: developer603 <developer603@users.noreply.github.com>
											
										
										
											2026-06-02 12:06:58 +05:30
+								        kwargs = {}
 								        base_url = getattr(user_config.tts, "base_url", None)
 								        if base_url:
 								            _validate_runtime_service_url(base_url, "base_url")
 								            kwargs["base_url"] = base_url
-												Initial Commit 🚀 🚀

											
										
										
											2025-09-09 14:37:32 +05:30
+								        return OpenAITTSService(
-												fix: prevent pipeline freezes when sending endframe (#77)

* fix: don't cancel task if call is already ending

* Update pipecat
											
										
										
											2025-12-10 08:22:37 +07:00
+								            api_key=user_config.tts.api_key,
-												feat: add hybrid text + recording functionality in agents (#191)

* feat: add recording feature in agents

* chore: pin pipecat version

* feat: show usage in UI

* chore: update pipecat
											
										
										
											2026-03-16 15:04:08 +05:30
+								            settings=OpenAITTSSettings(model=user_config.tts.model),
-												fix: prevent pipeline freezes when sending endframe (#77)

* fix: don't cancel task if call is already ending

* Update pipecat
											
										
										
											2025-12-10 08:22:37 +07:00
+								            text_filters=[xml_function_tag_filter],
-												feat: allow recordings in tool transitions

											
										
										
											2026-04-10 16:18:01 +05:30
+								            skip_aggregator_types=["recording_router", "recording"],
-												feat: add hybrid text + recording functionality in agents (#191)

* feat: add recording feature in agents

* chore: pin pipecat version

* feat: show usage in UI

* chore: update pipecat
											
										
										
											2026-03-16 15:04:08 +05:30
+								            silence_time_s=1.0,
-												feat: allow overriding base URL of OpenAI STT and TTS (#377)

Mirrors the LLM treatment from #368 for the OpenAI STT and OpenAI TTS
providers. Users running OpenAI-compatible self-hosted services (vLLM,
Speaches, llama.cpp, custom proxies) can now point Dograh at them via
the OpenAI provider with `base_url`, instead of being forced onto the
Speaches provider as a workaround.

Changes:

* `registry.py` — add `base_url` field (default `https://api.openai.com/v1`)
  to `OpenAISTTConfiguration` and `OpenAITTSService`, identical in shape
  to the existing `OpenAILLMService.base_url` from #368.

* `service_factory.py` — in the OPENAI branches of `create_stt_service`
  and `create_tts_service`, lift `base_url` off the user config, run it
  through `_validate_runtime_service_url`, and forward it as a kwarg to
  `OpenAISTTService` / `OpenAITTSService` (both already accept it). Same
  pattern as the LLM branch.

* `test_user_configured_service_url_security.py` — adds four runtime
  validation tests covering private-IP rejection and localhost rejection
  in SaaS mode for both STT and TTS. Existing OSS-mode permissiveness
  is unchanged (DEPLOYMENT_MODE=oss skips the validator, as before).

No schema migration needed — Pydantic populates the default; existing
configurations without `base_url` continue to talk to api.openai.com.

`check_validity.py` requires no edits because the per-service validation
loop already iterates `("base_url", "endpoint")` via `getattr`, and the
`_check_openai_api_key` dispatcher already routes OPENAI providers
through the base_url-aware code path (introduced in #368) for STT and
TTS too.

Tests pass locally:

    pytest api/tests/test_user_configured_service_url_security.py
    23 passed in 4.80s   (19 existing + 4 new)

Co-authored-by: developer603 <developer603@users.noreply.github.com>
											
										
										
											2026-06-02 12:06:58 +05:30
+								            **kwargs,
-												Initial Commit 🚀 🚀

											
										
										
											2025-09-09 14:37:32 +05:30
+								        )
-												feat: add google stt and tts. add folders to organize agents

											
										
										
											2026-05-22 14:36:50 +05:30
+								    elif user_config.tts.provider == ServiceProviders.GOOGLE.value:
 								        model = getattr(user_config.tts, "model", None) or "chirp_3_hd"
 								        language = getattr(user_config.tts, "language", None) or "en-US"
 								        voice = getattr(user_config.tts, "voice", None) or "en-US-Chirp3-HD-Charon"
 								        speed = getattr(user_config.tts, "speed", None)
 								        location = getattr(user_config.tts, "location", None) or None
 								        credentials = getattr(user_config.tts, "credentials", None)
 								        settings_kwargs = {
 								            "model": model,
 								            "voice": voice,
 								            "language": language,
 								        }
 								        if speed is not None and speed != 1.0:
 								            settings_kwargs["speaking_rate"] = speed
 								        return GoogleTTSService(
 								            credentials=credentials,
 								            location=location,
 								            settings=GoogleTTSSettings(**settings_kwargs),
 								            text_filters=[xml_function_tag_filter],
 								            skip_aggregator_types=["recording_router", "recording"],
 								            silence_time_s=1.0,
 								        )
-												Initial Commit 🚀 🚀

											
										
										
											2025-09-09 14:37:32 +05:30
+								    elif user_config.tts.provider == ServiceProviders.ELEVENLABS.value:
-												feat: add voice selectors in elevenlabs (#88)


											
										
										
											2025-12-25 15:05:53 +05:30
+								        # Backward compatible with older configuration "Name - voice_id"
 								        try:
 								            voice_id = user_config.tts.voice.split(" - ")[1]
 								        except IndexError:
 								            voice_id = user_config.tts.voice
-												feat: configurable ElevenLabs base URL for Data Residency (#278)

* feat: configurable ElevenLabs base URL for Data Residency (#269)

Adds a `base_url` field to `ElevenlabsTTSConfiguration` so users on an
ElevenLabs Data Residency plan (EU, etc.) can point Dograh at the
regional endpoint instead of the hardcoded global one. Defaults to
`https://api.elevenlabs.io`, preserving existing behaviour. The
service factory rewrites the HTTP scheme to WSS when constructing the
WebSocket TTS service.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* fix: fix drift

---------

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
											
										
										
											2026-05-12 19:01:13 +05:30
+								        # ElevenLabs TTS uses WebSocket. Users configure base_url with an HTTP
 								        # scheme (matching ElevenLabs documentation, e.g.
 								        # https://api.eu.residency.elevenlabs.io); rewrite it to the WS scheme.
-												feat: allow overriding base URL of OpenAI models (#368)

* Add OpenAI-compatible API option in model configuration

Backend-only cherry-pick from 20617db37a8417e4ee4f64efb6063fc5cd4aea98.

* Potential fix for pull request finding

Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>

* fix: harden the base url settings in SaaS mode

---------

Co-authored-by: Chris Briddock <briddockchristopher@gmail.com>
Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>
											
										
										
											2026-05-27 13:07:45 +05:30
+								        _validate_runtime_service_url(user_config.tts.base_url, "base_url")
-												feat: configurable ElevenLabs base URL for Data Residency (#278)

* feat: configurable ElevenLabs base URL for Data Residency (#269)

Adds a `base_url` field to `ElevenlabsTTSConfiguration` so users on an
ElevenLabs Data Residency plan (EU, etc.) can point Dograh at the
regional endpoint instead of the hardcoded global one. Defaults to
`https://api.elevenlabs.io`, preserving existing behaviour. The
service factory rewrites the HTTP scheme to WSS when constructing the
WebSocket TTS service.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* fix: fix drift

---------

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
											
										
										
											2026-05-12 19:01:13 +05:30
+								        elevenlabs_url = user_config.tts.base_url.replace("https://", "wss://").replace(
 								            "http://", "ws://"
 								        )
-												Initial Commit 🚀 🚀

											
										
										
											2025-09-09 14:37:32 +05:30
+								        return ElevenLabsTTSService(
 								            reconnect_on_error=False,
 								            api_key=user_config.tts.api_key,
-												feat: configurable ElevenLabs base URL for Data Residency (#278)

* feat: configurable ElevenLabs base URL for Data Residency (#269)

Adds a `base_url` field to `ElevenlabsTTSConfiguration` so users on an
ElevenLabs Data Residency plan (EU, etc.) can point Dograh at the
regional endpoint instead of the hardcoded global one. Defaults to
`https://api.elevenlabs.io`, preserving existing behaviour. The
service factory rewrites the HTTP scheme to WSS when constructing the
WebSocket TTS service.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* fix: fix drift

---------

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
											
										
										
											2026-05-12 19:01:13 +05:30
+								            url=elevenlabs_url,
-												feat: add hybrid text + recording functionality in agents (#191)

* feat: add recording feature in agents

* chore: pin pipecat version

* feat: show usage in UI

* chore: update pipecat
											
										
										
											2026-03-16 15:04:08 +05:30
+								            settings=ElevenLabsTTSSettings(
 								                voice=voice_id,
 								                model=user_config.tts.model,
 								                stability=0.8,
 								                speed=user_config.tts.speed,
 								                similarity_boost=0.75,
-												Initial Commit 🚀 🚀

											
										
										
											2025-09-09 14:37:32 +05:30
+								            ),
-												fix: prevent pipeline freezes when sending endframe (#77)

* fix: don't cancel task if call is already ending

* Update pipecat
											
										
										
											2025-12-10 08:22:37 +07:00
+								            text_filters=[xml_function_tag_filter],
-												feat: allow recordings in tool transitions

											
										
										
											2026-04-10 16:18:01 +05:30
+								            skip_aggregator_types=["recording_router", "recording"],
-												feat: add hybrid text + recording functionality in agents (#191)

* feat: add recording feature in agents

* chore: pin pipecat version

* feat: show usage in UI

* chore: update pipecat
											
										
										
											2026-03-16 15:04:08 +05:30
+								            silence_time_s=1.0,
-												Initial Commit 🚀 🚀

											
										
										
											2025-09-09 14:37:32 +05:30
+								        )
-												feat: add cartesia tts

											
										
										
											2026-02-20 20:41:11 +05:30
+								    elif user_config.tts.provider == ServiceProviders.CARTESIA.value:
-												feat: add speed configuration for cartesia

											
										
										
											2026-03-23 21:51:16 +05:30
+								        speed = getattr(user_config.tts, "speed", None)
-												fix: send volume in cartesia

											
										
										
											2026-04-08 23:20:14 +05:30
+								        volume = getattr(user_config.tts, "volume", None)
 								        gen_config_kwargs = {}
 								        if speed and speed != 1.0:
 								            gen_config_kwargs["speed"] = speed
 								        if volume and volume != 1.0:
 								            gen_config_kwargs["volume"] = volume
-												feat: add support for self hosted llm models

											
										
										
											2026-03-24 17:50:45 +05:30
+								        generation_config = (
-												fix: send volume in cartesia

											
										
										
											2026-04-08 23:20:14 +05:30
+								            GenerationConfig(**gen_config_kwargs) if gen_config_kwargs else None
-												feat: add support for self hosted llm models

											
										
										
											2026-03-24 17:50:45 +05:30
+								        )
-												feat: add cartesia tts

											
										
										
											2026-02-20 20:41:11 +05:30
+								        return CartesiaTTSService(
 								            api_key=user_config.tts.api_key,
-												feat: add hybrid text + recording functionality in agents (#191)

* feat: add recording feature in agents

* chore: pin pipecat version

* feat: show usage in UI

* chore: update pipecat
											
										
										
											2026-03-16 15:04:08 +05:30
+								            settings=CartesiaTTSSettings(
 								                voice=user_config.tts.voice,
 								                model=user_config.tts.model,
-												feat: add support for self hosted llm models

											
										
										
											2026-03-24 17:50:45 +05:30
+								                **(
 								                    {"generation_config": generation_config}
 								                    if generation_config
 								                    else {}
 								                ),
-												feat: add hybrid text + recording functionality in agents (#191)

* feat: add recording feature in agents

* chore: pin pipecat version

* feat: show usage in UI

* chore: update pipecat
											
										
										
											2026-03-16 15:04:08 +05:30
+								            ),
-												feat: add cartesia tts

											
										
										
											2026-02-20 20:41:11 +05:30
+								            text_filters=[xml_function_tag_filter],
-												feat: allow recordings in tool transitions

											
										
										
											2026-04-10 16:18:01 +05:30
+								            skip_aggregator_types=["recording_router", "recording"],
-												feat: add hybrid text + recording functionality in agents (#191)

* feat: add recording feature in agents

* chore: pin pipecat version

* feat: show usage in UI

* chore: update pipecat
											
										
										
											2026-03-16 15:04:08 +05:30
+								            silence_time_s=1.0,
-												feat: add cartesia tts

											
										
										
											2026-02-20 20:41:11 +05:30
+								        )
-												Initial Commit 🚀 🚀

											
										
										
											2025-09-09 14:37:32 +05:30
+								    elif user_config.tts.provider == ServiceProviders.DOGRAH.value:
 								        # Convert HTTP URL to WebSocket URL for TTS
 								        base_url = MPS_API_URL.replace("http://", "ws://").replace("https://", "wss://")
 								        return DograhTTSService(
 								            base_url=base_url,
 								            api_key=user_config.tts.api_key,
-												feat: add hybrid text + recording functionality in agents (#191)

* feat: add recording feature in agents

* chore: pin pipecat version

* feat: show usage in UI

* chore: update pipecat
											
										
										
											2026-03-16 15:04:08 +05:30
+								            settings=DograhTTSSettings(
 								                model=user_config.tts.model,
 								                voice=user_config.tts.voice,
 								                speed=user_config.tts.speed,
 								            ),
-												fix: prevent pipeline freezes when sending endframe (#77)

* fix: don't cancel task if call is already ending

* Update pipecat
											
										
										
											2025-12-10 08:22:37 +07:00
+								            text_filters=[xml_function_tag_filter],
-												feat: allow recordings in tool transitions

											
										
										
											2026-04-10 16:18:01 +05:30
+								            skip_aggregator_types=["recording_router", "recording"],
-												feat: add hybrid text + recording functionality in agents (#191)

* feat: add recording feature in agents

* chore: pin pipecat version

* feat: show usage in UI

* chore: update pipecat
											
										
										
											2026-03-16 15:04:08 +05:30
+								            silence_time_s=1.0,
-												Initial Commit 🚀 🚀

											
										
										
											2025-09-09 14:37:32 +05:30
+								        )
-												feat: add CAMB AI TTS integration (#187)

Co-authored-by: Abhishek <abhishek@a6k.me>
											
										
										
											2026-03-24 15:24:07 +08:00
+								    elif user_config.tts.provider == ServiceProviders.CAMB.value:
 								        from pipecat.services.camb.tts import CambTTSService
 								        voice_id = int(getattr(user_config.tts, "voice", None) or "147320")
 								        language = getattr(user_config.tts, "language", None) or "en-us"
 								        tts = CambTTSService(
 								            api_key=user_config.tts.api_key,
 								            voice_id=voice_id,
 								            model=user_config.tts.model,
 								            text_filters=[xml_function_tag_filter],
-												feat: allow recordings in tool transitions

											
										
										
											2026-04-10 16:18:01 +05:30
+								            skip_aggregator_types=["recording_router", "recording"],
-												feat: add CAMB AI TTS integration (#187)

Co-authored-by: Abhishek <abhishek@a6k.me>
											
										
										
											2026-03-24 15:24:07 +08:00
+								        )
 								        # Set language directly as BCP-47 code (bypasses Language enum conversion)
 								        tts._settings.language = language
 								        return tts
-												feat: add gemini live and speaches integration (#220)

* feat: add speaches models

* feat: add gemini realtime and speaches integration

- Add gemini realtime support
- Add speaches support for locally hosted LLMs

* chore: bump pipecat

* feat: add language option

* fix: add skip aggregator types to tts settings

* fix: make API key optional for realtime
											
										
										
											2026-03-31 21:42:03 +05:30
+								    elif user_config.tts.provider == ServiceProviders.SPEACHES.value:
-												feat: allow overriding base URL of OpenAI models (#368)

* Add OpenAI-compatible API option in model configuration

Backend-only cherry-pick from 20617db37a8417e4ee4f64efb6063fc5cd4aea98.

* Potential fix for pull request finding

Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>

* fix: harden the base url settings in SaaS mode

---------

Co-authored-by: Chris Briddock <briddockchristopher@gmail.com>
Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>
											
										
										
											2026-05-27 13:07:45 +05:30
+								        _validate_runtime_service_url(user_config.tts.base_url, "base_url")
-												feat: add gemini live and speaches integration (#220)

* feat: add speaches models

* feat: add gemini realtime and speaches integration

- Add gemini realtime support
- Add speaches support for locally hosted LLMs

* chore: bump pipecat

* feat: add language option

* fix: add skip aggregator types to tts settings

* fix: make API key optional for realtime
											
										
										
											2026-03-31 21:42:03 +05:30
+								        return SpeachesTTSService(
 								            base_url=user_config.tts.base_url,
 								            api_key=user_config.tts.api_key or "none",
 								            settings=SpeachesTTSSettings(
 								                model=user_config.tts.model,
 								                voice=user_config.tts.voice,
 								                speed=user_config.tts.speed,
 								            ),
 								            text_filters=[xml_function_tag_filter],
-												feat: allow recordings in tool transitions

											
										
										
											2026-04-10 16:18:01 +05:30
+								            skip_aggregator_types=["recording_router", "recording"],
-												feat: add gemini live and speaches integration (#220)

* feat: add speaches models

* feat: add gemini realtime and speaches integration

- Add gemini realtime support
- Add speaches support for locally hosted LLMs

* chore: bump pipecat

* feat: add language option

* fix: add skip aggregator types to tts settings

* fix: make API key optional for realtime
											
										
										
											2026-03-31 21:42:03 +05:30
+								            silence_time_s=1.0,
 								        )
-												feat: add Rime TTS

											
										
										
											2026-04-07 14:05:47 +05:30
+								    elif user_config.tts.provider == ServiceProviders.RIME.value:
 								        speed = getattr(user_config.tts, "speed", None)
-												chore: add language option for Rime

											
										
										
											2026-04-07 18:32:09 +05:30
+								        language_code = getattr(user_config.tts, "language", None) or "en"
 								        rime_language_mapping = {
 								            "en": Language.EN,
 								            "de": Language.DE,
 								            "fr": Language.FR,
 								            "es": Language.ES,
 								            "hi": Language.HI,
 								        }
 								        pipecat_language = rime_language_mapping.get(language_code, Language.EN)
-												feat: add Rime TTS

											
										
										
											2026-04-07 14:05:47 +05:30
+								        settings_kwargs = {
 								            "voice": user_config.tts.voice,
 								            "model": user_config.tts.model,
-												chore: add language option for Rime

											
										
										
											2026-04-07 18:32:09 +05:30
+								            "language": pipecat_language,
-												feat: add Rime TTS

											
										
										
											2026-04-07 14:05:47 +05:30
+								        }
 								        if speed and speed != 1.0:
 								            settings_kwargs["speedAlpha"] = speed
 								        return RimeTTSService(
 								            api_key=user_config.tts.api_key,
 								            settings=RimeTTSSettings(**settings_kwargs),
 								            text_filters=[xml_function_tag_filter],
-												feat: allow recordings in tool transitions

											
										
										
											2026-04-10 16:18:01 +05:30
+								            skip_aggregator_types=["recording_router", "recording"],
-												feat: add Rime TTS

											
										
										
											2026-04-07 14:05:47 +05:30
+								            silence_time_s=1.0,
 								        )
-												feat: add voice selectors in elevenlabs (#88)


											
										
										
											2025-12-25 15:05:53 +05:30
+								    elif user_config.tts.provider == ServiceProviders.SARVAM.value:
 								        # Map Sarvam language code to pipecat Language enum for TTS
 								        language_mapping = {
 								            "bn-IN": Language.BN,
 								            "en-IN": Language.EN,
 								            "gu-IN": Language.GU,
 								            "hi-IN": Language.HI,
 								            "kn-IN": Language.KN,
 								            "ml-IN": Language.ML,
 								            "mr-IN": Language.MR,
 								            "od-IN": Language.OR,
 								            "pa-IN": Language.PA,
 								            "ta-IN": Language.TA,
 								            "te-IN": Language.TE,
 								        }
 								        language = getattr(user_config.tts, "language", None)
-												fix: change type definition from enum to str for consistency

											
										
										
											2025-12-26 16:00:02 +05:30
+								        pipecat_language = language_mapping.get(language, Language.HI)
-												feat: add voice selectors in elevenlabs (#88)


											
										
										
											2025-12-25 15:05:53 +05:30
-												fix: change type definition from enum to str for consistency

											
										
										
											2025-12-26 16:00:02 +05:30
+								        voice = getattr(user_config.tts, "voice", None) or "anushka"
-												feat: add voice selectors in elevenlabs (#88)


											
										
										
											2025-12-25 15:05:53 +05:30
+								        return SarvamTTSService(
 								            api_key=user_config.tts.api_key,
-												feat: add hybrid text + recording functionality in agents (#191)

* feat: add recording feature in agents

* chore: pin pipecat version

* feat: show usage in UI

* chore: update pipecat
											
										
										
											2026-03-16 15:04:08 +05:30
+								            settings=SarvamTTSSettings(
 								                model=user_config.tts.model,
 								                voice=voice,
 								                language=pipecat_language,
 								            ),
-												feat: add voice selectors in elevenlabs (#88)


											
										
										
											2025-12-25 15:05:53 +05:30
+								            text_filters=[xml_function_tag_filter],
-												feat: allow recordings in tool transitions

											
										
										
											2026-04-10 16:18:01 +05:30
+								            skip_aggregator_types=["recording_router", "recording"],
-												feat: add hybrid text + recording functionality in agents (#191)

* feat: add recording feature in agents

* chore: pin pipecat version

* feat: show usage in UI

* chore: update pipecat
											
										
										
											2026-03-16 15:04:08 +05:30
+								            silence_time_s=1.0,
-												feat: add voice selectors in elevenlabs (#88)


											
										
										
											2025-12-25 15:05:53 +05:30
+								        )
-												feat: add MiniMax provider support (Chat + TTS) (#309)

* feat: add MiniMax provider support (Chat + TTS)

- Add MiniMax LLM provider using OpenAI-compatible API
  - Models: MiniMax-M2.7, MiniMax-M2.7-highspeed
  - Default base URL: https://api.minimax.io/v1
  - Uses MINIMAX_API_KEY for authentication
- Add MiniMax TTS provider using Pipecat's MiniMaxHttpTTSService
  - Models: speech-2.8-hd (default), speech-2.8-turbo
  - 6 built-in voices
  - Requires group_id configuration
- Add unit tests for both providers

* fix(minimax): validator, temperature, session cleanup, reasoning filter
  - check_validity.py: wire MiniMax into _validator_map and enforce
    group_id at save time. Without this, saving a config with a valid
    key was rejected.
  - registry.py: surface temperature on the LLM config (gt=0; MiniMax
    rejects 0) and base_url on the TTS config
  - service_factory.py:
    * Plumb temperature through create_llm_service
    * Normalize TTS base_url to include /t2a_v2 — pipecat appends only
      ?GroupId=... to the URL.
    * Use the new MiniMaxLLMService (from pipecat) to strip
      <think>...</think> reasoning that MiniMax-M2.7 emits inline in
      delta.content (otherwise it leaks straight to TTS).
    * Use MiniMaxOwnedSessionTTSService so the per-instance aiohttp
      session gets closed in cleanup() instead of leaking sockets/FDs.
  - minimax_tts.py: small wrapper around MiniMaxHttpTTSService that owns
    the session it was handed (pipecat's caller-owns-session API
    conflicts with the factory's per-instance pattern).
  - pipecat submodule: bumps to a commit that adds MiniMaxLLMService — a
    thin OpenAILLMService subclass with the streaming <think> filter
    (mirrors NvidiaLLMService's pattern for NIM reasoning models).
  - Tests updated/added for all of the above.

  Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

---------

Co-authored-by: octo-patch <octo-patch@github.com>
Co-authored-by: Sabiha Khan <sabihak89@gmail.com>
											
										
										
											2026-05-22 15:39:41 +08:00
+								    elif user_config.tts.provider == ServiceProviders.MINIMAX.value:
 								        group_id = getattr(user_config.tts, "group_id", None)
 								        if not group_id:
 								            raise HTTPException(
 								                status_code=400,
 								                detail="MiniMax TTS requires a group_id. Configure it in your TTS settings.",
 								            )
 								        voice = getattr(user_config.tts, "voice", None) or "English_Graceful_Lady"
 								        speed = getattr(user_config.tts, "speed", None) or 1.0
 								        # Pipecat appends "?GroupId=..." to base_url as-is, so /t2a_v2 must
 								        # already be in the path.
 								        base_url = (
 								            getattr(user_config.tts, "base_url", None)
 								            or "https://api.minimax.io/v1/t2a_v2"
 								        ).rstrip("/")
 								        if not base_url.endswith("/t2a_v2"):
 								            base_url = f"{base_url}/t2a_v2"
-												feat: allow overriding base URL of OpenAI models (#368)

* Add OpenAI-compatible API option in model configuration

Backend-only cherry-pick from 20617db37a8417e4ee4f64efb6063fc5cd4aea98.

* Potential fix for pull request finding

Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>

* fix: harden the base url settings in SaaS mode

---------

Co-authored-by: Chris Briddock <briddockchristopher@gmail.com>
Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>
											
										
										
											2026-05-27 13:07:45 +05:30
+								        _validate_runtime_service_url(base_url, "base_url")
-												feat: add MiniMax provider support (Chat + TTS) (#309)

* feat: add MiniMax provider support (Chat + TTS)

- Add MiniMax LLM provider using OpenAI-compatible API
  - Models: MiniMax-M2.7, MiniMax-M2.7-highspeed
  - Default base URL: https://api.minimax.io/v1
  - Uses MINIMAX_API_KEY for authentication
- Add MiniMax TTS provider using Pipecat's MiniMaxHttpTTSService
  - Models: speech-2.8-hd (default), speech-2.8-turbo
  - 6 built-in voices
  - Requires group_id configuration
- Add unit tests for both providers

* fix(minimax): validator, temperature, session cleanup, reasoning filter
  - check_validity.py: wire MiniMax into _validator_map and enforce
    group_id at save time. Without this, saving a config with a valid
    key was rejected.
  - registry.py: surface temperature on the LLM config (gt=0; MiniMax
    rejects 0) and base_url on the TTS config
  - service_factory.py:
    * Plumb temperature through create_llm_service
    * Normalize TTS base_url to include /t2a_v2 — pipecat appends only
      ?GroupId=... to the URL.
    * Use the new MiniMaxLLMService (from pipecat) to strip
      <think>...</think> reasoning that MiniMax-M2.7 emits inline in
      delta.content (otherwise it leaks straight to TTS).
    * Use MiniMaxOwnedSessionTTSService so the per-instance aiohttp
      session gets closed in cleanup() instead of leaking sockets/FDs.
  - minimax_tts.py: small wrapper around MiniMaxHttpTTSService that owns
    the session it was handed (pipecat's caller-owns-session API
    conflicts with the factory's per-instance pattern).
  - pipecat submodule: bumps to a commit that adds MiniMaxLLMService — a
    thin OpenAILLMService subclass with the streaming <think> filter
    (mirrors NvidiaLLMService's pattern for NIM reasoning models).
  - Tests updated/added for all of the above.

  Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

---------

Co-authored-by: octo-patch <octo-patch@github.com>
Co-authored-by: Sabiha Khan <sabihak89@gmail.com>
											
										
										
											2026-05-22 15:39:41 +08:00
 								        session = aiohttp.ClientSession()
 								        return MiniMaxOwnedSessionTTSService(
 								            api_key=user_config.tts.api_key,
 								            group_id=group_id,
 								            base_url=base_url,
 								            aiohttp_session=session,
 								            settings=MiniMaxTTSSettings(
 								                model=user_config.tts.model,
 								                voice=voice,
 								                speed=speed,
 								            ),
 								            text_filters=[xml_function_tag_filter],
 								            skip_aggregator_types=["recording_router", "recording"],
 								            silence_time_s=1.0,
 								        )
-												feat: add Azure AI multi-provider support (TTS, STT, Embeddings, Realtime) (#381)

* feat: add Azure AI multi-provider support (TTS, STT, Embeddings, Realtime)

Enables Azure AI services across all model layers so users with Azure
credits can consolidate billing on a single provider.

- Voice (TTS): AzureSpeechTTSConfiguration via azure_speech provider
- Transcriber (STT): AzureSpeechSTTConfiguration via azure_speech provider
- Embedding: AzureOpenAIEmbeddingsConfiguration via azure provider
- Realtime: AzureRealtimeLLMConfiguration via azure_realtime provider

New files:
- api/services/pipecat/realtime/azure_realtime.py
- api/services/gen_ai/embedding/azure_openai_service.py
- api/tests/test_azure_speech_service_factory.py

The UI picks up all four providers automatically from the schema —
no frontend changes required.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* fix: add validation for URL and params

---------

Co-authored-by: Vishal Dhateria <vishal@finela.ai>
Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
Co-authored-by: Abhishek Kumar <abhishek@a6k.me>
											
										
										
											2026-06-02 12:50:00 +05:30
+								    elif user_config.tts.provider == ServiceProviders.AZURE_SPEECH.value:
 								        region = getattr(user_config.tts, "region", None) or "eastus"
 								        voice = getattr(user_config.tts, "voice", None) or "en-US-AriaNeural"
 								        language = getattr(user_config.tts, "language", None) or "en-US"
 								        speed = getattr(user_config.tts, "speed", None) or 1.0
 								        # Map speed multiplier (0.5–2.0) to Azure SSML rate string (e.g. "1.25")
 								        rate = str(speed) if speed != 1.0 else None
 								        settings_kwargs: dict = {
 								            "voice": voice,
 								            "language": language,
 								        }
 								        if rate:
 								            settings_kwargs["rate"] = rate
 								        return AzureTTSService(
 								            api_key=user_config.tts.api_key,
 								            region=region,
 								            settings=AzureTTSSettings(**settings_kwargs),
 								            text_filters=[xml_function_tag_filter],
 								            skip_aggregator_types=["recording_router", "recording"],
 								            silence_time_s=1.0,
 								        )
-												Initial Commit 🚀 🚀

											
										
										
											2025-09-09 14:37:32 +05:30
+								    else:
 								        raise HTTPException(
 								            status_code=400, detail=f"Invalid TTS provider {user_config.tts.provider}"
 								        )
-												feat: add AWS Bedrock support

											
										
										
											2026-03-19 15:06:59 +05:30
def create_llm_service_from_provider(
    provider: str,
    model: str,
    api_key: str | None,
    *,
    base_url: str | None = None,
    endpoint: str | None = None,
    aws_access_key: str | None = None,
    aws_secret_key: str | None = None,
    aws_region: str | None = None,
    project_id: str | None = None,
    location: str | None = None,
    credentials: str | None = None,
    temperature: float | None = None,
):
    """Build the LLM service matching ``provider`` from explicit settings.

    Also used by create_llm_service which extracts these from user_config.

    Each provider reads only the keyword arguments relevant to it:

    * ``base_url`` -- OpenAI and OpenRouter (optional override), Speaches
      and MiniMax (falls back to a built-in default when empty).
    * ``endpoint`` -- Azure.
    * ``aws_access_key`` / ``aws_secret_key`` / ``aws_region`` -- AWS Bedrock.
    * ``credentials`` / ``project_id`` / ``location`` -- Google Vertex.
    * ``temperature`` -- MiniMax (default 1.0) and Sarvam (default 0.5);
      the other providers use the values hard-coded below.

    Every caller-supplied URL is passed through
    ``_validate_runtime_service_url`` before a service is constructed.

    Raises:
        HTTPException: status 400 when ``provider`` matches none of the
            providers handled here.
    """
    logger.info(f"Creating LLM service: provider={provider}, model={model}")

    if provider == ServiceProviders.OPENAI.value:
        url_override = {}
        if base_url:
            _validate_runtime_service_url(base_url, "base_url")
            url_override["base_url"] = base_url
        # gpt-5 models get reasoning/verbosity extras and no explicit
        # temperature; every other OpenAI model runs at temperature 0.1.
        if "gpt-5" in model:
            openai_settings = OpenAILLMSettings(
                model=model,
                extra={"reasoning_effort": "minimal", "verbosity": "low"},
            )
        else:
            openai_settings = OpenAILLMSettings(model=model, temperature=0.1)
        return OpenAILLMService(
            api_key=api_key,
            settings=openai_settings,
            **url_override,
        )

    if provider == ServiceProviders.GROQ.value:
        groq_settings = GroqLLMSettings(model=model, temperature=0.1)
        return GroqLLMService(api_key=api_key, settings=groq_settings)

    if provider == ServiceProviders.OPENROUTER.value:
        url_override = {}
        if base_url:
            _validate_runtime_service_url(base_url, "base_url")
            url_override["base_url"] = base_url
        return OpenRouterLLMService(
            api_key=api_key,
            settings=OpenRouterLLMSettings(model=model, temperature=0.1),
            **url_override,
        )

    if provider == ServiceProviders.GOOGLE.value:
        google_settings = GoogleLLMSettings(model=model, temperature=0.1)
        return GoogleLLMService(api_key=api_key, settings=google_settings)

    if provider == ServiceProviders.GOOGLE_VERTEX.value:
        return GoogleVertexLLMService(
            credentials=credentials,
            project_id=project_id,
            location=location or "us-east4",
            settings=GoogleVertexLLMSettings(model=model, temperature=0.1),
        )

    if provider == ServiceProviders.AZURE.value:
        # The endpoint is only validated when one was supplied; it is
        # forwarded to the service either way.
        if endpoint:
            _validate_runtime_service_url(endpoint, "endpoint")
        return AzureLLMService(
            api_key=api_key,
            endpoint=endpoint,
            settings=AzureLLMSettings(model=model, temperature=0.1),
        )

    if provider == ServiceProviders.DOGRAH.value:
        # URL is derived from the MPS_API_URL constant, not from base_url.
        return DograhLLMService(
            base_url=f"{MPS_API_URL}/api/v1/llm",
            api_key=api_key,
            settings=OpenAILLMSettings(model=model),
        )

    if provider == ServiceProviders.AWS_BEDROCK.value:
        return AWSBedrockLLMService(
            aws_access_key=aws_access_key,
            aws_secret_key=aws_secret_key,
            aws_region=aws_region,
            settings=AWSBedrockLLMSettings(model=model),
        )

    if provider == ServiceProviders.SPEACHES.value:
        speaches_url = base_url or "http://localhost:11434/v1"
        _validate_runtime_service_url(speaches_url, "base_url")
        return SpeachesLLMService(
            base_url=speaches_url,
            # Placeholder key is substituted when none is configured.
            api_key=api_key or "none",
            settings=SpeachesLLMSettings(model=model),
        )

    if provider == ServiceProviders.MINIMAX.value:
        minimax_url = base_url or "https://api.minimax.io/v1"
        _validate_runtime_service_url(minimax_url, "base_url")
        minimax_temperature = 1.0 if temperature is None else temperature
        return MiniMaxLLMService(
            api_key=api_key,
            base_url=minimax_url,
            settings=MiniMaxLLMService.Settings(
                model=model,
                temperature=minimax_temperature,
            ),
        )

    if provider == ServiceProviders.SARVAM.value:
        sarvam_temperature = 0.5 if temperature is None else temperature
        return SarvamLLMService(
            api_key=api_key,
            settings=SarvamLLMSettings(
                model=model,
                temperature=sarvam_temperature,
            ),
        )

    raise HTTPException(status_code=400, detail=f"Invalid LLM provider {provider}")
-												feat: add gemini live and speaches integration (#220)

* feat: add speaches models

* feat: add gemini realtime and speaches integration

- Add gemini realtime support
- Add speaches support for locally hosted LLMs

* chore: bump pipecat

* feat: add language option

* fix: add skip aggregator types to tts settings

* fix: make API key optional for realtime
											
										
										
											2026-03-31 21:42:03 +05:30
def create_realtime_llm_service(user_config, audio_config: "AudioConfig"):
    """Create a realtime (speech-to-speech) LLM service that handles STT+LLM+TTS.

    These services bypass separate STT/TTS and handle audio directly via
    a bidirectional WebSocket connection. Reads from user_config.realtime.

    Args:
        user_config: Configuration object whose ``realtime`` attribute supplies
            ``provider``, ``model`` and ``api_key``, and optionally ``voice``
            and ``language``. The Vertex branch additionally reads
            ``project_id``, ``location`` and ``credentials``; the Azure branch
            reads ``endpoint`` and ``api_version``.
        audio_config: Not read by this function; kept so the signature stays
            stable for callers.

    Returns:
        The provider-specific realtime LLM service instance.

    Raises:
        HTTPException: With status 400 when the provider is not a known
            realtime provider, or when the Azure endpoint is missing or does
            not contain a host.
    """
    realtime_config = user_config.realtime
    provider = realtime_config.provider
    model = realtime_config.model
    api_key = realtime_config.api_key
    # voice/language are optional on the realtime configuration.
    voice = getattr(realtime_config, "voice", None)
    language = getattr(realtime_config, "language", None)

    logger.info(
        f"Creating realtime LLM service: provider={provider}, model={model}, voice={voice}, language={language}"
    )

    # Provider implementations are imported inside their branch, so only the
    # selected provider's module is loaded.
    if provider == ServiceProviders.OPENAI_REALTIME.value:
        from api.services.pipecat.realtime.openai_realtime import (
            DograhOpenAIRealtimeLLMService,
        )

        return DograhOpenAIRealtimeLLMService(
            api_key=api_key,
            settings=DograhOpenAIRealtimeLLMService.Settings(
                model=model,
                session_properties=_openai_realtime_session_properties(voice),
            ),
        )
    elif provider == ServiceProviders.GROK_REALTIME.value:
        from api.services.pipecat.realtime.grok_realtime import (
            DograhGrokRealtimeLLMService,
        )
        from pipecat.services.xai.realtime.events import SessionProperties

        return DograhGrokRealtimeLLMService(
            api_key=api_key,
            settings=DograhGrokRealtimeLLMService.Settings(
                model=model,
                session_properties=SessionProperties(
                    voice=voice or "Ara",
                ),
            ),
        )
    elif provider == ServiceProviders.ULTRAVOX_REALTIME.value:
        from api.services.pipecat.realtime.ultravox_realtime import (
            DograhUltravoxOneShotInputParams,
            DograhUltravoxRealtimeLLMService,
        )

        return DograhUltravoxRealtimeLLMService(
            params=DograhUltravoxOneShotInputParams(
                api_key=api_key,
                model=model,
                voice=voice,
                output_medium="voice",
            ),
            settings=DograhUltravoxRealtimeLLMService.Settings(
                model=model,
                output_medium="voice",
            ),
        )
    elif provider == ServiceProviders.GOOGLE_REALTIME.value:
        from api.services.pipecat.realtime.gemini_live import (
            DograhGeminiLiveLLMService,
        )

        # Gemini Live enables input/output audio transcription by default
        # in its _connect() method — no need to configure it explicitly.
        settings_kwargs = {
            "model": model,
            "voice": voice or "Puck",
        }
        # Only pass a language when one is configured.
        if language:
            settings_kwargs["language"] = language
        return DograhGeminiLiveLLMService(
            api_key=api_key,
            settings=DograhGeminiLiveLLMService.Settings(**settings_kwargs),
        )
    elif provider == ServiceProviders.GOOGLE_VERTEX_REALTIME.value:
        from api.services.pipecat.realtime.gemini_live_vertex import (
            DograhGeminiLiveVertexLLMService,
        )

        project_id = getattr(realtime_config, "project_id", None)
        location = getattr(realtime_config, "location", None) or "us-east4"
        credentials = getattr(realtime_config, "credentials", None)
        settings_kwargs = {
            "model": model,
            "voice": voice or "Charon",
        }
        # Only pass a language when one is configured.
        if language:
            settings_kwargs["language"] = language
        return DograhGeminiLiveVertexLLMService(
            credentials=credentials,
            project_id=project_id,
            location=location,
            settings=DograhGeminiLiveVertexLLMService.Settings(**settings_kwargs),
        )
    elif provider == ServiceProviders.AZURE_REALTIME.value:
        from api.services.pipecat.realtime.azure_realtime import (
            DograhAzureRealtimeLLMService,
        )

        endpoint = getattr(realtime_config, "endpoint", None) or ""
        if not endpoint:
            raise HTTPException(
                status_code=400,
                detail="Azure Realtime requires an endpoint.",
            )
        _validate_runtime_service_url(endpoint, "endpoint")

        parsed_endpoint = urlparse(endpoint)
        # urlparse leaves netloc empty when the value has no "//host" part
        # (e.g. a bare hostname); building the URL anyway would produce
        # "wss:///openai/realtime?...", so reject it up front.
        if not parsed_endpoint.netloc:
            raise HTTPException(
                status_code=400,
                detail="Azure Realtime endpoint must include a host, e.g. https://<resource>.openai.azure.com.",
            )

        api_version = (
            getattr(realtime_config, "api_version", None) or "2025-04-01-preview"
        )
        # Construct the Azure Realtime WebSocket URL from the endpoint's host:
        # wss://<resource>.openai.azure.com/openai/realtime?api-version=<ver>&deployment=<model>
        wss_url = urlunparse(
            (
                "wss",
                parsed_endpoint.netloc,
                "/openai/realtime",
                "",
                urlencode({"api-version": api_version, "deployment": model}),
                "",
            )
        )
        return DograhAzureRealtimeLLMService(
            api_key=api_key,
            base_url=wss_url,
            settings=DograhAzureRealtimeLLMService.Settings(
                model=model,
                session_properties=_openai_realtime_session_properties(voice),
            ),
        )
    else:
        raise HTTPException(
            status_code=400, detail=f"Invalid realtime LLM provider {provider}"
        )


def _openai_realtime_session_properties(voice):
    """Build the session properties shared by the OpenAI and Azure realtime services.

    Input transcription is configured with its defaults and the output voice
    falls back to "alloy" when no voice is configured.
    """
    from pipecat.services.openai.realtime.events import (
        AudioConfiguration,
        AudioInput,
        AudioOutput,
        InputAudioTranscription,
        SessionProperties,
    )

    return SessionProperties(
        audio=AudioConfiguration(
            input=AudioInput(
                transcription=InputAudioTranscription(),
            ),
            output=AudioOutput(
                voice=voice or "alloy",
            ),
        ),
    )
-												feat: add AWS Bedrock support

											
										
										
											2026-03-19 15:06:59 +05:30
def create_llm_service(user_config):
    """Create and return the LLM service described by ``user_config.llm``.

    Reads ``provider``, ``model`` and ``api_key`` from the LLM configuration,
    gathers the extra settings the selected provider needs, and delegates the
    actual construction to ``create_llm_service_from_provider``. Providers not
    listed below are forwarded without extra keyword arguments.
    """
    llm_config = user_config.llm
    provider = llm_config.provider
    model = llm_config.model
    api_key = llm_config.api_key

    # Each entry pairs a group of provider identifiers with the configuration
    # attributes that are forwarded as keyword arguments for those providers.
    forwarded_fields = (
        (
            (
                ServiceProviders.OPENAI.value,
                ServiceProviders.OPENROUTER.value,
                ServiceProviders.SPEACHES.value,
            ),
            ("base_url",),
        ),
        ((ServiceProviders.AZURE.value,), ("endpoint",)),
        (
            (ServiceProviders.AWS_BEDROCK.value,),
            ("aws_access_key", "aws_secret_key", "aws_region"),
        ),
        (
            (ServiceProviders.GOOGLE_VERTEX.value,),
            ("project_id", "location", "credentials"),
        ),
        ((ServiceProviders.MINIMAX.value,), ("base_url", "temperature")),
        ((ServiceProviders.SARVAM.value,), ("temperature",)),
    )

    extra_kwargs = {}
    for provider_values, field_names in forwarded_fields:
        if provider in provider_values:
            extra_kwargs = {name: getattr(llm_config, name) for name in field_names}
            break

    return create_llm_service_from_provider(provider, model, api_key, **extra_kwargs)