# Patch summary: add Gladia as a speech-to-text (STT) provider.
#
# api/services/configuration/check_validity.py
#   - Maps ServiceProviders.GLADIA.value to self._check_gladia_api_key in the
#     validator's provider -> checker mapping.
#   - Adds _check_gladia_api_key, which returns True unconditionally; as written
#     it performs no verification of the key.
#   - NOTE(review): the new method names its second parameter `api_key: str`,
#     while the adjacent _check_assemblyai_api_key names it `service_config`.
#     Confirm which value the dispatcher actually passes.
#
# api/services/configuration/registry.py
#   - Adds the enum member ServiceProviders.GLADIA = "gladia" and lists it in
#     the provider sequence shown in the BaseServiceConfiguration hunk.
#   - Adds GLADIA_STT_MODELS (["solaria-1"]) and GLADIA_STT_LANGUAGES.
#   - Adds GladiaSTTConfiguration (decorated with @register_stt), defaulting to
#     model "solaria-1" and language "en". Both fields are typed `str`; the two
#     lists above are attached only as json_schema_extra "examples".
#   - Adds GladiaSTTConfiguration to the STTConfig union, which is discriminated
#     on the "provider" field.
#
# api/services/pipecat/service_factory.py
#   - Imports GladiaSTTService and GladiaSTTSettings at module level; imports
#     LanguageConfig inside the new branch.
#   - Adds a GLADIA branch to create_stt_service. The language is read with
#     getattr(user_config.stt, "language", None) and falls back to "en" when the
#     attribute is missing or falsy. The service is built with the configured
#     api_key, settings carrying the model plus a single-language LanguageConfig
#     with code_switching=False, and
#     sample_rate=audio_config.transport_in_sample_rate.
#
# NOTE(review): in this copy the patch's line breaks and indentation appear to
# have been collapsed onto a few long lines; presumably they must be restored
# from the original patch before it can be applied - confirm against the source
# commit.
diff --git a/api/services/configuration/check_validity.py b/api/services/configuration/check_validity.py index f62ad17..b35f095 100644 --- a/api/services/configuration/check_validity.py +++ b/api/services/configuration/check_validity.py @@ -50,6 +50,7 @@ class UserConfigurationValidator: ServiceProviders.OPENAI_REALTIME.value: self._check_openai_api_key, ServiceProviders.GOOGLE_REALTIME.value: self._check_google_api_key, ServiceProviders.ASSEMBLYAI.value: self._check_assemblyai_api_key, + ServiceProviders.GLADIA.value: self._check_gladia_api_key, } async def validate( @@ -221,3 +222,6 @@ class UserConfigurationValidator: def _check_assemblyai_api_key(self, model: str, service_config) -> bool: return True + + def _check_gladia_api_key(self, model: str, api_key: str) -> bool: + return True diff --git a/api/services/configuration/registry.py b/api/services/configuration/registry.py index 5e77882..c648a21 100644 --- a/api/services/configuration/registry.py +++ b/api/services/configuration/registry.py @@ -30,6 +30,7 @@ class ServiceProviders(str, Enum): AWS_BEDROCK = "aws_bedrock" SPEACHES = "speaches" ASSEMBLYAI = "assemblyai" + GLADIA = "gladia" OPENAI_REALTIME = "openai_realtime" GOOGLE_REALTIME = "google_realtime" @@ -47,6 +48,7 @@ class BaseServiceConfiguration(BaseModel): ServiceProviders.AWS_BEDROCK, ServiceProviders.SPEACHES, ServiceProviders.ASSEMBLYAI, + ServiceProviders.GLADIA, ServiceProviders.OPENAI_REALTIME, ServiceProviders.GOOGLE_REALTIME, # ServiceProviders.SARVAM, @@ -875,6 +877,124 @@ class AssemblyAISTTConfiguration(BaseSTTConfiguration): ) +GLADIA_STT_MODELS = ["solaria-1"] +GLADIA_STT_LANGUAGES = [ + "af", + "am", + "ar", + "as", + "az", + "ba", + "be", + "bg", + "bn", + "bo", + "br", + "bs", + "ca", + "cs", + "cy", + "da", + "de", + "el", + "en", + "es", + "et", + "eu", + "fa", + "fi", + "fo", + "fr", + "gl", + "gu", + "ha", + "haw", + "he", + "hi", + "hr", + "ht", + "hu", + "hy", + "id", + "is", + "it", + "ja", + "jw", + "ka", + "kk", + "km", 
"kn", + "ko", + "la", + "lb", + "ln", + "lo", + "lt", + "lv", + "mg", + "mi", + "mk", + "ml", + "mn", + "mr", + "ms", + "mt", + "my", + "ne", + "nl", + "nn", + "no", + "oc", + "pa", + "pl", + "ps", + "pt", + "ro", + "ru", + "sa", + "sd", + "si", + "sk", + "sl", + "sn", + "so", + "sq", + "sr", + "su", + "sv", + "sw", + "ta", + "te", + "tg", + "th", + "tk", + "tl", + "tr", + "tt", + "uk", + "ur", + "uz", + "vi", + "wo", + "yi", + "yo", + "zh", +] + + +@register_stt +class GladiaSTTConfiguration(BaseSTTConfiguration): + provider: Literal[ServiceProviders.GLADIA] = ServiceProviders.GLADIA + model: str = Field( + default="solaria-1", + json_schema_extra={"examples": GLADIA_STT_MODELS}, + ) + language: str = Field( + default="en", + json_schema_extra={"examples": GLADIA_STT_LANGUAGES}, + ) + + STTConfig = Annotated[ Union[ DeepgramSTTConfiguration, @@ -885,6 +1005,7 @@ STTConfig = Annotated[ SarvamSTTConfiguration, SpeachesSTTConfiguration, AssemblyAISTTConfiguration, + GladiaSTTConfiguration, ], Field(discriminator="provider"), ] diff --git a/api/services/pipecat/service_factory.py b/api/services/pipecat/service_factory.py index 5f1128b..0a7a1e2 100644 --- a/api/services/pipecat/service_factory.py +++ b/api/services/pipecat/service_factory.py @@ -24,6 +24,7 @@ from pipecat.services.dograh.llm import DograhLLMService from pipecat.services.dograh.stt import DograhSTTService, DograhSTTSettings from pipecat.services.dograh.tts import DograhTTSService, DograhTTSSettings from pipecat.services.elevenlabs.tts import ElevenLabsTTSService, ElevenLabsTTSSettings +from pipecat.services.gladia.stt import GladiaSTTService, GladiaSTTSettings from pipecat.services.google.llm import GoogleLLMService, GoogleLLMSettings from pipecat.services.groq.llm import GroqLLMService, GroqLLMSettings from pipecat.services.openai.base_llm import OpenAILLMSettings @@ -167,6 +168,21 @@ def create_stt_service( settings=AssemblyAISTTSettings(**settings_kwargs), 
sample_rate=audio_config.transport_in_sample_rate, ) + elif user_config.stt.provider == ServiceProviders.GLADIA.value: + from pipecat.services.gladia.config import LanguageConfig + + language = getattr(user_config.stt, "language", None) or "en" + settings_kwargs = { + "model": user_config.stt.model, + "language_config": LanguageConfig( + languages=[language], code_switching=False + ), + } + return GladiaSTTService( + api_key=user_config.stt.api_key, + settings=GladiaSTTSettings(**settings_kwargs), + sample_rate=audio_config.transport_in_sample_rate, + ) elif user_config.stt.provider == ServiceProviders.SPEECHMATICS.value: from pipecat.services.speechmatics.stt import ( AdditionalVocabEntry,