From 526d5439f3fc57b30d6d5c084c0f113704827722 Mon Sep 17 00:00:00 2001 From: Abhishek Kumar Date: Tue, 31 Mar 2026 18:17:44 +0530 Subject: [PATCH] feat: add language option --- api/services/configuration/registry.py | 12 +++++++++++- api/services/pipecat/service_factory.py | 14 +++++++++----- 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/api/services/configuration/registry.py b/api/services/configuration/registry.py index 6ec1e632..51939b77 100644 --- a/api/services/configuration/registry.py +++ b/api/services/configuration/registry.py @@ -318,6 +318,9 @@ OPENAI_REALTIME_VOICES = [ GOOGLE_REALTIME_MODELS = ["gemini-3.1-flash-live-preview"] GOOGLE_REALTIME_VOICES = ["Puck", "Charon", "Kore", "Fenrir", "Aoede"] +GOOGLE_REALTIME_LANGUAGES = [ + "en" +] @register_service(ServiceType.REALTIME) @@ -326,7 +329,7 @@ class GoogleRealtimeLLMConfiguration(BaseLLMConfiguration): ServiceProviders.GOOGLE_REALTIME ) model: str = Field( - default="gemini-2.0-flash-live-001", + default="gemini-3.1-flash-live-preview", json_schema_extra={ "examples": GOOGLE_REALTIME_MODELS, "allow_custom_input": True, @@ -339,6 +342,13 @@ class GoogleRealtimeLLMConfiguration(BaseLLMConfiguration): "allow_custom_input": True, }, ) + language: str = Field( + default="en", + json_schema_extra={ + "examples": GOOGLE_REALTIME_LANGUAGES, + "allow_custom_input": True, + }, + ) REALTIME_PROVIDERS = { diff --git a/api/services/pipecat/service_factory.py b/api/services/pipecat/service_factory.py index 128ae67e..965a359d 100644 --- a/api/services/pipecat/service_factory.py +++ b/api/services/pipecat/service_factory.py @@ -415,9 +415,10 @@ def create_realtime_llm_service(user_config, audio_config: "AudioConfig"): model = realtime_config.model api_key = realtime_config.api_key voice = getattr(realtime_config, "voice", None) + language = getattr(realtime_config, "language", None) logger.info( - f"Creating realtime LLM service: provider={provider}, model={model}, voice={voice}" + f"Creating realtime LLM service: provider={provider}, model={model}, voice={voice}, language={language}" ) if provider == ServiceProviders.OPENAI_REALTIME.value: @@ -451,12 +452,15 @@ def create_realtime_llm_service(user_config, audio_config: "AudioConfig"): # Gemini Live enables input/output audio transcription by default # in its _connect() method — no need to configure it explicitly. + settings_kwargs = { + "model": model, + "voice": voice or "Puck", + } + if language: + settings_kwargs["language"] = language return GeminiLiveLLMService( api_key=api_key, - settings=GeminiLiveLLMService.Settings( - model=model, - voice=voice or "Puck", # vad=GeminiVADParams(disabled=True) - ), + settings=GeminiLiveLLMService.Settings(**settings_kwargs), ) else: raise HTTPException(