From 0b005dad58a385c4a14cbe5a4edb987597245d94 Mon Sep 17 00:00:00 2001
From: Abhishek Kumar <abhishek@a6k.me>
Date: Sat, 16 May 2026 17:44:49 +0530
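Subject: [PATCH] fix: sampling rate fix for openai realtime

Bump the pipecat submodule from f780c6d to 8590e53.

Also included in this change:
- Validate that project_id and location are set for the Google Vertex
  Realtime provider.
- Add human-readable descriptions to the service configuration schema
  fields and render them in ServiceConfigurationForm.
- Render fields marked `multiline` (the Vertex `credentials` JSON) as a
  full-width textarea.
- Document how to configure Gemini Live on Vertex AI.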
---
 api/services/configuration/check_validity.py  |  24 +++
 api/services/configuration/registry.py        | 204 ++++++++++++++----
 docs/configurations/inference-providers.mdx   |  64 ++++++
 pipecat                                       |   2 +-
 .../components/ServiceConfigurationForm.tsx   |  56 ++++-
 5 files changed, 296 insertions(+), 54 deletions(-)

diff --git a/api/services/configuration/check_validity.py b/api/services/configuration/check_validity.py
index a78434d..ca46cfc 100644
--- a/api/services/configuration/check_validity.py
+++ b/api/services/configuration/check_validity.py
@@ -49,6 +49,7 @@ class UserConfigurationValidator:
             ServiceProviders.SPEACHES.value: self._check_speaches_api_key,
             ServiceProviders.OPENAI_REALTIME.value: self._check_openai_api_key,
             ServiceProviders.GOOGLE_REALTIME.value: self._check_google_api_key,
+            ServiceProviders.GOOGLE_VERTEX_REALTIME.value: self._check_google_vertex_realtime_api_key,
             ServiceProviders.ASSEMBLYAI.value: self._check_assemblyai_api_key,
             ServiceProviders.GLADIA.value: self._check_gladia_api_key,
             ServiceProviders.RIME.value: self._check_rime_api_key,
@@ -116,6 +117,22 @@ class UserConfigurationValidator:
                 return [{"model": service_name, "message": str(e)}]
             return []
 
+        # Vertex Realtime uses service-account credentials (or ADC) instead of api_key
+        if provider == ServiceProviders.GOOGLE_VERTEX_REALTIME.value:
+            try:
+                if not self._check_google_vertex_realtime_api_key(
+                    provider, service_config
+                ):
+                    return [
+                        {
+                            "model": service_name,
+                            "message": f"Invalid {provider} configuration",
+                        }
+                    ]
+            except ValueError as e:
+                return [{"model": service_name, "message": str(e)}]
+            return []
+
         # AWS Bedrock uses AWS credentials instead of api_key
         if provider == ServiceProviders.AWS_BEDROCK.value:
             try:
@@ -216,6 +233,13 @@ class UserConfigurationValidator:
             raise ValueError("base_url is required for Speaches services")
         return True
 
+    def _check_google_vertex_realtime_api_key(self, model: str, service_config) -> bool:
+        if not getattr(service_config, "project_id", None):
+            raise ValueError("project_id is required for Google Vertex Realtime")
+        if not getattr(service_config, "location", None):
+            raise ValueError("location is required for Google Vertex Realtime")
+        return True
+
     def _check_aws_bedrock_api_key(self, model: str, service_config) -> bool:
         if not service_config.aws_access_key or not service_config.aws_secret_key:
             raise ValueError("AWS access key and secret key are required for Bedrock")
diff --git a/api/services/configuration/registry.py b/api/services/configuration/registry.py
index 441cd6e..3875fc7 100644
--- a/api/services/configuration/registry.py
+++ b/api/services/configuration/registry.py
@@ -207,6 +207,7 @@ class OpenAILLMService(BaseLLMConfiguration):
     provider: Literal[ServiceProviders.OPENAI] = ServiceProviders.OPENAI
     model: str = Field(
         default="gpt-4.1",
+        description="OpenAI chat model to use.",
         json_schema_extra={"examples": OPENAI_MODELS, "allow_custom_input": True},
     )
 
@@ -216,6 +217,7 @@ class GoogleLLMService(BaseLLMConfiguration):
     provider: Literal[ServiceProviders.GOOGLE] = ServiceProviders.GOOGLE
     model: str = Field(
         default="gemini-2.0-flash",
+        description="Gemini model on Google AI Studio (not Vertex).",
         json_schema_extra={"examples": GOOGLE_MODELS, "allow_custom_input": True},
     )
 
@@ -225,6 +227,7 @@ class GroqLLMService(BaseLLMConfiguration):
     provider: Literal[ServiceProviders.GROQ] = ServiceProviders.GROQ
     model: str = Field(
         default="llama-3.3-70b-versatile",
+        description="Groq-hosted model identifier.",
         json_schema_extra={"examples": GROQ_MODELS, "allow_custom_input": True},
     )
 
@@ -234,10 +237,14 @@ class OpenRouterLLMConfiguration(BaseLLMConfiguration):
     provider: Literal[ServiceProviders.OPENROUTER] = ServiceProviders.OPENROUTER
     model: str = Field(
         default="openai/gpt-4.1",
+        description="OpenRouter model slug in 'vendor/model' form.",
         json_schema_extra={"examples": OPENROUTER_MODELS, "allow_custom_input": True},
     )
 
-    base_url: str = Field(default="https://openrouter.ai/api/v1")
+    base_url: str = Field(
+        default="https://openrouter.ai/api/v1",
+        description="Override only if proxying OpenRouter through your own gateway.",
+    )
 
 
 @register_llm
@@ -245,10 +252,13 @@ class AzureLLMService(BaseLLMConfiguration):
     provider: Literal[ServiceProviders.AZURE] = ServiceProviders.AZURE
     model: str = Field(
         default="gpt-4.1-mini",
+        description="Azure deployment name (not the upstream OpenAI model id).",
         json_schema_extra={"examples": AZURE_MODELS, "allow_custom_input": True},
     )
 
-    endpoint: str
+    endpoint: str = Field(
+        description="Azure OpenAI resource endpoint (e.g. https://<resource>.openai.azure.com).",
+    )
 
 
 @register_llm
@@ -256,6 +266,7 @@ class DograhLLMService(BaseLLMConfiguration):
     provider: Literal[ServiceProviders.DOGRAH] = ServiceProviders.DOGRAH
     model: str = Field(
         default="default",
+        description="Dograh-hosted model tier.",
         json_schema_extra={"examples": DOGRAH_LLM_MODELS, "allow_custom_input": True},
     )
 
@@ -265,12 +276,25 @@ class AWSBedrockLLMConfiguration(BaseLLMConfiguration):
     provider: Literal[ServiceProviders.AWS_BEDROCK] = ServiceProviders.AWS_BEDROCK
     model: str = Field(
         default="us.amazon.nova-pro-v1:0",
+        description="Bedrock model ID — include the region inference-profile prefix (e.g. 'us.').",
         json_schema_extra={"examples": AWS_BEDROCK_MODELS, "allow_custom_input": True},
     )
-    aws_access_key: str = Field(default="")
-    aws_secret_key: str = Field(default="")
-    aws_region: str = Field(default="us-east-1")
-    api_key: str | list[str] | None = Field(default=None)
+    aws_access_key: str = Field(
+        default="",
+        description="AWS access key ID with bedrock:InvokeModel permission.",
+    )
+    aws_secret_key: str = Field(
+        default="",
+        description="AWS secret access key paired with the access key ID.",
+    )
+    aws_region: str = Field(
+        default="us-east-1",
+        description="AWS region where the Bedrock model is available.",
+    )
+    api_key: str | list[str] | None = Field(
+        default=None,
+        description="Not used for Bedrock — authentication is via the AWS credentials above. Leave blank.",
+    )
 
 
 SPEACHES_LLM_MODELS = ["llama3", "mistral", "phi3", "qwen2", "gemma2", "deepseek-r1"]
@@ -281,6 +305,7 @@ class SpeachesLLMConfiguration(BaseLLMConfiguration):
     provider: Literal[ServiceProviders.SPEACHES] = ServiceProviders.SPEACHES
     model: str = Field(
         default="llama3",
+        description="Model name as exposed by your OpenAI-compatible server.",
         json_schema_extra={
             "examples": SPEACHES_LLM_MODELS,
             "allow_custom_input": True,
@@ -288,9 +313,12 @@ class SpeachesLLMConfiguration(BaseLLMConfiguration):
     )
     base_url: str = Field(
         default="http://localhost:11434/v1",
-        description="OpenAI-compatible endpoint (Ollama, vLLM, etc.)",
+        description="OpenAI-compatible endpoint (Ollama, vLLM, etc.).",
+    )
+    api_key: str | list[str] | None = Field(
+        default=None,
+        description="Usually not required for self-hosted endpoints. Leave blank unless your server enforces one.",
     )
-    api_key: str | list[str] | None = Field(default=None)
 
 
 OPENAI_REALTIME_MODELS = ["gpt-realtime-2"]
@@ -313,6 +341,7 @@ class OpenAIRealtimeLLMConfiguration(BaseLLMConfiguration):
     )
     model: str = Field(
         default="gpt-realtime-2",
+        description="OpenAI realtime (speech-to-speech) model.",
         json_schema_extra={
             "examples": OPENAI_REALTIME_MODELS,
             "allow_custom_input": True,
@@ -320,6 +349,7 @@ class OpenAIRealtimeLLMConfiguration(BaseLLMConfiguration):
     )
     voice: str = Field(
         default="alloy",
+        description="Voice the model speaks in.",
         json_schema_extra={
             "examples": OPENAI_REALTIME_VOICES,
             "allow_custom_input": True,
@@ -365,6 +395,7 @@ class GoogleRealtimeLLMConfiguration(BaseLLMConfiguration):
     )
     model: str = Field(
         default="gemini-3.1-flash-live-preview",
+        description="Gemini Live model on Google AI Studio (not Vertex).",
         json_schema_extra={
             "examples": GOOGLE_REALTIME_MODELS,
             "allow_custom_input": True,
@@ -372,6 +403,7 @@ class GoogleRealtimeLLMConfiguration(BaseLLMConfiguration):
     )
     voice: str = Field(
         default="Puck",
+        description="Voice the model speaks in.",
         json_schema_extra={
             "examples": GOOGLE_REALTIME_VOICES,
             "allow_custom_input": True,
@@ -379,6 +411,7 @@ class GoogleRealtimeLLMConfiguration(BaseLLMConfiguration):
     )
     language: str = Field(
         default="en",
+        description="ISO 639-1 language code.",
         json_schema_extra={
             "examples": GOOGLE_REALTIME_LANGUAGES,
             "allow_custom_input": True,
@@ -400,6 +433,7 @@ class GoogleVertexRealtimeLLMConfiguration(BaseLLMConfiguration):
     )
     model: str = Field(
         default="google/gemini-live-2.5-flash-native-audio",
+        description="Vertex AI publisher/model identifier.",
         json_schema_extra={
             "examples": GOOGLE_VERTEX_REALTIME_MODELS,
             "allow_custom_input": True,
@@ -407,13 +441,15 @@ class GoogleVertexRealtimeLLMConfiguration(BaseLLMConfiguration):
     )
     voice: str = Field(
         default="Charon",
+        description="Voice the model speaks in.",
         json_schema_extra={
             "examples": GOOGLE_VERTEX_REALTIME_VOICES,
             "allow_custom_input": True,
         },
     )
     language: str = Field(
         default="en-US",
+        description="BCP-47 language code (e.g. 'en-US').",
         json_schema_extra={
             "examples": GOOGLE_VERTEX_REALTIME_LANGUAGES,
             "allow_custom_input": True,
@@ -427,11 +463,18 @@ class GoogleVertexRealtimeLLMConfiguration(BaseLLMConfiguration):
     credentials: str | None = Field(
         default=None,
         description=(
-            "Service account JSON credentials string. If omitted, falls back to "
-            "Application Default Credentials (ADC)."
+            "Paste the entire service-account JSON file contents. If omitted, "
+            "falls back to Application Default Credentials (ADC)."
+        ),
+        json_schema_extra={"multiline": True},
+    )
+    api_key: str | list[str] | None = Field(
+        default=None,
+        description=(
+            "Not used for Vertex AI — authentication is via the service account "
+            "in `credentials` (or ADC). Leave blank."
         ),
     )
-    api_key: str | list[str] | None = Field(default=None)
 
 
 REALTIME_PROVIDERS = {
@@ -470,7 +513,10 @@ RealtimeConfig = Annotated[
 @register_tts
 class DeepgramTTSConfiguration(BaseServiceConfiguration):
     provider: Literal[ServiceProviders.DEEPGRAM] = ServiceProviders.DEEPGRAM
-    voice: str = "aura-2-helena-en"
+    voice: str = Field(
+        default="aura-2-helena-en",
+        description="Deepgram voice ID (model is inferred from the 'aura-N' prefix).",
+    )
 
     @computed_field
     @property
@@ -492,10 +538,14 @@ ELEVENLABS_TTS_MODELS = ["eleven_flash_v2_5"]
 @register_tts
 class ElevenlabsTTSConfiguration(BaseServiceConfiguration):
     provider: Literal[ServiceProviders.ELEVENLABS] = ServiceProviders.ELEVENLABS
-    voice: str = "21m00Tcm4TlvDq8ikWAM"  # Rachel voice ID
-    speed: float = Field(default=1.0, ge=0.1, le=2.0, description="Speed of the voice")
+    voice: str = Field(
+        default="21m00Tcm4TlvDq8ikWAM",
+        description="ElevenLabs voice ID from your Voice Library.",
+    )
+    speed: float = Field(default=1.0, ge=0.1, le=2.0, description="Speed of the voice.")
     model: str = Field(
         default="eleven_flash_v2_5",
+        description="ElevenLabs TTS model.",
         json_schema_extra={"examples": ELEVENLABS_TTS_MODELS},
     )
     base_url: str = Field(
@@ -515,9 +565,14 @@ OPENAI_TTS_MODELS = ["gpt-4o-mini-tts"]
 class OpenAITTSService(BaseTTSConfiguration):
     provider: Literal[ServiceProviders.OPENAI] = ServiceProviders.OPENAI
     model: str = Field(
-        default="gpt-4o-mini-tts", json_schema_extra={"examples": OPENAI_TTS_MODELS}
+        default="gpt-4o-mini-tts",
+        description="OpenAI TTS model.",
+        json_schema_extra={"examples": OPENAI_TTS_MODELS},
+    )
+    voice: str = Field(
+        default="alloy",
+        description="OpenAI TTS voice name.",
     )
-    voice: str = "alloy"
 
 
 DOGRAH_TTS_MODELS = ["default"]
@@ -527,10 +582,15 @@ DOGRAH_TTS_MODELS = ["default"]
 class DograhTTSService(BaseTTSConfiguration):
     provider: Literal[ServiceProviders.DOGRAH] = ServiceProviders.DOGRAH
     model: str = Field(
-        default="default", json_schema_extra={"examples": DOGRAH_TTS_MODELS}
+        default="default",
+        description="Dograh TTS tier.",
+        json_schema_extra={"examples": DOGRAH_TTS_MODELS},
     )
-    voice: str = "default"
-    speed: float = Field(default=1.0, ge=0.5, le=2.0, description="Speed of the voice")
+    voice: str = Field(
+        default="default",
+        description="Voice preset.",
+    )
+    speed: float = Field(default=1.0, ge=0.5, le=2.0, description="Speed of the voice.")
 
 
 CARTESIA_TTS_MODELS = ["sonic-3"]
@@ -540,15 +600,20 @@ CARTESIA_TTS_MODELS = ["sonic-3"]
 class CartesiaTTSConfiguration(BaseTTSConfiguration):
     provider: Literal[ServiceProviders.CARTESIA] = ServiceProviders.CARTESIA
     model: str = Field(
-        default="sonic-3", json_schema_extra={"examples": CARTESIA_TTS_MODELS}
+        default="sonic-3",
+        description="Cartesia TTS model.",
+        json_schema_extra={"examples": CARTESIA_TTS_MODELS},
     )
-    voice: str = Field(default="3faa81ae-d3d8-4ab1-9e44-e50e46d33c30")
-    speed: float = Field(default=1.0, ge=0.6, le=1.5, description="Speed of the voice")
+    voice: str = Field(
+        default="3faa81ae-d3d8-4ab1-9e44-e50e46d33c30",
+        description="Cartesia voice UUID from your Cartesia dashboard.",
+    )
+    speed: float = Field(default=1.0, ge=0.6, le=1.5, description="Speed of the voice.")
     volume: float = Field(
         default=1.0,
         ge=0.5,
         le=2.0,
-        description="Volume multiplier for generated speech",
+        description="Volume multiplier for generated speech.",
     )
 
 
@@ -623,10 +688,13 @@ SARVAM_LANGUAGES = [
 class SarvamTTSConfiguration(BaseTTSConfiguration):
     provider: Literal[ServiceProviders.SARVAM] = ServiceProviders.SARVAM
     model: str = Field(
-        default="bulbul:v2", json_schema_extra={"examples": SARVAM_TTS_MODELS}
+        default="bulbul:v2",
+        description="Sarvam TTS model (voice list depends on this).",
+        json_schema_extra={"examples": SARVAM_TTS_MODELS},
     )
     voice: str = Field(
         default="anushka",
+        description="Sarvam voice name; must match the selected model's voice list.",
         json_schema_extra={
             "examples": SARVAM_V2_VOICES,
             "model_options": {
@@ -636,7 +704,9 @@ class SarvamTTSConfiguration(BaseTTSConfiguration):
         },
     )
     language: str = Field(
-        default="hi-IN", json_schema_extra={"examples": SARVAM_LANGUAGES}
+        default="hi-IN",
+        description="BCP-47 Indian-language code (e.g. hi-IN, en-IN).",
+        json_schema_extra={"examples": SARVAM_LANGUAGES},
     )
 
 
@@ -647,10 +717,12 @@ CAMB_TTS_MODELS = ["mars-flash", "mars-pro", "mars-instruct"]
 class CambTTSConfiguration(BaseTTSConfiguration):
     provider: Literal[ServiceProviders.CAMB] = ServiceProviders.CAMB
     model: str = Field(
-        default="mars-flash", json_schema_extra={"examples": CAMB_TTS_MODELS}
+        default="mars-flash",
+        description="Camb.ai TTS model.",
+        json_schema_extra={"examples": CAMB_TTS_MODELS},
     )
-    voice: str = Field(default="147320", description="Camb.ai voice ID")
-    language: str = Field(default="en-us", description="BCP-47 language code")
+    voice: str = Field(default="147320", description="Camb.ai voice ID.")
+    language: str = Field(default="en-us", description="BCP-47 language code.")
 
 
 RIME_TTS_MODELS = ["arcana", "mistv3", "mistv2", "mist"]
@@ -662,17 +734,19 @@ class RimeTTSConfiguration(BaseTTSConfiguration):
     provider: Literal[ServiceProviders.RIME] = ServiceProviders.RIME
     model: str = Field(
         default="arcana",
+        description="Rime TTS model.",
         json_schema_extra={"examples": RIME_TTS_MODELS, "allow_custom_input": True},
     )
     voice: str = Field(
         default="celeste",
-        description="Rime voice ID",
+        description="Rime voice ID.",
     )
     speed: float = Field(
-        default=1.0, ge=0.5, le=2.0, description="Speech speed multiplier"
+        default=1.0, ge=0.5, le=2.0, description="Speech speed multiplier."
     )
     language: str = Field(
         default="en",
+        description="ISO 639-1 language code.",
         json_schema_extra={"examples": RIME_TTS_LANGUAGES, "allow_custom_input": True},
     )
 
@@ -685,6 +759,7 @@ class SpeachesTTSConfiguration(BaseTTSConfiguration):
     provider: Literal[ServiceProviders.SPEACHES] = ServiceProviders.SPEACHES
     model: str = Field(
         default="kokoro",
+        description="Model name as served by your TTS endpoint (e.g. Kokoro-FastAPI).",
         json_schema_extra={
             "examples": SPEACHES_TTS_MODELS,
             "allow_custom_input": True,
@@ -693,16 +768,19 @@ class SpeachesTTSConfiguration(BaseTTSConfiguration):
     voice: str = Field(
         default="af_heart",
         json_schema_extra={"allow_custom_input": True},
-        description="Voice ID for the TTS engine",
+        description="Voice ID for the TTS engine.",
     )
     base_url: str = Field(
         default="http://localhost:8000/v1",
-        description="OpenAI-compatible TTS endpoint (Kokoro-FastAPI, etc.)",
+        description="OpenAI-compatible TTS endpoint (Kokoro-FastAPI, etc.).",
     )
     speed: float = Field(
-        default=1.0, ge=0.25, le=4.0, description="Speech speed (0.25 to 4.0)"
+        default=1.0, ge=0.25, le=4.0, description="Speech speed (0.25 to 4.0)."
+    )
+    api_key: str | list[str] | None = Field(
+        default=None,
+        description="Usually not required for self-hosted TTS. Leave blank unless enforced.",
     )
-    api_key: str | list[str] | None = Field(default=None)
 
 
 TTSConfig = Annotated[
@@ -813,10 +891,13 @@ DEEPGRAM_LANGUAGES = [
 class DeepgramSTTConfiguration(BaseSTTConfiguration):
     provider: Literal[ServiceProviders.DEEPGRAM] = ServiceProviders.DEEPGRAM
     model: str = Field(
-        default="nova-3-general", json_schema_extra={"examples": DEEPGRAM_STT_MODELS}
+        default="nova-3-general",
+        description="Deepgram STT model.",
+        json_schema_extra={"examples": DEEPGRAM_STT_MODELS},
     )
     language: str = Field(
         default="multi",
+        description="Language code; 'multi' enables auto-detect (Nova-3 only).",
         json_schema_extra={
             "examples": DEEPGRAM_LANGUAGES,
             "model_options": {
@@ -834,7 +915,9 @@ CARTESIA_STT_MODELS = ["ink-whisper"]
 class CartesiaSTTConfiguration(BaseSTTConfiguration):
     provider: Literal[ServiceProviders.CARTESIA] = ServiceProviders.CARTESIA
     model: str = Field(
-        default="ink-whisper", json_schema_extra={"examples": CARTESIA_STT_MODELS}
+        default="ink-whisper",
+        description="Cartesia STT model.",
+        json_schema_extra={"examples": CARTESIA_STT_MODELS},
     )
 
 
@@ -845,7 +928,9 @@ OPENAI_STT_MODELS = ["gpt-4o-transcribe"]
 class OpenAISTTConfiguration(BaseSTTConfiguration):
     provider: Literal[ServiceProviders.OPENAI] = ServiceProviders.OPENAI
     model: str = Field(
-        default="gpt-4o-transcribe", json_schema_extra={"examples": OPENAI_STT_MODELS}
+        default="gpt-4o-transcribe",
+        description="OpenAI transcription model.",
+        json_schema_extra={"examples": OPENAI_STT_MODELS},
     )
 
 
@@ -858,10 +943,14 @@ DOGRAH_STT_LANGUAGES = DEEPGRAM_LANGUAGES
 class DograhSTTService(BaseSTTConfiguration):
     provider: Literal[ServiceProviders.DOGRAH] = ServiceProviders.DOGRAH
     model: str = Field(
-        default="default", json_schema_extra={"examples": DOGRAH_STT_MODELS}
+        default="default",
+        description="Dograh STT tier.",
+        json_schema_extra={"examples": DOGRAH_STT_MODELS},
     )
     language: str = Field(
-        default="multi", json_schema_extra={"examples": DOGRAH_STT_LANGUAGES}
+        default="multi",
+        description="Language code; use 'multi' for auto-detect.",
+        json_schema_extra={"examples": DOGRAH_STT_LANGUAGES},
     )
 
 
@@ -873,10 +962,14 @@ SARVAM_STT_MODELS = ["saarika:v2.5", "saaras:v2"]
 class SarvamSTTConfiguration(BaseSTTConfiguration):
     provider: Literal[ServiceProviders.SARVAM] = ServiceProviders.SARVAM
     model: str = Field(
-        default="saarika:v2.5", json_schema_extra={"examples": SARVAM_STT_MODELS}
+        default="saarika:v2.5",
+        description="Sarvam STT model.",
+        json_schema_extra={"examples": SARVAM_STT_MODELS},
     )
     language: str = Field(
-        default="hi-IN", json_schema_extra={"examples": SARVAM_LANGUAGES}
+        default="hi-IN",
+        description="BCP-47 Indian-language code.",
+        json_schema_extra={"examples": SARVAM_LANGUAGES},
     )
 
 
@@ -912,10 +1005,13 @@ SPEECHMATICS_STT_LANGUAGES = [
 class SpeechmaticsSTTConfiguration(BaseSTTConfiguration):
     provider: Literal[ServiceProviders.SPEECHMATICS] = ServiceProviders.SPEECHMATICS
     model: str = Field(
-        default="enhanced", description="Operating point: standard or enhanced"
+        default="enhanced",
+        description="Speechmatics operating point: 'standard' or 'enhanced'.",
     )
     language: str = Field(
-        default="en", json_schema_extra={"examples": SPEECHMATICS_STT_LANGUAGES}
+        default="en",
+        description="ISO 639-1 language code.",
+        json_schema_extra={"examples": SPEECHMATICS_STT_LANGUAGES},
     )
 
 
@@ -931,6 +1027,7 @@ class SpeachesSTTConfiguration(BaseSTTConfiguration):
     provider: Literal[ServiceProviders.SPEACHES] = ServiceProviders.SPEACHES
     model: str = Field(
         default="Systran/faster-distil-whisper-small.en",
+        description="Whisper model identifier as served by your STT endpoint.",
         json_schema_extra={
             "examples": SPEACHES_STT_MODELS,
             "allow_custom_input": True,
@@ -938,6 +1035,7 @@ class SpeachesSTTConfiguration(BaseSTTConfiguration):
     )
     language: str = Field(
         default="en",
+        description="ISO 639-1 language code.",
         json_schema_extra={
             "examples": SPEACHES_STT_LANGUAGES,
             "allow_custom_input": True,
@@ -945,9 +1043,12 @@ class SpeachesSTTConfiguration(BaseSTTConfiguration):
     )
     base_url: str = Field(
         default="http://localhost:8000/v1",
-        description="OpenAI-compatible STT endpoint (Speaches, etc.)",
+        description="OpenAI-compatible STT endpoint (Speaches, etc.).",
+    )
+    api_key: str | list[str] | None = Field(
+        default=None,
+        description="Usually not required for self-hosted STT. Leave blank unless enforced.",
     )
-    api_key: str | list[str] | None = Field(default=None)
 
 
 ASSEMBLYAI_STT_MODELS = ["u3-rt-pro"]
@@ -959,10 +1060,12 @@ class AssemblyAISTTConfiguration(BaseSTTConfiguration):
     provider: Literal[ServiceProviders.ASSEMBLYAI] = ServiceProviders.ASSEMBLYAI
     model: str = Field(
         default="u3-rt-pro",
+        description="AssemblyAI realtime STT model.",
         json_schema_extra={"examples": ASSEMBLYAI_STT_MODELS},
     )
     language: str = Field(
         default="en",
+        description="ISO 639-1 language code.",
         json_schema_extra={"examples": ASSEMBLYAI_STT_LANGUAGES},
     )
 
@@ -1077,10 +1180,12 @@ class GladiaSTTConfiguration(BaseSTTConfiguration):
     provider: Literal[ServiceProviders.GLADIA] = ServiceProviders.GLADIA
     model: str = Field(
         default="solaria-1",
+        description="Gladia STT model.",
         json_schema_extra={"examples": GLADIA_STT_MODELS},
     )
     language: str = Field(
         default="en",
+        description="ISO 639-1 language code.",
         json_schema_extra={"examples": GLADIA_STT_LANGUAGES},
     )
 
@@ -1110,6 +1215,7 @@ class OpenAIEmbeddingsConfiguration(BaseEmbeddingsConfiguration):
     provider: Literal[ServiceProviders.OPENAI] = ServiceProviders.OPENAI
     model: str = Field(
         default="text-embedding-3-small",
+        description="OpenAI embedding model.",
         json_schema_extra={"examples": OPENAI_EMBEDDING_MODELS},
     )
 
@@ -1122,10 +1228,14 @@ class OpenRouterEmbeddingsConfiguration(BaseEmbeddingsConfiguration):
     provider: Literal[ServiceProviders.OPENROUTER] = ServiceProviders.OPENROUTER
     model: str = Field(
         default="openai/text-embedding-3-small",
+        description="OpenRouter-hosted embedding model slug.",
         json_schema_extra={"examples": OPENROUTER_EMBEDDING_MODELS},
     )
 
-    base_url: str = Field(default="https://openrouter.ai/api/v1")
+    base_url: str = Field(
+        default="https://openrouter.ai/api/v1",
+        description="Override only if proxying OpenRouter through your own gateway.",
+    )
 
 
 EmbeddingsConfig = Annotated[
diff --git a/docs/configurations/inference-providers.mdx b/docs/configurations/inference-providers.mdx
index 3fb62ba..d31dbc8 100644
--- a/docs/configurations/inference-providers.mdx
+++ b/docs/configurations/inference-providers.mdx
@@ -120,4 +120,68 @@ To use Gemini 3.1 Live with Dograh, you need a Google Gemini API key. Follow the
 
 <Note>
   When using a Realtime provider like Gemini Live, you do not need to configure separate TTS and STT services — the realtime model handles speech in and out. However, you **must** still configure an **LLM** under the LLM tab: it powers variable extraction and QA analysis, which the realtime service does not perform.
+</Note>
+
+## Gemini Live on Vertex AI
+
+If you want to run Gemini Live through your own Google Cloud project — for billing consolidation, VPC controls, regional residency, or enterprise IAM — Dograh also supports Gemini Live via **Vertex AI** as a separate provider (`google_vertex_realtime`). The default model is `google/gemini-live-2.5-flash-native-audio`.
+
+Unlike Google AI Studio (which uses a single Gemini API key), Vertex AI authenticates with a **service account** belonging to your Google Cloud project.
+
+### Prerequisites
+
+1. A Google Cloud project with billing enabled.
+2. The Vertex AI API enabled on that project:
+
+   ```bash
+   gcloud services enable aiplatform.googleapis.com --project=YOUR_PROJECT_ID
+   ```
+
+3. A service account with the **Vertex AI User** role (`roles/aiplatform.user`) on the project:
+
+   ```bash
+   gcloud projects add-iam-policy-binding YOUR_PROJECT_ID \
+     --member="serviceAccount:YOUR_SA@YOUR_PROJECT_ID.iam.gserviceaccount.com" \
+     --role="roles/aiplatform.user"
+   ```
+
+4. A **JSON** key for that service account (P12 keys are not supported).
+
+### Creating the service account key
+
+1. In the GCP Console, go to **IAM & Admin → Service Accounts**.
+2. Pick an existing service account (or create a new one).
+3. Open the **Keys** tab → **Add Key → Create new key**.
+4. Choose **JSON** as the key type and click **Create**.
+5. The key file will download to your computer — store it securely and treat it as a secret.
+
+<Note>
+  Always pick **JSON**, not P12. The Vertex AI client libraries used by Dograh only accept service-account JSON keys; P12 is a legacy format retained for older Google Workspace integrations.
+</Note>
+
+### Configuring Vertex AI Realtime in Dograh
+
+1. Go to **Model Configurations** in your Dograh dashboard.
+2. Enable the **Realtime** toggle.
+3. Under the **Realtime** section, select `google_vertex_realtime` as the provider.
+4. Fill in the fields:
+
+   | Field | What to put in |
+   |---|---|
+   | **Model** | Vertex publisher/model id, e.g. `google/gemini-live-2.5-flash-native-audio` |
+   | **Voice** | One of the built-in voices (Puck, Charon, Kore, Fenrir, Aoede) |
+   | **Language** | BCP-47 code (e.g. `en-US`) |
+   | **Project Id** | The `project_id` value from your service-account JSON |
+   | **Location** | GCP region where the model is available (e.g. `us-east4`) |
+   | **Credentials** | Paste the **entire contents** of the service-account JSON file |
+   | **API Key** | Leave blank — Vertex AI does not use API keys |
+
+5. Save the configuration.
+
+<Note>
+  Paste the whole JSON file into the **Credentials** field — including `private_key`, `client_email`, and all other entries. Don't try to extract individual fields. If `Credentials` is left blank, Dograh falls back to **Application Default Credentials (ADC)** from the host environment, which is useful when running Dograh on a GCP VM or GKE pod with an attached service account.
+</Note>
+
+<Note>
+  IAM changes are eventually consistent: they usually take effect within a couple of minutes, but can occasionally take longer. If you see `Permission 'aiplatform.endpoints.predict' denied` right after granting the role, wait a few minutes and retry — and double-check that the role was granted to the same service account whose JSON you pasted.
 </Note>
\ No newline at end of file
diff --git a/pipecat b/pipecat
index f780c6d..8590e53 160000
--- a/pipecat
+++ b/pipecat
@@ -1 +1 @@
-Subproject commit f780c6de083d607adc7779109cad37f8b5a7030d
+Subproject commit 8590e5333d63eb69b78a193f9eeb2ff0584f9e9a
diff --git a/ui/src/components/ServiceConfigurationForm.tsx b/ui/src/components/ServiceConfigurationForm.tsx
index ddcf4c4..2425e06 100644
--- a/ui/src/components/ServiceConfigurationForm.tsx
+++ b/ui/src/components/ServiceConfigurationForm.tsx
@@ -13,6 +13,7 @@ import { Label } from "@/components/ui/label";
 import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from "@/components/ui/select";
 import { Switch } from "@/components/ui/switch";
 import { Tabs, TabsContent, TabsList, TabsTrigger } from "@/components/ui/tabs";
+import { Textarea } from "@/components/ui/textarea";
 import { VoiceSelector } from "@/components/VoiceSelector";
 import { LANGUAGE_DISPLAY_NAMES } from "@/constants/languages";
 import { useUserConfig } from "@/context/UserConfigContext";
@@ -30,6 +31,7 @@ interface SchemaProperty {
     $ref?: string;
     description?: string;
     format?: string;
+    multiline?: boolean;
 }
 
 interface ProviderSchema {
@@ -501,18 +503,26 @@ export function ServiceConfigurationForm({
 
                 {currentProvider && providerSchema && configFields.length > 1 && (
                     <div className="grid grid-cols-2 gap-4">
-                        {configFields.slice(1).map((field) => (
-                            <div key={field} className="space-y-2">
-                                <Label className="capitalize">{field.replace(/_/g, ' ')}</Label>
-                                {renderField(service, field, providerSchema)}
-                            </div>
-                        ))}
+                        {configFields.slice(1).map((field) => {
+                            const fieldSchema = providerSchema.properties[field];
+                            const actualFieldSchema = fieldSchema?.$ref && providerSchema.$defs
+                                ? providerSchema.$defs[fieldSchema.$ref.split('/').pop() || '']
+                                : fieldSchema;
+                            const fullWidth = actualFieldSchema?.multiline;
+                            return (
+                                <div key={field} className={`space-y-2 ${fullWidth ? "col-span-2" : ""}`}>
+                                    <Label className="capitalize">{field.replace(/_/g, ' ')}</Label>
+                                    {renderField(service, field, providerSchema)}
+                                </div>
+                            );
+                        })}
                     </div>
                 )}
 
                 {currentProvider && providerSchema && providerSchema.properties.api_key && (
                     <div className="space-y-2">
                         <Label>{mode === 'override' ? 'API Key (leave empty to use global)' : 'API Key(s)'}</Label>
+                        {renderFieldDescription("api_key", providerSchema)}
                         {apiKeys[service].map((key, index) => (
                             <div key={index} className="flex gap-2">
                                 <Input
@@ -564,7 +574,28 @@ export function ServiceConfigurationForm({
         );
     };
 
+    const renderFieldDescription = (field: string, providerSchema: ProviderSchema) => {
+        const schema = providerSchema.properties[field];
+        if (!schema) return null;
+        const actualSchema = schema.$ref && providerSchema.$defs
+            ? providerSchema.$defs[schema.$ref.split('/').pop() || '']
+            : schema;
+        if (!actualSchema?.description) return null;
+        return (
+            <p className="text-xs text-muted-foreground">{actualSchema.description}</p>
+        );
+    };
+
     const renderField = (service: ServiceSegment, field: string, providerSchema: ProviderSchema) => {
+        return (
+            <>
+                {renderFieldInput(service, field, providerSchema)}
+                {renderFieldDescription(field, providerSchema)}
+            </>
+        );
+    };
+
+    const renderFieldInput = (service: ServiceSegment, field: string, providerSchema: ProviderSchema) => {
         const schema = providerSchema.properties[field];
         const actualSchema = schema.$ref && providerSchema.$defs
             ? providerSchema.$defs[schema.$ref.split('/').pop() || '']
@@ -699,6 +730,19 @@ export function ServiceConfigurationForm({
             );
         }
 
+        if (actualSchema?.multiline) {
+            return (
+                <Textarea
+                    rows={6}
+                    className="font-mono text-xs"
+                    placeholder={`Enter ${field}`}
+                    {...register(`${service}_${field}`, {
+                        required: service !== "embeddings" && providerSchema.required?.includes(field),
+                    })}
+                />
+            );
+        }
+
         return (
             <Input
                 type={actualSchema?.type === "number" ? "number" : "text"}