feat: add openai realtime models (#298)

* feat: add openai realtime models
* chore: bump pipecat
* fix: resample telephony audio for openai realtime
* fix: sampling rate fix for openai realtime
* chore: clean up dead code
2026-07-25 12:01:04 +02:00 · 2026-05-16 18:05:23 +05:30 · 2026-05-16 18:05:23 +05:30 · 2381a803ad
commit 2381a803ad
parent 45b00cd5d0
45 changed files with 1991 additions and 173 deletions
--- a/api/routes/tool.py
+++ b/api/routes/tool.py
@ -29,6 +29,20 @@ class ToolParameter(BaseModel):
    )


+class PresetToolParameter(BaseModel):
+    """A parameter injected by Dograh at runtime."""
+
+    name: str = Field(description="Parameter name (used as key in request body)")
+    type: str = Field(description="Parameter type: string, number, or boolean")
+    value_template: str = Field(
+        description="Fixed value or template, e.g. {{initial_context.phone_number}}"
+    )
+    required: bool = Field(
+        default=True,
+        description="Whether the parameter must resolve to a non-empty value",
+    )
+
+
 class HttpApiConfig(BaseModel):
    """Configuration for HTTP API tools."""

@ -43,6 +57,10 @@ class HttpApiConfig(BaseModel):
    parameters: Optional[List[ToolParameter]] = Field(
        default=None, description="Parameters that the tool accepts from LLM"
    )
+    preset_parameters: Optional[List[PresetToolParameter]] = Field(
+        default=None,
+        description="Parameters injected by Dograh from fixed values or workflow context templates",
+    )
    timeout_ms: Optional[int] = Field(
        default=5000, description="Request timeout in milliseconds"
    )
--- a/api/services/campaign/campaign_call_dispatcher.py
+++ b/api/services/campaign/campaign_call_dispatcher.py
@ -20,8 +20,8 @@ from api.utils.common import get_backend_endpoints
 if TYPE_CHECKING:
    # Type-only — importing api.services.telephony eagerly triggers the
    # provider package init, which can pull in this module via the routes
-    # chain and create a circular import. Runtime calls below go through
-    # ``factory.get_telephony_provider`` (lazy import inside the method).
+    # chain and create a circular import. Runtime calls below lazy-import the
+    # factory helpers inside methods instead.
    from api.services.telephony.base import TelephonyProvider


@ -31,12 +31,6 @@ class CampaignCallDispatcher:
    def __init__(self):
        self.default_concurrent_limit = int(DEFAULT_ORG_CONCURRENCY_LIMIT)

-    async def get_telephony_provider(self, organization_id: int) -> "TelephonyProvider":
-        """Get telephony provider instance for specific organization (default config)."""
-        from api.services.telephony.factory import get_default_telephony_provider
-
-        return await get_default_telephony_provider(organization_id)
-
    async def get_provider_for_campaign(self, campaign) -> "TelephonyProvider":
        """Get the telephony provider pinned to this campaign's config. Falls back
        to the org's default config for legacy campaigns whose
--- a/api/services/configuration/check_validity.py
+++ b/api/services/configuration/check_validity.py
@ -49,6 +49,7 @@ class UserConfigurationValidator:
            ServiceProviders.SPEACHES.value: self._check_speaches_api_key,
            ServiceProviders.OPENAI_REALTIME.value: self._check_openai_api_key,
            ServiceProviders.GOOGLE_REALTIME.value: self._check_google_api_key,
+            ServiceProviders.GOOGLE_VERTEX_REALTIME.value: self._check_google_vertex_realtime_api_key,
            ServiceProviders.ASSEMBLYAI.value: self._check_assemblyai_api_key,
            ServiceProviders.GLADIA.value: self._check_gladia_api_key,
            ServiceProviders.RIME.value: self._check_rime_api_key,
@ -116,6 +117,22 @@ class UserConfigurationValidator:
                return [{"model": service_name, "message": str(e)}]
            return []

+        # Vertex Realtime uses service-account credentials (or ADC) instead of api_key
+        if provider == ServiceProviders.GOOGLE_VERTEX_REALTIME.value:
+            try:
+                if not self._check_google_vertex_realtime_api_key(
+                    provider, service_config
+                ):
+                    return [
+                        {
+                            "model": service_name,
+                            "message": f"Invalid {provider} configuration",
+                        }
+                    ]
+            except ValueError as e:
+                return [{"model": service_name, "message": str(e)}]
+            return []
+
        # AWS Bedrock uses AWS credentials instead of api_key
        if provider == ServiceProviders.AWS_BEDROCK.value:
            try:
@ -216,6 +233,13 @@ class UserConfigurationValidator:
            raise ValueError("base_url is required for Speaches services")
        return True

+    def _check_google_vertex_realtime_api_key(self, model: str, service_config) -> bool:
+        if not getattr(service_config, "project_id", None):
+            raise ValueError("project_id is required for Google Vertex Realtime")
+        if not getattr(service_config, "location", None):
+            raise ValueError("location is required for Google Vertex Realtime")
+        return True
+
    def _check_aws_bedrock_api_key(self, model: str, service_config) -> bool:
        if not service_config.aws_access_key or not service_config.aws_secret_key:
            raise ValueError("AWS access key and secret key are required for Bedrock")
--- a/api/services/configuration/registry.py
+++ b/api/services/configuration/registry.py
@ -34,6 +34,7 @@ class ServiceProviders(str, Enum):
    RIME = "rime"
    OPENAI_REALTIME = "openai_realtime"
    GOOGLE_REALTIME = "google_realtime"
+    GOOGLE_VERTEX_REALTIME = "google_vertex_realtime"


 class BaseServiceConfiguration(BaseModel):
@ -53,6 +54,7 @@ class BaseServiceConfiguration(BaseModel):
        ServiceProviders.RIME,
        ServiceProviders.OPENAI_REALTIME,
        ServiceProviders.GOOGLE_REALTIME,
+        ServiceProviders.GOOGLE_VERTEX_REALTIME,
        # ServiceProviders.SARVAM,
    ]
    api_key: str | list[str]
@ -205,6 +207,7 @@ class OpenAILLMService(BaseLLMConfiguration):
    provider: Literal[ServiceProviders.OPENAI] = ServiceProviders.OPENAI
    model: str = Field(
        default="gpt-4.1",
+        description="OpenAI chat model to use.",
        json_schema_extra={"examples": OPENAI_MODELS, "allow_custom_input": True},
    )

@ -214,6 +217,7 @@ class GoogleLLMService(BaseLLMConfiguration):
    provider: Literal[ServiceProviders.GOOGLE] = ServiceProviders.GOOGLE
    model: str = Field(
        default="gemini-2.0-flash",
+        description="Gemini model on Google AI Studio (not Vertex).",
        json_schema_extra={"examples": GOOGLE_MODELS, "allow_custom_input": True},
    )

@ -223,6 +227,7 @@ class GroqLLMService(BaseLLMConfiguration):
    provider: Literal[ServiceProviders.GROQ] = ServiceProviders.GROQ
    model: str = Field(
        default="llama-3.3-70b-versatile",
+        description="Groq-hosted model identifier.",
        json_schema_extra={"examples": GROQ_MODELS, "allow_custom_input": True},
    )

@ -232,10 +237,14 @@ class OpenRouterLLMConfiguration(BaseLLMConfiguration):
    provider: Literal[ServiceProviders.OPENROUTER] = ServiceProviders.OPENROUTER
    model: str = Field(
        default="openai/gpt-4.1",
+        description="OpenRouter model slug in 'vendor/model' form.",
        json_schema_extra={"examples": OPENROUTER_MODELS, "allow_custom_input": True},
    )

-    base_url: str = Field(default="https://openrouter.ai/api/v1")
+    base_url: str = Field(
+        default="https://openrouter.ai/api/v1",
+        description="Override only if proxying OpenRouter through your own gateway.",
+    )


@register_llm
@ -243,10 +252,13 @@ class AzureLLMService(BaseLLMConfiguration):
    provider: Literal[ServiceProviders.AZURE] = ServiceProviders.AZURE
    model: str = Field(
        default="gpt-4.1-mini",
+        description="Azure deployment name (not the upstream OpenAI model id).",
        json_schema_extra={"examples": AZURE_MODELS, "allow_custom_input": True},
    )

-    endpoint: str
+    endpoint: str = Field(
+        description="Azure OpenAI resource endpoint (e.g. https://<resource>.openai.azure.com).",
+    )


@register_llm
@ -254,6 +266,7 @@ class DograhLLMService(BaseLLMConfiguration):
    provider: Literal[ServiceProviders.DOGRAH] = ServiceProviders.DOGRAH
    model: str = Field(
        default="default",
+        description="Dograh-hosted model tier.",
        json_schema_extra={"examples": DOGRAH_LLM_MODELS, "allow_custom_input": True},
    )

@ -263,12 +276,25 @@ class AWSBedrockLLMConfiguration(BaseLLMConfiguration):
    provider: Literal[ServiceProviders.AWS_BEDROCK] = ServiceProviders.AWS_BEDROCK
    model: str = Field(
        default="us.amazon.nova-pro-v1:0",
+        description="Bedrock model ID — include the region inference-profile prefix (e.g. 'us.').",
        json_schema_extra={"examples": AWS_BEDROCK_MODELS, "allow_custom_input": True},
    )
-    aws_access_key: str = Field(default="")
-    aws_secret_key: str = Field(default="")
-    aws_region: str = Field(default="us-east-1")
-    api_key: str | list[str] | None = Field(default=None)
+    aws_access_key: str = Field(
+        default="",
+        description="AWS access key ID with bedrock:InvokeModel permission.",
+    )
+    aws_secret_key: str = Field(
+        default="",
+        description="AWS secret access key paired with the access key ID.",
+    )
+    aws_region: str = Field(
+        default="us-east-1",
+        description="AWS region where the Bedrock model is available.",
+    )
+    api_key: str | list[str] | None = Field(
+        default=None,
+        description="Not used for Bedrock — authentication is via the AWS credentials above. Leave blank.",
+    )


 SPEACHES_LLM_MODELS = ["llama3", "mistral", "phi3", "qwen2", "gemma2", "deepseek-r1"]
@ -279,6 +305,7 @@ class SpeachesLLMConfiguration(BaseLLMConfiguration):
    provider: Literal[ServiceProviders.SPEACHES] = ServiceProviders.SPEACHES
    model: str = Field(
        default="llama3",
+        description="Model name as exposed by your OpenAI-compatible server.",
        json_schema_extra={
            "examples": SPEACHES_LLM_MODELS,
            "allow_custom_input": True,
@ -286,12 +313,15 @@ class SpeachesLLMConfiguration(BaseLLMConfiguration):
    )
    base_url: str = Field(
        default="http://localhost:11434/v1",
-        description="OpenAI-compatible endpoint (Ollama, vLLM, etc.)",
+        description="OpenAI-compatible endpoint (Ollama, vLLM, etc.).",
+    )
+    api_key: str | list[str] | None = Field(
+        default=None,
+        description="Usually not required for self-hosted endpoints. Leave blank unless your server enforces one.",
    )
-    api_key: str | list[str] | None = Field(default=None)


-OPENAI_REALTIME_MODELS = ["gpt-4o-realtime-preview", "gpt-4o-mini-realtime-preview"]
+OPENAI_REALTIME_MODELS = ["gpt-realtime-2"]
 OPENAI_REALTIME_VOICES = [
    "alloy",
    "ash",
@ -304,22 +334,27 @@ OPENAI_REALTIME_VOICES = [
 ]


-# @register_service(ServiceType.REALTIME)
-# class OpenAIRealtimeLLMConfiguration(BaseLLMConfiguration):
-#     provider: Literal[ServiceProviders.OPENAI_REALTIME] = (
-#         ServiceProviders.OPENAI_REALTIME
-#     )
-#     model: str = Field(
-#         default="gpt-4o-realtime-preview",
-#         json_schema_extra={
-#             "examples": OPENAI_REALTIME_MODELS,
-#             "allow_custom_input": True,
-#         },
-#     )
-#     voice: str = Field(
-#         default="alloy",
-#         json_schema_extra={"examples": OPENAI_REALTIME_VOICES},
-#     )
+@register_service(ServiceType.REALTIME)
+class OpenAIRealtimeLLMConfiguration(BaseLLMConfiguration):
+    provider: Literal[ServiceProviders.OPENAI_REALTIME] = (
+        ServiceProviders.OPENAI_REALTIME
+    )
+    model: str = Field(
+        default="gpt-realtime-2",
+        description="OpenAI realtime (speech-to-speech) model.",
+        json_schema_extra={
+            "examples": OPENAI_REALTIME_MODELS,
+            "allow_custom_input": True,
+        },
+    )
+    voice: str = Field(
+        default="alloy",
+        description="Voice the model speaks in.",
+        json_schema_extra={
+            "examples": OPENAI_REALTIME_VOICES,
+            "allow_custom_input": True,
+        },
+    )


 GOOGLE_REALTIME_MODELS = ["gemini-3.1-flash-live-preview"]
@ -360,6 +395,7 @@ class GoogleRealtimeLLMConfiguration(BaseLLMConfiguration):
    )
    model: str = Field(
        default="gemini-3.1-flash-live-preview",
+        description="Gemini Live model on Google AI Studio (not Vertex).",
        json_schema_extra={
            "examples": GOOGLE_REALTIME_MODELS,
            "allow_custom_input": True,
@ -367,6 +403,7 @@ class GoogleRealtimeLLMConfiguration(BaseLLMConfiguration):
    )
    voice: str = Field(
        default="Puck",
+        description="Voice the model speaks in.",
        json_schema_extra={
            "examples": GOOGLE_REALTIME_VOICES,
            "allow_custom_input": True,
@ -374,6 +411,7 @@ class GoogleRealtimeLLMConfiguration(BaseLLMConfiguration):
    )
    language: str = Field(
        default="en",
+        description="ISO 639-1 language code.",
        json_schema_extra={
            "examples": GOOGLE_REALTIME_LANGUAGES,
            "allow_custom_input": True,
@ -381,9 +419,68 @@ class GoogleRealtimeLLMConfiguration(BaseLLMConfiguration):
    )


+GOOGLE_VERTEX_REALTIME_MODELS = [
+    "google/gemini-live-2.5-flash-native-audio",
+]
+GOOGLE_VERTEX_REALTIME_VOICES = GOOGLE_REALTIME_VOICES
+GOOGLE_VERTEX_REALTIME_LANGUAGES = GOOGLE_REALTIME_LANGUAGES
+
+
+@register_service(ServiceType.REALTIME)
+class GoogleVertexRealtimeLLMConfiguration(BaseLLMConfiguration):
+    provider: Literal[ServiceProviders.GOOGLE_VERTEX_REALTIME] = (
+        ServiceProviders.GOOGLE_VERTEX_REALTIME
+    )
+    model: str = Field(
+        default="google/gemini-live-2.5-flash-native-audio",
+        description="Vertex AI publisher/model identifier.",
+        json_schema_extra={
+            "examples": GOOGLE_VERTEX_REALTIME_MODELS,
+            "allow_custom_input": True,
+        },
+    )
+    voice: str = Field(
+        default="Charon",
+        description="Voice the model speaks in.",
+        json_schema_extra={
+            "examples": GOOGLE_VERTEX_REALTIME_VOICES,
+            "allow_custom_input": True,
+        },
+    )
+    language: str = Field(
+        default="en",
+        description="BCP-47 language code (e.g. 'en-US').",
+        json_schema_extra={
+            "examples": GOOGLE_VERTEX_REALTIME_LANGUAGES,
+            "allow_custom_input": True,
+        },
+    )
+    project_id: str = Field(description="Google Cloud project ID for Vertex AI.")
+    location: str = Field(
+        default="us-east4",
+        description="GCP region for the Vertex AI endpoint (e.g. 'us-east4').",
+    )
+    credentials: str | None = Field(
+        default=None,
+        description=(
+            "Paste the entire service-account JSON file contents. If omitted, "
+            "falls back to Application Default Credentials (ADC)."
+        ),
+        json_schema_extra={"multiline": True},
+    )
+    api_key: str | list[str] | None = Field(
+        default=None,
+        description=(
+            "Not used for Vertex AI — authentication is via the service account "
+            "in `credentials` (or ADC). Leave blank."
+        ),
+    )
+
+
 REALTIME_PROVIDERS = {
    ServiceProviders.OPENAI_REALTIME.value,
    ServiceProviders.GOOGLE_REALTIME.value,
+    ServiceProviders.GOOGLE_VERTEX_REALTIME.value,
 }


@ -403,8 +500,9 @@ LLMConfig = Annotated[

 RealtimeConfig = Annotated[
    Union[
-        # OpenAIRealtimeLLMConfiguration,
+        OpenAIRealtimeLLMConfiguration,
        GoogleRealtimeLLMConfiguration,
+        GoogleVertexRealtimeLLMConfiguration,
    ],
    Field(discriminator="provider"),
 ]
@ -415,7 +513,10 @@ RealtimeConfig = Annotated[
@register_tts
 class DeepgramTTSConfiguration(BaseServiceConfiguration):
    provider: Literal[ServiceProviders.DEEPGRAM] = ServiceProviders.DEEPGRAM
-    voice: str = "aura-2-helena-en"
+    voice: str = Field(
+        default="aura-2-helena-en",
+        description="Deepgram voice ID (model is inferred from the 'aura-N' prefix).",
+    )

    @computed_field
    @property
@ -437,10 +538,14 @@ ELEVENLABS_TTS_MODELS = ["eleven_flash_v2_5"]
@register_tts
 class ElevenlabsTTSConfiguration(BaseServiceConfiguration):
    provider: Literal[ServiceProviders.ELEVENLABS] = ServiceProviders.ELEVENLABS
-    voice: str = "21m00Tcm4TlvDq8ikWAM"  # Rachel voice ID
-    speed: float = Field(default=1.0, ge=0.1, le=2.0, description="Speed of the voice")
+    voice: str = Field(
+        default="21m00Tcm4TlvDq8ikWAM",
+        description="ElevenLabs voice ID from your Voice Library.",
+    )
+    speed: float = Field(default=1.0, ge=0.1, le=2.0, description="Speed of the voice.")
    model: str = Field(
        default="eleven_flash_v2_5",
+        description="ElevenLabs TTS model.",
        json_schema_extra={"examples": ELEVENLABS_TTS_MODELS},
    )
    base_url: str = Field(
@ -460,9 +565,14 @@ OPENAI_TTS_MODELS = ["gpt-4o-mini-tts"]
 class OpenAITTSService(BaseTTSConfiguration):
    provider: Literal[ServiceProviders.OPENAI] = ServiceProviders.OPENAI
    model: str = Field(
-        default="gpt-4o-mini-tts", json_schema_extra={"examples": OPENAI_TTS_MODELS}
+        default="gpt-4o-mini-tts",
+        description="OpenAI TTS model.",
+        json_schema_extra={"examples": OPENAI_TTS_MODELS},
+    )
+    voice: str = Field(
+        default="alloy",
+        description="OpenAI TTS voice name.",
    )
-    voice: str = "alloy"


 DOGRAH_TTS_MODELS = ["default"]
@ -472,10 +582,15 @@ DOGRAH_TTS_MODELS = ["default"]
 class DograhTTSService(BaseTTSConfiguration):
    provider: Literal[ServiceProviders.DOGRAH] = ServiceProviders.DOGRAH
    model: str = Field(
-        default="default", json_schema_extra={"examples": DOGRAH_TTS_MODELS}
+        default="default",
+        description="Dograh TTS tier.",
+        json_schema_extra={"examples": DOGRAH_TTS_MODELS},
    )
-    voice: str = "default"
-    speed: float = Field(default=1.0, ge=0.5, le=2.0, description="Speed of the voice")
+    voice: str = Field(
+        default="default",
+        description="Voice preset.",
+    )
+    speed: float = Field(default=1.0, ge=0.5, le=2.0, description="Speed of the voice.")


 CARTESIA_TTS_MODELS = ["sonic-3"]
@ -485,15 +600,20 @@ CARTESIA_TTS_MODELS = ["sonic-3"]
 class CartesiaTTSConfiguration(BaseTTSConfiguration):
    provider: Literal[ServiceProviders.CARTESIA] = ServiceProviders.CARTESIA
    model: str = Field(
-        default="sonic-3", json_schema_extra={"examples": CARTESIA_TTS_MODELS}
+        default="sonic-3",
+        description="Cartesia TTS model.",
+        json_schema_extra={"examples": CARTESIA_TTS_MODELS},
    )
-    voice: str = Field(default="3faa81ae-d3d8-4ab1-9e44-e50e46d33c30")
-    speed: float = Field(default=1.0, ge=0.6, le=1.5, description="Speed of the voice")
+    voice: str = Field(
+        default="3faa81ae-d3d8-4ab1-9e44-e50e46d33c30",
+        description="Cartesia voice UUID from your Cartesia dashboard.",
+    )
+    speed: float = Field(default=1.0, ge=0.6, le=1.5, description="Speed of the voice.")
    volume: float = Field(
        default=1.0,
        ge=0.5,
        le=2.0,
-        description="Volume multiplier for generated speech",
+        description="Volume multiplier for generated speech.",
    )


@ -568,10 +688,13 @@ SARVAM_LANGUAGES = [
 class SarvamTTSConfiguration(BaseTTSConfiguration):
    provider: Literal[ServiceProviders.SARVAM] = ServiceProviders.SARVAM
    model: str = Field(
-        default="bulbul:v2", json_schema_extra={"examples": SARVAM_TTS_MODELS}
+        default="bulbul:v2",
+        description="Sarvam TTS model (voice list depends on this).",
+        json_schema_extra={"examples": SARVAM_TTS_MODELS},
    )
    voice: str = Field(
        default="anushka",
+        description="Sarvam voice name; must match the selected model's voice list.",
        json_schema_extra={
            "examples": SARVAM_V2_VOICES,
            "model_options": {
@ -581,7 +704,9 @@ class SarvamTTSConfiguration(BaseTTSConfiguration):
        },
    )
    language: str = Field(
-        default="hi-IN", json_schema_extra={"examples": SARVAM_LANGUAGES}
+        default="hi-IN",
+        description="BCP-47 Indian-language code (e.g. hi-IN, en-IN).",
+        json_schema_extra={"examples": SARVAM_LANGUAGES},
    )


@ -592,10 +717,12 @@ CAMB_TTS_MODELS = ["mars-flash", "mars-pro", "mars-instruct"]
 class CambTTSConfiguration(BaseTTSConfiguration):
    provider: Literal[ServiceProviders.CAMB] = ServiceProviders.CAMB
    model: str = Field(
-        default="mars-flash", json_schema_extra={"examples": CAMB_TTS_MODELS}
+        default="mars-flash",
+        description="Camb.ai TTS model.",
+        json_schema_extra={"examples": CAMB_TTS_MODELS},
    )
-    voice: str = Field(default="147320", description="Camb.ai voice ID")
-    language: str = Field(default="en-us", description="BCP-47 language code")
+    voice: str = Field(default="147320", description="Camb.ai voice ID.")
+    language: str = Field(default="en-us", description="BCP-47 language code.")


 RIME_TTS_MODELS = ["arcana", "mistv3", "mistv2", "mist"]
@ -607,17 +734,19 @@ class RimeTTSConfiguration(BaseTTSConfiguration):
    provider: Literal[ServiceProviders.RIME] = ServiceProviders.RIME
    model: str = Field(
        default="arcana",
+        description="Rime TTS model.",
        json_schema_extra={"examples": RIME_TTS_MODELS, "allow_custom_input": True},
    )
    voice: str = Field(
        default="celeste",
-        description="Rime voice ID",
+        description="Rime voice ID.",
    )
    speed: float = Field(
-        default=1.0, ge=0.5, le=2.0, description="Speech speed multiplier"
+        default=1.0, ge=0.5, le=2.0, description="Speech speed multiplier."
    )
    language: str = Field(
        default="en",
+        description="ISO 639-1 language code.",
        json_schema_extra={"examples": RIME_TTS_LANGUAGES, "allow_custom_input": True},
    )

@ -630,6 +759,7 @@ class SpeachesTTSConfiguration(BaseTTSConfiguration):
    provider: Literal[ServiceProviders.SPEACHES] = ServiceProviders.SPEACHES
    model: str = Field(
        default="kokoro",
+        description="Model name as served by your TTS endpoint (e.g. Kokoro-FastAPI).",
        json_schema_extra={
            "examples": SPEACHES_TTS_MODELS,
            "allow_custom_input": True,
@ -638,16 +768,19 @@ class SpeachesTTSConfiguration(BaseTTSConfiguration):
    voice: str = Field(
        default="af_heart",
        json_schema_extra={"allow_custom_input": True},
-        description="Voice ID for the TTS engine",
+        description="Voice ID for the TTS engine.",
    )
    base_url: str = Field(
        default="http://localhost:8000/v1",
-        description="OpenAI-compatible TTS endpoint (Kokoro-FastAPI, etc.)",
+        description="OpenAI-compatible TTS endpoint (Kokoro-FastAPI, etc.).",
    )
    speed: float = Field(
-        default=1.0, ge=0.25, le=4.0, description="Speech speed (0.25 to 4.0)"
+        default=1.0, ge=0.25, le=4.0, description="Speech speed (0.25 to 4.0)."
+    )
+    api_key: str | list[str] | None = Field(
+        default=None,
+        description="Usually not required for self-hosted TTS. Leave blank unless enforced.",
    )
-    api_key: str | list[str] | None = Field(default=None)


 TTSConfig = Annotated[
@ -758,10 +891,13 @@ DEEPGRAM_LANGUAGES = [
 class DeepgramSTTConfiguration(BaseSTTConfiguration):
    provider: Literal[ServiceProviders.DEEPGRAM] = ServiceProviders.DEEPGRAM
    model: str = Field(
-        default="nova-3-general", json_schema_extra={"examples": DEEPGRAM_STT_MODELS}
+        default="nova-3-general",
+        description="Deepgram STT model.",
+        json_schema_extra={"examples": DEEPGRAM_STT_MODELS},
    )
    language: str = Field(
        default="multi",
+        description="Language code; 'multi' enables auto-detect (Nova-3 only).",
        json_schema_extra={
            "examples": DEEPGRAM_LANGUAGES,
            "model_options": {
@ -779,7 +915,9 @@ CARTESIA_STT_MODELS = ["ink-whisper"]
 class CartesiaSTTConfiguration(BaseSTTConfiguration):
    provider: Literal[ServiceProviders.CARTESIA] = ServiceProviders.CARTESIA
    model: str = Field(
-        default="ink-whisper", json_schema_extra={"examples": CARTESIA_STT_MODELS}
+        default="ink-whisper",
+        description="Cartesia STT model.",
+        json_schema_extra={"examples": CARTESIA_STT_MODELS},
    )


@ -790,7 +928,9 @@ OPENAI_STT_MODELS = ["gpt-4o-transcribe"]
 class OpenAISTTConfiguration(BaseSTTConfiguration):
    provider: Literal[ServiceProviders.OPENAI] = ServiceProviders.OPENAI
    model: str = Field(
-        default="gpt-4o-transcribe", json_schema_extra={"examples": OPENAI_STT_MODELS}
+        default="gpt-4o-transcribe",
+        description="OpenAI transcription model.",
+        json_schema_extra={"examples": OPENAI_STT_MODELS},
    )


@ -803,10 +943,14 @@ DOGRAH_STT_LANGUAGES = DEEPGRAM_LANGUAGES
 class DograhSTTService(BaseSTTConfiguration):
    provider: Literal[ServiceProviders.DOGRAH] = ServiceProviders.DOGRAH
    model: str = Field(
-        default="default", json_schema_extra={"examples": DOGRAH_STT_MODELS}
+        default="default",
+        description="Dograh STT tier.",
+        json_schema_extra={"examples": DOGRAH_STT_MODELS},
    )
    language: str = Field(
-        default="multi", json_schema_extra={"examples": DOGRAH_STT_LANGUAGES}
+        default="multi",
+        description="Language code; use 'multi' for auto-detect.",
+        json_schema_extra={"examples": DOGRAH_STT_LANGUAGES},
    )


@ -818,10 +962,14 @@ SARVAM_STT_MODELS = ["saarika:v2.5", "saaras:v2"]
 class SarvamSTTConfiguration(BaseSTTConfiguration):
    provider: Literal[ServiceProviders.SARVAM] = ServiceProviders.SARVAM
    model: str = Field(
-        default="saarika:v2.5", json_schema_extra={"examples": SARVAM_STT_MODELS}
+        default="saarika:v2.5",
+        description="Sarvam STT model.",
+        json_schema_extra={"examples": SARVAM_STT_MODELS},
    )
    language: str = Field(
-        default="hi-IN", json_schema_extra={"examples": SARVAM_LANGUAGES}
+        default="hi-IN",
+        description="BCP-47 Indian-language code.",
+        json_schema_extra={"examples": SARVAM_LANGUAGES},
    )


@ -857,10 +1005,13 @@ SPEECHMATICS_STT_LANGUAGES = [
 class SpeechmaticsSTTConfiguration(BaseSTTConfiguration):
    provider: Literal[ServiceProviders.SPEECHMATICS] = ServiceProviders.SPEECHMATICS
    model: str = Field(
-        default="enhanced", description="Operating point: standard or enhanced"
+        default="enhanced",
+        description="Speechmatics operating point: 'standard' or 'enhanced'.",
    )
    language: str = Field(
-        default="en", json_schema_extra={"examples": SPEECHMATICS_STT_LANGUAGES}
+        default="en",
+        description="ISO 639-1 language code.",
+        json_schema_extra={"examples": SPEECHMATICS_STT_LANGUAGES},
    )


@ -876,6 +1027,7 @@ class SpeachesSTTConfiguration(BaseSTTConfiguration):
    provider: Literal[ServiceProviders.SPEACHES] = ServiceProviders.SPEACHES
    model: str = Field(
        default="Systran/faster-distil-whisper-small.en",
+        description="Whisper model identifier as served by your STT endpoint.",
        json_schema_extra={
            "examples": SPEACHES_STT_MODELS,
            "allow_custom_input": True,
@ -883,6 +1035,7 @@ class SpeachesSTTConfiguration(BaseSTTConfiguration):
    )
    language: str = Field(
        default="en",
+        description="ISO 639-1 language code.",
        json_schema_extra={
            "examples": SPEACHES_STT_LANGUAGES,
            "allow_custom_input": True,
@ -890,9 +1043,12 @@ class SpeachesSTTConfiguration(BaseSTTConfiguration):
    )
    base_url: str = Field(
        default="http://localhost:8000/v1",
-        description="OpenAI-compatible STT endpoint (Speaches, etc.)",
+        description="OpenAI-compatible STT endpoint (Speaches, etc.).",
+    )
+    api_key: str | list[str] | None = Field(
+        default=None,
+        description="Usually not required for self-hosted STT. Leave blank unless enforced.",
    )
-    api_key: str | list[str] | None = Field(default=None)


 ASSEMBLYAI_STT_MODELS = ["u3-rt-pro"]
@ -904,10 +1060,12 @@ class AssemblyAISTTConfiguration(BaseSTTConfiguration):
    provider: Literal[ServiceProviders.ASSEMBLYAI] = ServiceProviders.ASSEMBLYAI
    model: str = Field(
        default="u3-rt-pro",
+        description="AssemblyAI realtime STT model.",
        json_schema_extra={"examples": ASSEMBLYAI_STT_MODELS},
    )
    language: str = Field(
        default="en",
+        description="ISO 639-1 language code.",
        json_schema_extra={"examples": ASSEMBLYAI_STT_LANGUAGES},
    )

@ -1022,10 +1180,12 @@ class GladiaSTTConfiguration(BaseSTTConfiguration):
    provider: Literal[ServiceProviders.GLADIA] = ServiceProviders.GLADIA
    model: str = Field(
        default="solaria-1",
+        description="Gladia STT model.",
        json_schema_extra={"examples": GLADIA_STT_MODELS},
    )
    language: str = Field(
        default="en",
+        description="ISO 639-1 language code.",
        json_schema_extra={"examples": GLADIA_STT_LANGUAGES},
    )

@ -1055,6 +1215,7 @@ class OpenAIEmbeddingsConfiguration(BaseEmbeddingsConfiguration):
    provider: Literal[ServiceProviders.OPENAI] = ServiceProviders.OPENAI
    model: str = Field(
        default="text-embedding-3-small",
+        description="OpenAI embedding model.",
        json_schema_extra={"examples": OPENAI_EMBEDDING_MODELS},
    )

@ -1067,10 +1228,14 @@ class OpenRouterEmbeddingsConfiguration(BaseEmbeddingsConfiguration):
    provider: Literal[ServiceProviders.OPENROUTER] = ServiceProviders.OPENROUTER
    model: str = Field(
        default="openai/text-embedding-3-small",
+        description="OpenRouter-hosted embedding model slug.",
        json_schema_extra={"examples": OPENROUTER_EMBEDDING_MODELS},
    )

-    base_url: str = Field(default="https://openrouter.ai/api/v1")
+    base_url: str = Field(
+        default="https://openrouter.ai/api/v1",
+        description="Override only if proxying OpenRouter through your own gateway.",
+    )


 EmbeddingsConfig = Annotated[
--- a/api/services/pipecat/realtime/init.py
+++ b/api/services/pipecat/realtime/init.py
@ -0,0 +1,9 @@
+"""Dograh-specific subclasses of pipecat realtime LLM services.
+
+Each subclass wires Dograh engine integration quirks (user-mute gating,
+TTSSpeakFrame greeting trigger, node-transition handling, function-call
+deferral, etc.) onto the corresponding pipecat realtime service.
+
+The pipecat fork's services stay close to upstream — Dograh behavior lives
+here.
+"""
--- a/api/services/pipecat/realtime/gemini_live.py
+++ b/api/services/pipecat/realtime/gemini_live.py
@ -0,0 +1,237 @@
+"""Dograh subclass of pipecat's Gemini Live LLM service.
+
+Layers Dograh engine integration quirks onto upstream-pristine
+:class:`GeminiLiveLLMService`:
+
+- **Deferred connect.** Connection is held back until ``system_instruction``
+  is set via :meth:`_update_settings`, so pre-call-fetch template variables
+  land before the live session opens.
+- **Reconnect on node transitions.** Gemini Live cannot update
+  ``system_instruction`` mid-session, so a setting change triggers a
+  reconnect (deferred until the bot turn ends if currently responding).
+- **Function-call deferral.** Tool calls emitted mid-turn are queued and run
+  when the bot stops speaking, to avoid racing the turn's audio.
+- **User-mute audio gating.** ``UserMuteStarted/StoppedFrame`` from the
+  user aggregator gates whether incoming audio is forwarded to Gemini.
+- **TTSSpeakFrame as greeting trigger.** The engine queues a TTSSpeakFrame
+  to kick off the first response after node setup; the service intercepts
+  it and runs the initial-context path.
+- **Finalize-pending on transcriptions.** Marks the transcription emitted
+  immediately after VAD-stop as finalized, distinguishing it from
+  mid-turn partials.
+"""
+
+from typing import Any
+
+from loguru import logger
+
+from pipecat.frames.frames import (
+    BotStoppedSpeakingFrame,
+    Frame,
+    TranscriptionFrame,
+    TTSSpeakFrame,
+    UserMuteStartedFrame,
+    UserMuteStoppedFrame,
+)
+from pipecat.processors.aggregators.llm_context import LLMContext
+from pipecat.processors.frame_processor import FrameDirection
+from pipecat.services.google.gemini_live.llm import GeminiLiveLLMService
+from pipecat.services.llm_service import FunctionCallFromLLM
+from pipecat.utils.time import time_now_iso8601
+from pipecat.utils.tracing.service_decorators import traced_gemini_live
+
+
+class DograhGeminiLiveLLMService(GeminiLiveLLMService):
+    """Gemini Live with Dograh engine integration quirks. See module docstring."""
+
+    def __init__(self, **kwargs):
+        """Create the service and initialise Dograh-specific bookkeeping.
+
+        All keyword arguments are forwarded unchanged to the parent class.
+        """
+        super().__init__(**kwargs)
+        # User-mute state, driven by broadcast UserMute{Started,Stopped}Frames.
+        # Audio is not forwarded to Gemini while muted.
+        self._user_is_muted: bool = False
+        # Guards initial-response triggering against double-firing across the
+        # initial TTSSpeakFrame and any LLMContextFrame that may arrive.
+        self._handled_initial_context: bool = False
+        # When a system_instruction change arrives mid-bot-turn, the reconnect
+        # is queued and drained when the turn ends.
+        self._reconnect_pending: bool = False
+        # Function calls emitted by Gemini mid-bot-turn are deferred here and
+        # invoked when the turn ends, so they don't race the turn's audio.
+        self._pending_function_calls: list[FunctionCallFromLLM] = []
+        # Tracks whether the next transcription to arrive should be marked as
+        # the finalized transcription for the current user turn.
+        self._finalize_pending: bool = False
+
+    # ------------------------------------------------------------------
+    # Hooks from upstream GeminiLiveLLMService
+    # ------------------------------------------------------------------
+
+    def _should_connect_on_start(self) -> bool:
+        """Report whether the live session may be opened at pipeline start.
+
+        The connection is held back until the engine has supplied a
+        ``system_instruction``, which lets pre-call fetch populate template
+        variables first.
+        """
+        instruction = self._settings.system_instruction
+        return True if instruction else False
+
+    async def _handle_changed_settings(self, changed: dict[str, Any]) -> set[str]:
+        """Connect or reconnect when ``system_instruction`` changes.
+
+        Args:
+            changed: Mapping of the settings that changed.
+
+        Returns:
+            ``{"system_instruction"}`` when that key is present in ``changed``,
+            otherwise an empty set.
+        """
+        handled: set[str] = set()
+        if "system_instruction" in changed:
+            handled.add("system_instruction")
+            if not self._session:
+                # No session yet: this is the first instruction after the
+                # deferred connect, so open the connection now.
+                await self._connect()
+            elif not self._bot_is_responding:
+                await self._reconnect()
+            else:
+                # The bot is mid-turn. Remember the reconnect and let the
+                # end-of-turn hook perform it, so the utterance isn't cut off.
+                self._reconnect_pending = True
+        return handled
+
+    async def _run_or_defer_function_calls(
+        self, function_calls_llm: list[FunctionCallFromLLM]
+    ):
+        """Run tool calls now, or park them while the bot is still responding."""
+        if not self._bot_is_responding:
+            await super()._run_or_defer_function_calls(function_calls_llm)
+            return
+
+        # Latest batch wins: Gemini emits tool calls as one batch per
+        # tool_call message, so replacing any earlier batch is intentional.
+        self._pending_function_calls = function_calls_llm
+        logger.debug(
+            f"{self}: deferring {len(function_calls_llm)} function call(s) "
+            "until bot turn ends"
+        )
+
+    # ------------------------------------------------------------------
+    # State-transition side effects
+    # ------------------------------------------------------------------
+
+    async def _set_bot_is_responding(self, responding: bool):
+        """Update the responding flag and run end-of-turn work.
+
+        On a responding -> not-responding transition, deferred function calls
+        are executed first, then a queued reconnect (if any) is performed.
+        """
+        turn_was_active = self._bot_is_responding
+        await super()._set_bot_is_responding(responding)
+        if responding or not turn_was_active:
+            # Not an end-of-turn transition; nothing to drain.
+            return
+
+        await self._run_pending_function_calls()
+        if not self._reconnect_pending:
+            return
+        self._reconnect_pending = False
+        await self._reconnect()
+
+    async def _run_pending_function_calls(self):
+        """Execute function calls that were deferred during the bot's last turn.
+
+        Does nothing when no calls are queued. The queue is cleared before the
+        calls are executed.
+        """
+        deferred = self._pending_function_calls
+        if deferred:
+            self._pending_function_calls = []
+            logger.debug(
+                f"{self}: executing {len(deferred)} deferred function call(s) "
+                "after bot turn ended"
+            )
+            await self.run_function_calls(deferred)
+
+    # ------------------------------------------------------------------
+    # Frame handling: mute, TTSSpeakFrame, BotStoppedSpeakingFrame flush
+    # ------------------------------------------------------------------
+
+    async def process_frame(self, frame: Frame, direction: FrameDirection):
+        """Intercept Dograh-specific frames before delegating to the parent.
+
+        - ``UserMuteStartedFrame`` / ``UserMuteStoppedFrame`` toggle the mute
+          flag and are pushed on without reaching the parent handler.
+        - ``TTSSpeakFrame`` is consumed: the first one triggers the initial
+          response, later ones are logged and dropped.
+        - ``BotStoppedSpeakingFrame`` flushes deferred function calls and is
+          then handled by the parent like any other frame.
+
+        Args:
+            frame: The frame to process.
+            direction: Direction the frame is travelling in the pipeline.
+        """
+        if isinstance(frame, UserMuteStartedFrame):
+            self._user_is_muted = True
+            await self.push_frame(frame, direction)
+            return
+        if isinstance(frame, UserMuteStoppedFrame):
+            self._user_is_muted = False
+            await self.push_frame(frame, direction)
+            return
+        if isinstance(frame, TTSSpeakFrame):
+            # Greeting trigger: the engine queues a TTSSpeakFrame to start the
+            # bot's first turn after node setup. Gemini Live renders its own
+            # audio, so we don't pass the frame through — we re-enter
+            # _handle_context to kick off the initial response.
+            if not self._handled_initial_context:
+                await self._handle_context(self._context)
+            else:
+                logger.warning(
+                    f"{self}: TTSSpeakFrame after initial context already "
+                    "handled — Gemini Live owns audio generation, ignoring"
+                )
+            return
+        if isinstance(frame, BotStoppedSpeakingFrame):
+            # Belt-and-suspenders: the main drain happens in
+            # _set_bot_is_responding(False), but if Gemini delays turn_complete
+            # past the audible end of the turn, flushing here ensures pending
+            # function calls fire promptly.
+            await self._run_pending_function_calls()
+            # Fall through to super for the actual push.
+        await super().process_frame(frame, direction)
+
+    async def _send_user_audio(self, frame):
+        """Forward user audio to the parent unless the user is muted."""
+        if not self._user_is_muted:
+            await super()._send_user_audio(frame)
+
+    # ------------------------------------------------------------------
+    # Context lifecycle: Dograh pre-populates self._context via the engine,
+    # so upstream's "first arrival === self._context is None" check doesn't
+    # work. We gate on _handled_initial_context instead and skip the
+    # init-instruction reconciliation (Dograh updates system_instruction at
+    # runtime via _update_settings, not via init).
+    # ------------------------------------------------------------------
+
+    async def _handle_context(self, context: LLMContext):
+        """Store ``context`` and start either the first or a follow-up turn.
+
+        The first call creates the initial response; every later call
+        processes completed function calls with ``send_new_results=True``.
+        """
+        # NOTE(review): the TTSSpeakFrame path passes ``self._context``, which
+        # may still be None if the engine has not populated it yet — confirm
+        # ``_create_initial_response`` tolerates that.
+        first_arrival = not self._handled_initial_context
+        self._handled_initial_context = True
+        self._context = context
+        if first_arrival:
+            await self._create_initial_response()
+        else:
+            await self._process_completed_function_calls(send_new_results=True)
+
+    # ------------------------------------------------------------------
+    # Session lifecycle: drop upstream's automatic reconnect-seed and
+    # initial-context-seed paths. The TTSSpeakFrame trigger and the
+    # function-call-result LLMContextFrame are the only paths that should
+    # kick off bot turns in the Dograh flow.
+    # ------------------------------------------------------------------
+
+    @traced_gemini_live(operation="llm_setup")
+    async def _handle_session_ready(self, session):
+        """Record the opened session and run any work queued for it.
+
+        Stores ``session``, marks realtime input as ready, fulfils a queued
+        initial response if one was requested before the session existed, and
+        finally drains pending tool results.
+        """
+        logger.debug(
+            f"In _handle_session_ready self._run_llm_when_session_ready: {self._run_llm_when_session_ready}"
+        )
+        self._session = session
+        self._ready_for_realtime_input = True
+        if self._run_llm_when_session_ready:
+            # Context arrived before session was ready — fulfil the queued
+            # initial response now.
+            self._run_llm_when_session_ready = False
+            await self._create_initial_response()
+        await self._drain_pending_tool_results()
+        # Otherwise: no automatic seed. Reconnect after a session-resumption
+        # update relies on the server-side restored state; reconnects without
+        # a handle (e.g. node transitions before any handle was issued) are
+        # followed by a function-call-result LLMContextFrame which feeds the
+        # updated-context branch in _handle_context.
+
+    # ------------------------------------------------------------------
+    # Transcription: broadcast (so downstream voicemail detector and
+    # logs buffer both see it) and set finalized= for turn-boundary
+    # semantics.
+    # ------------------------------------------------------------------
+
+    async def _handle_user_started_speaking(self, frame):
+        """Run the parent handler, then cancel any pending finalize marker."""
+        await super()._handle_user_started_speaking(frame)
+        # A new VAD start invalidates any pending finalize from a prior stop
+        # that hasn't been paired with a transcription yet.
+        self._finalize_pending = False
+
+    async def _handle_user_stopped_speaking(self, frame):
+        """Run the parent handler, then arm the finalize marker.
+
+        The next transcription pushed by ``_push_user_transcription`` will be
+        flagged ``finalized=True``.
+        """
+        await super()._handle_user_stopped_speaking(frame)
+        self._finalize_pending = True
+
+    async def _push_user_transcription(self, text: str, result=None):
+        """Broadcast a user transcription, flagging it final when armed.
+
+        The ``finalized`` value is taken from the pending-finalize marker,
+        which is consumed (reset to False) by this call.
+        """
+        await self._handle_user_transcription(text, True, self._settings.language)
+        is_final, self._finalize_pending = self._finalize_pending, False
+        frame_fields = {
+            "text": text,
+            "user_id": "",
+            "timestamp": time_now_iso8601(),
+            "result": result,
+            "finalized": is_final,
+        }
+        await self.broadcast_frame(TranscriptionFrame, **frame_fields)
--- a/api/services/pipecat/realtime/gemini_live_vertex.py
+++ b/api/services/pipecat/realtime/gemini_live_vertex.py
@ -0,0 +1,42 @@
+"""Dograh subclass of pipecat's Gemini Live Vertex AI LLM service.
+
+Diamond inheritance: combines the Dograh engine-integration overrides from
+:class:`DograhGeminiLiveLLMService` with the Vertex-specific tweaks from
+upstream's :class:`GeminiLiveVertexLLMService` (no history config,
+``NON_BLOCKING`` tools disabled, service-account credentials).
+
+MRO::
+
+    DograhGeminiLiveVertexLLMService
+      -> DograhGeminiLiveLLMService
+      -> GeminiLiveVertexLLMService
+      -> GeminiLiveLLMService
+      -> LLMService
+      -> ...
+"""
+
+from api.services.pipecat.realtime.gemini_live import DograhGeminiLiveLLMService
+from pipecat.services.google.gemini_live.vertex.llm import (
+    GeminiLiveVertexLLMService,
+)
+
+
+class DograhGeminiLiveVertexLLMService(
+    DograhGeminiLiveLLMService,
+    GeminiLiveVertexLLMService,
+):
+    """Vertex AI variant of Gemini Live with Dograh integration quirks.
+
+    Adds no behaviour of its own: everything comes from the two bases, with
+    the Dograh overrides listed first so they take precedence in the MRO.
+    """
+
+
+# Guard against MRO regressions: a future refactor that flips inheritance
+# order or breaks the diamond would silently bypass the Dograh overrides.
+# The checks raise explicitly instead of using ``assert`` so they still run
+# when Python is started with ``-O`` (which strips assert statements).
+_mro = DograhGeminiLiveVertexLLMService.__mro__
+if _mro[1] is not DograhGeminiLiveLLMService:
+    raise AssertionError(
+        f"Expected DograhGeminiLiveLLMService at MRO[1], got {_mro[1]}"
+    )
+if _mro[2] is not GeminiLiveVertexLLMService:
+    raise AssertionError(
+        f"Expected GeminiLiveVertexLLMService at MRO[2], got {_mro[2]}"
+    )
+del _mro
--- a/api/services/pipecat/realtime/openai_realtime.py
+++ b/api/services/pipecat/realtime/openai_realtime.py
@ -0,0 +1,274 @@
+"""Dograh subclass of pipecat's OpenAI Realtime LLM service.
+
+Layers Dograh engine integration quirks onto upstream-pristine
+:class:`OpenAIRealtimeLLMService`. Substantially smaller than the Gemini
+subclass because OpenAI Realtime supports runtime ``session.update`` for
+both ``system_instruction`` and tools — no reconnect/defer-tool-call
+machinery needed.
+
+Adds:
+
+- **User-mute audio gating** via ``UserMuteStarted/StoppedFrame``.
+- **TTSSpeakFrame as initial-response trigger** so the engine's greeting
+  flow kicks off the bot's first response.
+- **One-off LLMMessagesAppendFrame handling** for ephemeral realtime prompts
+  like user-idle checks, without mutating Dograh's local ``LLMContext``.
+- **finalized=True on TranscriptionFrame** for parity with the Gemini
+  service (every OpenAI transcription via the ``completed`` event is
+  final by construction).
+"""
+
+import json
+from typing import Any
+
+from loguru import logger
+
+from pipecat.frames.frames import (
+    BotStartedSpeakingFrame,
+    BotStoppedSpeakingFrame,
+    Frame,
+    LLMFullResponseStartFrame,
+    LLMMessagesAppendFrame,
+    TranscriptionFrame,
+    TTSSpeakFrame,
+    UserMuteStartedFrame,
+    UserMuteStoppedFrame,
+)
+from pipecat.processors.aggregators.llm_context import LLMContext
+from pipecat.processors.frame_processor import FrameDirection
+from pipecat.services.llm_service import FunctionCallFromLLM
+from pipecat.services.openai.realtime import events
+from pipecat.services.openai.realtime.llm import OpenAIRealtimeLLMService
+from pipecat.transcriptions.language import Language
+from pipecat.utils.time import time_now_iso8601
+
+
+class DograhOpenAIRealtimeLLMService(OpenAIRealtimeLLMService):
+    """OpenAI Realtime with Dograh engine integration quirks. See module docstring."""
+
+    def __init__(self, **kwargs):
+        """Create the service and initialise Dograh-specific bookkeeping.
+
+        All keyword arguments are forwarded unchanged to the parent class.
+        """
+        super().__init__(**kwargs)
+        # True between UserMuteStartedFrame and UserMuteStoppedFrame; user
+        # audio is not forwarded while set.
+        self._user_is_muted: bool = False
+        # Dograh pre-populates self._context via the engine before the first
+        # LLMContextFrame arrives, so upstream's "first arrival means
+        # self._context is None" check no longer works.
+        self._handled_initial_context: bool = False
+        # Track bot speech locally so tool calls can be deferred until the bot
+        # has finished speaking, matching Dograh's Gemini Live behavior.
+        self._bot_is_speaking: bool = False
+        # Tool calls received while the bot was speaking, awaiting execution.
+        self._deferred_function_calls: list[FunctionCallFromLLM] = []
+
+    # ------------------------------------------------------------------
+    # Frame handling: mute, TTSSpeakFrame as greeting trigger
+    # ------------------------------------------------------------------
+
+    async def process_frame(self, frame: Frame, direction: FrameDirection):
+        """Intercept Dograh-specific frames before delegating to the parent.
+
+        - ``UserMuteStartedFrame`` / ``UserMuteStoppedFrame`` toggle the mute
+          flag and are pushed on without reaching the parent handler.
+        - ``TTSSpeakFrame`` is consumed: the first one triggers the initial
+          response, later ones are logged and dropped.
+        - ``LLMMessagesAppendFrame`` is consumed by ``_handle_messages_append``.
+        - ``BotStartedSpeakingFrame`` / ``BotStoppedSpeakingFrame`` update the
+          local speaking flag (the latter also flushes deferred function
+          calls) and are then handled by the parent like any other frame.
+
+        Args:
+            frame: The frame to process.
+            direction: Direction the frame is travelling in the pipeline.
+        """
+        if isinstance(frame, UserMuteStartedFrame):
+            self._user_is_muted = True
+            await self.push_frame(frame, direction)
+            return
+        if isinstance(frame, UserMuteStoppedFrame):
+            self._user_is_muted = False
+            await self.push_frame(frame, direction)
+            return
+        if isinstance(frame, TTSSpeakFrame):
+            # Greeting trigger: the engine queues a TTSSpeakFrame after node
+            # setup. OpenAI Realtime renders its own audio, so we don't pass
+            # the frame to TTS. Route through _handle_context so the initial
+            # response and later tool-result turns share the same context
+            # lifecycle even when Dograh has already pre-populated self._context.
+            if not self._handled_initial_context:
+                await self._handle_context(self._context)
+            else:
+                logger.warning(
+                    f"{self}: TTSSpeakFrame after initial context already "
+                    "handled — OpenAI Realtime owns audio generation, ignoring"
+                )
+            # Don't forward the frame; the audio path is owned by the realtime
+            # service itself.
+            return
+        if isinstance(frame, LLMMessagesAppendFrame):
+            await self._handle_messages_append(frame)
+            return
+        if isinstance(frame, BotStartedSpeakingFrame):
+            self._bot_is_speaking = True
+        elif isinstance(frame, BotStoppedSpeakingFrame):
+            self._bot_is_speaking = False
+            await self._run_pending_function_calls()
+        await super().process_frame(frame, direction)
+
+    async def _handle_messages_append(self, frame: LLMMessagesAppendFrame):
+        """Consume a one-off append frame without mutating the local LLMContext.
+
+        Each message that ``_message_to_conversation_item`` can convert is sent
+        to the API as a conversation item and recorded in
+        ``_messages_added_manually``. When ``frame.run_llm`` is set and at
+        least one item was sent, a response is requested afterwards.
+
+        The frame is ignored while the service is disconnecting.
+        """
+        if self._disconnecting:
+            return
+
+        if not self._api_session_ready:
+            # NOTE(review): the frame's messages are not queued on this path —
+            # only the "run the LLM once ready" flag is set. Confirm that
+            # dropping the appended messages here is intended.
+            if frame.run_llm:
+                logger.debug(
+                    f"{self}: LLMMessagesAppendFrame received before session ready; "
+                    "deferring response until the session is initialized"
+                )
+                self._run_llm_when_api_session_ready = True
+            return
+
+        appended_any = False
+        for message in frame.messages:
+            item = self._message_to_conversation_item(message)
+            if item is None:
+                # Unsupported message; the converter has already logged why.
+                continue
+            evt = events.ConversationItemCreateEvent(item=item)
+            self._messages_added_manually[evt.item.id] = True
+            await self.send_client_event(evt)
+            appended_any = True
+
+        # Only request inference if something was actually appended.
+        if frame.run_llm and appended_any:
+            await self._send_manual_response_create()
+
+    async def _handle_context(self, context: LLMContext):
+        """Store ``context`` and start either the first or a follow-up turn.
+
+        After the initial context has been handled, every call processes
+        completed function calls with ``send_new_results=True``. Before that,
+        a ``None`` context is rejected with a warning (leaving the service
+        ready for a later trigger); otherwise the first response is created.
+        """
+        if self._handled_initial_context:
+            self._context = context
+            await self._process_completed_function_calls(send_new_results=True)
+            return
+
+        if context is None:
+            logger.warning(
+                f"{self}: received initial context trigger before context was set"
+            )
+            return
+
+        self._handled_initial_context = True
+        self._context = context
+        await self._create_response()
+
+    async def _send_user_audio(self, frame):
+        """Forward user audio to the parent unless the user is muted."""
+        if not self._user_is_muted:
+            await super()._send_user_audio(frame)
+
+    def _message_to_conversation_item(
+        self, message: dict[str, Any]
+    ) -> events.ConversationItem | None:
+        """Convert an appended chat message into a realtime conversation item.
+
+        Only dict messages with role ``user``, ``system`` or ``developer`` and
+        text-only content are supported; ``developer`` is sent as ``system``.
+
+        Returns:
+            The conversation item, or ``None`` (after logging a warning) when
+            the message cannot be converted.
+        """
+        if not isinstance(message, dict):
+            logger.warning(
+                f"{self}: skipping unsupported appended message payload {message!r}"
+            )
+            return None
+
+        source_role = message.get("role")
+        if source_role not in {"user", "system", "developer"}:
+            logger.warning(
+                f"{self}: skipping unsupported appended message role {source_role!r}"
+            )
+            return None
+
+        body_text = self._extract_text_content(message.get("content"))
+        if not body_text:
+            logger.warning(
+                f"{self}: skipping appended message with unsupported content {message!r}"
+            )
+            return None
+
+        # Role is one of the three accepted values here, so anything that is
+        # not "user" (i.e. system/developer) is sent as a system item.
+        target_role = "user" if source_role == "user" else "system"
+        text_part = events.ItemContent(type="input_text", text=body_text)
+        return events.ConversationItem(
+            type="message",
+            role=target_role,
+            content=[text_part],
+        )
+
+    @staticmethod
+    def _extract_text_content(content: Any) -> str | None:
+        """Flatten message content into a single text string.
+
+        A string is returned unchanged. A list is accepted only if every entry
+        is a dict of type ``"text"`` with a string ``"text"`` value; the texts
+        are joined with newlines. Anything else, including an empty list,
+        yields ``None``.
+        """
+        if isinstance(content, str):
+            return content
+        if not isinstance(content, list):
+            return None
+
+        texts: list[str] = []
+        for entry in content:
+            is_text_part = isinstance(entry, dict) and entry.get("type") == "text"
+            value = entry.get("text") if is_text_part else None
+            if not isinstance(value, str):
+                # One unsupported part invalidates the whole message.
+                return None
+            texts.append(value)
+        return "\n".join(texts) if texts else None
+
+    async def _send_manual_response_create(self):
+        """Request a model response after conversation items were appended manually.
+
+        Pushes the response-start frame and starts the processing and TTFB
+        metrics before sending the ``response.create`` event.
+        """
+        await self.push_frame(LLMFullResponseStartFrame())
+        await self.start_processing_metrics()
+        await self.start_ttfb_metrics()
+
+        response_props = events.ResponseProperties(
+            output_modalities=self._get_enabled_modalities()
+        )
+        request = events.ResponseCreateEvent(response=response_props)
+        await self.send_client_event(request)
+
+    async def _run_pending_function_calls(self):
+        """Execute tool calls that were deferred while the bot was speaking.
+
+        Does nothing when no calls are queued. The queue is cleared before the
+        calls are executed.
+        """
+        deferred = self._deferred_function_calls
+        if deferred:
+            self._deferred_function_calls = []
+            logger.debug(
+                f"{self}: executing {len(deferred)} deferred function call(s) "
+                "after bot turn ended"
+            )
+            await self.run_function_calls(deferred)
+
+    async def _handle_evt_function_call_arguments_done(self, evt):
+        """Process or defer tool calls until the bot finishes speaking.
+
+        Parses the JSON arguments carried by ``evt``, looks up the tracked
+        call for ``evt.call_id`` and either runs it immediately or, while the
+        bot is speaking, queues it in ``_deferred_function_calls``.
+
+        Failures are logged with their traceback and never propagate to the
+        caller.
+        """
+        try:
+            args = json.loads(evt.arguments)
+
+            function_call_item = self._pending_function_calls.get(evt.call_id)
+            if not function_call_item:
+                logger.warning(
+                    f"No tracked function call found for call_id: {evt.call_id}"
+                )
+                logger.warning(
+                    f"Available pending calls: {list(self._pending_function_calls.keys())}"
+                )
+                return
+
+            # Untrack the call before running or queueing it.
+            del self._pending_function_calls[evt.call_id]
+
+            function_calls = [
+                FunctionCallFromLLM(
+                    context=self._context,
+                    tool_call_id=evt.call_id,
+                    function_name=function_call_item.name,
+                    arguments=args,
+                )
+            ]
+
+            if self._bot_is_speaking:
+                self._deferred_function_calls.extend(function_calls)
+                logger.debug(
+                    f"{self}: deferring function call {function_call_item.name} "
+                    "until bot stops speaking"
+                )
+                return
+
+            await self.run_function_calls(function_calls)
+            logger.debug(f"Processed function call: {function_call_item.name}")
+
+        except Exception as e:
+            # Boundary handler: the error is still swallowed here, but
+            # logger.exception records the traceback so errors raised inside
+            # run_function_calls can be diagnosed.
+            logger.exception(f"Failed to process function call arguments: {e}")
+
+    # ------------------------------------------------------------------
+    # Transcription: broadcast with finalized=True for parity with the
+    # Gemini service (consumers that check `finalized` should see True
+    # for every completed-transcription event from OpenAI).
+    # ------------------------------------------------------------------
+
+    async def handle_evt_input_audio_transcription_completed(self, evt):
+        """Broadcast a completed user transcription as a finalized frame.
+
+        Fires the ``on_conversation_item_updated`` handler for ``evt.item_id``,
+        broadcasts a ``TranscriptionFrame`` with ``finalized=True`` carrying
+        ``evt.transcript``, then reports the transcription via
+        ``_handle_user_transcription``.
+        """
+        await self._call_event_handler(
+            "on_conversation_item_updated", evt.item_id, None
+        )
+        await self.broadcast_frame(
+            TranscriptionFrame,
+            text=evt.transcript,
+            user_id="",
+            timestamp=time_now_iso8601(),
+            result=evt,
+            finalized=True,
+        )
+        # NOTE(review): the language is hard-coded to English regardless of the
+        # configured or detected language — confirm this is acceptable for
+        # non-English calls.
+        await self._handle_user_transcription(evt.transcript, True, Language.EN)
--- a/api/services/pipecat/run_pipeline.py
+++ b/api/services/pipecat/run_pipeline.py
@ -86,6 +86,43 @@ from pipecat.utils.run_context import set_current_org_id, set_current_run_id
 ensure_tracing()


+def _create_realtime_user_turn_config(provider: str):
+    """Pick user-turn strategies and the local VAD analyzer for a realtime provider.
+
+    Args:
+        provider: Realtime provider identifier, compared against
+            ``ServiceProviders`` values.
+
+    Returns:
+        A ``(UserTurnStrategies, vad_analyzer)`` pair. ``vad_analyzer`` is
+        ``None`` for OpenAI Realtime and a Silero analyzer for every other
+        provider.
+    """
+    uses_gemini_live = provider in {
+        ServiceProviders.GOOGLE_REALTIME.value,
+        ServiceProviders.GOOGLE_VERTEX_REALTIME.value,
+    }
+
+    if not uses_gemini_live and provider == ServiceProviders.OPENAI_REALTIME.value:
+        # OpenAI Realtime already emits speaking-state frames and interruption
+        # events itself, so the aggregator follows those external signals
+        # instead of running a local VAD.
+        external_strategies = UserTurnStrategies(
+            start=[ExternalUserTurnStartStrategy()],
+            stop=[ExternalUserTurnStopStrategy()],
+        )
+        return external_strategies, None
+
+    if uses_gemini_live:
+        # Gemini Live owns barge-in through its server-side VAD; local Silero
+        # VAD is kept only for early user-turn start and speaking-state
+        # tracking, so local interruptions are disabled.
+        start_strategy = VADUserTurnStartStrategy(enable_interruptions=False)
+    else:
+        start_strategy = VADUserTurnStartStrategy()
+
+    vad_strategies = UserTurnStrategies(
+        start=[start_strategy],
+        stop=[SpeechTimeoutUserTurnStopStrategy()],
+    )
+    return vad_strategies, SileroVADAnalyzer(params=VADParams(stop_secs=0.2))
+
+
 async def run_pipeline_telephony(
    websocket,
    *,
@ -138,6 +175,20 @@ async def run_pipeline_telephony(
            "telephony_configuration_id"
        )

+    # Resolve effective user config here so the transport can tune its
+    # bot-stopped-speaking fallback based on is_realtime; pass the resolved
+    # values into _run_pipeline so it doesn't fetch them again.
+    from api.services.configuration.resolve import resolve_effective_config
+
+    user_config = await db_client.get_user_configurations(user_id)
+    run_configs = (
+        (workflow_run.definition.workflow_configurations or {}) if workflow_run else {}
+    )
+    user_config = resolve_effective_config(
+        user_config, run_configs.get("model_overrides")
+    )
+    is_realtime = bool(user_config.is_realtime and user_config.realtime is not None)
+
    spec = telephony_registry.get(provider_name)
    audio_config = create_audio_config(provider_name)

@ -148,6 +199,7 @@ async def run_pipeline_telephony(
        workflow.organization_id,
        ambient_noise_config=ambient_noise_config,
        telephony_configuration_id=telephony_configuration_id,
+        is_realtime=is_realtime,
        **transport_kwargs,
    )

@ -158,6 +210,8 @@ async def run_pipeline_telephony(
            workflow_run_id,
            user_id,
            audio_config=audio_config,
+            workflow_run=workflow_run,
+            resolved_user_config=user_config,
        )
    except Exception as e:
        logger.error(
@ -198,11 +252,27 @@ async def run_pipeline_smallwebrtc(
    # Create audio configuration for WebRTC
    audio_config = create_audio_config(WorkflowRunMode.SMALLWEBRTC.value)

+    # Resolve workflow_run + effective user_config here so the transport can
+    # tune its bot-stopped-speaking fallback based on is_realtime. _run_pipeline
+    # reuses these via kwargs so we don't fetch twice.
+    from api.services.configuration.resolve import resolve_effective_config
+
+    workflow_run = await db_client.get_workflow_run(workflow_run_id, user_id)
+    user_config = await db_client.get_user_configurations(user_id)
+    run_configs = (
+        (workflow_run.definition.workflow_configurations or {}) if workflow_run else {}
+    )
+    user_config = resolve_effective_config(
+        user_config, run_configs.get("model_overrides")
+    )
+    is_realtime = bool(user_config.is_realtime and user_config.realtime is not None)
+
    transport = await create_webrtc_transport(
        webrtc_connection,
        workflow_run_id,
        audio_config,
        ambient_noise_config,
+        is_realtime=is_realtime,
    )
    await _run_pipeline(
        transport,
@ -212,6 +282,8 @@ async def run_pipeline_smallwebrtc(
        call_context_vars=call_context_vars,
        audio_config=audio_config,
        user_provider_id=user_provider_id,
+        workflow_run=workflow_run,
+        resolved_user_config=user_config,
    )


@ -223,6 +295,8 @@ async def _run_pipeline(
    call_context_vars: dict = {},
    audio_config: AudioConfig = None,
    user_provider_id: str | None = None,
+    workflow_run=None,
+    resolved_user_config=None,
 ) -> None:
    """
    Run the pipeline with the given transport and configuration
@ -232,9 +306,12 @@ async def _run_pipeline(
        workflow_id: The ID of the workflow
        workflow_run_id: The ID of the workflow run
        user_id: The ID of the user
-        mode: The mode of the pipeline (twilio or smallwebrtc)
+        workflow_run: Pre-fetched workflow run row. Fetched here if None.
+        resolved_user_config: User configuration with model_overrides already
+            applied. Fetched and resolved here if None.
    """
-    workflow_run = await db_client.get_workflow_run(workflow_run_id, user_id)
+    if workflow_run is None:
+        workflow_run = await db_client.get_workflow_run(workflow_run_id, user_id)

    # If the workflow run is already completed, we don't need to run it again
    if workflow_run.is_completed:
@ -246,9 +323,6 @@ async def _run_pipeline(
    if call_context_vars:
        merged_call_context_vars = {**merged_call_context_vars, **call_context_vars}

-    # Get user configuration
-    user_config = await db_client.get_user_configurations(user_id)
-
    # Get workflow for metadata (name, organization_id, call_disposition_codes)
    workflow = await db_client.get_workflow(workflow_id, user_id)
    if not workflow:
@ -286,11 +360,17 @@ async def _run_pipeline(
                    term.strip() for term in dictionary.split(",") if term.strip()
                ]

-    # Resolve model overrides from the version onto global user config
-    from api.services.configuration.resolve import resolve_effective_config
+    # Resolve model overrides from the version onto global user config (skip
+    # when the caller already resolved it).
+    if resolved_user_config is None:
+        from api.services.configuration.resolve import resolve_effective_config

-    model_overrides = run_configs.get("model_overrides")
-    user_config = resolve_effective_config(user_config, model_overrides)
+        user_config = await db_client.get_user_configurations(user_id)
+        user_config = resolve_effective_config(
+            user_config, run_configs.get("model_overrides")
+        )
+    else:
+        user_config = resolved_user_config

    # Detect realtime mode (speech-to-speech services like OpenAI Realtime, Gemini Live)
    is_realtime = user_config.is_realtime and user_config.realtime is not None
@ -453,23 +533,20 @@ async def _run_pipeline(
        correct_aggregation_callback=engine.create_aggregation_correction_callback(),
    )

+    user_mute_strategies = [
+        MuteUntilFirstBotCompleteUserMuteStrategy(),
+        FunctionCallUserMuteStrategy(),
+        CallbackUserMuteStrategy(should_mute_callback=engine.should_mute_user),
+    ]
+    user_vad_analyzer = SileroVADAnalyzer(params=VADParams(stop_secs=0.2))
+
    # Configure turn strategies based on STT provider, model, and workflow configuration
    if is_realtime:
-        # Realtime services do server-side turn detection for response generation,
-        # but we still need a client-side stop strategy so the user aggregator emits
-        # UserStoppedSpeakingFrame. Without it, downstream consumers (e.g. voicemail
-        # detector) and Gemini Live's _finalize_pending flag never see a turn end.
-        user_turn_strategies = UserTurnStrategies(
-            start=[VADUserTurnStartStrategy()],
-            stop=[SpeechTimeoutUserTurnStopStrategy()],
+        # Realtime services still need user-turn tracking even when the model
+        # itself owns speech generation and interruption behavior.
+        user_turn_strategies, user_vad_analyzer = _create_realtime_user_turn_config(
+            user_config.realtime.provider
        )
-
-        # Lets not start the pipeline as muted for Realtime
-        # - CallbackUserMuteStrategy: mutes based on engine's _mute_pipeline state
-        user_mute_strategies = [
-            FunctionCallUserMuteStrategy(),
-            CallbackUserMuteStrategy(should_mute_callback=engine.should_mute_user),
-        ]
    else:
        # Deepgram Flux uses external turn detection (VAD + External start/stop)
        # Other models use configurable turn detection strategy
@ -510,18 +587,11 @@ async def _run_pipeline(
                stop=[SpeechTimeoutUserTurnStopStrategy()],
            )

-        # - CallbackUserMuteStrategy: mutes based on engine's _mute_pipeline state
-        user_mute_strategies = [
-            MuteUntilFirstBotCompleteUserMuteStrategy(),
-            FunctionCallUserMuteStrategy(),
-            CallbackUserMuteStrategy(should_mute_callback=engine.should_mute_user),
-        ]
-
    user_params = LLMUserAggregatorParams(
        user_turn_strategies=user_turn_strategies,
        user_mute_strategies=user_mute_strategies,
        user_idle_timeout=max_user_idle_timeout,
-        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
+        vad_analyzer=user_vad_analyzer,
    )
    context_aggregator = LLMContextAggregatorPair(
        context, assistant_params=assistant_params, user_params=user_params
@ -562,15 +632,14 @@ async def _run_pipeline(
    )
    engine.set_fetch_recording_audio(fetch_audio)

-    # Voicemail detection works in both modes. In realtime mode the detector sits
-    # after the realtime LLM and consumes the TranscriptionFrames it broadcasts;
-    # the LLM gate / TTS gate are not used (the realtime LLM responds to audio
-    # directly, not LLMContextFrames), so on detection we rely on
-    # end_call_with_reason to drop the call.
    voicemail_config = (workflow.workflow_configurations or {}).get(
        "voicemail_detection", {}
    )
-    if voicemail_config.get("enabled", False):
+    if is_realtime and voicemail_config.get("enabled", False):
+        logger.info(
+            f"Disabling voicemail detection for realtime workflow run {workflow_run_id}"
+        )
+    if voicemail_config.get("enabled", False) and not is_realtime:
        logger.info(f"Voicemail detection enabled for workflow run {workflow_run_id}")
        # Create a separate LLM instance for the voicemail sub-pipeline
        # (can't share with main pipeline as it would mess up frame linking)
--- a/api/services/pipecat/service_factory.py
+++ b/api/services/pipecat/service_factory.py
@ -493,6 +493,9 @@ def create_realtime_llm_service(user_config, audio_config: "AudioConfig"):
    )

    if provider == ServiceProviders.OPENAI_REALTIME.value:
+        from api.services.pipecat.realtime.openai_realtime import (
+            DograhOpenAIRealtimeLLMService,
+        )
        from pipecat.services.openai.realtime.events import (
            AudioConfiguration,
            AudioInput,
@ -500,11 +503,10 @@ def create_realtime_llm_service(user_config, audio_config: "AudioConfig"):
            InputAudioTranscription,
            SessionProperties,
        )
-        from pipecat.services.openai.realtime.llm import OpenAIRealtimeLLMService

-        return OpenAIRealtimeLLMService(
+        return DograhOpenAIRealtimeLLMService(
            api_key=api_key,
-            settings=OpenAIRealtimeLLMService.Settings(
+            settings=DograhOpenAIRealtimeLLMService.Settings(
                model=model,
                session_properties=SessionProperties(
                    audio=AudioConfiguration(
@ -519,7 +521,9 @@ def create_realtime_llm_service(user_config, audio_config: "AudioConfig"):
            ),
        )
    elif provider == ServiceProviders.GOOGLE_REALTIME.value:
-        from pipecat.services.google.gemini_live.llm import GeminiLiveLLMService
+        from api.services.pipecat.realtime.gemini_live import (
+            DograhGeminiLiveLLMService,
+        )

        # Gemini Live enables input/output audio transcription by default
        # in its _connect() method — no need to configure it explicitly.
@ -529,9 +533,30 @@ def create_realtime_llm_service(user_config, audio_config: "AudioConfig"):
        }
        if language:
            settings_kwargs["language"] = language
-        return GeminiLiveLLMService(
+        return DograhGeminiLiveLLMService(
            api_key=api_key,
-            settings=GeminiLiveLLMService.Settings(**settings_kwargs),
+            settings=DograhGeminiLiveLLMService.Settings(**settings_kwargs),
+        )
+    elif provider == ServiceProviders.GOOGLE_VERTEX_REALTIME.value:
+        from api.services.pipecat.realtime.gemini_live_vertex import (
+            DograhGeminiLiveVertexLLMService,
+        )
+
+        project_id = getattr(realtime_config, "project_id", None)
+        location = getattr(realtime_config, "location", None) or "us-east4"
+        credentials = getattr(realtime_config, "credentials", None)
+
+        settings_kwargs = {
+            "model": model,
+            "voice": voice or "Charon",
+        }
+        if language:
+            settings_kwargs["language"] = language
+        return DograhGeminiLiveVertexLLMService(
+            credentials=credentials,
+            project_id=project_id,
+            location=location,
+            settings=DograhGeminiLiveVertexLLMService.Settings(**settings_kwargs),
        )
    else:
        raise HTTPException(
--- a/api/services/pipecat/transport_params.py
+++ b/api/services/pipecat/transport_params.py
@ -0,0 +1,25 @@
+"""Shared helpers for tuning pipecat ``TransportParams`` per run mode.
+
+These live outside ``transport_setup.py`` (which is non-telephony only) so
+that both the WebRTC factory there and the telephony provider factories
+under ``api/services/telephony/providers/<name>/transport.py`` can call
+into the same place.
+"""
+
+# Realtime (speech-to-speech) LLMs don't emit ``TTSStoppedFrame``, so the
+# bot-stopped-speaking signal relies on the output-queue-drained fallback.
+# The default 3s tail leaves a long gap before the assistant aggregator
+# closes its turn; 0.5s keeps the conversation snappy without cutting into
+# the bot's own audio (audio chunks arrive far more frequently than this).
+REALTIME_BOT_VAD_STOP_SECS = 0.5
+
+
+def realtime_param_overrides(is_realtime: bool) -> dict:
+    """Build the ``TransportParams`` keyword overrides for a run mode.
+
+    Non-realtime runs get no overrides. Realtime runs currently override
+    only ``bot_vad_stop_secs``; put future realtime-specific knobs here so
+    every transport stays a thin shim.
+    """
+    realtime_overrides = {"bot_vad_stop_secs": REALTIME_BOT_VAD_STOP_SECS}
+    return realtime_overrides if is_realtime else {}
--- a/api/services/pipecat/transport_setup.py
+++ b/api/services/pipecat/transport_setup.py
@ -6,6 +6,7 @@ This module hosts only the shared, non-telephony transports (WebRTC).

 from api.services.pipecat.audio_config import AudioConfig
 from api.services.pipecat.audio_mixer import build_audio_out_mixer
+from api.services.pipecat.transport_params import realtime_param_overrides
 from pipecat.transports.base_transport import TransportParams
 from pipecat.transports.smallwebrtc.connection import SmallWebRTCConnection
 from pipecat.transports.smallwebrtc.transport import SmallWebRTCTransport
@ -16,6 +17,7 @@ async def create_webrtc_transport(
    workflow_run_id: int,
    audio_config: AudioConfig,
    ambient_noise_config: dict | None = None,
+    is_realtime: bool = False,
 ):
    """Create a transport for WebRTC connections."""
    mixer = await build_audio_out_mixer(
@ -30,5 +32,6 @@ async def create_webrtc_transport(
            audio_in_sample_rate=audio_config.transport_in_sample_rate,
            audio_out_sample_rate=audio_config.transport_out_sample_rate,
            audio_out_mixer=mixer,
+            **realtime_param_overrides(is_realtime),
        ),
    )
--- a/api/services/pricing/workflow_run_cost.py
+++ b/api/services/pricing/workflow_run_cost.py
@ -27,7 +27,9 @@ async def _fetch_telephony_cost(workflow_run) -> dict | None:
        logger.warning("Workflow not found for workflow run")
        raise Exception("Workflow not found")

-    provider = await get_telephony_provider_for_run(workflow_run, workflow.organization_id)
+    provider = await get_telephony_provider_for_run(
+        workflow_run, workflow.organization_id
+    )
    call_cost_info = await provider.get_call_cost(call_id)

    if call_cost_info.get("status") == "error":
--- a/api/services/telephony/README.md
+++ b/api/services/telephony/README.md
@ -13,10 +13,16 @@ Business Logic → TelephonyProvider (Interface) → Concrete Provider (Twilio,
 ### Using the Provider in Code

 ```python
-from api.services.telephony.factory import get_telephony_provider
+from api.services.telephony.factory import (
+    get_default_telephony_provider,
+    get_telephony_provider_by_id,
+)

-# Get provider based on organization config
-provider = await get_telephony_provider(organization_id)
+# Get the org's default outbound provider
+provider = await get_default_telephony_provider(organization_id)
+
+# Or resolve a specific telephony configuration row
+provider = await get_telephony_provider_by_id(config_id, organization_id)

 # Initiate a call
 result = await provider.initiate_call(
@ -47,7 +53,7 @@ See the [Custom Provider Guide](https://docs.dograh.com/integrations/telephony/c

 Quick checklist:
 1. Create `providers/your_provider.py` implementing `TelephonyProvider`
-2. Update `factory.py` to include your provider
+2. Register the package in `providers/__init__.py` and add its schemas to `api/schemas/telephony_config.py`
 3. Write unit tests
 4. Update documentation

@ -107,7 +113,7 @@ class MockProvider(TelephonyProvider):
    # Implement other required methods...

 # In tests
-@patch('api.services.telephony.factory.get_telephony_provider')
+@patch('api.services.telephony.factory.get_default_telephony_provider')
 async def test_call_initiation(mock_get_provider):
    mock_get_provider.return_value = MockProvider()
    # Test your business logic
@ -141,8 +147,8 @@ await service.initiate_call(...)

 New code:
 ```python
-from api.services.telephony.factory import get_telephony_provider
-provider = await get_telephony_provider(org_id)
+from api.services.telephony.factory import get_default_telephony_provider
+provider = await get_default_telephony_provider(org_id)
 await provider.initiate_call(...)
 ```

@ -164,4 +170,4 @@ await provider.initiate_call(...)
 - [User Documentation](https://docs.dograh.com/integrations/telephony/overview)
 - [Twilio Integration](https://docs.dograh.com/integrations/telephony/twilio)
 - [Custom Providers](https://docs.dograh.com/integrations/telephony/custom)
- [Webhooks Guide](https://docs.dograh.com/integrations/telephony/webhooks)
+- [Webhooks Guide](https://docs.dograh.com/integrations/telephony/webhooks)
--- a/api/services/telephony/factory.py
+++ b/api/services/telephony/factory.py
@ -6,9 +6,7 @@ resolution paths exist:
 * by config id — the canonical path used by outbound (test calls, campaigns,
  API triggers) and by the websocket transport once a workflow run has
  ``initial_context.telephony_configuration_id`` stamped on it.
-* by org default — used as a fallback when no specific config is requested
-  (e.g. the legacy ``/telephony-config`` endpoint, the back-compat
-  ``get_telephony_provider(organization_id)`` shim).
+* by org default — used as a fallback when no specific config is requested.
 * for inbound — given a detected provider and an account-id from the webhook,
  iterate the org's configs of that provider and return the one whose stored
  account-id credential matches.
@ -196,28 +194,6 @@ async def load_credentials_for_transport(
    return config


-# ---------------------------------------------------------------------------
-# Back-compat shims
-# ---------------------------------------------------------------------------
-
-
-async def load_telephony_config(organization_id: int) -> Dict[str, Any]:
-    """Deprecated: returns the org's default config.
-
-    Existing callers that don't carry a config id continue to work via this
-    shim. New code should pass an explicit telephony_configuration_id."""
-    return await load_default_telephony_config(organization_id)
-
-
-async def get_telephony_provider(organization_id: int) -> TelephonyProvider:
-    """Deprecated: returns a provider for the org's default config.
-
-    See ``load_telephony_config`` above. New code should call
-    ``get_telephony_provider_by_id`` with the resolved config id.
-    """
-    return await get_default_telephony_provider(organization_id)
-
-
 async def get_all_telephony_providers() -> List[Type[TelephonyProvider]]:
    """All registered provider classes — used by inbound webhook detection."""
    return [spec.provider_cls for spec in registry.all_specs()]
--- a/api/services/telephony/providers/CLAUDE.md
+++ b/api/services/telephony/providers/CLAUDE.md
@ -28,7 +28,7 @@ If you find yourself editing anything else, re-read the registry plumbing first:

 | Want to change... | Source of truth |
 | --- | --- |
-| Outbound provider lookup | `factory.get_telephony_provider*` reads `registry.get(name).provider_cls` |
+| Outbound provider lookup | `factory.get_default_telephony_provider`, `get_telephony_provider_by_id`, and `get_telephony_provider_for_run` read `registry.get(name).provider_cls` |
 | Stored credentials → constructor dict | `ProviderSpec.config_loader` |
 | Audio sample rate / VAD rate | `ProviderSpec.transport_sample_rate` (full `AudioConfig` is built in `pipecat/audio_config.py::create_audio_config`) |
 | Which transport runs in `run_pipeline_telephony` | `ProviderSpec.transport_factory` |
--- a/api/services/telephony/providers/ari/transport.py
+++ b/api/services/telephony/providers/ari/transport.py
@ -8,6 +8,7 @@ from pipecat.transports.websocket.fastapi import (

 from api.services.pipecat.audio_config import AudioConfig
 from api.services.pipecat.audio_mixer import build_audio_out_mixer
+from api.services.pipecat.transport_params import realtime_param_overrides
 from api.services.telephony.factory import load_credentials_for_transport

 from .serializers import AsteriskFrameSerializer
@ -22,6 +23,7 @@ async def create_transport(
    *,
    ambient_noise_config: dict | None = None,
    telephony_configuration_id: int | None = None,
+    is_realtime: bool = False,
    channel_id: str,
 ):
    """Create a transport for Asterisk ARI connections."""
@ -65,5 +67,6 @@ async def create_transport(
            audio_out_sample_rate=audio_config.transport_out_sample_rate,
            audio_out_mixer=mixer,
            serializer=serializer,
+            **realtime_param_overrides(is_realtime),
        ),
    )
--- a/api/services/telephony/providers/cloudonix/transport.py
+++ b/api/services/telephony/providers/cloudonix/transport.py
@ -8,6 +8,7 @@ from pipecat.transports.websocket.fastapi import (

 from api.services.pipecat.audio_config import AudioConfig
 from api.services.pipecat.audio_mixer import build_audio_out_mixer
+from api.services.pipecat.transport_params import realtime_param_overrides
 from api.services.telephony.factory import load_credentials_for_transport

 from .serializers import CloudonixFrameSerializer
@ -22,6 +23,7 @@ async def create_transport(
    *,
    ambient_noise_config: dict | None = None,
    telephony_configuration_id: int | None = None,
+    is_realtime: bool = False,
    call_id: str,
    stream_sid: str,
    bearer_token: str | None = None,
@ -69,5 +71,6 @@ async def create_transport(
            audio_out_mixer=mixer,
            serializer=serializer,
            audio_out_10ms_chunks=2,
+            **realtime_param_overrides(is_realtime),
        ),
    )
--- a/api/services/telephony/providers/plivo/transport.py
+++ b/api/services/telephony/providers/plivo/transport.py
@ -8,6 +8,7 @@ from pipecat.transports.websocket.fastapi import (

 from api.services.pipecat.audio_config import AudioConfig
 from api.services.pipecat.audio_mixer import build_audio_out_mixer
+from api.services.pipecat.transport_params import realtime_param_overrides
 from api.services.telephony.factory import load_credentials_for_transport

 from .serializers import PlivoFrameSerializer
@ -21,6 +22,7 @@ async def create_transport(
    *,
    ambient_noise_config: dict | None = None,
    telephony_configuration_id: int | None = None,
+    is_realtime: bool = False,
    stream_id: str,
    call_id: str,
 ):
@ -61,5 +63,6 @@ async def create_transport(
            audio_out_sample_rate=audio_config.transport_out_sample_rate,
            audio_out_mixer=mixer,
            serializer=serializer,
+            **realtime_param_overrides(is_realtime),
        ),
    )
--- a/api/services/telephony/providers/telnyx/transport.py
+++ b/api/services/telephony/providers/telnyx/transport.py
@ -8,6 +8,7 @@ from pipecat.transports.websocket.fastapi import (

 from api.services.pipecat.audio_config import AudioConfig
 from api.services.pipecat.audio_mixer import build_audio_out_mixer
+from api.services.pipecat.transport_params import realtime_param_overrides
 from api.services.telephony.factory import load_credentials_for_transport

 from .serializers import TelnyxFrameSerializer
@ -22,6 +23,7 @@ async def create_transport(
    *,
    ambient_noise_config: dict | None = None,
    telephony_configuration_id: int | None = None,
+    is_realtime: bool = False,
    stream_id: str,
    call_control_id: str,
    encoding: str = "PCMU",
@ -64,5 +66,6 @@ async def create_transport(
            audio_out_sample_rate=audio_config.transport_out_sample_rate,
            audio_out_mixer=mixer,
            serializer=serializer,
+            **realtime_param_overrides(is_realtime),
        ),
    )
--- a/api/services/telephony/providers/twilio/transport.py
+++ b/api/services/telephony/providers/twilio/transport.py
@ -8,6 +8,7 @@ from pipecat.transports.websocket.fastapi import (

 from api.services.pipecat.audio_config import AudioConfig
 from api.services.pipecat.audio_mixer import build_audio_out_mixer
+from api.services.pipecat.transport_params import realtime_param_overrides
 from api.services.telephony.factory import load_credentials_for_transport

 from .serializers import TwilioFrameSerializer
@ -22,6 +23,7 @@ async def create_transport(
    *,
    ambient_noise_config: dict | None = None,
    telephony_configuration_id: int | None = None,
+    is_realtime: bool = False,
    stream_sid: str,
    call_sid: str,
 ):
@ -60,5 +62,6 @@ async def create_transport(
            audio_out_sample_rate=audio_config.transport_out_sample_rate,
            audio_out_mixer=mixer,
            serializer=serializer,
+            **realtime_param_overrides(is_realtime),
        ),
    )
--- a/api/services/telephony/providers/vobiz/transport.py
+++ b/api/services/telephony/providers/vobiz/transport.py
@ -14,6 +14,7 @@ from pipecat.transports.websocket.fastapi import (

 from api.services.pipecat.audio_config import AudioConfig
 from api.services.pipecat.audio_mixer import build_audio_out_mixer
+from api.services.pipecat.transport_params import realtime_param_overrides
 from api.services.telephony.factory import load_credentials_for_transport

 from .serializers import VobizFrameSerializer
@ -27,6 +28,7 @@ async def create_transport(
    *,
    ambient_noise_config: dict | None = None,
    telephony_configuration_id: int | None = None,
+    is_realtime: bool = False,
    stream_id: str,
    call_id: str,
 ):
@ -72,6 +74,7 @@ async def create_transport(
            audio_out_sample_rate=audio_config.transport_out_sample_rate,
            audio_out_mixer=mixer,
            serializer=serializer,
+            **realtime_param_overrides(is_realtime),
        ),
    )

--- a/api/services/telephony/providers/vonage/transport.py
+++ b/api/services/telephony/providers/vonage/transport.py
@ -7,6 +7,7 @@ from pipecat.transports.websocket.fastapi import (

 from api.services.pipecat.audio_config import AudioConfig
 from api.services.pipecat.audio_mixer import build_audio_out_mixer
+from api.services.pipecat.transport_params import realtime_param_overrides
 from api.services.telephony.factory import load_credentials_for_transport

 from .serializers import VonageFrameSerializer
@ -20,6 +21,7 @@ async def create_transport(
    *,
    ambient_noise_config: dict | None = None,
    telephony_configuration_id: int | None = None,
+    is_realtime: bool = False,
    call_uuid: str,
 ):
    """Create a transport for Vonage connections."""
@ -59,5 +61,6 @@ async def create_transport(
            audio_out_sample_rate=audio_config.transport_out_sample_rate,
            audio_out_mixer=mixer,
            serializer=serializer,
+            **realtime_param_overrides(is_realtime),
        ),
    )
--- a/api/services/telephony/registry.py
+++ b/api/services/telephony/registry.py
@ -81,8 +81,8 @@ class ProviderSpec:
            stored config JSON and as the WorkflowRunMode value.
        provider_cls: The TelephonyProvider subclass.
        config_loader: Normalizes raw stored config into the dict shape the
-            provider constructor expects. Replaces the if/elif chain in the
-            old factory.load_telephony_config().
+            provider constructor expects. Replaces the old factory if/elif
+            chain.
        transport_factory: Async callable that creates the pipecat transport
            for an accepted WebSocket. Provider-specific kwargs (stream_sid,
            call_sid, etc.) are forwarded as ``**kwargs``.
--- a/api/services/workflow/pipecat_engine_custom_tools.py
+++ b/api/services/workflow/pipecat_engine_custom_tools.py
@ -321,6 +321,7 @@ class CustomToolManager:
                    tool=tool,
                    arguments=function_call_params.arguments,
                    call_context_vars=self._engine._call_context_vars,
+                    gathered_context_vars=self._engine._gathered_context,
                    organization_id=await self.get_organization_id(),
                )

--- a/api/services/workflow/tools/custom_tool.py
+++ b/api/services/workflow/tools/custom_tool.py
@ -1,5 +1,6 @@
 """Custom tool execution for user-defined HTTP API tools."""

+import json
 import re
 from typing import Any, Dict, Optional

@ -8,6 +9,7 @@ from loguru import logger

 from api.db import db_client
 from api.utils.credential_auth import build_auth_header
+from api.utils.template_renderer import render_template

 # Map tool parameter types to JSON schema types
 TYPE_MAP = {
@ -84,10 +86,94 @@ def tool_to_function_schema(tool: Any) -> Dict[str, Any]:
    }


+def _coerce_parameter_value(value: Any, param_type: str) -> Any:
+    """Coerce a rendered preset parameter into the configured JSON type.
+
+    Args:
+        value: The value produced by rendering the parameter's template.
+        param_type: ``"string"``, ``"number"`` or ``"boolean"``. Any other
+            type returns ``value`` unchanged.
+
+    Returns:
+        The coerced value. ``None`` is returned when ``value`` is ``None`` or
+        when a blank string is coerced to ``"number"``.
+
+    Raises:
+        ValueError: If ``value`` cannot be represented as the requested type.
+            This includes non-finite numbers (NaN / Infinity), which have no
+            JSON representation.
+    """
+    import math
+
+    if value is None:
+        return None
+
+    if param_type == "string":
+        if isinstance(value, str):
+            return value
+        # Serialize containers as JSON rather than Python repr.
+        if isinstance(value, (dict, list)):
+            return json.dumps(value)
+        return str(value)
+
+    if param_type == "number":
+        # bool is a subclass of int, so it must be excluded explicitly.
+        if isinstance(value, (int, float)) and not isinstance(value, bool):
+            number = value
+        else:
+            rendered = str(value).strip()
+            if rendered == "":
+                return None
+
+            # Keep integers as ints so "42" is not sent as 42.0.
+            if re.fullmatch(r"[-+]?\d+", rendered):
+                return int(rendered)
+
+            try:
+                number = float(rendered)
+            except ValueError:
+                raise ValueError(f"Cannot convert '{value}' to number") from None
+
+        # float() happily parses "nan" / "inf"; neither can be sent as JSON.
+        if isinstance(number, float) and not math.isfinite(number):
+            raise ValueError(f"Cannot convert '{value}' to number")
+        return number
+
+    if param_type == "boolean":
+        if isinstance(value, bool):
+            return value
+
+        if isinstance(value, (int, float)):
+            return bool(value)
+
+        rendered = str(value).strip().lower()
+        if rendered in {"true", "1", "yes", "y", "on"}:
+            return True
+        if rendered in {"false", "0", "no", "n", "off"}:
+            return False
+
+        raise ValueError(f"Cannot convert '{value}' to boolean")
+
+    return value
+
+
+def _resolve_preset_parameters(
+    config: Dict[str, Any],
+    call_context_vars: Optional[Dict[str, Any]],
+    gathered_context_vars: Optional[Dict[str, Any]],
+) -> Dict[str, Any]:
+    """Resolve fixed/template-backed parameters before executing the HTTP request.
+
+    Each entry of ``config["preset_parameters"]`` is rendered against a
+    context that exposes the call context variables both at the top level and
+    under ``initial_context``, plus the gathered variables under
+    ``gathered_context``. The rendered value is then coerced to the entry's
+    declared ``type`` (``"string"`` when omitted).
+
+    Args:
+        config: The tool's HTTP configuration dict.
+        call_context_vars: Initial context variables for the call, or None.
+        gathered_context_vars: Variables gathered during the conversation,
+            or None.
+
+    Returns:
+        Mapping of parameter name to coerced value. Entries without a name,
+        and optional entries that resolve to an empty value, are omitted.
+
+    Raises:
+        ValueError: If a required parameter resolves to an empty value, or if
+            coercion to the declared type fails.
+    """
+
+    preset_parameters = config.get("preset_parameters") or []
+    if not preset_parameters:
+        return {}
+
+    initial_context = dict(call_context_vars or {})
+    render_context: Dict[str, Any] = {
+        **initial_context,
+        "initial_context": initial_context,
+        "gathered_context": dict(gathered_context_vars or {}),
+    }
+
+    resolved: Dict[str, Any] = {}
+    for param in preset_parameters:
+        param_name = (param.get("name") or "").strip()
+        if not param_name:
+            continue
+
+        rendered = render_template(param.get("value_template", ""), render_context)
+        if rendered in (None, ""):
+            coerced = None
+        else:
+            coerced = _coerce_parameter_value(rendered, param.get("type", "string"))
+
+        # Coercion can itself produce None (e.g. a whitespace-only value for a
+        # number), so the emptiness check has to run after it; otherwise a
+        # required parameter could be injected as null.
+        if coerced is None:
+            if param.get("required", True):
+                raise ValueError(
+                    f"Preset parameter '{param_name}' resolved to an empty value"
+                )
+            continue
+
+        resolved[param_name] = coerced
+
+    return resolved
+
+
 async def execute_http_tool(
    tool: Any,
    arguments: Dict[str, Any],
    call_context_vars: Optional[Dict[str, Any]] = None,
+    gathered_context_vars: Optional[Dict[str, Any]] = None,
    organization_id: Optional[int] = None,
 ) -> Dict[str, Any]:
    """Execute an HTTP API tool.
@ -95,7 +181,8 @@ async def execute_http_tool(
    Args:
        tool: ToolModel instance
        arguments: Arguments passed by the LLM (parameter name -> value)
-        call_context_vars: Additional context variables from the call (unused for now)
+        call_context_vars: Initial context variables available at runtime
+        gathered_context_vars: Variables extracted during the conversation
        organization_id: Organization ID for credential lookup

    Returns:
@ -133,17 +220,31 @@ async def execute_http_tool(
    timeout_ms = config.get("timeout_ms", 5000)
    timeout_seconds = timeout_ms / 1000

+    try:
+        preset_arguments = _resolve_preset_parameters(
+            config, call_context_vars, gathered_context_vars
+        )
+    except ValueError as e:
+        logger.error(f"Custom tool '{tool.name}' preset parameter error: {e}")
+        return {"status": "error", "error": str(e)}
+
+    resolved_arguments = {**(arguments or {}), **preset_arguments}
+
    # Build request: JSON body for POST/PUT/PATCH, query params for GET/DELETE
    body = None
    params = None
    if method in ("POST", "PUT", "PATCH"):
-        body = arguments
-    elif method in ("GET", "DELETE") and arguments:
-        params = arguments
+        body = resolved_arguments
+    elif method in ("GET", "DELETE") and resolved_arguments:
+        params = resolved_arguments

    logger.info(
        f"Executing custom tool '{tool.name}' ({tool.tool_uuid}): {method} {url}"
    )
+    if preset_arguments:
+        logger.debug(
+            f"Resolved preset parameters for '{tool.name}': {list(preset_arguments.keys())}"
+        )
    logger.debug(f"Request body: {body}, params: {params}")

    try:
--- a/api/tests/test_custom_tools.py
+++ b/api/tests/test_custom_tools.py
@ -140,6 +140,45 @@ class TestToolToFunctionSchema:
        assert "duration_minutes" in required
        assert "is_priority" not in required

+    def test_preset_parameters_are_not_exposed_to_llm_schema(self):
+        """Preset parameters must stay out of the schema advertised to the LLM."""
+        llm_parameter = {
+            "name": "customer_name",
+            "type": "string",
+            "description": "Customer name spoken by the caller",
+            "required": True,
+        }
+        preset_parameter = {
+            "name": "phone_number",
+            "type": "string",
+            "value_template": "{{initial_context.phone_number}}",
+            "required": True,
+        }
+        definition = {
+            "schema_version": 1,
+            "type": "http_api",
+            "config": {
+                "method": "POST",
+                "url": "https://api.example.com/customers/lookup",
+                "parameters": [llm_parameter],
+                "preset_parameters": [preset_parameter],
+            },
+        }
+        tool = MockToolModel(
+            tool_uuid="test-uuid-preset",
+            name="Lookup Customer",
+            description="Lookup a customer using contextual identifiers",
+            category="http_api",
+            definition=definition,
+        )
+
+        exposed = tool_to_function_schema(tool)["function"]["parameters"]["properties"]
+
+        # Only the LLM-facing parameter is advertised; the preset one is injected
+        # at runtime instead.
+        assert "customer_name" in exposed
+        assert "phone_number" not in exposed
+
    def test_tool_name_sanitization(self):
        """Test that tool names with special characters are sanitized."""
        tool = MockToolModel(
@ -255,6 +294,108 @@ class TestExecuteHttpTool:
            assert result["status_code"] == 201
            assert result["data"]["id"] == 123

+    @pytest.mark.asyncio
+    async def test_post_request_injects_preset_parameters(self):
+        """Test that preset parameters are resolved from runtime context.
+
+        The LLM supplies only ``name``. The three preset parameters are
+        expected to be resolved from ``call_context_vars`` (for
+        ``initial_context.*`` templates) and ``gathered_context_vars`` (for
+        ``gathered_context.*`` templates), converted to their declared types,
+        and merged into the JSON body of the outgoing request.
+        """
+        tool = MockToolModel(
+            tool_uuid="test-uuid-preset",
+            name="Create Lead",
+            description="Create a lead with caller context",
+            category="http_api",
+            definition={
+                "schema_version": 1,
+                "type": "http_api",
+                "config": {
+                    "method": "POST",
+                    "url": "https://api.example.com/leads",
+                    "timeout_ms": 5000,
+                    # One preset of each supported type: string, number, boolean.
+                    "preset_parameters": [
+                        {
+                            "name": "phone_number",
+                            "type": "string",
+                            "value_template": "{{initial_context.phone_number}}",
+                            "required": True,
+                        },
+                        {
+                            "name": "customer_id",
+                            "type": "number",
+                            "value_template": "{{gathered_context.customer_id}}",
+                            "required": True,
+                        },
+                        {
+                            "name": "is_vip",
+                            "type": "boolean",
+                            "value_template": "{{initial_context.is_vip}}",
+                            "required": False,
+                        },
+                    ],
+                },
+            },
+        )
+
+        # Arguments as they would arrive from the LLM function call.
+        arguments = {"name": "John"}
+
+        # Replace the HTTP client so no real request is made and the outgoing
+        # call can be inspected.
+        with patch(
+            "api.services.workflow.tools.custom_tool.httpx.AsyncClient"
+        ) as mock_client_class:
+            mock_client = AsyncMock()
+            mock_response = Mock()
+            mock_response.status_code = 201
+            mock_response.json.return_value = {"id": 123}
+            mock_client.request.return_value = mock_response
+            # ``async with httpx.AsyncClient(...) as client`` yields mock_client.
+            mock_client_class.return_value.__aenter__.return_value = mock_client
+
+            # Context values are passed as strings on purpose: the expected
+            # body below requires them to come out as int / bool.
+            result = await execute_http_tool(
+                tool,
+                arguments,
+                call_context_vars={
+                    "phone_number": "+14155550123",
+                    "is_vip": "true",
+                },
+                gathered_context_vars={"customer_id": "42"},
+            )
+
+            # LLM arguments and resolved presets share one JSON body.
+            call_kwargs = mock_client.request.call_args.kwargs
+            assert call_kwargs["json"] == {
+                "name": "John",
+                "phone_number": "+14155550123",
+                "customer_id": 42,
+                "is_vip": True,
+            }
+            assert result["status"] == "success"
+
+    @pytest.mark.asyncio
+    async def test_missing_required_preset_parameter_returns_error(self):
+        """Test that a missing required preset parameter fails before the HTTP request.
+
+        ``phone_number`` is required but ``call_context_vars`` is empty, so the
+        template cannot resolve. The tool must return an error naming the
+        parameter and must never issue the request.
+        """
+        tool = MockToolModel(
+            tool_uuid="test-uuid-preset-error",
+            name="Create Lead",
+            description="Create a lead with caller context",
+            category="http_api",
+            definition={
+                "schema_version": 1,
+                "type": "http_api",
+                "config": {
+                    "method": "POST",
+                    "url": "https://api.example.com/leads",
+                    "timeout_ms": 5000,
+                    "preset_parameters": [
+                        {
+                            "name": "phone_number",
+                            "type": "string",
+                            "value_template": "{{initial_context.phone_number}}",
+                            "required": True,
+                        }
+                    ],
+                },
+            },
+        )
+
+        # Patch the HTTP client so a regression can never reach the network and
+        # so the "before the HTTP request" claim is actually verified.
+        with patch(
+            "api.services.workflow.tools.custom_tool.httpx.AsyncClient"
+        ) as mock_client_class:
+            mock_client = AsyncMock()
+            mock_client_class.return_value.__aenter__.return_value = mock_client
+
+            result = await execute_http_tool(
+                tool, {"name": "John"}, call_context_vars={}
+            )
+
+        assert result["status"] == "error"
+        assert "phone_number" in result["error"]
+        # Validation must short-circuit: no request may have been sent.
+        mock_client.request.assert_not_awaited()
+
    @pytest.mark.asyncio
    async def test_get_request_sends_query_params(self):
        """Test that GET requests send arguments as query parameters."""
--- a/api/tests/test_gemini_live_reconnect_tool_results.py
+++ b/api/tests/test_gemini_live_reconnect_tool_results.py
@ -0,0 +1,86 @@
+import json
+from types import SimpleNamespace
+from unittest.mock import AsyncMock
+
+import pytest
+from pipecat.processors.aggregators.llm_context import LLMContext
+
+from api.services.pipecat.realtime.gemini_live import DograhGeminiLiveLLMService
+
+
+class _TestDograhGeminiLiveLLMService(DograhGeminiLiveLLMService):
+    """Dograh Gemini service with client creation stubbed for unit tests."""
+
+    def create_client(self):
+        """Install a placeholder client instead of building a real one."""
+        # Only the ``aio.live.connect`` attribute chain is provided, and
+        # ``connect`` itself is ``None``.
+        self._client = SimpleNamespace(
+            aio=SimpleNamespace(live=SimpleNamespace(connect=None))
+        )
+
+
+class _FakeSession:
+    # Stand-in for a live session: each method the tests touch is an
+    # ``AsyncMock`` so it can be awaited and its calls inspected.
+    def __init__(self):
+        self.send_tool_response = AsyncMock()
+        self.send_realtime_input = AsyncMock()
+        self.close = AsyncMock()
+
+
+def _make_service() -> _TestDograhGeminiLiveLLMService:
+    """Build the stubbed Gemini service with its side-effect hooks mocked out."""
+    svc = _TestDograhGeminiLiveLLMService(api_key="test-key")
+    # Swap each hook for an awaitable mock so tests can call into the service
+    # without running the real implementations.
+    for hook_name in (
+        "stop_all_metrics",
+        "start_ttfb_metrics",
+        "cancel_task",
+        "push_error",
+    ):
+        setattr(svc, hook_name, AsyncMock())
+    return svc
+
+
+def _make_tool_result_context(tool_call_id: str) -> LLMContext:
+    """Return an ``LLMContext`` holding one tool-result message for *tool_call_id*."""
+    tool_result_message = {
+        "role": "tool",
+        "content": json.dumps({"status": "done"}),
+        "tool_call_id": tool_call_id,
+    }
+    return LLMContext(messages=[tool_result_message])
+
+
+@pytest.mark.asyncio
+async def test_updated_context_during_reconnect_keeps_result_pending_until_session_ready():
+    """A tool result handled while disconnected is sent once a session is ready.
+
+    The context update is processed after ``_disconnect()``. The tool call must
+    stay out of ``_completed_tool_calls`` until ``_handle_session_ready`` is
+    called with a new session, at which point the response is sent exactly once.
+    """
+    service = _make_service()
+    # Mark the initial context as already handled so this update is treated as
+    # a later one.
+    service._handled_initial_context = True
+    service._tool_call_id_to_name = {"call-transition": "transition_to_next_node"}
+    service._session = _FakeSession()
+
+    context = _make_tool_result_context("call-transition")
+
+    # Drop the session first, then deliver the context carrying the result.
+    await service._disconnect()
+    await service._handle_context(context)
+
+    # A reconnect gap should not count as successful delivery to Gemini.
+    assert "call-transition" not in service._completed_tool_calls
+
+    # A fresh session becoming ready must flush the pending result.
+    session = _FakeSession()
+    await service._handle_session_ready(session)
+
+    session.send_tool_response.assert_awaited_once()
+    sent_response = session.send_tool_response.await_args.kwargs["function_responses"]
+    assert sent_response.id == "call-transition"
+    assert sent_response.name == "transition_to_next_node"
+    assert "call-transition" in service._completed_tool_calls
+
+
+@pytest.mark.asyncio
+async def test_disconnect_does_not_forget_previously_delivered_tool_results():
+    """A result already marked completed is not re-sent after a disconnect.
+
+    ``_completed_tool_calls`` must survive ``_disconnect()`` so that the next
+    ``_process_completed_function_calls`` pass does not call ``_tool_result``
+    again for the same call id.
+    """
+    service = _make_service()
+    service._context = _make_tool_result_context("call-transition")
+    # Pretend this result was delivered before the disconnect.
+    service._completed_tool_calls = {"call-transition"}
+    service._tool_call_id_to_name = {"call-transition": "transition_to_next_node"}
+    service._session = _FakeSession()
+    # Mock the sender so any re-delivery attempt is observable.
+    service._tool_result = AsyncMock()
+
+    await service._disconnect()
+    await service._process_completed_function_calls(send_new_results=True)
+
+    service._tool_result.assert_not_awaited()
+    assert service._completed_tool_calls == {"call-transition"}
--- a/api/tests/test_openai_realtime_initial_context.py
+++ b/api/tests/test_openai_realtime_initial_context.py
@ -0,0 +1,98 @@
+from types import SimpleNamespace
+from unittest.mock import AsyncMock
+
+import pytest
+from pipecat.frames.frames import TTSSpeakFrame
+from pipecat.processors.aggregators.llm_context import LLMContext
+from pipecat.processors.frame_processor import FrameDirection
+
+from api.services.pipecat.realtime.openai_realtime import (
+    DograhOpenAIRealtimeLLMService,
+)
+
+
+def _make_service() -> DograhOpenAIRealtimeLLMService:
+    """Build an OpenAI realtime service whose two response paths are mocked."""
+    svc = DograhOpenAIRealtimeLLMService(api_key="test-key")
+    # Both paths are replaced so tests can tell which one a context update took.
+    for method_name in ("_create_response", "_process_completed_function_calls"):
+        setattr(svc, method_name, AsyncMock())
+    return svc
+
+
+@pytest.mark.asyncio
+async def test_initial_context_triggers_response_when_context_was_prepopulated():
+    """The first handled context creates a response even if already stored.
+
+    ``service._context`` is set to the same object before ``_handle_context``
+    runs. The call must still be treated as the initial context.
+    """
+    service = _make_service()
+    context = LLMContext()
+    # Pre-populate the service with the very context it is about to receive.
+    service._context = context
+
+    await service._handle_context(context)
+
+    assert service._handled_initial_context is True
+    assert service._context is context
+    # Initial path: a response is created, the tool-result path is not used.
+    service._create_response.assert_awaited_once()
+    service._process_completed_function_calls.assert_not_awaited()
+
+
+@pytest.mark.asyncio
+async def test_updated_context_uses_tool_result_path_after_initial_context():
+    """A context handled after the initial one goes through the tool-result path."""
+    service = _make_service()
+    context = LLMContext()
+    # Simulate that the initial context has already been handled.
+    service._handled_initial_context = True
+
+    await service._handle_context(context)
+
+    # The new context is stored, and no fresh response is created directly.
+    assert service._context is context
+    service._create_response.assert_not_awaited()
+    service._process_completed_function_calls.assert_awaited_once_with(
+        send_new_results=True
+    )
+
+
+@pytest.mark.asyncio
+async def test_tts_greeting_uses_initial_context_handler():
+    """A ``TTSSpeakFrame`` greeting is routed through ``_handle_context``."""
+    service = _make_service()
+    service._context = LLMContext()
+    # Mock the handler itself so only the routing decision is under test.
+    service._handle_context = AsyncMock()
+
+    await service.process_frame(
+        TTSSpeakFrame("hello", append_to_context=True),
+        FrameDirection.DOWNSTREAM,
+    )
+
+    # The handler receives the service's current context; the frame must not
+    # trigger ``_create_response`` directly.
+    service._handle_context.assert_awaited_once_with(service._context)
+    service._create_response.assert_not_awaited()
+
+
+@pytest.mark.asyncio
+async def test_function_call_executes_immediately_when_bot_is_not_speaking():
+    """A completed function call runs right away and nothing is deferred."""
+    service = _make_service()
+    service._context = LLMContext()
+    service.run_function_calls = AsyncMock()
+    # Register the call the arguments-done event will refer to.
+    # NOTE(review): ``_bot_is_speaking`` is left at its default here, which is
+    # presumably falsy - confirm against the service's initializer.
+    service._pending_function_calls["call-1"] = SimpleNamespace(name="customer_support")
+
+    await service._handle_evt_function_call_arguments_done(
+        SimpleNamespace(call_id="call-1", arguments='{"department":"sales"}')
+    )
+
+    service.run_function_calls.assert_awaited_once()
+    assert service._deferred_function_calls == []
+
+
+@pytest.mark.asyncio
+async def test_function_call_is_deferred_until_bot_stops_speaking():
+    """A function call completed while the bot speaks is queued, then run later."""
+    service = _make_service()
+    service._context = LLMContext()
+    service.run_function_calls = AsyncMock()
+    # Simulate the bot currently speaking.
+    service._bot_is_speaking = True
+    service._pending_function_calls["call-1"] = SimpleNamespace(name="customer_support")
+
+    await service._handle_evt_function_call_arguments_done(
+        SimpleNamespace(call_id="call-1", arguments='{"department":"sales"}')
+    )
+
+    # Phase 1: the call is held back rather than executed.
+    service.run_function_calls.assert_not_awaited()
+    assert len(service._deferred_function_calls) == 1
+
+    # Phase 2: flushing the queue runs it once and empties the queue.
+    await service._run_pending_function_calls()
+
+    service.run_function_calls.assert_awaited_once()
+    assert service._deferred_function_calls == []
--- a/api/tests/test_realtime_message_append.py
+++ b/api/tests/test_realtime_message_append.py
@ -0,0 +1,59 @@
+from types import SimpleNamespace
+from unittest.mock import AsyncMock
+
+import pytest
+from pipecat.frames.frames import LLMMessagesAppendFrame
+from pipecat.services.openai.realtime import events
+
+from api.services.pipecat.realtime.openai_realtime import (
+    DograhOpenAIRealtimeLLMService,
+)
+from api.services.workflow.pipecat_engine_callbacks import UserIdleHandler
+
+
+@pytest.mark.asyncio
+async def test_openai_realtime_messages_append_frame_sends_conversation_item():
+    """An appended user message becomes a conversation item plus a response request.
+
+    With ``run_llm=True`` the service must send one
+    ``ConversationItemCreateEvent`` carrying the text as ``input_text`` content
+    and then request a response once.
+    """
+    service = DograhOpenAIRealtimeLLMService(api_key="test")
+    # Pretend the API session is ready, and capture outgoing events.
+    service._api_session_ready = True
+    service.send_client_event = AsyncMock()
+    service._send_manual_response_create = AsyncMock()
+
+    await service._handle_messages_append(
+        LLMMessagesAppendFrame(
+            [{"role": "user", "content": "Are you still there?"}],
+            run_llm=True,
+        )
+    )
+
+    service.send_client_event.assert_awaited_once()
+    # The single event sent is the first positional argument of that call.
+    event = service.send_client_event.await_args.args[0]
+    assert isinstance(event, events.ConversationItemCreateEvent)
+    assert event.item.role == "user"
+    assert event.item.type == "message"
+    assert event.item.content == [
+        events.ItemContent(type="input_text", text="Are you still there?")
+    ]
+    service._send_manual_response_create.assert_awaited_once()
+
+
+@pytest.mark.asyncio
+async def test_user_idle_handler_uses_realtime_append_path():
+    """``UserIdleHandler.handle_idle`` pushes one ``LLMMessagesAppendFrame`` prompt."""
+    # Minimal engine stand-in: an attribute-less ``llm`` and a mocked
+    # ``end_call_with_reason``.
+    engine = SimpleNamespace(
+        llm=SimpleNamespace(),
+        end_call_with_reason=AsyncMock(),
+    )
+    aggregator = SimpleNamespace(push_frame=AsyncMock())
+    handler = UserIdleHandler(engine)
+
+    await handler.handle_idle(aggregator)
+
+    # Exactly one frame is pushed; it asks the LLM to run on a user-role nudge.
+    aggregator.push_frame.assert_awaited_once()
+    frame = aggregator.push_frame.await_args.args[0]
+    assert isinstance(frame, LLMMessagesAppendFrame)
+    assert frame.run_llm is True
+    assert frame.messages == [
+        {
+            "role": "user",
+            "content": "The user has been quiet. Politely and briefly ask if they're still there in the language that the user has been speaking so far.",
+        }
+    ]
--- a/api/tests/test_run_pipeline_realtime_turn_config.py
+++ b/api/tests/test_run_pipeline_realtime_turn_config.py
@ -0,0 +1,61 @@
+from pipecat.audio.vad.silero import SileroVADAnalyzer
+from pipecat.turns.user_start import (
+    ExternalUserTurnStartStrategy,
+)
+from pipecat.turns.user_start.vad_user_turn_start_strategy import (
+    VADUserTurnStartStrategy,
+)
+from pipecat.turns.user_stop import (
+    ExternalUserTurnStopStrategy,
+    SpeechTimeoutUserTurnStopStrategy,
+)
+
+from api.services.configuration.registry import ServiceProviders
+from api.services.pipecat.run_pipeline import _create_realtime_user_turn_config
+
+
+def test_gemini_realtime_uses_local_vad_without_local_interruptions():
+    """The Google realtime provider keeps local VAD but disables local interruptions."""
+    provider = ServiceProviders.GOOGLE_REALTIME.value
+    turn_strategies, analyzer = _create_realtime_user_turn_config(provider)
+
+    # VAD analysis runs locally for this provider.
+    assert isinstance(analyzer, SileroVADAnalyzer)
+
+    # Exactly one VAD-driven start strategy, with interruptions turned off.
+    start_strategies = turn_strategies.start
+    assert len(start_strategies) == 1
+    start_strategy = start_strategies[0]
+    assert isinstance(start_strategy, VADUserTurnStartStrategy)
+    assert start_strategy._enable_interruptions is False
+
+    # Exactly one speech-timeout stop strategy.
+    stop_strategies = turn_strategies.stop
+    assert len(stop_strategies) == 1
+    assert isinstance(stop_strategies[0], SpeechTimeoutUserTurnStopStrategy)
+
+
+def test_gemini_vertex_realtime_uses_same_turn_config_as_gemini_live():
+    """The Vertex realtime provider gets the same turn config as Gemini Live.
+
+    Both the start and the stop strategies are checked, so the parity
+    promised by the test name covers the whole configuration.
+    """
+    strategies, vad_analyzer = _create_realtime_user_turn_config(
+        ServiceProviders.GOOGLE_VERTEX_REALTIME.value
+    )
+
+    assert isinstance(vad_analyzer, SileroVADAnalyzer)
+    assert len(strategies.start) == 1
+    assert isinstance(strategies.start[0], VADUserTurnStartStrategy)
+    assert strategies.start[0]._enable_interruptions is False
+    # Stop side: same assertions as the Gemini Live test in this file.
+    assert len(strategies.stop) == 1
+    assert isinstance(strategies.stop[0], SpeechTimeoutUserTurnStopStrategy)
+
+
+def test_openai_realtime_uses_provider_turn_frames_without_local_vad():
+    """OpenAI realtime uses external turn strategies and no local VAD analyzer."""
+    provider = ServiceProviders.OPENAI_REALTIME.value
+    turn_strategies, analyzer = _create_realtime_user_turn_config(provider)
+
+    # No local VAD analyzer is configured for this provider.
+    assert analyzer is None
+
+    # Exactly one external start strategy, with interruptions turned off.
+    start_strategies = turn_strategies.start
+    assert len(start_strategies) == 1
+    start_strategy = start_strategies[0]
+    assert isinstance(start_strategy, ExternalUserTurnStartStrategy)
+    assert start_strategy._enable_interruptions is False
+
+    # Exactly one external stop strategy.
+    stop_strategies = turn_strategies.stop
+    assert len(stop_strategies) == 1
+    assert isinstance(stop_strategies[0], ExternalUserTurnStopStrategy)
+
+
+def test_unknown_realtime_providers_keep_local_vad():
+    """An unrecognised provider name gets the local-VAD turn configuration."""
+    turn_strategies, analyzer = _create_realtime_user_turn_config("other_realtime")
+
+    assert isinstance(analyzer, SileroVADAnalyzer)
+
+    # One VAD-driven start strategy.
+    start_strategies = turn_strategies.start
+    assert len(start_strategies) == 1
+    assert isinstance(start_strategies[0], VADUserTurnStartStrategy)
+
+    # One speech-timeout stop strategy.
+    stop_strategies = turn_strategies.stop
+    assert len(stop_strategies) == 1
+    assert isinstance(stop_strategies[0], SpeechTimeoutUserTurnStopStrategy)
--- a/api/tests/test_unregistered_function_call.py
+++ b/api/tests/test_unregistered_function_call.py
@ -66,7 +66,7 @@ class TestUnregisteredFunctionCall:

        # Pipecat's missing-function handler returns a string error.
        assert isinstance(result_frame.result, str)
-        assert "not registered" in result_frame.result
+        assert "not currently available" in result_frame.result
        assert "nonexistent_tool" in result_frame.result

        # In-progress frame should also be emitted before the result so mute