feat: add Assembly AI STT

2026-07-22 11:51:04 +02:00 · 2026-04-03 07:10:37 +05:30 · 2026-04-03 07:10:37 +05:30 · 501d06c00d
commit 501d06c00d
parent 66b085dde2
7 changed files with 94 additions and 8 deletions
--- a/api/services/configuration/check_validity.py
+++ b/api/services/configuration/check_validity.py
@ -49,6 +49,7 @@ class UserConfigurationValidator:
            ServiceProviders.SPEACHES.value: self._check_speaches_api_key,
            ServiceProviders.OPENAI_REALTIME.value: self._check_openai_api_key,
            ServiceProviders.GOOGLE_REALTIME.value: self._check_google_api_key,
+            ServiceProviders.ASSEMBLYAI.value: self._check_assemblyai_api_key,
        }

    async def validate(
@ -217,3 +218,6 @@ class UserConfigurationValidator:
        if not service_config.aws_access_key or not service_config.aws_secret_key:
            raise ValueError("AWS access key and secret key are required for Bedrock")
        return True
+
+    def _check_assemblyai_api_key(self, model: str, service_config) -> bool:
+        return True
--- a/api/services/configuration/registry.py
+++ b/api/services/configuration/registry.py
@ -29,6 +29,7 @@ class ServiceProviders(str, Enum):
    CAMB = "camb"
    AWS_BEDROCK = "aws_bedrock"
    SPEACHES = "speaches"
+    ASSEMBLYAI = "assemblyai"
    OPENAI_REALTIME = "openai_realtime"
    GOOGLE_REALTIME = "google_realtime"

@ -45,6 +46,7 @@ class BaseServiceConfiguration(BaseModel):
        ServiceProviders.DOGRAH,
        ServiceProviders.AWS_BEDROCK,
        ServiceProviders.SPEACHES,
+        ServiceProviders.ASSEMBLYAI,
        ServiceProviders.OPENAI_REALTIME,
        ServiceProviders.GOOGLE_REALTIME,
        # ServiceProviders.SARVAM,
@ -318,7 +320,33 @@ OPENAI_REALTIME_VOICES = [

 GOOGLE_REALTIME_MODELS = ["gemini-3.1-flash-live-preview"]
 GOOGLE_REALTIME_VOICES = ["Puck", "Charon", "Kore", "Fenrir", "Aoede"]
-GOOGLE_REALTIME_LANGUAGES = ["en"]
+GOOGLE_REALTIME_LANGUAGES = [
+    "ar",
+    "bn",
+    "de",
+    "en",
+    "es",
+    "fr",
+    "gu",
+    "hi",
+    "id",
+    "it",
+    "ja",
+    "kn",
+    "ko",
+    "ml",
+    "mr",
+    "nl",
+    "pl",
+    "pt",
+    "ru",
+    "ta",
+    "te",
+    "th",
+    "tr",
+    "vi",
+    "zh",
+]


@register_service(ServiceType.REALTIME)
@ -830,6 +858,23 @@ class SpeachesSTTConfiguration(BaseSTTConfiguration):
    api_key: str | list[str] | None = Field(default=None)


+ASSEMBLYAI_STT_MODELS = ["u3-rt-pro"]
+ASSEMBLYAI_STT_LANGUAGES = ["en", "es", "de", "fr", "pt", "it"]
+
+
+@register_stt
+class AssemblyAISTTConfiguration(BaseSTTConfiguration):
+    provider: Literal[ServiceProviders.ASSEMBLYAI] = ServiceProviders.ASSEMBLYAI
+    model: str = Field(
+        default="u3-rt-pro",
+        json_schema_extra={"examples": ASSEMBLYAI_STT_MODELS},
+    )
+    language: str = Field(
+        default="en",
+        json_schema_extra={"examples": ASSEMBLYAI_STT_LANGUAGES},
+    )
+
+
 STTConfig = Annotated[
    Union[
        DeepgramSTTConfiguration,
@ -839,6 +884,7 @@ STTConfig = Annotated[
        SpeechmaticsSTTConfiguration,
        SarvamSTTConfiguration,
        SpeachesSTTConfiguration,
+        AssemblyAISTTConfiguration,
    ],
    Field(discriminator="provider"),
 ]
--- a/api/services/pipecat/service_factory.py
+++ b/api/services/pipecat/service_factory.py
@ -5,6 +5,7 @@ from loguru import logger

 from api.constants import MPS_API_URL
 from api.services.configuration.registry import ServiceProviders
+from pipecat.services.assemblyai.stt import AssemblyAISTTService, AssemblyAISTTSettings
 from pipecat.services.aws.llm import AWSBedrockLLMService, AWSBedrockLLMSettings
 from pipecat.services.azure.llm import AzureLLMService, AzureLLMSettings
 from pipecat.services.cartesia.stt import CartesiaSTTService
@ -156,6 +157,17 @@ def create_stt_service(
            ),
            sample_rate=audio_config.transport_in_sample_rate,
        )
+    elif user_config.stt.provider == ServiceProviders.ASSEMBLYAI.value:
+        language = getattr(user_config.stt, "language", None)
+        pipecat_language = _to_language_enum(language, default=Language.EN)
+        settings_kwargs = {"model": user_config.stt.model, "language": pipecat_language}
+        if keyterms:
+            settings_kwargs["keyterms_prompt"] = keyterms
+        return AssemblyAISTTService(
+            api_key=user_config.stt.api_key,
+            settings=AssemblyAISTTSettings(**settings_kwargs),
+            sample_rate=audio_config.transport_in_sample_rate,
+        )
    elif user_config.stt.provider == ServiceProviders.SPEECHMATICS.value:
        from pipecat.services.speechmatics.stt import (
            AdditionalVocabEntry,
--- a/api/services/workflow/workflow.py
+++ b/api/services/workflow/workflow.py
@ -26,7 +26,8 @@ def extract_template_variables(text: str) -> Set[str]:
        if "." in var_name:
            continue
        # Skip variables with a fallback (they have a default value)
-        if filter_name == "fallback":
+        # Supports both {{var | default}} and legacy {{var | fallback:default}}
+        if filter_name is not None:
            continue
        # Skip system-injected variables
        if var_name in _SYSTEM_VARIABLES:
--- a/api/utils/template_renderer.py
+++ b/api/utils/template_renderer.py
@ -187,12 +187,19 @@ def _render_string(template_str: str, context: Dict[str, Any]) -> str:
        # Get value using nested path lookup
        value = get_nested_value(context, variable_path)

-        # Apply filters
-        if filter_name == "fallback":
+        # Apply fallback: new syntax {{var | default}} or legacy {{var | fallback:default}}
+        if filter_name is not None:
            if value is None or value == "":
-                value = (
-                    filter_value if filter_value is not None else variable_path.title()
-                )
+                if filter_name == "fallback":
+                    # Legacy syntax: {{var | fallback:default}}
+                    value = (
+                        filter_value
+                        if filter_value is not None
+                        else variable_path.title()
+                    )
+                else:
+                    # New syntax: {{var | default}}
+                    value = filter_name

        # Convert to string for substitution
        if value is None:
--- a/docs/core-concepts/context-and-variables.mdx
+++ b/docs/core-concepts/context-and-variables.mdx
@ -45,6 +45,22 @@ whether they'd like to continue.

 When the call starts, Dograh substitutes the values before sending the prompt to the LLM — so the agent speaks naturally as if it already knows the contact.

+### Fallback values
+
+If a variable might be missing or empty, use a pipe (`|`) to provide a default value:
+
+```
+Hello {{customer_name | there}}, we're calling about your {{plan | current}} plan.
+```
+
+When `customer_name` is missing or empty, the agent will say "Hello there" instead of leaving a blank. The syntax is:
+
+```
+{{variable_name | fallback_value}}
+```
+
+If the variable is present and non-empty, the fallback is ignored and the actual value is used.
+
 ### Default variables

 Built-in variables for current time and weekday, available in any prompt without setting up `initial_context`.
--- a/docs/docs.json
+++ b/docs/docs.json
@ -234,7 +234,7 @@
    }
  },
  "banner": {
-    "content": "🎉 **New: Pre-recorded Audio** — lower latency, reduced TTS costs, and natural-sounding conversations using your own voice recordings. [Learn more →](/voice-agent/pre-recorded-audio)",
+    "content": "🎉 **New: Gemini Live 3.1 Support** — Try the latest Google Gemini Live 3.1 on the Dograh platform. [Learn more →](/configurations/inference-providers#gemini-3-1-live)",
    "dismissible": true
  },
  "search": {