mirror of
https://github.com/dograh-hq/dograh.git
synced 2026-06-07 07:55:16 +02:00
feat: add Assembly AI STT
This commit is contained in:
parent
66b085dde2
commit
501d06c00d
7 changed files with 94 additions and 8 deletions
|
|
@ -49,6 +49,7 @@ class UserConfigurationValidator:
|
|||
ServiceProviders.SPEACHES.value: self._check_speaches_api_key,
|
||||
ServiceProviders.OPENAI_REALTIME.value: self._check_openai_api_key,
|
||||
ServiceProviders.GOOGLE_REALTIME.value: self._check_google_api_key,
|
||||
ServiceProviders.ASSEMBLYAI.value: self._check_assemblyai_api_key,
|
||||
}
|
||||
|
||||
async def validate(
|
||||
|
|
@ -217,3 +218,6 @@ class UserConfigurationValidator:
|
|||
if not service_config.aws_access_key or not service_config.aws_secret_key:
|
||||
raise ValueError("AWS access key and secret key are required for Bedrock")
|
||||
return True
|
||||
|
||||
def _check_assemblyai_api_key(self, model: str, service_config) -> bool:
|
||||
return True
|
||||
|
|
|
|||
|
|
@ -29,6 +29,7 @@ class ServiceProviders(str, Enum):
|
|||
CAMB = "camb"
|
||||
AWS_BEDROCK = "aws_bedrock"
|
||||
SPEACHES = "speaches"
|
||||
ASSEMBLYAI = "assemblyai"
|
||||
OPENAI_REALTIME = "openai_realtime"
|
||||
GOOGLE_REALTIME = "google_realtime"
|
||||
|
||||
|
|
@ -45,6 +46,7 @@ class BaseServiceConfiguration(BaseModel):
|
|||
ServiceProviders.DOGRAH,
|
||||
ServiceProviders.AWS_BEDROCK,
|
||||
ServiceProviders.SPEACHES,
|
||||
ServiceProviders.ASSEMBLYAI,
|
||||
ServiceProviders.OPENAI_REALTIME,
|
||||
ServiceProviders.GOOGLE_REALTIME,
|
||||
# ServiceProviders.SARVAM,
|
||||
|
|
@ -318,7 +320,33 @@ OPENAI_REALTIME_VOICES = [
|
|||
|
||||
GOOGLE_REALTIME_MODELS = ["gemini-3.1-flash-live-preview"]
|
||||
GOOGLE_REALTIME_VOICES = ["Puck", "Charon", "Kore", "Fenrir", "Aoede"]
|
||||
GOOGLE_REALTIME_LANGUAGES = ["en"]
|
||||
GOOGLE_REALTIME_LANGUAGES = [
|
||||
"ar",
|
||||
"bn",
|
||||
"de",
|
||||
"en",
|
||||
"es",
|
||||
"fr",
|
||||
"gu",
|
||||
"hi",
|
||||
"id",
|
||||
"it",
|
||||
"ja",
|
||||
"kn",
|
||||
"ko",
|
||||
"ml",
|
||||
"mr",
|
||||
"nl",
|
||||
"pl",
|
||||
"pt",
|
||||
"ru",
|
||||
"ta",
|
||||
"te",
|
||||
"th",
|
||||
"tr",
|
||||
"vi",
|
||||
"zh",
|
||||
]
|
||||
|
||||
|
||||
@register_service(ServiceType.REALTIME)
|
||||
|
|
@ -830,6 +858,23 @@ class SpeachesSTTConfiguration(BaseSTTConfiguration):
|
|||
api_key: str | list[str] | None = Field(default=None)
|
||||
|
||||
|
||||
ASSEMBLYAI_STT_MODELS = ["u3-rt-pro"]
|
||||
ASSEMBLYAI_STT_LANGUAGES = ["en", "es", "de", "fr", "pt", "it"]
|
||||
|
||||
|
||||
# Settings model for the AssemblyAI speech-to-text provider.
# `provider` is the tag the STTConfig union discriminates on; the example
# lists are attached to the fields as JSON-schema extras.
@register_stt
class AssemblyAISTTConfiguration(BaseSTTConfiguration):
    provider: Literal[ServiceProviders.ASSEMBLYAI] = ServiceProviders.ASSEMBLYAI
    # Streaming model identifier; defaults to the only listed model.
    model: str = Field(
        json_schema_extra={"examples": ASSEMBLYAI_STT_MODELS}, default="u3-rt-pro"
    )
    # Language code for transcription; English unless overridden.
    language: str = Field(
        json_schema_extra={"examples": ASSEMBLYAI_STT_LANGUAGES}, default="en"
    )
|
||||
|
||||
|
||||
STTConfig = Annotated[
|
||||
Union[
|
||||
DeepgramSTTConfiguration,
|
||||
|
|
@ -839,6 +884,7 @@ STTConfig = Annotated[
|
|||
SpeechmaticsSTTConfiguration,
|
||||
SarvamSTTConfiguration,
|
||||
SpeachesSTTConfiguration,
|
||||
AssemblyAISTTConfiguration,
|
||||
],
|
||||
Field(discriminator="provider"),
|
||||
]
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@ from loguru import logger
|
|||
|
||||
from api.constants import MPS_API_URL
|
||||
from api.services.configuration.registry import ServiceProviders
|
||||
from pipecat.services.assemblyai.stt import AssemblyAISTTService, AssemblyAISTTSettings
|
||||
from pipecat.services.aws.llm import AWSBedrockLLMService, AWSBedrockLLMSettings
|
||||
from pipecat.services.azure.llm import AzureLLMService, AzureLLMSettings
|
||||
from pipecat.services.cartesia.stt import CartesiaSTTService
|
||||
|
|
@ -156,6 +157,17 @@ def create_stt_service(
|
|||
),
|
||||
sample_rate=audio_config.transport_in_sample_rate,
|
||||
)
|
||||
elif user_config.stt.provider == ServiceProviders.ASSEMBLYAI.value:
|
||||
language = getattr(user_config.stt, "language", None)
|
||||
pipecat_language = _to_language_enum(language, default=Language.EN)
|
||||
settings_kwargs = {"model": user_config.stt.model, "language": pipecat_language}
|
||||
if keyterms:
|
||||
settings_kwargs["keyterms_prompt"] = keyterms
|
||||
return AssemblyAISTTService(
|
||||
api_key=user_config.stt.api_key,
|
||||
settings=AssemblyAISTTSettings(**settings_kwargs),
|
||||
sample_rate=audio_config.transport_in_sample_rate,
|
||||
)
|
||||
elif user_config.stt.provider == ServiceProviders.SPEECHMATICS.value:
|
||||
from pipecat.services.speechmatics.stt import (
|
||||
AdditionalVocabEntry,
|
||||
|
|
|
|||
|
|
@ -26,7 +26,8 @@ def extract_template_variables(text: str) -> Set[str]:
|
|||
if "." in var_name:
|
||||
continue
|
||||
# Skip variables with a fallback (they have a default value)
|
||||
if filter_name == "fallback":
|
||||
# Supports both {{var | default}} and legacy {{var | fallback:default}}
|
||||
if filter_name is not None:
|
||||
continue
|
||||
# Skip system-injected variables
|
||||
if var_name in _SYSTEM_VARIABLES:
|
||||
|
|
|
|||
|
|
@ -187,12 +187,19 @@ def _render_string(template_str: str, context: Dict[str, Any]) -> str:
|
|||
# Get value using nested path lookup
|
||||
value = get_nested_value(context, variable_path)
|
||||
|
||||
# Apply filters
|
||||
if filter_name == "fallback":
|
||||
# Apply fallback: new syntax {{var | default}} or legacy {{var | fallback:default}}
|
||||
if filter_name is not None:
|
||||
if value is None or value == "":
|
||||
value = (
|
||||
filter_value if filter_value is not None else variable_path.title()
|
||||
)
|
||||
if filter_name == "fallback":
|
||||
# Legacy syntax: {{var | fallback:default}}
|
||||
value = (
|
||||
filter_value
|
||||
if filter_value is not None
|
||||
else variable_path.title()
|
||||
)
|
||||
else:
|
||||
# New syntax: {{var | default}}
|
||||
value = filter_name
|
||||
|
||||
# Convert to string for substitution
|
||||
if value is None:
|
||||
|
|
|
|||
|
|
@ -45,6 +45,22 @@ whether they'd like to continue.
|
|||
|
||||
When the call starts, Dograh substitutes the values before sending the prompt to the LLM — so the agent speaks naturally as if it already knows the contact.
|
||||
|
||||
### Fallback values
|
||||
|
||||
If a variable might be missing or empty, use a pipe (`|`) to provide a default value:
|
||||
|
||||
```
|
||||
Hello {{customer_name | there}}, we're calling about your {{plan | current}} plan.
|
||||
```
|
||||
|
||||
When `customer_name` is not set or is empty, the agent will say "Hello there" instead of leaving a blank. The syntax is:
|
||||
|
||||
```
|
||||
{{variable_name | fallback_value}}
|
||||
```
|
||||
|
||||
If the variable is present and non-empty, the fallback is ignored and the actual value is used.
|
||||
|
||||
### Default variables
|
||||
|
||||
Dograh provides built-in variables for the current time and weekday. They are available in any prompt without setting up `initial_context`.
|
||||
|
|
|
|||
|
|
@ -234,7 +234,7 @@
|
|||
}
|
||||
},
|
||||
"banner": {
|
||||
"content": "🎉 **New: Pre-recorded Audio** — lower latency, reduced TTS costs, and natural-sounding conversations using your own voice recordings. [Learn more →](/voice-agent/pre-recorded-audio)",
|
||||
"content": "🎉 **New: Gemini Live 3.1 Support** — Try the latest Google Gemini Live 3.1 on the Dograh platform. [Learn more →](/configurations/inference-providers#gemini-3-1-live)",
|
||||
"dismissible": true
|
||||
},
|
||||
"search": {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue