mirror of
https://github.com/dograh-hq/dograh.git
synced 2026-06-07 07:55:16 +02:00
* feat: add ultravox realtime and fix signature issue in telephony - Add UltraVox realtime - Fix signature issue on telephony * fix: fix regression for wss_backend_endpoint
1285 lines
41 KiB
Python
1285 lines
41 KiB
Python
import random
|
|
from enum import Enum, auto
|
|
from typing import Annotated, Dict, Literal, Type, TypeVar, Union
|
|
|
|
from pydantic import BaseModel, ConfigDict, Field, computed_field, field_validator
|
|
|
|
from api.services.configuration.options import (
|
|
DEEPGRAM_LANGUAGES,
|
|
DEEPGRAM_STT_MODELS,
|
|
GLADIA_STT_LANGUAGES,
|
|
GLADIA_STT_MODELS,
|
|
GOOGLE_MODELS,
|
|
GOOGLE_REALTIME_LANGUAGES,
|
|
GOOGLE_REALTIME_MODELS,
|
|
GOOGLE_REALTIME_VOICES,
|
|
GOOGLE_STT_LANGUAGES,
|
|
GOOGLE_STT_MODELS,
|
|
GOOGLE_TTS_LANGUAGES,
|
|
GOOGLE_TTS_MODELS,
|
|
GOOGLE_TTS_VOICES,
|
|
GOOGLE_VERTEX_REALTIME_LANGUAGES,
|
|
GOOGLE_VERTEX_REALTIME_MODELS,
|
|
GOOGLE_VERTEX_REALTIME_VOICES,
|
|
SARVAM_LANGUAGES,
|
|
SARVAM_STT_MODELS,
|
|
SARVAM_TTS_MODELS,
|
|
SARVAM_V2_VOICES,
|
|
SARVAM_V3_VOICES,
|
|
SPEECHMATICS_STT_LANGUAGES,
|
|
)
|
|
from api.services.configuration.options.google import GOOGLE_VERTEX_MODELS
|
|
|
|
|
|
class ServiceType(Enum):
|
|
LLM = auto()
|
|
TTS = auto()
|
|
STT = auto()
|
|
EMBEDDINGS = auto()
|
|
REALTIME = auto()
|
|
|
|
|
|
class ServiceProviders(str, Enum):
|
|
OPENAI = "openai"
|
|
DEEPGRAM = "deepgram"
|
|
GROQ = "groq"
|
|
OPENROUTER = "openrouter"
|
|
CARTESIA = "cartesia"
|
|
# NEUPHONIC = "neuphonic"
|
|
ELEVENLABS = "elevenlabs"
|
|
GOOGLE = "google"
|
|
AZURE = "azure"
|
|
DOGRAH = "dograh"
|
|
SARVAM = "sarvam"
|
|
SPEECHMATICS = "speechmatics"
|
|
CAMB = "camb"
|
|
AWS_BEDROCK = "aws_bedrock"
|
|
SPEACHES = "speaches"
|
|
ASSEMBLYAI = "assemblyai"
|
|
GLADIA = "gladia"
|
|
RIME = "rime"
|
|
MINIMAX = "minimax"
|
|
GOOGLE_VERTEX = "google_vertex"
|
|
OPENAI_REALTIME = "openai_realtime"
|
|
GROK_REALTIME = "grok_realtime"
|
|
ULTRAVOX_REALTIME = "ultravox_realtime"
|
|
GOOGLE_REALTIME = "google_realtime"
|
|
GOOGLE_VERTEX_REALTIME = "google_vertex_realtime"
|
|
|
|
|
|
class BaseServiceConfiguration(BaseModel):
|
|
provider: Literal[
|
|
ServiceProviders.OPENAI,
|
|
ServiceProviders.DEEPGRAM,
|
|
ServiceProviders.GROQ,
|
|
ServiceProviders.OPENROUTER,
|
|
ServiceProviders.ELEVENLABS,
|
|
ServiceProviders.GOOGLE,
|
|
ServiceProviders.AZURE,
|
|
ServiceProviders.DOGRAH,
|
|
ServiceProviders.AWS_BEDROCK,
|
|
ServiceProviders.SPEACHES,
|
|
ServiceProviders.ASSEMBLYAI,
|
|
ServiceProviders.GLADIA,
|
|
ServiceProviders.RIME,
|
|
ServiceProviders.MINIMAX,
|
|
ServiceProviders.GOOGLE_VERTEX,
|
|
ServiceProviders.OPENAI_REALTIME,
|
|
ServiceProviders.GROK_REALTIME,
|
|
ServiceProviders.ULTRAVOX_REALTIME,
|
|
ServiceProviders.GOOGLE_REALTIME,
|
|
ServiceProviders.GOOGLE_VERTEX_REALTIME,
|
|
# ServiceProviders.SARVAM,
|
|
]
|
|
api_key: str | list[str]
|
|
|
|
@field_validator("api_key")
|
|
@classmethod
|
|
def validate_api_key(cls, v):
|
|
if v is None:
|
|
return v
|
|
if isinstance(v, list) and len(v) == 0:
|
|
raise ValueError("api_key list must not be empty")
|
|
return v
|
|
|
|
def __getattribute__(self, name: str):
|
|
if name == "api_key":
|
|
value = super().__getattribute__(name)
|
|
if value is None:
|
|
return value
|
|
if isinstance(value, list):
|
|
return random.choice(value)
|
|
return value
|
|
return super().__getattribute__(name)
|
|
|
|
def get_all_api_keys(self) -> list[str]:
|
|
"""Get all API keys as a list (bypasses random selection)."""
|
|
value = super().__getattribute__("api_key")
|
|
if value is None:
|
|
return []
|
|
if isinstance(value, list):
|
|
return list(value)
|
|
return [value]
|
|
|
|
|
|
class BaseLLMConfiguration(BaseServiceConfiguration):
|
|
model: str
|
|
|
|
|
|
class BaseTTSConfiguration(BaseServiceConfiguration):
|
|
model: str
|
|
|
|
|
|
class BaseSTTConfiguration(BaseServiceConfiguration):
|
|
model: str
|
|
|
|
|
|
class BaseEmbeddingsConfiguration(BaseServiceConfiguration):
|
|
model: str
|
|
|
|
|
|
# Unified registry for all service types
|
|
REGISTRY: Dict[ServiceType, Dict[str, Type[BaseServiceConfiguration]]] = {
|
|
ServiceType.LLM: {},
|
|
ServiceType.TTS: {},
|
|
ServiceType.STT: {},
|
|
ServiceType.EMBEDDINGS: {},
|
|
ServiceType.REALTIME: {},
|
|
}
|
|
|
|
T = TypeVar("T", bound=BaseServiceConfiguration)
|
|
|
|
|
|
def register_service(service_type: ServiceType):
|
|
"""Generic decorator for registering service configurations"""
|
|
|
|
def decorator(cls: Type[T]) -> Type[T]:
|
|
# Get provider from class attributes or field defaults
|
|
provider = getattr(cls, "provider", None)
|
|
if provider is None:
|
|
# Try to get from model fields
|
|
provider = cls.model_fields.get("provider", None)
|
|
if provider is not None:
|
|
provider = provider.default
|
|
if provider is None:
|
|
raise ValueError(f"Provider not specified for {cls.__name__}")
|
|
|
|
REGISTRY[service_type][provider] = cls
|
|
return cls
|
|
|
|
return decorator
|
|
|
|
|
|
# Convenience decorators
|
|
def register_llm(cls: Type[BaseLLMConfiguration]):
|
|
return register_service(ServiceType.LLM)(cls)
|
|
|
|
|
|
def register_tts(cls: Type[BaseTTSConfiguration]):
|
|
return register_service(ServiceType.TTS)(cls)
|
|
|
|
|
|
def register_stt(cls: Type[BaseSTTConfiguration]):
|
|
return register_service(ServiceType.STT)(cls)
|
|
|
|
|
|
def register_embeddings(cls: Type[BaseEmbeddingsConfiguration]):
|
|
return register_service(ServiceType.EMBEDDINGS)(cls)
|
|
|
|
|
|
def provider_model_config(
|
|
title: str,
|
|
*,
|
|
description: str | None = None,
|
|
provider_docs_url: str | None = None,
|
|
) -> ConfigDict:
|
|
json_schema_extra: dict[str, str] = {}
|
|
if description is not None:
|
|
json_schema_extra["description"] = description
|
|
if provider_docs_url is not None:
|
|
json_schema_extra["provider_docs_url"] = provider_docs_url
|
|
if json_schema_extra:
|
|
return ConfigDict(title=title, json_schema_extra=json_schema_extra)
|
|
return ConfigDict(title=title)
|
|
|
|
|
|
###################################################### LLM ########################################################################
|
|
|
|
# Suggested models for each provider (used for UI dropdown)
|
|
OPENAI_PROVIDER_MODEL_CONFIG = provider_model_config("OpenAI")
|
|
GOOGLE_PROVIDER_MODEL_CONFIG = provider_model_config("Google")
|
|
GROQ_PROVIDER_MODEL_CONFIG = provider_model_config("Groq")
|
|
OPENROUTER_PROVIDER_MODEL_CONFIG = provider_model_config("Open Router")
|
|
AZURE_OPENAI_PROVIDER_MODEL_CONFIG = provider_model_config("Azure OpenAI")
|
|
DOGRAH_PROVIDER_MODEL_CONFIG = provider_model_config("Dograh")
|
|
AWS_BEDROCK_PROVIDER_MODEL_CONFIG = provider_model_config("AWS Bedrock")
|
|
GOOGLE_VERTEX_PROVIDER_MODEL_CONFIG = provider_model_config("Google Vertex")
|
|
OPENAI_REALTIME_PROVIDER_MODEL_CONFIG = provider_model_config("OpenAI Realtime")
|
|
GROK_REALTIME_PROVIDER_MODEL_CONFIG = provider_model_config("Grok Realtime")
|
|
ULTRAVOX_REALTIME_PROVIDER_MODEL_CONFIG = provider_model_config("Ultravox Realtime")
|
|
GOOGLE_REALTIME_PROVIDER_MODEL_CONFIG = provider_model_config("Google Realtime")
|
|
GOOGLE_VERTEX_REALTIME_PROVIDER_MODEL_CONFIG = provider_model_config(
|
|
"Google Vertex Realtime"
|
|
)
|
|
DEEPGRAM_PROVIDER_MODEL_CONFIG = provider_model_config("Deepgram")
|
|
ELEVENLABS_PROVIDER_MODEL_CONFIG = provider_model_config("ElevenLabs")
|
|
CARTESIA_PROVIDER_MODEL_CONFIG = provider_model_config("Cartesia")
|
|
SARVAM_PROVIDER_MODEL_CONFIG = provider_model_config("Sarvam")
|
|
CAMB_PROVIDER_MODEL_CONFIG = provider_model_config("Camb.ai")
|
|
RIME_PROVIDER_MODEL_CONFIG = provider_model_config("Rime")
|
|
GOOGLE_CLOUD_PROVIDER_MODEL_CONFIG = provider_model_config("Google Cloud")
|
|
SPEECHMATICS_PROVIDER_MODEL_CONFIG = provider_model_config("Speechmatics")
|
|
ASSEMBLYAI_PROVIDER_MODEL_CONFIG = provider_model_config("AssemblyAI")
|
|
GLADIA_PROVIDER_MODEL_CONFIG = provider_model_config("Gladia")
|
|
SPEACHES_PROVIDER_MODEL_CONFIG = provider_model_config(
|
|
"Local Models (Speaches)",
|
|
description=(
|
|
"Self-hosted OpenAI-compatible local models. See the Speaches project "
|
|
"for setup and supported backends."
|
|
),
|
|
provider_docs_url="https://github.com/speaches-ai/speaches",
|
|
)
|
|
|
|
OPENAI_MODELS = [
|
|
"gpt-4.1",
|
|
"gpt-4.1-mini",
|
|
"gpt-4.1-nano",
|
|
"gpt-5",
|
|
"gpt-5-mini",
|
|
"gpt-5-nano",
|
|
"gpt-3.5-turbo",
|
|
]
|
|
|
|
GROQ_MODELS = [
|
|
"llama-3.3-70b-versatile",
|
|
"deepseek-r1-distill-llama-70b",
|
|
"qwen-qwq-32b",
|
|
"meta-llama/llama-4-scout-17b-16e-instruct",
|
|
"meta-llama/llama-4-maverick-17b-128e-instruct",
|
|
"gemma2-9b-it",
|
|
"llama-3.1-8b-instant",
|
|
"openai/gpt-oss-120b",
|
|
]
|
|
OPENROUTER_MODELS = [
|
|
"openai/gpt-4.1",
|
|
"openai/gpt-4.1-mini",
|
|
"anthropic/claude-sonnet-4",
|
|
"google/gemini-2.5-flash",
|
|
"google/gemini-2.0-flash",
|
|
"meta-llama/llama-3.3-70b-instruct",
|
|
"deepseek/deepseek-chat-v3-0324",
|
|
]
|
|
AZURE_MODELS = ["gpt-4.1-mini"]
|
|
DOGRAH_LLM_MODELS = ["default", "accurate", "fast", "lite", "zen"]
|
|
AWS_BEDROCK_MODELS = [
|
|
"us.amazon.nova-pro-v1:0",
|
|
"us.amazon.nova-lite-v1:0",
|
|
"us.amazon.nova-micro-v1:0",
|
|
"us.anthropic.claude-sonnet-4-20250514-v1:0",
|
|
"us.anthropic.claude-3-5-sonnet-20241022-v2:0",
|
|
"us.anthropic.claude-haiku-4-5-20251001-v1:0",
|
|
]
|
|
|
|
|
|
@register_llm
|
|
class OpenAILLMService(BaseLLMConfiguration):
|
|
model_config = OPENAI_PROVIDER_MODEL_CONFIG
|
|
provider: Literal[ServiceProviders.OPENAI] = ServiceProviders.OPENAI
|
|
model: str = Field(
|
|
default="gpt-4.1",
|
|
description="OpenAI chat model to use.",
|
|
json_schema_extra={"examples": OPENAI_MODELS, "allow_custom_input": True},
|
|
)
|
|
|
|
|
|
@register_llm
|
|
class GoogleLLMService(BaseLLMConfiguration):
|
|
model_config = GOOGLE_PROVIDER_MODEL_CONFIG
|
|
provider: Literal[ServiceProviders.GOOGLE] = ServiceProviders.GOOGLE
|
|
model: str = Field(
|
|
default="gemini-2.0-flash",
|
|
description="Gemini model on Google AI Studio (not Vertex).",
|
|
json_schema_extra={"examples": GOOGLE_MODELS, "allow_custom_input": True},
|
|
)
|
|
|
|
|
|
@register_llm
|
|
class GoogleVertexLLMConfiguration(BaseLLMConfiguration):
|
|
model_config = GOOGLE_VERTEX_PROVIDER_MODEL_CONFIG
|
|
provider: Literal[ServiceProviders.GOOGLE_VERTEX] = ServiceProviders.GOOGLE_VERTEX
|
|
model: str = Field(
|
|
default="gemini-2.5-flash",
|
|
description="Gemini model on Vertex AI.",
|
|
json_schema_extra={
|
|
"examples": GOOGLE_VERTEX_MODELS,
|
|
"allow_custom_input": True,
|
|
},
|
|
)
|
|
project_id: str = Field(description="Google Cloud project ID for Vertex AI.")
|
|
location: str = Field(
|
|
default="global",
|
|
description="GCP region for the Vertex AI endpoint (e.g. 'global').",
|
|
)
|
|
credentials: str | None = Field(
|
|
default=None,
|
|
description=(
|
|
"Paste the entire service-account JSON file contents. If omitted, "
|
|
"falls back to Application Default Credentials (ADC)."
|
|
),
|
|
json_schema_extra={"multiline": True},
|
|
)
|
|
api_key: str | list[str] | None = Field(
|
|
default=None,
|
|
description=(
|
|
"Not used for Vertex AI — authentication is via the service account "
|
|
"in `credentials` (or ADC). Leave blank."
|
|
),
|
|
)
|
|
|
|
|
|
@register_llm
|
|
class GroqLLMService(BaseLLMConfiguration):
|
|
model_config = GROQ_PROVIDER_MODEL_CONFIG
|
|
provider: Literal[ServiceProviders.GROQ] = ServiceProviders.GROQ
|
|
model: str = Field(
|
|
default="llama-3.3-70b-versatile",
|
|
description="Groq-hosted model identifier.",
|
|
json_schema_extra={"examples": GROQ_MODELS, "allow_custom_input": True},
|
|
)
|
|
|
|
|
|
@register_llm
|
|
class OpenRouterLLMConfiguration(BaseLLMConfiguration):
|
|
model_config = OPENROUTER_PROVIDER_MODEL_CONFIG
|
|
provider: Literal[ServiceProviders.OPENROUTER] = ServiceProviders.OPENROUTER
|
|
model: str = Field(
|
|
default="openai/gpt-4.1",
|
|
description="OpenRouter model slug in 'vendor/model' form.",
|
|
json_schema_extra={"examples": OPENROUTER_MODELS, "allow_custom_input": True},
|
|
)
|
|
|
|
base_url: str = Field(
|
|
default="https://openrouter.ai/api/v1",
|
|
description="Override only if proxying OpenRouter through your own gateway.",
|
|
)
|
|
|
|
|
|
@register_llm
|
|
class AzureLLMService(BaseLLMConfiguration):
|
|
model_config = AZURE_OPENAI_PROVIDER_MODEL_CONFIG
|
|
provider: Literal[ServiceProviders.AZURE] = ServiceProviders.AZURE
|
|
model: str = Field(
|
|
default="gpt-4.1-mini",
|
|
description="Azure deployment name (not the upstream OpenAI model id).",
|
|
json_schema_extra={"examples": AZURE_MODELS, "allow_custom_input": True},
|
|
)
|
|
|
|
endpoint: str = Field(
|
|
description="Azure OpenAI resource endpoint (e.g. https://<resource>.openai.azure.com).",
|
|
)
|
|
|
|
|
|
@register_llm
|
|
class DograhLLMService(BaseLLMConfiguration):
|
|
model_config = DOGRAH_PROVIDER_MODEL_CONFIG
|
|
provider: Literal[ServiceProviders.DOGRAH] = ServiceProviders.DOGRAH
|
|
model: str = Field(
|
|
default="default",
|
|
description="Dograh-hosted model tier.",
|
|
json_schema_extra={"examples": DOGRAH_LLM_MODELS, "allow_custom_input": True},
|
|
)
|
|
|
|
|
|
@register_llm
|
|
class AWSBedrockLLMConfiguration(BaseLLMConfiguration):
|
|
model_config = AWS_BEDROCK_PROVIDER_MODEL_CONFIG
|
|
provider: Literal[ServiceProviders.AWS_BEDROCK] = ServiceProviders.AWS_BEDROCK
|
|
model: str = Field(
|
|
default="us.amazon.nova-pro-v1:0",
|
|
description="Bedrock model ID — include the region inference-profile prefix (e.g. 'us.').",
|
|
json_schema_extra={"examples": AWS_BEDROCK_MODELS, "allow_custom_input": True},
|
|
)
|
|
aws_access_key: str = Field(
|
|
default="",
|
|
description="AWS access key ID with bedrock:InvokeModel permission.",
|
|
)
|
|
aws_secret_key: str = Field(
|
|
default="",
|
|
description="AWS secret access key paired with the access key ID.",
|
|
)
|
|
aws_region: str = Field(
|
|
default="us-east-1",
|
|
description="AWS region where the Bedrock model is available.",
|
|
)
|
|
api_key: str | list[str] | None = Field(
|
|
default=None,
|
|
description="Not used for Bedrock — authentication is via the AWS credentials above. Leave blank.",
|
|
)
|
|
|
|
|
|
SPEACHES_LLM_MODELS = ["llama3", "mistral", "phi3", "qwen2", "gemma2", "deepseek-r1"]
|
|
|
|
|
|
@register_llm
|
|
class SpeachesLLMConfiguration(BaseLLMConfiguration):
|
|
model_config = SPEACHES_PROVIDER_MODEL_CONFIG
|
|
provider: Literal[ServiceProviders.SPEACHES] = ServiceProviders.SPEACHES
|
|
model: str = Field(
|
|
default="llama3",
|
|
description="Model name as exposed by your OpenAI-compatible server.",
|
|
json_schema_extra={
|
|
"examples": SPEACHES_LLM_MODELS,
|
|
"allow_custom_input": True,
|
|
},
|
|
)
|
|
base_url: str = Field(
|
|
default="http://localhost:11434/v1",
|
|
description="OpenAI-compatible endpoint (Ollama, vLLM, etc.).",
|
|
)
|
|
api_key: str | list[str] | None = Field(
|
|
default=None,
|
|
description="Usually not required for self-hosted endpoints. Leave blank unless your server enforces one.",
|
|
)
|
|
|
|
|
|
MINIMAX_MODELS = [
|
|
"MiniMax-M2.7",
|
|
"MiniMax-M2.7-highspeed",
|
|
]
|
|
|
|
|
|
@register_llm
|
|
class MiniMaxLLMConfiguration(BaseLLMConfiguration):
|
|
provider: Literal[ServiceProviders.MINIMAX] = ServiceProviders.MINIMAX
|
|
model: str = Field(
|
|
default="MiniMax-M2.7",
|
|
description="MiniMax chat model.",
|
|
json_schema_extra={"examples": MINIMAX_MODELS, "allow_custom_input": True},
|
|
)
|
|
base_url: str = Field(
|
|
default="https://api.minimax.io/v1",
|
|
description="MiniMax OpenAI-compatible API endpoint.",
|
|
)
|
|
temperature: float = Field(
|
|
default=1.0,
|
|
gt=0.0,
|
|
le=2.0,
|
|
description="Sampling temperature. MiniMax requires > 0.",
|
|
)
|
|
|
|
|
|
OPENAI_REALTIME_MODELS = ["gpt-realtime-2"]
|
|
OPENAI_REALTIME_VOICES = [
|
|
"alloy",
|
|
"ash",
|
|
"ballad",
|
|
"coral",
|
|
"echo",
|
|
"sage",
|
|
"shimmer",
|
|
"verse",
|
|
]
|
|
|
|
|
|
@register_service(ServiceType.REALTIME)
|
|
class OpenAIRealtimeLLMConfiguration(BaseLLMConfiguration):
|
|
model_config = OPENAI_REALTIME_PROVIDER_MODEL_CONFIG
|
|
provider: Literal[ServiceProviders.OPENAI_REALTIME] = (
|
|
ServiceProviders.OPENAI_REALTIME
|
|
)
|
|
model: str = Field(
|
|
default="gpt-realtime-2",
|
|
description="OpenAI realtime (speech-to-speech) model.",
|
|
json_schema_extra={
|
|
"examples": OPENAI_REALTIME_MODELS,
|
|
"allow_custom_input": True,
|
|
},
|
|
)
|
|
voice: str = Field(
|
|
default="alloy",
|
|
description="Voice the model speaks in.",
|
|
json_schema_extra={
|
|
"examples": OPENAI_REALTIME_VOICES,
|
|
"allow_custom_input": True,
|
|
},
|
|
)
|
|
|
|
|
|
GROK_REALTIME_MODELS = ["grok-voice-think-fast-1.0"]
|
|
GROK_REALTIME_VOICES = ["Ara", "Rex", "Sal", "Eve", "Leo"]
|
|
ULTRAVOX_REALTIME_MODELS = ["ultravox-v0.7", "fixie-ai/ultravox"]
|
|
|
|
|
|
@register_service(ServiceType.REALTIME)
|
|
class GrokRealtimeLLMConfiguration(BaseLLMConfiguration):
|
|
model_config = GROK_REALTIME_PROVIDER_MODEL_CONFIG
|
|
provider: Literal[ServiceProviders.GROK_REALTIME] = ServiceProviders.GROK_REALTIME
|
|
model: str = Field(
|
|
default="grok-voice-think-fast-1.0",
|
|
description="Grok realtime voice-agent model.",
|
|
json_schema_extra={
|
|
"examples": GROK_REALTIME_MODELS,
|
|
"allow_custom_input": True,
|
|
},
|
|
)
|
|
voice: str = Field(
|
|
default="Ara",
|
|
description="Voice the model speaks in.",
|
|
json_schema_extra={
|
|
"examples": GROK_REALTIME_VOICES,
|
|
"allow_custom_input": True,
|
|
},
|
|
)
|
|
|
|
|
|
@register_service(ServiceType.REALTIME)
|
|
class UltravoxRealtimeLLMConfiguration(BaseLLMConfiguration):
|
|
model_config = ULTRAVOX_REALTIME_PROVIDER_MODEL_CONFIG
|
|
provider: Literal[ServiceProviders.ULTRAVOX_REALTIME] = (
|
|
ServiceProviders.ULTRAVOX_REALTIME
|
|
)
|
|
model: str = Field(
|
|
default="ultravox-v0.7",
|
|
description="Ultravox realtime voice-agent model.",
|
|
json_schema_extra={
|
|
"examples": ULTRAVOX_REALTIME_MODELS,
|
|
"allow_custom_input": True,
|
|
},
|
|
)
|
|
voice: str = Field(
|
|
default="Mark",
|
|
description="Ultravox voice name or voice ID.",
|
|
)
|
|
|
|
|
|
@register_service(ServiceType.REALTIME)
|
|
class GoogleRealtimeLLMConfiguration(BaseLLMConfiguration):
|
|
model_config = GOOGLE_REALTIME_PROVIDER_MODEL_CONFIG
|
|
provider: Literal[ServiceProviders.GOOGLE_REALTIME] = (
|
|
ServiceProviders.GOOGLE_REALTIME
|
|
)
|
|
model: str = Field(
|
|
default="gemini-3.1-flash-live-preview",
|
|
description="Gemini Live model on Google AI Studio (not Vertex).",
|
|
json_schema_extra={
|
|
"examples": GOOGLE_REALTIME_MODELS,
|
|
"allow_custom_input": True,
|
|
},
|
|
)
|
|
voice: str = Field(
|
|
default="Puck",
|
|
description="Voice the model speaks in.",
|
|
json_schema_extra={
|
|
"examples": GOOGLE_REALTIME_VOICES,
|
|
"allow_custom_input": True,
|
|
},
|
|
)
|
|
language: str = Field(
|
|
default="en",
|
|
description="ISO 639-1 language code.",
|
|
json_schema_extra={
|
|
"examples": GOOGLE_REALTIME_LANGUAGES,
|
|
"allow_custom_input": True,
|
|
},
|
|
)
|
|
|
|
|
|
@register_service(ServiceType.REALTIME)
|
|
class GoogleVertexRealtimeLLMConfiguration(BaseLLMConfiguration):
|
|
model_config = GOOGLE_VERTEX_REALTIME_PROVIDER_MODEL_CONFIG
|
|
provider: Literal[ServiceProviders.GOOGLE_VERTEX_REALTIME] = (
|
|
ServiceProviders.GOOGLE_VERTEX_REALTIME
|
|
)
|
|
model: str = Field(
|
|
default="google/gemini-live-2.5-flash-native-audio",
|
|
description="Vertex AI publisher/model identifier.",
|
|
json_schema_extra={
|
|
"examples": GOOGLE_VERTEX_REALTIME_MODELS,
|
|
"allow_custom_input": True,
|
|
},
|
|
)
|
|
voice: str = Field(
|
|
default="Charon",
|
|
description="Voice the model speaks in.",
|
|
json_schema_extra={
|
|
"examples": GOOGLE_VERTEX_REALTIME_VOICES,
|
|
"allow_custom_input": True,
|
|
},
|
|
)
|
|
language: str = Field(
|
|
default="en",
|
|
description="BCP-47 language code (e.g. 'en-US').",
|
|
json_schema_extra={
|
|
"examples": GOOGLE_VERTEX_REALTIME_LANGUAGES,
|
|
"allow_custom_input": True,
|
|
},
|
|
)
|
|
project_id: str = Field(description="Google Cloud project ID for Vertex AI.")
|
|
location: str = Field(
|
|
default="global",
|
|
description="GCP region for the Vertex AI endpoint (e.g. 'global').",
|
|
)
|
|
credentials: str | None = Field(
|
|
default=None,
|
|
description=(
|
|
"Paste the entire service-account JSON file contents. If omitted, "
|
|
"falls back to Application Default Credentials (ADC)."
|
|
),
|
|
json_schema_extra={"multiline": True},
|
|
)
|
|
api_key: str | list[str] | None = Field(
|
|
default=None,
|
|
description=(
|
|
"Not used for Vertex AI — authentication is via the service account "
|
|
"in `credentials` (or ADC). Leave blank."
|
|
),
|
|
)
|
|
|
|
|
|
REALTIME_PROVIDERS = {
|
|
ServiceProviders.OPENAI_REALTIME.value,
|
|
ServiceProviders.GROK_REALTIME.value,
|
|
ServiceProviders.ULTRAVOX_REALTIME.value,
|
|
ServiceProviders.GOOGLE_REALTIME.value,
|
|
ServiceProviders.GOOGLE_VERTEX_REALTIME.value,
|
|
}
|
|
|
|
|
|
LLMConfig = Annotated[
|
|
Union[
|
|
OpenAILLMService,
|
|
GoogleVertexLLMConfiguration,
|
|
GroqLLMService,
|
|
OpenRouterLLMConfiguration,
|
|
GoogleLLMService,
|
|
AzureLLMService,
|
|
DograhLLMService,
|
|
AWSBedrockLLMConfiguration,
|
|
SpeachesLLMConfiguration,
|
|
MiniMaxLLMConfiguration,
|
|
],
|
|
Field(discriminator="provider"),
|
|
]
|
|
|
|
RealtimeConfig = Annotated[
|
|
Union[
|
|
OpenAIRealtimeLLMConfiguration,
|
|
GrokRealtimeLLMConfiguration,
|
|
UltravoxRealtimeLLMConfiguration,
|
|
GoogleRealtimeLLMConfiguration,
|
|
GoogleVertexRealtimeLLMConfiguration,
|
|
],
|
|
Field(discriminator="provider"),
|
|
]
|
|
|
|
###################################################### TTS ########################################################################
|
|
|
|
|
|
@register_tts
|
|
class DeepgramTTSConfiguration(BaseServiceConfiguration):
|
|
model_config = DEEPGRAM_PROVIDER_MODEL_CONFIG
|
|
provider: Literal[ServiceProviders.DEEPGRAM] = ServiceProviders.DEEPGRAM
|
|
voice: str = Field(
|
|
default="aura-2-helena-en",
|
|
description="Deepgram voice ID (model is inferred from the 'aura-N' prefix).",
|
|
)
|
|
|
|
@computed_field
|
|
@property
|
|
def model(self) -> str:
|
|
# Deepgram model's name is inferred using the voice name.
|
|
# It can either contain aura-2 or aura-1
|
|
if "aura-2" in self.voice:
|
|
return "aura-2"
|
|
elif "aura-1" in self.voice:
|
|
return "aura-1"
|
|
else:
|
|
# Default fallback
|
|
return "aura-2"
|
|
|
|
|
|
ELEVENLABS_TTS_MODELS = ["eleven_flash_v2_5"]
|
|
|
|
|
|
@register_tts
|
|
class ElevenlabsTTSConfiguration(BaseServiceConfiguration):
|
|
model_config = ELEVENLABS_PROVIDER_MODEL_CONFIG
|
|
provider: Literal[ServiceProviders.ELEVENLABS] = ServiceProviders.ELEVENLABS
|
|
voice: str = Field(
|
|
default="21m00Tcm4TlvDq8ikWAM",
|
|
description="ElevenLabs voice ID from your Voice Library.",
|
|
)
|
|
speed: float = Field(default=1.0, ge=0.1, le=2.0, description="Speed of the voice.")
|
|
model: str = Field(
|
|
default="eleven_flash_v2_5",
|
|
description="ElevenLabs TTS model.",
|
|
json_schema_extra={"examples": ELEVENLABS_TTS_MODELS},
|
|
)
|
|
base_url: str = Field(
|
|
default="https://api.elevenlabs.io",
|
|
description=(
|
|
"ElevenLabs API base URL. Override to use a Data Residency endpoint "
|
|
"(e.g. https://api.eu.residency.elevenlabs.io) for GDPR / HIPAA / "
|
|
"regional compliance."
|
|
),
|
|
)
|
|
|
|
|
|
@register_tts
|
|
class GoogleTTSConfiguration(BaseTTSConfiguration):
|
|
model_config = GOOGLE_CLOUD_PROVIDER_MODEL_CONFIG
|
|
provider: Literal[ServiceProviders.GOOGLE] = ServiceProviders.GOOGLE
|
|
model: str = Field(
|
|
default="chirp_3_hd",
|
|
description=(
|
|
"Google Cloud low-latency TTS engine. Dograh maps this to Pipecat's "
|
|
"streaming Google TTS service for Chirp 3 HD and Journey voices."
|
|
),
|
|
json_schema_extra={
|
|
"examples": GOOGLE_TTS_MODELS,
|
|
"allow_custom_input": True,
|
|
},
|
|
)
|
|
voice: str = Field(
|
|
default="en-US-Chirp3-HD-Charon",
|
|
description="Google Cloud voice name. Use a Chirp 3 HD or Journey voice for streaming TTS.",
|
|
json_schema_extra={
|
|
"examples": GOOGLE_TTS_VOICES,
|
|
"allow_custom_input": True,
|
|
},
|
|
)
|
|
language: str = Field(
|
|
default="en-US",
|
|
description="BCP-47 language code for synthesis.",
|
|
json_schema_extra={
|
|
"examples": GOOGLE_TTS_LANGUAGES,
|
|
"allow_custom_input": True,
|
|
},
|
|
)
|
|
speed: float = Field(
|
|
default=1.0,
|
|
ge=0.25,
|
|
le=2.0,
|
|
description="Speech speed multiplier for Google streaming TTS.",
|
|
)
|
|
location: str | None = Field(
|
|
default=None,
|
|
description=(
|
|
"Optional Google Cloud regional Text-to-Speech endpoint (for example "
|
|
"'us-central1'). Leave blank to use the default endpoint."
|
|
),
|
|
)
|
|
credentials: str | None = Field(
|
|
default=None,
|
|
description=(
|
|
"Paste the entire Google Cloud service-account JSON. If omitted, "
|
|
"the server falls back to Application Default Credentials (ADC)."
|
|
),
|
|
json_schema_extra={"multiline": True},
|
|
)
|
|
api_key: str | list[str] | None = Field(
|
|
default=None,
|
|
description="Not used for Google Cloud TTS. Leave blank.",
|
|
)
|
|
|
|
|
|
OPENAI_TTS_MODELS = ["gpt-4o-mini-tts"]
|
|
|
|
|
|
@register_tts
|
|
class OpenAITTSService(BaseTTSConfiguration):
|
|
model_config = OPENAI_PROVIDER_MODEL_CONFIG
|
|
provider: Literal[ServiceProviders.OPENAI] = ServiceProviders.OPENAI
|
|
model: str = Field(
|
|
default="gpt-4o-mini-tts",
|
|
description="OpenAI TTS model.",
|
|
json_schema_extra={"examples": OPENAI_TTS_MODELS},
|
|
)
|
|
voice: str = Field(
|
|
default="alloy",
|
|
description="OpenAI TTS voice name.",
|
|
)
|
|
|
|
|
|
DOGRAH_TTS_MODELS = ["default"]
|
|
|
|
|
|
@register_tts
|
|
class DograhTTSService(BaseTTSConfiguration):
|
|
model_config = DOGRAH_PROVIDER_MODEL_CONFIG
|
|
provider: Literal[ServiceProviders.DOGRAH] = ServiceProviders.DOGRAH
|
|
model: str = Field(
|
|
default="default",
|
|
description="Dograh TTS tier.",
|
|
json_schema_extra={"examples": DOGRAH_TTS_MODELS},
|
|
)
|
|
voice: str = Field(
|
|
default="default",
|
|
description="Voice preset.",
|
|
)
|
|
speed: float = Field(default=1.0, ge=0.5, le=2.0, description="Speed of the voice.")
|
|
|
|
|
|
CARTESIA_TTS_MODELS = ["sonic-3"]
|
|
|
|
|
|
@register_tts
|
|
class CartesiaTTSConfiguration(BaseTTSConfiguration):
|
|
model_config = CARTESIA_PROVIDER_MODEL_CONFIG
|
|
provider: Literal[ServiceProviders.CARTESIA] = ServiceProviders.CARTESIA
|
|
model: str = Field(
|
|
default="sonic-3",
|
|
description="Cartesia TTS model.",
|
|
json_schema_extra={"examples": CARTESIA_TTS_MODELS},
|
|
)
|
|
voice: str = Field(
|
|
default="3faa81ae-d3d8-4ab1-9e44-e50e46d33c30",
|
|
description="Cartesia voice UUID from your Cartesia dashboard.",
|
|
)
|
|
speed: float = Field(default=1.0, ge=0.6, le=1.5, description="Speed of the voice.")
|
|
volume: float = Field(
|
|
default=1.0,
|
|
ge=0.5,
|
|
le=2.0,
|
|
description="Volume multiplier for generated speech.",
|
|
)
|
|
|
|
|
|
@register_tts
|
|
class SarvamTTSConfiguration(BaseTTSConfiguration):
|
|
model_config = SARVAM_PROVIDER_MODEL_CONFIG
|
|
provider: Literal[ServiceProviders.SARVAM] = ServiceProviders.SARVAM
|
|
model: str = Field(
|
|
default="bulbul:v2",
|
|
description="Sarvam TTS model (voice list depends on this).",
|
|
json_schema_extra={"examples": SARVAM_TTS_MODELS},
|
|
)
|
|
voice: str = Field(
|
|
default="anushka",
|
|
description="Sarvam voice name; must match the selected model's voice list.",
|
|
json_schema_extra={
|
|
"examples": SARVAM_V2_VOICES,
|
|
"model_options": {
|
|
"bulbul:v2": SARVAM_V2_VOICES,
|
|
"bulbul:v3": SARVAM_V3_VOICES,
|
|
},
|
|
},
|
|
)
|
|
language: str = Field(
|
|
default="hi-IN",
|
|
description="BCP-47 Indian-language code (e.g. hi-IN, en-IN).",
|
|
json_schema_extra={"examples": SARVAM_LANGUAGES},
|
|
)
|
|
|
|
|
|
CAMB_TTS_MODELS = ["mars-flash", "mars-pro", "mars-instruct"]
|
|
|
|
|
|
@register_tts
|
|
class CambTTSConfiguration(BaseTTSConfiguration):
|
|
model_config = CAMB_PROVIDER_MODEL_CONFIG
|
|
provider: Literal[ServiceProviders.CAMB] = ServiceProviders.CAMB
|
|
model: str = Field(
|
|
default="mars-flash",
|
|
description="Camb.ai TTS model.",
|
|
json_schema_extra={"examples": CAMB_TTS_MODELS},
|
|
)
|
|
voice: str = Field(default="147320", description="Camb.ai voice ID.")
|
|
language: str = Field(default="en-us", description="BCP-47 language code.")
|
|
|
|
|
|
RIME_TTS_MODELS = ["arcana", "mistv3", "mistv2", "mist"]
|
|
RIME_TTS_LANGUAGES = ["en", "de", "fr", "es", "hi"]
|
|
|
|
|
|
@register_tts
|
|
class RimeTTSConfiguration(BaseTTSConfiguration):
|
|
model_config = RIME_PROVIDER_MODEL_CONFIG
|
|
provider: Literal[ServiceProviders.RIME] = ServiceProviders.RIME
|
|
model: str = Field(
|
|
default="arcana",
|
|
description="Rime TTS model.",
|
|
json_schema_extra={"examples": RIME_TTS_MODELS, "allow_custom_input": True},
|
|
)
|
|
voice: str = Field(
|
|
default="celeste",
|
|
description="Rime voice ID.",
|
|
)
|
|
speed: float = Field(
|
|
default=1.0, ge=0.5, le=2.0, description="Speech speed multiplier."
|
|
)
|
|
language: str = Field(
|
|
default="en",
|
|
description="ISO 639-1 language code.",
|
|
json_schema_extra={"examples": RIME_TTS_LANGUAGES, "allow_custom_input": True},
|
|
)
|
|
|
|
|
|
SPEACHES_TTS_MODELS = ["hexgrad/Kokoro-82M"]
|
|
|
|
|
|
@register_tts
|
|
class SpeachesTTSConfiguration(BaseTTSConfiguration):
|
|
model_config = SPEACHES_PROVIDER_MODEL_CONFIG
|
|
provider: Literal[ServiceProviders.SPEACHES] = ServiceProviders.SPEACHES
|
|
model: str = Field(
|
|
default="kokoro",
|
|
description="Model name as served by your TTS endpoint (e.g. Kokoro-FastAPI).",
|
|
json_schema_extra={
|
|
"examples": SPEACHES_TTS_MODELS,
|
|
"allow_custom_input": True,
|
|
},
|
|
)
|
|
voice: str = Field(
|
|
default="af_heart",
|
|
json_schema_extra={"allow_custom_input": True},
|
|
description="Voice ID for the TTS engine.",
|
|
)
|
|
base_url: str = Field(
|
|
default="http://localhost:8000/v1",
|
|
description="OpenAI-compatible TTS endpoint (Kokoro-FastAPI, etc.).",
|
|
)
|
|
speed: float = Field(
|
|
default=1.0, ge=0.25, le=4.0, description="Speech speed (0.25 to 4.0)."
|
|
)
|
|
api_key: str | list[str] | None = Field(
|
|
default=None,
|
|
description="Usually not required for self-hosted TTS. Leave blank unless enforced.",
|
|
)
|
|
|
|
|
|
MINIMAX_TTS_MODELS = ["speech-2.8-hd", "speech-2.8-turbo"]
|
|
MINIMAX_TTS_VOICES = [
|
|
"English_Graceful_Lady",
|
|
"English_Insightful_Speaker",
|
|
"English_radiant_girl",
|
|
"English_Persuasive_Man",
|
|
"English_Lucky_Robot",
|
|
"English_expressive_narrator",
|
|
]
|
|
|
|
|
|
@register_tts
|
|
class MiniMaxTTSConfiguration(BaseTTSConfiguration):
|
|
provider: Literal[ServiceProviders.MINIMAX] = ServiceProviders.MINIMAX
|
|
model: str = Field(
|
|
default="speech-2.8-hd",
|
|
description="MiniMax TTS model.",
|
|
json_schema_extra={"examples": MINIMAX_TTS_MODELS},
|
|
)
|
|
voice: str = Field(
|
|
default="English_Graceful_Lady",
|
|
description="MiniMax voice ID.",
|
|
json_schema_extra={"examples": MINIMAX_TTS_VOICES, "allow_custom_input": True},
|
|
)
|
|
base_url: str = Field(
|
|
default="https://api.minimax.io/v1/t2a_v2",
|
|
description=(
|
|
"MiniMax TTS API endpoint (must include the /v1/t2a_v2 path). "
|
|
"Defaults to the global endpoint; override with "
|
|
"https://api.minimaxi.chat/v1/t2a_v2 (mainland China) or "
|
|
"https://api-uw.minimax.io/v1/t2a_v2 (US-West)."
|
|
),
|
|
)
|
|
speed: float = Field(
|
|
default=1.0, ge=0.5, le=2.0, description="Speech speed (0.5 to 2.0)."
|
|
)
|
|
group_id: str = Field(
|
|
description="MiniMax Group ID (found in your MiniMax dashboard under Account → Group).",
|
|
)
|
|
|
|
|
|
TTSConfig = Annotated[
|
|
Union[
|
|
DeepgramTTSConfiguration,
|
|
GoogleTTSConfiguration,
|
|
OpenAITTSService,
|
|
ElevenlabsTTSConfiguration,
|
|
CartesiaTTSConfiguration,
|
|
DograhTTSService,
|
|
SarvamTTSConfiguration,
|
|
CambTTSConfiguration,
|
|
RimeTTSConfiguration,
|
|
SpeachesTTSConfiguration,
|
|
MiniMaxTTSConfiguration,
|
|
],
|
|
Field(discriminator="provider"),
|
|
]
|
|
|
|
###################################################### STT ########################################################################
|
|
|
|
|
|
@register_stt
|
|
class DeepgramSTTConfiguration(BaseSTTConfiguration):
|
|
model_config = DEEPGRAM_PROVIDER_MODEL_CONFIG
|
|
provider: Literal[ServiceProviders.DEEPGRAM] = ServiceProviders.DEEPGRAM
|
|
model: str = Field(
|
|
default="nova-3-general",
|
|
description="Deepgram STT model.",
|
|
json_schema_extra={"examples": DEEPGRAM_STT_MODELS},
|
|
)
|
|
language: str = Field(
|
|
default="multi",
|
|
description="Language code; 'multi' enables auto-detect (Nova-3 only).",
|
|
json_schema_extra={
|
|
"examples": DEEPGRAM_LANGUAGES,
|
|
"model_options": {
|
|
"nova-3-general": DEEPGRAM_LANGUAGES,
|
|
"flux-general-en": ("en",),
|
|
},
|
|
},
|
|
)
|
|
|
|
|
|
CARTESIA_STT_MODELS = ["ink-whisper"]
|
|
|
|
|
|
@register_stt
|
|
class CartesiaSTTConfiguration(BaseSTTConfiguration):
|
|
model_config = CARTESIA_PROVIDER_MODEL_CONFIG
|
|
provider: Literal[ServiceProviders.CARTESIA] = ServiceProviders.CARTESIA
|
|
model: str = Field(
|
|
default="ink-whisper",
|
|
description="Cartesia STT model.",
|
|
json_schema_extra={"examples": CARTESIA_STT_MODELS},
|
|
)
|
|
|
|
|
|
OPENAI_STT_MODELS = ["gpt-4o-transcribe"]
|
|
|
|
|
|
@register_stt
|
|
class OpenAISTTConfiguration(BaseSTTConfiguration):
|
|
model_config = OPENAI_PROVIDER_MODEL_CONFIG
|
|
provider: Literal[ServiceProviders.OPENAI] = ServiceProviders.OPENAI
|
|
model: str = Field(
|
|
default="gpt-4o-transcribe",
|
|
description="OpenAI transcription model.",
|
|
json_schema_extra={"examples": OPENAI_STT_MODELS},
|
|
)
|
|
|
|
|
|
@register_stt
|
|
class GoogleSTTConfiguration(BaseSTTConfiguration):
|
|
model_config = GOOGLE_CLOUD_PROVIDER_MODEL_CONFIG
|
|
provider: Literal[ServiceProviders.GOOGLE] = ServiceProviders.GOOGLE
|
|
model: str = Field(
|
|
default="latest_long",
|
|
description="Google Cloud Speech-to-Text V2 recognition model.",
|
|
json_schema_extra={
|
|
"examples": GOOGLE_STT_MODELS,
|
|
"allow_custom_input": True,
|
|
},
|
|
)
|
|
language: str = Field(
|
|
default="en-US",
|
|
description="Primary BCP-47 language code for recognition.",
|
|
json_schema_extra={
|
|
"examples": GOOGLE_STT_LANGUAGES,
|
|
"allow_custom_input": True,
|
|
"docs_url": "https://docs.cloud.google.com/speech-to-text/docs/speech-to-text-supported-languages",
|
|
},
|
|
)
|
|
location: str = Field(
|
|
default="global",
|
|
description="Google Cloud Speech-to-Text region (for example 'global' or 'us-central1').",
|
|
)
|
|
credentials: str | None = Field(
|
|
default=None,
|
|
description=(
|
|
"Paste the entire Google Cloud service-account JSON. If omitted, "
|
|
"the server falls back to Application Default Credentials (ADC)."
|
|
),
|
|
json_schema_extra={"multiline": True},
|
|
)
|
|
api_key: str | list[str] | None = Field(
|
|
default=None,
|
|
description="Not used for Google Cloud STT. Leave blank.",
|
|
)
|
|
|
|
|
|
# Dograh STT Service
|
|
DOGRAH_STT_MODELS = ["default"]
|
|
DOGRAH_STT_LANGUAGES = DEEPGRAM_LANGUAGES
|
|
|
|
|
|
@register_stt
|
|
class DograhSTTService(BaseSTTConfiguration):
|
|
model_config = DOGRAH_PROVIDER_MODEL_CONFIG
|
|
provider: Literal[ServiceProviders.DOGRAH] = ServiceProviders.DOGRAH
|
|
model: str = Field(
|
|
default="default",
|
|
description="Dograh STT tier.",
|
|
json_schema_extra={"examples": DOGRAH_STT_MODELS},
|
|
)
|
|
language: str = Field(
|
|
default="multi",
|
|
description="Language code; use 'multi' for auto-detect.",
|
|
json_schema_extra={"examples": DOGRAH_STT_LANGUAGES},
|
|
)
|
|
|
|
|
|
@register_stt
|
|
class SarvamSTTConfiguration(BaseSTTConfiguration):
|
|
model_config = SARVAM_PROVIDER_MODEL_CONFIG
|
|
provider: Literal[ServiceProviders.SARVAM] = ServiceProviders.SARVAM
|
|
model: str = Field(
|
|
default="saarika:v2.5",
|
|
description="Sarvam STT model.",
|
|
json_schema_extra={"examples": SARVAM_STT_MODELS},
|
|
)
|
|
language: str = Field(
|
|
default="hi-IN",
|
|
description="BCP-47 Indian-language code.",
|
|
json_schema_extra={"examples": SARVAM_LANGUAGES},
|
|
)
|
|
|
|
|
|
@register_stt
|
|
class SpeechmaticsSTTConfiguration(BaseSTTConfiguration):
|
|
model_config = SPEECHMATICS_PROVIDER_MODEL_CONFIG
|
|
provider: Literal[ServiceProviders.SPEECHMATICS] = ServiceProviders.SPEECHMATICS
|
|
model: str = Field(
|
|
default="enhanced",
|
|
description="Speechmatics operating point: 'standard' or 'enhanced'.",
|
|
)
|
|
language: str = Field(
|
|
default="en",
|
|
description="ISO 639-1 language code.",
|
|
json_schema_extra={"examples": SPEECHMATICS_STT_LANGUAGES},
|
|
)
|
|
|
|
|
|
SPEACHES_STT_MODELS = [
|
|
"Systran/faster-distil-whisper-small.en",
|
|
"Systran/faster-whisper-large-v3",
|
|
]
|
|
SPEACHES_STT_LANGUAGES = ["en", "ar", "nl", "fr", "de", "hi", "it", "pt", "es"]
|
|
|
|
|
|
@register_stt
|
|
class SpeachesSTTConfiguration(BaseSTTConfiguration):
|
|
model_config = SPEACHES_PROVIDER_MODEL_CONFIG
|
|
provider: Literal[ServiceProviders.SPEACHES] = ServiceProviders.SPEACHES
|
|
model: str = Field(
|
|
default="Systran/faster-distil-whisper-small.en",
|
|
description="Whisper model identifier as served by your STT endpoint.",
|
|
json_schema_extra={
|
|
"examples": SPEACHES_STT_MODELS,
|
|
"allow_custom_input": True,
|
|
},
|
|
)
|
|
language: str = Field(
|
|
default="en",
|
|
description="ISO 639-1 language code.",
|
|
json_schema_extra={
|
|
"examples": SPEACHES_STT_LANGUAGES,
|
|
"allow_custom_input": True,
|
|
},
|
|
)
|
|
base_url: str = Field(
|
|
default="http://localhost:8000/v1",
|
|
description="OpenAI-compatible STT endpoint (Speaches, etc.).",
|
|
)
|
|
api_key: str | list[str] | None = Field(
|
|
default=None,
|
|
description="Usually not required for self-hosted STT. Leave blank unless enforced.",
|
|
)
|
|
|
|
|
|
ASSEMBLYAI_STT_MODELS = ["u3-rt-pro"]
|
|
ASSEMBLYAI_STT_LANGUAGES = ["en", "es", "de", "fr", "pt", "it"]
|
|
|
|
|
|
@register_stt
|
|
class AssemblyAISTTConfiguration(BaseSTTConfiguration):
|
|
model_config = ASSEMBLYAI_PROVIDER_MODEL_CONFIG
|
|
provider: Literal[ServiceProviders.ASSEMBLYAI] = ServiceProviders.ASSEMBLYAI
|
|
model: str = Field(
|
|
default="u3-rt-pro",
|
|
description="AssemblyAI realtime STT model.",
|
|
json_schema_extra={"examples": ASSEMBLYAI_STT_MODELS},
|
|
)
|
|
language: str = Field(
|
|
default="en",
|
|
description="ISO 639-1 language code.",
|
|
json_schema_extra={"examples": ASSEMBLYAI_STT_LANGUAGES},
|
|
)
|
|
|
|
|
|
@register_stt
|
|
class GladiaSTTConfiguration(BaseSTTConfiguration):
|
|
model_config = GLADIA_PROVIDER_MODEL_CONFIG
|
|
provider: Literal[ServiceProviders.GLADIA] = ServiceProviders.GLADIA
|
|
model: str = Field(
|
|
default="solaria-1",
|
|
description="Gladia STT model.",
|
|
json_schema_extra={"examples": GLADIA_STT_MODELS},
|
|
)
|
|
language: str = Field(
|
|
default="en",
|
|
description="ISO 639-1 language code.",
|
|
json_schema_extra={"examples": GLADIA_STT_LANGUAGES},
|
|
)
|
|
|
|
|
|
STTConfig = Annotated[
|
|
Union[
|
|
DeepgramSTTConfiguration,
|
|
CartesiaSTTConfiguration,
|
|
OpenAISTTConfiguration,
|
|
GoogleSTTConfiguration,
|
|
DograhSTTService,
|
|
SpeechmaticsSTTConfiguration,
|
|
SarvamSTTConfiguration,
|
|
SpeachesSTTConfiguration,
|
|
AssemblyAISTTConfiguration,
|
|
GladiaSTTConfiguration,
|
|
],
|
|
Field(discriminator="provider"),
|
|
]
|
|
|
|
###################################################### EMBEDDINGS ########################################################################
|
|
|
|
OPENAI_EMBEDDING_MODELS = ["text-embedding-3-small"]
|
|
|
|
|
|
@register_embeddings
|
|
class OpenAIEmbeddingsConfiguration(BaseEmbeddingsConfiguration):
|
|
model_config = OPENAI_PROVIDER_MODEL_CONFIG
|
|
provider: Literal[ServiceProviders.OPENAI] = ServiceProviders.OPENAI
|
|
model: str = Field(
|
|
default="text-embedding-3-small",
|
|
description="OpenAI embedding model.",
|
|
json_schema_extra={"examples": OPENAI_EMBEDDING_MODELS},
|
|
)
|
|
|
|
|
|
OPENROUTER_EMBEDDING_MODELS = ["openai/text-embedding-3-small"]
|
|
|
|
|
|
@register_embeddings
|
|
class OpenRouterEmbeddingsConfiguration(BaseEmbeddingsConfiguration):
|
|
model_config = OPENROUTER_PROVIDER_MODEL_CONFIG
|
|
provider: Literal[ServiceProviders.OPENROUTER] = ServiceProviders.OPENROUTER
|
|
model: str = Field(
|
|
default="openai/text-embedding-3-small",
|
|
description="OpenRouter-hosted embedding model slug.",
|
|
json_schema_extra={"examples": OPENROUTER_EMBEDDING_MODELS},
|
|
)
|
|
|
|
base_url: str = Field(
|
|
default="https://openrouter.ai/api/v1",
|
|
description="Override only if proxying OpenRouter through your own gateway.",
|
|
)
|
|
|
|
|
|
EmbeddingsConfig = Annotated[
|
|
Union[OpenAIEmbeddingsConfiguration, OpenRouterEmbeddingsConfiguration],
|
|
Field(discriminator="provider"),
|
|
]
|
|
|
|
ServiceConfig = Annotated[
|
|
Union[LLMConfig, RealtimeConfig, TTSConfig, STTConfig, EmbeddingsConfig],
|
|
Field(discriminator="provider"),
|
|
]
|