mirror of
https://github.com/dograh-hq/dograh.git
synced 2026-06-22 08:38:13 +02:00
feat: add gemini realtime and speaches integration
- Add gemini realtime support - Add speaches support for locally hosted LLMs
This commit is contained in:
parent
2eaaabd936
commit
ee2028eb2d
19 changed files with 531 additions and 185 deletions
|
|
@ -47,6 +47,8 @@ class UserConfigurationValidator:
|
|||
ServiceProviders.CAMB.value: self._check_camb_api_key,
|
||||
ServiceProviders.AWS_BEDROCK.value: self._check_aws_bedrock_api_key,
|
||||
ServiceProviders.SPEACHES.value: self._check_speaches_api_key,
|
||||
ServiceProviders.OPENAI_REALTIME.value: self._check_openai_api_key,
|
||||
ServiceProviders.GOOGLE_REALTIME.value: self._check_google_api_key,
|
||||
}
|
||||
|
||||
async def validate(
|
||||
|
|
@ -70,6 +72,10 @@ class UserConfigurationValidator:
|
|||
configuration.embeddings, "embeddings", required=False
|
||||
)
|
||||
)
|
||||
# Realtime is optional - only validate if configured
|
||||
status_list.extend(
|
||||
self._validate_service(configuration.realtime, "realtime", required=False)
|
||||
)
|
||||
|
||||
if status_list:
|
||||
raise ValueError(status_list)
|
||||
|
|
|
|||
|
|
@ -29,7 +29,7 @@ def contains_masked_key(api_key: str | list[str] | None) -> bool:
|
|||
|
||||
def check_for_masked_keys(config: "UserConfiguration") -> None:
|
||||
"""Raise ValueError if any service in *config* still has a masked API key."""
|
||||
for field in ("llm", "tts", "stt", "embeddings"):
|
||||
for field in ("llm", "tts", "stt", "embeddings", "realtime"):
|
||||
service = getattr(config, field, None)
|
||||
if service is None:
|
||||
continue
|
||||
|
|
@ -121,6 +121,8 @@ def mask_user_config(config: UserConfiguration) -> Dict[str, Any]:
|
|||
"tts": _mask_service(config.tts),
|
||||
"stt": _mask_service(config.stt),
|
||||
"embeddings": _mask_service(config.embeddings),
|
||||
"realtime": _mask_service(config.realtime),
|
||||
"is_realtime": config.is_realtime,
|
||||
"test_phone_number": config.test_phone_number,
|
||||
"timezone": config.timezone,
|
||||
}
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@ from typing import Dict
|
|||
from api.schemas.user_configuration import UserConfiguration
|
||||
from api.services.configuration.masking import resolve_masked_api_keys
|
||||
|
||||
SERVICE_FIELDS = ("llm", "tts", "stt", "embeddings")
|
||||
SERVICE_FIELDS = ("llm", "tts", "stt", "embeddings", "realtime")
|
||||
|
||||
|
||||
def merge_user_configurations(
|
||||
|
|
@ -64,6 +64,9 @@ def merge_user_configurations(
|
|||
_merge_service_block(service)
|
||||
|
||||
# other simple fields
|
||||
if "is_realtime" in incoming_partial:
|
||||
merged["is_realtime"] = incoming_partial["is_realtime"]
|
||||
|
||||
if "test_phone_number" in incoming_partial:
|
||||
merged["test_phone_number"] = incoming_partial["test_phone_number"]
|
||||
|
||||
|
|
|
|||
|
|
@ -10,6 +10,7 @@ class ServiceType(Enum):
|
|||
TTS = auto()
|
||||
STT = auto()
|
||||
EMBEDDINGS = auto()
|
||||
REALTIME = auto()
|
||||
|
||||
|
||||
class ServiceProviders(str, Enum):
|
||||
|
|
@ -28,6 +29,8 @@ class ServiceProviders(str, Enum):
|
|||
CAMB = "camb"
|
||||
AWS_BEDROCK = "aws_bedrock"
|
||||
SPEACHES = "speaches"
|
||||
OPENAI_REALTIME = "openai_realtime"
|
||||
GOOGLE_REALTIME = "google_realtime"
|
||||
|
||||
|
||||
class BaseServiceConfiguration(BaseModel):
|
||||
|
|
@ -42,6 +45,8 @@ class BaseServiceConfiguration(BaseModel):
|
|||
ServiceProviders.DOGRAH,
|
||||
ServiceProviders.AWS_BEDROCK,
|
||||
ServiceProviders.SPEACHES,
|
||||
ServiceProviders.OPENAI_REALTIME,
|
||||
ServiceProviders.GOOGLE_REALTIME,
|
||||
# ServiceProviders.SARVAM,
|
||||
]
|
||||
api_key: str | list[str]
|
||||
|
|
@ -97,6 +102,7 @@ REGISTRY: Dict[ServiceType, Dict[str, Type[BaseServiceConfiguration]]] = {
|
|||
ServiceType.TTS: {},
|
||||
ServiceType.STT: {},
|
||||
ServiceType.EMBEDDINGS: {},
|
||||
ServiceType.REALTIME: {},
|
||||
}
|
||||
|
||||
T = TypeVar("T", bound=BaseServiceConfiguration)
|
||||
|
|
@ -279,6 +285,68 @@ class SpeachesLLMConfiguration(BaseLLMConfiguration):
|
|||
api_key: str | list[str] | None = Field(default=None)
|
||||
|
||||
|
||||
OPENAI_REALTIME_MODELS = ["gpt-4o-realtime-preview", "gpt-4o-mini-realtime-preview"]
|
||||
OPENAI_REALTIME_VOICES = [
|
||||
"alloy",
|
||||
"ash",
|
||||
"ballad",
|
||||
"coral",
|
||||
"echo",
|
||||
"sage",
|
||||
"shimmer",
|
||||
"verse",
|
||||
]
|
||||
|
||||
|
||||
# @register_service(ServiceType.REALTIME)
|
||||
# class OpenAIRealtimeLLMConfiguration(BaseLLMConfiguration):
|
||||
# provider: Literal[ServiceProviders.OPENAI_REALTIME] = (
|
||||
# ServiceProviders.OPENAI_REALTIME
|
||||
# )
|
||||
# model: str = Field(
|
||||
# default="gpt-4o-realtime-preview",
|
||||
# json_schema_extra={
|
||||
# "examples": OPENAI_REALTIME_MODELS,
|
||||
# "allow_custom_input": True,
|
||||
# },
|
||||
# )
|
||||
# voice: str = Field(
|
||||
# default="alloy",
|
||||
# json_schema_extra={"examples": OPENAI_REALTIME_VOICES},
|
||||
# )
|
||||
|
||||
|
||||
GOOGLE_REALTIME_MODELS = ["gemini-3.1-flash-live-preview"]
|
||||
GOOGLE_REALTIME_VOICES = ["Puck", "Charon", "Kore", "Fenrir", "Aoede"]
|
||||
|
||||
|
||||
@register_service(ServiceType.REALTIME)
|
||||
class GoogleRealtimeLLMConfiguration(BaseLLMConfiguration):
|
||||
provider: Literal[ServiceProviders.GOOGLE_REALTIME] = (
|
||||
ServiceProviders.GOOGLE_REALTIME
|
||||
)
|
||||
model: str = Field(
|
||||
default="gemini-2.0-flash-live-001",
|
||||
json_schema_extra={
|
||||
"examples": GOOGLE_REALTIME_MODELS,
|
||||
"allow_custom_input": True,
|
||||
},
|
||||
)
|
||||
voice: str = Field(
|
||||
default="Puck",
|
||||
json_schema_extra={
|
||||
"examples": GOOGLE_REALTIME_VOICES,
|
||||
"allow_custom_input": True,
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
REALTIME_PROVIDERS = {
|
||||
ServiceProviders.OPENAI_REALTIME.value,
|
||||
ServiceProviders.GOOGLE_REALTIME.value,
|
||||
}
|
||||
|
||||
|
||||
LLMConfig = Annotated[
|
||||
Union[
|
||||
OpenAILLMService,
|
||||
|
|
@ -293,6 +361,14 @@ LLMConfig = Annotated[
|
|||
Field(discriminator="provider"),
|
||||
]
|
||||
|
||||
RealtimeConfig = Annotated[
|
||||
Union[
|
||||
# OpenAIRealtimeLLMConfiguration,
|
||||
GoogleRealtimeLLMConfiguration,
|
||||
],
|
||||
Field(discriminator="provider"),
|
||||
]
|
||||
|
||||
###################################################### TTS ########################################################################
|
||||
|
||||
|
||||
|
|
@ -719,6 +795,7 @@ SPEACHES_STT_MODELS = [
|
|||
"Systran/faster-distil-whisper-small.en",
|
||||
"Systran/faster-whisper-large-v3",
|
||||
]
|
||||
SPEACHES_STT_LANGUAGES = ["en", "ar", "nl", "fr", "de", "hi", "it", "pt", "es"]
|
||||
|
||||
|
||||
@register_stt
|
||||
|
|
@ -731,6 +808,13 @@ class SpeachesSTTConfiguration(BaseSTTConfiguration):
|
|||
"allow_custom_input": True,
|
||||
},
|
||||
)
|
||||
language: str = Field(
|
||||
default="en",
|
||||
json_schema_extra={
|
||||
"examples": SPEACHES_STT_LANGUAGES,
|
||||
"allow_custom_input": True,
|
||||
},
|
||||
)
|
||||
base_url: str = Field(
|
||||
default="http://localhost:8000/v1",
|
||||
description="OpenAI-compatible STT endpoint (Speaches, etc.)",
|
||||
|
|
@ -785,6 +869,6 @@ EmbeddingsConfig = Annotated[
|
|||
]
|
||||
|
||||
ServiceConfig = Annotated[
|
||||
Union[LLMConfig, TTSConfig, STTConfig, EmbeddingsConfig],
|
||||
Union[LLMConfig, RealtimeConfig, TTSConfig, STTConfig, EmbeddingsConfig],
|
||||
Field(discriminator="provider"),
|
||||
]
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue