feat: add custom sarvam tts voice (#449)

* feat: add custom sarvam tts voice

* chore: refactor registry and add Deepgram Flux multilingual language options

---------

Co-authored-by: Abhishek Kumar <abhishek@a6k.me>
This commit is contained in:
Sabiha Khan 2026-06-18 12:33:21 +05:30 committed by GitHub
parent 344c8220de
commit 951e73a645
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 268 additions and 69 deletions

View file

@ -9,7 +9,13 @@ from .azure import (
AZURE_SPEECH_TTS_LANGUAGES,
AZURE_SPEECH_TTS_VOICES,
)
from .deepgram import DEEPGRAM_LANGUAGES, DEEPGRAM_STT_MODELS
from .deepgram import (
DEEPGRAM_FLUX_MODELS,
DEEPGRAM_FLUX_MULTILINGUAL_LANGUAGE_OPTIONS,
DEEPGRAM_FLUX_MULTILINGUAL_LANGUAGES,
DEEPGRAM_LANGUAGES,
DEEPGRAM_STT_MODELS,
)
from .gladia import GLADIA_STT_LANGUAGES, GLADIA_STT_MODELS
from .google import (
GOOGLE_MODELS,
@ -35,6 +41,11 @@ from .sarvam import (
SARVAM_V2_VOICES,
SARVAM_V3_VOICES,
)
from .smallest import (
SMALLEST_TTS_LANGUAGES,
SMALLEST_TTS_MODELS,
SMALLEST_TTS_VOICES,
)
from .speechmatics import SPEECHMATICS_STT_LANGUAGES
__all__ = [
@ -47,6 +58,9 @@ __all__ = [
"AZURE_SPEECH_STT_LANGUAGES",
"AZURE_SPEECH_TTS_LANGUAGES",
"AZURE_SPEECH_TTS_VOICES",
"DEEPGRAM_FLUX_MODELS",
"DEEPGRAM_FLUX_MULTILINGUAL_LANGUAGES",
"DEEPGRAM_FLUX_MULTILINGUAL_LANGUAGE_OPTIONS",
"DEEPGRAM_LANGUAGES",
"DEEPGRAM_STT_MODELS",
"GLADIA_STT_LANGUAGES",
@ -71,5 +85,8 @@ __all__ = [
"SARVAM_TTS_MODELS",
"SARVAM_V2_VOICES",
"SARVAM_V3_VOICES",
"SMALLEST_TTS_LANGUAGES",
"SMALLEST_TTS_MODELS",
"SMALLEST_TTS_VOICES",
"SPEECHMATICS_STT_LANGUAGES",
]

View file

@ -1,4 +1,21 @@
DEEPGRAM_STT_MODELS = ("nova-3-general", "flux-general-en", "flux-general-multi")
# Deepgram Flux model identifiers; reused below to build the full STT model list.
DEEPGRAM_FLUX_MODELS = (
    "flux-general-en",
    "flux-general-multi",
)

# Concrete language codes listed for the Flux multilingual model.
DEEPGRAM_FLUX_MULTILINGUAL_LANGUAGES = (
    "de", "en", "es", "fr", "hi",
    "it", "ja", "nl", "pt", "ru",
)

# Selectable language values for Flux multilingual: the "multi" entry first,
# followed by every concrete language code above.
DEEPGRAM_FLUX_MULTILINGUAL_LANGUAGE_OPTIONS = (
    ("multi",) + DEEPGRAM_FLUX_MULTILINGUAL_LANGUAGES
)

# All Deepgram STT models: Nova-3 followed by the Flux models.
DEEPGRAM_STT_MODELS = ("nova-3-general",) + DEEPGRAM_FLUX_MODELS
DEEPGRAM_LANGUAGES = (
"multi",
"ar",

View file

@ -0,0 +1,36 @@
# Model identifiers listed for the Smallest TTS provider.
SMALLEST_TTS_MODELS = (
    "lightning_v3.1",
    "lightning_v3.1_pro",
)

# Voice names listed for the Smallest TTS provider (order is preserved as-is).
SMALLEST_TTS_VOICES = (
    "sophia", "avery", "liam", "lucas", "olivia",
    "ryan", "freya", "william", "devansh", "arjun",
    "niharika", "maya", "dhruv", "mia", "maithili",
)

# Language codes listed for the Smallest TTS provider (order is preserved as-is).
SMALLEST_TTS_LANGUAGES = (
    "en", "hi", "fr", "de",
    "es", "it", "nl", "pl",
    "ru", "ar", "bn", "gu",
    "he", "kn", "mr", "ta",
)

View file

@ -14,6 +14,7 @@ from api.services.configuration.options import (
AZURE_SPEECH_STT_LANGUAGES,
AZURE_SPEECH_TTS_LANGUAGES,
AZURE_SPEECH_TTS_VOICES,
DEEPGRAM_FLUX_MULTILINGUAL_LANGUAGE_OPTIONS,
DEEPGRAM_LANGUAGES,
DEEPGRAM_STT_MODELS,
GLADIA_STT_LANGUAGES,
@ -38,6 +39,9 @@ from api.services.configuration.options import (
SARVAM_TTS_MODELS,
SARVAM_V2_VOICES,
SARVAM_V3_VOICES,
SMALLEST_TTS_LANGUAGES,
SMALLEST_TTS_MODELS,
SMALLEST_TTS_VOICES,
SPEECHMATICS_STT_LANGUAGES,
)
from api.services.configuration.options.google import GOOGLE_VERTEX_MODELS
@ -987,9 +991,10 @@ class SarvamTTSConfiguration(BaseTTSConfiguration):
)
voice: str = Field(
default="anushka",
description="Sarvam voice name; must match the selected model's voice list.",
description="Sarvam voice name or custom voice ID.",
json_schema_extra={
"examples": SARVAM_V2_VOICES,
"allow_custom_input": True,
"model_options": {
"bulbul:v2": SARVAM_V2_VOICES,
"bulbul:v3": SARVAM_V3_VOICES,
@ -1172,43 +1177,6 @@ SMALLEST_PROVIDER_MODEL_CONFIG = provider_model_config(
provider_docs_url="https://smallest.ai/docs",
)
SMALLEST_TTS_MODELS = ["lightning_v3.1", "lightning_v3.1_pro"]
SMALLEST_TTS_VOICES = [
"sophia",
"avery",
"liam",
"lucas",
"olivia",
"ryan",
"freya",
"william",
"devansh",
"arjun",
"niharika",
"maya",
"dhruv",
"mia",
"maithili",
]
SMALLEST_TTS_LANGUAGES = [
"en",
"hi",
"fr",
"de",
"es",
"it",
"nl",
"pl",
"ru",
"ar",
"bn",
"gu",
"he",
"kn",
"mr",
"ta",
]
@register_tts
class SmallestAITTSConfiguration(BaseTTSConfiguration):
@ -1273,12 +1241,16 @@ class DeepgramSTTConfiguration(BaseSTTConfiguration):
)
language: str = Field(
default="multi",
description="Language code; 'multi' enables auto-detect (Nova-3 only).",
description=(
"Language code. 'multi' enables Nova-3 auto-detect and omits "
"language hints for Flux multilingual auto-detect."
),
json_schema_extra={
"examples": DEEPGRAM_LANGUAGES,
"model_options": {
"nova-3-general": DEEPGRAM_LANGUAGES,
"flux-general-en": ("en",),
"flux-general-multi": DEEPGRAM_FLUX_MULTILINGUAL_LANGUAGE_OPTIONS,
},
},
)