From a75bc72cb59537098b3b605a012a2ef8a3f0fe6a Mon Sep 17 00:00:00 2001 From: Abhishek Kumar Date: Fri, 13 Feb 2026 10:11:48 +0530 Subject: [PATCH] feat: add sarvam v3 voices --- api/services/configuration/registry.py | 54 +++++++++++++++++++++- ui/src/components/ServiceConfiguration.tsx | 28 ++++++++++- 2 files changed, 78 insertions(+), 4 deletions(-) diff --git a/api/services/configuration/registry.py b/api/services/configuration/registry.py index 4443b5a..6b70c1a 100644 --- a/api/services/configuration/registry.py +++ b/api/services/configuration/registry.py @@ -278,7 +278,48 @@ class DograhTTSService(BaseTTSConfiguration): SARVAM_TTS_MODELS = ["bulbul:v2", "bulbul:v3"] -SARVAM_VOICES = ["anushka", "manisha", "vidya", "arya", "abhilash", "karun", "hitesh"] +SARVAM_V2_VOICES = ["anushka", "manisha", "vidya", "arya", "abhilash", "karun", "hitesh"] +SARVAM_V3_VOICES = [ + "shubh", + "aditya", + "ritu", + "priya", + "neha", + "rahul", + "pooja", + "rohan", + "simran", + "kavya", + "amit", + "dev", + "ishita", + "shreya", + "ratan", + "varun", + "manan", + "sumit", + "roopa", + "kabir", + "aayan", + "ashutosh", + "advait", + "amelia", + "sophia", + "anand", + "tanya", + "tarun", + "sunny", + "mani", + "gokul", + "vijay", + "shruti", + "suhani", + "mohit", + "kavitha", + "rehan", + "soham", + "rupali", +] SARVAM_LANGUAGES = [ "bn-IN", "en-IN", @@ -301,7 +342,16 @@ class SarvamTTSConfiguration(BaseTTSConfiguration): model: str = Field( default="bulbul:v2", json_schema_extra={"examples": SARVAM_TTS_MODELS} ) - voice: str = Field(default="anushka", json_schema_extra={"examples": SARVAM_VOICES}) + voice: str = Field( + default="anushka", + json_schema_extra={ + "examples": SARVAM_V2_VOICES, + "model_options": { + "bulbul:v2": SARVAM_V2_VOICES, + "bulbul:v3": SARVAM_V3_VOICES, + }, + }, + ) language: str = Field( default="hi-IN", json_schema_extra={"examples": SARVAM_LANGUAGES} ) diff --git a/ui/src/components/ServiceConfiguration.tsx 
b/ui/src/components/ServiceConfiguration.tsx index 637917c..bda9e21 100644 --- a/ui/src/components/ServiceConfiguration.tsx +++ b/ui/src/components/ServiceConfiguration.tsx @@ -21,6 +21,7 @@ interface SchemaProperty { default?: string | number | boolean; enum?: string[]; examples?: string[]; + model_options?: Record<string, string[]>; $ref?: string; description?: string; format?: string; @@ -212,6 +213,20 @@ export default function ServiceConfiguration() { } }, [schemas, serviceProviders.llm, userConfig?.llm?.model, hasCheckedManualMode]); + // Reset voice when TTS model changes if the provider has model-dependent voice options + const ttsModel = watch("tts_model"); + useEffect(() => { + const voiceSchema = schemas?.tts?.[serviceProviders.tts]?.properties?.voice; + const modelOptions = voiceSchema?.model_options; + if (!modelOptions || !ttsModel) return; + + const validVoices = modelOptions[ttsModel as string]; + const currentVoice = getValues("tts_voice") as string; + if (validVoices && currentVoice && !validVoices.includes(currentVoice)) { + setValue("tts_voice", validVoices[0], { shouldDirty: true }); + } + }, [ttsModel, serviceProviders.tts, setValue, getValues, schemas]); + const handleProviderChange = (service: ServiceSegment, providerName: string) => { if (!providerName) { return; @@ -516,7 +531,16 @@ export default function ServiceConfiguration() { } // Handle fields with enum or examples (dropdown options) - const dropdownOptions = actualSchema?.enum || actualSchema?.examples; + let dropdownOptions = actualSchema?.enum || actualSchema?.examples; + + // Use model-dependent options when available (e.g., Sarvam voices per model) + if (actualSchema?.model_options) { + const modelValue = watch(`${service}_model`) as string; + if (modelValue && actualSchema.model_options[modelValue]) { + dropdownOptions = actualSchema.model_options[modelValue]; + } + } + if (dropdownOptions && dropdownOptions.length > 0) { // Use friendly display names for language and voice fields const 
getDisplayName = (value: string) => { @@ -524,7 +548,7 @@ export default function ServiceConfiguration() { return LANGUAGE_DISPLAY_NAMES[value] || value; } if (field === "voice") { - return VOICE_DISPLAY_NAMES[value] || value; + return VOICE_DISPLAY_NAMES[value] || value.charAt(0).toUpperCase() + value.slice(1); } return value; };