feat: add sarvam v3 voices

2026-07-22 11:51:04 +02:00 · 2026-02-13 10:11:48 +05:30 · 2026-02-13 10:11:48 +05:30 · a75bc72cb5
commit a75bc72cb5
parent e1565246fa
2 changed files with 78 additions and 4 deletions
--- a/api/services/configuration/registry.py
+++ b/api/services/configuration/registry.py
@ -278,7 +278,48 @@ class DograhTTSService(BaseTTSConfiguration):


 SARVAM_TTS_MODELS = ["bulbul:v2", "bulbul:v3"]
-SARVAM_VOICES = ["anushka", "manisha", "vidya", "arya", "abhilash", "karun", "hitesh"]
+SARVAM_V2_VOICES = ["anushka", "manisha", "vidya", "arya", "abhilash", "karun", "hitesh"]
+SARVAM_V3_VOICES = [
+    "shubh",
+    "aditya",
+    "ritu",
+    "priya",
+    "neha",
+    "rahul",
+    "pooja",
+    "rohan",
+    "simran",
+    "kavya",
+    "amit",
+    "dev",
+    "ishita",
+    "shreya",
+    "ratan",
+    "varun",
+    "manan",
+    "sumit",
+    "roopa",
+    "kabir",
+    "aayan",
+    "ashutosh",
+    "advait",
+    "amelia",
+    "sophia",
+    "anand",
+    "tanya",
+    "tarun",
+    "sunny",
+    "mani",
+    "gokul",
+    "vijay",
+    "shruti",
+    "suhani",
+    "mohit",
+    "kavitha",
+    "rehan",
+    "soham",
+    "rupali",
+]
 SARVAM_LANGUAGES = [
    "bn-IN",
    "en-IN",
@ -301,7 +342,16 @@ class SarvamTTSConfiguration(BaseTTSConfiguration):
    model: str = Field(
        default="bulbul:v2", json_schema_extra={"examples": SARVAM_TTS_MODELS}
    )
-    voice: str = Field(default="anushka", json_schema_extra={"examples": SARVAM_VOICES})
+    voice: str = Field(
+        default="anushka",
+        json_schema_extra={
+            "examples": SARVAM_V2_VOICES,
+            "model_options": {
+                "bulbul:v2": SARVAM_V2_VOICES,
+                "bulbul:v3": SARVAM_V3_VOICES,
+            },
+        },
+    )
    language: str = Field(
        default="hi-IN", json_schema_extra={"examples": SARVAM_LANGUAGES}
    )
--- a/ui/src/components/ServiceConfiguration.tsx
+++ b/ui/src/components/ServiceConfiguration.tsx
@ -21,6 +21,7 @@ interface SchemaProperty {
    default?: string | number | boolean;
    enum?: string[];
    examples?: string[];
+    model_options?: Record<string, string[]>;
    $ref?: string;
    description?: string;
    format?: string;
@ -212,6 +213,20 @@ export default function ServiceConfiguration() {
        }
    }, [schemas, serviceProviders.llm, userConfig?.llm?.model, hasCheckedManualMode]);

+    // When the TTS model changes and the provider defines model-dependent voices, switch to the model's first voice if the current voice is not valid for it
+    const ttsModel = watch("tts_model");
+    useEffect(() => {
+        const voiceSchema = schemas?.tts?.[serviceProviders.tts]?.properties?.voice;
+        const modelOptions = voiceSchema?.model_options;
+        if (!modelOptions || !ttsModel) return;
+
+        const validVoices = modelOptions[ttsModel as string];
+        const currentVoice = getValues("tts_voice") as string;
+        if (validVoices && currentVoice && !validVoices.includes(currentVoice)) {
+            setValue("tts_voice", validVoices[0], { shouldDirty: true });
+        }
+    }, [ttsModel, serviceProviders.tts, setValue, getValues, schemas]);
+
    const handleProviderChange = (service: ServiceSegment, providerName: string) => {
        if (!providerName) {
            return;
@ -516,7 +531,16 @@ export default function ServiceConfiguration() {
        }

        // Handle fields with enum or examples (dropdown options)
-        const dropdownOptions = actualSchema?.enum || actualSchema?.examples;
+        let dropdownOptions = actualSchema?.enum || actualSchema?.examples;
+
+        // Use model-dependent options when available (e.g., Sarvam voices per model)
+        if (actualSchema?.model_options) {
+            const modelValue = watch(`${service}_model`) as string;
+            if (modelValue && actualSchema.model_options[modelValue]) {
+                dropdownOptions = actualSchema.model_options[modelValue];
+            }
+        }
+
        if (dropdownOptions && dropdownOptions.length > 0) {
            // Use friendly display names for language and voice fields
            const getDisplayName = (value: string) => {
@ -524,7 +548,7 @@ export default function ServiceConfiguration() {
                    return LANGUAGE_DISPLAY_NAMES[value] || value;
                }
                if (field === "voice") {
-                    return VOICE_DISPLAY_NAMES[value] || value;
+                    return VOICE_DISPLAY_NAMES[value] || value.charAt(0).toUpperCase() + value.slice(1);
                }
                return value;
            };