feat: add sarvam v3 voices

This commit is contained in:
Abhishek Kumar 2026-02-13 10:11:48 +05:30
parent e1565246fa
commit a75bc72cb5
2 changed files with 78 additions and 4 deletions

View file

@ -278,7 +278,48 @@ class DograhTTSService(BaseTTSConfiguration):
SARVAM_TTS_MODELS = ["bulbul:v2", "bulbul:v3"]
SARVAM_VOICES = ["anushka", "manisha", "vidya", "arya", "abhilash", "karun", "hitesh"]
SARVAM_V2_VOICES = ["anushka", "manisha", "vidya", "arya", "abhilash", "karun", "hitesh"]
# Voice identifiers associated with the "bulbul:v3" model in the Sarvam voice
# field's per-model options. None of these names appear in SARVAM_V2_VOICES.
SARVAM_V3_VOICES = [
"shubh",
"aditya",
"ritu",
"priya",
"neha",
"rahul",
"pooja",
"rohan",
"simran",
"kavya",
"amit",
"dev",
"ishita",
"shreya",
"ratan",
"varun",
"manan",
"sumit",
"roopa",
"kabir",
"aayan",
"ashutosh",
"advait",
"amelia",
"sophia",
"anand",
"tanya",
"tarun",
"sunny",
"mani",
"gokul",
"vijay",
"shruti",
"suhani",
"mohit",
"kavitha",
"rehan",
"soham",
"rupali",
]
SARVAM_LANGUAGES = [
"bn-IN",
"en-IN",
@ -301,7 +342,16 @@ class SarvamTTSConfiguration(BaseTTSConfiguration):
model: str = Field(
default="bulbul:v2", json_schema_extra={"examples": SARVAM_TTS_MODELS}
)
voice: str = Field(default="anushka", json_schema_extra={"examples": SARVAM_VOICES})
# Voice identifier for Sarvam TTS. The default "anushka" is one of the v2
# voices, matching the default model ("bulbul:v2").
voice: str = Field(
default="anushka",
json_schema_extra={
# Static suggestion list: the voices of the default (v2) model.
"examples": SARVAM_V2_VOICES,
# Voice lists keyed by model name, so a schema consumer can offer only
# the voices that belong to the currently selected model.
"model_options": {
"bulbul:v2": SARVAM_V2_VOICES,
"bulbul:v3": SARVAM_V3_VOICES,
},
},
)
language: str = Field(
default="hi-IN", json_schema_extra={"examples": SARVAM_LANGUAGES}
)

View file

@ -21,6 +21,7 @@ interface SchemaProperty {
default?: string | number | boolean;
enum?: string[];
examples?: string[];
model_options?: Record<string, string[]>;
$ref?: string;
description?: string;
format?: string;
@ -212,6 +213,20 @@ export default function ServiceConfiguration() {
}
}, [schemas, serviceProviders.llm, userConfig?.llm?.model, hasCheckedManualMode]);
// Reset voice when TTS model changes if the provider has model-dependent voice options.
// The voice schema may carry `model_options`, a map from model name to the voices
// valid for that model. When the selected voice is not in the list for the newly
// selected model, fall back to the first voice of that list.
const ttsModel = watch("tts_model");
useEffect(() => {
  const voiceSchema = schemas?.tts?.[serviceProviders.tts]?.properties?.voice;
  const modelOptions = voiceSchema?.model_options;
  // Provider has no per-model voices, or no model is selected yet.
  if (!modelOptions || !ttsModel) return;

  const validVoices = modelOptions[ttsModel as string];
  // Unknown model or an empty voice list: there is no sensible fallback, so keep
  // the current voice instead of overwriting it with `undefined`.
  if (!validVoices || validVoices.length === 0) return;

  const currentVoice = getValues("tts_voice") as string;
  // An unset voice is left alone; only an incompatible selection is replaced.
  if (currentVoice && !validVoices.includes(currentVoice)) {
    // Mark the form dirty so the automatic change is saved like a manual edit.
    setValue("tts_voice", validVoices[0], { shouldDirty: true });
  }
}, [ttsModel, serviceProviders.tts, setValue, getValues, schemas]);
const handleProviderChange = (service: ServiceSegment, providerName: string) => {
if (!providerName) {
return;
@ -516,7 +531,16 @@ export default function ServiceConfiguration() {
}
// Handle fields with enum or examples (dropdown options)
const dropdownOptions = actualSchema?.enum || actualSchema?.examples;
// Static options from the schema: `enum` takes precedence over `examples`.
let dropdownOptions = actualSchema?.enum || actualSchema?.examples;

// Per-model option lists (e.g., Sarvam voices per model) override the static
// options whenever the currently selected model has an entry of its own.
if (actualSchema?.model_options) {
  const selectedModel = watch(`${service}_model`) as string;
  const optionsForModel = selectedModel
    ? actualSchema.model_options[selectedModel]
    : undefined;
  if (optionsForModel) {
    dropdownOptions = optionsForModel;
  }
}
if (dropdownOptions && dropdownOptions.length > 0) {
// Use friendly display names for language and voice fields
const getDisplayName = (value: string) => {
@ -524,7 +548,7 @@ export default function ServiceConfiguration() {
return LANGUAGE_DISPLAY_NAMES[value] || value;
}
if (field === "voice") {
return VOICE_DISPLAY_NAMES[value] || value;
return VOICE_DISPLAY_NAMES[value] || value.charAt(0).toUpperCase() + value.slice(1);
}
return value;
};