dograh/api/services/configuration/options/sarvam.py
Abhay Babbar 98d2b24cba
Add Sarvam LLM, update Sarvam STT models, expose usage_info on run detail (#351)
* Add Sarvam LLM provider, update Sarvam STT models, expose usage_info on run detail.
Depends on pipecat PR dograh-hq/pipecat#43 for STT string language support.
Submodule bump will follow after that merges.

* test: cover Sarvam STT language mapping; link Sarvam docs

---------

Co-authored-by: Sabiha Khan <sabihak89@gmail.com>
2026-06-01 10:29:31 +05:30

100 lines
1.6 KiB
Python

# Sarvam text-to-speech model identifiers.
SARVAM_TTS_MODELS = (
    "bulbul:v2",
    "bulbul:v3",
)
# Voice names grouped under the "V2" label.
# NOTE(review): presumably the voices for the bulbul:v2 TTS model -- confirm
# against the code that consumes this tuple.
SARVAM_V2_VOICES = (
    "anushka",
    "manisha",
    "vidya",
    "arya",
    "abhilash",
    "karun",
    "hitesh",
)
# Voice names grouped under the "V3" label.
# NOTE(review): presumably the voices for the bulbul:v3 TTS model -- confirm
# against the code that consumes this tuple.
SARVAM_V3_VOICES = (
    "shubh",
    "aditya",
    "ritu",
    "priya",
    "neha",
    "rahul",
    "pooja",
    "rohan",
    "simran",
    "kavya",
    "amit",
    "dev",
    "ishita",
    "shreya",
    "ratan",
    "varun",
    "manan",
    "sumit",
    "roopa",
    "kabir",
    "aayan",
    "ashutosh",
    "advait",
    "amelia",
    "sophia",
    "anand",
    "tanya",
    "tarun",
    "sunny",
    "mani",
    "gokul",
    "vijay",
    "shruti",
    "suhani",
    "mohit",
    "kavitha",
    "rehan",
    "soham",
    "rupali",
)
# Sarvam language codes in "<language>-IN" form.
# NOTE(review): not tied to a specific model here; presumably the TTS language
# list -- confirm against the code that consumes this tuple.
SARVAM_LANGUAGES = (
    "bn-IN",
    "en-IN",
    "gu-IN",
    "hi-IN",
    "kn-IN",
    "ml-IN",
    "mr-IN",
    "od-IN",
    "pa-IN",
    "ta-IN",
    "te-IN",
    "as-IN",
)
# Sarvam speech-to-text model identifiers.
SARVAM_STT_MODELS = (
    "saarika:v2.5",
    "saaras:v3",
)
# Language codes for the saarika:v2.5 STT model.
# The special value "unknown" means auto-detect.
SARVAM_STT_LANGUAGES_V25 = (
    "unknown",
    "hi-IN",
    "bn-IN",
    "gu-IN",
    "kn-IN",
    "ml-IN",
    "mr-IN",
    "od-IN",
    "pa-IN",
    "ta-IN",
    "te-IN",
    "en-IN",
)
# Language codes for the saaras:v3 STT model: the full saarika:v2.5 set
# followed by the additional regional languages listed below. Full list:
# https://docs.sarvam.ai/api-reference-docs/speech-to-text/transcribe
SARVAM_STT_LANGUAGES_V3 = (
    *SARVAM_STT_LANGUAGES_V25,
    "as-IN",
    "ur-IN",
    "ne-IN",
    "kok-IN",
    "ks-IN",
    "sd-IN",
    "sa-IN",
    "sat-IN",
    "mni-IN",
    "brx-IN",
    "mai-IN",
    "doi-IN",
)
# Sarvam LLM model identifiers.
SARVAM_LLM_MODELS = (
    "sarvam-30b",
    "sarvam-105b",
)