Merge remote-tracking branch 'origin/main' into pr-381

This commit is contained in:
Abhishek Kumar 2026-06-02 12:11:57 +05:30
commit 858c474139
119 changed files with 5057 additions and 1018 deletions

View file

@ -16,6 +16,9 @@ from .google import (
)
from .sarvam import (
SARVAM_LANGUAGES,
SARVAM_LLM_MODELS,
SARVAM_STT_LANGUAGES_V3,
SARVAM_STT_LANGUAGES_V25,
SARVAM_STT_MODELS,
SARVAM_TTS_MODELS,
SARVAM_V2_VOICES,
@ -41,6 +44,9 @@ __all__ = [
"GOOGLE_VERTEX_REALTIME_MODELS",
"GOOGLE_VERTEX_REALTIME_VOICES",
"SARVAM_LANGUAGES",
"SARVAM_LLM_MODELS",
"SARVAM_STT_LANGUAGES_V25",
"SARVAM_STT_LANGUAGES_V3",
"SARVAM_STT_MODELS",
"SARVAM_TTS_MODELS",
"SARVAM_V2_VOICES",

View file

@ -63,4 +63,38 @@ SARVAM_LANGUAGES = (
"te-IN",
"as-IN",
)
SARVAM_STT_MODELS = ("saarika:v2.5", "saaras:v2")
SARVAM_STT_MODELS = ("saarika:v2.5", "saaras:v3")
# saarika:v2.5 language codes (unknown = auto-detect)
SARVAM_STT_LANGUAGES_V25 = (
"unknown",
"hi-IN",
"bn-IN",
"gu-IN",
"kn-IN",
"ml-IN",
"mr-IN",
"od-IN",
"pa-IN",
"ta-IN",
"te-IN",
"en-IN",
)
# saaras:v3 adds these regional languages on top of the v2.5 set. Full list: https://docs.sarvam.ai/api-reference-docs/speech-to-text/transcribe
SARVAM_STT_LANGUAGES_V3 = SARVAM_STT_LANGUAGES_V25 + (
"as-IN",
"ur-IN",
"ne-IN",
"kok-IN",
"ks-IN",
"sd-IN",
"sa-IN",
"sat-IN",
"mni-IN",
"brx-IN",
"mai-IN",
"doi-IN",
)
SARVAM_LLM_MODELS = (
"sarvam-30b",
"sarvam-105b",
)

View file

@ -22,6 +22,9 @@ from api.services.configuration.options import (
GOOGLE_VERTEX_REALTIME_MODELS,
GOOGLE_VERTEX_REALTIME_VOICES,
SARVAM_LANGUAGES,
SARVAM_LLM_MODELS,
SARVAM_STT_LANGUAGES_V3,
SARVAM_STT_LANGUAGES_V25,
SARVAM_STT_MODELS,
SARVAM_TTS_MODELS,
SARVAM_V2_VOICES,
@ -93,7 +96,7 @@ class BaseServiceConfiguration(BaseModel):
ServiceProviders.GOOGLE_REALTIME,
ServiceProviders.GOOGLE_VERTEX_REALTIME,
ServiceProviders.AZURE_REALTIME,
# ServiceProviders.SARVAM,
ServiceProviders.SARVAM,
]
api_key: str | list[str]
@ -486,6 +489,29 @@ class MiniMaxLLMConfiguration(BaseLLMConfiguration):
)
@register_llm
class SarvamLLMConfiguration(BaseLLMConfiguration):
model_config = SARVAM_PROVIDER_MODEL_CONFIG
provider: Literal[ServiceProviders.SARVAM] = ServiceProviders.SARVAM
model: str = Field(
default="sarvam-30b",
description=(
"Sarvam chat model. Use sarvam-30b for low-latency voice agents; "
"sarvam-105b for complex multi-step reasoning."
),
json_schema_extra={"examples": SARVAM_LLM_MODELS, "allow_custom_input": True},
)
temperature: float = Field(
default=0.5,
ge=0.0,
le=2.0,
description=(
"Sampling temperature. Sarvam recommends 0.5 for balanced "
"conversational responses."
),
)
OPENAI_REALTIME_MODELS = ["gpt-realtime-2"]
OPENAI_REALTIME_VOICES = [
"alloy",
@ -726,6 +752,7 @@ LLMConfig = Annotated[
AWSBedrockLLMConfiguration,
SpeachesLLMConfiguration,
MiniMaxLLMConfiguration,
SarvamLLMConfiguration,
],
Field(discriminator="provider"),
]
@ -869,6 +896,10 @@ class OpenAITTSService(BaseTTSConfiguration):
default="alloy",
description="OpenAI TTS voice name.",
)
base_url: str = Field(
default="https://api.openai.com/v1",
description="Override only if using an OpenAI-compatible API (e.g. local TTS, proxy).",
)
DOGRAH_TTS_MODELS = ["default"]
@ -1238,6 +1269,10 @@ class OpenAISTTConfiguration(BaseSTTConfiguration):
description="OpenAI transcription model.",
json_schema_extra={"examples": OPENAI_STT_MODELS},
)
base_url: str = Field(
default="https://api.openai.com/v1",
description="Override only if using an OpenAI-compatible API (e.g. local STT, proxy).",
)
@register_stt
@ -1306,13 +1341,24 @@ class SarvamSTTConfiguration(BaseSTTConfiguration):
provider: Literal[ServiceProviders.SARVAM] = ServiceProviders.SARVAM
model: str = Field(
default="saarika:v2.5",
description="Sarvam STT model.",
description=(
"Sarvam STT model. saarika:v2.5 transcribes in the spoken language; "
"saaras:v3 is the recommended model with flexible output modes."
),
json_schema_extra={"examples": SARVAM_STT_MODELS},
)
language: str = Field(
default="hi-IN",
description="BCP-47 Indian-language code.",
json_schema_extra={"examples": SARVAM_LANGUAGES},
default="unknown",
description=(
"BCP-47 language code. Use unknown for automatic language detection."
),
json_schema_extra={
"examples": SARVAM_STT_LANGUAGES_V25,
"model_options": {
"saarika:v2.5": SARVAM_STT_LANGUAGES_V25,
"saaras:v3": SARVAM_STT_LANGUAGES_V3,
},
},
)