Add Sarvam LLM, update Sarvam STT models, expose usage_info on run detail (#351)

* Add Sarvam LLM provider, update Sarvam STT models, expose usage_info on run detail.
Depends on pipecat PR dograh-hq/pipecat#43 for STT string language support.
Submodule bump will follow after that merges.

* test: cover Sarvam STT language mapping; link Sarvam docs

---------

Co-authored-by: Sabiha Khan <sabihak89@gmail.com>
This commit is contained in:
Abhay Babbar 2026-06-01 10:29:31 +05:30 committed by GitHub
parent 13b30dee9e
commit 98d2b24cba
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 272 additions and 9 deletions

View file

@ -16,6 +16,9 @@ from .google import (
)
from .sarvam import (
SARVAM_LANGUAGES,
SARVAM_LLM_MODELS,
SARVAM_STT_LANGUAGES_V25,
SARVAM_STT_LANGUAGES_V3,
SARVAM_STT_MODELS,
SARVAM_TTS_MODELS,
SARVAM_V2_VOICES,
@ -41,6 +44,9 @@ __all__ = [
"GOOGLE_VERTEX_REALTIME_MODELS",
"GOOGLE_VERTEX_REALTIME_VOICES",
"SARVAM_LANGUAGES",
"SARVAM_LLM_MODELS",
"SARVAM_STT_LANGUAGES_V25",
"SARVAM_STT_LANGUAGES_V3",
"SARVAM_STT_MODELS",
"SARVAM_TTS_MODELS",
"SARVAM_V2_VOICES",

View file

@ -63,4 +63,38 @@ SARVAM_LANGUAGES = (
"te-IN",
"as-IN",
)
SARVAM_STT_MODELS = ("saarika:v2.5", "saaras:v2")
SARVAM_STT_MODELS = ("saarika:v2.5", "saaras:v3")
# saarika:v2.5 language codes (unknown = auto-detect)
SARVAM_STT_LANGUAGES_V25 = (
"unknown",
"hi-IN",
"bn-IN",
"gu-IN",
"kn-IN",
"ml-IN",
"mr-IN",
"od-IN",
"pa-IN",
"ta-IN",
"te-IN",
"en-IN",
)
# saaras:v3 adds these regional languages on top of the v2.5 set. Full list: https://docs.sarvam.ai/api-reference-docs/speech-to-text/transcribe
SARVAM_STT_LANGUAGES_V3 = SARVAM_STT_LANGUAGES_V25 + (
"as-IN",
"ur-IN",
"ne-IN",
"kok-IN",
"ks-IN",
"sd-IN",
"sa-IN",
"sat-IN",
"mni-IN",
"brx-IN",
"mai-IN",
"doi-IN",
)
SARVAM_LLM_MODELS = (
"sarvam-30b",
"sarvam-105b",
)

View file

@ -22,6 +22,9 @@ from api.services.configuration.options import (
GOOGLE_VERTEX_REALTIME_MODELS,
GOOGLE_VERTEX_REALTIME_VOICES,
SARVAM_LANGUAGES,
SARVAM_LLM_MODELS,
SARVAM_STT_LANGUAGES_V25,
SARVAM_STT_LANGUAGES_V3,
SARVAM_STT_MODELS,
SARVAM_TTS_MODELS,
SARVAM_V2_VOICES,
@ -89,7 +92,7 @@ class BaseServiceConfiguration(BaseModel):
ServiceProviders.ULTRAVOX_REALTIME,
ServiceProviders.GOOGLE_REALTIME,
ServiceProviders.GOOGLE_VERTEX_REALTIME,
# ServiceProviders.SARVAM,
ServiceProviders.SARVAM,
]
api_key: str | list[str]
@ -472,6 +475,29 @@ class MiniMaxLLMConfiguration(BaseLLMConfiguration):
)
@register_llm
class SarvamLLMConfiguration(BaseLLMConfiguration):
model_config = SARVAM_PROVIDER_MODEL_CONFIG
provider: Literal[ServiceProviders.SARVAM] = ServiceProviders.SARVAM
model: str = Field(
default="sarvam-30b",
description=(
"Sarvam chat model. Use sarvam-30b for low-latency voice agents; "
"sarvam-105b for complex multi-step reasoning."
),
json_schema_extra={"examples": SARVAM_LLM_MODELS, "allow_custom_input": True},
)
temperature: float = Field(
default=0.5,
ge=0.0,
le=2.0,
description=(
"Sampling temperature. Sarvam recommends 0.5 for balanced "
"conversational responses."
),
)
OPENAI_REALTIME_MODELS = ["gpt-realtime-2"]
OPENAI_REALTIME_VOICES = [
"alloy",
@ -661,6 +687,7 @@ LLMConfig = Annotated[
AWSBedrockLLMConfiguration,
SpeachesLLMConfiguration,
MiniMaxLLMConfiguration,
SarvamLLMConfiguration,
],
Field(discriminator="provider"),
]
@ -1129,13 +1156,24 @@ class SarvamSTTConfiguration(BaseSTTConfiguration):
provider: Literal[ServiceProviders.SARVAM] = ServiceProviders.SARVAM
model: str = Field(
default="saarika:v2.5",
description="Sarvam STT model.",
description=(
"Sarvam STT model. saarika:v2.5 transcribes in the spoken language; "
"saaras:v3 is the recommended model with flexible output modes."
),
json_schema_extra={"examples": SARVAM_STT_MODELS},
)
language: str = Field(
default="hi-IN",
description="BCP-47 Indian-language code.",
json_schema_extra={"examples": SARVAM_LANGUAGES},
default="unknown",
description=(
"BCP-47 language code. Use unknown for automatic language detection."
),
json_schema_extra={
"examples": SARVAM_STT_LANGUAGES_V25,
"model_options": {
"saarika:v2.5": SARVAM_STT_LANGUAGES_V25,
"saaras:v3": SARVAM_STT_LANGUAGES_V3,
},
},
)

View file

@ -47,6 +47,7 @@ from pipecat.services.openai.stt import (
from pipecat.services.openai.tts import OpenAITTSService, OpenAITTSSettings
from pipecat.services.openrouter.llm import OpenRouterLLMService, OpenRouterLLMSettings
from pipecat.services.rime.tts import RimeTTSService, RimeTTSSettings
from pipecat.services.sarvam.llm import SarvamLLMService, SarvamLLMSettings
from pipecat.services.sarvam.stt import SarvamSTTService, SarvamSTTSettings
from pipecat.services.sarvam.tts import SarvamTTSService, SarvamTTSSettings
from pipecat.services.speaches.llm import SpeachesLLMService, SpeachesLLMSettings
@ -158,7 +159,7 @@ def create_stt_service(
sample_rate=audio_config.transport_in_sample_rate,
)
elif user_config.stt.provider == ServiceProviders.SARVAM.value:
# Map Sarvam language code to pipecat Language enum
language = getattr(user_config.stt, "language", None)
language_mapping = {
"bn-IN": Language.BN_IN,
"gu-IN": Language.GU_IN,
@ -172,9 +173,18 @@ def create_stt_service(
"od-IN": Language.OR_IN,
"en-IN": Language.EN_IN,
"as-IN": Language.AS_IN,
"ur-IN": Language.UR_IN,
"kok-IN": Language.KOK_IN,
"mai-IN": Language.MAI_IN,
"sd-IN": Language.SD_IN,
}
language = getattr(user_config.stt, "language", None)
pipecat_language = language_mapping.get(language, Language.HI_IN)
if not language or language == "unknown":
pipecat_language = None
elif language in language_mapping:
pipecat_language = language_mapping[language]
else:
# Unmapped BCP-47 codes pass through; Sarvam accepts them per https://docs.sarvam.ai/api-reference-docs/speech-to-text/transcribe
pipecat_language = language
return SarvamSTTService(
api_key=user_config.stt.api_key,
settings=SarvamSTTSettings(
@ -604,6 +614,14 @@ def create_llm_service_from_provider(
temperature=temperature if temperature is not None else 1.0,
),
)
elif provider == ServiceProviders.SARVAM.value:
return SarvamLLMService(
api_key=api_key,
settings=SarvamLLMSettings(
model=model,
temperature=temperature if temperature is not None else 0.5,
),
)
else:
raise HTTPException(status_code=400, detail=f"Invalid LLM provider {provider}")
@ -756,5 +774,7 @@ def create_llm_service(user_config):
elif provider == ServiceProviders.MINIMAX.value:
kwargs["base_url"] = user_config.llm.base_url
kwargs["temperature"] = user_config.llm.temperature
elif provider == ServiceProviders.SARVAM.value:
kwargs["temperature"] = user_config.llm.temperature
return create_llm_service_from_provider(provider, model, api_key, **kwargs)

View file

@ -0,0 +1,13 @@
"""Format workflow run usage for public API responses."""
def format_public_usage_info(usage_info: dict | None) -> dict | None:
if not usage_info:
return None
return {
"llm": usage_info.get("llm") or {},
"tts": usage_info.get("tts") or {},
"stt": usage_info.get("stt") or {},
"call_duration_seconds": usage_info.get("call_duration_seconds"),
}