feat: add voice selectors in elevenlabs (#88)

This commit is contained in:
Abhishek 2025-12-25 15:05:53 +05:30 committed by GitHub
parent 480e8a5f60
commit 45c5b7c304
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
22 changed files with 978 additions and 166 deletions

View file

@ -20,7 +20,7 @@ RUN pip install --user --no-cache-dir -r requirements.txt && \
# Copy and install pipecat from local submodule
COPY pipecat /tmp/pipecat
RUN pip install --user --no-cache-dir '/tmp/pipecat[cartesia,deepgram,openai,elevenlabs,groq,google,azure,soundfile,silero,webrtc,local-smart-turn-v3]' && \
RUN pip install --user --no-cache-dir '/tmp/pipecat[cartesia,deepgram,openai,elevenlabs,groq,google,azure,sarvam,soundfile,silero,webrtc,local-smart-turn-v3]' && \
# Clean up pip cache and temporary pipecat directory
rm -rf /root/.cache/pip /tmp/pipecat

View file

@ -111,9 +111,7 @@ def apply_workflow_run_filters(
# (subscript [] only works in PostgreSQL 14+)
filter_conditions.append(
cast(WorkflowRunModel.gathered_context, JSONB)
.op("->>")(
"mapped_call_disposition"
)
.op("->>")("mapped_call_disposition")
.in_(codes)
)
@ -147,9 +145,7 @@ def apply_workflow_run_filters(
# Use ->> operator for compatibility with all PostgreSQL versions
filter_conditions.append(
cast(WorkflowRunModel.initial_context, JSONB)
.op("->>")(
"phone"
)
.op("->>")("phone")
.contains(phone)
)
@ -178,13 +174,9 @@ def apply_workflow_run_filters(
"total_cost_usd"
)
if min_val is not None:
filter_conditions.append(
cast(cost_text, Integer) >= min_val
)
filter_conditions.append(cast(cost_text, Integer) >= min_val)
if max_val is not None:
filter_conditions.append(
cast(cost_text, Integer) <= max_val
)
filter_conditions.append(cast(cost_text, Integer) <= max_val)
if filter_conditions:
base_query = base_query.where(and_(*filter_conditions))

View file

@ -1,7 +1,8 @@
from datetime import datetime, timedelta
from typing import List, Optional, TypedDict, Union
from typing import List, Literal, Optional, TypedDict, Union
from fastapi import APIRouter, Depends, HTTPException, Query
from loguru import logger
from pydantic import BaseModel
from api.db import db_client
@ -17,6 +18,7 @@ from api.services.configuration.defaults import DEFAULT_SERVICE_PROVIDERS
from api.services.configuration.masking import mask_user_config
from api.services.configuration.merge import merge_user_configurations
from api.services.configuration.registry import REGISTRY, ServiceType
from api.services.mps_service_key_client import mps_service_key_client
router = APIRouter(prefix="/user")
@ -274,3 +276,46 @@ async def reactivate_api_key(
raise HTTPException(status_code=500, detail="Failed to reactivate API key")
return {"success": True, "message": "API key reactivated successfully"}
# Voice Configuration Endpoints
TTSProvider = Literal["elevenlabs", "deepgram", "sarvam", "cartesia", "dograh"]
class VoiceInfo(BaseModel):
    # One voice entry as returned by the voices endpoint.
    # Deliberately documented with comments rather than a docstring so the
    # generated JSON schema for this model stays exactly as it was.

    # Required identification fields.
    voice_id: str
    name: str

    # Optional descriptive metadata; each one falls back to None when the
    # source payload does not provide it.
    description: Optional[str] = None
    accent: Optional[str] = None
    gender: Optional[str] = None
    language: Optional[str] = None
    preview_url: Optional[str] = None
class VoicesResponse(BaseModel):
    # Response envelope for the voices endpoint: the provider name plus the
    # list of voices available for it.
    # Comments are used instead of a docstring so the generated JSON schema
    # for this model is unchanged.

    provider: str
    voices: List[VoiceInfo]
@router.get("/configurations/voices/{provider}")
async def get_voices(
    provider: TTSProvider,
    user: UserModel = Depends(get_user),
) -> VoicesResponse:
    """Get available voices for a TTS provider."""
    # NOTE: the docstring above is kept verbatim because FastAPI publishes it
    # as the operation description in the OpenAPI document.
    try:
        # Ask MPS for the voice catalogue on behalf of the calling user.
        payload = await mps_service_key_client.get_voices(
            provider=provider,
            organization_id=user.selected_organization_id,
            created_by=user.provider_id,
        )

        # Fall back to the requested provider when the payload omits it.
        resolved_provider = payload.get("provider", provider)

        # Validate every raw voice entry through the VoiceInfo model.
        voice_models = []
        for raw_voice in payload.get("voices", []):
            voice_models.append(VoiceInfo(**raw_voice))

        return VoicesResponse(provider=resolved_provider, voices=voice_models)
    except Exception as e:
        # Any failure (transport, upstream status, payload validation) is
        # logged and reported to the client as a generic 500.
        logger.error(f"Failed to fetch voices for {provider}: {e}")
        raise HTTPException(
            status_code=500,
            detail=f"Failed to fetch voices for {provider}",
        )

View file

@ -11,10 +11,8 @@ from api.db.models import UserModel
from api.schemas.user_configuration import UserConfiguration
from api.services.auth.stack_auth import stackauth
from api.services.configuration.registry import (
DograhLLMModel,
DograhSTTModel,
DograhTTSModel,
DograhVoice,
ServiceProviders,
)
@ -244,13 +242,13 @@ async def create_user_configuration_with_mps_key(
"llm": {
"provider": ServiceProviders.DOGRAH.value,
"api_key": service_key,
"model": DograhLLMModel.DEFAULT.value, # Default model
"model": "default", # Default model
},
"tts": {
"provider": ServiceProviders.DOGRAH.value,
"api_key": service_key,
"model": DograhTTSModel.DEFAULT.value, # Default model
"voice": DograhVoice.DEFAULT.value, # Default voice
"voice": "default", # Default voice
},
"stt": {
"provider": ServiceProviders.DOGRAH.value,

View file

@ -38,6 +38,7 @@ class UserConfigurationValidator:
ServiceProviders.AZURE.value: self._check_azure_api_key,
ServiceProviders.CARTESIA.value: self._check_cartesia_api_key,
ServiceProviders.DOGRAH.value: self._check_dograh_api_key,
ServiceProviders.SARVAM.value: self._check_sarvam_api_key,
}
async def validate(self, configuration: UserConfiguration) -> APIKeyStatusResponse:
@ -134,20 +135,5 @@ class UserConfigurationValidator:
def _check_dograh_api_key(self, model: str, api_key: str) -> bool:
return True
# def _check_neuphonic_api_key(self, model: str, api_key: str) -> bool:
# if not Neuphonic:
# self._provider_api_key_validity_status[model] = False
# return self._provider_api_key_validity_status[model]
# if model in self._provider_api_key_validity_status:
# return self._provider_api_key_validity_status[model]
# client = Neuphonic(api_key=api_key)
# try:
# response = client.voices.list() # get's all available voices
# voices = response.data["voices"]
# self._provider_api_key_validity_status[model] = True
# except Exception:
# self._provider_api_key_validity_status[model] = False
# return self._provider_api_key_validity_status[model]
def _check_sarvam_api_key(self, model: str, api_key: str) -> bool:
return True

View file

@ -20,6 +20,7 @@ class ServiceProviders(str, Enum):
GOOGLE = "google"
AZURE = "azure"
DOGRAH = "dograh"
SARVAM = "sarvam"
class BaseServiceConfiguration(BaseModel):
@ -31,6 +32,7 @@ class BaseServiceConfiguration(BaseModel):
ServiceProviders.GOOGLE,
ServiceProviders.AZURE,
ServiceProviders.DOGRAH,
# ServiceProviders.SARVAM,
]
api_key: str
@ -92,82 +94,56 @@ def register_stt(cls: Type[BaseSTTConfiguration]):
###################################################### LLM ########################################################################
class OpenAIModel(str, Enum):
GPT3_5_TURBO = "gpt-3.5-turbo"
GPT4_1 = "gpt-4.1"
GPT4_1_MINI = "gpt-4.1-mini"
GPT4_1_NANO = "gpt-4.1-nano"
GPT5 = "gpt-5"
GPT5_MINI = "gpt-5-mini"
GPT5_NANO = "gpt-5-nano"
# Suggested models for each provider (used for UI dropdown).
# The LLM configuration classes expose these lists as JSON-schema "examples";
# their ``model`` fields are plain strings, so the lists are suggestions only.
OPENAI_MODELS = [
    "gpt-4.1",
    "gpt-4.1-mini",
    "gpt-4.1-nano",
    "gpt-5",
    "gpt-5-mini",
    "gpt-5-nano",
    "gpt-3.5-turbo",
]

GOOGLE_MODELS = [
    "gemini-2.0-flash",
    "gemini-2.0-flash-lite",
    "gemini-2.5-flash",
    "gemini-2.5-flash-lite",
]

GROQ_MODELS = [
    "llama-3.3-70b-versatile",
    "deepseek-r1-distill-llama-70b",
    "qwen-qwq-32b",
    "meta-llama/llama-4-scout-17b-16e-instruct",
    "meta-llama/llama-4-maverick-17b-128e-instruct",
    "gemma2-9b-it",
    "llama-3.1-8b-instant",
    "openai/gpt-oss-120b",
]

AZURE_MODELS = [
    "gpt-4.1-mini",
]

DOGRAH_LLM_MODELS = [
    "default",
    "accurate",
    "fast",
    "lite",
    "zen",
    "zen_lite",
]
@register_llm
class OpenAILLMService(BaseLLMConfiguration):
provider: Literal[ServiceProviders.OPENAI] = ServiceProviders.OPENAI
model: OpenAIModel = OpenAIModel.GPT4_1
model: str = Field(default="gpt-4.1", json_schema_extra={"examples": OPENAI_MODELS})
api_key: str
class GoogleModel(str, Enum):
GEMINI_2_0_FLASH = "gemini-2.0-flash"
GEMINI_2_0_FLASH_LITE = "gemini-2.0-flash-lite"
GEMINI_2_5_FLASH = "gemini-2.5-flash"
GEMINI_2_5_FLASH_LITE = "gemini-2.5-flash-lite"
@register_llm
class GoogleLLMService(BaseLLMConfiguration):
provider: Literal[ServiceProviders.GOOGLE] = ServiceProviders.GOOGLE
model: GoogleModel = GoogleModel.GEMINI_2_0_FLASH
model: str = Field(default="gemini-2.0-flash", json_schema_extra={"examples": GOOGLE_MODELS})
api_key: str
class GroqModel(str, Enum):
LLAMA_3_3_70B = "llama-3.3-70b-versatile"
DEEPSEEK_R1_DISTILL_LLAMA_70B = "deepseek-r1-distill-llama-70b"
QUEN_QWQ_32B = "qwen-qwq-32b"
LLAMA_4_SCOUT_17B_16E_INSTRUCT = "meta-llama/llama-4-scout-17b-16e-instruct"
LLAMA_4_MAVERICK_17B_128E_INSTRUCT = "meta-llama/llama-4-maverick-17b-128e-instruct"
GEMMA2_9B_IT = "gemma2-9b-it"
LLAMA_3_1_8B_INSTANT = "llama-3.1-8b-instant"
OPENAI_GPT_OSS_120B = "openai/gpt-oss-120b"
@register_llm
class GroqLLMService(BaseLLMConfiguration):
provider: Literal[ServiceProviders.GROQ] = ServiceProviders.GROQ
model: GroqModel = GroqModel.LLAMA_3_3_70B
model: str = Field(default="llama-3.3-70b-versatile", json_schema_extra={"examples": GROQ_MODELS})
api_key: str
class AzureModel(str, Enum):
GPT4_1_MINI = "gpt-4.1-mini"
@register_llm
class AzureLLMService(BaseLLMConfiguration):
provider: Literal[ServiceProviders.AZURE] = ServiceProviders.AZURE
model: AzureModel = AzureModel.GPT4_1_MINI
model: str = Field(default="gpt-4.1-mini", json_schema_extra={"examples": AZURE_MODELS})
api_key: str
endpoint: str
# Dograh LLM Service
class DograhLLMModel(str, Enum):
DEFAULT = "default"
ACCURATE = "accurate"
FAST = "fast"
LITE = "lite"
ZEN = "zen"
ZEN_LITE = "zen_lite"
@register_llm
class DograhLLMService(BaseLLMConfiguration):
provider: Literal[ServiceProviders.DOGRAH] = ServiceProviders.DOGRAH
model: DograhLLMModel = DograhLLMModel.DEFAULT
model: str = Field(default="default", json_schema_extra={"examples": DOGRAH_LLM_MODELS})
api_key: str
@ -185,15 +161,10 @@ LLMConfig = Annotated[
###################################################### TTS ########################################################################
class DeepgramVoice(str, Enum):
HELENA = "aura-2-helena-en"
THALIA = "aura-2-thalia-en"
@register_tts
class DeepgramTTSConfiguration(BaseServiceConfiguration):
provider: Literal[ServiceProviders.DEEPGRAM] = ServiceProviders.DEEPGRAM
voice: DeepgramVoice = DeepgramVoice.HELENA
voice: str = "aura-2-helena-en"
api_key: str
@computed_field
@ -210,42 +181,6 @@ class DeepgramTTSConfiguration(BaseServiceConfiguration):
return "aura-2"
class ElevenlabsVoice(str, Enum):
ALEXANDRA = "Alexandra - 3dzJXoCYueSQiptQ6euE"
AMY = "Amy - oGn4Ha2pe2vSJkmIJgLQ"
ANGELA = "Angela - FUfBrNit0NNZAwb58KWH"
ARIA = "Aria - 9BWtsMINqrJLrRacOk9x"
CHELSEA = "Chelsea - NHRgOEwqx5WZNClv5sat"
CHRISTINA = "Christina - X03mvPuTfprif8QBAVeJ"
CLARA = "Clara - ZIlrSGI4jZqobxRKprJz"
CLYDE = "Clyde - 2EiwWnXFnvU5JabPnv8n"
DAVE = "Dave - CYw3kZ02Hs0563khs1Fj"
DOMI = "Domi - AZnzlk1XvdvUeBnXmlld"
DREW = "Drew - 29vD33N1CtxCmqQRPOHJ"
ELENA = "Elena_German - iFJwt4O7E3aafIpJFfcu"
EVE = "Eve - BZgkqPqms7Kj9ulSkVzn"
FIN = "Fin - D38z5RcWu1voky8WS1ja"
HOPE_BESTIE = "Hope_Bestie - uYXf8XasLslADfZ2MB4u"
HOPE_NATURAL = "Hope_Natural - OYTbf65OHHFELVut7v2H"
JARNATHAN = "Jarnathan - c6SfcYrb2t09NHXiT80T"
JENNA = "Jenna - C2BkQxlGNzBn7WD2bqfR"
JESSICA = "Jessica - cgSgspJ2msm6clMCkdW9"
JOHANNA = "Johanna_German - YYDsZT3K2y6tv7X1aj6N"
JUNIPER = "Juniper - aMSt68OGf4xUZAnLpTU8"
LAUREN = "Lauren - 3liN8q8YoeB9Hk6AboKe"
LINA = "Lina - oWjuL7HSoaEJRMDMP3HD"
MONIKA = "Monika_Hindi_8 - 2bNrEsM0omyhLiEyOwqY"
NEHA = "Neha_Hindi - QTKSa2Iyv0yoxvXY2V8a"
OLIVIA = "Olivia - 1rviaVF7GGGkTU36HNpz"
PAUL = "Paul - 5Q0t7uMcjvnagumLfvZi"
RACHEL = "Rachel - 21m00Tcm4TlvDq8ikWAM"
ROGER = "Roger - CwhRBWXzGAHq8TQ4Fs17"
SAMI_REAL = "Sami_Real - O4cGUVdAocn0z4EpQ9yF"
SARAH = "Sarah - EXAVITQu4vr4xnSDxMaL"
SIA = "Sia_Hindi_10 - ryIIztHPLYSJ74ueXxnO"
ZARA = "Zara - MmQVkVZnQ0dUbfWzcW6f"
class ElevenlabsModel(str, Enum):
FLASH_2 = "eleven_flash_v2_5"
@ -253,16 +188,12 @@ class ElevenlabsModel(str, Enum):
@register_tts
class ElevenlabsTTSConfiguration(BaseServiceConfiguration):
provider: Literal[ServiceProviders.ELEVENLABS] = ServiceProviders.ELEVENLABS
voice: ElevenlabsVoice = ElevenlabsVoice.RACHEL
voice: str = "21m00Tcm4TlvDq8ikWAM" # Rachel voice ID
speed: float = Field(default=1.0, ge=0.1, le=2.0, description="Speed of the voice")
model: ElevenlabsModel = ElevenlabsModel.FLASH_2
api_key: str
class OpenAIVoice(str, Enum):
ALLY = "alloy"
class OpenAITTSModel(str, Enum):
GPT_4o_MINI = "gpt-4o-mini-tts"
@ -271,29 +202,10 @@ class OpenAITTSModel(str, Enum):
class OpenAITTSService(BaseTTSConfiguration):
provider: Literal[ServiceProviders.OPENAI] = ServiceProviders.OPENAI
model: OpenAITTSModel = OpenAITTSModel.GPT_4o_MINI
voice: OpenAIVoice = OpenAIVoice.ALLY
voice: str = "alloy"
api_key: str
# class NeuphonicVoice(str, Enum):
# EMILY = "Emily - fc854436-2dac-4d21-aa69-ae17b54e98eb"
# @register_tts
# class NeuphonicTTSService(BaseTTSConfiguration):
# provider: Literal[ServiceProviders.NEUPHONIC] = ServiceProviders.NEUPHONIC
# voice: NeuphonicVoice = NeuphonicVoice.EMILY
# model: str = "NA"
# api_key: str
# Dograh TTS Service
class DograhVoice(str, Enum):
DEFAULT = "default"
JOEY = "joey"
RACHEL = "rachel"
class DograhTTSModel(str, Enum):
DEFAULT = "default"
@ -302,16 +214,58 @@ class DograhTTSModel(str, Enum):
class DograhTTSService(BaseTTSConfiguration):
provider: Literal[ServiceProviders.DOGRAH] = ServiceProviders.DOGRAH
model: DograhTTSModel = DograhTTSModel.DEFAULT
voice: DograhVoice = DograhVoice.DEFAULT
voice: str = "default"
api_key: str
class SarvamTTSModel(str, Enum):
    # Model identifiers selectable for Sarvam text-to-speech.
    # String-valued so members compare equal to their raw identifier.

    BULBUL_V2 = "bulbul:v2"
    BULBUL_V3 = "bulbul:v3"
class SarvamVoice(str, Enum):
    # Voice identifiers selectable for Sarvam text-to-speech.
    # String-valued so members compare equal to their raw identifier.

    # Female voices
    ANUSHKA = "anushka"
    MANISHA = "manisha"
    VIDYA = "vidya"
    ARYA = "arya"

    # Male voices
    ABHILASH = "abhilash"
    KARUN = "karun"
    HITESH = "hitesh"
class SarvamLanguage(str, Enum):
    # Language codes selectable for the Sarvam services; every value carries
    # the "-IN" region suffix.
    # String-valued so members compare equal to their raw code.

    BENGALI = "bn-IN"
    ENGLISH_INDIA = "en-IN"
    GUJARATI = "gu-IN"
    HINDI = "hi-IN"
    KANNADA = "kn-IN"
    MALAYALAM = "ml-IN"
    MARATHI = "mr-IN"
    ODIA = "od-IN"
    PUNJABI = "pa-IN"
    TAMIL = "ta-IN"
    TELUGU = "te-IN"
    ASSAMESE = "as-IN"
# @register_tts
# class SarvamTTSConfiguration(BaseTTSConfiguration):
# provider: Literal[ServiceProviders.SARVAM] = ServiceProviders.SARVAM
# model: SarvamTTSModel = SarvamTTSModel.BULBUL_V2
# voice: SarvamVoice = SarvamVoice.ANUSHKA
# language: SarvamLanguage = SarvamLanguage.HINDI
# api_key: str
TTSConfig = Annotated[
Union[
DeepgramTTSConfiguration,
OpenAITTSService,
ElevenlabsTTSConfiguration,
DograhTTSService,
# SarvamTTSConfiguration,
],
Field(discriminator="provider"),
]
@ -323,10 +277,45 @@ class DeepgramSTTModel(str, Enum):
NOVA_3_GENERAL = "nova-3-general"
class DeepgramLanguage(str, Enum):
    # Language codes selectable for Deepgram speech-to-text.
    # String-valued so members compare equal to their raw code.
    # Documented with comments (not a docstring) so any JSON schema generated
    # from this enum is unchanged.

    # Multilingual mode.
    MULTI = "multi"

    # English and regional variants.
    ENGLISH = "en"
    ENGLISH_US = "en-US"
    ENGLISH_GB = "en-GB"
    ENGLISH_AU = "en-AU"
    ENGLISH_IN = "en-IN"

    # Spanish / French and regional variants.
    SPANISH = "es"
    SPANISH_LATAM = "es-419"
    FRENCH = "fr"
    FRENCH_CA = "fr-CA"

    GERMAN = "de"
    ITALIAN = "it"

    # Portuguese and regional variant.
    PORTUGUESE = "pt"
    PORTUGUESE_BR = "pt-BR"

    DUTCH = "nl"
    HINDI = "hi"
    JAPANESE = "ja"
    KOREAN = "ko"

    # Chinese variants.
    CHINESE_SIMPLIFIED = "zh-CN"
    CHINESE_TRADITIONAL = "zh-TW"

    RUSSIAN = "ru"
    POLISH = "pl"
    TURKISH = "tr"
    UKRAINIAN = "uk"
    VIETNAMESE = "vi"
    SWEDISH = "sv"
    DANISH = "da"
    NORWEGIAN = "no"
    FINNISH = "fi"
    INDONESIAN = "id"
    THAI = "th"
@register_stt
class DeepgramSTTConfiguration(BaseSTTConfiguration):
provider: Literal[ServiceProviders.DEEPGRAM] = ServiceProviders.DEEPGRAM
model: DeepgramSTTModel = DeepgramSTTModel.NOVA_3_GENERAL
language: DeepgramLanguage = DeepgramLanguage.MULTI
api_key: str
@ -359,8 +348,27 @@ class DograhSTTService(BaseSTTConfiguration):
api_key: str
# Sarvam STT Service
class SarvamSTTModel(str, Enum):
    # Model identifiers selectable for Sarvam speech-to-text.
    # String-valued so members compare equal to their raw identifier.

    SAARIKA_V2_5 = "saarika:v2.5"
    # STT-Translate model (auto-detects language)
    SAARAS_V2 = "saaras:v2"
# @register_stt
# class SarvamSTTConfiguration(BaseSTTConfiguration):
# provider: Literal[ServiceProviders.SARVAM] = ServiceProviders.SARVAM
# model: SarvamSTTModel = SarvamSTTModel.SAARIKA_V2_5
# language: SarvamLanguage = SarvamLanguage.HINDI
# api_key: str
STTConfig = Annotated[
Union[DeepgramSTTConfiguration, OpenAISTTConfiguration, DograhSTTService],
Union[
DeepgramSTTConfiguration,
OpenAISTTConfiguration,
DograhSTTService,
# SarvamSTTConfiguration,
],
Field(discriminator="provider"),
]

View file

@ -285,6 +285,44 @@ class MPSServiceKeyClient:
response=response,
)
async def get_voices(
    self,
    provider: str,
    organization_id: Optional[int] = None,
    created_by: Optional[str] = None,
) -> dict:
    """
    Get available voices for a TTS provider from MPS.

    Sends a GET request to ``{base_url}/api/v1/voice-proxy/{provider}/voices``
    using the headers built by ``_get_headers(organization_id, created_by)``.

    Args:
        provider: TTS provider name (elevenlabs, deepgram, sarvam, cartesia)
        organization_id: Organization ID (for authenticated mode)
        created_by: User provider ID (for OSS mode)

    Returns:
        The decoded JSON body of the MPS response (expected to be a
        dictionary containing the provider name and the list of voices).

    Raises:
        httpx.HTTPStatusError: If MPS responds with any status code other
            than 200. Transport-level ``httpx`` errors raised by the request
            itself are not caught here and propagate to the caller.
    """
    async with httpx.AsyncClient(timeout=self.timeout) as client:
        response = await client.get(
            f"{self.base_url}/api/v1/voice-proxy/{provider}/voices",
            headers=self._get_headers(organization_id, created_by),
        )

        # Guard clause: anything but 200 is logged and surfaced as an error
        # that still carries the original request/response for inspection.
        if response.status_code != 200:
            logger.error(
                f"Failed to get voices for {provider}: {response.status_code} - {response.text}"
            )
            raise httpx.HTTPStatusError(
                f"Failed to get voices: {response.text}",
                request=response.request,
                response=response,
            )

        return response.json()
async def call_workflow_api(
self,
call_type: str,

View file

@ -17,6 +17,9 @@ from pipecat.services.groq.llm import GroqLLMService
from pipecat.services.openai.llm import OpenAILLMService
from pipecat.services.openai.stt import OpenAISTTService
from pipecat.services.openai.tts import OpenAITTSService
from pipecat.services.sarvam.stt import SarvamSTTService
from pipecat.services.sarvam.tts import SarvamTTSService
from pipecat.transcriptions.language import Language
from pipecat.utils.text.xml_function_tag_filter import XMLFunctionTagFilter
if TYPE_CHECKING:
@ -26,8 +29,13 @@ if TYPE_CHECKING:
def create_stt_service(user_config):
"""Create and return appropriate STT service based on user configuration"""
if user_config.stt.provider == ServiceProviders.DEEPGRAM.value:
# Use language from user config, defaulting to "multi" for multilingual support
language = getattr(user_config.stt, "language", None)
language_value = (
language.value if hasattr(language, "value") else (language or "multi")
)
live_options = LiveOptions(
language="multi", profanity_filter=False, endpointing=100
language=language_value, profanity_filter=False, endpointing=100
)
return DeepgramSTTService(
live_options=live_options,
@ -53,6 +61,32 @@ def create_stt_service(user_config):
model=user_config.stt.model.value,
audio_passthrough=False, # Disable passthrough since audio is buffered separately
)
elif user_config.stt.provider == ServiceProviders.SARVAM.value:
# Map Sarvam language code to pipecat Language enum
language_mapping = {
"bn-IN": Language.BN_IN,
"gu-IN": Language.GU_IN,
"hi-IN": Language.HI_IN,
"kn-IN": Language.KN_IN,
"ml-IN": Language.ML_IN,
"mr-IN": Language.MR_IN,
"ta-IN": Language.TA_IN,
"te-IN": Language.TE_IN,
"pa-IN": Language.PA_IN,
"od-IN": Language.OR_IN,
"en-IN": Language.EN_IN,
"as-IN": Language.AS_IN,
}
language = getattr(user_config.stt, "language", None)
language_value = language.value if hasattr(language, "value") else language
pipecat_language = language_mapping.get(language_value, Language.HI_IN)
return SarvamSTTService(
api_key=user_config.stt.api_key,
model=user_config.stt.model.value,
params=SarvamSTTService.InputParams(language=pipecat_language),
audio_passthrough=False,
)
else:
raise HTTPException(
status_code=400, detail=f"Invalid STT provider {user_config.stt.provider}"
@ -81,7 +115,12 @@ def create_tts_service(user_config, audio_config: "AudioConfig"):
text_filters=[xml_function_tag_filter],
)
elif user_config.tts.provider == ServiceProviders.ELEVENLABS.value:
voice_id = user_config.tts.voice.split(" - ")[1]
# Backward compatible with older configuration "Name - voice_id"
try:
voice_id = user_config.tts.voice.split(" - ")[1]
except IndexError:
voice_id = user_config.tts.voice
return ElevenLabsTTSService(
reconnect_on_error=False,
api_key=user_config.tts.api_key,
@ -103,6 +142,35 @@ def create_tts_service(user_config, audio_config: "AudioConfig"):
voice=user_config.tts.voice.value,
text_filters=[xml_function_tag_filter],
)
elif user_config.tts.provider == ServiceProviders.SARVAM.value:
# Map Sarvam language code to pipecat Language enum for TTS
language_mapping = {
"bn-IN": Language.BN,
"en-IN": Language.EN,
"gu-IN": Language.GU,
"hi-IN": Language.HI,
"kn-IN": Language.KN,
"ml-IN": Language.ML,
"mr-IN": Language.MR,
"od-IN": Language.OR,
"pa-IN": Language.PA,
"ta-IN": Language.TA,
"te-IN": Language.TE,
}
language = getattr(user_config.tts, "language", None)
language_value = language.value if hasattr(language, "value") else language
pipecat_language = language_mapping.get(language_value, Language.HI)
voice = getattr(user_config.tts, "voice", None)
voice_value = voice.value if hasattr(voice, "value") else (voice or "anushka")
return SarvamTTSService(
api_key=user_config.tts.api_key,
model=user_config.tts.model.value,
voice_id=voice_value,
params=SarvamTTSService.InputParams(language=pipecat_language),
text_filters=[xml_function_tag_filter],
)
else:
raise HTTPException(
status_code=400, detail=f"Invalid TTS provider {user_config.tts.provider}"

View file

@ -5,7 +5,6 @@ from api.schemas.user_configuration import UserConfiguration
from api.services.configuration.masking import is_mask_of, mask_key, mask_user_config
from api.services.configuration.merge import merge_user_configurations
from api.services.configuration.registry import (
GroqModel,
OpenAILLMService,
)
@ -70,7 +69,7 @@ def test_merge_drops_old_key_when_provider_changes():
incoming_partial = {
"llm": {
"provider": "groq",
"model": GroqModel.LLAMA_3_3_70B,
"model": "llama-3.3-70b-versatile",
# api_key intentionally absent should NOT inherit old key
}
}