fix: change type definition from enum to str for consistency

This commit is contained in:
Abhishek Kumar 2025-12-26 16:00:02 +05:30
parent 74b069354b
commit e83f3a36d2
4 changed files with 147 additions and 150 deletions

View file

@ -10,11 +10,7 @@ from api.db import db_client
from api.db.models import UserModel
from api.schemas.user_configuration import UserConfiguration
from api.services.auth.stack_auth import stackauth
from api.services.configuration.registry import (
DograhSTTModel,
DograhTTSModel,
ServiceProviders,
)
from api.services.configuration.registry import ServiceProviders
async def get_user(
@ -242,18 +238,18 @@ async def create_user_configuration_with_mps_key(
"llm": {
"provider": ServiceProviders.DOGRAH.value,
"api_key": service_key,
"model": "default", # Default model
"model": "default",
},
"tts": {
"provider": ServiceProviders.DOGRAH.value,
"api_key": service_key,
"model": DograhTTSModel.DEFAULT.value, # Default model
"voice": "default", # Default voice
"model": "default",
"voice": "default",
},
"stt": {
"provider": ServiceProviders.DOGRAH.value,
"api_key": service_key,
"model": DograhSTTModel.DEFAULT.value, # Default model
"model": "default",
},
}
user_config = UserConfiguration(**configuration)

View file

@ -95,8 +95,21 @@ def register_stt(cls: Type[BaseSTTConfiguration]):
###################################################### LLM ########################################################################
# Suggested models for each provider (used for UI dropdown)
OPENAI_MODELS = ["gpt-4.1", "gpt-4.1-mini", "gpt-4.1-nano", "gpt-5", "gpt-5-mini", "gpt-5-nano", "gpt-3.5-turbo"]
GOOGLE_MODELS = ["gemini-2.0-flash", "gemini-2.0-flash-lite", "gemini-2.5-flash", "gemini-2.5-flash-lite"]
OPENAI_MODELS = [
"gpt-4.1",
"gpt-4.1-mini",
"gpt-4.1-nano",
"gpt-5",
"gpt-5-mini",
"gpt-5-nano",
"gpt-3.5-turbo",
]
GOOGLE_MODELS = [
"gemini-2.0-flash",
"gemini-2.0-flash-lite",
"gemini-2.5-flash",
"gemini-2.5-flash-lite",
]
GROQ_MODELS = [
"llama-3.3-70b-versatile",
"deepseek-r1-distill-llama-70b",
@ -121,21 +134,27 @@ class OpenAILLMService(BaseLLMConfiguration):
@register_llm
class GoogleLLMService(BaseLLMConfiguration):
provider: Literal[ServiceProviders.GOOGLE] = ServiceProviders.GOOGLE
model: str = Field(default="gemini-2.0-flash", json_schema_extra={"examples": GOOGLE_MODELS})
model: str = Field(
default="gemini-2.0-flash", json_schema_extra={"examples": GOOGLE_MODELS}
)
api_key: str
@register_llm
class GroqLLMService(BaseLLMConfiguration):
provider: Literal[ServiceProviders.GROQ] = ServiceProviders.GROQ
model: str = Field(default="llama-3.3-70b-versatile", json_schema_extra={"examples": GROQ_MODELS})
model: str = Field(
default="llama-3.3-70b-versatile", json_schema_extra={"examples": GROQ_MODELS}
)
api_key: str
@register_llm
class AzureLLMService(BaseLLMConfiguration):
provider: Literal[ServiceProviders.AZURE] = ServiceProviders.AZURE
model: str = Field(default="gpt-4.1-mini", json_schema_extra={"examples": AZURE_MODELS})
model: str = Field(
default="gpt-4.1-mini", json_schema_extra={"examples": AZURE_MODELS}
)
api_key: str
endpoint: str
@ -143,7 +162,9 @@ class AzureLLMService(BaseLLMConfiguration):
@register_llm
class DograhLLMService(BaseLLMConfiguration):
provider: Literal[ServiceProviders.DOGRAH] = ServiceProviders.DOGRAH
model: str = Field(default="default", json_schema_extra={"examples": DOGRAH_LLM_MODELS})
model: str = Field(
default="default", json_schema_extra={"examples": DOGRAH_LLM_MODELS}
)
api_key: str
@ -181,8 +202,7 @@ class DeepgramTTSConfiguration(BaseServiceConfiguration):
return "aura-2"
class ElevenlabsModel(str, Enum):
FLASH_2 = "eleven_flash_v2_5"
ELEVENLABS_TTS_MODELS = ["eleven_flash_v2_5"]
@register_tts
@ -190,72 +210,63 @@ class ElevenlabsTTSConfiguration(BaseServiceConfiguration):
provider: Literal[ServiceProviders.ELEVENLABS] = ServiceProviders.ELEVENLABS
voice: str = "21m00Tcm4TlvDq8ikWAM" # Rachel voice ID
speed: float = Field(default=1.0, ge=0.1, le=2.0, description="Speed of the voice")
model: ElevenlabsModel = ElevenlabsModel.FLASH_2
model: str = Field(
default="eleven_flash_v2_5",
json_schema_extra={"examples": ELEVENLABS_TTS_MODELS},
)
api_key: str
class OpenAITTSModel(str, Enum):
GPT_4o_MINI = "gpt-4o-mini-tts"
OPENAI_TTS_MODELS = ["gpt-4o-mini-tts"]
@register_tts
class OpenAITTSService(BaseTTSConfiguration):
provider: Literal[ServiceProviders.OPENAI] = ServiceProviders.OPENAI
model: OpenAITTSModel = OpenAITTSModel.GPT_4o_MINI
model: str = Field(
default="gpt-4o-mini-tts", json_schema_extra={"examples": OPENAI_TTS_MODELS}
)
voice: str = "alloy"
api_key: str
class DograhTTSModel(str, Enum):
DEFAULT = "default"
DOGRAH_TTS_MODELS = ["default"]
@register_tts
class DograhTTSService(BaseTTSConfiguration):
provider: Literal[ServiceProviders.DOGRAH] = ServiceProviders.DOGRAH
model: DograhTTSModel = DograhTTSModel.DEFAULT
model: str = Field(
default="default", json_schema_extra={"examples": DOGRAH_TTS_MODELS}
)
voice: str = "default"
api_key: str
class SarvamTTSModel(str, Enum):
BULBUL_V2 = "bulbul:v2"
BULBUL_V3 = "bulbul:v3"
class SarvamVoice(str, Enum):
# Female voices
ANUSHKA = "anushka"
MANISHA = "manisha"
VIDYA = "vidya"
ARYA = "arya"
# Male voices
ABHILASH = "abhilash"
KARUN = "karun"
HITESH = "hitesh"
class SarvamLanguage(str, Enum):
BENGALI = "bn-IN"
ENGLISH_INDIA = "en-IN"
GUJARATI = "gu-IN"
HINDI = "hi-IN"
KANNADA = "kn-IN"
MALAYALAM = "ml-IN"
MARATHI = "mr-IN"
ODIA = "od-IN"
PUNJABI = "pa-IN"
TAMIL = "ta-IN"
TELUGU = "te-IN"
ASSAMESE = "as-IN"
SARVAM_TTS_MODELS = ["bulbul:v2", "bulbul:v3"]
SARVAM_VOICES = ["anushka", "manisha", "vidya", "arya", "abhilash", "karun", "hitesh"]
SARVAM_LANGUAGES = [
"bn-IN",
"en-IN",
"gu-IN",
"hi-IN",
"kn-IN",
"ml-IN",
"mr-IN",
"od-IN",
"pa-IN",
"ta-IN",
"te-IN",
"as-IN",
]
# @register_tts
# class SarvamTTSConfiguration(BaseTTSConfiguration):
# provider: Literal[ServiceProviders.SARVAM] = ServiceProviders.SARVAM
# model: SarvamTTSModel = SarvamTTSModel.BULBUL_V2
# voice: SarvamVoice = SarvamVoice.ANUSHKA
# language: SarvamLanguage = SarvamLanguage.HINDI
# model: str = Field(default="bulbul:v2", json_schema_extra={"examples": SARVAM_TTS_MODELS})
# voice: str = Field(default="anushka", json_schema_extra={"examples": SARVAM_VOICES})
# language: str = Field(default="hi-IN", json_schema_extra={"examples": SARVAM_LANGUAGES})
# api_key: str
@ -273,49 +284,51 @@ TTSConfig = Annotated[
###################################################### STT ########################################################################
class DeepgramSTTModel(str, Enum):
NOVA_3_GENERAL = "nova-3-general"
class DeepgramLanguage(str, Enum):
MULTI = "multi"
ENGLISH = "en"
ENGLISH_US = "en-US"
ENGLISH_GB = "en-GB"
ENGLISH_AU = "en-AU"
ENGLISH_IN = "en-IN"
SPANISH = "es"
SPANISH_LATAM = "es-419"
FRENCH = "fr"
FRENCH_CA = "fr-CA"
GERMAN = "de"
ITALIAN = "it"
PORTUGUESE = "pt"
PORTUGUESE_BR = "pt-BR"
DUTCH = "nl"
HINDI = "hi"
JAPANESE = "ja"
KOREAN = "ko"
CHINESE_SIMPLIFIED = "zh-CN"
CHINESE_TRADITIONAL = "zh-TW"
RUSSIAN = "ru"
POLISH = "pl"
TURKISH = "tr"
UKRAINIAN = "uk"
VIETNAMESE = "vi"
SWEDISH = "sv"
DANISH = "da"
NORWEGIAN = "no"
FINNISH = "fi"
INDONESIAN = "id"
THAI = "th"
DEEPGRAM_STT_MODELS = ["nova-3-general"]
DEEPGRAM_LANGUAGES = [
"multi",
"en",
"en-US",
"en-GB",
"en-AU",
"en-IN",
"es",
"es-419",
"fr",
"fr-CA",
"de",
"it",
"pt",
"pt-BR",
"nl",
"hi",
"ja",
"ko",
"zh-CN",
"zh-TW",
"ru",
"pl",
"tr",
"uk",
"vi",
"sv",
"da",
"no",
"fi",
"id",
"th",
]
@register_stt
class DeepgramSTTConfiguration(BaseSTTConfiguration):
provider: Literal[ServiceProviders.DEEPGRAM] = ServiceProviders.DEEPGRAM
model: DeepgramSTTModel = DeepgramSTTModel.NOVA_3_GENERAL
language: DeepgramLanguage = DeepgramLanguage.MULTI
model: str = Field(
default="nova-3-general", json_schema_extra={"examples": DEEPGRAM_STT_MODELS}
)
language: str = Field(
default="multi", json_schema_extra={"examples": DEEPGRAM_LANGUAGES}
)
api_key: str
@ -325,40 +338,40 @@ class CartesiaSTTConfiguration(BaseSTTConfiguration):
api_key: str
class OpenAISTTModel(str, Enum):
GPT_4o_TRANSCRIBE = "gpt-4o-transcribe"
OPENAI_STT_MODELS = ["gpt-4o-transcribe"]
@register_stt
class OpenAISTTConfiguration(BaseSTTConfiguration):
provider: Literal[ServiceProviders.OPENAI] = ServiceProviders.OPENAI
model: OpenAISTTModel = OpenAISTTModel.GPT_4o_TRANSCRIBE
model: str = Field(
default="gpt-4o-transcribe", json_schema_extra={"examples": OPENAI_STT_MODELS}
)
api_key: str
# Dograh STT Service
class DograhSTTModel(str, Enum):
DEFAULT = "default"
DOGRAH_STT_MODELS = ["default"]
@register_stt
class DograhSTTService(BaseSTTConfiguration):
provider: Literal[ServiceProviders.DOGRAH] = ServiceProviders.DOGRAH
model: DograhSTTModel = DograhSTTModel.DEFAULT
model: str = Field(
default="default", json_schema_extra={"examples": DOGRAH_STT_MODELS}
)
api_key: str
# Sarvam STT Service
class SarvamSTTModel(str, Enum):
SAARIKA_V2_5 = "saarika:v2.5"
SAARAS_V2 = "saaras:v2" # STT-Translate model (auto-detects language)
SARVAM_STT_MODELS = ["saarika:v2.5", "saaras:v2"]
# @register_stt
# class SarvamSTTConfiguration(BaseSTTConfiguration):
# provider: Literal[ServiceProviders.SARVAM] = ServiceProviders.SARVAM
# model: SarvamSTTModel = SarvamSTTModel.SAARIKA_V2_5
# language: SarvamLanguage = SarvamLanguage.HINDI
# model: str = Field(default="saarika:v2.5", json_schema_extra={"examples": SARVAM_STT_MODELS})
# language: str = Field(default="hi-IN", json_schema_extra={"examples": SARVAM_LANGUAGES})
# api_key: str

View file

@ -30,12 +30,9 @@ def create_stt_service(user_config):
"""Create and return appropriate STT service based on user configuration"""
if user_config.stt.provider == ServiceProviders.DEEPGRAM.value:
# Use language from user config, defaulting to "multi" for multilingual support
language = getattr(user_config.stt, "language", None)
language_value = (
language.value if hasattr(language, "value") else (language or "multi")
)
language = getattr(user_config.stt, "language", None) or "multi"
live_options = LiveOptions(
language=language_value, profanity_filter=False, endpointing=100
language=language, profanity_filter=False, endpointing=100
)
return DeepgramSTTService(
live_options=live_options,
@ -45,7 +42,7 @@ def create_stt_service(user_config):
elif user_config.stt.provider == ServiceProviders.OPENAI.value:
return OpenAISTTService(
api_key=user_config.stt.api_key,
model=user_config.stt.model.value,
model=user_config.stt.model,
audio_passthrough=False, # Disable passthrough since audio is buffered separately
)
elif user_config.stt.provider == ServiceProviders.CARTESIA.value:
@ -58,7 +55,7 @@ def create_stt_service(user_config):
return DograhSTTService(
base_url=base_url,
api_key=user_config.stt.api_key,
model=user_config.stt.model.value,
model=user_config.stt.model,
audio_passthrough=False, # Disable passthrough since audio is buffered separately
)
elif user_config.stt.provider == ServiceProviders.SARVAM.value:
@ -78,12 +75,10 @@ def create_stt_service(user_config):
"as-IN": Language.AS_IN,
}
language = getattr(user_config.stt, "language", None)
language_value = language.value if hasattr(language, "value") else language
pipecat_language = language_mapping.get(language_value, Language.HI_IN)
pipecat_language = language_mapping.get(language, Language.HI_IN)
return SarvamSTTService(
api_key=user_config.stt.api_key,
model=user_config.stt.model.value,
model=user_config.stt.model,
params=SarvamSTTService.InputParams(language=pipecat_language),
audio_passthrough=False,
)
@ -105,13 +100,13 @@ def create_tts_service(user_config, audio_config: "AudioConfig"):
if user_config.tts.provider == ServiceProviders.DEEPGRAM.value:
return DeepgramTTSService(
api_key=user_config.tts.api_key,
voice=user_config.tts.voice.value,
voice=user_config.tts.voice,
text_filters=[xml_function_tag_filter],
)
elif user_config.tts.provider == ServiceProviders.OPENAI.value:
return OpenAITTSService(
api_key=user_config.tts.api_key,
model=user_config.tts.model.value,
model=user_config.tts.model,
text_filters=[xml_function_tag_filter],
)
elif user_config.tts.provider == ServiceProviders.ELEVENLABS.value:
@ -120,12 +115,11 @@ def create_tts_service(user_config, audio_config: "AudioConfig"):
voice_id = user_config.tts.voice.split(" - ")[1]
except IndexError:
voice_id = user_config.tts.voice
return ElevenLabsTTSService(
reconnect_on_error=False,
api_key=user_config.tts.api_key,
voice_id=voice_id,
model=user_config.tts.model.value,
model=user_config.tts.model,
params=ElevenLabsTTSService.InputParams(
stability=0.8, speed=user_config.tts.speed, similarity_boost=0.75
),
@ -134,12 +128,11 @@ def create_tts_service(user_config, audio_config: "AudioConfig"):
elif user_config.tts.provider == ServiceProviders.DOGRAH.value:
# Convert HTTP URL to WebSocket URL for TTS
base_url = MPS_API_URL.replace("http://", "ws://").replace("https://", "wss://")
# Handle both enum and string values for model and voice
return DograhTTSService(
base_url=base_url,
api_key=user_config.tts.api_key,
model=user_config.tts.model.value,
voice=user_config.tts.voice.value,
model=user_config.tts.model,
voice=user_config.tts.voice,
text_filters=[xml_function_tag_filter],
)
elif user_config.tts.provider == ServiceProviders.SARVAM.value:
@ -158,16 +151,13 @@ def create_tts_service(user_config, audio_config: "AudioConfig"):
"te-IN": Language.TE,
}
language = getattr(user_config.tts, "language", None)
language_value = language.value if hasattr(language, "value") else language
pipecat_language = language_mapping.get(language_value, Language.HI)
voice = getattr(user_config.tts, "voice", None)
voice_value = voice.value if hasattr(voice, "value") else (voice or "anushka")
pipecat_language = language_mapping.get(language, Language.HI)
voice = getattr(user_config.tts, "voice", None) or "anushka"
return SarvamTTSService(
api_key=user_config.tts.api_key,
model=user_config.tts.model.value,
voice_id=voice_value,
model=user_config.tts.model,
voice_id=voice,
params=SarvamTTSService.InputParams(language=pipecat_language),
text_filters=[xml_function_tag_filter],
)
@ -179,17 +169,12 @@ def create_tts_service(user_config, audio_config: "AudioConfig"):
def create_llm_service(user_config):
"""Create and return appropriate LLM service based on user configuration"""
# Handle both enum and string values for model
model_value = (
user_config.llm.model.value
if hasattr(user_config.llm.model, "value")
else user_config.llm.model
)
model = user_config.llm.model
if user_config.llm.provider == ServiceProviders.OPENAI.value:
if "gpt-5" in model_value:
if "gpt-5" in model:
return OpenAILLMService(
api_key=user_config.llm.api_key,
model=model_value,
model=model,
params=OpenAILLMService.InputParams(
reasoning_effort="minimal", verbosity="low"
),
@ -197,16 +182,16 @@ def create_llm_service(user_config):
else:
return OpenAILLMService(
api_key=user_config.llm.api_key,
model=model_value,
model=model,
params=OpenAILLMService.InputParams(temperature=0.1),
)
elif user_config.llm.provider == ServiceProviders.GROQ.value:
print(
f"Creating Groq LLM service with API key: {user_config.llm.api_key} and model: {model_value}"
f"Creating Groq LLM service with API key: {user_config.llm.api_key} and model: {model}"
)
return GroqLLMService(
api_key=user_config.llm.api_key,
model=model_value,
model=model,
params=OpenAILLMService.InputParams(temperature=0.1),
)
elif user_config.llm.provider == ServiceProviders.GOOGLE.value:
@ -214,21 +199,21 @@ def create_llm_service(user_config):
# NOT_GIVEN sentinels that break Pydantic validation in GoogleLLMService.
return GoogleLLMService(
api_key=user_config.llm.api_key,
model=model_value,
model=model,
params=GoogleLLMService.InputParams(temperature=0.1),
)
elif user_config.llm.provider == ServiceProviders.AZURE.value:
return AzureLLMService(
api_key=user_config.llm.api_key,
endpoint=user_config.llm.endpoint,
model=model_value, # Azure uses deployment name as model
model=model, # Azure uses deployment name as model
params=AzureLLMService.InputParams(temperature=0.1),
)
elif user_config.llm.provider == ServiceProviders.DOGRAH.value:
return DograhLLMService(
base_url=f"{MPS_API_URL}/api/v1/llm",
api_key=user_config.llm.api_key,
model=model_value,
model=model,
)
else:
raise HTTPException(status_code=400, detail="Invalid LLM provider")

View file

@ -383,11 +383,12 @@ export default function ServiceConfiguration() {
? providerSchema.$defs[schema.$ref.split('/').pop() || '']
: schema;
// Use VoiceSelector for voice field in TTS service (except Sarvam which uses enum)
// Use VoiceSelector for voice field in TTS service (except Sarvam which uses predefined options)
if (service === "tts" && field === "voice") {
const currentProvider = serviceProviders.tts;
// Sarvam uses enum-based voice selection, not VoiceSelector
if (currentProvider !== "sarvam" && !actualSchema?.enum) {
// Sarvam uses predefined voice options, not VoiceSelector
const hasVoiceOptions = actualSchema?.enum || actualSchema?.examples;
if (currentProvider !== "sarvam" && !hasVoiceOptions) {
return (
<VoiceSelector
provider={currentProvider}
@ -478,7 +479,9 @@ export default function ServiceConfiguration() {
);
}
if (actualSchema?.enum) {
// Handle fields with enum or examples (dropdown options)
const dropdownOptions = actualSchema?.enum || actualSchema?.examples;
if (dropdownOptions && dropdownOptions.length > 0) {
// Use friendly display names for language and voice fields
const getDisplayName = (value: string) => {
if (field === "language") {
@ -504,7 +507,7 @@ export default function ServiceConfiguration() {
<SelectValue placeholder={`Select ${field}`} />
</SelectTrigger>
<SelectContent>
{actualSchema.enum.map((value: string) => (
{dropdownOptions.map((value: string) => (
<SelectItem key={value} value={value}>
{getDisplayName(value)}
</SelectItem>