fix: change type definition from enum to str for consistency

2026-07-22 11:51:04 +02:00 · 2025-12-26 16:00:02 +05:30 · 2025-12-26 16:00:02 +05:30 · e83f3a36d2
commit e83f3a36d2
parent 74b069354b
4 changed files with 147 additions and 150 deletions
--- a/api/services/auth/depends.py
+++ b/api/services/auth/depends.py
@@ -10,11 +10,7 @@ from api.db import db_client
 from api.db.models import UserModel
 from api.schemas.user_configuration import UserConfiguration
 from api.services.auth.stack_auth import stackauth
-from api.services.configuration.registry import (
-    DograhSTTModel,
-    DograhTTSModel,
-    ServiceProviders,
-)
+from api.services.configuration.registry import ServiceProviders


 async def get_user(
@@ -242,18 +238,18 @@ async def create_user_configuration_with_mps_key(
                    "llm": {
                        "provider": ServiceProviders.DOGRAH.value,
                        "api_key": service_key,
-                        "model": "default",  # Default model
+                        "model": "default",
                    },
                    "tts": {
                        "provider": ServiceProviders.DOGRAH.value,
                        "api_key": service_key,
-                        "model": DograhTTSModel.DEFAULT.value,  # Default model
-                        "voice": "default",  # Default voice
+                        "model": "default",
+                        "voice": "default",
                    },
                    "stt": {
                        "provider": ServiceProviders.DOGRAH.value,
                        "api_key": service_key,
-                        "model": DograhSTTModel.DEFAULT.value,  # Default model
+                        "model": "default",
                    },
                }
                user_config = UserConfiguration(**configuration)
--- a/api/services/configuration/registry.py
+++ b/api/services/configuration/registry.py
@@ -95,8 +95,21 @@ def register_stt(cls: Type[BaseSTTConfiguration]):
 ###################################################### LLM ########################################################################

 # Suggested models for each provider (used for UI dropdown)
-OPENAI_MODELS = ["gpt-4.1", "gpt-4.1-mini", "gpt-4.1-nano", "gpt-5", "gpt-5-mini", "gpt-5-nano", "gpt-3.5-turbo"]
-GOOGLE_MODELS = ["gemini-2.0-flash", "gemini-2.0-flash-lite", "gemini-2.5-flash", "gemini-2.5-flash-lite"]
+OPENAI_MODELS = [
+    "gpt-4.1",
+    "gpt-4.1-mini",
+    "gpt-4.1-nano",
+    "gpt-5",
+    "gpt-5-mini",
+    "gpt-5-nano",
+    "gpt-3.5-turbo",
+]
+GOOGLE_MODELS = [
+    "gemini-2.0-flash",
+    "gemini-2.0-flash-lite",
+    "gemini-2.5-flash",
+    "gemini-2.5-flash-lite",
+]
 GROQ_MODELS = [
    "llama-3.3-70b-versatile",
    "deepseek-r1-distill-llama-70b",
@@ -121,21 +134,27 @@ class OpenAILLMService(BaseLLMConfiguration):
 @register_llm
 class GoogleLLMService(BaseLLMConfiguration):
    provider: Literal[ServiceProviders.GOOGLE] = ServiceProviders.GOOGLE
-    model: str = Field(default="gemini-2.0-flash", json_schema_extra={"examples": GOOGLE_MODELS})
+    model: str = Field(
+        default="gemini-2.0-flash", json_schema_extra={"examples": GOOGLE_MODELS}
+    )
    api_key: str


 @register_llm
 class GroqLLMService(BaseLLMConfiguration):
    provider: Literal[ServiceProviders.GROQ] = ServiceProviders.GROQ
-    model: str = Field(default="llama-3.3-70b-versatile", json_schema_extra={"examples": GROQ_MODELS})
+    model: str = Field(
+        default="llama-3.3-70b-versatile", json_schema_extra={"examples": GROQ_MODELS}
+    )
    api_key: str


 @register_llm
 class AzureLLMService(BaseLLMConfiguration):
    provider: Literal[ServiceProviders.AZURE] = ServiceProviders.AZURE
-    model: str = Field(default="gpt-4.1-mini", json_schema_extra={"examples": AZURE_MODELS})
+    model: str = Field(
+        default="gpt-4.1-mini", json_schema_extra={"examples": AZURE_MODELS}
+    )
    api_key: str
    endpoint: str

@@ -143,7 +162,9 @@ class AzureLLMService(BaseLLMConfiguration):
 @register_llm
 class DograhLLMService(BaseLLMConfiguration):
    provider: Literal[ServiceProviders.DOGRAH] = ServiceProviders.DOGRAH
-    model: str = Field(default="default", json_schema_extra={"examples": DOGRAH_LLM_MODELS})
+    model: str = Field(
+        default="default", json_schema_extra={"examples": DOGRAH_LLM_MODELS}
+    )
    api_key: str


@@ -181,8 +202,7 @@ class DeepgramTTSConfiguration(BaseServiceConfiguration):
            return "aura-2"


-class ElevenlabsModel(str, Enum):
-    FLASH_2 = "eleven_flash_v2_5"
+ELEVENLABS_TTS_MODELS = ["eleven_flash_v2_5"]


 @register_tts
@@ -190,72 +210,63 @@ class ElevenlabsTTSConfiguration(BaseServiceConfiguration):
    provider: Literal[ServiceProviders.ELEVENLABS] = ServiceProviders.ELEVENLABS
    voice: str = "21m00Tcm4TlvDq8ikWAM"  # Rachel voice ID
    speed: float = Field(default=1.0, ge=0.1, le=2.0, description="Speed of the voice")
-    model: ElevenlabsModel = ElevenlabsModel.FLASH_2
+    model: str = Field(
+        default="eleven_flash_v2_5",
+        json_schema_extra={"examples": ELEVENLABS_TTS_MODELS},
+    )
    api_key: str


-class OpenAITTSModel(str, Enum):
-    GPT_4o_MINI = "gpt-4o-mini-tts"
+OPENAI_TTS_MODELS = ["gpt-4o-mini-tts"]


 @register_tts
 class OpenAITTSService(BaseTTSConfiguration):
    provider: Literal[ServiceProviders.OPENAI] = ServiceProviders.OPENAI
-    model: OpenAITTSModel = OpenAITTSModel.GPT_4o_MINI
+    model: str = Field(
+        default="gpt-4o-mini-tts", json_schema_extra={"examples": OPENAI_TTS_MODELS}
+    )
    voice: str = "alloy"
    api_key: str


-class DograhTTSModel(str, Enum):
-    DEFAULT = "default"
+DOGRAH_TTS_MODELS = ["default"]


 @register_tts
 class DograhTTSService(BaseTTSConfiguration):
    provider: Literal[ServiceProviders.DOGRAH] = ServiceProviders.DOGRAH
-    model: DograhTTSModel = DograhTTSModel.DEFAULT
+    model: str = Field(
+        default="default", json_schema_extra={"examples": DOGRAH_TTS_MODELS}
+    )
    voice: str = "default"
    api_key: str


-class SarvamTTSModel(str, Enum):
-    BULBUL_V2 = "bulbul:v2"
-    BULBUL_V3 = "bulbul:v3"
-
-
-class SarvamVoice(str, Enum):
-    # Female voices
-    ANUSHKA = "anushka"
-    MANISHA = "manisha"
-    VIDYA = "vidya"
-    ARYA = "arya"
-    # Male voices
-    ABHILASH = "abhilash"
-    KARUN = "karun"
-    HITESH = "hitesh"
-
-
-class SarvamLanguage(str, Enum):
-    BENGALI = "bn-IN"
-    ENGLISH_INDIA = "en-IN"
-    GUJARATI = "gu-IN"
-    HINDI = "hi-IN"
-    KANNADA = "kn-IN"
-    MALAYALAM = "ml-IN"
-    MARATHI = "mr-IN"
-    ODIA = "od-IN"
-    PUNJABI = "pa-IN"
-    TAMIL = "ta-IN"
-    TELUGU = "te-IN"
-    ASSAMESE = "as-IN"
+SARVAM_TTS_MODELS = ["bulbul:v2", "bulbul:v3"]
+SARVAM_VOICES = ["anushka", "manisha", "vidya", "arya", "abhilash", "karun", "hitesh"]
+SARVAM_LANGUAGES = [
+    "bn-IN",
+    "en-IN",
+    "gu-IN",
+    "hi-IN",
+    "kn-IN",
+    "ml-IN",
+    "mr-IN",
+    "od-IN",
+    "pa-IN",
+    "ta-IN",
+    "te-IN",
+    "as-IN",
+]


 # @register_tts
 # class SarvamTTSConfiguration(BaseTTSConfiguration):
 #     provider: Literal[ServiceProviders.SARVAM] = ServiceProviders.SARVAM
-#     model: SarvamTTSModel = SarvamTTSModel.BULBUL_V2
-#     voice: SarvamVoice = SarvamVoice.ANUSHKA
-#     language: SarvamLanguage = SarvamLanguage.HINDI
+#     model: str = Field(default="bulbul:v2", json_schema_extra={"examples": SARVAM_TTS_MODELS})
+#     voice: str = Field(default="anushka", json_schema_extra={"examples": SARVAM_VOICES})
+#     language: str = Field(default="hi-IN", json_schema_extra={"examples": SARVAM_LANGUAGES})
 #     api_key: str


@@ -273,49 +284,51 @@ TTSConfig = Annotated[
 ###################################################### STT ########################################################################


-class DeepgramSTTModel(str, Enum):
-    NOVA_3_GENERAL = "nova-3-general"
-
-
-class DeepgramLanguage(str, Enum):
-    MULTI = "multi"
-    ENGLISH = "en"
-    ENGLISH_US = "en-US"
-    ENGLISH_GB = "en-GB"
-    ENGLISH_AU = "en-AU"
-    ENGLISH_IN = "en-IN"
-    SPANISH = "es"
-    SPANISH_LATAM = "es-419"
-    FRENCH = "fr"
-    FRENCH_CA = "fr-CA"
-    GERMAN = "de"
-    ITALIAN = "it"
-    PORTUGUESE = "pt"
-    PORTUGUESE_BR = "pt-BR"
-    DUTCH = "nl"
-    HINDI = "hi"
-    JAPANESE = "ja"
-    KOREAN = "ko"
-    CHINESE_SIMPLIFIED = "zh-CN"
-    CHINESE_TRADITIONAL = "zh-TW"
-    RUSSIAN = "ru"
-    POLISH = "pl"
-    TURKISH = "tr"
-    UKRAINIAN = "uk"
-    VIETNAMESE = "vi"
-    SWEDISH = "sv"
-    DANISH = "da"
-    NORWEGIAN = "no"
-    FINNISH = "fi"
-    INDONESIAN = "id"
-    THAI = "th"
+DEEPGRAM_STT_MODELS = ["nova-3-general"]
+DEEPGRAM_LANGUAGES = [
+    "multi",
+    "en",
+    "en-US",
+    "en-GB",
+    "en-AU",
+    "en-IN",
+    "es",
+    "es-419",
+    "fr",
+    "fr-CA",
+    "de",
+    "it",
+    "pt",
+    "pt-BR",
+    "nl",
+    "hi",
+    "ja",
+    "ko",
+    "zh-CN",
+    "zh-TW",
+    "ru",
+    "pl",
+    "tr",
+    "uk",
+    "vi",
+    "sv",
+    "da",
+    "no",
+    "fi",
+    "id",
+    "th",
+]


 @register_stt
 class DeepgramSTTConfiguration(BaseSTTConfiguration):
    provider: Literal[ServiceProviders.DEEPGRAM] = ServiceProviders.DEEPGRAM
-    model: DeepgramSTTModel = DeepgramSTTModel.NOVA_3_GENERAL
-    language: DeepgramLanguage = DeepgramLanguage.MULTI
+    model: str = Field(
+        default="nova-3-general", json_schema_extra={"examples": DEEPGRAM_STT_MODELS}
+    )
+    language: str = Field(
+        default="multi", json_schema_extra={"examples": DEEPGRAM_LANGUAGES}
+    )
    api_key: str


@@ -325,40 +338,40 @@ class CartesiaSTTConfiguration(BaseSTTConfiguration):
    api_key: str


-class OpenAISTTModel(str, Enum):
-    GPT_4o_TRANSCRIBE = "gpt-4o-transcribe"
+OPENAI_STT_MODELS = ["gpt-4o-transcribe"]


 @register_stt
 class OpenAISTTConfiguration(BaseSTTConfiguration):
    provider: Literal[ServiceProviders.OPENAI] = ServiceProviders.OPENAI
-    model: OpenAISTTModel = OpenAISTTModel.GPT_4o_TRANSCRIBE
+    model: str = Field(
+        default="gpt-4o-transcribe", json_schema_extra={"examples": OPENAI_STT_MODELS}
+    )
    api_key: str


 # Dograh STT Service
-class DograhSTTModel(str, Enum):
-    DEFAULT = "default"
+DOGRAH_STT_MODELS = ["default"]


 @register_stt
 class DograhSTTService(BaseSTTConfiguration):
    provider: Literal[ServiceProviders.DOGRAH] = ServiceProviders.DOGRAH
-    model: DograhSTTModel = DograhSTTModel.DEFAULT
+    model: str = Field(
+        default="default", json_schema_extra={"examples": DOGRAH_STT_MODELS}
+    )
    api_key: str


 # Sarvam STT Service
-class SarvamSTTModel(str, Enum):
-    SAARIKA_V2_5 = "saarika:v2.5"
-    SAARAS_V2 = "saaras:v2"  # STT-Translate model (auto-detects language)
+SARVAM_STT_MODELS = ["saarika:v2.5", "saaras:v2"]


 # @register_stt
 # class SarvamSTTConfiguration(BaseSTTConfiguration):
 #     provider: Literal[ServiceProviders.SARVAM] = ServiceProviders.SARVAM
-#     model: SarvamSTTModel = SarvamSTTModel.SAARIKA_V2_5
-#     language: SarvamLanguage = SarvamLanguage.HINDI
+#     model: str = Field(default="saarika:v2.5", json_schema_extra={"examples": SARVAM_STT_MODELS})
+#     language: str = Field(default="hi-IN", json_schema_extra={"examples": SARVAM_LANGUAGES})
 #     api_key: str


--- a/api/services/pipecat/service_factory.py
+++ b/api/services/pipecat/service_factory.py
@@ -30,12 +30,9 @@ def create_stt_service(user_config):
    """Create and return appropriate STT service based on user configuration"""
    if user_config.stt.provider == ServiceProviders.DEEPGRAM.value:
        # Use language from user config, defaulting to "multi" for multilingual support
-        language = getattr(user_config.stt, "language", None)
-        language_value = (
-            language.value if hasattr(language, "value") else (language or "multi")
-        )
+        language = getattr(user_config.stt, "language", None) or "multi"
        live_options = LiveOptions(
-            language=language_value, profanity_filter=False, endpointing=100
+            language=language, profanity_filter=False, endpointing=100
        )
        return DeepgramSTTService(
            live_options=live_options,
@@ -45,7 +42,7 @@ def create_stt_service(user_config):
    elif user_config.stt.provider == ServiceProviders.OPENAI.value:
        return OpenAISTTService(
            api_key=user_config.stt.api_key,
-            model=user_config.stt.model.value,
+            model=user_config.stt.model,
            audio_passthrough=False,  # Disable passthrough since audio is buffered separately
        )
    elif user_config.stt.provider == ServiceProviders.CARTESIA.value:
@@ -58,7 +55,7 @@ def create_stt_service(user_config):
        return DograhSTTService(
            base_url=base_url,
            api_key=user_config.stt.api_key,
-            model=user_config.stt.model.value,
+            model=user_config.stt.model,
            audio_passthrough=False,  # Disable passthrough since audio is buffered separately
        )
    elif user_config.stt.provider == ServiceProviders.SARVAM.value:
@@ -78,12 +75,10 @@ def create_stt_service(user_config):
            "as-IN": Language.AS_IN,
        }
        language = getattr(user_config.stt, "language", None)
-        language_value = language.value if hasattr(language, "value") else language
-        pipecat_language = language_mapping.get(language_value, Language.HI_IN)
-
+        pipecat_language = language_mapping.get(language, Language.HI_IN)
        return SarvamSTTService(
            api_key=user_config.stt.api_key,
-            model=user_config.stt.model.value,
+            model=user_config.stt.model,
            params=SarvamSTTService.InputParams(language=pipecat_language),
            audio_passthrough=False,
        )
@@ -105,13 +100,13 @@ def create_tts_service(user_config, audio_config: "AudioConfig"):
    if user_config.tts.provider == ServiceProviders.DEEPGRAM.value:
        return DeepgramTTSService(
            api_key=user_config.tts.api_key,
-            voice=user_config.tts.voice.value,
+            voice=user_config.tts.voice,
            text_filters=[xml_function_tag_filter],
        )
    elif user_config.tts.provider == ServiceProviders.OPENAI.value:
        return OpenAITTSService(
            api_key=user_config.tts.api_key,
-            model=user_config.tts.model.value,
+            model=user_config.tts.model,
            text_filters=[xml_function_tag_filter],
        )
    elif user_config.tts.provider == ServiceProviders.ELEVENLABS.value:
@@ -120,12 +115,11 @@ def create_tts_service(user_config, audio_config: "AudioConfig"):
            voice_id = user_config.tts.voice.split(" - ")[1]
        except IndexError:
            voice_id = user_config.tts.voice
-
        return ElevenLabsTTSService(
            reconnect_on_error=False,
            api_key=user_config.tts.api_key,
            voice_id=voice_id,
-            model=user_config.tts.model.value,
+            model=user_config.tts.model,
            params=ElevenLabsTTSService.InputParams(
                stability=0.8, speed=user_config.tts.speed, similarity_boost=0.75
            ),
@@ -134,12 +128,11 @@ def create_tts_service(user_config, audio_config: "AudioConfig"):
    elif user_config.tts.provider == ServiceProviders.DOGRAH.value:
        # Convert HTTP URL to WebSocket URL for TTS
        base_url = MPS_API_URL.replace("http://", "ws://").replace("https://", "wss://")
-        # Handle both enum and string values for model and voice
        return DograhTTSService(
            base_url=base_url,
            api_key=user_config.tts.api_key,
-            model=user_config.tts.model.value,
-            voice=user_config.tts.voice.value,
+            model=user_config.tts.model,
+            voice=user_config.tts.voice,
            text_filters=[xml_function_tag_filter],
        )
    elif user_config.tts.provider == ServiceProviders.SARVAM.value:
@@ -158,16 +151,13 @@ def create_tts_service(user_config, audio_config: "AudioConfig"):
            "te-IN": Language.TE,
        }
        language = getattr(user_config.tts, "language", None)
-        language_value = language.value if hasattr(language, "value") else language
-        pipecat_language = language_mapping.get(language_value, Language.HI)
-
-        voice = getattr(user_config.tts, "voice", None)
-        voice_value = voice.value if hasattr(voice, "value") else (voice or "anushka")
+        pipecat_language = language_mapping.get(language, Language.HI)

+        voice = getattr(user_config.tts, "voice", None) or "anushka"
        return SarvamTTSService(
            api_key=user_config.tts.api_key,
-            model=user_config.tts.model.value,
-            voice_id=voice_value,
+            model=user_config.tts.model,
+            voice_id=voice,
            params=SarvamTTSService.InputParams(language=pipecat_language),
            text_filters=[xml_function_tag_filter],
        )
@@ -179,17 +169,12 @@ def create_tts_service(user_config, audio_config: "AudioConfig"):

 def create_llm_service(user_config):
    """Create and return appropriate LLM service based on user configuration"""
-    # Handle both enum and string values for model
-    model_value = (
-        user_config.llm.model.value
-        if hasattr(user_config.llm.model, "value")
-        else user_config.llm.model
-    )
+    model = user_config.llm.model
    if user_config.llm.provider == ServiceProviders.OPENAI.value:
-        if "gpt-5" in model_value:
+        if "gpt-5" in model:
            return OpenAILLMService(
                api_key=user_config.llm.api_key,
-                model=model_value,
+                model=model,
                params=OpenAILLMService.InputParams(
                    reasoning_effort="minimal", verbosity="low"
                ),
@@ -197,16 +182,16 @@ def create_llm_service(user_config):
        else:
            return OpenAILLMService(
                api_key=user_config.llm.api_key,
-                model=model_value,
+                model=model,
                params=OpenAILLMService.InputParams(temperature=0.1),
            )
    elif user_config.llm.provider == ServiceProviders.GROQ.value:
        print(
-            f"Creating Groq LLM service with API key: {user_config.llm.api_key} and model: {model_value}"
+            f"Creating Groq LLM service with API key: {user_config.llm.api_key} and model: {model}"
        )
        return GroqLLMService(
            api_key=user_config.llm.api_key,
-            model=model_value,
+            model=model,
            params=OpenAILLMService.InputParams(temperature=0.1),
        )
    elif user_config.llm.provider == ServiceProviders.GOOGLE.value:
@@ -214,21 +199,21 @@ def create_llm_service(user_config):
        # NOT_GIVEN sentinels that break Pydantic validation in GoogleLLMService.
        return GoogleLLMService(
            api_key=user_config.llm.api_key,
-            model=model_value,
+            model=model,
            params=GoogleLLMService.InputParams(temperature=0.1),
        )
    elif user_config.llm.provider == ServiceProviders.AZURE.value:
        return AzureLLMService(
            api_key=user_config.llm.api_key,
            endpoint=user_config.llm.endpoint,
-            model=model_value,  # Azure uses deployment name as model
+            model=model,  # Azure uses deployment name as model
            params=AzureLLMService.InputParams(temperature=0.1),
        )
    elif user_config.llm.provider == ServiceProviders.DOGRAH.value:
        return DograhLLMService(
            base_url=f"{MPS_API_URL}/api/v1/llm",
            api_key=user_config.llm.api_key,
-            model=model_value,
+            model=model,
        )
    else:
        raise HTTPException(status_code=400, detail="Invalid LLM provider")
--- a/ui/src/components/ServiceConfiguration.tsx
+++ b/ui/src/components/ServiceConfiguration.tsx
@@ -383,11 +383,12 @@ export default function ServiceConfiguration() {
            ? providerSchema.$defs[schema.$ref.split('/').pop() || '']
            : schema;

-        // Use VoiceSelector for voice field in TTS service (except Sarvam which uses enum)
+        // Use VoiceSelector for voice field in TTS service (except Sarvam which uses predefined options)
        if (service === "tts" && field === "voice") {
            const currentProvider = serviceProviders.tts;
-            // Sarvam uses enum-based voice selection, not VoiceSelector
-            if (currentProvider !== "sarvam" && !actualSchema?.enum) {
+            // Sarvam uses predefined voice options, not VoiceSelector
+            const hasVoiceOptions = actualSchema?.enum || actualSchema?.examples;
+            if (currentProvider !== "sarvam" && !hasVoiceOptions) {
                return (
                    <VoiceSelector
                        provider={currentProvider}
@@ -478,7 +479,9 @@ export default function ServiceConfiguration() {
            );
        }

-        if (actualSchema?.enum) {
+        // Handle fields with enum or examples (dropdown options)
+        const dropdownOptions = actualSchema?.enum || actualSchema?.examples;
+        if (dropdownOptions && dropdownOptions.length > 0) {
            // Use friendly display names for language and voice fields
            const getDisplayName = (value: string) => {
                if (field === "language") {
@@ -504,7 +507,7 @@ export default function ServiceConfiguration() {
                        <SelectValue placeholder={`Select ${field}`} />
                    </SelectTrigger>
                    <SelectContent>
-                        {actualSchema.enum.map((value: string) => (
+                        {dropdownOptions.map((value: string) => (
                            <SelectItem key={value} value={value}>
                                {getDisplayName(value)}
                            </SelectItem>