From e83f3a36d2b422171113ecdf6432d8aaf9c63789 Mon Sep 17 00:00:00 2001
From: Abhishek Kumar <abhishek@a6k.me>
Date: Fri, 26 Dec 2025 16:00:02 +0530
Subject: [PATCH] fix: change model/voice/language types from enum to str for consistency

---
 api/services/auth/depends.py               |  14 +-
 api/services/configuration/registry.py     | 207 +++++++++++----------
 api/services/pipecat/service_factory.py    |  63 +++----
 ui/src/components/ServiceConfiguration.tsx |  13 +-
 4 files changed, 147 insertions(+), 150 deletions(-)

diff --git a/api/services/auth/depends.py b/api/services/auth/depends.py
index d995ade..6d38b33 100644
--- a/api/services/auth/depends.py
+++ b/api/services/auth/depends.py
@@ -10,11 +10,7 @@ from api.db import db_client
 from api.db.models import UserModel
 from api.schemas.user_configuration import UserConfiguration
 from api.services.auth.stack_auth import stackauth
-from api.services.configuration.registry import (
-    DograhSTTModel,
-    DograhTTSModel,
-    ServiceProviders,
-)
+from api.services.configuration.registry import ServiceProviders
 
 
 async def get_user(
@@ -242,18 +238,18 @@ async def create_user_configuration_with_mps_key(
                     "llm": {
                         "provider": ServiceProviders.DOGRAH.value,
                         "api_key": service_key,
-                        "model": "default",  # Default model
+                        "model": "default",
                     },
                     "tts": {
                         "provider": ServiceProviders.DOGRAH.value,
                         "api_key": service_key,
-                        "model": DograhTTSModel.DEFAULT.value,  # Default model
-                        "voice": "default",  # Default voice
+                        "model": "default",
+                        "voice": "default",
                     },
                     "stt": {
                         "provider": ServiceProviders.DOGRAH.value,
                         "api_key": service_key,
-                        "model": DograhSTTModel.DEFAULT.value,  # Default model
+                        "model": "default",
                     },
                 }
                 user_config = UserConfiguration(**configuration)
diff --git a/api/services/configuration/registry.py b/api/services/configuration/registry.py
index c6f0324..6db0131 100644
--- a/api/services/configuration/registry.py
+++ b/api/services/configuration/registry.py
@@ -95,8 +95,21 @@ def register_stt(cls: Type[BaseSTTConfiguration]):
 ###################################################### LLM ########################################################################
 
 # Suggested models for each provider (used for UI dropdown)
-OPENAI_MODELS = ["gpt-4.1", "gpt-4.1-mini", "gpt-4.1-nano", "gpt-5", "gpt-5-mini", "gpt-5-nano", "gpt-3.5-turbo"]
-GOOGLE_MODELS = ["gemini-2.0-flash", "gemini-2.0-flash-lite", "gemini-2.5-flash", "gemini-2.5-flash-lite"]
+OPENAI_MODELS = [
+    "gpt-4.1",
+    "gpt-4.1-mini",
+    "gpt-4.1-nano",
+    "gpt-5",
+    "gpt-5-mini",
+    "gpt-5-nano",
+    "gpt-3.5-turbo",
+]
+GOOGLE_MODELS = [
+    "gemini-2.0-flash",
+    "gemini-2.0-flash-lite",
+    "gemini-2.5-flash",
+    "gemini-2.5-flash-lite",
+]
 GROQ_MODELS = [
     "llama-3.3-70b-versatile",
     "deepseek-r1-distill-llama-70b",
@@ -121,21 +134,27 @@ class OpenAILLMService(BaseLLMConfiguration):
 @register_llm
 class GoogleLLMService(BaseLLMConfiguration):
     provider: Literal[ServiceProviders.GOOGLE] = ServiceProviders.GOOGLE
-    model: str = Field(default="gemini-2.0-flash", json_schema_extra={"examples": GOOGLE_MODELS})
+    model: str = Field(
+        default="gemini-2.0-flash", json_schema_extra={"examples": GOOGLE_MODELS}
+    )
     api_key: str
 
 
 @register_llm
 class GroqLLMService(BaseLLMConfiguration):
     provider: Literal[ServiceProviders.GROQ] = ServiceProviders.GROQ
-    model: str = Field(default="llama-3.3-70b-versatile", json_schema_extra={"examples": GROQ_MODELS})
+    model: str = Field(
+        default="llama-3.3-70b-versatile", json_schema_extra={"examples": GROQ_MODELS}
+    )
     api_key: str
 
 
 @register_llm
 class AzureLLMService(BaseLLMConfiguration):
     provider: Literal[ServiceProviders.AZURE] = ServiceProviders.AZURE
-    model: str = Field(default="gpt-4.1-mini", json_schema_extra={"examples": AZURE_MODELS})
+    model: str = Field(
+        default="gpt-4.1-mini", json_schema_extra={"examples": AZURE_MODELS}
+    )
     api_key: str
     endpoint: str
 
@@ -143,7 +162,9 @@ class AzureLLMService(BaseLLMConfiguration):
 @register_llm
 class DograhLLMService(BaseLLMConfiguration):
     provider: Literal[ServiceProviders.DOGRAH] = ServiceProviders.DOGRAH
-    model: str = Field(default="default", json_schema_extra={"examples": DOGRAH_LLM_MODELS})
+    model: str = Field(
+        default="default", json_schema_extra={"examples": DOGRAH_LLM_MODELS}
+    )
     api_key: str
 
 
@@ -181,8 +202,7 @@ class DeepgramTTSConfiguration(BaseServiceConfiguration):
             return "aura-2"
 
 
-class ElevenlabsModel(str, Enum):
-    FLASH_2 = "eleven_flash_v2_5"
+ELEVENLABS_TTS_MODELS = ["eleven_flash_v2_5"]
 
 
 @register_tts
@@ -190,72 +210,63 @@ class ElevenlabsTTSConfiguration(BaseServiceConfiguration):
     provider: Literal[ServiceProviders.ELEVENLABS] = ServiceProviders.ELEVENLABS
     voice: str = "21m00Tcm4TlvDq8ikWAM"  # Rachel voice ID
     speed: float = Field(default=1.0, ge=0.1, le=2.0, description="Speed of the voice")
-    model: ElevenlabsModel = ElevenlabsModel.FLASH_2
+    model: str = Field(
+        default="eleven_flash_v2_5",
+        json_schema_extra={"examples": ELEVENLABS_TTS_MODELS},
+    )
     api_key: str
 
 
-class OpenAITTSModel(str, Enum):
-    GPT_4o_MINI = "gpt-4o-mini-tts"
+OPENAI_TTS_MODELS = ["gpt-4o-mini-tts"]
 
 
 @register_tts
 class OpenAITTSService(BaseTTSConfiguration):
     provider: Literal[ServiceProviders.OPENAI] = ServiceProviders.OPENAI
-    model: OpenAITTSModel = OpenAITTSModel.GPT_4o_MINI
+    model: str = Field(
+        default="gpt-4o-mini-tts", json_schema_extra={"examples": OPENAI_TTS_MODELS}
+    )
     voice: str = "alloy"
     api_key: str
 
 
-class DograhTTSModel(str, Enum):
-    DEFAULT = "default"
+DOGRAH_TTS_MODELS = ["default"]
 
 
 @register_tts
 class DograhTTSService(BaseTTSConfiguration):
     provider: Literal[ServiceProviders.DOGRAH] = ServiceProviders.DOGRAH
-    model: DograhTTSModel = DograhTTSModel.DEFAULT
+    model: str = Field(
+        default="default", json_schema_extra={"examples": DOGRAH_TTS_MODELS}
+    )
     voice: str = "default"
     api_key: str
 
 
-class SarvamTTSModel(str, Enum):
-    BULBUL_V2 = "bulbul:v2"
-    BULBUL_V3 = "bulbul:v3"
-
-
-class SarvamVoice(str, Enum):
-    # Female voices
-    ANUSHKA = "anushka"
-    MANISHA = "manisha"
-    VIDYA = "vidya"
-    ARYA = "arya"
-    # Male voices
-    ABHILASH = "abhilash"
-    KARUN = "karun"
-    HITESH = "hitesh"
-
-
-class SarvamLanguage(str, Enum):
-    BENGALI = "bn-IN"
-    ENGLISH_INDIA = "en-IN"
-    GUJARATI = "gu-IN"
-    HINDI = "hi-IN"
-    KANNADA = "kn-IN"
-    MALAYALAM = "ml-IN"
-    MARATHI = "mr-IN"
-    ODIA = "od-IN"
-    PUNJABI = "pa-IN"
-    TAMIL = "ta-IN"
-    TELUGU = "te-IN"
-    ASSAMESE = "as-IN"
+SARVAM_TTS_MODELS = ["bulbul:v2", "bulbul:v3"]
+SARVAM_VOICES = ["anushka", "manisha", "vidya", "arya", "abhilash", "karun", "hitesh"]
+SARVAM_LANGUAGES = [
+    "bn-IN",
+    "en-IN",
+    "gu-IN",
+    "hi-IN",
+    "kn-IN",
+    "ml-IN",
+    "mr-IN",
+    "od-IN",
+    "pa-IN",
+    "ta-IN",
+    "te-IN",
+    "as-IN",
+]
 
 
 # @register_tts
 # class SarvamTTSConfiguration(BaseTTSConfiguration):
 #     provider: Literal[ServiceProviders.SARVAM] = ServiceProviders.SARVAM
-#     model: SarvamTTSModel = SarvamTTSModel.BULBUL_V2
-#     voice: SarvamVoice = SarvamVoice.ANUSHKA
-#     language: SarvamLanguage = SarvamLanguage.HINDI
+#     model: str = Field(default="bulbul:v2", json_schema_extra={"examples": SARVAM_TTS_MODELS})
+#     voice: str = Field(default="anushka", json_schema_extra={"examples": SARVAM_VOICES})
+#     language: str = Field(default="hi-IN", json_schema_extra={"examples": SARVAM_LANGUAGES})
 #     api_key: str
 
 
@@ -273,49 +284,51 @@ TTSConfig = Annotated[
 ###################################################### STT ########################################################################
 
 
-class DeepgramSTTModel(str, Enum):
-    NOVA_3_GENERAL = "nova-3-general"
-
-
-class DeepgramLanguage(str, Enum):
-    MULTI = "multi"
-    ENGLISH = "en"
-    ENGLISH_US = "en-US"
-    ENGLISH_GB = "en-GB"
-    ENGLISH_AU = "en-AU"
-    ENGLISH_IN = "en-IN"
-    SPANISH = "es"
-    SPANISH_LATAM = "es-419"
-    FRENCH = "fr"
-    FRENCH_CA = "fr-CA"
-    GERMAN = "de"
-    ITALIAN = "it"
-    PORTUGUESE = "pt"
-    PORTUGUESE_BR = "pt-BR"
-    DUTCH = "nl"
-    HINDI = "hi"
-    JAPANESE = "ja"
-    KOREAN = "ko"
-    CHINESE_SIMPLIFIED = "zh-CN"
-    CHINESE_TRADITIONAL = "zh-TW"
-    RUSSIAN = "ru"
-    POLISH = "pl"
-    TURKISH = "tr"
-    UKRAINIAN = "uk"
-    VIETNAMESE = "vi"
-    SWEDISH = "sv"
-    DANISH = "da"
-    NORWEGIAN = "no"
-    FINNISH = "fi"
-    INDONESIAN = "id"
-    THAI = "th"
+DEEPGRAM_STT_MODELS = ["nova-3-general"]
+DEEPGRAM_LANGUAGES = [
+    "multi",
+    "en",
+    "en-US",
+    "en-GB",
+    "en-AU",
+    "en-IN",
+    "es",
+    "es-419",
+    "fr",
+    "fr-CA",
+    "de",
+    "it",
+    "pt",
+    "pt-BR",
+    "nl",
+    "hi",
+    "ja",
+    "ko",
+    "zh-CN",
+    "zh-TW",
+    "ru",
+    "pl",
+    "tr",
+    "uk",
+    "vi",
+    "sv",
+    "da",
+    "no",
+    "fi",
+    "id",
+    "th",
+]
 
 
 @register_stt
 class DeepgramSTTConfiguration(BaseSTTConfiguration):
     provider: Literal[ServiceProviders.DEEPGRAM] = ServiceProviders.DEEPGRAM
-    model: DeepgramSTTModel = DeepgramSTTModel.NOVA_3_GENERAL
-    language: DeepgramLanguage = DeepgramLanguage.MULTI
+    model: str = Field(
+        default="nova-3-general", json_schema_extra={"examples": DEEPGRAM_STT_MODELS}
+    )
+    language: str = Field(
+        default="multi", json_schema_extra={"examples": DEEPGRAM_LANGUAGES}
+    )
     api_key: str
 
 
@@ -325,40 +338,40 @@ class CartesiaSTTConfiguration(BaseSTTConfiguration):
     api_key: str
 
 
-class OpenAISTTModel(str, Enum):
-    GPT_4o_TRANSCRIBE = "gpt-4o-transcribe"
+OPENAI_STT_MODELS = ["gpt-4o-transcribe"]
 
 
 @register_stt
 class OpenAISTTConfiguration(BaseSTTConfiguration):
     provider: Literal[ServiceProviders.OPENAI] = ServiceProviders.OPENAI
-    model: OpenAISTTModel = OpenAISTTModel.GPT_4o_TRANSCRIBE
+    model: str = Field(
+        default="gpt-4o-transcribe", json_schema_extra={"examples": OPENAI_STT_MODELS}
+    )
     api_key: str
 
 
 # Dograh STT Service
-class DograhSTTModel(str, Enum):
-    DEFAULT = "default"
+DOGRAH_STT_MODELS = ["default"]
 
 
 @register_stt
 class DograhSTTService(BaseSTTConfiguration):
     provider: Literal[ServiceProviders.DOGRAH] = ServiceProviders.DOGRAH
-    model: DograhSTTModel = DograhSTTModel.DEFAULT
+    model: str = Field(
+        default="default", json_schema_extra={"examples": DOGRAH_STT_MODELS}
+    )
     api_key: str
 
 
 # Sarvam STT Service
-class SarvamSTTModel(str, Enum):
-    SAARIKA_V2_5 = "saarika:v2.5"
-    SAARAS_V2 = "saaras:v2"  # STT-Translate model (auto-detects language)
+SARVAM_STT_MODELS = ["saarika:v2.5", "saaras:v2"]
 
 
 # @register_stt
 # class SarvamSTTConfiguration(BaseSTTConfiguration):
 #     provider: Literal[ServiceProviders.SARVAM] = ServiceProviders.SARVAM
-#     model: SarvamSTTModel = SarvamSTTModel.SAARIKA_V2_5
-#     language: SarvamLanguage = SarvamLanguage.HINDI
+#     model: str = Field(default="saarika:v2.5", json_schema_extra={"examples": SARVAM_STT_MODELS})
+#     language: str = Field(default="hi-IN", json_schema_extra={"examples": SARVAM_LANGUAGES})
 #     api_key: str
 
 
diff --git a/api/services/pipecat/service_factory.py b/api/services/pipecat/service_factory.py
index 9869e78..948fbdd 100644
--- a/api/services/pipecat/service_factory.py
+++ b/api/services/pipecat/service_factory.py
@@ -30,12 +30,9 @@ def create_stt_service(user_config):
     """Create and return appropriate STT service based on user configuration"""
     if user_config.stt.provider == ServiceProviders.DEEPGRAM.value:
         # Use language from user config, defaulting to "multi" for multilingual support
-        language = getattr(user_config.stt, "language", None)
-        language_value = (
-            language.value if hasattr(language, "value") else (language or "multi")
-        )
+        language = getattr(user_config.stt, "language", None) or "multi"
         live_options = LiveOptions(
-            language=language_value, profanity_filter=False, endpointing=100
+            language=language, profanity_filter=False, endpointing=100
         )
         return DeepgramSTTService(
             live_options=live_options,
@@ -45,7 +42,7 @@ def create_stt_service(user_config):
     elif user_config.stt.provider == ServiceProviders.OPENAI.value:
         return OpenAISTTService(
             api_key=user_config.stt.api_key,
-            model=user_config.stt.model.value,
+            model=user_config.stt.model,
             audio_passthrough=False,  # Disable passthrough since audio is buffered separately
         )
     elif user_config.stt.provider == ServiceProviders.CARTESIA.value:
@@ -58,7 +55,7 @@ def create_stt_service(user_config):
         return DograhSTTService(
             base_url=base_url,
             api_key=user_config.stt.api_key,
-            model=user_config.stt.model.value,
+            model=user_config.stt.model,
             audio_passthrough=False,  # Disable passthrough since audio is buffered separately
         )
     elif user_config.stt.provider == ServiceProviders.SARVAM.value:
@@ -78,12 +75,10 @@ def create_stt_service(user_config):
             "as-IN": Language.AS_IN,
         }
         language = getattr(user_config.stt, "language", None)
-        language_value = language.value if hasattr(language, "value") else language
-        pipecat_language = language_mapping.get(language_value, Language.HI_IN)
-
+        pipecat_language = language_mapping.get(language, Language.HI_IN)
         return SarvamSTTService(
             api_key=user_config.stt.api_key,
-            model=user_config.stt.model.value,
+            model=user_config.stt.model,
             params=SarvamSTTService.InputParams(language=pipecat_language),
             audio_passthrough=False,
         )
@@ -105,13 +100,13 @@ def create_tts_service(user_config, audio_config: "AudioConfig"):
     if user_config.tts.provider == ServiceProviders.DEEPGRAM.value:
         return DeepgramTTSService(
             api_key=user_config.tts.api_key,
-            voice=user_config.tts.voice.value,
+            voice=user_config.tts.voice,
             text_filters=[xml_function_tag_filter],
         )
     elif user_config.tts.provider == ServiceProviders.OPENAI.value:
         return OpenAITTSService(
             api_key=user_config.tts.api_key,
-            model=user_config.tts.model.value,
+            model=user_config.tts.model,
             text_filters=[xml_function_tag_filter],
         )
     elif user_config.tts.provider == ServiceProviders.ELEVENLABS.value:
@@ -120,12 +115,11 @@ def create_tts_service(user_config, audio_config: "AudioConfig"):
             voice_id = user_config.tts.voice.split(" - ")[1]
         except IndexError:
             voice_id = user_config.tts.voice
-
         return ElevenLabsTTSService(
             reconnect_on_error=False,
             api_key=user_config.tts.api_key,
             voice_id=voice_id,
-            model=user_config.tts.model.value,
+            model=user_config.tts.model,
             params=ElevenLabsTTSService.InputParams(
                 stability=0.8, speed=user_config.tts.speed, similarity_boost=0.75
             ),
@@ -134,12 +128,11 @@ def create_tts_service(user_config, audio_config: "AudioConfig"):
     elif user_config.tts.provider == ServiceProviders.DOGRAH.value:
         # Convert HTTP URL to WebSocket URL for TTS
         base_url = MPS_API_URL.replace("http://", "ws://").replace("https://", "wss://")
-        # Handle both enum and string values for model and voice
         return DograhTTSService(
             base_url=base_url,
             api_key=user_config.tts.api_key,
-            model=user_config.tts.model.value,
-            voice=user_config.tts.voice.value,
+            model=user_config.tts.model,
+            voice=user_config.tts.voice,
             text_filters=[xml_function_tag_filter],
         )
     elif user_config.tts.provider == ServiceProviders.SARVAM.value:
@@ -158,16 +151,13 @@ def create_tts_service(user_config, audio_config: "AudioConfig"):
             "te-IN": Language.TE,
         }
         language = getattr(user_config.tts, "language", None)
-        language_value = language.value if hasattr(language, "value") else language
-        pipecat_language = language_mapping.get(language_value, Language.HI)
-
-        voice = getattr(user_config.tts, "voice", None)
-        voice_value = voice.value if hasattr(voice, "value") else (voice or "anushka")
+        pipecat_language = language_mapping.get(language, Language.HI)
 
+        voice = getattr(user_config.tts, "voice", None) or "anushka"
         return SarvamTTSService(
             api_key=user_config.tts.api_key,
-            model=user_config.tts.model.value,
-            voice_id=voice_value,
+            model=user_config.tts.model,
+            voice_id=voice,
             params=SarvamTTSService.InputParams(language=pipecat_language),
             text_filters=[xml_function_tag_filter],
         )
@@ -179,17 +169,12 @@ def create_tts_service(user_config, audio_config: "AudioConfig"):
 
 def create_llm_service(user_config):
     """Create and return appropriate LLM service based on user configuration"""
-    # Handle both enum and string values for model
-    model_value = (
-        user_config.llm.model.value
-        if hasattr(user_config.llm.model, "value")
-        else user_config.llm.model
-    )
+    model = user_config.llm.model
     if user_config.llm.provider == ServiceProviders.OPENAI.value:
-        if "gpt-5" in model_value:
+        if "gpt-5" in model:
             return OpenAILLMService(
                 api_key=user_config.llm.api_key,
-                model=model_value,
+                model=model,
                 params=OpenAILLMService.InputParams(
                     reasoning_effort="minimal", verbosity="low"
                 ),
@@ -197,16 +182,16 @@ def create_llm_service(user_config):
         else:
             return OpenAILLMService(
                 api_key=user_config.llm.api_key,
-                model=model_value,
+                model=model,
                 params=OpenAILLMService.InputParams(temperature=0.1),
             )
     elif user_config.llm.provider == ServiceProviders.GROQ.value:
         print(
-            f"Creating Groq LLM service with API key: {user_config.llm.api_key} and model: {model_value}"
+            f"Creating Groq LLM service with API key: {user_config.llm.api_key} and model: {model}"
         )
         return GroqLLMService(
             api_key=user_config.llm.api_key,
-            model=model_value,
+            model=model,
             params=OpenAILLMService.InputParams(temperature=0.1),
         )
     elif user_config.llm.provider == ServiceProviders.GOOGLE.value:
@@ -214,21 +199,21 @@ def create_llm_service(user_config):
         # NOT_GIVEN sentinels that break Pydantic validation in GoogleLLMService.
         return GoogleLLMService(
             api_key=user_config.llm.api_key,
-            model=model_value,
+            model=model,
             params=GoogleLLMService.InputParams(temperature=0.1),
         )
     elif user_config.llm.provider == ServiceProviders.AZURE.value:
         return AzureLLMService(
             api_key=user_config.llm.api_key,
             endpoint=user_config.llm.endpoint,
-            model=model_value,  # Azure uses deployment name as model
+            model=model,  # Azure uses deployment name as model
             params=AzureLLMService.InputParams(temperature=0.1),
         )
     elif user_config.llm.provider == ServiceProviders.DOGRAH.value:
         return DograhLLMService(
             base_url=f"{MPS_API_URL}/api/v1/llm",
             api_key=user_config.llm.api_key,
-            model=model_value,
+            model=model,
         )
     else:
         raise HTTPException(status_code=400, detail="Invalid LLM provider")
diff --git a/ui/src/components/ServiceConfiguration.tsx b/ui/src/components/ServiceConfiguration.tsx
index 63c37a8..c2d88ff 100644
--- a/ui/src/components/ServiceConfiguration.tsx
+++ b/ui/src/components/ServiceConfiguration.tsx
@@ -383,11 +383,12 @@ export default function ServiceConfiguration() {
             ? providerSchema.$defs[schema.$ref.split('/').pop() || '']
             : schema;
 
-        // Use VoiceSelector for voice field in TTS service (except Sarvam which uses enum)
+        // Use VoiceSelector for the TTS voice field, unless the provider is Sarvam or the schema supplies predefined options (enum or examples)
         if (service === "tts" && field === "voice") {
             const currentProvider = serviceProviders.tts;
-            // Sarvam uses enum-based voice selection, not VoiceSelector
-            if (currentProvider !== "sarvam" && !actualSchema?.enum) {
+            // Sarvam uses predefined voice options, not VoiceSelector
+            const hasVoiceOptions = actualSchema?.enum || actualSchema?.examples;
+            if (currentProvider !== "sarvam" && !hasVoiceOptions) {
                 return (
                     <VoiceSelector
                         provider={currentProvider}
@@ -478,7 +479,9 @@ export default function ServiceConfiguration() {
             );
         }
 
-        if (actualSchema?.enum) {
+        // Render a dropdown for fields whose schema provides a non-empty list of options (enum or examples)
+        const dropdownOptions = actualSchema?.enum || actualSchema?.examples;
+        if (dropdownOptions && dropdownOptions.length > 0) {
             // Use friendly display names for language and voice fields
             const getDisplayName = (value: string) => {
                 if (field === "language") {
@@ -504,7 +507,7 @@ export default function ServiceConfiguration() {
                         <SelectValue placeholder={`Select ${field}`} />
                     </SelectTrigger>
                     <SelectContent>
-                        {actualSchema.enum.map((value: string) => (
+                        {dropdownOptions.map((value: string) => (
                             <SelectItem key={value} value={value}>
                                 {getDisplayName(value)}
                             </SelectItem>