feat: add MiniMax provider support (Chat + TTS) (#309)

* feat: add MiniMax provider support (Chat + TTS) - Add MiniMax LLM provider using OpenAI-compatible API - Models: MiniMax-M2.7, MiniMax-M2.7-highspeed - Default base URL: https://api.minimax.io/v1 - Uses MINIMAX_API_KEY for authentication - Add MiniMax TTS provider using Pipecat's MiniMaxHttpTTSService - Models: speech-2.8-hd (default), speech-2.8-turbo - 6 built-in voices - Requires group_id configuration - Add unit tests for both providers * fix(minimax): validator, temperature, session cleanup, reasoning filter - check_validity.py: wire MiniMax into _validator_map and enforce group_id at save time. Without this, saving a config with a valid key was rejected. - registry.py: surface temperature on the LLM config (gt=0; MiniMax rejects 0) and base_url on the TTS config - service_factory.py: * Plumb temperature through create_llm_service * Normalize TTS base_url to include /t2a_v2 — pipecat appends only ?GroupId=... to the URL. * Use the new MiniMaxLLMService (from pipecat) to strip <think>...</think> reasoning that MiniMax-M2.7 emits inline in delta.content (otherwise it leaks straight to TTS). * Use MiniMaxOwnedSessionTTSService so the per-instance aiohttp session gets closed in cleanup() instead of leaking sockets/FDs. - minimax_tts.py: small wrapper around MiniMaxHttpTTSService that owns the session it was handed (pipecat's caller-owns-session API conflicts with the factory's per-instance pattern). - pipecat submodule: bumps to a commit that adds MiniMaxLLMService — a thin OpenAILLMService subclass with the streaming <think> filter (mirrors NvidiaLLMService's pattern for NIM reasoning models). - Tests updated/added for all of the above. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> --------- Co-authored-by: octo-patch <octo-patch@github.com> Co-authored-by: Sabiha Khan <sabihak89@gmail.com>
2026-06-28 08:49:42 +02:00 · 2026-05-22 15:39:41 +08:00 · 2026-05-22 15:39:41 +08:00 · 0e0d3136ca
commit 0e0d3136ca
parent 38c2003734
6 changed files with 291 additions and 1 deletion
--- a/api/services/configuration/check_validity.py
+++ b/api/services/configuration/check_validity.py
@ -53,6 +53,7 @@ class UserConfigurationValidator:
            ServiceProviders.ASSEMBLYAI.value: self._check_assemblyai_api_key,
            ServiceProviders.GLADIA.value: self._check_gladia_api_key,
            ServiceProviders.RIME.value: self._check_rime_api_key,
+            ServiceProviders.MINIMAX.value: self._check_minimax_api_key,
        }

    async def validate(
@ -147,6 +148,19 @@ class UserConfigurationValidator:
                return [{"model": service_name, "message": str(e)}]
            return []

+        # MiniMax TTS requires a group_id alongside the API key.
+        # LLM configs don't expose group_id, so only check when the field exists.
+        if provider == ServiceProviders.MINIMAX.value and hasattr(
+            service_config, "group_id"
+        ):
+            if not getattr(service_config, "group_id", None):
+                return [
+                    {
+                        "model": service_name,
+                        "message": "group_id is required for MiniMax TTS",
+                    }
+                ]
+
        api_key = service_config.api_key

        try:
@ -253,3 +267,8 @@ class UserConfigurationValidator:

    def _check_rime_api_key(self, model: str, api_key: str) -> bool:
        return True
+
+    def _check_minimax_api_key(self, model: str, api_key: str) -> bool:
+        # MiniMax doesn't publish a cheap key-validation endpoint; trust the key
+        # at save time and surface auth errors at first call (same as Rime/Sarvam).
+        return True
--- a/api/services/configuration/registry.py
+++ b/api/services/configuration/registry.py
@ -32,6 +32,7 @@ class ServiceProviders(str, Enum):
    ASSEMBLYAI = "assemblyai"
    GLADIA = "gladia"
    RIME = "rime"
+    MINIMAX = "minimax"
    OPENAI_REALTIME = "openai_realtime"
    GOOGLE_REALTIME = "google_realtime"
    GOOGLE_VERTEX_REALTIME = "google_vertex_realtime"
@ -52,6 +53,7 @@ class BaseServiceConfiguration(BaseModel):
        ServiceProviders.ASSEMBLYAI,
        ServiceProviders.GLADIA,
        ServiceProviders.RIME,
+        ServiceProviders.MINIMAX,
        ServiceProviders.OPENAI_REALTIME,
        ServiceProviders.GOOGLE_REALTIME,
        ServiceProviders.GOOGLE_VERTEX_REALTIME,
@ -321,6 +323,32 @@ class SpeachesLLMConfiguration(BaseLLMConfiguration):
    )


+MINIMAX_MODELS = [
+    "MiniMax-M2.7",
+    "MiniMax-M2.7-highspeed",
+]
+
+
+@register_llm
+class MiniMaxLLMConfiguration(BaseLLMConfiguration):
+    provider: Literal[ServiceProviders.MINIMAX] = ServiceProviders.MINIMAX
+    model: str = Field(
+        default="MiniMax-M2.7",
+        description="MiniMax chat model.",
+        json_schema_extra={"examples": MINIMAX_MODELS, "allow_custom_input": True},
+    )
+    base_url: str = Field(
+        default="https://api.minimax.io/v1",
+        description="MiniMax OpenAI-compatible API endpoint.",
+    )
+    temperature: float = Field(
+        default=1.0,
+        gt=0.0,
+        le=2.0,
+        description="Sampling temperature. MiniMax requires > 0.",
+    )
+
+
 OPENAI_REALTIME_MODELS = ["gpt-realtime-2"]
 OPENAI_REALTIME_VOICES = [
    "alloy",
@ -494,6 +522,7 @@ LLMConfig = Annotated[
        DograhLLMService,
        AWSBedrockLLMConfiguration,
        SpeachesLLMConfiguration,
+        MiniMaxLLMConfiguration,
    ],
    Field(discriminator="provider"),
 ]
@ -783,6 +812,47 @@ class SpeachesTTSConfiguration(BaseTTSConfiguration):
    )


+MINIMAX_TTS_MODELS = ["speech-2.8-hd", "speech-2.8-turbo"]
+MINIMAX_TTS_VOICES = [
+    "English_Graceful_Lady",
+    "English_Insightful_Speaker",
+    "English_radiant_girl",
+    "English_Persuasive_Man",
+    "English_Lucky_Robot",
+    "English_expressive_narrator",
+]
+
+
+@register_tts
+class MiniMaxTTSConfiguration(BaseTTSConfiguration):
+    provider: Literal[ServiceProviders.MINIMAX] = ServiceProviders.MINIMAX
+    model: str = Field(
+        default="speech-2.8-hd",
+        description="MiniMax TTS model.",
+        json_schema_extra={"examples": MINIMAX_TTS_MODELS},
+    )
+    voice: str = Field(
+        default="English_Graceful_Lady",
+        description="MiniMax voice ID.",
+        json_schema_extra={"examples": MINIMAX_TTS_VOICES, "allow_custom_input": True},
+    )
+    base_url: str = Field(
+        default="https://api.minimax.io/v1/t2a_v2",
+        description=(
+            "MiniMax TTS API endpoint (must include the /v1/t2a_v2 path). "
+            "Defaults to the global endpoint; override with "
+            "https://api.minimaxi.chat/v1/t2a_v2 (mainland China) or "
+            "https://api-uw.minimax.io/v1/t2a_v2 (US-West)."
+        ),
+    )
+    speed: float = Field(
+        default=1.0, ge=0.5, le=2.0, description="Speech speed (0.5 to 2.0)."
+    )
+    group_id: str = Field(
+        description="MiniMax Group ID (found in your MiniMax dashboard under Account → Group).",
+    )
+
+
 TTSConfig = Annotated[
    Union[
        DeepgramTTSConfiguration,
@ -794,6 +864,7 @@ TTSConfig = Annotated[
        CambTTSConfiguration,
        RimeTTSConfiguration,
        SpeachesTTSConfiguration,
+        MiniMaxTTSConfiguration,
    ],
    Field(discriminator="provider"),
 ]