feat: add MiniMax provider support (Chat + TTS) (#309)

* feat: add MiniMax provider support (Chat + TTS)

- Add MiniMax LLM provider using OpenAI-compatible API
  - Models: MiniMax-M2.7, MiniMax-M2.7-highspeed
  - Default base URL: https://api.minimax.io/v1
  - Uses MINIMAX_API_KEY for authentication
- Add MiniMax TTS provider using Pipecat's MiniMaxHttpTTSService
  - Models: speech-2.8-hd (default), speech-2.8-turbo
  - 6 built-in voices
  - Requires group_id configuration
- Add unit tests for both providers

* fix(minimax): validator, temperature, session cleanup, reasoning filter
  - check_validity.py: wire MiniMax into _validator_map and enforce
    group_id at save time. Without this, saving a config with a valid
    key was rejected.
  - registry.py: surface temperature on the LLM config (gt=0; MiniMax
    rejects 0) and base_url on the TTS config
  - service_factory.py:
    * Plumb temperature through create_llm_service
    * Normalize TTS base_url to include /t2a_v2 — pipecat appends only
      ?GroupId=... to the URL.
    * Use the new MiniMaxLLMService (from pipecat) to strip
      <think>...</think> reasoning that MiniMax-M2.7 emits inline in
      delta.content (otherwise it leaks straight to TTS).
    * Use MiniMaxOwnedSessionTTSService so the per-instance aiohttp
      session gets closed in cleanup() instead of leaking sockets/FDs.
  - minimax_tts.py: small wrapper around MiniMaxHttpTTSService that owns
    the session it was handed (pipecat's caller-owns-session API
    conflicts with the factory's per-instance pattern).
  - pipecat submodule: bumps to a commit that adds MiniMaxLLMService — a
    thin OpenAILLMService subclass with the streaming <think> filter
    (mirrors NvidiaLLMService's pattern for NIM reasoning models).
  - Tests updated/added for all of the above.

  Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

---------

Co-authored-by: octo-patch <octo-patch@github.com>
Co-authored-by: Sabiha Khan <sabihak89@gmail.com>
This commit is contained in:
Octopus 2026-05-22 15:39:41 +08:00 committed by GitHub
parent 38c2003734
commit 0e0d3136ca
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 291 additions and 1 deletions

View file

@ -53,6 +53,7 @@ class UserConfigurationValidator:
ServiceProviders.ASSEMBLYAI.value: self._check_assemblyai_api_key,
ServiceProviders.GLADIA.value: self._check_gladia_api_key,
ServiceProviders.RIME.value: self._check_rime_api_key,
ServiceProviders.MINIMAX.value: self._check_minimax_api_key,
}
async def validate(
@ -147,6 +148,19 @@ class UserConfigurationValidator:
return [{"model": service_name, "message": str(e)}]
return []
# MiniMax TTS requires a group_id alongside the API key.
# LLM configs don't expose group_id, so only check when the field exists.
if provider == ServiceProviders.MINIMAX.value and hasattr(
service_config, "group_id"
):
if not getattr(service_config, "group_id", None):
return [
{
"model": service_name,
"message": "group_id is required for MiniMax TTS",
}
]
api_key = service_config.api_key
try:
@ -253,3 +267,8 @@ class UserConfigurationValidator:
def _check_rime_api_key(self, model: str, api_key: str) -> bool:
# Always returns True: the Rime key is accepted without any check here, and
# neither `model` nor `api_key` is inspected.
return True
def _check_minimax_api_key(self, model: str, api_key: str) -> bool:
# Save-time key check for the MiniMax provider. Performs no network call and
# always returns True, so `model` and `api_key` are accepted but not inspected.
# MiniMax doesn't publish a cheap key-validation endpoint; trust the key
# at save time and surface auth errors at first call (same as Rime/Sarvam).
return True

View file

@ -32,6 +32,7 @@ class ServiceProviders(str, Enum):
ASSEMBLYAI = "assemblyai"
GLADIA = "gladia"
RIME = "rime"
MINIMAX = "minimax"
OPENAI_REALTIME = "openai_realtime"
GOOGLE_REALTIME = "google_realtime"
GOOGLE_VERTEX_REALTIME = "google_vertex_realtime"
@ -52,6 +53,7 @@ class BaseServiceConfiguration(BaseModel):
ServiceProviders.ASSEMBLYAI,
ServiceProviders.GLADIA,
ServiceProviders.RIME,
ServiceProviders.MINIMAX,
ServiceProviders.OPENAI_REALTIME,
ServiceProviders.GOOGLE_REALTIME,
ServiceProviders.GOOGLE_VERTEX_REALTIME,
@ -321,6 +323,32 @@ class SpeachesLLMConfiguration(BaseLLMConfiguration):
)
# Chat model names offered as schema examples for the MiniMax LLM config.
MINIMAX_MODELS = ["MiniMax-M2.7", "MiniMax-M2.7-highspeed"]


@register_llm
class MiniMaxLLMConfiguration(BaseLLMConfiguration):
    # LLM settings for the MiniMax provider. Documented with `#` comments
    # (not a class docstring) so the generated model schema is left unchanged.

    # Discriminator value that selects this configuration.
    provider: Literal[ServiceProviders.MINIMAX] = ServiceProviders.MINIMAX

    # MINIMAX_MODELS are examples only; allow_custom_input is set, so other
    # model names can be entered.
    model: str = Field(
        description="MiniMax chat model.",
        default="MiniMax-M2.7",
        json_schema_extra={"examples": MINIMAX_MODELS, "allow_custom_input": True},
    )

    # Endpoint used for chat requests; defaults to the global MiniMax host.
    base_url: str = Field(
        description="MiniMax OpenAI-compatible API endpoint.",
        default="https://api.minimax.io/v1",
    )

    # Exclusive lower bound (gt) because a temperature of 0 is not accepted;
    # inclusive upper bound of 2.0.
    temperature: float = Field(
        description="Sampling temperature. MiniMax requires > 0.",
        default=1.0,
        gt=0.0,
        le=2.0,
    )
OPENAI_REALTIME_MODELS = ["gpt-realtime-2"]
OPENAI_REALTIME_VOICES = [
"alloy",
@ -494,6 +522,7 @@ LLMConfig = Annotated[
DograhLLMService,
AWSBedrockLLMConfiguration,
SpeachesLLMConfiguration,
MiniMaxLLMConfiguration,
],
Field(discriminator="provider"),
]
@ -783,6 +812,47 @@ class SpeachesTTSConfiguration(BaseTTSConfiguration):
)
# TTS model names offered as schema examples for the MiniMax TTS config.
MINIMAX_TTS_MODELS = ["speech-2.8-hd", "speech-2.8-turbo"]

# Voice IDs offered as schema examples; custom voice IDs are also allowed.
MINIMAX_TTS_VOICES = [
    "English_Graceful_Lady",
    "English_Insightful_Speaker",
    "English_radiant_girl",
    "English_Persuasive_Man",
    "English_Lucky_Robot",
    "English_expressive_narrator",
]


@register_tts
class MiniMaxTTSConfiguration(BaseTTSConfiguration):
    # TTS settings for the MiniMax provider. Documented with `#` comments
    # (not a class docstring) so the generated model schema is left unchanged.

    # Discriminator value that selects this configuration.
    provider: Literal[ServiceProviders.MINIMAX] = ServiceProviders.MINIMAX

    model: str = Field(
        description="MiniMax TTS model.",
        default="speech-2.8-hd",
        json_schema_extra={"examples": MINIMAX_TTS_MODELS},
    )

    # MINIMAX_TTS_VOICES are examples only; allow_custom_input is set.
    voice: str = Field(
        description="MiniMax voice ID.",
        default="English_Graceful_Lady",
        json_schema_extra={"examples": MINIMAX_TTS_VOICES, "allow_custom_input": True},
    )

    # Full endpoint URL including the /v1/t2a_v2 path.
    base_url: str = Field(
        description=(
            "MiniMax TTS API endpoint (must include the /v1/t2a_v2 path). "
            "Defaults to the global endpoint; override with "
            "https://api.minimaxi.chat/v1/t2a_v2 (mainland China) or "
            "https://api-uw.minimax.io/v1/t2a_v2 (US-West)."
        ),
        default="https://api.minimax.io/v1/t2a_v2",
    )

    # Bounded on both ends: 0.5 <= speed <= 2.0.
    speed: float = Field(
        description="Speech speed (0.5 to 2.0).",
        default=1.0,
        ge=0.5,
        le=2.0,
    )

    # No default is declared, so a value must be supplied.
    group_id: str = Field(
        description="MiniMax Group ID (found in your MiniMax dashboard under Account → Group).",
    )
TTSConfig = Annotated[
Union[
DeepgramTTSConfiguration,
@ -794,6 +864,7 @@ TTSConfig = Annotated[
CambTTSConfiguration,
RimeTTSConfiguration,
SpeachesTTSConfiguration,
MiniMaxTTSConfiguration,
],
Field(discriminator="provider"),
]

View file

@ -0,0 +1,23 @@
"""MiniMax TTS wrapper that closes its aiohttp session in cleanup().
Pipecat's MiniMaxHttpTTSService leaves session disposal to the caller. Our
factory creates a fresh session per service instance, so we own its close
here to avoid leaking sockets/FDs on shutdown.
"""
import aiohttp
from pipecat.services.minimax.tts import MiniMaxHttpTTSService
class MiniMaxOwnedSessionTTSService(MiniMaxHttpTTSService):
    """MiniMaxHttpTTSService variant that owns its aiohttp session lifecycle.

    The session handed to the constructor is forwarded to the parent class and
    also remembered on the instance so that cleanup() can close it.
    """

    def __init__(self, *args, aiohttp_session: aiohttp.ClientSession, **kwargs):
        """Forward all arguments to the parent and keep a handle on the session.

        Args:
            *args: Positional arguments passed through unchanged.
            aiohttp_session: Session this instance takes ownership of.
            **kwargs: Keyword arguments passed through unchanged.
        """
        super().__init__(*args, aiohttp_session=aiohttp_session, **kwargs)
        self._owned_session = aiohttp_session

    async def cleanup(self):
        """Run the parent cleanup, then close the owned session.

        The close sits in a ``finally`` block so the session is released even
        when the parent cleanup raises; that exception still propagates.
        """
        try:
            await super().cleanup()
        finally:
            # Skip the close if the session was already closed elsewhere.
            if not self._owned_session.closed:
                await self._owned_session.close()

View file

@ -1,10 +1,12 @@
from typing import TYPE_CHECKING
import aiohttp
from fastapi import HTTPException
from loguru import logger
from api.constants import MPS_API_URL
from api.services.configuration.registry import ServiceProviders
from api.services.pipecat.minimax_tts import MiniMaxOwnedSessionTTSService
from pipecat.services.assemblyai.stt import AssemblyAISTTService, AssemblyAISTTSettings
from pipecat.services.aws.llm import AWSBedrockLLMService, AWSBedrockLLMSettings
from pipecat.services.azure.llm import AzureLLMService, AzureLLMSettings
@ -36,6 +38,8 @@ from pipecat.services.openai.stt import (
from pipecat.services.openai.tts import OpenAITTSService, OpenAITTSSettings
from pipecat.services.openrouter.llm import OpenRouterLLMService, OpenRouterLLMSettings
from pipecat.services.rime.tts import RimeTTSService, RimeTTSSettings
from pipecat.services.minimax.llm import MiniMaxLLMService
from pipecat.services.minimax.tts import MiniMaxHttpTTSService, MiniMaxTTSSettings
from pipecat.services.sarvam.stt import SarvamSTTService, SarvamSTTSettings
from pipecat.services.sarvam.tts import SarvamTTSService, SarvamTTSSettings
from pipecat.services.speaches.llm import SpeachesLLMService, SpeachesLLMSettings
@ -392,6 +396,40 @@ def create_tts_service(user_config, audio_config: "AudioConfig"):
skip_aggregator_types=["recording_router", "recording"],
silence_time_s=1.0,
)
elif user_config.tts.provider == ServiceProviders.MINIMAX.value:
group_id = getattr(user_config.tts, "group_id", None)
if not group_id:
raise HTTPException(
status_code=400,
detail="MiniMax TTS requires a group_id. Configure it in your TTS settings.",
)
voice = getattr(user_config.tts, "voice", None) or "English_Graceful_Lady"
speed = getattr(user_config.tts, "speed", None) or 1.0
# Pipecat appends "?GroupId=..." to base_url as-is, so /t2a_v2 must
# already be in the path.
base_url = (
getattr(user_config.tts, "base_url", None)
or "https://api.minimax.io/v1/t2a_v2"
).rstrip("/")
if not base_url.endswith("/t2a_v2"):
base_url = f"{base_url}/t2a_v2"
session = aiohttp.ClientSession()
return MiniMaxOwnedSessionTTSService(
api_key=user_config.tts.api_key,
group_id=group_id,
base_url=base_url,
aiohttp_session=session,
settings=MiniMaxTTSSettings(
model=user_config.tts.model,
voice=voice,
speed=speed,
),
text_filters=[xml_function_tag_filter],
skip_aggregator_types=["recording_router", "recording"],
silence_time_s=1.0,
)
else:
raise HTTPException(
status_code=400, detail=f"Invalid TTS provider {user_config.tts.provider}"
@ -408,6 +446,7 @@ def create_llm_service_from_provider(
aws_access_key: str | None = None,
aws_secret_key: str | None = None,
aws_region: str | None = None,
temperature: float | None = None,
):
"""Create an LLM service from explicit provider/model/api_key.
@ -471,6 +510,15 @@ def create_llm_service_from_provider(
api_key=api_key or "none",
settings=SpeachesLLMSettings(model=model),
)
elif provider == ServiceProviders.MINIMAX.value:
return MiniMaxLLMService(
api_key=api_key,
base_url=base_url or "https://api.minimax.io/v1",
settings=MiniMaxLLMService.Settings(
model=model,
temperature=temperature if temperature is not None else 1.0,
),
)
else:
raise HTTPException(status_code=400, detail=f"Invalid LLM provider {provider}")
@ -581,5 +629,8 @@ def create_llm_service(user_config):
kwargs["aws_access_key"] = user_config.llm.aws_access_key
kwargs["aws_secret_key"] = user_config.llm.aws_secret_key
kwargs["aws_region"] = user_config.llm.aws_region
elif provider == ServiceProviders.MINIMAX.value:
kwargs["base_url"] = user_config.llm.base_url
kwargs["temperature"] = user_config.llm.temperature
return create_llm_service_from_provider(provider, model, api_key, **kwargs)

View file

@ -0,0 +1,126 @@
from types import SimpleNamespace
from unittest.mock import patch
from pipecat.services.minimax.llm import MiniMaxLLMService as RealMiniMaxLLMService
from api.services.configuration.registry import (
MiniMaxLLMConfiguration,
MiniMaxTTSConfiguration,
ServiceProviders,
)
from api.services.pipecat.service_factory import (
create_llm_service_from_provider,
create_tts_service,
)
class TestMiniMaxLLMConfiguration:
    """Field defaults and overrides of MiniMaxLLMConfiguration."""

    def test_default_values(self):
        # Only the API key is supplied; every other field falls back to its default.
        llm_config = MiniMaxLLMConfiguration(api_key="test-key")

        assert llm_config.provider == ServiceProviders.MINIMAX
        assert llm_config.model == "MiniMax-M2.7"
        assert llm_config.base_url == "https://api.minimax.io/v1"

    def test_custom_model(self):
        overrides = {"api_key": "test-key", "model": "MiniMax-M2.7-highspeed"}
        llm_config = MiniMaxLLMConfiguration(**overrides)

        assert llm_config.model == "MiniMax-M2.7-highspeed"

    def test_custom_base_url(self):
        overrides = {"api_key": "test-key", "base_url": "https://api.minimaxi.com/v1"}
        llm_config = MiniMaxLLMConfiguration(**overrides)

        assert llm_config.base_url == "https://api.minimaxi.com/v1"
class TestMiniMaxTTSConfiguration:
    """Default field values of MiniMaxTTSConfiguration."""

    def test_default_values(self):
        # group_id has no default, so it is passed together with the key.
        tts_config = MiniMaxTTSConfiguration(group_id="test-group", api_key="test-key")

        expected_fields = {
            "provider": ServiceProviders.MINIMAX,
            "model": "speech-2.8-hd",
            "voice": "English_Graceful_Lady",
            "speed": 1.0,
            "group_id": "test-group",
        }
        for field_name, expected_value in expected_fields.items():
            assert getattr(tts_config, field_name) == expected_value
class TestMiniMaxLLMServiceFactory:
    """MiniMax branch of create_llm_service_from_provider."""

    _PATCH_TARGET = "api.services.pipecat.service_factory.MiniMaxLLMService"

    def _run_factory(self, **factory_kwargs):
        """Call the factory with MiniMaxLLMService patched and return the mock."""
        with patch(self._PATCH_TARGET) as service_mock:
            # Keep the real Settings type so assertions read actual field values
            # instead of auto-created mock attributes.
            service_mock.Settings = RealMiniMaxLLMService.Settings
            create_llm_service_from_provider(
                provider=ServiceProviders.MINIMAX.value,
                api_key="test-key",
                **factory_kwargs,
            )
        return service_mock

    def test_create_minimax_llm_service_uses_openai_compatible(self):
        service_mock = self._run_factory(model="MiniMax-M2.7")

        assert service_mock.call_count == 1
        passed = service_mock.call_args.kwargs
        assert passed["api_key"] == "test-key"
        assert passed["base_url"] == "https://api.minimax.io/v1"
        assert passed["settings"].model == "MiniMax-M2.7"
        assert passed["settings"].temperature == 1.0

    def test_create_minimax_llm_service_custom_base_url(self):
        service_mock = self._run_factory(
            model="MiniMax-M2.7-highspeed",
            base_url="https://api.minimaxi.com/v1",
        )

        passed = service_mock.call_args.kwargs
        assert passed["base_url"] == "https://api.minimaxi.com/v1"
        assert passed["settings"].model == "MiniMax-M2.7-highspeed"

    def test_create_minimax_llm_service_passes_user_temperature(self):
        service_mock = self._run_factory(model="MiniMax-M2.7", temperature=0.3)

        passed = service_mock.call_args.kwargs
        assert passed["settings"].temperature == 0.3
class TestMiniMaxTTSServiceFactory:
    """MiniMax branch of create_tts_service."""

    def test_create_minimax_tts_service(self):
        # base_url deliberately omits the /t2a_v2 suffix so the factory's
        # normalization is exercised.
        user_config = SimpleNamespace(
            tts=SimpleNamespace(
                provider=ServiceProviders.MINIMAX.value,
                api_key="test-key",
                model="speech-2.8-hd",
                voice="English_Graceful_Lady",
                speed=1.0,
                base_url="https://api.minimax.io/v1",
                group_id="test-group",
            )
        )
        audio_config = SimpleNamespace(transport_in_sample_rate=16000)

        # Patch ClientSession so no real session is created, and patch the
        # service class so its constructor arguments can be inspected.
        with patch(
            "api.services.pipecat.service_factory.aiohttp.ClientSession"
        ), patch(
            "api.services.pipecat.service_factory.MiniMaxOwnedSessionTTSService"
        ) as mock_service:
            create_tts_service(user_config, audio_config)

            assert mock_service.call_count == 1
            kwargs = mock_service.call_args.kwargs
            assert kwargs["api_key"] == "test-key"
            assert kwargs["group_id"] == "test-group"
            # The factory appends /t2a_v2 when the configured URL lacks it.
            assert kwargs["base_url"] == "https://api.minimax.io/v1/t2a_v2"
            assert kwargs["settings"].model == "speech-2.8-hd"
            assert kwargs["settings"].voice == "English_Graceful_Lady"
            assert kwargs["settings"].speed == 1.0
            assert kwargs["aiohttp_session"] is not None

@ -1 +1 @@
Subproject commit d1e23ca521f5412a9dc09430ada730500e15a7ab
Subproject commit c771a50ed36c49002b4bf4e5cb66cf1e4b73c97d