diff --git a/api/services/configuration/check_validity.py b/api/services/configuration/check_validity.py
index ca46cfc..289e643 100644
--- a/api/services/configuration/check_validity.py
+++ b/api/services/configuration/check_validity.py
@@ -53,6 +53,7 @@ class UserConfigurationValidator:
             ServiceProviders.ASSEMBLYAI.value: self._check_assemblyai_api_key,
             ServiceProviders.GLADIA.value: self._check_gladia_api_key,
             ServiceProviders.RIME.value: self._check_rime_api_key,
+            ServiceProviders.MINIMAX.value: self._check_minimax_api_key,
         }
 
     async def validate(
@@ -147,6 +148,19 @@ class UserConfigurationValidator:
                 return [{"model": service_name, "message": str(e)}]
             return []
 
+        # MiniMax TTS requires a group_id alongside the API key.
+        # LLM configs don't expose group_id, so only check when the field exists.
+        if provider == ServiceProviders.MINIMAX.value and hasattr(
+            service_config, "group_id"
+        ):
+            if not getattr(service_config, "group_id", None):
+                return [
+                    {
+                        "model": service_name,
+                        "message": "group_id is required for MiniMax TTS",
+                    }
+                ]
+
         api_key = service_config.api_key
 
         try:
@@ -253,3 +267,8 @@ class UserConfigurationValidator:
 
     def _check_rime_api_key(self, model: str, api_key: str) -> bool:
         return True
+
+    def _check_minimax_api_key(self, model: str, api_key: str) -> bool:
+        # MiniMax doesn't publish a cheap key-validation endpoint; trust the key
+        # at save time and surface auth errors at first call (same as Rime/Sarvam).
+        return True
diff --git a/api/services/configuration/registry.py b/api/services/configuration/registry.py
index 05ff6a3..7d94f11 100644
--- a/api/services/configuration/registry.py
+++ b/api/services/configuration/registry.py
@@ -32,6 +32,7 @@ class ServiceProviders(str, Enum):
     ASSEMBLYAI = "assemblyai"
     GLADIA = "gladia"
     RIME = "rime"
+    MINIMAX = "minimax"
     OPENAI_REALTIME = "openai_realtime"
     GOOGLE_REALTIME = "google_realtime"
     GOOGLE_VERTEX_REALTIME = "google_vertex_realtime"
@@ -52,6 +53,7 @@ class BaseServiceConfiguration(BaseModel):
         ServiceProviders.ASSEMBLYAI,
         ServiceProviders.GLADIA,
         ServiceProviders.RIME,
+        ServiceProviders.MINIMAX,
         ServiceProviders.OPENAI_REALTIME,
         ServiceProviders.GOOGLE_REALTIME,
         ServiceProviders.GOOGLE_VERTEX_REALTIME,
@@ -321,6 +323,32 @@ class SpeachesLLMConfiguration(BaseLLMConfiguration):
     )
 
 
+MINIMAX_MODELS = [
+    "MiniMax-M2.7",
+    "MiniMax-M2.7-highspeed",
+]
+
+
+@register_llm
+class MiniMaxLLMConfiguration(BaseLLMConfiguration):
+    provider: Literal[ServiceProviders.MINIMAX] = ServiceProviders.MINIMAX
+    model: str = Field(
+        default="MiniMax-M2.7",
+        description="MiniMax chat model.",
+        json_schema_extra={"examples": MINIMAX_MODELS, "allow_custom_input": True},
+    )
+    base_url: str = Field(
+        default="https://api.minimax.io/v1",
+        description="MiniMax OpenAI-compatible API endpoint.",
+    )
+    temperature: float = Field(
+        default=1.0,
+        gt=0.0,
+        le=2.0,
+        description="Sampling temperature. MiniMax requires > 0.",
+    )
+
+
 OPENAI_REALTIME_MODELS = ["gpt-realtime-2"]
 OPENAI_REALTIME_VOICES = [
     "alloy",
@@ -494,6 +522,7 @@ LLMConfig = Annotated[
         DograhLLMService,
         AWSBedrockLLMConfiguration,
         SpeachesLLMConfiguration,
+        MiniMaxLLMConfiguration,
     ],
     Field(discriminator="provider"),
 ]
@@ -783,6 +812,47 @@ class SpeachesTTSConfiguration(BaseTTSConfiguration):
     )
 
 
+MINIMAX_TTS_MODELS = ["speech-2.8-hd", "speech-2.8-turbo"]
+MINIMAX_TTS_VOICES = [
+    "English_Graceful_Lady",
+    "English_Insightful_Speaker",
+    "English_radiant_girl",
+    "English_Persuasive_Man",
+    "English_Lucky_Robot",
+    "English_expressive_narrator",
+]
+
+
+@register_tts
+class MiniMaxTTSConfiguration(BaseTTSConfiguration):
+    provider: Literal[ServiceProviders.MINIMAX] = ServiceProviders.MINIMAX
+    model: str = Field(
+        default="speech-2.8-hd",
+        description="MiniMax TTS model.",
+        json_schema_extra={"examples": MINIMAX_TTS_MODELS},
+    )
+    voice: str = Field(
+        default="English_Graceful_Lady",
+        description="MiniMax voice ID.",
+        json_schema_extra={"examples": MINIMAX_TTS_VOICES, "allow_custom_input": True},
+    )
+    base_url: str = Field(
+        default="https://api.minimax.io/v1/t2a_v2",
+        description=(
+            "MiniMax TTS API endpoint (must include the /v1/t2a_v2 path). "
+            "Defaults to the global endpoint; override with "
+            "https://api.minimaxi.chat/v1/t2a_v2 (mainland China) or "
+            "https://api-uw.minimax.io/v1/t2a_v2 (US-West)."
+        ),
+    )
+    speed: float = Field(
+        default=1.0, ge=0.5, le=2.0, description="Speech speed (0.5 to 2.0)."
+    )
+    group_id: str = Field(
+        description="MiniMax Group ID (found in your MiniMax dashboard under Account → Group).",
+    )
+
+
 TTSConfig = Annotated[
     Union[
         DeepgramTTSConfiguration,
@@ -794,6 +864,7 @@ TTSConfig = Annotated[
         CambTTSConfiguration,
         RimeTTSConfiguration,
         SpeachesTTSConfiguration,
+        MiniMaxTTSConfiguration,
     ],
     Field(discriminator="provider"),
 ]
diff --git a/api/services/pipecat/minimax_tts.py b/api/services/pipecat/minimax_tts.py
new file mode 100644
index 0000000..ef53a81
--- /dev/null
+++ b/api/services/pipecat/minimax_tts.py
@@ -0,0 +1,34 @@
+"""MiniMax TTS wrapper that closes its aiohttp session in cleanup().
+
+Pipecat's MiniMaxHttpTTSService leaves session disposal to the caller. Our
+factory creates a fresh session per service instance, so we own its close
+here to avoid leaking sockets/FDs on shutdown.
+"""
+
+import aiohttp
+
+from pipecat.services.minimax.tts import MiniMaxHttpTTSService
+
+
+class MiniMaxOwnedSessionTTSService(MiniMaxHttpTTSService):
+    """MiniMaxHttpTTSService variant that owns its aiohttp session lifecycle."""
+
+    def __init__(self, *args, aiohttp_session: aiohttp.ClientSession, **kwargs):
+        """Forward all arguments to the parent and remember the session to close.
+
+        Args:
+            aiohttp_session: Session handed to the parent service; this wrapper
+                closes it in cleanup().
+        """
+        super().__init__(*args, aiohttp_session=aiohttp_session, **kwargs)
+        self._owned_session = aiohttp_session
+
+    async def cleanup(self):
+        """Run the parent's cleanup, then close the owned session."""
+        try:
+            await super().cleanup()
+        finally:
+            # Close even when the parent's cleanup raises; otherwise the
+            # sockets/FDs this wrapper exists to release would leak.
+            if not self._owned_session.closed:
+                await self._owned_session.close()
diff --git a/api/services/pipecat/service_factory.py b/api/services/pipecat/service_factory.py
index eb058fb..207b504 100644
--- a/api/services/pipecat/service_factory.py
+++ b/api/services/pipecat/service_factory.py
@@ -1,10 +1,12 @@
 from typing import TYPE_CHECKING
 
+import aiohttp
 from fastapi import HTTPException
 from loguru import logger
 
 from api.constants import MPS_API_URL
 from api.services.configuration.registry import ServiceProviders
+from api.services.pipecat.minimax_tts import MiniMaxOwnedSessionTTSService
 from pipecat.services.assemblyai.stt import AssemblyAISTTService, AssemblyAISTTSettings
 from pipecat.services.aws.llm import AWSBedrockLLMService, AWSBedrockLLMSettings
 from pipecat.services.azure.llm import AzureLLMService, AzureLLMSettings
@@ -36,6 +38,8 @@ from pipecat.services.openai.stt import (
 from pipecat.services.openai.tts import OpenAITTSService, OpenAITTSSettings
 from pipecat.services.openrouter.llm import OpenRouterLLMService, OpenRouterLLMSettings
 from pipecat.services.rime.tts import RimeTTSService, RimeTTSSettings
+from pipecat.services.minimax.llm import MiniMaxLLMService
+from pipecat.services.minimax.tts import MiniMaxTTSSettings
 from pipecat.services.sarvam.stt import SarvamSTTService, SarvamSTTSettings
 from pipecat.services.sarvam.tts import SarvamTTSService, SarvamTTSSettings
 from pipecat.services.speaches.llm import SpeachesLLMService, SpeachesLLMSettings
@@ -392,6 +396,40 @@ def create_tts_service(user_config, audio_config: "AudioConfig"):
             skip_aggregator_types=["recording_router", "recording"],
             silence_time_s=1.0,
         )
+    elif user_config.tts.provider == ServiceProviders.MINIMAX.value:
+        group_id = getattr(user_config.tts, "group_id", None)
+        if not group_id:
+            raise HTTPException(
+                status_code=400,
+                detail="MiniMax TTS requires a group_id. Configure it in your TTS settings.",
+            )
+        voice = getattr(user_config.tts, "voice", None) or "English_Graceful_Lady"
+        speed = getattr(user_config.tts, "speed", None) or 1.0
+
+        # Pipecat appends "?GroupId=..." to base_url as-is, so /t2a_v2 must
+        # already be in the path.
+        base_url = (
+            getattr(user_config.tts, "base_url", None)
+            or "https://api.minimax.io/v1/t2a_v2"
+        ).rstrip("/")
+        if not base_url.endswith("/t2a_v2"):
+            base_url = f"{base_url}/t2a_v2"
+
+        session = aiohttp.ClientSession()
+        return MiniMaxOwnedSessionTTSService(
+            api_key=user_config.tts.api_key,
+            group_id=group_id,
+            base_url=base_url,
+            aiohttp_session=session,
+            settings=MiniMaxTTSSettings(
+                model=user_config.tts.model,
+                voice=voice,
+                speed=speed,
+            ),
+            text_filters=[xml_function_tag_filter],
+            skip_aggregator_types=["recording_router", "recording"],
+            silence_time_s=1.0,
+        )
     else:
         raise HTTPException(
             status_code=400, detail=f"Invalid TTS provider {user_config.tts.provider}"
@@ -408,6 +446,7 @@ def create_llm_service_from_provider(
     aws_access_key: str | None = None,
     aws_secret_key: str | None = None,
     aws_region: str | None = None,
+    temperature: float | None = None,
 ):
     """Create an LLM service from explicit provider/model/api_key.
 
@@ -471,6 +510,15 @@ def create_llm_service_from_provider(
             api_key=api_key or "none",
             settings=SpeachesLLMSettings(model=model),
         )
+    elif provider == ServiceProviders.MINIMAX.value:
+        return MiniMaxLLMService(
+            api_key=api_key,
+            base_url=base_url or "https://api.minimax.io/v1",
+            settings=MiniMaxLLMService.Settings(
+                model=model,
+                temperature=temperature if temperature is not None else 1.0,
+            ),
+        )
     else:
         raise HTTPException(status_code=400, detail=f"Invalid LLM provider {provider}")
 
@@ -581,5 +629,8 @@ def create_llm_service(user_config):
         kwargs["aws_access_key"] = user_config.llm.aws_access_key
         kwargs["aws_secret_key"] = user_config.llm.aws_secret_key
         kwargs["aws_region"] = user_config.llm.aws_region
+    elif provider == ServiceProviders.MINIMAX.value:
+        kwargs["base_url"] = user_config.llm.base_url
+        kwargs["temperature"] = user_config.llm.temperature
 
     return create_llm_service_from_provider(provider, model, api_key, **kwargs)
diff --git a/api/tests/test_minimax_service_factory.py b/api/tests/test_minimax_service_factory.py
new file mode 100644
index 0000000..ecf4676
--- /dev/null
+++ b/api/tests/test_minimax_service_factory.py
@@ -0,0 +1,127 @@
+from types import SimpleNamespace
+from unittest.mock import patch
+
+from pipecat.services.minimax.llm import MiniMaxLLMService as RealMiniMaxLLMService
+
+from api.services.configuration.registry import (
+    MiniMaxLLMConfiguration,
+    MiniMaxTTSConfiguration,
+    ServiceProviders,
+)
+from api.services.pipecat.service_factory import (
+    create_llm_service_from_provider,
+    create_tts_service,
+)
+
+
+class TestMiniMaxLLMConfiguration:
+    def test_default_values(self):
+        config = MiniMaxLLMConfiguration(api_key="test-key")
+        assert config.provider == ServiceProviders.MINIMAX
+        assert config.model == "MiniMax-M2.7"
+        assert config.base_url == "https://api.minimax.io/v1"
+
+    def test_custom_model(self):
+        config = MiniMaxLLMConfiguration(
+            api_key="test-key", model="MiniMax-M2.7-highspeed"
+        )
+        assert config.model == "MiniMax-M2.7-highspeed"
+
+    def test_custom_base_url(self):
+        config = MiniMaxLLMConfiguration(
+            api_key="test-key", base_url="https://api.minimaxi.com/v1"
+        )
+        assert config.base_url == "https://api.minimaxi.com/v1"
+
+
+class TestMiniMaxTTSConfiguration:
+    def test_default_values(self):
+        config = MiniMaxTTSConfiguration(api_key="test-key", group_id="test-group")
+        assert config.provider == ServiceProviders.MINIMAX
+        assert config.model == "speech-2.8-hd"
+        assert config.voice == "English_Graceful_Lady"
+        assert config.speed == 1.0
+        assert config.group_id == "test-group"
+
+
+class TestMiniMaxLLMServiceFactory:
+    def test_create_minimax_llm_service_uses_openai_compatible(self):
+        with patch(
+            "api.services.pipecat.service_factory.MiniMaxLLMService"
+        ) as mock_service:
+            mock_service.Settings = RealMiniMaxLLMService.Settings
+            create_llm_service_from_provider(
+                provider=ServiceProviders.MINIMAX.value,
+                model="MiniMax-M2.7",
+                api_key="test-key",
+            )
+
+            assert mock_service.call_count == 1
+            kwargs = mock_service.call_args.kwargs
+            assert kwargs["api_key"] == "test-key"
+            assert kwargs["base_url"] == "https://api.minimax.io/v1"
+            assert kwargs["settings"].model == "MiniMax-M2.7"
+            assert kwargs["settings"].temperature == 1.0
+
+    def test_create_minimax_llm_service_custom_base_url(self):
+        with patch(
+            "api.services.pipecat.service_factory.MiniMaxLLMService"
+        ) as mock_service:
+            mock_service.Settings = RealMiniMaxLLMService.Settings
+            create_llm_service_from_provider(
+                provider=ServiceProviders.MINIMAX.value,
+                model="MiniMax-M2.7-highspeed",
+                api_key="test-key",
+                base_url="https://api.minimaxi.com/v1",
+            )
+
+            kwargs = mock_service.call_args.kwargs
+            assert kwargs["base_url"] == "https://api.minimaxi.com/v1"
+            assert kwargs["settings"].model == "MiniMax-M2.7-highspeed"
+
+    def test_create_minimax_llm_service_passes_user_temperature(self):
+        with patch(
+            "api.services.pipecat.service_factory.MiniMaxLLMService"
+        ) as mock_service:
+            mock_service.Settings = RealMiniMaxLLMService.Settings
+            create_llm_service_from_provider(
+                provider=ServiceProviders.MINIMAX.value,
+                model="MiniMax-M2.7",
+                api_key="test-key",
+                temperature=0.3,
+            )
+            kwargs = mock_service.call_args.kwargs
+            assert kwargs["settings"].temperature == 0.3
+
+
+class TestMiniMaxTTSServiceFactory:
+    def test_create_minimax_tts_service(self):
+        user_config = SimpleNamespace(
+            tts=SimpleNamespace(
+                provider=ServiceProviders.MINIMAX.value,
+                api_key="test-key",
+                model="speech-2.8-hd",
+                voice="English_Graceful_Lady",
+                speed=1.0,
+                base_url="https://api.minimax.io/v1",
+                group_id="test-group",
+            )
+        )
+        audio_config = SimpleNamespace(transport_in_sample_rate=16000)
+
+        with patch(
+            "api.services.pipecat.service_factory.aiohttp.ClientSession"
+        ), patch(
+            "api.services.pipecat.service_factory.MiniMaxOwnedSessionTTSService"
+        ) as mock_service:
+            create_tts_service(user_config, audio_config)
+
+            assert mock_service.call_count == 1
+            kwargs = mock_service.call_args.kwargs
+            assert kwargs["api_key"] == "test-key"
+            assert kwargs["group_id"] == "test-group"
+            assert kwargs["base_url"] == "https://api.minimax.io/v1/t2a_v2"
+            assert kwargs["settings"].model == "speech-2.8-hd"
+            assert kwargs["settings"].voice == "English_Graceful_Lady"
+            assert kwargs["settings"].speed == 1.0
+            assert kwargs["aiohttp_session"] is not None
diff --git a/pipecat b/pipecat
index d1e23ca..c771a50 160000
--- a/pipecat
+++ b/pipecat
@@ -1 +1 @@
-Subproject commit d1e23ca521f5412a9dc09430ada730500e15a7ab
+Subproject commit c771a50ed36c49002b4bf4e5cb66cf1e4b73c97d