mirror of
https://github.com/dograh-hq/dograh.git
synced 2026-06-07 07:55:16 +02:00
feat: add MiniMax provider support (Chat + TTS) (#309)
* feat: add MiniMax provider support (Chat + TTS) - Add MiniMax LLM provider using OpenAI-compatible API - Models: MiniMax-M2.7, MiniMax-M2.7-highspeed - Default base URL: https://api.minimax.io/v1 - Uses MINIMAX_API_KEY for authentication - Add MiniMax TTS provider using Pipecat's MiniMaxHttpTTSService - Models: speech-2.8-hd (default), speech-2.8-turbo - 6 built-in voices - Requires group_id configuration - Add unit tests for both providers * fix(minimax): validator, temperature, session cleanup, reasoning filter - check_validity.py: wire MiniMax into _validator_map and enforce group_id at save time. Without this, saving a config with a valid key was rejected. - registry.py: surface temperature on the LLM config (gt=0; MiniMax rejects 0) and base_url on the TTS config - service_factory.py: * Plumb temperature through create_llm_service * Normalize TTS base_url to include /t2a_v2 — pipecat appends only ?GroupId=... to the URL. * Use the new MiniMaxLLMService (from pipecat) to strip <think>...</think> reasoning that MiniMax-M2.7 emits inline in delta.content (otherwise it leaks straight to TTS). * Use MiniMaxOwnedSessionTTSService so the per-instance aiohttp session gets closed in cleanup() instead of leaking sockets/FDs. - minimax_tts.py: small wrapper around MiniMaxHttpTTSService that owns the session it was handed (pipecat's caller-owns-session API conflicts with the factory's per-instance pattern). - pipecat submodule: bumps to a commit that adds MiniMaxLLMService — a thin OpenAILLMService subclass with the streaming <think> filter (mirrors NvidiaLLMService's pattern for NIM reasoning models). - Tests updated/added for all of the above. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> --------- Co-authored-by: octo-patch <octo-patch@github.com> Co-authored-by: Sabiha Khan <sabihak89@gmail.com>
This commit is contained in:
parent
38c2003734
commit
0e0d3136ca
6 changed files with 291 additions and 1 deletions
|
|
@ -53,6 +53,7 @@ class UserConfigurationValidator:
|
|||
ServiceProviders.ASSEMBLYAI.value: self._check_assemblyai_api_key,
|
||||
ServiceProviders.GLADIA.value: self._check_gladia_api_key,
|
||||
ServiceProviders.RIME.value: self._check_rime_api_key,
|
||||
ServiceProviders.MINIMAX.value: self._check_minimax_api_key,
|
||||
}
|
||||
|
||||
async def validate(
|
||||
|
|
@ -147,6 +148,19 @@ class UserConfigurationValidator:
|
|||
return [{"model": service_name, "message": str(e)}]
|
||||
return []
|
||||
|
||||
# MiniMax TTS requires a group_id alongside the API key.
|
||||
# LLM configs don't expose group_id, so only check when the field exists.
|
||||
if provider == ServiceProviders.MINIMAX.value and hasattr(
|
||||
service_config, "group_id"
|
||||
):
|
||||
if not getattr(service_config, "group_id", None):
|
||||
return [
|
||||
{
|
||||
"model": service_name,
|
||||
"message": "group_id is required for MiniMax TTS",
|
||||
}
|
||||
]
|
||||
|
||||
api_key = service_config.api_key
|
||||
|
||||
try:
|
||||
|
|
@ -253,3 +267,8 @@ class UserConfigurationValidator:
|
|||
|
||||
def _check_rime_api_key(self, model: str, api_key: str) -> bool:
|
||||
return True
|
||||
|
||||
def _check_minimax_api_key(self, model: str, api_key: str) -> bool:
|
||||
# MiniMax doesn't publish a cheap key-validation endpoint; trust the key
|
||||
# at save time and surface auth errors at first call (same as Rime/Sarvam).
|
||||
return True
|
||||
|
|
|
|||
|
|
@ -32,6 +32,7 @@ class ServiceProviders(str, Enum):
|
|||
ASSEMBLYAI = "assemblyai"
|
||||
GLADIA = "gladia"
|
||||
RIME = "rime"
|
||||
MINIMAX = "minimax"
|
||||
OPENAI_REALTIME = "openai_realtime"
|
||||
GOOGLE_REALTIME = "google_realtime"
|
||||
GOOGLE_VERTEX_REALTIME = "google_vertex_realtime"
|
||||
|
|
@ -52,6 +53,7 @@ class BaseServiceConfiguration(BaseModel):
|
|||
ServiceProviders.ASSEMBLYAI,
|
||||
ServiceProviders.GLADIA,
|
||||
ServiceProviders.RIME,
|
||||
ServiceProviders.MINIMAX,
|
||||
ServiceProviders.OPENAI_REALTIME,
|
||||
ServiceProviders.GOOGLE_REALTIME,
|
||||
ServiceProviders.GOOGLE_VERTEX_REALTIME,
|
||||
|
|
@ -321,6 +323,32 @@ class SpeachesLLMConfiguration(BaseLLMConfiguration):
|
|||
)
|
||||
|
||||
|
||||
MINIMAX_MODELS = [
|
||||
"MiniMax-M2.7",
|
||||
"MiniMax-M2.7-highspeed",
|
||||
]
|
||||
|
||||
|
||||
@register_llm
class MiniMaxLLMConfiguration(BaseLLMConfiguration):
    """Configuration for the MiniMax chat LLM provider.

    Selected through the ``provider`` discriminator value
    ``ServiceProviders.MINIMAX``.
    """

    provider: Literal[ServiceProviders.MINIMAX] = ServiceProviders.MINIMAX
    # Suggested models come from MINIMAX_MODELS; custom values are allowed.
    model: str = Field(
        default="MiniMax-M2.7",
        description="MiniMax chat model.",
        json_schema_extra={"examples": MINIMAX_MODELS, "allow_custom_input": True},
    )
    # Endpoint of MiniMax's OpenAI-compatible API.
    base_url: str = Field(
        default="https://api.minimax.io/v1",
        description="MiniMax OpenAI-compatible API endpoint.",
    )
    # Strictly positive (gt=0.0) because MiniMax requires temperature > 0.
    # NOTE(review): confirm MiniMax accepts values above 1.0; the le=2.0
    # upper bound is not justified anywhere in this file.
    temperature: float = Field(
        default=1.0,
        gt=0.0,
        le=2.0,
        description="Sampling temperature. MiniMax requires > 0.",
    )
|
||||
|
||||
|
||||
OPENAI_REALTIME_MODELS = ["gpt-realtime-2"]
|
||||
OPENAI_REALTIME_VOICES = [
|
||||
"alloy",
|
||||
|
|
@ -494,6 +522,7 @@ LLMConfig = Annotated[
|
|||
DograhLLMService,
|
||||
AWSBedrockLLMConfiguration,
|
||||
SpeachesLLMConfiguration,
|
||||
MiniMaxLLMConfiguration,
|
||||
],
|
||||
Field(discriminator="provider"),
|
||||
]
|
||||
|
|
@ -783,6 +812,47 @@ class SpeachesTTSConfiguration(BaseTTSConfiguration):
|
|||
)
|
||||
|
||||
|
||||
MINIMAX_TTS_MODELS = ["speech-2.8-hd", "speech-2.8-turbo"]
|
||||
MINIMAX_TTS_VOICES = [
|
||||
"English_Graceful_Lady",
|
||||
"English_Insightful_Speaker",
|
||||
"English_radiant_girl",
|
||||
"English_Persuasive_Man",
|
||||
"English_Lucky_Robot",
|
||||
"English_expressive_narrator",
|
||||
]
|
||||
|
||||
|
||||
@register_tts
class MiniMaxTTSConfiguration(BaseTTSConfiguration):
    """Configuration for the MiniMax text-to-speech provider.

    Selected through the ``provider`` discriminator value
    ``ServiceProviders.MINIMAX``.
    """

    provider: Literal[ServiceProviders.MINIMAX] = ServiceProviders.MINIMAX
    # Suggested models come from MINIMAX_TTS_MODELS.
    model: str = Field(
        default="speech-2.8-hd",
        description="MiniMax TTS model.",
        json_schema_extra={"examples": MINIMAX_TTS_MODELS},
    )
    # Suggested voices come from MINIMAX_TTS_VOICES; custom voice IDs are allowed.
    voice: str = Field(
        default="English_Graceful_Lady",
        description="MiniMax voice ID.",
        json_schema_extra={"examples": MINIMAX_TTS_VOICES, "allow_custom_input": True},
    )
    # Full endpoint URL, including the /v1/t2a_v2 path.
    base_url: str = Field(
        default="https://api.minimax.io/v1/t2a_v2",
        description=(
            "MiniMax TTS API endpoint (must include the /v1/t2a_v2 path). "
            "Defaults to the global endpoint; override with "
            "https://api.minimaxi.chat/v1/t2a_v2 (mainland China) or "
            "https://api-uw.minimax.io/v1/t2a_v2 (US-West)."
        ),
    )
    # Playback speed multiplier, constrained to the inclusive range 0.5-2.0.
    speed: float = Field(
        default=1.0, ge=0.5, le=2.0, description="Speech speed (0.5 to 2.0)."
    )
    # No default is given, so a value must be supplied when building the config.
    group_id: str = Field(
        description="MiniMax Group ID (found in your MiniMax dashboard under Account → Group).",
    )
|
||||
|
||||
|
||||
TTSConfig = Annotated[
|
||||
Union[
|
||||
DeepgramTTSConfiguration,
|
||||
|
|
@ -794,6 +864,7 @@ TTSConfig = Annotated[
|
|||
CambTTSConfiguration,
|
||||
RimeTTSConfiguration,
|
||||
SpeachesTTSConfiguration,
|
||||
MiniMaxTTSConfiguration,
|
||||
],
|
||||
Field(discriminator="provider"),
|
||||
]
|
||||
|
|
|
|||
23
api/services/pipecat/minimax_tts.py
Normal file
23
api/services/pipecat/minimax_tts.py
Normal file
|
|
@ -0,0 +1,23 @@
|
|||
"""MiniMax TTS wrapper that closes its aiohttp session in cleanup().
|
||||
|
||||
Pipecat's MiniMaxHttpTTSService leaves session disposal to the caller. Our
|
||||
factory creates a fresh session per service instance, so we own its close
|
||||
here to avoid leaking sockets/FDs on shutdown.
|
||||
"""
|
||||
|
||||
import aiohttp
|
||||
|
||||
from pipecat.services.minimax.tts import MiniMaxHttpTTSService
|
||||
|
||||
|
||||
class MiniMaxOwnedSessionTTSService(MiniMaxHttpTTSService):
    """MiniMaxHttpTTSService variant that owns its aiohttp session lifecycle.

    The session passed to the constructor is forwarded to the parent service
    and also remembered here so that ``cleanup()`` can close it.
    """

    def __init__(self, *args, aiohttp_session: aiohttp.ClientSession, **kwargs):
        """Create the service and take ownership of ``aiohttp_session``.

        Args:
            *args: Positional arguments forwarded to ``MiniMaxHttpTTSService``.
            aiohttp_session: Session used for requests; closed by ``cleanup()``.
            **kwargs: Keyword arguments forwarded to ``MiniMaxHttpTTSService``.
        """
        super().__init__(*args, aiohttp_session=aiohttp_session, **kwargs)
        self._owned_session = aiohttp_session

    async def cleanup(self):
        """Run the parent cleanup, then close the owned session.

        The close happens in a ``finally`` block so the session is released
        even when the parent cleanup raises; the original exception still
        propagates to the caller.
        """
        try:
            await super().cleanup()
        finally:
            # Skip sessions that were already closed elsewhere.
            if not self._owned_session.closed:
                await self._owned_session.close()
|
||||
|
|
@ -1,10 +1,12 @@
|
|||
from typing import TYPE_CHECKING
|
||||
|
||||
import aiohttp
|
||||
from fastapi import HTTPException
|
||||
from loguru import logger
|
||||
|
||||
from api.constants import MPS_API_URL
|
||||
from api.services.configuration.registry import ServiceProviders
|
||||
from api.services.pipecat.minimax_tts import MiniMaxOwnedSessionTTSService
|
||||
from pipecat.services.assemblyai.stt import AssemblyAISTTService, AssemblyAISTTSettings
|
||||
from pipecat.services.aws.llm import AWSBedrockLLMService, AWSBedrockLLMSettings
|
||||
from pipecat.services.azure.llm import AzureLLMService, AzureLLMSettings
|
||||
|
|
@ -36,6 +38,8 @@ from pipecat.services.openai.stt import (
|
|||
from pipecat.services.openai.tts import OpenAITTSService, OpenAITTSSettings
|
||||
from pipecat.services.openrouter.llm import OpenRouterLLMService, OpenRouterLLMSettings
|
||||
from pipecat.services.rime.tts import RimeTTSService, RimeTTSSettings
|
||||
from pipecat.services.minimax.llm import MiniMaxLLMService
|
||||
from pipecat.services.minimax.tts import MiniMaxHttpTTSService, MiniMaxTTSSettings
|
||||
from pipecat.services.sarvam.stt import SarvamSTTService, SarvamSTTSettings
|
||||
from pipecat.services.sarvam.tts import SarvamTTSService, SarvamTTSSettings
|
||||
from pipecat.services.speaches.llm import SpeachesLLMService, SpeachesLLMSettings
|
||||
|
|
@ -392,6 +396,40 @@ def create_tts_service(user_config, audio_config: "AudioConfig"):
|
|||
skip_aggregator_types=["recording_router", "recording"],
|
||||
silence_time_s=1.0,
|
||||
)
|
||||
elif user_config.tts.provider == ServiceProviders.MINIMAX.value:
|
||||
group_id = getattr(user_config.tts, "group_id", None)
|
||||
if not group_id:
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail="MiniMax TTS requires a group_id. Configure it in your TTS settings.",
|
||||
)
|
||||
voice = getattr(user_config.tts, "voice", None) or "English_Graceful_Lady"
|
||||
speed = getattr(user_config.tts, "speed", None) or 1.0
|
||||
|
||||
# Pipecat appends "?GroupId=..." to base_url as-is, so /t2a_v2 must
|
||||
# already be in the path.
|
||||
base_url = (
|
||||
getattr(user_config.tts, "base_url", None)
|
||||
or "https://api.minimax.io/v1/t2a_v2"
|
||||
).rstrip("/")
|
||||
if not base_url.endswith("/t2a_v2"):
|
||||
base_url = f"{base_url}/t2a_v2"
|
||||
|
||||
session = aiohttp.ClientSession()
|
||||
return MiniMaxOwnedSessionTTSService(
|
||||
api_key=user_config.tts.api_key,
|
||||
group_id=group_id,
|
||||
base_url=base_url,
|
||||
aiohttp_session=session,
|
||||
settings=MiniMaxTTSSettings(
|
||||
model=user_config.tts.model,
|
||||
voice=voice,
|
||||
speed=speed,
|
||||
),
|
||||
text_filters=[xml_function_tag_filter],
|
||||
skip_aggregator_types=["recording_router", "recording"],
|
||||
silence_time_s=1.0,
|
||||
)
|
||||
else:
|
||||
raise HTTPException(
|
||||
status_code=400, detail=f"Invalid TTS provider {user_config.tts.provider}"
|
||||
|
|
@ -408,6 +446,7 @@ def create_llm_service_from_provider(
|
|||
aws_access_key: str | None = None,
|
||||
aws_secret_key: str | None = None,
|
||||
aws_region: str | None = None,
|
||||
temperature: float | None = None,
|
||||
):
|
||||
"""Create an LLM service from explicit provider/model/api_key.
|
||||
|
||||
|
|
@ -471,6 +510,15 @@ def create_llm_service_from_provider(
|
|||
api_key=api_key or "none",
|
||||
settings=SpeachesLLMSettings(model=model),
|
||||
)
|
||||
elif provider == ServiceProviders.MINIMAX.value:
|
||||
return MiniMaxLLMService(
|
||||
api_key=api_key,
|
||||
base_url=base_url or "https://api.minimax.io/v1",
|
||||
settings=MiniMaxLLMService.Settings(
|
||||
model=model,
|
||||
temperature=temperature if temperature is not None else 1.0,
|
||||
),
|
||||
)
|
||||
else:
|
||||
raise HTTPException(status_code=400, detail=f"Invalid LLM provider {provider}")
|
||||
|
||||
|
|
@ -581,5 +629,8 @@ def create_llm_service(user_config):
|
|||
kwargs["aws_access_key"] = user_config.llm.aws_access_key
|
||||
kwargs["aws_secret_key"] = user_config.llm.aws_secret_key
|
||||
kwargs["aws_region"] = user_config.llm.aws_region
|
||||
elif provider == ServiceProviders.MINIMAX.value:
|
||||
kwargs["base_url"] = user_config.llm.base_url
|
||||
kwargs["temperature"] = user_config.llm.temperature
|
||||
|
||||
return create_llm_service_from_provider(provider, model, api_key, **kwargs)
|
||||
|
|
|
|||
126
api/tests/test_minimax_service_factory.py
Normal file
126
api/tests/test_minimax_service_factory.py
Normal file
|
|
@ -0,0 +1,126 @@
|
|||
from types import SimpleNamespace
|
||||
from unittest.mock import patch
|
||||
|
||||
from pipecat.services.minimax.llm import MiniMaxLLMService as RealMiniMaxLLMService
|
||||
|
||||
from api.services.configuration.registry import (
|
||||
MiniMaxLLMConfiguration,
|
||||
MiniMaxTTSConfiguration,
|
||||
ServiceProviders,
|
||||
)
|
||||
from api.services.pipecat.service_factory import (
|
||||
create_llm_service_from_provider,
|
||||
create_tts_service,
|
||||
)
|
||||
|
||||
|
||||
class TestMiniMaxLLMConfiguration:
    """Field defaults and overrides of ``MiniMaxLLMConfiguration``."""

    def test_default_values(self):
        """Only an API key is needed; every other field falls back to its default."""
        config = MiniMaxLLMConfiguration(api_key="test-key")
        assert config.provider == ServiceProviders.MINIMAX
        assert config.model == "MiniMax-M2.7"
        assert config.base_url == "https://api.minimax.io/v1"
        # The temperature default must stay strictly positive for MiniMax.
        assert config.temperature == 1.0

    def test_custom_model(self):
        """An explicit model name overrides the default."""
        config = MiniMaxLLMConfiguration(
            api_key="test-key", model="MiniMax-M2.7-highspeed"
        )
        assert config.model == "MiniMax-M2.7-highspeed"

    def test_custom_base_url(self):
        """An explicit base URL overrides the default endpoint."""
        config = MiniMaxLLMConfiguration(
            api_key="test-key", base_url="https://api.minimaxi.com/v1"
        )
        assert config.base_url == "https://api.minimaxi.com/v1"
|
||||
|
||||
|
||||
class TestMiniMaxTTSConfiguration:
    """Field defaults of ``MiniMaxTTSConfiguration``."""

    def test_default_values(self):
        """An API key and group id suffice; the remaining fields use defaults."""
        config = MiniMaxTTSConfiguration(api_key="test-key", group_id="test-group")
        assert config.provider == ServiceProviders.MINIMAX
        assert config.model == "speech-2.8-hd"
        assert config.voice == "English_Graceful_Lady"
        assert config.speed == 1.0
        assert config.group_id == "test-group"
        # The default endpoint must already carry the /t2a_v2 path.
        assert config.base_url == "https://api.minimax.io/v1/t2a_v2"
|
||||
|
||||
|
||||
class TestMiniMaxLLMServiceFactory:
    """Checks how ``create_llm_service_from_provider`` builds the MiniMax LLM service."""

    @staticmethod
    def _run_factory(**factory_args):
        """Call the factory with ``MiniMaxLLMService`` patched and return the mock.

        The real ``Settings`` class is attached to the mock so that the
        settings object handed to the service can be inspected.
        """
        target = "api.services.pipecat.service_factory.MiniMaxLLMService"
        with patch(target) as service_mock:
            service_mock.Settings = RealMiniMaxLLMService.Settings
            create_llm_service_from_provider(
                provider=ServiceProviders.MINIMAX.value,
                api_key="test-key",
                **factory_args,
            )
        return service_mock

    def test_create_minimax_llm_service_uses_openai_compatible(self):
        """Without overrides the default endpoint and temperature are used."""
        service_mock = self._run_factory(model="MiniMax-M2.7")

        assert service_mock.call_count == 1
        passed = service_mock.call_args.kwargs
        assert passed["api_key"] == "test-key"
        assert passed["base_url"] == "https://api.minimax.io/v1"
        assert passed["settings"].model == "MiniMax-M2.7"
        assert passed["settings"].temperature == 1.0

    def test_create_minimax_llm_service_custom_base_url(self):
        """A caller-supplied base URL is forwarded unchanged."""
        service_mock = self._run_factory(
            model="MiniMax-M2.7-highspeed",
            base_url="https://api.minimaxi.com/v1",
        )

        passed = service_mock.call_args.kwargs
        assert passed["base_url"] == "https://api.minimaxi.com/v1"
        assert passed["settings"].model == "MiniMax-M2.7-highspeed"

    def test_create_minimax_llm_service_passes_user_temperature(self):
        """A caller-supplied temperature reaches the service settings."""
        service_mock = self._run_factory(model="MiniMax-M2.7", temperature=0.3)

        passed = service_mock.call_args.kwargs
        assert passed["settings"].temperature == 0.3
|
||||
|
||||
|
||||
class TestMiniMaxTTSServiceFactory:
    """Checks how ``create_tts_service`` builds the MiniMax TTS service."""

    def test_create_minimax_tts_service(self):
        """The factory forwards credentials and settings and normalizes base_url."""
        user_config = SimpleNamespace(
            tts=SimpleNamespace(
                provider=ServiceProviders.MINIMAX.value,
                api_key="test-key",
                model="speech-2.8-hd",
                voice="English_Graceful_Lady",
                speed=1.0,
                # Deliberately lacks /t2a_v2 so the normalization is exercised.
                base_url="https://api.minimax.io/v1",
                group_id="test-group",
            )
        )
        audio_config = SimpleNamespace(transport_in_sample_rate=16000)

        # Patch ClientSession so no real session is created during the test.
        with patch(
            "api.services.pipecat.service_factory.aiohttp.ClientSession"
        ), patch(
            "api.services.pipecat.service_factory.MiniMaxOwnedSessionTTSService"
        ) as mock_service:
            create_tts_service(user_config, audio_config)

        assert mock_service.call_count == 1
        kwargs = mock_service.call_args.kwargs
        assert kwargs["api_key"] == "test-key"
        assert kwargs["group_id"] == "test-group"
        # The factory appends /t2a_v2 when the configured URL does not end with it.
        assert kwargs["base_url"] == "https://api.minimax.io/v1/t2a_v2"
        assert kwargs["settings"].model == "speech-2.8-hd"
        assert kwargs["settings"].voice == "English_Graceful_Lady"
        assert kwargs["settings"].speed == 1.0
        assert kwargs["aiohttp_session"] is not None
|
||||
2
pipecat
2
pipecat
|
|
@ -1 +1 @@
|
|||
Subproject commit d1e23ca521f5412a9dc09430ada730500e15a7ab
|
||||
Subproject commit c771a50ed36c49002b4bf4e5cb66cf1e4b73c97d
|
||||
Loading…
Add table
Add a link
Reference in a new issue