mirror of
https://github.com/dograh-hq/dograh.git
synced 2026-06-28 08:49:42 +02:00
feat: add MiniMax provider support (Chat + TTS) (#309)
* feat: add MiniMax provider support (Chat + TTS) - Add MiniMax LLM provider using OpenAI-compatible API - Models: MiniMax-M2.7, MiniMax-M2.7-highspeed - Default base URL: https://api.minimax.io/v1 - Uses MINIMAX_API_KEY for authentication - Add MiniMax TTS provider using Pipecat's MiniMaxHttpTTSService - Models: speech-2.8-hd (default), speech-2.8-turbo - 6 built-in voices - Requires group_id configuration - Add unit tests for both providers * fix(minimax): validator, temperature, session cleanup, reasoning filter - check_validity.py: wire MiniMax into _validator_map and enforce group_id at save time. Without this, saving a config with a valid key was rejected. - registry.py: surface temperature on the LLM config (gt=0; MiniMax rejects 0) and base_url on the TTS config - service_factory.py: * Plumb temperature through create_llm_service * Normalize TTS base_url to include /t2a_v2 — pipecat appends only ?GroupId=... to the URL. * Use the new MiniMaxLLMService (from pipecat) to strip <think>...</think> reasoning that MiniMax-M2.7 emits inline in delta.content (otherwise it leaks straight to TTS). * Use MiniMaxOwnedSessionTTSService so the per-instance aiohttp session gets closed in cleanup() instead of leaking sockets/FDs. - minimax_tts.py: small wrapper around MiniMaxHttpTTSService that owns the session it was handed (pipecat's caller-owns-session API conflicts with the factory's per-instance pattern). - pipecat submodule: bumps to a commit that adds MiniMaxLLMService — a thin OpenAILLMService subclass with the streaming <think> filter (mirrors NvidiaLLMService's pattern for NIM reasoning models). - Tests updated/added for all of the above. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> --------- Co-authored-by: octo-patch <octo-patch@github.com> Co-authored-by: Sabiha Khan <sabihak89@gmail.com>
This commit is contained in:
parent
38c2003734
commit
0e0d3136ca
6 changed files with 291 additions and 1 deletions
|
|
@ -53,6 +53,7 @@ class UserConfigurationValidator:
|
|||
ServiceProviders.ASSEMBLYAI.value: self._check_assemblyai_api_key,
|
||||
ServiceProviders.GLADIA.value: self._check_gladia_api_key,
|
||||
ServiceProviders.RIME.value: self._check_rime_api_key,
|
||||
ServiceProviders.MINIMAX.value: self._check_minimax_api_key,
|
||||
}
|
||||
|
||||
async def validate(
|
||||
|
|
@ -147,6 +148,19 @@ class UserConfigurationValidator:
|
|||
return [{"model": service_name, "message": str(e)}]
|
||||
return []
|
||||
|
||||
# MiniMax TTS requires a group_id alongside the API key.
|
||||
# LLM configs don't expose group_id, so only check when the field exists.
|
||||
if provider == ServiceProviders.MINIMAX.value and hasattr(
|
||||
service_config, "group_id"
|
||||
):
|
||||
if not getattr(service_config, "group_id", None):
|
||||
return [
|
||||
{
|
||||
"model": service_name,
|
||||
"message": "group_id is required for MiniMax TTS",
|
||||
}
|
||||
]
|
||||
|
||||
api_key = service_config.api_key
|
||||
|
||||
try:
|
||||
|
|
@ -253,3 +267,8 @@ class UserConfigurationValidator:
|
|||
|
||||
def _check_rime_api_key(self, model: str, api_key: str) -> bool:
|
||||
return True
|
||||
|
||||
def _check_minimax_api_key(self, model: str, api_key: str) -> bool:
|
||||
# MiniMax doesn't publish a cheap key-validation endpoint; trust the key
|
||||
# at save time and surface auth errors at first call (same as Rime/Sarvam).
|
||||
return True
|
||||
|
|
|
|||
|
|
@ -32,6 +32,7 @@ class ServiceProviders(str, Enum):
|
|||
ASSEMBLYAI = "assemblyai"
|
||||
GLADIA = "gladia"
|
||||
RIME = "rime"
|
||||
MINIMAX = "minimax"
|
||||
OPENAI_REALTIME = "openai_realtime"
|
||||
GOOGLE_REALTIME = "google_realtime"
|
||||
GOOGLE_VERTEX_REALTIME = "google_vertex_realtime"
|
||||
|
|
@ -52,6 +53,7 @@ class BaseServiceConfiguration(BaseModel):
|
|||
ServiceProviders.ASSEMBLYAI,
|
||||
ServiceProviders.GLADIA,
|
||||
ServiceProviders.RIME,
|
||||
ServiceProviders.MINIMAX,
|
||||
ServiceProviders.OPENAI_REALTIME,
|
||||
ServiceProviders.GOOGLE_REALTIME,
|
||||
ServiceProviders.GOOGLE_VERTEX_REALTIME,
|
||||
|
|
@ -321,6 +323,32 @@ class SpeachesLLMConfiguration(BaseLLMConfiguration):
|
|||
)
|
||||
|
||||
|
||||
# Chat model names offered as examples for the MiniMax LLM configuration.
MINIMAX_MODELS = ["MiniMax-M2.7", "MiniMax-M2.7-highspeed"]
|
||||
|
||||
|
||||
# LLM settings for the MiniMax provider. Documented with `#` comments rather
# than a class docstring so the generated JSON schema stays unchanged.
@register_llm
class MiniMaxLLMConfiguration(BaseLLMConfiguration):
    # Fixed tag identifying this configuration as the MiniMax provider.
    provider: Literal[ServiceProviders.MINIMAX] = ServiceProviders.MINIMAX

    # Model name; MINIMAX_MODELS are published as schema examples.
    # NOTE(review): "allow_custom_input" presumably lets the UI accept names
    # outside the examples — confirm against the schema consumer.
    model: str = Field(
        description="MiniMax chat model.",
        default="MiniMax-M2.7",
        json_schema_extra={"examples": MINIMAX_MODELS, "allow_custom_input": True},
    )

    # Endpoint of the OpenAI-compatible API.
    base_url: str = Field(
        description="MiniMax OpenAI-compatible API endpoint.",
        default="https://api.minimax.io/v1",
    )

    # Exclusive lower bound of 0 (the description notes MiniMax requires > 0),
    # inclusive upper bound of 2.
    temperature: float = Field(
        description="Sampling temperature. MiniMax requires > 0.",
        default=1.0,
        gt=0.0,
        le=2.0,
    )
|
||||
|
||||
|
||||
OPENAI_REALTIME_MODELS = ["gpt-realtime-2"]
|
||||
OPENAI_REALTIME_VOICES = [
|
||||
"alloy",
|
||||
|
|
@ -494,6 +522,7 @@ LLMConfig = Annotated[
|
|||
DograhLLMService,
|
||||
AWSBedrockLLMConfiguration,
|
||||
SpeachesLLMConfiguration,
|
||||
MiniMaxLLMConfiguration,
|
||||
],
|
||||
Field(discriminator="provider"),
|
||||
]
|
||||
|
|
@ -783,6 +812,47 @@ class SpeachesTTSConfiguration(BaseTTSConfiguration):
|
|||
)
|
||||
|
||||
|
||||
# TTS model names offered as examples for the MiniMax TTS configuration.
MINIMAX_TTS_MODELS = [
    "speech-2.8-hd",
    "speech-2.8-turbo",
]

# Voice IDs offered as examples for the MiniMax TTS configuration.
MINIMAX_TTS_VOICES = [
    "English_Graceful_Lady",
    "English_Insightful_Speaker",
    "English_radiant_girl",
    "English_Persuasive_Man",
    "English_Lucky_Robot",
    "English_expressive_narrator",
]
|
||||
|
||||
|
||||
# TTS settings for the MiniMax provider. Documented with `#` comments rather
# than a class docstring so the generated JSON schema stays unchanged.
@register_tts
class MiniMaxTTSConfiguration(BaseTTSConfiguration):
    # Fixed tag identifying this configuration as the MiniMax provider.
    provider: Literal[ServiceProviders.MINIMAX] = ServiceProviders.MINIMAX

    # TTS model name; MINIMAX_TTS_MODELS are published as schema examples.
    model: str = Field(
        description="MiniMax TTS model.",
        default="speech-2.8-hd",
        json_schema_extra={"examples": MINIMAX_TTS_MODELS},
    )

    # Voice ID; MINIMAX_TTS_VOICES are published as schema examples.
    # NOTE(review): "allow_custom_input" presumably lets the UI accept voice
    # IDs outside the examples — confirm against the schema consumer.
    voice: str = Field(
        description="MiniMax voice ID.",
        default="English_Graceful_Lady",
        json_schema_extra={"examples": MINIMAX_TTS_VOICES, "allow_custom_input": True},
    )

    # Full endpoint URL, including the /v1/t2a_v2 path.
    base_url: str = Field(
        description=(
            "MiniMax TTS API endpoint (must include the /v1/t2a_v2 path). Defaults to "
            "the global endpoint; override with https://api.minimaxi.chat/v1/t2a_v2 "
            "(mainland China) or https://api-uw.minimax.io/v1/t2a_v2 (US-West)."
        ),
        default="https://api.minimax.io/v1/t2a_v2",
    )

    # Speech rate, bounded inclusively to [0.5, 2.0].
    speed: float = Field(
        description="Speech speed (0.5 to 2.0).",
        default=1.0,
        ge=0.5,
        le=2.0,
    )

    # Required: no default is declared, so the field must be supplied.
    group_id: str = Field(
        description="MiniMax Group ID (found in your MiniMax dashboard under Account → Group).",
    )
|
||||
|
||||
|
||||
TTSConfig = Annotated[
|
||||
Union[
|
||||
DeepgramTTSConfiguration,
|
||||
|
|
@ -794,6 +864,7 @@ TTSConfig = Annotated[
|
|||
CambTTSConfiguration,
|
||||
RimeTTSConfiguration,
|
||||
SpeachesTTSConfiguration,
|
||||
MiniMaxTTSConfiguration,
|
||||
],
|
||||
Field(discriminator="provider"),
|
||||
]
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue