This commit is contained in:
Abhishek Kumar 2026-05-22 14:36:54 +05:30
commit 291264de7b
16 changed files with 418 additions and 2 deletions

View file

@ -53,6 +53,7 @@ class UserConfigurationValidator:
ServiceProviders.ASSEMBLYAI.value: self._check_assemblyai_api_key,
ServiceProviders.GLADIA.value: self._check_gladia_api_key,
ServiceProviders.RIME.value: self._check_rime_api_key,
ServiceProviders.MINIMAX.value: self._check_minimax_api_key,
}
async def validate(
@ -147,6 +148,19 @@ class UserConfigurationValidator:
return [{"model": service_name, "message": str(e)}]
return []
# MiniMax TTS requires a group_id alongside the API key.
# LLM configs don't expose group_id, so only check when the field exists.
if provider == ServiceProviders.MINIMAX.value and hasattr(
service_config, "group_id"
):
if not getattr(service_config, "group_id", None):
return [
{
"model": service_name,
"message": "group_id is required for MiniMax TTS",
}
]
api_key = service_config.api_key
try:
@ -253,3 +267,8 @@ class UserConfigurationValidator:
def _check_rime_api_key(self, model: str, api_key: str) -> bool:
return True
def _check_minimax_api_key(self, model: str, api_key: str) -> bool:
# MiniMax doesn't publish a cheap key-validation endpoint; trust the key
# at save time and surface auth errors at first call (same as Rime/Sarvam).
return True

View file

@ -57,6 +57,7 @@ class ServiceProviders(str, Enum):
ASSEMBLYAI = "assemblyai"
GLADIA = "gladia"
RIME = "rime"
MINIMAX = "minimax"
OPENAI_REALTIME = "openai_realtime"
GOOGLE_REALTIME = "google_realtime"
GOOGLE_VERTEX_REALTIME = "google_vertex_realtime"
@ -77,6 +78,7 @@ class BaseServiceConfiguration(BaseModel):
ServiceProviders.ASSEMBLYAI,
ServiceProviders.GLADIA,
ServiceProviders.RIME,
ServiceProviders.MINIMAX,
ServiceProviders.OPENAI_REALTIME,
ServiceProviders.GOOGLE_REALTIME,
ServiceProviders.GOOGLE_VERTEX_REALTIME,
@ -395,6 +397,32 @@ class SpeachesLLMConfiguration(BaseLLMConfiguration):
)
# Model names surfaced as schema examples for the MiniMax LLM "model" field.
MINIMAX_MODELS = [
"MiniMax-M2.7",
"MiniMax-M2.7-highspeed",
]
# LLM configuration for the MiniMax provider.
# The literal "provider" value is what the discriminated LLMConfig union
# (Field(discriminator="provider")) matches on.
# NOTE(review): @register_llm presumably adds this class to the LLM registry;
# confirm against its definition.
@register_llm
class MiniMaxLLMConfiguration(BaseLLMConfiguration):
provider: Literal[ServiceProviders.MINIMAX] = ServiceProviders.MINIMAX
# Defaults to "MiniMax-M2.7"; MINIMAX_MODELS is attached as examples only.
# NOTE(review): "allow_custom_input" presumably lets the UI accept a model
# name outside the examples list; verify with the schema consumer.
model: str = Field(
default="MiniMax-M2.7",
description="MiniMax chat model.",
json_schema_extra={"examples": MINIMAX_MODELS, "allow_custom_input": True},
)
# Overridable endpoint; the default is the api.minimax.io /v1 URL.
base_url: str = Field(
default="https://api.minimax.io/v1",
description="MiniMax OpenAI-compatible API endpoint.",
)
# Validated to the half-open range (0.0, 2.0]; zero is rejected by gt=0.0.
# NOTE(review): confirm the 2.0 upper bound against MiniMax's documented
# temperature limits.
temperature: float = Field(
default=1.0,
gt=0.0,
le=2.0,
description="Sampling temperature. MiniMax requires > 0.",
)
OPENAI_REALTIME_MODELS = ["gpt-realtime-2"]
OPENAI_REALTIME_VOICES = [
"alloy",
@ -533,6 +561,7 @@ LLMConfig = Annotated[
DograhLLMService,
AWSBedrockLLMConfiguration,
SpeachesLLMConfiguration,
MiniMaxLLMConfiguration,
],
Field(discriminator="provider"),
]
@ -822,6 +851,47 @@ class SpeachesTTSConfiguration(BaseTTSConfiguration):
)
# Model names surfaced as schema examples for the MiniMax TTS "model" field.
MINIMAX_TTS_MODELS = ["speech-2.8-hd", "speech-2.8-turbo"]
# Voice IDs surfaced as schema examples for the MiniMax TTS "voice" field.
MINIMAX_TTS_VOICES = [
"English_Graceful_Lady",
"English_Insightful_Speaker",
"English_radiant_girl",
"English_Persuasive_Man",
"English_Lucky_Robot",
"English_expressive_narrator",
]
# TTS configuration for the MiniMax provider.
# The literal "provider" value is what the discriminated TTSConfig union
# (Field(discriminator="provider")) matches on.
# NOTE(review): @register_tts presumably adds this class to the TTS registry;
# confirm against its definition.
@register_tts
class MiniMaxTTSConfiguration(BaseTTSConfiguration):
provider: Literal[ServiceProviders.MINIMAX] = ServiceProviders.MINIMAX
# Defaults to "speech-2.8-hd"; MINIMAX_TTS_MODELS is attached as examples.
model: str = Field(
default="speech-2.8-hd",
description="MiniMax TTS model.",
json_schema_extra={"examples": MINIMAX_TTS_MODELS},
)
# Defaults to "English_Graceful_Lady".
# NOTE(review): "allow_custom_input" presumably lets the UI accept a voice ID
# outside the examples list; verify with the schema consumer.
voice: str = Field(
default="English_Graceful_Lady",
description="MiniMax voice ID.",
json_schema_extra={"examples": MINIMAX_TTS_VOICES, "allow_custom_input": True},
)
# Full endpoint URL including the /v1/t2a_v2 path; regional alternatives are
# listed in the field description below.
base_url: str = Field(
default="https://api.minimax.io/v1/t2a_v2",
description=(
"MiniMax TTS API endpoint (must include the /v1/t2a_v2 path). "
"Defaults to the global endpoint; override with "
"https://api.minimaxi.chat/v1/t2a_v2 (mainland China) or "
"https://api-uw.minimax.io/v1/t2a_v2 (US-West)."
),
)
# Validated to the closed range [0.5, 2.0].
speed: float = Field(
default=1.0, ge=0.5, le=2.0, description="Speech speed (0.5 to 2.0)."
)
# Required: no default is declared, so validation fails when it is omitted.
group_id: str = Field(
description="MiniMax Group ID (found in your MiniMax dashboard under Account → Group).",
)
TTSConfig = Annotated[
Union[
DeepgramTTSConfiguration,
@ -834,6 +904,7 @@ TTSConfig = Annotated[
CambTTSConfiguration,
RimeTTSConfiguration,
SpeachesTTSConfiguration,
MiniMaxTTSConfiguration,
],
Field(discriminator="provider"),
]

View file

@ -0,0 +1,23 @@
"""MiniMax TTS wrapper that closes its aiohttp session in cleanup().
Pipecat's MiniMaxHttpTTSService leaves session disposal to the caller. Our
factory creates a fresh session per service instance, so we own its close
here to avoid leaking sockets/FDs on shutdown.
"""
import aiohttp
from pipecat.services.minimax.tts import MiniMaxHttpTTSService
class MiniMaxOwnedSessionTTSService(MiniMaxHttpTTSService):
    """MiniMaxHttpTTSService variant that owns its aiohttp session lifecycle.

    The session handed to the constructor is forwarded to the parent service
    and also remembered here so that ``cleanup()`` can close it.
    """

    def __init__(self, *args, aiohttp_session: aiohttp.ClientSession, **kwargs):
        """Create the service and take ownership of ``aiohttp_session``.

        Args:
            *args: Positional arguments forwarded unchanged to the parent.
            aiohttp_session: Session used for requests; closed by ``cleanup()``.
            **kwargs: Keyword arguments forwarded unchanged to the parent.
        """
        super().__init__(*args, aiohttp_session=aiohttp_session, **kwargs)
        self._owned_session = aiohttp_session

    async def cleanup(self):
        """Run the parent cleanup, then close the owned session.

        The close happens in a ``finally`` block so the session is released
        even when the parent cleanup raises; the parent's exception still
        propagates to the caller.
        """
        try:
            await super().cleanup()
        finally:
            # Guard against double-closing a session that is already closed.
            if not self._owned_session.closed:
                await self._owned_session.close()

View file

@ -1,10 +1,12 @@
from typing import TYPE_CHECKING
import aiohttp
from fastapi import HTTPException
from loguru import logger
from api.constants import MPS_API_URL
from api.services.configuration.registry import ServiceProviders
from api.services.pipecat.minimax_tts import MiniMaxOwnedSessionTTSService
from pipecat.services.assemblyai.stt import AssemblyAISTTService, AssemblyAISTTSettings
from pipecat.services.aws.llm import AWSBedrockLLMService, AWSBedrockLLMSettings
from pipecat.services.azure.llm import AzureLLMService, AzureLLMSettings
@ -38,6 +40,8 @@ from pipecat.services.openai.stt import (
from pipecat.services.openai.tts import OpenAITTSService, OpenAITTSSettings
from pipecat.services.openrouter.llm import OpenRouterLLMService, OpenRouterLLMSettings
from pipecat.services.rime.tts import RimeTTSService, RimeTTSSettings
from pipecat.services.minimax.llm import MiniMaxLLMService
from pipecat.services.minimax.tts import MiniMaxHttpTTSService, MiniMaxTTSSettings
from pipecat.services.sarvam.stt import SarvamSTTService, SarvamSTTSettings
from pipecat.services.sarvam.tts import SarvamTTSService, SarvamTTSSettings
from pipecat.services.speaches.llm import SpeachesLLMService, SpeachesLLMSettings
@ -435,6 +439,40 @@ def create_tts_service(user_config, audio_config: "AudioConfig"):
skip_aggregator_types=["recording_router", "recording"],
silence_time_s=1.0,
)
elif user_config.tts.provider == ServiceProviders.MINIMAX.value:
group_id = getattr(user_config.tts, "group_id", None)
if not group_id:
raise HTTPException(
status_code=400,
detail="MiniMax TTS requires a group_id. Configure it in your TTS settings.",
)
voice = getattr(user_config.tts, "voice", None) or "English_Graceful_Lady"
speed = getattr(user_config.tts, "speed", None) or 1.0
# Pipecat appends "?GroupId=..." to base_url as-is, so /t2a_v2 must
# already be in the path.
base_url = (
getattr(user_config.tts, "base_url", None)
or "https://api.minimax.io/v1/t2a_v2"
).rstrip("/")
if not base_url.endswith("/t2a_v2"):
base_url = f"{base_url}/t2a_v2"
session = aiohttp.ClientSession()
return MiniMaxOwnedSessionTTSService(
api_key=user_config.tts.api_key,
group_id=group_id,
base_url=base_url,
aiohttp_session=session,
settings=MiniMaxTTSSettings(
model=user_config.tts.model,
voice=voice,
speed=speed,
),
text_filters=[xml_function_tag_filter],
skip_aggregator_types=["recording_router", "recording"],
silence_time_s=1.0,
)
else:
raise HTTPException(
status_code=400, detail=f"Invalid TTS provider {user_config.tts.provider}"
@ -451,6 +489,7 @@ def create_llm_service_from_provider(
aws_access_key: str | None = None,
aws_secret_key: str | None = None,
aws_region: str | None = None,
temperature: float | None = None,
):
"""Create an LLM service from explicit provider/model/api_key.
@ -514,6 +553,15 @@ def create_llm_service_from_provider(
api_key=api_key or "none",
settings=SpeachesLLMSettings(model=model),
)
elif provider == ServiceProviders.MINIMAX.value:
return MiniMaxLLMService(
api_key=api_key,
base_url=base_url or "https://api.minimax.io/v1",
settings=MiniMaxLLMService.Settings(
model=model,
temperature=temperature if temperature is not None else 1.0,
),
)
else:
raise HTTPException(status_code=400, detail=f"Invalid LLM provider {provider}")
@ -624,5 +672,8 @@ def create_llm_service(user_config):
kwargs["aws_access_key"] = user_config.llm.aws_access_key
kwargs["aws_secret_key"] = user_config.llm.aws_secret_key
kwargs["aws_region"] = user_config.llm.aws_region
elif provider == ServiceProviders.MINIMAX.value:
kwargs["base_url"] = user_config.llm.base_url
kwargs["temperature"] = user_config.llm.temperature
return create_llm_service_from_provider(provider, model, api_key, **kwargs)

View file

@ -0,0 +1,126 @@
from types import SimpleNamespace
from unittest.mock import patch
from pipecat.services.minimax.llm import MiniMaxLLMService as RealMiniMaxLLMService
from api.services.configuration.registry import (
MiniMaxLLMConfiguration,
MiniMaxTTSConfiguration,
ServiceProviders,
)
from api.services.pipecat.service_factory import (
create_llm_service_from_provider,
create_tts_service,
)
class TestMiniMaxLLMConfiguration:
    """Field defaults and overrides of ``MiniMaxLLMConfiguration``."""

    def test_default_values(self):
        """Only an API key is supplied; the remaining fields use defaults."""
        cfg = MiniMaxLLMConfiguration(api_key="test-key")

        assert cfg.provider == ServiceProviders.MINIMAX
        assert cfg.model == "MiniMax-M2.7"
        assert cfg.base_url == "https://api.minimax.io/v1"

    def test_custom_model(self):
        """An explicitly supplied model name is stored as given."""
        chosen_model = "MiniMax-M2.7-highspeed"
        cfg = MiniMaxLLMConfiguration(api_key="test-key", model=chosen_model)

        assert cfg.model == "MiniMax-M2.7-highspeed"

    def test_custom_base_url(self):
        """An explicitly supplied endpoint is stored as given."""
        endpoint = "https://api.minimaxi.com/v1"
        cfg = MiniMaxLLMConfiguration(api_key="test-key", base_url=endpoint)

        assert cfg.base_url == "https://api.minimaxi.com/v1"
class TestMiniMaxTTSConfiguration:
    """Field defaults of ``MiniMaxTTSConfiguration``."""

    def test_default_values(self):
        """With only api_key and group_id given, other fields use defaults."""
        tts_cfg = MiniMaxTTSConfiguration(
            api_key="test-key",
            group_id="test-group",
        )

        assert tts_cfg.provider == ServiceProviders.MINIMAX
        assert tts_cfg.model == "speech-2.8-hd"
        assert tts_cfg.voice == "English_Graceful_Lady"
        assert tts_cfg.speed == 1.0
        assert tts_cfg.group_id == "test-group"
class TestMiniMaxLLMServiceFactory:
    """``create_llm_service_from_provider`` behaviour for the MiniMax provider."""

    @staticmethod
    def _invoke_factory(**factory_kwargs):
        """Call the factory with MiniMaxLLMService patched; return the mock.

        The real ``Settings`` class is re-attached to the mock so the settings
        object handed to the service can be inspected by the tests.
        """
        with patch(
            "api.services.pipecat.service_factory.MiniMaxLLMService"
        ) as service_cls:
            service_cls.Settings = RealMiniMaxLLMService.Settings
            create_llm_service_from_provider(
                provider=ServiceProviders.MINIMAX.value,
                **factory_kwargs,
            )
        return service_cls

    def test_create_minimax_llm_service_uses_openai_compatible(self):
        """Without overrides, the default endpoint and temperature are used."""
        service_cls = self._invoke_factory(
            model="MiniMax-M2.7",
            api_key="test-key",
        )

        assert service_cls.call_count == 1
        passed = service_cls.call_args.kwargs
        assert passed["api_key"] == "test-key"
        assert passed["base_url"] == "https://api.minimax.io/v1"
        assert passed["settings"].model == "MiniMax-M2.7"
        assert passed["settings"].temperature == 1.0

    def test_create_minimax_llm_service_custom_base_url(self):
        """A caller-supplied base_url is passed through to the service."""
        service_cls = self._invoke_factory(
            model="MiniMax-M2.7-highspeed",
            api_key="test-key",
            base_url="https://api.minimaxi.com/v1",
        )

        passed = service_cls.call_args.kwargs
        assert passed["base_url"] == "https://api.minimaxi.com/v1"
        assert passed["settings"].model == "MiniMax-M2.7-highspeed"

    def test_create_minimax_llm_service_passes_user_temperature(self):
        """A caller-supplied temperature replaces the 1.0 default."""
        service_cls = self._invoke_factory(
            model="MiniMax-M2.7",
            api_key="test-key",
            temperature=0.3,
        )

        passed = service_cls.call_args.kwargs
        assert passed["settings"].temperature == 0.3
class TestMiniMaxTTSServiceFactory:
    """``create_tts_service`` behaviour for the MiniMax provider."""

    def test_create_minimax_tts_service(self):
        """The factory forwards the user's settings and normalises base_url.

        The configured base_url deliberately omits the ``/t2a_v2`` path so
        that the factory's path-appending logic is exercised and asserted.
        """
        user_config = SimpleNamespace(
            tts=SimpleNamespace(
                provider=ServiceProviders.MINIMAX.value,
                api_key="test-key",
                model="speech-2.8-hd",
                voice="English_Graceful_Lady",
                speed=1.0,
                base_url="https://api.minimax.io/v1",
                group_id="test-group",
            )
        )
        audio_config = SimpleNamespace(transport_in_sample_rate=16000)

        with patch(
            "api.services.pipecat.service_factory.aiohttp.ClientSession"
        ) as mock_session_cls, patch(
            "api.services.pipecat.service_factory.MiniMaxOwnedSessionTTSService"
        ) as mock_service:
            create_tts_service(user_config, audio_config)

        assert mock_service.call_count == 1
        kwargs = mock_service.call_args.kwargs
        assert kwargs["api_key"] == "test-key"
        assert kwargs["group_id"] == "test-group"
        # The factory appends /t2a_v2 when the configured URL lacks it.
        assert kwargs["base_url"] == "https://api.minimax.io/v1/t2a_v2"
        assert kwargs["settings"].model == "speech-2.8-hd"
        assert kwargs["settings"].voice == "English_Graceful_Lady"
        assert kwargs["settings"].speed == 1.0
        # The service must receive exactly the session the factory created.
        assert kwargs["aiohttp_session"] is mock_session_cls.return_value

View file

@ -115,7 +115,8 @@
"group": "Integrations",
"tag": "NEW",
"pages": [
"integrations/mcp"
"integrations/mcp",
"integrations/tuner"
]
}
]

Binary file not shown.

After

Width:  |  Height:  |  Size: 48 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 58 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 81 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 33 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 76 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 43 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 32 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 51 KiB

125
docs/integrations/tuner.mdx Normal file
View file

@ -0,0 +1,125 @@
---
title: "Tuner Integration"
description: "Connect Dograh to Tuner — the observability, simulation, and testing layer for voice"
---
<iframe
width="100%"
height="400"
src="https://www.youtube.com/embed/Zxse1yLorbk"
title="Tuner Integration Walkthrough"
frameBorder="0"
allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture"
allowFullScreen
/>
## Overview
The Tuner integration node automatically sends your completed call data (transcript, metadata, and call outcomes) to Tuner after each call finishes. This lets you monitor agent performance, run evaluations, and track quality trends without any custom code.
## Prerequisites
- A [Tuner account](https://app.usetuner.ai) with an active workspace
- A Dograh voice agent workflow
## Setup
### 1. Create an agent in Tuner
Log in to [Tuner](https://app.usetuner.ai) and create a new agent. When configuring the agent, set the **Provider** to **Custom API** — this is required for the Dograh integration.
<img
src="/images/tuner-create-agent.png"
alt="Create a New Agent modal in Tuner — set Provider to Custom API"
/>
### 2. Gather your Tuner credentials
You'll need three values from your Tuner account:
| Credential | Where to find it |
|---|---|
| **Agent ID** | Agent Settings → Agent Remote ID |
| **Workspace ID** | Workspace Settings → General Settings → Workspace ID |
| **API Key** | Workspace Settings → Tuner API Key |
**Agent Remote ID** — open Agent Settings for your agent:
<img
src="/images/tuner-agent-settings.png"
alt="Tuner Agent Settings showing Agent Remote ID"
/>
**Workspace ID and API Key** — open Workspace Settings:
<img
src="/images/tuner-workspace-settings.png"
alt="Tuner Workspace Settings showing Workspace ID and API Key"
/>
### 3. Add the Tuner node to your workflow
In your Dograh workflow editor, click **Add node** and scroll to the **Integrations** section. Select **Tuner**.
<img
src="/images/tuner-dograh-workflow-builder.png"
alt="Dograh voice workflow builder — click Add node"
/>
Scroll to **Integrations** and select **Tuner**:
<img
src="/images/tuner-integrations-panel.png"
alt="Dograh Integrations panel showing QA Analysis, Tuner, and Webhook"
/>
The node appears on your canvas with a **Not configured** badge:
<img
src="/images/tuner-node-not-configured.png"
alt="Tuner node on the canvas showing Not configured"
/>
### 4. Configure the node
Click on the Tuner node and fill in the following fields:
- **Tuner Agent ID** — The Agent Remote ID from Tuner
- **Tuner Workspace ID** — Your numeric workspace ID
- **Tuner API Key** — Your workspace API key
- **Enabled** — Toggle on to activate the export
<img
src="/images/tuner-edit-modal.png"
alt="Dograh Edit Tuner modal with credential fields and Enabled toggle"
/>
Click **Save**, then **Publish** your workflow.
<img
src="/images/tuner-node-configured.png"
alt="Tuner node on the canvas showing configured and enabled"
/>
### 5. Verify the connection
Make a test call through your agent. Once the call completes, check the **Call Logs** tab in your Tuner agent dashboard. The call should appear within a few moments.
## Disabling the integration
To temporarily stop exporting calls to Tuner, open the Tuner node configuration and toggle **Enabled** off. Your credentials are preserved, so you can toggle it back on at any time to resume.
## Troubleshooting
| Issue | Solution |
|---|---|
| Calls not appearing in Tuner | Verify that all three credentials are correct and contain no leading or trailing whitespace |
| Node shows "Not configured" | Open the node and fill in Agent ID, Workspace ID, and API Key |
| Workflow not sending data | Make sure the workflow is published, not just saved as a draft |
| Wrong agent in Tuner | Confirm the Tuner agent's Provider is set to **Custom API** |
## Learn more
- [Tuner](https://usetuner.ai) — The observability, simulation, and testing layer for voice
- [Tuner documentation](https://docs.usetuner.ai) — Complete Tuner platform docs

@ -1 +1 @@
Subproject commit d1e23ca521f5412a9dc09430ada730500e15a7ab
Subproject commit c771a50ed36c49002b4bf4e5cb66cf1e4b73c97d