Merge branch 'main' of https://github.com/dograh-hq/dograh

2026-07-25 12:01:04 +02:00 · 2026-05-22 14:36:54 +05:30 · 2026-05-22 14:36:54 +05:30 · 291264de7b
commit 291264de7b
parent ad2fa07058 0e0d3136ca
16 changed files with 418 additions and 2 deletions
--- a/api/services/configuration/check_validity.py
+++ b/api/services/configuration/check_validity.py
@ -53,6 +53,7 @@ class UserConfigurationValidator:
            ServiceProviders.ASSEMBLYAI.value: self._check_assemblyai_api_key,
            ServiceProviders.GLADIA.value: self._check_gladia_api_key,
            ServiceProviders.RIME.value: self._check_rime_api_key,
+            ServiceProviders.MINIMAX.value: self._check_minimax_api_key,
        }

    async def validate(
@ -147,6 +148,19 @@ class UserConfigurationValidator:
                return [{"model": service_name, "message": str(e)}]
            return []

+        # MiniMax TTS requires a group_id alongside the API key.
+        # LLM configs don't expose group_id, so only check when the field exists.
+        if provider == ServiceProviders.MINIMAX.value and hasattr(
+            service_config, "group_id"
+        ):
+            if not getattr(service_config, "group_id", None):
+                return [
+                    {
+                        "model": service_name,
+                        "message": "group_id is required for MiniMax TTS",
+                    }
+                ]
+
        api_key = service_config.api_key

        try:
@ -253,3 +267,8 @@ class UserConfigurationValidator:

    def _check_rime_api_key(self, model: str, api_key: str) -> bool:
        return True
+
+    def _check_minimax_api_key(self, model: str, api_key: str) -> bool:
+        """Accept any MiniMax API key without contacting the provider.
+
+        Both ``model`` and ``api_key`` are ignored; the method always
+        returns ``True``.
+        """
+        # MiniMax doesn't publish a cheap key-validation endpoint; trust the key
+        # at save time and surface auth errors at first call (same as Rime/Sarvam).
+        return True
--- a/api/services/configuration/registry.py
+++ b/api/services/configuration/registry.py
@ -57,6 +57,7 @@ class ServiceProviders(str, Enum):
    ASSEMBLYAI = "assemblyai"
    GLADIA = "gladia"
    RIME = "rime"
+    MINIMAX = "minimax"
    OPENAI_REALTIME = "openai_realtime"
    GOOGLE_REALTIME = "google_realtime"
    GOOGLE_VERTEX_REALTIME = "google_vertex_realtime"
@ -77,6 +78,7 @@ class BaseServiceConfiguration(BaseModel):
        ServiceProviders.ASSEMBLYAI,
        ServiceProviders.GLADIA,
        ServiceProviders.RIME,
+        ServiceProviders.MINIMAX,
        ServiceProviders.OPENAI_REALTIME,
        ServiceProviders.GOOGLE_REALTIME,
        ServiceProviders.GOOGLE_VERTEX_REALTIME,
@ -395,6 +397,32 @@ class SpeachesLLMConfiguration(BaseLLMConfiguration):
    )


+MINIMAX_MODELS = [
+    "MiniMax-M2.7",
+    "MiniMax-M2.7-highspeed",
+]
+
+
+# MiniMax chat-completion settings. ``base_url`` defaults to MiniMax's
+# OpenAI-compatible endpoint; ``model`` accepts custom values in addition to
+# the MINIMAX_MODELS examples.
+@register_llm
+class MiniMaxLLMConfiguration(BaseLLMConfiguration):
+    provider: Literal[ServiceProviders.MINIMAX] = ServiceProviders.MINIMAX
+    model: str = Field(
+        default="MiniMax-M2.7",
+        description="MiniMax chat model.",
+        json_schema_extra={"examples": MINIMAX_MODELS, "allow_custom_input": True},
+    )
+    base_url: str = Field(
+        default="https://api.minimax.io/v1",
+        description="MiniMax OpenAI-compatible API endpoint.",
+    )
+    # MiniMax's OpenAI-compatible API documents the accepted temperature range
+    # as (0, 1] and rejects values outside it, so enforce that range when the
+    # configuration is validated instead of failing on the first request.
+    temperature: float = Field(
+        default=1.0,
+        gt=0.0,
+        le=1.0,
+        description="Sampling temperature. MiniMax requires a value in (0, 1].",
+    )
+
+
 OPENAI_REALTIME_MODELS = ["gpt-realtime-2"]
 OPENAI_REALTIME_VOICES = [
    "alloy",
@ -533,6 +561,7 @@ LLMConfig = Annotated[
        DograhLLMService,
        AWSBedrockLLMConfiguration,
        SpeachesLLMConfiguration,
+        MiniMaxLLMConfiguration,
    ],
    Field(discriminator="provider"),
 ]
@ -822,6 +851,47 @@ class SpeachesTTSConfiguration(BaseTTSConfiguration):
    )


+MINIMAX_TTS_MODELS = ["speech-2.8-hd", "speech-2.8-turbo"]
+MINIMAX_TTS_VOICES = [
+    "English_Graceful_Lady",
+    "English_Insightful_Speaker",
+    "English_radiant_girl",
+    "English_Persuasive_Man",
+    "English_Lucky_Robot",
+    "English_expressive_narrator",
+]
+
+
+# MiniMax text-to-speech settings. Unlike MiniMaxLLMConfiguration, this model
+# declares a ``group_id`` field.
+@register_tts
+class MiniMaxTTSConfiguration(BaseTTSConfiguration):
+    # Discriminator value for the TTSConfig union.
+    provider: Literal[ServiceProviders.MINIMAX] = ServiceProviders.MINIMAX
+    # Model examples come from MINIMAX_TTS_MODELS.
+    model: str = Field(
+        default="speech-2.8-hd",
+        description="MiniMax TTS model.",
+        json_schema_extra={"examples": MINIMAX_TTS_MODELS},
+    )
+    # Voice examples come from MINIMAX_TTS_VOICES; custom input is allowed.
+    voice: str = Field(
+        default="English_Graceful_Lady",
+        description="MiniMax voice ID.",
+        json_schema_extra={"examples": MINIMAX_TTS_VOICES, "allow_custom_input": True},
+    )
+    # Full endpoint URL, including the /v1/t2a_v2 path.
+    base_url: str = Field(
+        default="https://api.minimax.io/v1/t2a_v2",
+        description=(
+            "MiniMax TTS API endpoint (must include the /v1/t2a_v2 path). "
+            "Defaults to the global endpoint; override with "
+            "https://api.minimaxi.chat/v1/t2a_v2 (mainland China) or "
+            "https://api-uw.minimax.io/v1/t2a_v2 (US-West)."
+        ),
+    )
+    # Validated to lie within [0.5, 2.0].
+    speed: float = Field(
+        default=1.0, ge=0.5, le=2.0, description="Speech speed (0.5 to 2.0)."
+    )
+    # No default is declared, so the field is required.
+    group_id: str = Field(
+        description="MiniMax Group ID (found in your MiniMax dashboard under Account → Group).",
+    )
+
+
 TTSConfig = Annotated[
    Union[
        DeepgramTTSConfiguration,
@ -834,6 +904,7 @@ TTSConfig = Annotated[
        CambTTSConfiguration,
        RimeTTSConfiguration,
        SpeachesTTSConfiguration,
+        MiniMaxTTSConfiguration,
    ],
    Field(discriminator="provider"),
 ]
--- a/api/services/pipecat/minimax_tts.py
+++ b/api/services/pipecat/minimax_tts.py
@ -0,0 +1,23 @@
+"""MiniMax TTS wrapper that closes its aiohttp session in cleanup().
+
+Pipecat's MiniMaxHttpTTSService leaves session disposal to the caller. Our
+factory creates a fresh session per service instance, so we own its close
+here to avoid leaking sockets/FDs on shutdown.
+"""
+
+import aiohttp
+
+from pipecat.services.minimax.tts import MiniMaxHttpTTSService
+
+
+class MiniMaxOwnedSessionTTSService(MiniMaxHttpTTSService):
+    """MiniMaxHttpTTSService variant that owns its aiohttp session lifecycle.
+
+    The session handed to the constructor is closed by :meth:`cleanup`, so it
+    must not be shared with other services.
+    """
+
+    def __init__(self, *args, aiohttp_session: aiohttp.ClientSession, **kwargs):
+        """Forward all arguments to the parent and remember the session.
+
+        Args:
+            *args: Positional arguments passed through to the parent service.
+            aiohttp_session: Session used by the parent service; this wrapper
+                takes ownership of it and closes it in :meth:`cleanup`.
+            **kwargs: Keyword arguments passed through to the parent service.
+        """
+        super().__init__(*args, aiohttp_session=aiohttp_session, **kwargs)
+        self._owned_session = aiohttp_session
+
+    async def cleanup(self):
+        """Run the parent cleanup, then close the owned session.
+
+        The close happens in a ``finally`` block so the session is released
+        even when the parent cleanup raises; the parent's exception still
+        propagates to the caller.
+        """
+        try:
+            await super().cleanup()
+        finally:
+            if not self._owned_session.closed:
+                await self._owned_session.close()
--- a/api/services/pipecat/service_factory.py
+++ b/api/services/pipecat/service_factory.py
@ -1,10 +1,12 @@
 from typing import TYPE_CHECKING

+import aiohttp
 from fastapi import HTTPException
 from loguru import logger

 from api.constants import MPS_API_URL
 from api.services.configuration.registry import ServiceProviders
+from api.services.pipecat.minimax_tts import MiniMaxOwnedSessionTTSService
 from pipecat.services.assemblyai.stt import AssemblyAISTTService, AssemblyAISTTSettings
 from pipecat.services.aws.llm import AWSBedrockLLMService, AWSBedrockLLMSettings
 from pipecat.services.azure.llm import AzureLLMService, AzureLLMSettings
@ -38,6 +40,8 @@ from pipecat.services.openai.stt import (
 from pipecat.services.openai.tts import OpenAITTSService, OpenAITTSSettings
 from pipecat.services.openrouter.llm import OpenRouterLLMService, OpenRouterLLMSettings
 from pipecat.services.rime.tts import RimeTTSService, RimeTTSSettings
+from pipecat.services.minimax.llm import MiniMaxLLMService
+from pipecat.services.minimax.tts import MiniMaxHttpTTSService, MiniMaxTTSSettings
 from pipecat.services.sarvam.stt import SarvamSTTService, SarvamSTTSettings
 from pipecat.services.sarvam.tts import SarvamTTSService, SarvamTTSSettings
 from pipecat.services.speaches.llm import SpeachesLLMService, SpeachesLLMSettings
@ -435,6 +439,40 @@ def create_tts_service(user_config, audio_config: "AudioConfig"):
            skip_aggregator_types=["recording_router", "recording"],
            silence_time_s=1.0,
        )
+    elif user_config.tts.provider == ServiceProviders.MINIMAX.value:
+        group_id = getattr(user_config.tts, "group_id", None)
+        if not group_id:
+            raise HTTPException(
+                status_code=400,
+                detail="MiniMax TTS requires a group_id. Configure it in your TTS settings.",
+            )
+        voice = getattr(user_config.tts, "voice", None) or "English_Graceful_Lady"
+        speed = getattr(user_config.tts, "speed", None) or 1.0
+
+        # Pipecat appends "?GroupId=..." to base_url as-is, so /t2a_v2 must
+        # already be in the path.
+        base_url = (
+            getattr(user_config.tts, "base_url", None)
+            or "https://api.minimax.io/v1/t2a_v2"
+        ).rstrip("/")
+        if not base_url.endswith("/t2a_v2"):
+            base_url = f"{base_url}/t2a_v2"
+
+        session = aiohttp.ClientSession()
+        return MiniMaxOwnedSessionTTSService(
+            api_key=user_config.tts.api_key,
+            group_id=group_id,
+            base_url=base_url,
+            aiohttp_session=session,
+            settings=MiniMaxTTSSettings(
+                model=user_config.tts.model,
+                voice=voice,
+                speed=speed,
+            ),
+            text_filters=[xml_function_tag_filter],
+            skip_aggregator_types=["recording_router", "recording"],
+            silence_time_s=1.0,
+        )
    else:
        raise HTTPException(
            status_code=400, detail=f"Invalid TTS provider {user_config.tts.provider}"
@ -451,6 +489,7 @@ def create_llm_service_from_provider(
    aws_access_key: str | None = None,
    aws_secret_key: str | None = None,
    aws_region: str | None = None,
+    temperature: float | None = None,
 ):
    """Create an LLM service from explicit provider/model/api_key.

@ -514,6 +553,15 @@ def create_llm_service_from_provider(
            api_key=api_key or "none",
            settings=SpeachesLLMSettings(model=model),
        )
+    elif provider == ServiceProviders.MINIMAX.value:
+        return MiniMaxLLMService(
+            api_key=api_key,
+            base_url=base_url or "https://api.minimax.io/v1",
+            settings=MiniMaxLLMService.Settings(
+                model=model,
+                temperature=temperature if temperature is not None else 1.0,
+            ),
+        )
    else:
        raise HTTPException(status_code=400, detail=f"Invalid LLM provider {provider}")

@ -624,5 +672,8 @@ def create_llm_service(user_config):
        kwargs["aws_access_key"] = user_config.llm.aws_access_key
        kwargs["aws_secret_key"] = user_config.llm.aws_secret_key
        kwargs["aws_region"] = user_config.llm.aws_region
+    elif provider == ServiceProviders.MINIMAX.value:
+        kwargs["base_url"] = user_config.llm.base_url
+        kwargs["temperature"] = user_config.llm.temperature

    return create_llm_service_from_provider(provider, model, api_key, **kwargs)
--- a/api/tests/test_minimax_service_factory.py
+++ b/api/tests/test_minimax_service_factory.py
@ -0,0 +1,126 @@
+from types import SimpleNamespace
+from unittest.mock import patch
+
+from pipecat.services.minimax.llm import MiniMaxLLMService as RealMiniMaxLLMService
+
+from api.services.configuration.registry import (
+    MiniMaxLLMConfiguration,
+    MiniMaxTTSConfiguration,
+    ServiceProviders,
+)
+from api.services.pipecat.service_factory import (
+    create_llm_service_from_provider,
+    create_tts_service,
+)
+
+
+class TestMiniMaxLLMConfiguration:
+    """Field defaults and overrides of ``MiniMaxLLMConfiguration``."""
+
+    def test_default_values(self):
+        """Only ``api_key`` is supplied; provider, model and base_url default."""
+        cfg = MiniMaxLLMConfiguration(api_key="test-key")
+        assert cfg.provider == ServiceProviders.MINIMAX
+        assert cfg.model == "MiniMax-M2.7"
+        assert cfg.base_url == "https://api.minimax.io/v1"
+
+    def test_custom_model(self):
+        """An explicit ``model`` replaces the default."""
+        overrides = {"api_key": "test-key", "model": "MiniMax-M2.7-highspeed"}
+        cfg = MiniMaxLLMConfiguration(**overrides)
+        assert cfg.model == "MiniMax-M2.7-highspeed"
+
+    def test_custom_base_url(self):
+        """An explicit ``base_url`` replaces the default."""
+        overrides = {"api_key": "test-key", "base_url": "https://api.minimaxi.com/v1"}
+        cfg = MiniMaxLLMConfiguration(**overrides)
+        assert cfg.base_url == "https://api.minimaxi.com/v1"
+
+
+class TestMiniMaxTTSConfiguration:
+    """Defaults of ``MiniMaxTTSConfiguration`` when only required fields are set."""
+
+    def test_default_values(self):
+        """``api_key`` and ``group_id`` are given; the other fields use defaults."""
+        tts_cfg = MiniMaxTTSConfiguration(group_id="test-group", api_key="test-key")
+        assert tts_cfg.provider == ServiceProviders.MINIMAX
+        expected_fields = {
+            "model": "speech-2.8-hd",
+            "voice": "English_Graceful_Lady",
+            "speed": 1.0,
+            "group_id": "test-group",
+        }
+        for field_name, expected_value in expected_fields.items():
+            assert getattr(tts_cfg, field_name) == expected_value
+
+
+class TestMiniMaxLLMServiceFactory:
+    """``create_llm_service_from_provider`` wiring for the MiniMax provider."""
+
+    @staticmethod
+    def _build(**factory_kwargs):
+        """Call the factory with ``MiniMaxLLMService`` patched; return the mock.
+
+        The mock keeps the real ``Settings`` class so the tests can read
+        ``model`` and ``temperature`` from the settings object the factory built.
+        """
+        target = "api.services.pipecat.service_factory.MiniMaxLLMService"
+        with patch(target) as patched_service:
+            patched_service.Settings = RealMiniMaxLLMService.Settings
+            create_llm_service_from_provider(
+                provider=ServiceProviders.MINIMAX.value,
+                api_key="test-key",
+                **factory_kwargs,
+            )
+        return patched_service
+
+    def test_create_minimax_llm_service_uses_openai_compatible(self):
+        """Defaults: global base URL and temperature 1.0 reach the service."""
+        patched_service = self._build(model="MiniMax-M2.7")
+
+        assert patched_service.call_count == 1
+        passed = patched_service.call_args.kwargs
+        assert passed["api_key"] == "test-key"
+        assert passed["base_url"] == "https://api.minimax.io/v1"
+        assert passed["settings"].model == "MiniMax-M2.7"
+        assert passed["settings"].temperature == 1.0
+
+    def test_create_minimax_llm_service_custom_base_url(self):
+        """A caller-supplied base URL is forwarded unchanged."""
+        patched_service = self._build(
+            model="MiniMax-M2.7-highspeed",
+            base_url="https://api.minimaxi.com/v1",
+        )
+
+        passed = patched_service.call_args.kwargs
+        assert passed["base_url"] == "https://api.minimaxi.com/v1"
+        assert passed["settings"].model == "MiniMax-M2.7-highspeed"
+
+    def test_create_minimax_llm_service_passes_user_temperature(self):
+        """A caller-supplied temperature replaces the 1.0 fallback."""
+        patched_service = self._build(model="MiniMax-M2.7", temperature=0.3)
+        passed = patched_service.call_args.kwargs
+        assert passed["settings"].temperature == 0.3
+
+
+class TestMiniMaxTTSServiceFactory:
+    """``create_tts_service`` wiring for the MiniMax provider."""
+
+    def test_create_minimax_tts_service(self):
+        """Factory forwards credentials and settings and normalises the URL.
+
+        The configured ``base_url`` deliberately omits ``/t2a_v2`` so the test
+        covers the factory appending that path segment.
+        """
+        user_config = SimpleNamespace(
+            tts=SimpleNamespace(
+                provider=ServiceProviders.MINIMAX.value,
+                api_key="test-key",
+                model="speech-2.8-hd",
+                voice="English_Graceful_Lady",
+                speed=1.0,
+                base_url="https://api.minimax.io/v1",
+                group_id="test-group",
+            )
+        )
+        audio_config = SimpleNamespace(transport_in_sample_rate=16000)
+
+        with patch(
+            "api.services.pipecat.service_factory.aiohttp.ClientSession"
+        ) as mock_session, patch(
+            "api.services.pipecat.service_factory.MiniMaxOwnedSessionTTSService"
+        ) as mock_service:
+            create_tts_service(user_config, audio_config)
+
+        assert mock_service.call_count == 1
+        kwargs = mock_service.call_args.kwargs
+        assert kwargs["api_key"] == "test-key"
+        assert kwargs["group_id"] == "test-group"
+        # The factory must append the endpoint path when it is missing.
+        assert kwargs["base_url"] == "https://api.minimax.io/v1/t2a_v2"
+        assert kwargs["settings"].model == "speech-2.8-hd"
+        assert kwargs["settings"].voice == "English_Graceful_Lady"
+        assert kwargs["settings"].speed == 1.0
+        # The service must receive the session the factory created.
+        assert kwargs["aiohttp_session"] is mock_session.return_value
--- a/docs/docs.json
+++ b/docs/docs.json
@ -115,7 +115,8 @@
            "group": "Integrations",
            "tag": "NEW",
            "pages": [
-              "integrations/mcp"
+              "integrations/mcp",
+              "integrations/tuner"
            ]
          }
        ]
--- a/docs/images/tuner-agent-settings.png
+++ b/docs/images/tuner-agent-settings.png
--- a/docs/images/tuner-create-agent.png
+++ b/docs/images/tuner-create-agent.png
--- a/docs/images/tuner-dograh-workflow-builder.png
+++ b/docs/images/tuner-dograh-workflow-builder.png
--- a/docs/images/tuner-edit-modal.png
+++ b/docs/images/tuner-edit-modal.png
--- a/docs/images/tuner-integrations-panel.png
+++ b/docs/images/tuner-integrations-panel.png
--- a/docs/images/tuner-node-configured.png
+++ b/docs/images/tuner-node-configured.png
--- a/docs/images/tuner-node-not-configured.png
+++ b/docs/images/tuner-node-not-configured.png
--- a/docs/images/tuner-workspace-settings.png
+++ b/docs/images/tuner-workspace-settings.png
--- a/docs/integrations/tuner.mdx
+++ b/docs/integrations/tuner.mdx
@ -0,0 +1,125 @@
+---
+title: "Tuner Integration"
+description: "Connect Dograh to Tuner — the observability, simulation, and testing layer for voice"
+---
+
+<iframe
+  width="100%"
+  height="400"
+  src="https://www.youtube.com/embed/Zxse1yLorbk"
+  title="Tuner Integration Walkthrough"
+  frameBorder="0"
+  allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture"
+  allowFullScreen
+/>
+
+## Overview
+
+The Tuner integration node automatically sends your completed call data (transcript, metadata, and call outcomes) to Tuner after each call finishes. This lets you monitor agent performance, run evaluations, and track quality trends without any custom code.
+
+## Prerequisites
+
+- A [Tuner account](https://app.usetuner.ai) with an active workspace
+- A Dograh voice agent workflow
+
+## Setup
+
+### 1. Create an agent in Tuner
+
+Log in to [Tuner](https://app.usetuner.ai) and create a new agent. When configuring the agent, set the **Provider** to **Custom API** — this is required for the Dograh integration.
+
+<img
+  src="/images/tuner-create-agent.png"
+  alt="Create a New Agent modal in Tuner — set Provider to Custom API"
+/>
+
+
+### 2. Gather your Tuner credentials
+
+You'll need three values from your Tuner account:
+
+| Credential | Where to find it |
+|---|---|
+| **Agent ID** | Agent Settings → Agent Remote ID |
+| **Workspace ID** | Workspace Settings → General Settings → Workspace ID |
+| **API Key** | Workspace Settings → Tuner API Key |
+
+**Agent Remote ID** — open Agent Settings for your agent:
+
+<img
+  src="/images/tuner-agent-settings.png"
+  alt="Tuner Agent Settings showing Agent Remote ID"
+/>
+
+**Workspace ID and API Key** — open Workspace Settings:
+
+<img
+  src="/images/tuner-workspace-settings.png"
+  alt="Tuner Workspace Settings showing Workspace ID and API Key"
+/>
+
+### 3. Add the Tuner node to your workflow
+
+In your Dograh workflow editor, click **Add node**:
+
+<img
+  src="/images/tuner-dograh-workflow-builder.png"
+  alt="Dograh voice workflow builder — click Add node"
+/>
+
+Scroll to **Integrations** and select **Tuner**:
+
+<img
+  src="/images/tuner-integrations-panel.png"
+  alt="Dograh Integrations panel showing QA Analysis, Tuner, and Webhook"
+/>
+
+The node appears on your canvas with a **Not configured** badge:
+
+<img
+  src="/images/tuner-node-not-configured.png"
+  alt="Tuner node on the canvas showing Not configured"
+/>
+
+### 4. Configure the node
+
+Click on the Tuner node and fill in the following fields:
+
+- **Tuner Agent ID** — The Agent Remote ID from Tuner
+- **Tuner Workspace ID** — Your numeric workspace ID
+- **Tuner API Key** — Your workspace API key
+- **Enabled** — Toggle on to activate the export
+
+<img
+  src="/images/tuner-edit-modal.png"
+  alt="Dograh Edit Tuner modal with credential fields and Enabled toggle"
+/>
+
+Click **Save**, then **Publish** your workflow.
+
+<img
+  src="/images/tuner-node-configured.png"
+  alt="Tuner node on the canvas showing configured and enabled"
+/>
+
+### 5. Verify the connection
+
+Make a test call through your agent. Once the call completes, check the **Call Logs** tab in your Tuner agent dashboard. The call should appear within a few moments.
+
+## Disabling the integration
+
+To temporarily stop exporting calls to Tuner, open the Tuner node configuration and toggle **Enabled** off. Your credentials are preserved — toggle it back on anytime to resume.
+
+## Troubleshooting
+
+| Issue | Solution |
+|---|---|
+| Calls not appearing in Tuner | Verify all three credentials are correct with no extra whitespace |
+| Node shows "Not configured" | Open the node and fill in Agent ID, Workspace ID, and API Key |
+| Workflow not sending data | Make sure the workflow is published, not just saved as a draft |
+| Wrong agent in Tuner | Check that **Tuner Agent ID** matches the Agent Remote ID of the intended agent, and confirm that agent's Provider is set to **Custom API** |
+
+## Learn more
+
+- [Tuner](https://usetuner.ai) — The observability, simulation, and testing layer for voice
+- [Tuner documentation](https://docs.usetuner.ai) — Complete Tuner platform docs
--- a/2
+++ b/2
@ -1 +1 @@
-Subproject commit d1e23ca521f5412a9dc09430ada730500e15a7ab
+Subproject commit c771a50ed36c49002b4bf4e5cb66cf1e4b73c97d