feat: add CAMB AI TTS integration (#187)

Co-authored-by: Abhishek <abhishek@a6k.me>
2026-07-22 11:51:04 +02:00 · 2026-03-24 15:24:07 +08:00 · 2026-03-24 15:24:07 +08:00 · 31e075d114
commit 31e075d114
parent 330e4a05f2
7 changed files with 244 additions and 4 deletions
--- a/.gitignore
+++ b/.gitignore
@ -15,4 +15,4 @@ venv/
 .venv/
 .playwright-mcp
 coturn/
-dograh_pcm_cache/
+*.wav
--- a/api/Dockerfile
+++ b/api/Dockerfile
@ -25,11 +25,10 @@ RUN pip install --user --no-cache-dir -r requirements.txt && \

 # Copy and install pipecat from local submodule
 COPY pipecat /tmp/pipecat
-RUN pip install --user --no-cache-dir '/tmp/pipecat[cartesia,deepgram,openai,elevenlabs,groq,google,azure,sarvam,soundfile,silero,webrtc,local-smart-turn-v3,speechmatics,openrouter]' && \
+RUN pip install --user --no-cache-dir '/tmp/pipecat[cartesia,deepgram,openai,elevenlabs,groq,google,azure,sarvam,soundfile,silero,webrtc,local-smart-turn-v3,speechmatics,openrouter,camb]' && \
    # Clean up pip cache and temporary pipecat directory
    rm -rf /root/.cache/pip /tmp/pipecat

-    
 # Remove unnecessary Python cache files from installed packages
 RUN find /root/.local -type f -name '*.pyc' -delete && \
    find /root/.local -type d -name '__pycache__' -delete && \
--- a/api/services/configuration/check_validity.py
+++ b/api/services/configuration/check_validity.py
@ -38,6 +38,7 @@ class UserConfigurationValidator:
            ServiceProviders.DOGRAH.value: self._check_dograh_api_key,
            ServiceProviders.SARVAM.value: self._check_sarvam_api_key,
            ServiceProviders.SPEECHMATICS.value: self._check_speechmatics_api_key,
+            ServiceProviders.CAMB.value: self._check_camb_api_key,
            ServiceProviders.AWS_BEDROCK.value: self._check_aws_bedrock_api_key,
        }

@ -160,6 +161,9 @@ class UserConfigurationValidator:
    def _check_speechmatics_api_key(self, model: str, api_key: str) -> bool:
        return True

+    def _check_camb_api_key(self, model: str, api_key: str) -> bool:
+        """Accept any CAMB AI TTS configuration without inspecting the key.
+
+        Args:
+            model: Selected CAMB model name; not inspected.
+            api_key: CAMB API key; not inspected.
+
+        Returns:
+            Always ``True``.
+        """
+        return True
+
    def _check_aws_bedrock_api_key(self, model: str, service_config) -> bool:
        if not service_config.aws_access_key or not service_config.aws_secret_key:
            raise ValueError("AWS access key and secret key are required for Bedrock")
--- a/api/services/configuration/registry.py
+++ b/api/services/configuration/registry.py
@ -25,6 +25,7 @@ class ServiceProviders(str, Enum):
    DOGRAH = "dograh"
    SARVAM = "sarvam"
    SPEECHMATICS = "speechmatics"
+    CAMB = "camb"
    AWS_BEDROCK = "aws_bedrock"


@ -423,6 +424,19 @@ class SarvamTTSConfiguration(BaseTTSConfiguration):
    )


+# Model identifiers offered as JSON-schema examples for the CAMB TTS `model` field.
+CAMB_TTS_MODELS = ["mars-flash", "mars-pro", "mars-instruct"]
+
+
+# TTS settings for the CAMB provider; `provider` is the discriminator value
+# that selects this class inside the TTSConfig union.
+@register_tts
+class CambTTSConfiguration(BaseTTSConfiguration):
+    provider: Literal[ServiceProviders.CAMB] = ServiceProviders.CAMB
+    # Plain string field: CAMB_TTS_MODELS are schema examples, not an enforced set.
+    model: str = Field(
+        default="mars-flash", json_schema_extra={"examples": CAMB_TTS_MODELS}
+    )
+    # Kept as a string here; the service factory converts it with int(), so the
+    # value has to be numeric.
+    voice: str = Field(default="147320", description="Camb.ai voice ID")
+    language: str = Field(default="en-us", description="BCP-47 language code")
+
+
 TTSConfig = Annotated[
    Union[
        DeepgramTTSConfiguration,
@ -431,6 +445,7 @@ TTSConfig = Annotated[
        CartesiaTTSConfiguration,
        DograhTTSService,
        SarvamTTSConfiguration,
+        CambTTSConfiguration,
    ],
    Field(discriminator="provider"),
 ]
--- a/api/services/pipecat/service_factory.py
+++ b/api/services/pipecat/service_factory.py
@ -237,6 +237,20 @@ def create_tts_service(user_config, audio_config: "AudioConfig"):
            text_filters=[xml_function_tag_filter],
            silence_time_s=1.0,
        )
+    elif user_config.tts.provider == ServiceProviders.CAMB.value:
+        from pipecat.services.camb.tts import CambTTSService
+
+        # The voice is stored as a string in the configuration but handed to
+        # CambTTSService as an integer ID; fall back to the default voice when
+        # it is missing or empty.
+        raw_voice = getattr(user_config.tts, "voice", None) or "147320"
+        try:
+            voice_id = int(raw_voice)
+        except ValueError as exc:
+            # Same exception type as the bare int() call, but the message names
+            # the offending setting instead of "invalid literal for int()".
+            raise ValueError(
+                f"Camb.ai voice ID must be an integer, got {raw_voice!r}"
+            ) from exc
+        language = getattr(user_config.tts, "language", None) or "en-us"
+        tts = CambTTSService(
+            api_key=user_config.tts.api_key,
+            voice_id=voice_id,
+            model=user_config.tts.model,
+            text_filters=[xml_function_tag_filter],
+        )
+        # Set language directly as BCP-47 code (bypasses Language enum conversion)
+        tts._settings.language = language
+        return tts
    elif user_config.tts.provider == ServiceProviders.SARVAM.value:
        # Map Sarvam language code to pipecat Language enum for TTS
        language_mapping = {
--- a/api/tests/test_camb_tts_integration.py
+++ b/api/tests/test_camb_tts_integration.py
@ -0,0 +1,208 @@
+"""Tests for CAMB AI TTS integration into Dograh.
+
+Covers:
+- CambTTSConfiguration model (defaults, custom values, JSON schema)
+- Service factory CAMB branch
+- API key validation
+- Pipeline integration (mocked)
+- Error handling
+"""
+
+from types import SimpleNamespace
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+from pydantic import ValidationError
+
+from api.services.configuration.check_validity import UserConfigurationValidator
+from api.services.configuration.registry import (
+    CAMB_TTS_MODELS,
+    CambTTSConfiguration,
+    REGISTRY,
+    ServiceProviders,
+    ServiceType,
+)
+
+
+# ---------------------------------------------------------------------------
+# 1. CambTTSConfiguration model tests
+# ---------------------------------------------------------------------------
+
+
+class TestCambTTSConfiguration:
+    """Defaults, overrides, JSON schema and registry wiring of the CAMB config."""
+
+    def test_defaults(self):
+        """With only an API key supplied, every other field takes its default."""
+        config = CambTTSConfiguration(api_key="test-key")
+        expected = {
+            "provider": ServiceProviders.CAMB,
+            "model": "mars-flash",
+            "voice": "147320",
+            "language": "en-us",
+        }
+        for field_name, value in expected.items():
+            assert getattr(config, field_name) == value
+
+    def test_custom_values(self):
+        """Explicitly supplied model, voice and language are stored unchanged."""
+        overrides = {"model": "mars-pro", "voice": "9999", "language": "fr-fr"}
+        config = CambTTSConfiguration(api_key="k", **overrides)
+        for field_name, value in overrides.items():
+            assert getattr(config, field_name) == value
+
+    def test_json_schema_has_model_examples(self):
+        """The schema of the ``model`` field lists CAMB_TTS_MODELS as examples."""
+        properties = CambTTSConfiguration.model_json_schema()["properties"]
+        assert properties["model"]["examples"] == CAMB_TTS_MODELS
+
+    def test_registered_in_tts_registry(self):
+        """The TTS registry maps the CAMB provider to this configuration class."""
+        tts_registry = REGISTRY[ServiceType.TTS]
+        assert ServiceProviders.CAMB in tts_registry
+        assert tts_registry[ServiceProviders.CAMB] is CambTTSConfiguration
+
+    def test_api_key_required(self):
+        """Constructing the configuration without an API key fails validation."""
+        pytest.raises(ValidationError, CambTTSConfiguration)
+
+
+# ---------------------------------------------------------------------------
+# 2. Service factory tests
+# ---------------------------------------------------------------------------
+
+
+class TestServiceFactoryCamb:
+    """Exercise the CAMB branch of ``create_tts_service`` with the service mocked."""
+
+    @staticmethod
+    def _create_camb_tts(voice="147320", language="en-us"):
+        """Call ``create_tts_service`` for a CAMB config and capture the mocks.
+
+        Args:
+            voice: Voice ID string placed on the fake TTS configuration.
+            language: Language code placed on the fake TTS configuration.
+
+        Returns:
+            Tuple ``(mock_class, mock_instance, result)``: the patched
+            ``CambTTSService`` class, the instance it returns, and the value
+            returned by the factory.
+        """
+        import importlib
+        import sys
+
+        # Mock missing modules (custom pipecat fork, not in public pipecat-ai)
+        dograh_modules = [
+            "pipecat.services.dograh",
+            "pipecat.services.dograh.llm",
+            "pipecat.services.dograh.stt",
+            "pipecat.services.dograh.tts",
+            "pipecat.utils.text.xml_function_tag_filter",
+        ]
+        mocks = {
+            mod: MagicMock() for mod in dograh_modules if mod not in sys.modules
+        }
+
+        with patch.dict(sys.modules, mocks):
+            # Force re-import with mocked modules
+            if "api.services.pipecat.service_factory" in sys.modules:
+                importlib.reload(sys.modules["api.services.pipecat.service_factory"])
+            from api.services.pipecat.service_factory import create_tts_service
+
+            user_config = SimpleNamespace(
+                tts=SimpleNamespace(
+                    provider=ServiceProviders.CAMB.value,
+                    api_key="test-api-key",
+                    model="mars-flash",
+                    voice=voice,
+                    language=language,
+                )
+            )
+            audio_config = SimpleNamespace(
+                transport_out_sample_rate=22050,
+                transport_in_sample_rate=16000,
+            )
+
+            with patch("pipecat.services.camb.tts.CambTTSService") as mock_class:
+                mock_instance = MagicMock()
+                mock_instance._settings = MagicMock()
+                mock_class.return_value = mock_instance
+
+                result = create_tts_service(user_config, audio_config)
+
+        return mock_class, mock_instance, result
+
+    def test_create_tts_service_camb(self):
+        """The factory builds CambTTSService from the config and returns it."""
+        mock_class, mock_instance, tts = self._create_camb_tts()
+
+        mock_class.assert_called_once()
+        call_kwargs = mock_class.call_args[1]
+        assert call_kwargs["api_key"] == "test-api-key"
+        assert call_kwargs["voice_id"] == 147320
+        assert call_kwargs["model"] == "mars-flash"
+        # The factory must hand back the constructed service with the
+        # configured language written onto its settings.
+        assert tts is mock_instance
+        assert mock_instance._settings.language == "en-us"
+
+    def test_camb_voice_id_parsing(self):
+        """Voice ID string is correctly converted to int."""
+        mock_class, _, _ = self._create_camb_tts(voice="9999")
+        assert mock_class.call_args[1]["voice_id"] == 9999
+
+
+# ---------------------------------------------------------------------------
+# 3. API key validation tests
+# ---------------------------------------------------------------------------
+
+
+class TestCambAPIKeyValidation:
+    """CAMB key checks exposed by ``UserConfigurationValidator``."""
+
+    def test_camb_validator_returns_true(self):
+        """The CAMB-specific check returns True for an arbitrary key."""
+        outcome = UserConfigurationValidator()._check_camb_api_key(
+            "mars-flash", "any-key"
+        )
+        assert outcome is True
+
+    def test_camb_in_validator_map(self):
+        """The validator map has an entry for the CAMB provider value."""
+        registered = UserConfigurationValidator()._validator_map
+        assert ServiceProviders.CAMB.value in registered
+
+    def test_check_api_key_delegates_to_camb(self):
+        """``_check_api_key`` returns True when given the CAMB provider."""
+        provider = ServiceProviders.CAMB.value
+        outcome = UserConfigurationValidator()._check_api_key(provider, "test-key")
+        assert outcome is True
+
+
+# ---------------------------------------------------------------------------
+# 4. Pipeline integration tests (mocked CambTTSService)
+# ---------------------------------------------------------------------------
+
+
+class TestCambPipelineIntegration:
+    """Frame-sequence checks driven by local stand-in async generators.
+
+    The generators are defined inside each test; CambTTSService itself is not
+    invoked here.
+    """
+
+    @pytest.mark.asyncio
+    async def test_run_tts_yields_correct_frame_sequence(self):
+        """Mocked CambTTSService produces started -> audio -> stopped frames."""
+        frame_names = ["TTSStartedFrame", "TTSAudioRawFrame", "TTSStoppedFrame"]
+        frames = []
+        for frame_name in frame_names:
+            fake_frame = MagicMock()
+            # Each MagicMock has its own class, so the name can be set per frame.
+            fake_frame.__class__.__name__ = frame_name
+            frames.append(fake_frame)
+
+        async def mock_run_tts(text):
+            for fake_frame in frames:
+                yield fake_frame
+
+        collected = [frame async for frame in mock_run_tts("Hello world")]
+
+        assert len(collected) == 3
+        for position, frame_name in enumerate(frame_names):
+            assert collected[position].__class__.__name__ == frame_name
+
+    @pytest.mark.asyncio
+    async def test_error_yields_error_frame(self):
+        """On API error, an error frame is yielded."""
+        failure = MagicMock()
+        failure.error = "Camb.ai TTS error: 500 Internal Server Error"
+
+        async def mock_run_tts_error(text):
+            yield failure
+
+        collected = [frame async for frame in mock_run_tts_error("Hello")]
+
+        assert len(collected) == 1
+        (only_frame,) = collected
+        assert "Camb.ai TTS error" in only_frame.error
+
+
+# ---------------------------------------------------------------------------
+# 5. Error handling tests
+# ---------------------------------------------------------------------------
+
+
+class TestCambErrorHandling:
+    """Error-frame message check using a local stand-in async generator."""
+
+    @pytest.mark.asyncio
+    async def test_error_frame_contains_message(self):
+        """Every frame from the stand-in generator carries the Camb.ai prefix."""
+        failure = MagicMock()
+        failure.error = "Camb.ai TTS error: Invalid API key"
+
+        async def mock_error(text):
+            yield failure
+
+        messages = [frame.error async for frame in mock_error("test")]
+        assert all("Camb.ai TTS error" in message for message in messages)
--- a/scripts/setup_pipecat.sh
+++ b/scripts/setup_pipecat.sh
@ -16,7 +16,7 @@ git submodule update --init --recursive

 # Install pipecat in editable mode with all extras
 echo "Installing pipecat dependencies..."
-pip install -e ./pipecat[cartesia,deepgram,openai,elevenlabs,groq,google,azure,sarvam,soundfile,silero,webrtc,speechmatics,openrouter]
+pip install -e './pipecat[cartesia,deepgram,openai,elevenlabs,groq,google,azure,sarvam,soundfile,silero,webrtc,speechmatics,openrouter,camb]'

 # Install other requirements
 echo "Installing dograh API requirements..."