From 31e075d114d9d6bfc17733f8dc68531587feb6ea Mon Sep 17 00:00:00 2001
From: "neil from camb.ai"
Date: Tue, 24 Mar 2026 15:24:07 +0800
Subject: [PATCH] feat: add CAMB AI TTS integration (#187)

Co-authored-by: Abhishek
---
 .gitignore                                   |   3 +-
 api/Dockerfile                               |   3 +-
 api/services/configuration/check_validity.py |   4 +
 api/services/configuration/registry.py       |  15 ++
 api/services/pipecat/service_factory.py      |  14 ++
 api/tests/test_camb_tts_integration.py       | 208 +++++++++++++++++++
 scripts/setup_pipecat.sh                     |   2 +-
 7 files changed, 245 insertions(+), 4 deletions(-)
 create mode 100644 api/tests/test_camb_tts_integration.py

diff --git a/.gitignore b/.gitignore
index 8cb9efa..e92242d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -15,4 +15,5 @@ venv/
 .venv/
 .playwright-mcp
 coturn/
-dograh_pcm_cache/
\ No newline at end of file
+dograh_pcm_cache/
+*.wav
diff --git a/api/Dockerfile b/api/Dockerfile
index 834462a..37a64ef 100644
--- a/api/Dockerfile
+++ b/api/Dockerfile
@@ -25,11 +25,10 @@ RUN pip install --user --no-cache-dir -r requirements.txt && \
 
 # Copy and install pipecat from local submodule
 COPY pipecat /tmp/pipecat
-RUN pip install --user --no-cache-dir '/tmp/pipecat[cartesia,deepgram,openai,elevenlabs,groq,google,azure,sarvam,soundfile,silero,webrtc,local-smart-turn-v3,speechmatics,openrouter]' && \
+RUN pip install --user --no-cache-dir '/tmp/pipecat[cartesia,deepgram,openai,elevenlabs,groq,google,azure,sarvam,soundfile,silero,webrtc,local-smart-turn-v3,speechmatics,openrouter,camb]' && \
     # Clean up pip cache and temporary pipecat directory
     rm -rf /root/.cache/pip /tmp/pipecat
 
-
 # Remove unnecessary Python cache files from installed packages
 RUN find /root/.local -type f -name '*.pyc' -delete && \
     find /root/.local -type d -name '__pycache__' -delete && \
diff --git a/api/services/configuration/check_validity.py b/api/services/configuration/check_validity.py
index a082f56..0208db6 100644
--- a/api/services/configuration/check_validity.py
+++ b/api/services/configuration/check_validity.py
@@ -38,6
+38,7 @@ class UserConfigurationValidator:
             ServiceProviders.DOGRAH.value: self._check_dograh_api_key,
             ServiceProviders.SARVAM.value: self._check_sarvam_api_key,
             ServiceProviders.SPEECHMATICS.value: self._check_speechmatics_api_key,
+            ServiceProviders.CAMB.value: self._check_camb_api_key,
             ServiceProviders.AWS_BEDROCK.value: self._check_aws_bedrock_api_key,
         }
 
@@ -160,6 +161,10 @@ class UserConfigurationValidator:
     def _check_speechmatics_api_key(self, model: str, api_key: str) -> bool:
         return True
 
+    def _check_camb_api_key(self, model: str, api_key: str) -> bool:
+        """Accept any CAMB key; no remote validation is performed here."""
+        return True
+
     def _check_aws_bedrock_api_key(self, model: str, service_config) -> bool:
         if not service_config.aws_access_key or not service_config.aws_secret_key:
             raise ValueError("AWS access key and secret key are required for Bedrock")
diff --git a/api/services/configuration/registry.py b/api/services/configuration/registry.py
index 4e828b2..49288ee 100644
--- a/api/services/configuration/registry.py
+++ b/api/services/configuration/registry.py
@@ -25,6 +25,7 @@ class ServiceProviders(str, Enum):
     DOGRAH = "dograh"
     SARVAM = "sarvam"
     SPEECHMATICS = "speechmatics"
+    CAMB = "camb"
     AWS_BEDROCK = "aws_bedrock"
 
 
@@ -423,6 +424,26 @@ class SarvamTTSConfiguration(BaseTTSConfiguration):
     )
 
 
+# Model names offered as JSON-schema examples for the CAMB ``model`` field.
+CAMB_TTS_MODELS = ["mars-flash", "mars-pro", "mars-instruct"]
+
+
+@register_tts
+class CambTTSConfiguration(BaseTTSConfiguration):
+    """TTS settings for the CAMB provider."""
+
+    provider: Literal[ServiceProviders.CAMB] = ServiceProviders.CAMB
+    model: str = Field(
+        default="mars-flash", json_schema_extra={"examples": CAMB_TTS_MODELS}
+    )
+    # Stored as a string but converted with int() by the service factory, so
+    # only digits (or empty, meaning "use the default voice") are accepted.
+    voice: str = Field(
+        default="147320", pattern=r"^[0-9]*$", description="Camb.ai voice ID"
+    )
+    language: str = Field(default="en-us", description="BCP-47 language code")
+
+
 TTSConfig = Annotated[
     Union[
         DeepgramTTSConfiguration,
@@ -431,6 +452,7 @@ TTSConfig = Annotated[
         CartesiaTTSConfiguration,
         DograhTTSService,
         SarvamTTSConfiguration,
+        CambTTSConfiguration,
     ],
     Field(discriminator="provider"),
 ]
diff --git a/api/services/pipecat/service_factory.py
b/api/services/pipecat/service_factory.py
index caf3234..6d00082 100644
--- a/api/services/pipecat/service_factory.py
+++ b/api/services/pipecat/service_factory.py
@@ -237,6 +237,28 @@ def create_tts_service(user_config, audio_config: "AudioConfig"):
             text_filters=[xml_function_tag_filter],
             silence_time_s=1.0,
         )
+    elif user_config.tts.provider == ServiceProviders.CAMB.value:
+        from pipecat.services.camb.tts import CambTTSService
+
+        # The configuration stores the voice as a string while CambTTSService
+        # is given an integer ID; fall back to the default voice when unset.
+        raw_voice = getattr(user_config.tts, "voice", None) or "147320"
+        try:
+            voice_id = int(raw_voice)
+        except (TypeError, ValueError) as exc:
+            raise ValueError(
+                f"CAMB voice must be a numeric voice ID, got {raw_voice!r}"
+            ) from exc
+        language = getattr(user_config.tts, "language", None) or "en-us"
+        tts = CambTTSService(
+            api_key=user_config.tts.api_key,
+            voice_id=voice_id,
+            model=user_config.tts.model,
+            text_filters=[xml_function_tag_filter],
+        )
+        # Set language directly as BCP-47 code (bypasses Language enum conversion)
+        tts._settings.language = language
+        return tts
     elif user_config.tts.provider == ServiceProviders.SARVAM.value:
         # Map Sarvam language code to pipecat Language enum for TTS
         language_mapping = {
diff --git a/api/tests/test_camb_tts_integration.py b/api/tests/test_camb_tts_integration.py
new file mode 100644
index 0000000..3d6c4a8
--- /dev/null
+++ b/api/tests/test_camb_tts_integration.py
@@ -0,0 +1,208 @@
+"""Tests for CAMB AI TTS integration into Dograh.
+
+Covers:
+- CambTTSConfiguration model (defaults, custom values, JSON schema)
+- Service factory CAMB branch
+- API key validation
+- Pipeline integration (mocked)
+- Error handling
+"""
+
+from types import SimpleNamespace
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+from pydantic import ValidationError
+
+from api.services.configuration.check_validity import UserConfigurationValidator
+from api.services.configuration.registry import (
+    CAMB_TTS_MODELS,
+    CambTTSConfiguration,
+    REGISTRY,
+    ServiceProviders,
+    ServiceType,
+)
+
+
+# ---------------------------------------------------------------------------
+# 1.
CambTTSConfiguration model tests
+# ---------------------------------------------------------------------------
+
+
+class TestCambTTSConfiguration:
+    def test_defaults(self):
+        """Only the API key is supplied; every other field uses its default."""
+        config = CambTTSConfiguration(api_key="test-key")
+        expected = {"model": "mars-flash", "voice": "147320", "language": "en-us"}
+        assert config.provider == ServiceProviders.CAMB
+        for field_name, default in expected.items():
+            assert getattr(config, field_name) == default
+
+    def test_custom_values(self):
+        """Explicit model, voice and language are stored as given."""
+        overrides = {"model": "mars-pro", "voice": "9999", "language": "fr-fr"}
+        config = CambTTSConfiguration(api_key="k", **overrides)
+        for field_name, expected in overrides.items():
+            assert getattr(config, field_name) == expected
+
+    def test_json_schema_has_model_examples(self):
+        """The JSON schema lists the known models as examples for ``model``."""
+        properties = CambTTSConfiguration.model_json_schema()["properties"]
+        assert properties["model"]["examples"] == CAMB_TTS_MODELS
+
+    def test_registered_in_tts_registry(self):
+        """The TTS registry maps the CAMB provider to this configuration class."""
+        tts_registry = REGISTRY[ServiceType.TTS]
+        assert ServiceProviders.CAMB in tts_registry
+        assert tts_registry[ServiceProviders.CAMB] is CambTTSConfiguration
+
+    def test_api_key_required(self):
+        """Omitting ``api_key`` fails validation."""
+        with pytest.raises(ValidationError):
+            CambTTSConfiguration()
+
+
+# ---------------------------------------------------------------------------
+# 2.
Service factory tests
+# ---------------------------------------------------------------------------
+
+
+class TestServiceFactoryCamb:
+    """Exercise the CAMB branch of ``create_tts_service`` with pipecat mocked."""
+
+    # Mock missing modules (custom pipecat fork, not in public pipecat-ai)
+    _FORK_MODULES = (
+        "pipecat.services.dograh",
+        "pipecat.services.dograh.llm",
+        "pipecat.services.dograh.stt",
+        "pipecat.services.dograh.tts",
+        "pipecat.utils.text.xml_function_tag_filter",
+    )
+
+    @staticmethod
+    def _user_config(voice="147320"):
+        return SimpleNamespace(
+            tts=SimpleNamespace(
+                provider=ServiceProviders.CAMB.value,
+                api_key="test-api-key",
+                model="mars-flash",
+                voice=voice,
+                language="en-us",
+            )
+        )
+
+    def _create(self, user_config):
+        """Run the factory with stubs in place; return ``(tts, mocked class)``."""
+        import importlib
+        import sys
+
+        stubs = {m: MagicMock() for m in self._FORK_MODULES if m not in sys.modules}
+        audio_config = SimpleNamespace(
+            transport_out_sample_rate=22050,
+            transport_in_sample_rate=16000,
+        )
+        with patch.dict(sys.modules, stubs):
+            # Force re-import with mocked modules
+            if "api.services.pipecat.service_factory" in sys.modules:
+                importlib.reload(sys.modules["api.services.pipecat.service_factory"])
+            from api.services.pipecat.service_factory import create_tts_service
+            with patch("pipecat.services.camb.tts.CambTTSService") as mock_cls:
+                return create_tts_service(user_config, audio_config), mock_cls
+
+    def test_create_tts_service_camb(self):
+        tts, mock_cls = self._create(self._user_config())
+        mock_cls.assert_called_once()
+        call_kwargs = mock_cls.call_args[1]
+        assert call_kwargs["api_key"] == "test-api-key"
+        assert call_kwargs["voice_id"] == 147320
+        assert call_kwargs["model"] == "mars-flash"
+        assert tts is mock_cls.return_value
+        assert tts._settings.language == "en-us"
+
+    def test_camb_voice_id_parsing(self):
+        """A non-numeric voice ID is rejected before the service is built."""
+        with pytest.raises(ValueError):
+            self._create(self._user_config(voice="not-a-number"))
+
+
+# ---------------------------------------------------------------------------
+# 3.
API key validation tests
+# ---------------------------------------------------------------------------
+
+
+class TestCambAPIKeyValidation:
+    def test_camb_validator_returns_true(self):
+        validator = UserConfigurationValidator()
+        assert validator._check_camb_api_key("mars-flash", "any-key") is True
+
+    def test_camb_in_validator_map(self):
+        validator = UserConfigurationValidator()
+        assert ServiceProviders.CAMB.value in validator._validator_map
+
+    def test_check_api_key_delegates_to_camb(self):
+        validator = UserConfigurationValidator()
+        assert validator._check_api_key(ServiceProviders.CAMB.value, "test-key") is True
+
+
+# ---------------------------------------------------------------------------
+# 4. Pipeline integration tests (stand-in generators; CambTTSService not invoked)
+# ---------------------------------------------------------------------------
+
+
+class TestCambPipelineIntegration:
+    @pytest.mark.asyncio
+    async def test_run_tts_yields_correct_frame_sequence(self):
+        """A stand-in generator yields started -> audio -> stopped mocks in order."""
+        started = MagicMock()
+        started.__class__.__name__ = "TTSStartedFrame"
+        audio = MagicMock()
+        audio.__class__.__name__ = "TTSAudioRawFrame"
+        stopped = MagicMock()
+        stopped.__class__.__name__ = "TTSStoppedFrame"
+
+        async def mock_run_tts(text):
+            for f in [started, audio, stopped]:
+                yield f
+
+        collected = []
+        async for frame in mock_run_tts("Hello world"):
+            collected.append(frame)
+
+        assert len(collected) == 3
+        assert collected[0].__class__.__name__ == "TTSStartedFrame"
+        assert collected[1].__class__.__name__ == "TTSAudioRawFrame"
+        assert collected[2].__class__.__name__ == "TTSStoppedFrame"
+
+    @pytest.mark.asyncio
+    async def test_error_yields_error_frame(self):
+        """A stand-in generator yields one mock carrying the error text."""
+        error_frame = MagicMock()
+        error_frame.error = "Camb.ai TTS error: 500 Internal Server Error"
+
+        async def mock_run_tts_error(text):
+            yield error_frame
+
+        collected = []
+        async for frame in mock_run_tts_error("Hello"):
+            collected.append(frame)
+
+        assert len(collected) == 1
+        assert "Camb.ai TTS error" in collected[0].error
+
+
+# ---------------------------------------------------------------------------
+# 5. Error handling tests (stand-in generator; CambTTSService not invoked)
+# ---------------------------------------------------------------------------
+
+
+class TestCambErrorHandling:
+    @pytest.mark.asyncio
+    async def test_error_frame_contains_message(self):
+        error_frame = MagicMock()
+        error_frame.error = "Camb.ai TTS error: Invalid API key"
+
+        async def mock_error(text):
+            yield error_frame
+
+        async for frame in mock_error("test"):
+            assert "Camb.ai TTS error" in frame.error
diff --git a/scripts/setup_pipecat.sh b/scripts/setup_pipecat.sh
index 546684e..8ae4e16 100755
--- a/scripts/setup_pipecat.sh
+++ b/scripts/setup_pipecat.sh
@@ -16,7 +16,7 @@ git submodule update --init --recursive
 
 # Install pipecat in editable mode with all extras
 echo "Installing pipecat dependencies..."
-pip install -e ./pipecat[cartesia,deepgram,openai,elevenlabs,groq,google,azure,sarvam,soundfile,silero,webrtc,speechmatics,openrouter]
+pip install -e './pipecat[cartesia,deepgram,openai,elevenlabs,groq,google,azure,sarvam,soundfile,silero,webrtc,speechmatics,openrouter,camb]'
 
 # Install other requirements
 echo "Installing dograh API requirements..."