From e111cbb36d945acc231961313051f1b2198d95bf Mon Sep 17 00:00:00 2001
From: Abhishek Kumar
Date: Fri, 20 Feb 2026 20:41:11 +0530
Subject: [PATCH] feat: add cartesia tts

---
Reviewer notes (free text between "---" and the diffstat; not part of the
commit message):

 * registry.py: registers CartesiaTTSConfiguration (model default "sonic-3",
   a default voice id, required api_key) and lists it under TTSConfig. It
   also adds a `model` field (default "ink-whisper") to the existing
   CartesiaSTTConfiguration.
 * service_factory.py: imports CartesiaTTSService and returns one from
   create_tts_service when the TTS provider is ServiceProviders.CARTESIA,
   passing api_key, voice (as voice_id), model and xml_function_tag_filter.
 * NOTE(review): no hunk in this patch forwards the new STT `model` field to
   CartesiaSTTService; confirm whether create_stt_service already reads it.

 api/services/configuration/registry.py  | 20 ++++++++++++++++++++
 api/services/pipecat/service_factory.py |  8 ++++++++
 2 files changed, 28 insertions(+)

diff --git a/api/services/configuration/registry.py b/api/services/configuration/registry.py
index ab28a0d..7955a4b 100644
--- a/api/services/configuration/registry.py
+++ b/api/services/configuration/registry.py
@@ -277,6 +277,19 @@ class DograhTTSService(BaseTTSConfiguration):
     api_key: str
 
 
+CARTESIA_TTS_MODELS = ["sonic-3"]
+
+
+@register_tts
+class CartesiaTTSConfiguration(BaseTTSConfiguration):
+    provider: Literal[ServiceProviders.CARTESIA] = ServiceProviders.CARTESIA
+    model: str = Field(
+        default="sonic-3", json_schema_extra={"examples": CARTESIA_TTS_MODELS}
+    )
+    voice: str = Field(default="a167e0f3-df7e-4d52-a9c3-f949145571bd")
+    api_key: str
+
+
 SARVAM_TTS_MODELS = ["bulbul:v2", "bulbul:v3"]
 SARVAM_V2_VOICES = [
     "anushka",
@@ -371,6 +384,7 @@ TTSConfig = Annotated[
         DeepgramTTSConfiguration,
         OpenAITTSService,
         ElevenlabsTTSConfiguration,
+        CartesiaTTSConfiguration,
         DograhTTSService,
         SarvamTTSConfiguration,
     ],
@@ -485,9 +499,15 @@ class DeepgramSTTConfiguration(BaseSTTConfiguration):
     api_key: str
 
 
+CARTESIA_STT_MODELS = ["ink-whisper"]
+
+
 @register_stt
 class CartesiaSTTConfiguration(BaseSTTConfiguration):
     provider: Literal[ServiceProviders.CARTESIA] = ServiceProviders.CARTESIA
+    model: str = Field(
+        default="ink-whisper", json_schema_extra={"examples": CARTESIA_STT_MODELS}
+    )
     api_key: str
 
 
diff --git a/api/services/pipecat/service_factory.py b/api/services/pipecat/service_factory.py
index ee76263..07a7d04 100644
--- a/api/services/pipecat/service_factory.py
+++ b/api/services/pipecat/service_factory.py
@@ -7,6 +7,7 @@ from api.constants import MPS_API_URL
 from api.services.configuration.registry import ServiceProviders
 from pipecat.services.azure.llm import AzureLLMService
 from pipecat.services.cartesia.stt import CartesiaSTTService
+from pipecat.services.cartesia.tts import CartesiaTTSService
 from pipecat.services.deepgram.flux.stt import DeepgramFluxSTTService
 from pipecat.services.deepgram.stt import DeepgramSTTService, LiveOptions
 from pipecat.services.deepgram.tts import DeepgramTTSService
@@ -191,6 +192,13 @@ def create_tts_service(user_config, audio_config: "AudioConfig"):
             ),
             text_filters=[xml_function_tag_filter],
         )
+    elif user_config.tts.provider == ServiceProviders.CARTESIA.value:
+        return CartesiaTTSService(
+            api_key=user_config.tts.api_key,
+            voice_id=user_config.tts.voice,
+            model=user_config.tts.model,
+            text_filters=[xml_function_tag_filter],
+        )
     elif user_config.tts.provider == ServiceProviders.DOGRAH.value:
         # Convert HTTP URL to WebSocket URL for TTS
         base_url = MPS_API_URL.replace("http://", "ws://").replace("https://", "wss://")