diff --git a/api/services/configuration/registry.py b/api/services/configuration/registry.py index eca8381f..9ac9b7d2 100644 --- a/api/services/configuration/registry.py +++ b/api/services/configuration/registry.py @@ -1001,6 +1001,12 @@ class SarvamTTSConfiguration(BaseTTSConfiguration): description="BCP-47 Indian-language code (e.g. hi-IN, en-IN).", json_schema_extra={"examples": SARVAM_LANGUAGES}, ) + speed: float = Field( + default=1.0, + ge=0.5, + le=2.0, + description="Speech speed multiplier.", + ) CAMB_TTS_MODELS = ["mars-flash", "mars-pro", "mars-instruct"] diff --git a/api/services/pipecat/service_factory.py b/api/services/pipecat/service_factory.py index 11df51e9..68f7c303 100644 --- a/api/services/pipecat/service_factory.py +++ b/api/services/pipecat/service_factory.py @@ -535,13 +535,17 @@ def create_tts_service( pipecat_language = language_mapping.get(language, Language.HI) voice = getattr(user_config.tts, "voice", None) or "anushka" + speed = getattr(user_config.tts, "speed", None) + settings_kwargs = { + "model": user_config.tts.model, + "voice": voice, + "language": pipecat_language, + } + if speed and speed != 1.0: + settings_kwargs["pace"] = speed return SarvamTTSService( api_key=user_config.tts.api_key, - settings=SarvamTTSSettings( - model=user_config.tts.model, - voice=voice, - language=pipecat_language, - ), + settings=SarvamTTSSettings(**settings_kwargs), text_filters=[xml_function_tag_filter], skip_aggregator_types=["recording_router", "recording"], silence_time_s=1.0, diff --git a/api/tests/test_sarvam_service_factory.py b/api/tests/test_sarvam_service_factory.py index 3040769f..7abd0d0c 100644 --- a/api/tests/test_sarvam_service_factory.py +++ b/api/tests/test_sarvam_service_factory.py @@ -7,6 +7,7 @@ from pipecat.transcriptions.language import Language from api.services.configuration.registry import ( SarvamLLMConfiguration, + SarvamTTSConfiguration, ServiceProviders, ) from api.services.pipecat.audio_config import AudioConfig @@ -14,6 +15,7 @@ from api.services.pipecat.service_factory import ( create_llm_service, create_llm_service_from_provider, create_stt_service, + create_tts_service, ) @@ -112,3 +114,41 @@ class TestSarvamSTTServiceFactory: kwargs = mock_service.call_args.kwargs assert kwargs["settings"].language == expected_language + + +class TestSarvamTTSServiceFactory: + def test_sarvam_tts_configuration_defaults(self): + config = SarvamTTSConfiguration(api_key="test-key") + + assert config.provider == ServiceProviders.SARVAM + assert config.model == "bulbul:v2" + assert config.voice == "anushka" + assert config.language == "hi-IN" + assert config.speed == 1.0 + + def test_create_sarvam_tts_service_maps_speed_to_pace(self): + user_config = SimpleNamespace( + tts=SimpleNamespace( + provider=ServiceProviders.SARVAM.value, + api_key="test-key", + model="bulbul:v2", + voice="anushka", + language="hi-IN", + speed=1.25, + ) + ) + audio_config = AudioConfig( + transport_in_sample_rate=16000, transport_out_sample_rate=16000 + ) + + with patch( + "api.services.pipecat.service_factory.SarvamTTSService" + ) as mock_service: + create_tts_service(user_config, audio_config) + + kwargs = mock_service.call_args.kwargs + assert kwargs["api_key"] == "test-key" + assert kwargs["settings"].model == "bulbul:v2" + assert kwargs["settings"].voice == "anushka" + assert kwargs["settings"].language == Language.HI + assert kwargs["settings"].pace == 1.25 diff --git a/ui/src/client/types.gen.ts b/ui/src/client/types.gen.ts index c2a4c6ae..e1b0770a 100644 --- a/ui/src/client/types.gen.ts +++ b/ui/src/client/types.gen.ts @@ -4750,6 +4750,12 @@ export type SarvamTtsConfiguration = { * BCP-47 Indian-language code (e.g. hi-IN, en-IN). */ language?: string; + /** + * Speed + * + * Speech speed multiplier. + */ + speed?: number; }; /**