mirror of
https://github.com/dograh-hq/dograh.git
synced 2026-06-19 08:28:10 +02:00
fix: add pace option in sarvam tts config (#447)
* fix: add pace option in sarvam tts config * fix: generate client
This commit is contained in:
parent
e79cb42f31
commit
a2d9ed24ed
4 changed files with 61 additions and 5 deletions
|
|
@ -1001,6 +1001,12 @@ class SarvamTTSConfiguration(BaseTTSConfiguration):
|
|||
description="BCP-47 Indian-language code (e.g. hi-IN, en-IN).",
|
||||
json_schema_extra={"examples": SARVAM_LANGUAGES},
|
||||
)
|
||||
speed: float = Field(
|
||||
default=1.0,
|
||||
ge=0.5,
|
||||
le=2.0,
|
||||
description="Speech speed multiplier.",
|
||||
)
|
||||
|
||||
|
||||
CAMB_TTS_MODELS = ["mars-flash", "mars-pro", "mars-instruct"]
|
||||
|
|
|
|||
|
|
@ -535,13 +535,17 @@ def create_tts_service(
|
|||
pipecat_language = language_mapping.get(language, Language.HI)
|
||||
|
||||
voice = getattr(user_config.tts, "voice", None) or "anushka"
|
||||
speed = getattr(user_config.tts, "speed", None)
|
||||
settings_kwargs = {
|
||||
"model": user_config.tts.model,
|
||||
"voice": voice,
|
||||
"language": pipecat_language,
|
||||
}
|
||||
if speed and speed != 1.0:
|
||||
settings_kwargs["pace"] = speed
|
||||
return SarvamTTSService(
|
||||
api_key=user_config.tts.api_key,
|
||||
settings=SarvamTTSSettings(
|
||||
model=user_config.tts.model,
|
||||
voice=voice,
|
||||
language=pipecat_language,
|
||||
),
|
||||
settings=SarvamTTSSettings(**settings_kwargs),
|
||||
text_filters=[xml_function_tag_filter],
|
||||
skip_aggregator_types=["recording_router", "recording"],
|
||||
silence_time_s=1.0,
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ from pipecat.transcriptions.language import Language
|
|||
|
||||
from api.services.configuration.registry import (
|
||||
SarvamLLMConfiguration,
|
||||
SarvamTTSConfiguration,
|
||||
ServiceProviders,
|
||||
)
|
||||
from api.services.pipecat.audio_config import AudioConfig
|
||||
|
|
@ -14,6 +15,7 @@ from api.services.pipecat.service_factory import (
|
|||
create_llm_service,
|
||||
create_llm_service_from_provider,
|
||||
create_stt_service,
|
||||
create_tts_service,
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -112,3 +114,41 @@ class TestSarvamSTTServiceFactory:
|
|||
|
||||
kwargs = mock_service.call_args.kwargs
|
||||
assert kwargs["settings"].language == expected_language
|
||||
|
||||
|
||||
class TestSarvamTTSServiceFactory:
|
||||
def test_sarvam_tts_configuration_defaults(self):
|
||||
config = SarvamTTSConfiguration(api_key="test-key")
|
||||
|
||||
assert config.provider == ServiceProviders.SARVAM
|
||||
assert config.model == "bulbul:v2"
|
||||
assert config.voice == "anushka"
|
||||
assert config.language == "hi-IN"
|
||||
assert config.speed == 1.0
|
||||
|
||||
def test_create_sarvam_tts_service_maps_speed_to_pace(self):
|
||||
user_config = SimpleNamespace(
|
||||
tts=SimpleNamespace(
|
||||
provider=ServiceProviders.SARVAM.value,
|
||||
api_key="test-key",
|
||||
model="bulbul:v2",
|
||||
voice="anushka",
|
||||
language="hi-IN",
|
||||
speed=1.25,
|
||||
)
|
||||
)
|
||||
audio_config = AudioConfig(
|
||||
transport_in_sample_rate=16000, transport_out_sample_rate=16000
|
||||
)
|
||||
|
||||
with patch(
|
||||
"api.services.pipecat.service_factory.SarvamTTSService"
|
||||
) as mock_service:
|
||||
create_tts_service(user_config, audio_config)
|
||||
|
||||
kwargs = mock_service.call_args.kwargs
|
||||
assert kwargs["api_key"] == "test-key"
|
||||
assert kwargs["settings"].model == "bulbul:v2"
|
||||
assert kwargs["settings"].voice == "anushka"
|
||||
assert kwargs["settings"].language == Language.HI
|
||||
assert kwargs["settings"].pace == 1.25
|
||||
|
|
|
|||
|
|
@ -4750,6 +4750,12 @@ export type SarvamTtsConfiguration = {
|
|||
* BCP-47 Indian-language code (e.g. hi-IN, en-IN).
|
||||
*/
|
||||
language?: string;
|
||||
/**
|
||||
* Speed
|
||||
*
|
||||
* Speech speed multiplier.
|
||||
*/
|
||||
speed?: number;
|
||||
};
|
||||
|
||||
/**
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue