feat: add Cartesia Ink 2 to the STT models

This commit is contained in:
Abhishek Kumar 2026-06-28 10:22:36 +05:30
parent 557de72b9c
commit 327ec561d5
10 changed files with 309 additions and 17 deletions

View file

@ -0,0 +1,94 @@
from types import SimpleNamespace
from unittest.mock import patch
from api.services.configuration.options import (
CARTESIA_INK_2_STT_LANGUAGES,
CARTESIA_INK_WHISPER_STT_LANGUAGES,
CARTESIA_STT_MODELS,
)
from api.services.configuration.registry import (
CartesiaSTTConfiguration,
ServiceProviders,
)
from api.services.pipecat.audio_config import AudioConfig
from api.services.pipecat.service_factory import (
create_stt_service,
stt_uses_external_turns,
)
def _audio_config() -> AudioConfig:
    """Build the ``AudioConfig`` fixture used by the tests in this module.

    Returns:
        An ``AudioConfig`` whose transport input and output sample rates are
        both 16000.
    """
    sample_rate = 16000
    return AudioConfig(
        transport_in_sample_rate=sample_rate,
        transport_out_sample_rate=sample_rate,
    )
def _cartesia_config(model: str, language: str = "en") -> SimpleNamespace:
    """Build a minimal stand-in user configuration that selects Cartesia STT.

    Args:
        model: Value stored on ``stt.model``.
        language: Value stored on ``stt.language``; defaults to ``"en"``.

    Returns:
        A namespace with a single ``stt`` attribute carrying the Cartesia
        provider value, a dummy API key, the model and the language.
    """
    stt_settings = SimpleNamespace(
        provider=ServiceProviders.CARTESIA.value,
        api_key="test-key",
        model=model,
        language=language,
    )
    return SimpleNamespace(stt=stt_settings)
def test_cartesia_stt_configuration_exposes_ink_2_and_ink_whisper_languages():
    """Check Cartesia STT defaults, option constants and schema language options."""
    defaults = CartesiaSTTConfiguration(api_key="test-key")
    schema = CartesiaSTTConfiguration.model_json_schema()
    language_property = schema["properties"]["language"]

    # Field values of a configuration built from an API key alone.
    assert defaults.provider == ServiceProviders.CARTESIA
    assert defaults.model == "ink-whisper"
    assert defaults.language == "en"

    # Module-level option constants.
    assert CARTESIA_STT_MODELS == ["ink-2", "ink-whisper"]
    assert CARTESIA_INK_2_STT_LANGUAGES == ("en",)
    assert "es" in CARTESIA_INK_WHISPER_STT_LANGUAGES

    # Per-model language lists published under the schema's ``language`` property.
    per_model_languages = language_property["model_options"]
    assert per_model_languages["ink-2"] == ["en"]
    assert "es" in per_model_languages["ink-whisper"]
def test_cartesia_ink_2_uses_external_turns_and_turns_service():
    """Check that an ``ink-2`` config builds the turns-based Cartesia service."""
    config = _cartesia_config("ink-2")

    assert stt_uses_external_turns(config)

    # Replace both service classes in the factory module so that no real
    # service object is constructed and the chosen one can be inspected.
    with patch(
        "api.services.pipecat.service_factory.CartesiaTurnsSTTService"
    ) as turns_cls:
        with patch(
            "api.services.pipecat.service_factory.CartesiaSTTService"
        ) as manual_cls:
            create_stt_service(config, _audio_config())

            turns_cls.assert_called_once()
            manual_cls.assert_not_called()

            call_kwargs = turns_cls.call_args.kwargs
            assert call_kwargs["api_key"] == "test-key"
            assert call_kwargs["sample_rate"] == 16000
            assert call_kwargs["should_interrupt"] is False
def test_cartesia_ink_whisper_uses_manual_stt_service_with_model_and_language():
    """Check that an ``ink-whisper`` config builds ``CartesiaSTTService``."""
    config = _cartesia_config("ink-whisper", language="es")

    assert not stt_uses_external_turns(config)

    # Replace both service classes in the factory module so that no real
    # service object is constructed and the chosen one can be inspected.
    with patch(
        "api.services.pipecat.service_factory.CartesiaTurnsSTTService"
    ) as turns_cls:
        with patch(
            "api.services.pipecat.service_factory.CartesiaSTTService"
        ) as manual_cls:
            create_stt_service(config, _audio_config())

            turns_cls.assert_not_called()
            manual_cls.assert_called_once()

            call_kwargs = manual_cls.call_args.kwargs
            assert call_kwargs["api_key"] == "test-key"
            assert call_kwargs["sample_rate"] == 16000

            # Model and language are passed via the ``settings`` keyword.
            settings = call_kwargs["settings"]
            assert settings.model == "ink-whisper"
            assert settings.language == "es"

View file

@ -9,7 +9,7 @@ from api.services.pipecat.audio_config import AudioConfig
from api.services.pipecat.service_factory import (
create_stt_service,
dograh_stt_uses_flux_language,
stt_uses_flux_turns,
stt_uses_external_turns,
)
@ -38,10 +38,10 @@ def test_dograh_flux_language_predicate_matches_multilingual_support():
assert not dograh_stt_uses_flux_language("ar")
def test_stt_uses_flux_turns_only_for_dograh_flux_supported_languages():
    """Check ``stt_uses_flux_turns`` for "multi", "es" and "ar" Dograh configs."""
    for language in ("multi", "es"):
        assert stt_uses_flux_turns(_dograh_config(language))

    assert not stt_uses_flux_turns(_dograh_config("ar"))
def test_stt_uses_external_turns_only_for_dograh_flux_supported_languages():
    """Check ``stt_uses_external_turns`` for "multi", "es" and "ar" Dograh configs."""
    for language in ("multi", "es"):
        assert stt_uses_external_turns(_dograh_config(language))

    assert not stt_uses_external_turns(_dograh_config("ar"))
def test_create_dograh_multi_uses_flux_service_without_language_hint():

View file

@ -11,7 +11,12 @@ from pipecat.turns.user_stop import (
)
from api.services.configuration.registry import ServiceProviders
from api.services.pipecat.run_pipeline import _create_realtime_user_turn_config
from api.services.pipecat.run_pipeline import (
DEFAULT_USER_TURN_STOP_TIMEOUT,
EXTERNAL_TURN_USER_STOP_TIMEOUT,
_create_realtime_user_turn_config,
_resolve_user_turn_stop_timeout,
)
def test_gemini_realtime_uses_local_vad_without_local_interruptions():
@ -72,3 +77,27 @@ def test_unknown_realtime_providers_keep_local_vad():
assert isinstance(strategies.start[0], VADUserTurnStartStrategy)
assert len(strategies.stop) == 1
assert isinstance(strategies.stop[0], SpeechTimeoutUserTurnStopStrategy)
def test_external_turn_stt_uses_longer_stop_timeout():
    """An empty config with external turns resolves to the external-turn timeout."""
    resolved = _resolve_user_turn_stop_timeout({}, uses_external_turns=True)

    assert resolved == EXTERNAL_TURN_USER_STOP_TIMEOUT
def test_standard_stt_keeps_default_stop_timeout():
    """An empty config without external turns resolves to the default timeout."""
    resolved = _resolve_user_turn_stop_timeout({}, uses_external_turns=False)

    assert resolved == DEFAULT_USER_TURN_STOP_TIMEOUT
def test_workflow_config_can_override_user_turn_stop_timeout():
    """A ``user_turn_stop_timeout`` config entry determines the resolved value."""
    # The override is supplied as the string "12.5"; the expected result is
    # the float 12.5.
    workflow_config = {"user_turn_stop_timeout": "12.5"}

    resolved = _resolve_user_turn_stop_timeout(
        workflow_config,
        uses_external_turns=True,
    )

    assert resolved == 12.5