mirror of
https://github.com/dograh-hq/dograh.git
synced 2026-06-19 08:28:10 +02:00
feat: add Smallest AI TTS and STT provider integration (#444)
* feat: add Smallest AI TTS and STT provider integration Integrates Smallest AI's Waves (TTS) and Pulse (STT) APIs as selectable providers in the Dograh platform. Dograh's pipecat fork already contains the pipecat-level service implementations; this wires them into the API configuration registry and service factory. - Added `SMALLEST = "smallest"` to `ServiceProviders` enum - Registered `SmallestAITTSConfiguration` (lightning-v3.1/v2, voices, language, speed) and `SmallestAISTTConfiguration` (pulse model, 30+ languages) Pydantic config classes with the TTS/STT registries - Added factory branches in `create_tts_service` and `create_stt_service` routing to `SmallestTTSService` and `SmallestSTTService` from pipecat * fix: update Smallest AI models to v4 naming convention - TTS: rename lightning-v3.1 → lightning_v3.1, add lightning_v3.1_pro, drop deprecated lightning-v2 - STT: keep pulse only (pulse-pro is not a streaming model) * fix: change default TTS voice from emily to sophia for lightning_v3.1 emily is not a verified lightning_v3.1 voice; sophia is the pipecat SmallestTTSService default and confirmed to work with the standard pool. * fix: replace 9 invalid lightning_v3.1 voice IDs with verified ones jasmine, james, michael, aria, lara, asel, sarah, rishi, deepika do not exist in the lightning_v3.1 voice catalog. Replaced with avery, liam, lucas, olivia, freya, devansh, maya, dhruv, maithili — all verified against the API. * fix: smallest ai config validation and tts model compatibility * chore: ruff fix * chore: updated smallest ai schema in openapi.json --------- Co-authored-by: Sabiha Khan <sabihak89@gmail.com> Co-authored-by: Sabiha Khan <87858386+chewwbaka@users.noreply.github.com>
This commit is contained in:
parent
a849c9b244
commit
e79cb42f31
6 changed files with 258 additions and 4 deletions
|
|
@ -61,6 +61,7 @@ class UserConfigurationValidator:
|
|||
ServiceProviders.GLADIA.value: self._check_gladia_api_key,
|
||||
ServiceProviders.RIME.value: self._check_rime_api_key,
|
||||
ServiceProviders.MINIMAX.value: self._check_minimax_api_key,
|
||||
ServiceProviders.SMALLEST.value: self._check_smallest_api_key,
|
||||
}
|
||||
|
||||
async def validate(
|
||||
|
|
@ -398,6 +399,7 @@ class UserConfigurationValidator:
|
|||
return True
|
||||
|
||||
def _check_minimax_api_key(self, model: str, api_key: str) -> bool:
|
||||
# MiniMax doesn't publish a cheap key-validation endpoint; trust the key
|
||||
# at save time and surface auth errors at first call (same as Rime/Sarvam).
|
||||
return True
|
||||
|
||||
def _check_smallest_api_key(self, model: str, api_key: str) -> bool:
|
||||
return True
|
||||
|
|
|
|||
|
|
@ -80,6 +80,7 @@ class ServiceProviders(str, Enum):
|
|||
GOOGLE_REALTIME = "google_realtime"
|
||||
GOOGLE_VERTEX_REALTIME = "google_vertex_realtime"
|
||||
AZURE_REALTIME = "azure_realtime"
|
||||
SMALLEST = "smallest"
|
||||
|
||||
|
||||
class BaseServiceConfiguration(BaseModel):
|
||||
|
|
@ -108,6 +109,7 @@ class BaseServiceConfiguration(BaseModel):
|
|||
ServiceProviders.GOOGLE_VERTEX_REALTIME,
|
||||
ServiceProviders.AZURE_REALTIME,
|
||||
ServiceProviders.SARVAM,
|
||||
ServiceProviders.SMALLEST,
|
||||
]
|
||||
api_key: str | list[str]
|
||||
|
||||
|
|
@ -1158,6 +1160,80 @@ class AzureSpeechTTSConfiguration(BaseTTSConfiguration):
|
|||
)
|
||||
|
||||
|
||||
SMALLEST_PROVIDER_MODEL_CONFIG = provider_model_config(
|
||||
"Smallest AI",
|
||||
description="Smallest AI ultralow-latency TTS (Waves) and STT (Pulse) APIs.",
|
||||
provider_docs_url="https://smallest.ai/docs",
|
||||
)
|
||||
|
||||
SMALLEST_TTS_MODELS = ["lightning_v3.1", "lightning_v3.1_pro"]
|
||||
SMALLEST_TTS_VOICES = [
|
||||
"sophia",
|
||||
"avery",
|
||||
"liam",
|
||||
"lucas",
|
||||
"olivia",
|
||||
"ryan",
|
||||
"freya",
|
||||
"william",
|
||||
"devansh",
|
||||
"arjun",
|
||||
"niharika",
|
||||
"maya",
|
||||
"dhruv",
|
||||
"mia",
|
||||
"maithili",
|
||||
]
|
||||
SMALLEST_TTS_LANGUAGES = [
|
||||
"en",
|
||||
"hi",
|
||||
"fr",
|
||||
"de",
|
||||
"es",
|
||||
"it",
|
||||
"nl",
|
||||
"pl",
|
||||
"ru",
|
||||
"ar",
|
||||
"bn",
|
||||
"gu",
|
||||
"he",
|
||||
"kn",
|
||||
"mr",
|
||||
"ta",
|
||||
]
|
||||
|
||||
|
||||
@register_tts
|
||||
class SmallestAITTSConfiguration(BaseTTSConfiguration):
|
||||
model_config = SMALLEST_PROVIDER_MODEL_CONFIG
|
||||
provider: Literal[ServiceProviders.SMALLEST] = ServiceProviders.SMALLEST
|
||||
model: str = Field(
|
||||
default="lightning_v3.1",
|
||||
description="Smallest AI TTS model. lightning_v3.1_pro is the premium pool (American, British, Indian accents); lightning_v3.1 is the standard pool with 217 voices across 12 languages.",
|
||||
json_schema_extra={"examples": SMALLEST_TTS_MODELS},
|
||||
)
|
||||
voice: str = Field(
|
||||
default="sophia",
|
||||
description="Smallest AI voice ID.",
|
||||
json_schema_extra={"examples": SMALLEST_TTS_VOICES, "allow_custom_input": True},
|
||||
)
|
||||
language: str = Field(
|
||||
default="en",
|
||||
description="ISO 639-1 language code for synthesis.",
|
||||
json_schema_extra={
|
||||
"examples": SMALLEST_TTS_LANGUAGES,
|
||||
"allow_custom_input": True,
|
||||
},
|
||||
)
|
||||
speed: float = Field(
|
||||
default=1.0,
|
||||
ge=0.5,
|
||||
le=2.0,
|
||||
description="Speech speed multiplier (0.5 to 2.0).",
|
||||
)
|
||||
|
||||
|
||||
TTSConfig = Annotated[
|
||||
Union[
|
||||
DeepgramTTSConfiguration,
|
||||
|
|
@ -1172,6 +1248,7 @@ TTSConfig = Annotated[
|
|||
SpeachesTTSConfiguration,
|
||||
MiniMaxTTSConfiguration,
|
||||
AzureSpeechTTSConfiguration,
|
||||
SmallestAITTSConfiguration,
|
||||
],
|
||||
Field(discriminator="provider"),
|
||||
]
|
||||
|
|
@ -1466,6 +1543,62 @@ class AzureSpeechSTTConfiguration(BaseSTTConfiguration):
|
|||
)
|
||||
|
||||
|
||||
SMALLEST_STT_MODELS = ["pulse"]
|
||||
SMALLEST_STT_LANGUAGES = [
|
||||
"en",
|
||||
"hi",
|
||||
"fr",
|
||||
"de",
|
||||
"es",
|
||||
"it",
|
||||
"nl",
|
||||
"pl",
|
||||
"ru",
|
||||
"pt",
|
||||
"bn",
|
||||
"gu",
|
||||
"kn",
|
||||
"ml",
|
||||
"mr",
|
||||
"ta",
|
||||
"te",
|
||||
"pa",
|
||||
"or",
|
||||
"bg",
|
||||
"cs",
|
||||
"da",
|
||||
"et",
|
||||
"fi",
|
||||
"hu",
|
||||
"lt",
|
||||
"lv",
|
||||
"mt",
|
||||
"ro",
|
||||
"sk",
|
||||
"sv",
|
||||
"uk",
|
||||
]
|
||||
|
||||
|
||||
@register_stt
|
||||
class SmallestAISTTConfiguration(BaseSTTConfiguration):
|
||||
model_config = SMALLEST_PROVIDER_MODEL_CONFIG
|
||||
provider: Literal[ServiceProviders.SMALLEST] = ServiceProviders.SMALLEST
|
||||
model: str = Field(
|
||||
default="pulse",
|
||||
description="Smallest AI STT model. Supports 38 languages with real-time streaming.",
|
||||
json_schema_extra={"examples": SMALLEST_STT_MODELS},
|
||||
)
|
||||
language: str = Field(
|
||||
default="en",
|
||||
description="ISO 639-1 language code for transcription.",
|
||||
json_schema_extra={
|
||||
"examples": SMALLEST_STT_LANGUAGES,
|
||||
"allow_custom_input": True,
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
STTConfig = Annotated[
|
||||
Union[
|
||||
DeepgramSTTConfiguration,
|
||||
|
|
@ -1480,6 +1613,7 @@ STTConfig = Annotated[
|
|||
AssemblyAISTTConfiguration,
|
||||
GladiaSTTConfiguration,
|
||||
AzureSpeechSTTConfiguration,
|
||||
SmallestAISTTConfiguration,
|
||||
],
|
||||
Field(discriminator="provider"),
|
||||
]
|
||||
|
|
|
|||
|
|
@ -62,6 +62,8 @@ from pipecat.services.rime.tts import RimeTTSService, RimeTTSSettings
|
|||
from pipecat.services.sarvam.llm import SarvamLLMService, SarvamLLMSettings
|
||||
from pipecat.services.sarvam.stt import SarvamSTTService, SarvamSTTSettings
|
||||
from pipecat.services.sarvam.tts import SarvamTTSService, SarvamTTSSettings
|
||||
from pipecat.services.smallest.stt import SmallestSTTService, SmallestSTTSettings
|
||||
from pipecat.services.smallest.tts import SmallestTTSService, SmallestTTSSettings
|
||||
from pipecat.services.speaches.llm import SpeachesLLMService, SpeachesLLMSettings
|
||||
from pipecat.services.speaches.stt import SpeachesSTTService, SpeachesSTTSettings
|
||||
from pipecat.services.speaches.tts import SpeachesTTSService, SpeachesTTSSettings
|
||||
|
|
@ -309,6 +311,20 @@ def create_stt_service(
|
|||
settings=AzureSTTSettings(language=pipecat_language),
|
||||
sample_rate=audio_config.transport_in_sample_rate,
|
||||
)
|
||||
elif user_config.stt.provider == ServiceProviders.SMALLEST.value:
|
||||
language_code = getattr(user_config.stt, "language", None) or "en"
|
||||
try:
|
||||
pipecat_language = Language(language_code)
|
||||
except ValueError:
|
||||
pipecat_language = Language.EN
|
||||
return SmallestSTTService(
|
||||
api_key=user_config.stt.api_key,
|
||||
settings=SmallestSTTSettings(
|
||||
model=user_config.stt.model,
|
||||
language=pipecat_language,
|
||||
),
|
||||
sample_rate=audio_config.transport_in_sample_rate,
|
||||
)
|
||||
else:
|
||||
raise HTTPException(
|
||||
status_code=400, detail=f"Invalid STT provider {user_config.stt.provider}"
|
||||
|
|
@ -586,6 +602,28 @@ def create_tts_service(
|
|||
skip_aggregator_types=["recording_router", "recording"],
|
||||
silence_time_s=1.0,
|
||||
)
|
||||
elif user_config.tts.provider == ServiceProviders.SMALLEST.value:
|
||||
language_code = getattr(user_config.tts, "language", None) or "en"
|
||||
try:
|
||||
pipecat_language = Language(language_code)
|
||||
except ValueError:
|
||||
pipecat_language = Language.EN
|
||||
speed = getattr(user_config.tts, "speed", None)
|
||||
model = user_config.tts.model.replace("lightning-v", "lightning_v")
|
||||
settings_kwargs = SmallestTTSSettings(
|
||||
model=model,
|
||||
voice=user_config.tts.voice,
|
||||
language=pipecat_language,
|
||||
)
|
||||
if speed and speed != 1.0:
|
||||
settings_kwargs.speed = speed
|
||||
return SmallestTTSService(
|
||||
api_key=user_config.tts.api_key,
|
||||
settings=settings_kwargs,
|
||||
text_filters=[xml_function_tag_filter],
|
||||
skip_aggregator_types=["recording_router", "recording"],
|
||||
silence_time_s=1.0,
|
||||
)
|
||||
else:
|
||||
raise HTTPException(
|
||||
status_code=400, detail=f"Invalid TTS provider {user_config.tts.provider}"
|
||||
|
|
|
|||
80
api/tests/test_smallest_service_factory.py
Normal file
80
api/tests/test_smallest_service_factory.py
Normal file
|
|
@ -0,0 +1,80 @@
|
|||
from types import SimpleNamespace
|
||||
from unittest.mock import patch
|
||||
|
||||
from api.services.configuration.check_validity import UserConfigurationValidator
|
||||
from api.services.configuration.registry import (
|
||||
REGISTRY,
|
||||
ServiceProviders,
|
||||
ServiceType,
|
||||
SmallestAISTTConfiguration,
|
||||
SmallestAITTSConfiguration,
|
||||
)
|
||||
from api.services.pipecat.service_factory import create_tts_service
|
||||
|
||||
|
||||
def test_smallest_tts_configuration_defaults_and_registry():
|
||||
config = SmallestAITTSConfiguration(api_key="test-key")
|
||||
|
||||
assert config.provider == ServiceProviders.SMALLEST
|
||||
assert config.model == "lightning_v3.1"
|
||||
assert config.voice == "sophia"
|
||||
assert config.language == "en"
|
||||
assert config.speed == 1.0
|
||||
assert (
|
||||
REGISTRY[ServiceType.TTS][ServiceProviders.SMALLEST]
|
||||
is SmallestAITTSConfiguration
|
||||
)
|
||||
|
||||
|
||||
def test_smallest_stt_configuration_defaults_and_registry():
|
||||
config = SmallestAISTTConfiguration(api_key="test-key")
|
||||
|
||||
assert config.provider == ServiceProviders.SMALLEST
|
||||
assert config.model == "pulse"
|
||||
assert config.language == "en"
|
||||
assert (
|
||||
REGISTRY[ServiceType.STT][ServiceProviders.SMALLEST]
|
||||
is SmallestAISTTConfiguration
|
||||
)
|
||||
|
||||
|
||||
def test_validator_accepts_smallest_services():
|
||||
validator = UserConfigurationValidator()
|
||||
|
||||
assert (
|
||||
validator._validate_service(
|
||||
SmallestAITTSConfiguration(api_key="test-key"),
|
||||
"tts",
|
||||
)
|
||||
== []
|
||||
)
|
||||
assert (
|
||||
validator._validate_service(
|
||||
SmallestAISTTConfiguration(api_key="test-key"),
|
||||
"stt",
|
||||
)
|
||||
== []
|
||||
)
|
||||
|
||||
|
||||
def test_create_smallest_tts_service_normalizes_hyphenated_model_values():
|
||||
user_config = SimpleNamespace(
|
||||
tts=SimpleNamespace(
|
||||
provider=ServiceProviders.SMALLEST.value,
|
||||
api_key="test-key",
|
||||
model="lightning-v3.1",
|
||||
voice="sophia",
|
||||
language="en",
|
||||
speed=1.0,
|
||||
)
|
||||
)
|
||||
audio_config = SimpleNamespace(transport_in_sample_rate=16000)
|
||||
|
||||
with patch(
|
||||
"api.services.pipecat.service_factory.SmallestTTSService"
|
||||
) as mock_service:
|
||||
create_tts_service(user_config, audio_config)
|
||||
|
||||
assert mock_service.call_count == 1
|
||||
kwargs = mock_service.call_args.kwargs
|
||||
assert kwargs["settings"].model == "lightning_v3.1"
|
||||
Loading…
Add table
Add a link
Reference in a new issue