feat: add Smallest AI TTS and STT provider integration (#444)

* feat: add Smallest AI TTS and STT provider integration

Integrates Smallest AI's Waves (TTS) and Pulse (STT) APIs as selectable
providers in the Dograh platform. Dograh's pipecat fork already contains
the pipecat-level service implementations; this wires them into the API
configuration registry and service factory.

- Added `SMALLEST = "smallest"` to `ServiceProviders` enum
- Registered `SmallestAITTSConfiguration` (lightning-v3.1/v2, voices,
  language, speed) and `SmallestAISTTConfiguration` (pulse model, 30+
  languages) Pydantic config classes with the TTS/STT registries
- Added factory branches in `create_tts_service` and `create_stt_service`
  routing to `SmallestTTSService` and `SmallestSTTService` from pipecat

* fix: update Smallest AI models to v4 naming convention

- TTS: rename lightning-v3.1 → lightning_v3.1, add lightning_v3.1_pro, drop deprecated lightning-v2
- STT: keep pulse only (pulse-pro is not a streaming model)

* fix: change default TTS voice from emily to sophia for lightning_v3.1

emily is not a verified lightning_v3.1 voice; sophia is the pipecat
SmallestTTSService default and confirmed to work with the standard pool.

* fix: replace 9 invalid lightning_v3.1 voice IDs with verified ones

jasmine, james, michael, aria, lara, asel, sarah, rishi, deepika do not
exist in the lightning_v3.1 voice catalog. Replaced with avery, liam,
lucas, olivia, freya, devansh, maya, dhruv, maithili — all verified
against the API.

* fix: smallest ai config validation and tts model compatibility

* chore: ruff fix

* chore: updated smallest ai schema in openapi.json

---------

Co-authored-by: Sabiha Khan <sabihak89@gmail.com>
Co-authored-by: Sabiha Khan <87858386+chewwbaka@users.noreply.github.com>
This commit is contained in:
Harshita Jain 2026-06-17 12:55:53 +05:30 committed by GitHub
parent a849c9b244
commit e79cb42f31
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 258 additions and 4 deletions

View file

@ -61,6 +61,7 @@ class UserConfigurationValidator:
ServiceProviders.GLADIA.value: self._check_gladia_api_key,
ServiceProviders.RIME.value: self._check_rime_api_key,
ServiceProviders.MINIMAX.value: self._check_minimax_api_key,
ServiceProviders.SMALLEST.value: self._check_smallest_api_key,
}
async def validate(
@ -398,6 +399,7 @@ class UserConfigurationValidator:
return True
def _check_minimax_api_key(self, model: str, api_key: str) -> bool:
# MiniMax doesn't publish a cheap key-validation endpoint; trust the key
# at save time and surface auth errors at first call (same as Rime/Sarvam).
# Neither `model` nor `api_key` is inspected here; the parameters exist so
# this checker has the same signature as the other provider checkers.
return True
def _check_smallest_api_key(self, model: str, api_key: str) -> bool:
# No validation is performed for Smallest AI: the key is accepted
# unconditionally and neither `model` nor `api_key` is inspected, exactly
# like the MiniMax checker above.
# NOTE(review): presumably an invalid key only surfaces as an auth error on
# the first TTS/STT call — confirm, and consider a real check if Smallest AI
# offers a cheap authenticated endpoint.
return True

View file

@ -80,6 +80,7 @@ class ServiceProviders(str, Enum):
GOOGLE_REALTIME = "google_realtime"
GOOGLE_VERTEX_REALTIME = "google_vertex_realtime"
AZURE_REALTIME = "azure_realtime"
SMALLEST = "smallest"
class BaseServiceConfiguration(BaseModel):
@ -108,6 +109,7 @@ class BaseServiceConfiguration(BaseModel):
ServiceProviders.GOOGLE_VERTEX_REALTIME,
ServiceProviders.AZURE_REALTIME,
ServiceProviders.SARVAM,
ServiceProviders.SMALLEST,
]
api_key: str | list[str]
@ -1158,6 +1160,80 @@ class AzureSpeechTTSConfiguration(BaseTTSConfiguration):
)
# Provider-level metadata (display name, description, docs link) for Smallest AI.
# The same object is assigned as `model_config` on both the TTS and the STT
# configuration classes for this provider.
SMALLEST_PROVIDER_MODEL_CONFIG = provider_model_config(
"Smallest AI",
description="Smallest AI ultralow-latency TTS (Waves) and STT (Pulse) APIs.",
provider_docs_url="https://smallest.ai/docs",
)
# Suggested values surfaced as schema "examples" for the Smallest AI TTS
# configuration fields. They are hints only: the fields themselves are plain
# strings, so nothing here is enforced at validation time.

# Model identifiers (underscore naming).
SMALLEST_TTS_MODELS = ["lightning_v3.1", "lightning_v3.1_pro"]

# fmt: off
# Voice IDs; "sophia" comes first and is also the configuration default.
SMALLEST_TTS_VOICES = [
    "sophia", "avery", "liam", "lucas", "olivia",
    "ryan", "freya", "william", "devansh", "arjun",
    "niharika", "maya", "dhruv", "mia", "maithili",
]

# Two-letter language codes offered for synthesis.
SMALLEST_TTS_LANGUAGES = [
    "en", "hi", "fr", "de", "es", "it", "nl", "pl",
    "ru", "ar", "bn", "gu", "he", "kn", "mr", "ta",
]
# fmt: on
# Configuration schema for Smallest AI text-to-speech. It is registered through
# the @register_tts decorator and is picked out of the TTSConfig union by the
# value of its `provider` discriminator field.
@register_tts
class SmallestAITTSConfiguration(BaseTTSConfiguration):
# Provider-level metadata shared with the Smallest AI STT configuration.
model_config = SMALLEST_PROVIDER_MODEL_CONFIG
# Fixed discriminator value identifying this provider.
provider: Literal[ServiceProviders.SMALLEST] = ServiceProviders.SMALLEST
# Free-form string: SMALLEST_TTS_MODELS is only attached as schema examples,
# so values outside that list still pass validation.
# NOTE(review): the description mentions 12 languages, while
# SMALLEST_TTS_LANGUAGES lists 16 codes — confirm which figure is right.
model: str = Field(
default="lightning_v3.1",
description="Smallest AI TTS model. lightning_v3.1_pro is the premium pool (American, British, Indian accents); lightning_v3.1 is the standard pool with 217 voices across 12 languages.",
json_schema_extra={"examples": SMALLEST_TTS_MODELS},
)
# Voice ID; the listed voices are examples only.
# NOTE(review): `allow_custom_input` is presumably a UI hint that lets users
# type a value outside the examples — confirm against the schema consumer.
voice: str = Field(
default="sophia",
description="Smallest AI voice ID.",
json_schema_extra={"examples": SMALLEST_TTS_VOICES, "allow_custom_input": True},
)
# Language code stored as a plain string; it is not restricted to
# SMALLEST_TTS_LANGUAGES at validation time.
language: str = Field(
default="en",
description="ISO 639-1 language code for synthesis.",
json_schema_extra={
"examples": SMALLEST_TTS_LANGUAGES,
"allow_custom_input": True,
},
)
# Speed multiplier; the `ge`/`le` bounds reject values outside 0.5..2.0.
speed: float = Field(
default=1.0,
ge=0.5,
le=2.0,
description="Speech speed multiplier (0.5 to 2.0).",
)
TTSConfig = Annotated[
Union[
DeepgramTTSConfiguration,
@ -1172,6 +1248,7 @@ TTSConfig = Annotated[
SpeachesTTSConfiguration,
MiniMaxTTSConfiguration,
AzureSpeechTTSConfiguration,
SmallestAITTSConfiguration,
],
Field(discriminator="provider"),
]
@ -1466,6 +1543,62 @@ class AzureSpeechSTTConfiguration(BaseSTTConfiguration):
)
# Suggested values surfaced as schema "examples" for the Smallest AI STT
# configuration fields. They are hints only: the fields themselves are plain
# strings, so nothing here is enforced at validation time.

# The single STT model currently offered.
SMALLEST_STT_MODELS = ["pulse"]

# fmt: off
# Two-letter language codes offered for transcription (32 entries).
SMALLEST_STT_LANGUAGES = [
    "en", "hi", "fr", "de", "es", "it", "nl", "pl",
    "ru", "pt", "bn", "gu", "kn", "ml", "mr", "ta",
    "te", "pa", "or", "bg", "cs", "da", "et", "fi",
    "hu", "lt", "lv", "mt", "ro", "sk", "sv", "uk",
]
# fmt: on
# Configuration schema for Smallest AI speech-to-text. It is registered through
# the @register_stt decorator and is picked out of the STTConfig union by the
# value of its `provider` discriminator field.
@register_stt
class SmallestAISTTConfiguration(BaseSTTConfiguration):
# Provider-level metadata shared with the Smallest AI TTS configuration.
model_config = SMALLEST_PROVIDER_MODEL_CONFIG
# Fixed discriminator value identifying this provider.
provider: Literal[ServiceProviders.SMALLEST] = ServiceProviders.SMALLEST
# Free-form string: SMALLEST_STT_MODELS is only attached as schema examples.
# NOTE(review): the description mentions 38 languages, while
# SMALLEST_STT_LANGUAGES lists 32 codes — confirm which figure is right.
model: str = Field(
default="pulse",
description="Smallest AI STT model. Supports 38 languages with real-time streaming.",
json_schema_extra={"examples": SMALLEST_STT_MODELS},
)
# Language code stored as a plain string; it is not restricted to
# SMALLEST_STT_LANGUAGES at validation time.
# NOTE(review): `allow_custom_input` is presumably a UI hint that lets users
# type a value outside the examples — confirm against the schema consumer.
language: str = Field(
default="en",
description="ISO 639-1 language code for transcription.",
json_schema_extra={
"examples": SMALLEST_STT_LANGUAGES,
"allow_custom_input": True,
},
)
STTConfig = Annotated[
Union[
DeepgramSTTConfiguration,
@ -1480,6 +1613,7 @@ STTConfig = Annotated[
AssemblyAISTTConfiguration,
GladiaSTTConfiguration,
AzureSpeechSTTConfiguration,
SmallestAISTTConfiguration,
],
Field(discriminator="provider"),
]

View file

@ -62,6 +62,8 @@ from pipecat.services.rime.tts import RimeTTSService, RimeTTSSettings
from pipecat.services.sarvam.llm import SarvamLLMService, SarvamLLMSettings
from pipecat.services.sarvam.stt import SarvamSTTService, SarvamSTTSettings
from pipecat.services.sarvam.tts import SarvamTTSService, SarvamTTSSettings
from pipecat.services.smallest.stt import SmallestSTTService, SmallestSTTSettings
from pipecat.services.smallest.tts import SmallestTTSService, SmallestTTSSettings
from pipecat.services.speaches.llm import SpeachesLLMService, SpeachesLLMSettings
from pipecat.services.speaches.stt import SpeachesSTTService, SpeachesSTTSettings
from pipecat.services.speaches.tts import SpeachesTTSService, SpeachesTTSSettings
@ -309,6 +311,20 @@ def create_stt_service(
settings=AzureSTTSettings(language=pipecat_language),
sample_rate=audio_config.transport_in_sample_rate,
)
# Smallest AI (Pulse) speech-to-text.
elif user_config.stt.provider == ServiceProviders.SMALLEST.value:
# A missing or empty `language` setting is treated as English.
language_code = getattr(user_config.stt, "language", None) or "en"
try:
pipecat_language = Language(language_code)
except ValueError:
# A code that `Language` rejects falls back to English silently; the user
# is not told that the configured language was ignored.
pipecat_language = Language.EN
return SmallestSTTService(
api_key=user_config.stt.api_key,
settings=SmallestSTTSettings(
model=user_config.stt.model,
language=pipecat_language,
),
# The service is given the transport's input sample rate.
sample_rate=audio_config.transport_in_sample_rate,
)
else:
raise HTTPException(
status_code=400, detail=f"Invalid STT provider {user_config.stt.provider}"
@ -586,6 +602,28 @@ def create_tts_service(
skip_aggregator_types=["recording_router", "recording"],
silence_time_s=1.0,
)
# Smallest AI (Waves) text-to-speech.
elif user_config.tts.provider == ServiceProviders.SMALLEST.value:
# A missing or empty `language` setting is treated as English.
language_code = getattr(user_config.tts, "language", None) or "en"
try:
pipecat_language = Language(language_code)
except ValueError:
# A code that `Language` rejects falls back to English silently; the user
# is not told that the configured language was ignored.
pipecat_language = Language.EN
speed = getattr(user_config.tts, "speed", None)
# Rewrite the hyphenated "lightning-v" spelling to the underscore form.
# NOTE(review): presumably this keeps configurations saved with the older
# hyphenated model names working — confirm.
model = user_config.tts.model.replace("lightning-v", "lightning_v")
# Despite its name, `settings_kwargs` is a SmallestTTSSettings instance, not a
# dict of keyword arguments.
settings_kwargs = SmallestTTSSettings(
model=model,
voice=user_config.tts.voice,
language=pipecat_language,
)
# Only override speed when it is set and differs from 1.0; otherwise the
# settings object keeps whatever value it was constructed with.
if speed and speed != 1.0:
settings_kwargs.speed = speed
return SmallestTTSService(
api_key=user_config.tts.api_key,
settings=settings_kwargs,
text_filters=[xml_function_tag_filter],
skip_aggregator_types=["recording_router", "recording"],
silence_time_s=1.0,
)
else:
raise HTTPException(
status_code=400, detail=f"Invalid TTS provider {user_config.tts.provider}"