feat: add Smallest AI TTS and STT provider integration (#444)

* feat: add Smallest AI TTS and STT provider integration

Integrates Smallest AI's Waves (TTS) and Pulse (STT) APIs as selectable providers in the Dograh platform. Dograh's pipecat fork already contains the pipecat-level service implementations; this wires them into the API configuration registry and service factory.

- Added `SMALLEST = "smallest"` to `ServiceProviders` enum
- Registered `SmallestAITTSConfiguration` (lightning-v3.1/v2, voices, language, speed) and `SmallestAISTTConfiguration` (pulse model, 30+ languages) Pydantic config classes with the TTS/STT registries
- Added factory branches in `create_tts_service` and `create_stt_service` routing to `SmallestTTSService` and `SmallestSTTService` from pipecat

* fix: update Smallest AI models to v4 naming convention

- TTS: rename lightning-v3.1 → lightning_v3.1, add lightning_v3.1_pro, drop deprecated lightning-v2
- STT: keep pulse only (pulse-pro is not a streaming model)

* fix: change default TTS voice from emily to sophia for lightning_v3.1

emily is not a verified lightning_v3.1 voice; sophia is the pipecat SmallestTTSService default and confirmed to work with the standard pool.

* fix: replace 9 invalid lightning_v3.1 voice IDs with verified ones

jasmine, james, michael, aria, lara, asel, sarah, rishi, deepika do not exist in the lightning_v3.1 voice catalog. Replaced with avery, liam, lucas, olivia, freya, devansh, maya, dhruv, maithili — all verified against the API.

* fix: smallest ai config validation and tts model compatibility

* chore: ruff fix

* chore: updated smallest ai schema in openapi.json

---------

Co-authored-by: Sabiha Khan <sabihak89@gmail.com>
Co-authored-by: Sabiha Khan <87858386+chewwbaka@users.noreply.github.com>
2026-06-19 08:28:10 +02:00 · 2026-06-17 12:55:53 +05:30 · 2026-06-17 12:55:53 +05:30 · e79cb42f31
commit e79cb42f31
parent a849c9b244
6 changed files with 258 additions and 4 deletions
--- a/api/services/configuration/check_validity.py
+++ b/api/services/configuration/check_validity.py
@@ -61,6 +61,7 @@ class UserConfigurationValidator:
            ServiceProviders.GLADIA.value: self._check_gladia_api_key,
            ServiceProviders.RIME.value: self._check_rime_api_key,
            ServiceProviders.MINIMAX.value: self._check_minimax_api_key,
+            ServiceProviders.SMALLEST.value: self._check_smallest_api_key,
        }

    async def validate(
@@ -398,6 +399,7 @@ class UserConfigurationValidator:
        return True

    def _check_minimax_api_key(self, model: str, api_key: str) -> bool:
-        # MiniMax doesn't publish a cheap key-validation endpoint; trust the key
-        # at save time and surface auth errors at first call (same as Rime/Sarvam).
+        return True
+
+    def _check_smallest_api_key(self, model: str, api_key: str) -> bool:
        return True
--- a/api/services/configuration/registry.py
+++ b/api/services/configuration/registry.py
@@ -80,6 +80,7 @@ class ServiceProviders(str, Enum):
    GOOGLE_REALTIME = "google_realtime"
    GOOGLE_VERTEX_REALTIME = "google_vertex_realtime"
    AZURE_REALTIME = "azure_realtime"
+    SMALLEST = "smallest"


 class BaseServiceConfiguration(BaseModel):
@@ -108,6 +109,7 @@ class BaseServiceConfiguration(BaseModel):
        ServiceProviders.GOOGLE_VERTEX_REALTIME,
        ServiceProviders.AZURE_REALTIME,
        ServiceProviders.SARVAM,
+        ServiceProviders.SMALLEST,
    ]
    api_key: str | list[str]

@@ -1158,6 +1160,80 @@ class AzureSpeechTTSConfiguration(BaseTTSConfiguration):
    )


+SMALLEST_PROVIDER_MODEL_CONFIG = provider_model_config(
+    "Smallest AI",
+    description="Smallest AI ultralow-latency TTS (Waves) and STT (Pulse) APIs.",
+    provider_docs_url="https://smallest.ai/docs",
+)
+
+SMALLEST_TTS_MODELS = ["lightning_v3.1", "lightning_v3.1_pro"]
+SMALLEST_TTS_VOICES = [
+    "sophia",
+    "avery",
+    "liam",
+    "lucas",
+    "olivia",
+    "ryan",
+    "freya",
+    "william",
+    "devansh",
+    "arjun",
+    "niharika",
+    "maya",
+    "dhruv",
+    "mia",
+    "maithili",
+]
+SMALLEST_TTS_LANGUAGES = [
+    "en",
+    "hi",
+    "fr",
+    "de",
+    "es",
+    "it",
+    "nl",
+    "pl",
+    "ru",
+    "ar",
+    "bn",
+    "gu",
+    "he",
+    "kn",
+    "mr",
+    "ta",
+]
+
+
+@register_tts
+class SmallestAITTSConfiguration(BaseTTSConfiguration):
+    model_config = SMALLEST_PROVIDER_MODEL_CONFIG
+    provider: Literal[ServiceProviders.SMALLEST] = ServiceProviders.SMALLEST
+    model: str = Field(
+        default="lightning_v3.1",
+        description="Smallest AI TTS model. lightning_v3.1_pro is the premium pool (American, British, Indian accents); lightning_v3.1 is the standard pool with 217 voices across 12 languages.",
+        json_schema_extra={"examples": SMALLEST_TTS_MODELS},
+    )
+    voice: str = Field(
+        default="sophia",
+        description="Smallest AI voice ID.",
+        json_schema_extra={"examples": SMALLEST_TTS_VOICES, "allow_custom_input": True},
+    )
+    language: str = Field(
+        default="en",
+        description="ISO 639-1 language code for synthesis.",
+        json_schema_extra={
+            "examples": SMALLEST_TTS_LANGUAGES,
+            "allow_custom_input": True,
+        },
+    )
+    speed: float = Field(
+        default=1.0,
+        ge=0.5,
+        le=2.0,
+        description="Speech speed multiplier (0.5 to 2.0).",
+    )
+
+
 TTSConfig = Annotated[
    Union[
        DeepgramTTSConfiguration,
@@ -1172,6 +1248,7 @@ TTSConfig = Annotated[
        SpeachesTTSConfiguration,
        MiniMaxTTSConfiguration,
        AzureSpeechTTSConfiguration,
+        SmallestAITTSConfiguration,
    ],
    Field(discriminator="provider"),
 ]
@@ -1466,6 +1543,62 @@ class AzureSpeechSTTConfiguration(BaseSTTConfiguration):
    )


+SMALLEST_STT_MODELS = ["pulse"]
+SMALLEST_STT_LANGUAGES = [
+    "en",
+    "hi",
+    "fr",
+    "de",
+    "es",
+    "it",
+    "nl",
+    "pl",
+    "ru",
+    "pt",
+    "bn",
+    "gu",
+    "kn",
+    "ml",
+    "mr",
+    "ta",
+    "te",
+    "pa",
+    "or",
+    "bg",
+    "cs",
+    "da",
+    "et",
+    "fi",
+    "hu",
+    "lt",
+    "lv",
+    "mt",
+    "ro",
+    "sk",
+    "sv",
+    "uk",
+]
+
+
+@register_stt
+class SmallestAISTTConfiguration(BaseSTTConfiguration):
+    model_config = SMALLEST_PROVIDER_MODEL_CONFIG
+    provider: Literal[ServiceProviders.SMALLEST] = ServiceProviders.SMALLEST
+    model: str = Field(
+        default="pulse",
+        description="Smallest AI STT model. Supports 38 languages with real-time streaming.",
+        json_schema_extra={"examples": SMALLEST_STT_MODELS},
+    )
+    language: str = Field(
+        default="en",
+        description="ISO 639-1 language code for transcription.",
+        json_schema_extra={
+            "examples": SMALLEST_STT_LANGUAGES,
+            "allow_custom_input": True,
+        },
+    )
+
+
 STTConfig = Annotated[
    Union[
        DeepgramSTTConfiguration,
@@ -1480,6 +1613,7 @@ STTConfig = Annotated[
        AssemblyAISTTConfiguration,
        GladiaSTTConfiguration,
        AzureSpeechSTTConfiguration,
+        SmallestAISTTConfiguration,
    ],
    Field(discriminator="provider"),
 ]
--- a/api/services/pipecat/service_factory.py
+++ b/api/services/pipecat/service_factory.py
@@ -62,6 +62,8 @@ from pipecat.services.rime.tts import RimeTTSService, RimeTTSSettings
 from pipecat.services.sarvam.llm import SarvamLLMService, SarvamLLMSettings
 from pipecat.services.sarvam.stt import SarvamSTTService, SarvamSTTSettings
 from pipecat.services.sarvam.tts import SarvamTTSService, SarvamTTSSettings
+from pipecat.services.smallest.stt import SmallestSTTService, SmallestSTTSettings
+from pipecat.services.smallest.tts import SmallestTTSService, SmallestTTSSettings
 from pipecat.services.speaches.llm import SpeachesLLMService, SpeachesLLMSettings
 from pipecat.services.speaches.stt import SpeachesSTTService, SpeachesSTTSettings
 from pipecat.services.speaches.tts import SpeachesTTSService, SpeachesTTSSettings
@@ -309,6 +311,20 @@ def create_stt_service(
            settings=AzureSTTSettings(language=pipecat_language),
            sample_rate=audio_config.transport_in_sample_rate,
        )
+    elif user_config.stt.provider == ServiceProviders.SMALLEST.value:
+        language_code = getattr(user_config.stt, "language", None) or "en"
+        try:
+            pipecat_language = Language(language_code)
+        except ValueError:
+            pipecat_language = Language.EN
+        return SmallestSTTService(
+            api_key=user_config.stt.api_key,
+            settings=SmallestSTTSettings(
+                model=user_config.stt.model,
+                language=pipecat_language,
+            ),
+            sample_rate=audio_config.transport_in_sample_rate,
+        )
    else:
        raise HTTPException(
            status_code=400, detail=f"Invalid STT provider {user_config.stt.provider}"
@@ -586,6 +602,28 @@ def create_tts_service(
            skip_aggregator_types=["recording_router", "recording"],
            silence_time_s=1.0,
        )
+    elif user_config.tts.provider == ServiceProviders.SMALLEST.value:
+        language_code = getattr(user_config.tts, "language", None) or "en"
+        try:
+            pipecat_language = Language(language_code)
+        except ValueError:
+            pipecat_language = Language.EN
+        speed = getattr(user_config.tts, "speed", None)
+        model = user_config.tts.model.replace("lightning-v", "lightning_v")
+        settings_kwargs = SmallestTTSSettings(
+            model=model,
+            voice=user_config.tts.voice,
+            language=pipecat_language,
+        )
+        if speed and speed != 1.0:
+            settings_kwargs.speed = speed
+        return SmallestTTSService(
+            api_key=user_config.tts.api_key,
+            settings=settings_kwargs,
+            text_filters=[xml_function_tag_filter],
+            skip_aggregator_types=["recording_router", "recording"],
+            silence_time_s=1.0,
+        )
    else:
        raise HTTPException(
            status_code=400, detail=f"Invalid TTS provider {user_config.tts.provider}"