feat: add Smallest AI TTS and STT provider integration (#444)

* feat: add Smallest AI TTS and STT provider integration

  Integrates Smallest AI's Waves (TTS) and Pulse (STT) APIs as selectable providers in the Dograh platform. Dograh's pipecat fork already contains the pipecat-level service implementations; this wires them into the API configuration registry and service factory.

  - Added `SMALLEST = "smallest"` to `ServiceProviders` enum
  - Registered `SmallestAITTSConfiguration` (lightning-v3.1/v2, voices, language, speed) and `SmallestAISTTConfiguration` (pulse model, 30+ languages) Pydantic config classes with the TTS/STT registries
  - Added factory branches in `create_tts_service` and `create_stt_service` routing to `SmallestTTSService` and `SmallestSTTService` from pipecat

* fix: update Smallest AI models to v4 naming convention

  - TTS: rename lightning-v3.1 → lightning_v3.1, add lightning_v3.1_pro, drop deprecated lightning-v2
  - STT: keep pulse only (pulse-pro is not a streaming model)

* fix: change default TTS voice from emily to sophia for lightning_v3.1

  emily is not a verified lightning_v3.1 voice; sophia is the pipecat SmallestTTSService default and confirmed to work with the standard pool.

* fix: replace 9 invalid lightning_v3.1 voice IDs with verified ones

  jasmine, james, michael, aria, lara, asel, sarah, rishi, deepika do not exist in the lightning_v3.1 voice catalog. Replaced with avery, liam, lucas, olivia, freya, devansh, maya, dhruv, maithili — all verified against the API.

* fix: smallest ai config validation and tts model compatibility

* chore: ruff fix

* chore: updated smallest ai schema in openapi.json

---------

Co-authored-by: Sabiha Khan <sabihak89@gmail.com>
Co-authored-by: Sabiha Khan <87858386+chewwbaka@users.noreply.github.com>
2026-06-19 08:28:10 +02:00 · 2026-06-17 12:55:53 +05:30 · 2026-06-17 12:55:53 +05:30 · e79cb42f31
commit e79cb42f31
parent a849c9b244
6 changed files with 258 additions and 4 deletions
--- a/api/services/pipecat/service_factory.py
+++ b/api/services/pipecat/service_factory.py
@@ -62,6 +62,8 @@ from pipecat.services.rime.tts import RimeTTSService, RimeTTSSettings
 from pipecat.services.sarvam.llm import SarvamLLMService, SarvamLLMSettings
 from pipecat.services.sarvam.stt import SarvamSTTService, SarvamSTTSettings
 from pipecat.services.sarvam.tts import SarvamTTSService, SarvamTTSSettings
+from pipecat.services.smallest.stt import SmallestSTTService, SmallestSTTSettings
+from pipecat.services.smallest.tts import SmallestTTSService, SmallestTTSSettings
 from pipecat.services.speaches.llm import SpeachesLLMService, SpeachesLLMSettings
 from pipecat.services.speaches.stt import SpeachesSTTService, SpeachesSTTSettings
 from pipecat.services.speaches.tts import SpeachesTTSService, SpeachesTTSSettings
@@ -309,6 +311,20 @@ def create_stt_service(
            settings=AzureSTTSettings(language=pipecat_language),
            sample_rate=audio_config.transport_in_sample_rate,
        )
+    elif user_config.stt.provider == ServiceProviders.SMALLEST.value:
+        language_code = getattr(user_config.stt, "language", None) or "en"
+        try:
+            pipecat_language = Language(language_code)
+        except ValueError:
+            pipecat_language = Language.EN
+        return SmallestSTTService(
+            api_key=user_config.stt.api_key,
+            settings=SmallestSTTSettings(
+                model=user_config.stt.model,
+                language=pipecat_language,
+            ),
+            sample_rate=audio_config.transport_in_sample_rate,
+        )
    else:
        raise HTTPException(
            status_code=400, detail=f"Invalid STT provider {user_config.stt.provider}"
@@ -586,6 +602,28 @@ def create_tts_service(
            skip_aggregator_types=["recording_router", "recording"],
            silence_time_s=1.0,
        )
+    elif user_config.tts.provider == ServiceProviders.SMALLEST.value:
+        language_code = getattr(user_config.tts, "language", None) or "en"
+        try:
+            pipecat_language = Language(language_code)
+        except ValueError:
+            pipecat_language = Language.EN
+        speed = getattr(user_config.tts, "speed", None)
+        model = user_config.tts.model.replace("lightning-v", "lightning_v")
+        settings_kwargs = SmallestTTSSettings(
+            model=model,
+            voice=user_config.tts.voice,
+            language=pipecat_language,
+        )
+        if speed and speed != 1.0:
+            settings_kwargs.speed = speed
+        return SmallestTTSService(
+            api_key=user_config.tts.api_key,
+            settings=settings_kwargs,
+            text_filters=[xml_function_tag_filter],
+            skip_aggregator_types=["recording_router", "recording"],
+            silence_time_s=1.0,
+        )
    else:
        raise HTTPException(
            status_code=400, detail=f"Invalid TTS provider {user_config.tts.provider}"