fix: migrate from custom audio recorder to native AudioBuffer (#115)

* fix: update to pipecat VM Detector
* fix: refactor to remove audio synchronizer
* feat: add Speechmatics as STT
2026-06-22 08:38:13 +02:00 · 2026-01-08 18:03:26 +05:30 · 2026-01-08 18:03:26 +05:30 · edf0fa4fbc
commit edf0fa4fbc
parent 31521008cf
12 changed files with 193 additions and 591 deletions
--- a/api/services/configuration/check_validity.py
+++ b/api/services/configuration/check_validity.py
@ -39,6 +39,7 @@ class UserConfigurationValidator:
            ServiceProviders.CARTESIA.value: self._check_cartesia_api_key,
            ServiceProviders.DOGRAH.value: self._check_dograh_api_key,
            ServiceProviders.SARVAM.value: self._check_sarvam_api_key,
+            ServiceProviders.SPEECHMATICS.value: self._check_speechmatics_api_key,
        }

    async def validate(self, configuration: UserConfiguration) -> APIKeyStatusResponse:
@ -137,3 +138,6 @@ class UserConfigurationValidator:

    def _check_sarvam_api_key(self, model: str, api_key: str) -> bool:
        """Report the Sarvam API key as valid.

        Neither ``model`` nor ``api_key`` is inspected; the method
        unconditionally returns True.
        """
        return True
+    
+    def _check_speechmatics_api_key(self, model: str, api_key: str) -> bool:  # model and api_key are not inspected
+        return True  # unconditionally reports the key as valid; no check is performed here
--- a/api/services/configuration/registry.py
+++ b/api/services/configuration/registry.py
@ -21,6 +21,7 @@ class ServiceProviders(str, Enum):
    AZURE = "azure"
    DOGRAH = "dograh"
    SARVAM = "sarvam"
+    SPEECHMATICS = "speechmatics"


 class BaseServiceConfiguration(BaseModel):
@ -240,6 +241,7 @@ class DograhTTSService(BaseTTSConfiguration):
        default="default", json_schema_extra={"examples": DOGRAH_TTS_MODELS}
    )
    voice: str = "default"
+    speed: float = Field(default=1.0, ge=0.5, le=2.0, description="Speed of the voice")
    api_key: str


@ -375,11 +377,50 @@ SARVAM_STT_MODELS = ["saarika:v2.5", "saaras:v2"]
 #     api_key: str


+# Speechmatics STT Service
+SPEECHMATICS_STT_LANGUAGES = [  # two-letter language codes used as JSON-schema examples for SpeechmaticsSTTConfiguration.language (look like ISO 639-1 — confirm against Speechmatics docs)
+    "en",
+    "es",
+    "fr",
+    "de",
+    "it",
+    "pt",
+    "nl",
+    "ja",
+    "ko",
+    "zh",
+    "ru",
+    "ar",
+    "hi",
+    "pl",
+    "tr",
+    "vi",
+    "th",
+    "id",
+    "ms",
+    "sv",
+    "da",
+    "no",
+    "fi",
+]
+
+
+@register_stt  # NOTE(review): presumably adds this class to the STT registry — confirm in register_stt
+class SpeechmaticsSTTConfiguration(BaseSTTConfiguration):  # STT settings for the Speechmatics provider
+    provider: Literal[ServiceProviders.SPEECHMATICS] = ServiceProviders.SPEECHMATICS  # fixed tag; STTConfig discriminates on "provider"
+    model: str = Field(default="enhanced", description="Operating point: standard or enhanced")  # plain str: the type itself does not restrict the value to the two names in the description
+    language: str = Field(
+        default="en", json_schema_extra={"examples": SPEECHMATICS_STT_LANGUAGES}  # the list is attached as schema examples only
+    )
+    api_key: str  # no default is given here
+
+
 STTConfig = Annotated[
    Union[
        DeepgramSTTConfiguration,
        OpenAISTTConfiguration,
        DograhSTTService,
+        SpeechmaticsSTTConfiguration,
        # SarvamSTTConfiguration,
    ],
    Field(discriminator="provider"),