Merge remote-tracking branch 'origin/main' into feat/user-onboarding

# Conflicts:
#	docs/api-reference/openapi.json
#	sdk/python/src/dograh_sdk/_generated_models.py
#	ui/src/client/index.ts
#	ui/src/components/AIModelConfigurationV2Editor.tsx
2026-07-04 10:52:17 +02:00 · 2026-06-17 19:19:20 +05:30 · 2026-06-17 19:19:20 +05:30 · 5559ed686f
commit 5559ed686f
parent e074d2037f a2d9ed24ed
44 changed files with 2155 additions and 321 deletions
--- a/api/services/configuration/check_validity.py
+++ b/api/services/configuration/check_validity.py
@ -49,6 +49,7 @@ class UserConfigurationValidator:
            ServiceProviders.CAMB.value: self._check_camb_api_key,
            ServiceProviders.AWS_BEDROCK.value: self._check_aws_bedrock_api_key,
            ServiceProviders.SPEACHES.value: self._check_speaches_api_key,
+            ServiceProviders.HUGGINGFACE.value: self._check_huggingface_api_key,
            ServiceProviders.GOOGLE_VERTEX.value: self._check_google_vertex_llm_api_key,
            ServiceProviders.OPENAI_REALTIME.value: self._check_openai_api_key,
            ServiceProviders.GROK_REALTIME.value: self._check_grok_realtime_api_key,
@ -60,6 +61,7 @@ class UserConfigurationValidator:
            ServiceProviders.GLADIA.value: self._check_gladia_api_key,
            ServiceProviders.RIME.value: self._check_rime_api_key,
            ServiceProviders.MINIMAX.value: self._check_minimax_api_key,
+            ServiceProviders.SMALLEST.value: self._check_smallest_api_key,
        }

    async def validate(
@ -360,6 +362,14 @@ class UserConfigurationValidator:
            raise ValueError("base_url is required for Speaches services")
        return True

+    def _check_huggingface_api_key(self, model: str, api_key: str) -> bool:
+        """Check that a Hugging Face token carries the expected ``hf_`` prefix.
+
+        This is a format-only check: the body makes no request to Hugging Face,
+        so a well-formed token is accepted without being exercised.
+
+        Args:
+            model: Selected model identifier; not used by this check.
+            api_key: Token to inspect.
+
+        Returns:
+            True when the token starts with ``hf_``.
+
+        Raises:
+            ValueError: If the token does not start with ``hf_``.
+        """
+        # NOTE(review): BaseServiceConfiguration.api_key is typed
+        # `str | list[str]`; this assumes a single string is passed in -- confirm.
+        if not api_key.startswith("hf_"):
+            raise ValueError(
+                "Invalid Hugging Face API token format. Use a token that starts with "
+                "'hf_' and has Inference Providers permission."
+            )
+        return True
+
    def _check_google_vertex_realtime_api_key(self, model: str, service_config) -> bool:
        if not getattr(service_config, "project_id", None):
            raise ValueError("project_id is required for Google Vertex Realtime")
@ -389,6 +399,7 @@ class UserConfigurationValidator:
        return True

    def _check_minimax_api_key(self, model: str, api_key: str) -> bool:
-        # MiniMax doesn't publish a cheap key-validation endpoint; trust the key
-        # at save time and surface auth errors at first call (same as Rime/Sarvam).
+        # Accepted without a validation request: the key is trusted at save time,
+        # so an invalid key only surfaces as an auth error at first call.
+        return True
+
+    def _check_smallest_api_key(self, model: str, api_key: str) -> bool:
+        # Accepted as-is; no validation request is made for Smallest AI keys.
+        # NOTE(review): presumably for the same reason as MiniMax -- confirm.
        return True
--- a/api/services/configuration/registry.py
+++ b/api/services/configuration/registry.py
@ -68,6 +68,7 @@ class ServiceProviders(str, Enum):
    CAMB = "camb"
    AWS_BEDROCK = "aws_bedrock"
    SPEACHES = "speaches"
+    HUGGINGFACE = "huggingface"
    ASSEMBLYAI = "assemblyai"
    GLADIA = "gladia"
    RIME = "rime"
@ -79,6 +80,7 @@ class ServiceProviders(str, Enum):
    GOOGLE_REALTIME = "google_realtime"
    GOOGLE_VERTEX_REALTIME = "google_vertex_realtime"
    AZURE_REALTIME = "azure_realtime"
+    SMALLEST = "smallest"


 class BaseServiceConfiguration(BaseModel):
@ -94,6 +96,7 @@ class BaseServiceConfiguration(BaseModel):
        ServiceProviders.DOGRAH,
        ServiceProviders.AWS_BEDROCK,
        ServiceProviders.SPEACHES,
+        ServiceProviders.HUGGINGFACE,
        ServiceProviders.ASSEMBLYAI,
        ServiceProviders.GLADIA,
        ServiceProviders.RIME,
@ -106,6 +109,7 @@ class BaseServiceConfiguration(BaseModel):
        ServiceProviders.GOOGLE_VERTEX_REALTIME,
        ServiceProviders.AZURE_REALTIME,
        ServiceProviders.SARVAM,
+        ServiceProviders.SMALLEST,
    ]
    api_key: str | list[str]

@ -255,6 +259,11 @@ SPEACHES_PROVIDER_MODEL_CONFIG = provider_model_config(
    ),
    provider_docs_url="https://github.com/speaches-ai/speaches",
 )
+HUGGINGFACE_PROVIDER_MODEL_CONFIG = provider_model_config(
+    "Hugging Face",
+    description="Hosted Hugging Face Inference Providers API for usage-based inference.",
+    provider_docs_url="https://huggingface.co/docs/inference-providers/en/index",
+)
 AZURE_SPEECH_PROVIDER_MODEL_CONFIG = provider_model_config(
    "Azure Speech Services",
    description="Azure Cognitive Services Speech — TTS and STT via the Azure Speech SDK.",
@ -471,6 +480,35 @@ class SpeachesLLMConfiguration(BaseLLMConfiguration):
    )


+HUGGINGFACE_LLM_MODELS = [
+    "openai/gpt-oss-120b:cerebras",
+    "deepseek-ai/DeepSeek-R1:fastest",
+    "Qwen/Qwen3-Coder-480B-A35B-Instruct:fastest",
+]
+
+
+# Configuration schema for Hugging Face chat-completion (LLM) models.
+# NOTE(review): `register_llm` is defined outside this view; presumably it adds
+# the class to the LLM provider registry -- confirm.
+@register_llm
+class HuggingFaceLLMConfiguration(BaseLLMConfiguration):
+    model_config = HUGGINGFACE_PROVIDER_MODEL_CONFIG
+    # Fixed provider tag: only ServiceProviders.HUGGINGFACE is a valid value.
+    provider: Literal[ServiceProviders.HUGGINGFACE] = ServiceProviders.HUGGINGFACE
+    # Free-form identifier: HUGGINGFACE_LLM_MODELS is published as examples and
+    # the schema flags custom input as allowed.
+    model: str = Field(
+        default="openai/gpt-oss-120b:cerebras",
+        description="Hugging Face chat-completion model identifier, optionally with provider suffix.",
+        json_schema_extra={
+            "examples": HUGGINGFACE_LLM_MODELS,
+            "allow_custom_input": True,
+        },
+    )
+    # Forwarded by create_llm_service as the `base_url` keyword argument.
+    base_url: str = Field(
+        default="https://router.huggingface.co/v1",
+        description="Hugging Face OpenAI-compatible chat-completions router base URL.",
+    )
+    # Optional; forwarded by create_llm_service as the `bill_to` keyword argument.
+    bill_to: str | None = Field(
+        default=None,
+        description="Optional Hugging Face organization or user to bill using X-HF-Bill-To.",
+    )
+
+
 MINIMAX_MODELS = [
    "MiniMax-M2.7",
    "MiniMax-M2.7-highspeed",
@ -741,6 +779,7 @@ LLMConfig = Annotated[
        DograhLLMService,
        AWSBedrockLLMConfiguration,
        SpeachesLLMConfiguration,
+        HuggingFaceLLMConfiguration,
        MiniMaxLLMConfiguration,
        SarvamLLMConfiguration,
    ],
@ -907,6 +946,7 @@ class DograhTTSService(BaseTTSConfiguration):
    voice: str = Field(
        default="default",
        description="Voice preset.",
+        json_schema_extra={"allow_custom_input": True},
    )
    speed: float = Field(default=1.0, ge=0.5, le=2.0, description="Speed of the voice.")

@ -961,6 +1001,12 @@ class SarvamTTSConfiguration(BaseTTSConfiguration):
        description="BCP-47 Indian-language code (e.g. hi-IN, en-IN).",
        json_schema_extra={"examples": SARVAM_LANGUAGES},
    )
+    speed: float = Field(
+        default=1.0,
+        ge=0.5,
+        le=2.0,
+        description="Speech speed multiplier.",
+    )


 CAMB_TTS_MODELS = ["mars-flash", "mars-pro", "mars-instruct"]
@ -1120,6 +1166,80 @@ class AzureSpeechTTSConfiguration(BaseTTSConfiguration):
    )


+SMALLEST_PROVIDER_MODEL_CONFIG = provider_model_config(
+    "Smallest AI",
+    description="Smallest AI ultralow-latency TTS (Waves) and STT (Pulse) APIs.",
+    provider_docs_url="https://smallest.ai/docs",
+)
+
+SMALLEST_TTS_MODELS = ["lightning_v3.1", "lightning_v3.1_pro"]
+SMALLEST_TTS_VOICES = [
+    "sophia",
+    "avery",
+    "liam",
+    "lucas",
+    "olivia",
+    "ryan",
+    "freya",
+    "william",
+    "devansh",
+    "arjun",
+    "niharika",
+    "maya",
+    "dhruv",
+    "mia",
+    "maithili",
+]
+SMALLEST_TTS_LANGUAGES = [
+    "en",
+    "hi",
+    "fr",
+    "de",
+    "es",
+    "it",
+    "nl",
+    "pl",
+    "ru",
+    "ar",
+    "bn",
+    "gu",
+    "he",
+    "kn",
+    "mr",
+    "ta",
+]
+
+
+# Configuration schema for Smallest AI text-to-speech.
+# NOTE(review): `register_tts` is defined outside this view; presumably it adds
+# the class to the TTS provider registry -- confirm.
+@register_tts
+class SmallestAITTSConfiguration(BaseTTSConfiguration):
+    model_config = SMALLEST_PROVIDER_MODEL_CONFIG
+    # Fixed provider tag; TTSConfig uses `provider` as its union discriminator.
+    provider: Literal[ServiceProviders.SMALLEST] = ServiceProviders.SMALLEST
+    # create_tts_service rewrites "lightning-v" to "lightning_v" in this value
+    # before building the service settings.
+    # NOTE(review): the description mentions 12 languages while
+    # SMALLEST_TTS_LANGUAGES lists 16 example codes -- confirm which is current.
+    model: str = Field(
+        default="lightning_v3.1",
+        description="Smallest AI TTS model. lightning_v3.1_pro is the premium pool (American, British, Indian accents); lightning_v3.1 is the standard pool with 217 voices across 12 languages.",
+        json_schema_extra={"examples": SMALLEST_TTS_MODELS},
+    )
+    # SMALLEST_TTS_VOICES is published as examples; custom input is flagged as allowed.
+    voice: str = Field(
+        default="sophia",
+        description="Smallest AI voice ID.",
+        json_schema_extra={"examples": SMALLEST_TTS_VOICES, "allow_custom_input": True},
+    )
+    # create_tts_service falls back to English when this code is not a valid
+    # `Language` value.
+    language: str = Field(
+        default="en",
+        description="ISO 639-1 language code for synthesis.",
+        json_schema_extra={
+            "examples": SMALLEST_TTS_LANGUAGES,
+            "allow_custom_input": True,
+        },
+    )
+    # Bounded to [0.5, 2.0]; create_tts_service only sets the speed on the
+    # service settings when it is truthy and differs from 1.0.
+    speed: float = Field(
+        default=1.0,
+        ge=0.5,
+        le=2.0,
+        description="Speech speed multiplier (0.5 to 2.0).",
+    )
+
+
 TTSConfig = Annotated[
    Union[
        DeepgramTTSConfiguration,
@ -1134,6 +1254,7 @@ TTSConfig = Annotated[
        SpeachesTTSConfiguration,
        MiniMaxTTSConfiguration,
        AzureSpeechTTSConfiguration,
+        SmallestAITTSConfiguration,
    ],
    Field(discriminator="provider"),
 ]
@ -1334,6 +1455,38 @@ class SpeachesSTTConfiguration(BaseSTTConfiguration):
    )


+HUGGINGFACE_STT_MODELS = [
+    "openai/whisper-large-v3-turbo",
+    "openai/whisper-large-v3",
+]
+
+
+# Configuration schema for Hugging Face speech-to-text.
+# NOTE(review): `register_stt` is defined outside this view; presumably it adds
+# the class to the STT provider registry -- confirm.
+@register_stt
+class HuggingFaceSTTConfiguration(BaseSTTConfiguration):
+    model_config = HUGGINGFACE_PROVIDER_MODEL_CONFIG
+    # Fixed provider tag; STTConfig uses `provider` as its union discriminator.
+    provider: Literal[ServiceProviders.HUGGINGFACE] = ServiceProviders.HUGGINGFACE
+    # Free-form identifier: HUGGINGFACE_STT_MODELS is published as examples and
+    # the schema flags custom input as allowed.
+    model: str = Field(
+        default="openai/whisper-large-v3-turbo",
+        description="Hugging Face ASR model identifier served through Inference Providers.",
+        json_schema_extra={
+            "examples": HUGGINGFACE_STT_MODELS,
+            "allow_custom_input": True,
+        },
+    )
+    # create_stt_service falls back to this same URL when the value is empty
+    # and validates the result before constructing the service.
+    base_url: str = Field(
+        default="https://router.huggingface.co/hf-inference",
+        description="Hugging Face Inference Providers router base URL.",
+    )
+    # Optional; passed to the STT service as `bill_to`.
+    bill_to: str | None = Field(
+        default=None,
+        description="Optional Hugging Face organization or user to bill using X-HF-Bill-To.",
+    )
+    # Passed through to the STT service settings as `return_timestamps`.
+    return_timestamps: bool = Field(
+        default=False,
+        description="Request timestamp chunks when supported by the selected provider/model.",
+    )
+
+
 ASSEMBLYAI_STT_MODELS = ["u3-rt-pro"]
 ASSEMBLYAI_STT_LANGUAGES = ["en", "es", "de", "fr", "pt", "it"]

@ -1396,6 +1549,62 @@ class AzureSpeechSTTConfiguration(BaseSTTConfiguration):
    )


+SMALLEST_STT_MODELS = ["pulse"]
+SMALLEST_STT_LANGUAGES = [
+    "en",
+    "hi",
+    "fr",
+    "de",
+    "es",
+    "it",
+    "nl",
+    "pl",
+    "ru",
+    "pt",
+    "bn",
+    "gu",
+    "kn",
+    "ml",
+    "mr",
+    "ta",
+    "te",
+    "pa",
+    "or",
+    "bg",
+    "cs",
+    "da",
+    "et",
+    "fi",
+    "hu",
+    "lt",
+    "lv",
+    "mt",
+    "ro",
+    "sk",
+    "sv",
+    "uk",
+]
+
+
+# Configuration schema for Smallest AI speech-to-text.
+# NOTE(review): `register_stt` is defined outside this view; presumably it adds
+# the class to the STT provider registry -- confirm.
+@register_stt
+class SmallestAISTTConfiguration(BaseSTTConfiguration):
+    model_config = SMALLEST_PROVIDER_MODEL_CONFIG
+    # Fixed provider tag; STTConfig uses `provider` as its union discriminator.
+    provider: Literal[ServiceProviders.SMALLEST] = ServiceProviders.SMALLEST
+    # NOTE(review): the description mentions 38 languages while
+    # SMALLEST_STT_LANGUAGES lists 32 example codes -- confirm which is current.
+    model: str = Field(
+        default="pulse",
+        description="Smallest AI STT model. Supports 38 languages with real-time streaming.",
+        json_schema_extra={"examples": SMALLEST_STT_MODELS},
+    )
+    # create_stt_service falls back to English when this code is not a valid
+    # `Language` value.
+    language: str = Field(
+        default="en",
+        description="ISO 639-1 language code for transcription.",
+        json_schema_extra={
+            "examples": SMALLEST_STT_LANGUAGES,
+            "allow_custom_input": True,
+        },
+    )
+
+
 STTConfig = Annotated[
    Union[
        DeepgramSTTConfiguration,
@ -1406,9 +1615,11 @@ STTConfig = Annotated[
        SpeechmaticsSTTConfiguration,
        SarvamSTTConfiguration,
        SpeachesSTTConfiguration,
+        HuggingFaceSTTConfiguration,
        AssemblyAISTTConfiguration,
        GladiaSTTConfiguration,
        AzureSpeechSTTConfiguration,
+        SmallestAISTTConfiguration,
    ],
    Field(discriminator="provider"),
 ]
--- a/api/services/pipecat/event_handlers.py
+++ b/api/services/pipecat/event_handlers.py
@ -9,8 +9,8 @@ from api.services.integrations import IntegrationRuntimeSession
 from api.services.pipecat.audio_config import AudioConfig
 from api.services.pipecat.audio_playback import play_audio_loop
 from api.services.pipecat.in_memory_buffers import (
-    InMemoryAudioBuffer,
    InMemoryLogsBuffer,
+    InMemoryRecordingBuffers,
 )
 from api.services.pipecat.pipeline_metrics_aggregator import PipelineMetricsAggregator
 from api.services.pipecat.tracing_config import get_trace_url
@ -40,11 +40,11 @@ async def _capture_call_event(
            "workflow_run_id": workflow_run_id,
            "workflow_id": workflow_run.workflow_id if workflow_run else None,
            "call_type": workflow_run.mode if workflow_run else None,
-            "call_direction": (workflow_run.initial_context or {}).get(
-                "direction", "outbound"
-            )
-            if workflow_run
-            else None,
+            "call_direction": (
+                (workflow_run.initial_context or {}).get("direction", "outbound")
+                if workflow_run
+                else None
+            ),
        }
        if extra_properties:
            properties.update(extra_properties)
@ -73,7 +73,7 @@ def register_event_handlers(
    """Register all event handlers for transport and task events.

    Returns:
-        in_memory_audio_buffer for use by other handlers.
+        In-memory recording buffers for use by other handlers.
    """
    # Initialize in-memory buffers with proper audio configuration
    sample_rate = audio_config.pipeline_sample_rate if audio_config else 16000
@ -84,7 +84,7 @@ def register_event_handlers(
        f"with sample_rate={sample_rate}Hz, channels={num_channels}"
    )

-    in_memory_audio_buffer = InMemoryAudioBuffer(
+    in_memory_audio_buffers = InMemoryRecordingBuffers(
        workflow_run_id=workflow_run_id,
        sample_rate=sample_rate,
        num_channels=num_channels,
@ -363,14 +363,32 @@ def register_event_handlers(

        # Write buffers to temp files and enqueue combined processing task
        audio_temp_path = None
+        user_audio_temp_path = None
+        bot_audio_temp_path = None
        transcript_temp_path = None

        try:
-            if not in_memory_audio_buffer.is_empty:
-                audio_temp_path = await in_memory_audio_buffer.write_to_temp_file()
+            if not in_memory_audio_buffers.mixed.is_empty:
+                audio_temp_path = (
+                    await in_memory_audio_buffers.mixed.write_to_temp_file()
+                )
            else:
                logger.debug("Audio buffer is empty, skipping upload")

+            if not in_memory_audio_buffers.user.is_empty:
+                user_audio_temp_path = (
+                    await in_memory_audio_buffers.user.write_to_temp_file()
+                )
+            else:
+                logger.debug("User audio buffer is empty, skipping upload")
+
+            if not in_memory_audio_buffers.bot.is_empty:
+                bot_audio_temp_path = (
+                    await in_memory_audio_buffers.bot.write_to_temp_file()
+                )
+            else:
+                logger.debug("Bot audio buffer is empty, skipping upload")
+
            transcript_temp_path = in_memory_logs_buffer.write_transcript_to_temp_file()
            if not transcript_temp_path:
                logger.debug("No transcript events in logs buffer, skipping upload")
@ -385,16 +403,18 @@ def register_event_handlers(
            workflow_run_id,
            audio_temp_path,
            transcript_temp_path,
+            user_audio_temp_path,
+            bot_audio_temp_path,
        )

    # Return the buffer so it can be passed to other handlers
-    return in_memory_audio_buffer
+    return in_memory_audio_buffers


 def register_audio_data_handler(
    audio_buffer: AudioBufferProcessor,
    workflow_run_id,
-    in_memory_buffer: InMemoryAudioBuffer,
+    in_memory_buffers: InMemoryRecordingBuffers,
 ):
    """Register event handler for audio data"""
    logger.info(f"Registering audio data handler for workflow run {workflow_run_id}")
@ -404,9 +424,19 @@ def register_audio_data_handler(
        if not audio:
            return

-        # Use in-memory buffer
        try:
-            await in_memory_buffer.append(audio)
+            await in_memory_buffers.mixed.append(audio)
        except MemoryError as e:
-            logger.error(f"Memory buffer full: {e}")
-            # Could implement overflow to disk here if needed
+            logger.error(f"Mixed audio buffer full: {e}")
+
+    @audio_buffer.event_handler("on_track_audio_data")
+    async def on_track_audio_data(
+        buffer, user_audio, bot_audio, sample_rate, num_channels
+    ):
+        try:
+            if user_audio:
+                await in_memory_buffers.user.append(user_audio)
+            if bot_audio:
+                await in_memory_buffers.bot.append(bot_audio)
+        except MemoryError as e:
+            logger.error(f"Track audio buffer full: {e}")
--- a/api/services/pipecat/in_memory_buffers.py
+++ b/api/services/pipecat/in_memory_buffers.py
@ -75,6 +75,27 @@ class InMemoryAudioBuffer:
        return self._total_size


+class InMemoryRecordingBuffers:
+    """Bundle of the three in-memory audio buffers kept for one workflow run.
+
+    Attributes:
+        mixed: Buffer created with the caller-supplied ``num_channels``.
+        user: Single-channel buffer for the user track.
+        bot: Single-channel buffer for the bot track.
+    """
+
+    def __init__(self, workflow_run_id: int, sample_rate: int, num_channels: int = 1):
+        # Settings every buffer shares; only the channel count differs.
+        shared = {"workflow_run_id": workflow_run_id, "sample_rate": sample_rate}
+
+        self.mixed = InMemoryAudioBuffer(num_channels=num_channels, **shared)
+        self.user = InMemoryAudioBuffer(num_channels=1, **shared)
+        self.bot = InMemoryAudioBuffer(num_channels=1, **shared)
+
+
 class InMemoryLogsBuffer:
    """Buffer real-time feedback events in memory during a call, then save to workflow run logs."""

--- a/api/services/pipecat/service_factory.py
+++ b/api/services/pipecat/service_factory.py
@ -39,8 +39,17 @@ from pipecat.services.google.vertex.llm import (
    GoogleVertexLLMSettings,
 )
 from pipecat.services.groq.llm import GroqLLMService, GroqLLMSettings
+from pipecat.services.huggingface.llm import (
+    HuggingFaceLLMService,
+    HuggingFaceLLMSettings,
+)
+from pipecat.services.huggingface.stt import (
+    HuggingFaceSTTService,
+    HuggingFaceSTTSettings,
+)
 from pipecat.services.minimax.llm import MiniMaxLLMService
 from pipecat.services.minimax.tts import MiniMaxTTSSettings
+from pipecat.services.openai._constants import OPENAI_SAMPLE_RATE
 from pipecat.services.openai.base_llm import OpenAILLMSettings
 from pipecat.services.openai.llm import OpenAILLMService
 from pipecat.services.openai.stt import (
@ -53,6 +62,8 @@ from pipecat.services.rime.tts import RimeTTSService, RimeTTSSettings
 from pipecat.services.sarvam.llm import SarvamLLMService, SarvamLLMSettings
 from pipecat.services.sarvam.stt import SarvamSTTService, SarvamSTTSettings
 from pipecat.services.sarvam.tts import SarvamTTSService, SarvamTTSSettings
+from pipecat.services.smallest.stt import SmallestSTTService, SmallestSTTSettings
+from pipecat.services.smallest.tts import SmallestTTSService, SmallestTTSSettings
 from pipecat.services.speaches.llm import SpeachesLLMService, SpeachesLLMSettings
 from pipecat.services.speaches.stt import SpeachesSTTService, SpeachesSTTSettings
 from pipecat.services.speaches.tts import SpeachesTTSService, SpeachesTTSSettings
@ -218,6 +229,22 @@ def create_stt_service(
            ),
            sample_rate=audio_config.transport_in_sample_rate,
        )
+    elif user_config.stt.provider == ServiceProviders.HUGGINGFACE.value:
+        base_url = (
+            getattr(user_config.stt, "base_url", None)
+            or "https://router.huggingface.co/hf-inference"
+        )
+        _validate_runtime_service_url(base_url, "base_url")
+        return HuggingFaceSTTService(
+            api_key=user_config.stt.api_key,
+            base_url=base_url,
+            bill_to=getattr(user_config.stt, "bill_to", None),
+            settings=HuggingFaceSTTSettings(
+                model=user_config.stt.model,
+                return_timestamps=getattr(user_config.stt, "return_timestamps", False),
+            ),
+            sample_rate=audio_config.transport_in_sample_rate,
+        )
    elif user_config.stt.provider == ServiceProviders.ASSEMBLYAI.value:
        language = getattr(user_config.stt, "language", None)
        settings_kwargs = {"model": user_config.stt.model, "language": language}
@ -284,6 +311,20 @@ def create_stt_service(
            settings=AzureSTTSettings(language=pipecat_language),
            sample_rate=audio_config.transport_in_sample_rate,
        )
+    elif user_config.stt.provider == ServiceProviders.SMALLEST.value:
+        language_code = getattr(user_config.stt, "language", None) or "en"
+        try:
+            pipecat_language = Language(language_code)
+        except ValueError:
+            pipecat_language = Language.EN
+        return SmallestSTTService(
+            api_key=user_config.stt.api_key,
+            settings=SmallestSTTSettings(
+                model=user_config.stt.model,
+                language=pipecat_language,
+            ),
+            sample_rate=audio_config.transport_in_sample_rate,
+        )
    else:
        raise HTTPException(
            status_code=400, detail=f"Invalid STT provider {user_config.stt.provider}"
@ -320,6 +361,7 @@ def create_tts_service(
            kwargs["base_url"] = base_url
        return OpenAITTSService(
            api_key=user_config.tts.api_key,
+            sample_rate=OPENAI_SAMPLE_RATE,
            settings=OpenAITTSSettings(model=user_config.tts.model),
            text_filters=[xml_function_tag_filter],
            skip_aggregator_types=["recording_router", "recording"],
@ -493,13 +535,17 @@ def create_tts_service(
        pipecat_language = language_mapping.get(language, Language.HI)

        voice = getattr(user_config.tts, "voice", None) or "anushka"
+        speed = getattr(user_config.tts, "speed", None)
+        settings_kwargs = {
+            "model": user_config.tts.model,
+            "voice": voice,
+            "language": pipecat_language,
+        }
+        if speed and speed != 1.0:
+            settings_kwargs["pace"] = speed
        return SarvamTTSService(
            api_key=user_config.tts.api_key,
-            settings=SarvamTTSSettings(
-                model=user_config.tts.model,
-                voice=voice,
-                language=pipecat_language,
-            ),
+            settings=SarvamTTSSettings(**settings_kwargs),
            text_filters=[xml_function_tag_filter],
            skip_aggregator_types=["recording_router", "recording"],
            silence_time_s=1.0,
@ -560,6 +606,28 @@ def create_tts_service(
            skip_aggregator_types=["recording_router", "recording"],
            silence_time_s=1.0,
        )
+    elif user_config.tts.provider == ServiceProviders.SMALLEST.value:
+        language_code = getattr(user_config.tts, "language", None) or "en"
+        try:
+            pipecat_language = Language(language_code)
+        except ValueError:
+            pipecat_language = Language.EN
+        speed = getattr(user_config.tts, "speed", None)
+        model = user_config.tts.model.replace("lightning-v", "lightning_v")
+        settings_kwargs = SmallestTTSSettings(
+            model=model,
+            voice=user_config.tts.voice,
+            language=pipecat_language,
+        )
+        if speed and speed != 1.0:
+            settings_kwargs.speed = speed
+        return SmallestTTSService(
+            api_key=user_config.tts.api_key,
+            settings=settings_kwargs,
+            text_filters=[xml_function_tag_filter],
+            skip_aggregator_types=["recording_router", "recording"],
+            silence_time_s=1.0,
+        )
    else:
        raise HTTPException(
            status_code=400, detail=f"Invalid TTS provider {user_config.tts.provider}"
@ -581,6 +649,7 @@ def create_llm_service_from_provider(
    location: str | None = None,
    credentials: str | None = None,
    temperature: float | None = None,
+    bill_to: str | None = None,
 ):
    """Create an LLM service from explicit provider/model/api_key.

@ -663,6 +732,15 @@ def create_llm_service_from_provider(
            api_key=api_key or "none",
            settings=SpeachesLLMSettings(model=model),
        )
+    elif provider == ServiceProviders.HUGGINGFACE.value:
+        base_url = base_url or "https://router.huggingface.co/v1"
+        _validate_runtime_service_url(base_url, "base_url")
+        return HuggingFaceLLMService(
+            api_key=api_key,
+            base_url=base_url,
+            bill_to=bill_to,
+            settings=HuggingFaceLLMSettings(model=model, temperature=0.1),
+        )
    elif provider == ServiceProviders.MINIMAX.value:
        base_url = base_url or "https://api.minimax.io/v1"
        _validate_runtime_service_url(base_url, "base_url")
@ -875,6 +953,9 @@ def create_llm_service(user_config, correlation_id: str | None = None):
        kwargs["endpoint"] = user_config.llm.endpoint
    elif provider == ServiceProviders.SPEACHES.value:
        kwargs["base_url"] = user_config.llm.base_url
+    elif provider == ServiceProviders.HUGGINGFACE.value:
+        kwargs["base_url"] = user_config.llm.base_url
+        kwargs["bill_to"] = user_config.llm.bill_to
    elif provider == ServiceProviders.AWS_BEDROCK.value:
        kwargs["aws_access_key"] = user_config.llm.aws_access_key
        kwargs["aws_secret_key"] = user_config.llm.aws_secret_key
--- a/api/services/workflow/dto.py
+++ b/api/services/workflow/dto.py
@ -718,6 +718,8 @@ class TriggerNodeData(BaseNodeData):
                "rsvp": "{{gathered_context.rsvp}}",
                "duration": "{{cost_info.call_duration_seconds}}",
                "recording_url": "{{recording_url}}",
+                "user_recording_url": "{{user_recording_url}}",
+                "bot_recording_url": "{{bot_recording_url}}",
                "transcript_url": "{{transcript_url}}",
            },
        },