mirror of
https://github.com/dograh-hq/dograh.git
synced 2026-07-04 10:52:17 +02:00
Merge remote-tracking branch 'origin/main' into feat/user-onboarding
# Conflicts: # docs/api-reference/openapi.json # sdk/python/src/dograh_sdk/_generated_models.py # ui/src/client/index.ts # ui/src/components/AIModelConfigurationV2Editor.tsx
This commit is contained in:
commit
5559ed686f
44 changed files with 2155 additions and 321 deletions
|
|
@ -49,6 +49,7 @@ class UserConfigurationValidator:
|
|||
ServiceProviders.CAMB.value: self._check_camb_api_key,
|
||||
ServiceProviders.AWS_BEDROCK.value: self._check_aws_bedrock_api_key,
|
||||
ServiceProviders.SPEACHES.value: self._check_speaches_api_key,
|
||||
ServiceProviders.HUGGINGFACE.value: self._check_huggingface_api_key,
|
||||
ServiceProviders.GOOGLE_VERTEX.value: self._check_google_vertex_llm_api_key,
|
||||
ServiceProviders.OPENAI_REALTIME.value: self._check_openai_api_key,
|
||||
ServiceProviders.GROK_REALTIME.value: self._check_grok_realtime_api_key,
|
||||
|
|
@ -60,6 +61,7 @@ class UserConfigurationValidator:
|
|||
ServiceProviders.GLADIA.value: self._check_gladia_api_key,
|
||||
ServiceProviders.RIME.value: self._check_rime_api_key,
|
||||
ServiceProviders.MINIMAX.value: self._check_minimax_api_key,
|
||||
ServiceProviders.SMALLEST.value: self._check_smallest_api_key,
|
||||
}
|
||||
|
||||
async def validate(
|
||||
|
|
@ -360,6 +362,14 @@ class UserConfigurationValidator:
|
|||
raise ValueError("base_url is required for Speaches services")
|
||||
return True
|
||||
|
||||
def _check_huggingface_api_key(self, model: str, api_key: str) -> bool:
|
||||
if not api_key.startswith("hf_"):
|
||||
raise ValueError(
|
||||
"Invalid Hugging Face API token format. Use a token that starts with "
|
||||
"'hf_' and has Inference Providers permission."
|
||||
)
|
||||
return True
|
||||
|
||||
def _check_google_vertex_realtime_api_key(self, model: str, service_config) -> bool:
|
||||
if not getattr(service_config, "project_id", None):
|
||||
raise ValueError("project_id is required for Google Vertex Realtime")
|
||||
|
|
@ -389,6 +399,7 @@ class UserConfigurationValidator:
|
|||
return True
|
||||
|
||||
def _check_minimax_api_key(self, model: str, api_key: str) -> bool:
|
||||
# MiniMax doesn't publish a cheap key-validation endpoint; trust the key
|
||||
# at save time and surface auth errors at first call (same as Rime/Sarvam).
|
||||
return True
|
||||
|
||||
def _check_smallest_api_key(self, model: str, api_key: str) -> bool:
|
||||
return True
|
||||
|
|
|
|||
|
|
@ -68,6 +68,7 @@ class ServiceProviders(str, Enum):
|
|||
CAMB = "camb"
|
||||
AWS_BEDROCK = "aws_bedrock"
|
||||
SPEACHES = "speaches"
|
||||
HUGGINGFACE = "huggingface"
|
||||
ASSEMBLYAI = "assemblyai"
|
||||
GLADIA = "gladia"
|
||||
RIME = "rime"
|
||||
|
|
@ -79,6 +80,7 @@ class ServiceProviders(str, Enum):
|
|||
GOOGLE_REALTIME = "google_realtime"
|
||||
GOOGLE_VERTEX_REALTIME = "google_vertex_realtime"
|
||||
AZURE_REALTIME = "azure_realtime"
|
||||
SMALLEST = "smallest"
|
||||
|
||||
|
||||
class BaseServiceConfiguration(BaseModel):
|
||||
|
|
@ -94,6 +96,7 @@ class BaseServiceConfiguration(BaseModel):
|
|||
ServiceProviders.DOGRAH,
|
||||
ServiceProviders.AWS_BEDROCK,
|
||||
ServiceProviders.SPEACHES,
|
||||
ServiceProviders.HUGGINGFACE,
|
||||
ServiceProviders.ASSEMBLYAI,
|
||||
ServiceProviders.GLADIA,
|
||||
ServiceProviders.RIME,
|
||||
|
|
@ -106,6 +109,7 @@ class BaseServiceConfiguration(BaseModel):
|
|||
ServiceProviders.GOOGLE_VERTEX_REALTIME,
|
||||
ServiceProviders.AZURE_REALTIME,
|
||||
ServiceProviders.SARVAM,
|
||||
ServiceProviders.SMALLEST,
|
||||
]
|
||||
api_key: str | list[str]
|
||||
|
||||
|
|
@ -255,6 +259,11 @@ SPEACHES_PROVIDER_MODEL_CONFIG = provider_model_config(
|
|||
),
|
||||
provider_docs_url="https://github.com/speaches-ai/speaches",
|
||||
)
|
||||
HUGGINGFACE_PROVIDER_MODEL_CONFIG = provider_model_config(
|
||||
"Hugging Face",
|
||||
description="Hosted Hugging Face Inference Providers API for usage-based inference.",
|
||||
provider_docs_url="https://huggingface.co/docs/inference-providers/en/index",
|
||||
)
|
||||
AZURE_SPEECH_PROVIDER_MODEL_CONFIG = provider_model_config(
|
||||
"Azure Speech Services",
|
||||
description="Azure Cognitive Services Speech — TTS and STT via the Azure Speech SDK.",
|
||||
|
|
@ -471,6 +480,35 @@ class SpeachesLLMConfiguration(BaseLLMConfiguration):
|
|||
)
|
||||
|
||||
|
||||
HUGGINGFACE_LLM_MODELS = [
|
||||
"openai/gpt-oss-120b:cerebras",
|
||||
"deepseek-ai/DeepSeek-R1:fastest",
|
||||
"Qwen/Qwen3-Coder-480B-A35B-Instruct:fastest",
|
||||
]
|
||||
|
||||
|
||||
@register_llm
|
||||
class HuggingFaceLLMConfiguration(BaseLLMConfiguration):
|
||||
model_config = HUGGINGFACE_PROVIDER_MODEL_CONFIG
|
||||
provider: Literal[ServiceProviders.HUGGINGFACE] = ServiceProviders.HUGGINGFACE
|
||||
model: str = Field(
|
||||
default="openai/gpt-oss-120b:cerebras",
|
||||
description="Hugging Face chat-completion model identifier, optionally with provider suffix.",
|
||||
json_schema_extra={
|
||||
"examples": HUGGINGFACE_LLM_MODELS,
|
||||
"allow_custom_input": True,
|
||||
},
|
||||
)
|
||||
base_url: str = Field(
|
||||
default="https://router.huggingface.co/v1",
|
||||
description="Hugging Face OpenAI-compatible chat-completions router base URL.",
|
||||
)
|
||||
bill_to: str | None = Field(
|
||||
default=None,
|
||||
description="Optional Hugging Face organization or user to bill using X-HF-Bill-To.",
|
||||
)
|
||||
|
||||
|
||||
MINIMAX_MODELS = [
|
||||
"MiniMax-M2.7",
|
||||
"MiniMax-M2.7-highspeed",
|
||||
|
|
@ -741,6 +779,7 @@ LLMConfig = Annotated[
|
|||
DograhLLMService,
|
||||
AWSBedrockLLMConfiguration,
|
||||
SpeachesLLMConfiguration,
|
||||
HuggingFaceLLMConfiguration,
|
||||
MiniMaxLLMConfiguration,
|
||||
SarvamLLMConfiguration,
|
||||
],
|
||||
|
|
@ -907,6 +946,7 @@ class DograhTTSService(BaseTTSConfiguration):
|
|||
voice: str = Field(
|
||||
default="default",
|
||||
description="Voice preset.",
|
||||
json_schema_extra={"allow_custom_input": True},
|
||||
)
|
||||
speed: float = Field(default=1.0, ge=0.5, le=2.0, description="Speed of the voice.")
|
||||
|
||||
|
|
@ -961,6 +1001,12 @@ class SarvamTTSConfiguration(BaseTTSConfiguration):
|
|||
description="BCP-47 Indian-language code (e.g. hi-IN, en-IN).",
|
||||
json_schema_extra={"examples": SARVAM_LANGUAGES},
|
||||
)
|
||||
speed: float = Field(
|
||||
default=1.0,
|
||||
ge=0.5,
|
||||
le=2.0,
|
||||
description="Speech speed multiplier.",
|
||||
)
|
||||
|
||||
|
||||
CAMB_TTS_MODELS = ["mars-flash", "mars-pro", "mars-instruct"]
|
||||
|
|
@ -1120,6 +1166,80 @@ class AzureSpeechTTSConfiguration(BaseTTSConfiguration):
|
|||
)
|
||||
|
||||
|
||||
SMALLEST_PROVIDER_MODEL_CONFIG = provider_model_config(
|
||||
"Smallest AI",
|
||||
description="Smallest AI ultralow-latency TTS (Waves) and STT (Pulse) APIs.",
|
||||
provider_docs_url="https://smallest.ai/docs",
|
||||
)
|
||||
|
||||
SMALLEST_TTS_MODELS = ["lightning_v3.1", "lightning_v3.1_pro"]
|
||||
SMALLEST_TTS_VOICES = [
|
||||
"sophia",
|
||||
"avery",
|
||||
"liam",
|
||||
"lucas",
|
||||
"olivia",
|
||||
"ryan",
|
||||
"freya",
|
||||
"william",
|
||||
"devansh",
|
||||
"arjun",
|
||||
"niharika",
|
||||
"maya",
|
||||
"dhruv",
|
||||
"mia",
|
||||
"maithili",
|
||||
]
|
||||
SMALLEST_TTS_LANGUAGES = [
|
||||
"en",
|
||||
"hi",
|
||||
"fr",
|
||||
"de",
|
||||
"es",
|
||||
"it",
|
||||
"nl",
|
||||
"pl",
|
||||
"ru",
|
||||
"ar",
|
||||
"bn",
|
||||
"gu",
|
||||
"he",
|
||||
"kn",
|
||||
"mr",
|
||||
"ta",
|
||||
]
|
||||
|
||||
|
||||
@register_tts
|
||||
class SmallestAITTSConfiguration(BaseTTSConfiguration):
|
||||
model_config = SMALLEST_PROVIDER_MODEL_CONFIG
|
||||
provider: Literal[ServiceProviders.SMALLEST] = ServiceProviders.SMALLEST
|
||||
model: str = Field(
|
||||
default="lightning_v3.1",
|
||||
description="Smallest AI TTS model. lightning_v3.1_pro is the premium pool (American, British, Indian accents); lightning_v3.1 is the standard pool with 217 voices across 12 languages.",
|
||||
json_schema_extra={"examples": SMALLEST_TTS_MODELS},
|
||||
)
|
||||
voice: str = Field(
|
||||
default="sophia",
|
||||
description="Smallest AI voice ID.",
|
||||
json_schema_extra={"examples": SMALLEST_TTS_VOICES, "allow_custom_input": True},
|
||||
)
|
||||
language: str = Field(
|
||||
default="en",
|
||||
description="ISO 639-1 language code for synthesis.",
|
||||
json_schema_extra={
|
||||
"examples": SMALLEST_TTS_LANGUAGES,
|
||||
"allow_custom_input": True,
|
||||
},
|
||||
)
|
||||
speed: float = Field(
|
||||
default=1.0,
|
||||
ge=0.5,
|
||||
le=2.0,
|
||||
description="Speech speed multiplier (0.5 to 2.0).",
|
||||
)
|
||||
|
||||
|
||||
TTSConfig = Annotated[
|
||||
Union[
|
||||
DeepgramTTSConfiguration,
|
||||
|
|
@ -1134,6 +1254,7 @@ TTSConfig = Annotated[
|
|||
SpeachesTTSConfiguration,
|
||||
MiniMaxTTSConfiguration,
|
||||
AzureSpeechTTSConfiguration,
|
||||
SmallestAITTSConfiguration,
|
||||
],
|
||||
Field(discriminator="provider"),
|
||||
]
|
||||
|
|
@ -1334,6 +1455,38 @@ class SpeachesSTTConfiguration(BaseSTTConfiguration):
|
|||
)
|
||||
|
||||
|
||||
HUGGINGFACE_STT_MODELS = [
|
||||
"openai/whisper-large-v3-turbo",
|
||||
"openai/whisper-large-v3",
|
||||
]
|
||||
|
||||
|
||||
@register_stt
|
||||
class HuggingFaceSTTConfiguration(BaseSTTConfiguration):
|
||||
model_config = HUGGINGFACE_PROVIDER_MODEL_CONFIG
|
||||
provider: Literal[ServiceProviders.HUGGINGFACE] = ServiceProviders.HUGGINGFACE
|
||||
model: str = Field(
|
||||
default="openai/whisper-large-v3-turbo",
|
||||
description="Hugging Face ASR model identifier served through Inference Providers.",
|
||||
json_schema_extra={
|
||||
"examples": HUGGINGFACE_STT_MODELS,
|
||||
"allow_custom_input": True,
|
||||
},
|
||||
)
|
||||
base_url: str = Field(
|
||||
default="https://router.huggingface.co/hf-inference",
|
||||
description="Hugging Face Inference Providers router base URL.",
|
||||
)
|
||||
bill_to: str | None = Field(
|
||||
default=None,
|
||||
description="Optional Hugging Face organization or user to bill using X-HF-Bill-To.",
|
||||
)
|
||||
return_timestamps: bool = Field(
|
||||
default=False,
|
||||
description="Request timestamp chunks when supported by the selected provider/model.",
|
||||
)
|
||||
|
||||
|
||||
ASSEMBLYAI_STT_MODELS = ["u3-rt-pro"]
|
||||
ASSEMBLYAI_STT_LANGUAGES = ["en", "es", "de", "fr", "pt", "it"]
|
||||
|
||||
|
|
@ -1396,6 +1549,62 @@ class AzureSpeechSTTConfiguration(BaseSTTConfiguration):
|
|||
)
|
||||
|
||||
|
||||
SMALLEST_STT_MODELS = ["pulse"]
|
||||
SMALLEST_STT_LANGUAGES = [
|
||||
"en",
|
||||
"hi",
|
||||
"fr",
|
||||
"de",
|
||||
"es",
|
||||
"it",
|
||||
"nl",
|
||||
"pl",
|
||||
"ru",
|
||||
"pt",
|
||||
"bn",
|
||||
"gu",
|
||||
"kn",
|
||||
"ml",
|
||||
"mr",
|
||||
"ta",
|
||||
"te",
|
||||
"pa",
|
||||
"or",
|
||||
"bg",
|
||||
"cs",
|
||||
"da",
|
||||
"et",
|
||||
"fi",
|
||||
"hu",
|
||||
"lt",
|
||||
"lv",
|
||||
"mt",
|
||||
"ro",
|
||||
"sk",
|
||||
"sv",
|
||||
"uk",
|
||||
]
|
||||
|
||||
|
||||
@register_stt
|
||||
class SmallestAISTTConfiguration(BaseSTTConfiguration):
|
||||
model_config = SMALLEST_PROVIDER_MODEL_CONFIG
|
||||
provider: Literal[ServiceProviders.SMALLEST] = ServiceProviders.SMALLEST
|
||||
model: str = Field(
|
||||
default="pulse",
|
||||
description="Smallest AI STT model. Supports 38 languages with real-time streaming.",
|
||||
json_schema_extra={"examples": SMALLEST_STT_MODELS},
|
||||
)
|
||||
language: str = Field(
|
||||
default="en",
|
||||
description="ISO 639-1 language code for transcription.",
|
||||
json_schema_extra={
|
||||
"examples": SMALLEST_STT_LANGUAGES,
|
||||
"allow_custom_input": True,
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
STTConfig = Annotated[
|
||||
Union[
|
||||
DeepgramSTTConfiguration,
|
||||
|
|
@ -1406,9 +1615,11 @@ STTConfig = Annotated[
|
|||
SpeechmaticsSTTConfiguration,
|
||||
SarvamSTTConfiguration,
|
||||
SpeachesSTTConfiguration,
|
||||
HuggingFaceSTTConfiguration,
|
||||
AssemblyAISTTConfiguration,
|
||||
GladiaSTTConfiguration,
|
||||
AzureSpeechSTTConfiguration,
|
||||
SmallestAISTTConfiguration,
|
||||
],
|
||||
Field(discriminator="provider"),
|
||||
]
|
||||
|
|
|
|||
|
|
@ -9,8 +9,8 @@ from api.services.integrations import IntegrationRuntimeSession
|
|||
from api.services.pipecat.audio_config import AudioConfig
|
||||
from api.services.pipecat.audio_playback import play_audio_loop
|
||||
from api.services.pipecat.in_memory_buffers import (
|
||||
InMemoryAudioBuffer,
|
||||
InMemoryLogsBuffer,
|
||||
InMemoryRecordingBuffers,
|
||||
)
|
||||
from api.services.pipecat.pipeline_metrics_aggregator import PipelineMetricsAggregator
|
||||
from api.services.pipecat.tracing_config import get_trace_url
|
||||
|
|
@ -40,11 +40,11 @@ async def _capture_call_event(
|
|||
"workflow_run_id": workflow_run_id,
|
||||
"workflow_id": workflow_run.workflow_id if workflow_run else None,
|
||||
"call_type": workflow_run.mode if workflow_run else None,
|
||||
"call_direction": (workflow_run.initial_context or {}).get(
|
||||
"direction", "outbound"
|
||||
)
|
||||
if workflow_run
|
||||
else None,
|
||||
"call_direction": (
|
||||
(workflow_run.initial_context or {}).get("direction", "outbound")
|
||||
if workflow_run
|
||||
else None
|
||||
),
|
||||
}
|
||||
if extra_properties:
|
||||
properties.update(extra_properties)
|
||||
|
|
@ -73,7 +73,7 @@ def register_event_handlers(
|
|||
"""Register all event handlers for transport and task events.
|
||||
|
||||
Returns:
|
||||
in_memory_audio_buffer for use by other handlers.
|
||||
In-memory recording buffers for use by other handlers.
|
||||
"""
|
||||
# Initialize in-memory buffers with proper audio configuration
|
||||
sample_rate = audio_config.pipeline_sample_rate if audio_config else 16000
|
||||
|
|
@ -84,7 +84,7 @@ def register_event_handlers(
|
|||
f"with sample_rate={sample_rate}Hz, channels={num_channels}"
|
||||
)
|
||||
|
||||
in_memory_audio_buffer = InMemoryAudioBuffer(
|
||||
in_memory_audio_buffers = InMemoryRecordingBuffers(
|
||||
workflow_run_id=workflow_run_id,
|
||||
sample_rate=sample_rate,
|
||||
num_channels=num_channels,
|
||||
|
|
@ -363,14 +363,32 @@ def register_event_handlers(
|
|||
|
||||
# Write buffers to temp files and enqueue combined processing task
|
||||
audio_temp_path = None
|
||||
user_audio_temp_path = None
|
||||
bot_audio_temp_path = None
|
||||
transcript_temp_path = None
|
||||
|
||||
try:
|
||||
if not in_memory_audio_buffer.is_empty:
|
||||
audio_temp_path = await in_memory_audio_buffer.write_to_temp_file()
|
||||
if not in_memory_audio_buffers.mixed.is_empty:
|
||||
audio_temp_path = (
|
||||
await in_memory_audio_buffers.mixed.write_to_temp_file()
|
||||
)
|
||||
else:
|
||||
logger.debug("Audio buffer is empty, skipping upload")
|
||||
|
||||
if not in_memory_audio_buffers.user.is_empty:
|
||||
user_audio_temp_path = (
|
||||
await in_memory_audio_buffers.user.write_to_temp_file()
|
||||
)
|
||||
else:
|
||||
logger.debug("User audio buffer is empty, skipping upload")
|
||||
|
||||
if not in_memory_audio_buffers.bot.is_empty:
|
||||
bot_audio_temp_path = (
|
||||
await in_memory_audio_buffers.bot.write_to_temp_file()
|
||||
)
|
||||
else:
|
||||
logger.debug("Bot audio buffer is empty, skipping upload")
|
||||
|
||||
transcript_temp_path = in_memory_logs_buffer.write_transcript_to_temp_file()
|
||||
if not transcript_temp_path:
|
||||
logger.debug("No transcript events in logs buffer, skipping upload")
|
||||
|
|
@ -385,16 +403,18 @@ def register_event_handlers(
|
|||
workflow_run_id,
|
||||
audio_temp_path,
|
||||
transcript_temp_path,
|
||||
user_audio_temp_path,
|
||||
bot_audio_temp_path,
|
||||
)
|
||||
|
||||
# Return the buffer so it can be passed to other handlers
|
||||
return in_memory_audio_buffer
|
||||
return in_memory_audio_buffers
|
||||
|
||||
|
||||
def register_audio_data_handler(
|
||||
audio_buffer: AudioBufferProcessor,
|
||||
workflow_run_id,
|
||||
in_memory_buffer: InMemoryAudioBuffer,
|
||||
in_memory_buffers: InMemoryRecordingBuffers,
|
||||
):
|
||||
"""Register event handler for audio data"""
|
||||
logger.info(f"Registering audio data handler for workflow run {workflow_run_id}")
|
||||
|
|
@ -404,9 +424,19 @@ def register_audio_data_handler(
|
|||
if not audio:
|
||||
return
|
||||
|
||||
# Use in-memory buffer
|
||||
try:
|
||||
await in_memory_buffer.append(audio)
|
||||
await in_memory_buffers.mixed.append(audio)
|
||||
except MemoryError as e:
|
||||
logger.error(f"Memory buffer full: {e}")
|
||||
# Could implement overflow to disk here if needed
|
||||
logger.error(f"Mixed audio buffer full: {e}")
|
||||
|
||||
@audio_buffer.event_handler("on_track_audio_data")
|
||||
async def on_track_audio_data(
|
||||
buffer, user_audio, bot_audio, sample_rate, num_channels
|
||||
):
|
||||
try:
|
||||
if user_audio:
|
||||
await in_memory_buffers.user.append(user_audio)
|
||||
if bot_audio:
|
||||
await in_memory_buffers.bot.append(bot_audio)
|
||||
except MemoryError as e:
|
||||
logger.error(f"Track audio buffer full: {e}")
|
||||
|
|
|
|||
|
|
@ -75,6 +75,27 @@ class InMemoryAudioBuffer:
|
|||
return self._total_size
|
||||
|
||||
|
||||
class InMemoryRecordingBuffers:
|
||||
"""Holds the mixed recording plus aligned user and bot mono tracks."""
|
||||
|
||||
def __init__(self, workflow_run_id: int, sample_rate: int, num_channels: int = 1):
|
||||
self.mixed = InMemoryAudioBuffer(
|
||||
workflow_run_id=workflow_run_id,
|
||||
sample_rate=sample_rate,
|
||||
num_channels=num_channels,
|
||||
)
|
||||
self.user = InMemoryAudioBuffer(
|
||||
workflow_run_id=workflow_run_id,
|
||||
sample_rate=sample_rate,
|
||||
num_channels=1,
|
||||
)
|
||||
self.bot = InMemoryAudioBuffer(
|
||||
workflow_run_id=workflow_run_id,
|
||||
sample_rate=sample_rate,
|
||||
num_channels=1,
|
||||
)
|
||||
|
||||
|
||||
class InMemoryLogsBuffer:
|
||||
"""Buffer real-time feedback events in memory during a call, then save to workflow run logs."""
|
||||
|
||||
|
|
|
|||
|
|
@ -39,8 +39,17 @@ from pipecat.services.google.vertex.llm import (
|
|||
GoogleVertexLLMSettings,
|
||||
)
|
||||
from pipecat.services.groq.llm import GroqLLMService, GroqLLMSettings
|
||||
from pipecat.services.huggingface.llm import (
|
||||
HuggingFaceLLMService,
|
||||
HuggingFaceLLMSettings,
|
||||
)
|
||||
from pipecat.services.huggingface.stt import (
|
||||
HuggingFaceSTTService,
|
||||
HuggingFaceSTTSettings,
|
||||
)
|
||||
from pipecat.services.minimax.llm import MiniMaxLLMService
|
||||
from pipecat.services.minimax.tts import MiniMaxTTSSettings
|
||||
from pipecat.services.openai._constants import OPENAI_SAMPLE_RATE
|
||||
from pipecat.services.openai.base_llm import OpenAILLMSettings
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.services.openai.stt import (
|
||||
|
|
@ -53,6 +62,8 @@ from pipecat.services.rime.tts import RimeTTSService, RimeTTSSettings
|
|||
from pipecat.services.sarvam.llm import SarvamLLMService, SarvamLLMSettings
|
||||
from pipecat.services.sarvam.stt import SarvamSTTService, SarvamSTTSettings
|
||||
from pipecat.services.sarvam.tts import SarvamTTSService, SarvamTTSSettings
|
||||
from pipecat.services.smallest.stt import SmallestSTTService, SmallestSTTSettings
|
||||
from pipecat.services.smallest.tts import SmallestTTSService, SmallestTTSSettings
|
||||
from pipecat.services.speaches.llm import SpeachesLLMService, SpeachesLLMSettings
|
||||
from pipecat.services.speaches.stt import SpeachesSTTService, SpeachesSTTSettings
|
||||
from pipecat.services.speaches.tts import SpeachesTTSService, SpeachesTTSSettings
|
||||
|
|
@ -218,6 +229,22 @@ def create_stt_service(
|
|||
),
|
||||
sample_rate=audio_config.transport_in_sample_rate,
|
||||
)
|
||||
elif user_config.stt.provider == ServiceProviders.HUGGINGFACE.value:
|
||||
base_url = (
|
||||
getattr(user_config.stt, "base_url", None)
|
||||
or "https://router.huggingface.co/hf-inference"
|
||||
)
|
||||
_validate_runtime_service_url(base_url, "base_url")
|
||||
return HuggingFaceSTTService(
|
||||
api_key=user_config.stt.api_key,
|
||||
base_url=base_url,
|
||||
bill_to=getattr(user_config.stt, "bill_to", None),
|
||||
settings=HuggingFaceSTTSettings(
|
||||
model=user_config.stt.model,
|
||||
return_timestamps=getattr(user_config.stt, "return_timestamps", False),
|
||||
),
|
||||
sample_rate=audio_config.transport_in_sample_rate,
|
||||
)
|
||||
elif user_config.stt.provider == ServiceProviders.ASSEMBLYAI.value:
|
||||
language = getattr(user_config.stt, "language", None)
|
||||
settings_kwargs = {"model": user_config.stt.model, "language": language}
|
||||
|
|
@ -284,6 +311,20 @@ def create_stt_service(
|
|||
settings=AzureSTTSettings(language=pipecat_language),
|
||||
sample_rate=audio_config.transport_in_sample_rate,
|
||||
)
|
||||
elif user_config.stt.provider == ServiceProviders.SMALLEST.value:
|
||||
language_code = getattr(user_config.stt, "language", None) or "en"
|
||||
try:
|
||||
pipecat_language = Language(language_code)
|
||||
except ValueError:
|
||||
pipecat_language = Language.EN
|
||||
return SmallestSTTService(
|
||||
api_key=user_config.stt.api_key,
|
||||
settings=SmallestSTTSettings(
|
||||
model=user_config.stt.model,
|
||||
language=pipecat_language,
|
||||
),
|
||||
sample_rate=audio_config.transport_in_sample_rate,
|
||||
)
|
||||
else:
|
||||
raise HTTPException(
|
||||
status_code=400, detail=f"Invalid STT provider {user_config.stt.provider}"
|
||||
|
|
@ -320,6 +361,7 @@ def create_tts_service(
|
|||
kwargs["base_url"] = base_url
|
||||
return OpenAITTSService(
|
||||
api_key=user_config.tts.api_key,
|
||||
sample_rate=OPENAI_SAMPLE_RATE,
|
||||
settings=OpenAITTSSettings(model=user_config.tts.model),
|
||||
text_filters=[xml_function_tag_filter],
|
||||
skip_aggregator_types=["recording_router", "recording"],
|
||||
|
|
@ -493,13 +535,17 @@ def create_tts_service(
|
|||
pipecat_language = language_mapping.get(language, Language.HI)
|
||||
|
||||
voice = getattr(user_config.tts, "voice", None) or "anushka"
|
||||
speed = getattr(user_config.tts, "speed", None)
|
||||
settings_kwargs = {
|
||||
"model": user_config.tts.model,
|
||||
"voice": voice,
|
||||
"language": pipecat_language,
|
||||
}
|
||||
if speed and speed != 1.0:
|
||||
settings_kwargs["pace"] = speed
|
||||
return SarvamTTSService(
|
||||
api_key=user_config.tts.api_key,
|
||||
settings=SarvamTTSSettings(
|
||||
model=user_config.tts.model,
|
||||
voice=voice,
|
||||
language=pipecat_language,
|
||||
),
|
||||
settings=SarvamTTSSettings(**settings_kwargs),
|
||||
text_filters=[xml_function_tag_filter],
|
||||
skip_aggregator_types=["recording_router", "recording"],
|
||||
silence_time_s=1.0,
|
||||
|
|
@ -560,6 +606,28 @@ def create_tts_service(
|
|||
skip_aggregator_types=["recording_router", "recording"],
|
||||
silence_time_s=1.0,
|
||||
)
|
||||
elif user_config.tts.provider == ServiceProviders.SMALLEST.value:
|
||||
language_code = getattr(user_config.tts, "language", None) or "en"
|
||||
try:
|
||||
pipecat_language = Language(language_code)
|
||||
except ValueError:
|
||||
pipecat_language = Language.EN
|
||||
speed = getattr(user_config.tts, "speed", None)
|
||||
model = user_config.tts.model.replace("lightning-v", "lightning_v")
|
||||
settings_kwargs = SmallestTTSSettings(
|
||||
model=model,
|
||||
voice=user_config.tts.voice,
|
||||
language=pipecat_language,
|
||||
)
|
||||
if speed and speed != 1.0:
|
||||
settings_kwargs.speed = speed
|
||||
return SmallestTTSService(
|
||||
api_key=user_config.tts.api_key,
|
||||
settings=settings_kwargs,
|
||||
text_filters=[xml_function_tag_filter],
|
||||
skip_aggregator_types=["recording_router", "recording"],
|
||||
silence_time_s=1.0,
|
||||
)
|
||||
else:
|
||||
raise HTTPException(
|
||||
status_code=400, detail=f"Invalid TTS provider {user_config.tts.provider}"
|
||||
|
|
@ -581,6 +649,7 @@ def create_llm_service_from_provider(
|
|||
location: str | None = None,
|
||||
credentials: str | None = None,
|
||||
temperature: float | None = None,
|
||||
bill_to: str | None = None,
|
||||
):
|
||||
"""Create an LLM service from explicit provider/model/api_key.
|
||||
|
||||
|
|
@ -663,6 +732,15 @@ def create_llm_service_from_provider(
|
|||
api_key=api_key or "none",
|
||||
settings=SpeachesLLMSettings(model=model),
|
||||
)
|
||||
elif provider == ServiceProviders.HUGGINGFACE.value:
|
||||
base_url = base_url or "https://router.huggingface.co/v1"
|
||||
_validate_runtime_service_url(base_url, "base_url")
|
||||
return HuggingFaceLLMService(
|
||||
api_key=api_key,
|
||||
base_url=base_url,
|
||||
bill_to=bill_to,
|
||||
settings=HuggingFaceLLMSettings(model=model, temperature=0.1),
|
||||
)
|
||||
elif provider == ServiceProviders.MINIMAX.value:
|
||||
base_url = base_url or "https://api.minimax.io/v1"
|
||||
_validate_runtime_service_url(base_url, "base_url")
|
||||
|
|
@ -875,6 +953,9 @@ def create_llm_service(user_config, correlation_id: str | None = None):
|
|||
kwargs["endpoint"] = user_config.llm.endpoint
|
||||
elif provider == ServiceProviders.SPEACHES.value:
|
||||
kwargs["base_url"] = user_config.llm.base_url
|
||||
elif provider == ServiceProviders.HUGGINGFACE.value:
|
||||
kwargs["base_url"] = user_config.llm.base_url
|
||||
kwargs["bill_to"] = user_config.llm.bill_to
|
||||
elif provider == ServiceProviders.AWS_BEDROCK.value:
|
||||
kwargs["aws_access_key"] = user_config.llm.aws_access_key
|
||||
kwargs["aws_secret_key"] = user_config.llm.aws_secret_key
|
||||
|
|
|
|||
|
|
@ -718,6 +718,8 @@ class TriggerNodeData(BaseNodeData):
|
|||
"rsvp": "{{gathered_context.rsvp}}",
|
||||
"duration": "{{cost_info.call_duration_seconds}}",
|
||||
"recording_url": "{{recording_url}}",
|
||||
"user_recording_url": "{{user_recording_url}}",
|
||||
"bot_recording_url": "{{bot_recording_url}}",
|
||||
"transcript_url": "{{transcript_url}}",
|
||||
},
|
||||
},
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue