mirror of
https://github.com/dograh-hq/dograh.git
synced 2026-06-16 08:25:18 +02:00
Merge branch 'main' into feat/call-tags
This commit is contained in:
commit
ea0967fd9c
41 changed files with 480 additions and 317 deletions
|
|
@ -278,7 +278,48 @@ class DograhTTSService(BaseTTSConfiguration):
|
|||
|
||||
|
||||
SARVAM_TTS_MODELS = ["bulbul:v2", "bulbul:v3"]
|
||||
SARVAM_VOICES = ["anushka", "manisha", "vidya", "arya", "abhilash", "karun", "hitesh"]
|
||||
SARVAM_V2_VOICES = ["anushka", "manisha", "vidya", "arya", "abhilash", "karun", "hitesh"]
|
||||
SARVAM_V3_VOICES = [
|
||||
"shubh",
|
||||
"aditya",
|
||||
"ritu",
|
||||
"priya",
|
||||
"neha",
|
||||
"rahul",
|
||||
"pooja",
|
||||
"rohan",
|
||||
"simran",
|
||||
"kavya",
|
||||
"amit",
|
||||
"dev",
|
||||
"ishita",
|
||||
"shreya",
|
||||
"ratan",
|
||||
"varun",
|
||||
"manan",
|
||||
"sumit",
|
||||
"roopa",
|
||||
"kabir",
|
||||
"aayan",
|
||||
"ashutosh",
|
||||
"advait",
|
||||
"amelia",
|
||||
"sophia",
|
||||
"anand",
|
||||
"tanya",
|
||||
"tarun",
|
||||
"sunny",
|
||||
"mani",
|
||||
"gokul",
|
||||
"vijay",
|
||||
"shruti",
|
||||
"suhani",
|
||||
"mohit",
|
||||
"kavitha",
|
||||
"rehan",
|
||||
"soham",
|
||||
"rupali",
|
||||
]
|
||||
SARVAM_LANGUAGES = [
|
||||
"bn-IN",
|
||||
"en-IN",
|
||||
|
|
@ -301,7 +342,16 @@ class SarvamTTSConfiguration(BaseTTSConfiguration):
|
|||
model: str = Field(
|
||||
default="bulbul:v2", json_schema_extra={"examples": SARVAM_TTS_MODELS}
|
||||
)
|
||||
voice: str = Field(default="anushka", json_schema_extra={"examples": SARVAM_VOICES})
|
||||
voice: str = Field(
|
||||
default="anushka",
|
||||
json_schema_extra={
|
||||
"examples": SARVAM_V2_VOICES,
|
||||
"model_options": {
|
||||
"bulbul:v2": SARVAM_V2_VOICES,
|
||||
"bulbul:v3": SARVAM_V3_VOICES,
|
||||
},
|
||||
},
|
||||
)
|
||||
language: str = Field(
|
||||
default="hi-IN", json_schema_extra={"examples": SARVAM_LANGUAGES}
|
||||
)
|
||||
|
|
@ -322,39 +372,89 @@ TTSConfig = Annotated[
|
|||
###################################################### STT ########################################################################
|
||||
|
||||
|
||||
DEEPGRAM_STT_MODELS = ["nova-2", "nova-3-general", "flux-general-en"]
|
||||
DEEPGRAM_STT_MODELS = ["nova-3-general", "flux-general-en"]
|
||||
DEEPGRAM_LANGUAGES = [
|
||||
"multi",
|
||||
"ar",
|
||||
"ar-AE",
|
||||
"ar-SA",
|
||||
"ar-QA",
|
||||
"ar-KW",
|
||||
"ar-SY",
|
||||
"ar-LB",
|
||||
"ar-PS",
|
||||
"ar-JO",
|
||||
"ar-EG",
|
||||
"ar-SD",
|
||||
"ar-TD",
|
||||
"ar-MA",
|
||||
"ar-DZ",
|
||||
"ar-TN",
|
||||
"ar-IQ",
|
||||
"ar-IR",
|
||||
"be",
|
||||
"bn",
|
||||
"bs",
|
||||
"bg",
|
||||
"ca",
|
||||
"cs",
|
||||
"da",
|
||||
"da-DK",
|
||||
"de",
|
||||
"de-CH",
|
||||
"el",
|
||||
"en",
|
||||
"en-US",
|
||||
"en-GB",
|
||||
"en-AU",
|
||||
"en-GB",
|
||||
"en-IN",
|
||||
"en-NZ",
|
||||
"es",
|
||||
"es-419",
|
||||
"et",
|
||||
"fa",
|
||||
"fi",
|
||||
"fr",
|
||||
"fr-CA",
|
||||
"de",
|
||||
"he",
|
||||
"hi",
|
||||
"hr",
|
||||
"hu",
|
||||
"id",
|
||||
"it",
|
||||
"ja",
|
||||
"kn",
|
||||
"ko",
|
||||
"ko-KR",
|
||||
"lt",
|
||||
"lv",
|
||||
"mk",
|
||||
"mr",
|
||||
"ms",
|
||||
"nl",
|
||||
"nl-BE",
|
||||
"no",
|
||||
"pl",
|
||||
"pt",
|
||||
"pt-BR",
|
||||
"nl",
|
||||
"hi",
|
||||
"ja",
|
||||
"ko",
|
||||
"zh-CN",
|
||||
"zh-TW",
|
||||
"pt-PT",
|
||||
"ro",
|
||||
"ru",
|
||||
"pl",
|
||||
"sk",
|
||||
"sl",
|
||||
"sr",
|
||||
"sv",
|
||||
"sv-SE",
|
||||
"ta",
|
||||
"te",
|
||||
"th",
|
||||
"tl",
|
||||
"tr",
|
||||
"uk",
|
||||
"ur",
|
||||
"vi",
|
||||
"sv",
|
||||
"da",
|
||||
"no",
|
||||
"fi",
|
||||
"id",
|
||||
"th",
|
||||
"zh-CN",
|
||||
"zh-TW",
|
||||
]
|
||||
|
||||
|
||||
|
|
@ -365,7 +465,14 @@ class DeepgramSTTConfiguration(BaseSTTConfiguration):
|
|||
default="nova-3-general", json_schema_extra={"examples": DEEPGRAM_STT_MODELS}
|
||||
)
|
||||
language: str = Field(
|
||||
default="multi", json_schema_extra={"examples": DEEPGRAM_LANGUAGES}
|
||||
default="multi",
|
||||
json_schema_extra={
|
||||
"examples": DEEPGRAM_LANGUAGES,
|
||||
"model_options": {
|
||||
"nova-3-general": DEEPGRAM_LANGUAGES,
|
||||
"flux-general-en": ["en"],
|
||||
},
|
||||
},
|
||||
)
|
||||
api_key: str
|
||||
|
||||
|
|
@ -390,39 +497,7 @@ class OpenAISTTConfiguration(BaseSTTConfiguration):
|
|||
|
||||
# Dograh STT Service
|
||||
DOGRAH_STT_MODELS = ["default"]
|
||||
DOGRAH_STT_LANGUAGES = [
|
||||
"multi",
|
||||
"en",
|
||||
"en-US",
|
||||
"en-GB",
|
||||
"en-AU",
|
||||
"en-IN",
|
||||
"es",
|
||||
"es-419",
|
||||
"fr",
|
||||
"fr-CA",
|
||||
"de",
|
||||
"it",
|
||||
"pt",
|
||||
"pt-BR",
|
||||
"nl",
|
||||
"hi",
|
||||
"ja",
|
||||
"ko",
|
||||
"zh-CN",
|
||||
"zh-TW",
|
||||
"ru",
|
||||
"pl",
|
||||
"tr",
|
||||
"uk",
|
||||
"vi",
|
||||
"sv",
|
||||
"da",
|
||||
"no",
|
||||
"fi",
|
||||
"id",
|
||||
"th",
|
||||
]
|
||||
DOGRAH_STT_LANGUAGES = DEEPGRAM_LANGUAGES
|
||||
|
||||
|
||||
@register_stt
|
||||
|
|
|
|||
|
|
@ -14,7 +14,7 @@ from api.services.looptalk.internal_transport import (
|
|||
)
|
||||
from api.services.pipecat.transport_setup import create_internal_transport
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.utils.context import set_current_run_id
|
||||
from pipecat.utils.run_context import set_current_run_id
|
||||
|
||||
from .core.pipeline_builder import LoopTalkPipelineBuilder
|
||||
from .core.recording_manager import RecordingManager
|
||||
|
|
|
|||
|
|
@ -10,7 +10,7 @@ from pipecat.pipeline.pipeline import Pipeline
|
|||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.audio.audio_buffer_processor import AudioBufferProcessor
|
||||
from pipecat.utils.context import turn_var
|
||||
from pipecat.utils.run_context import turn_var
|
||||
|
||||
|
||||
def create_pipeline_components(audio_config: AudioConfig):
|
||||
|
|
|
|||
|
|
@ -62,6 +62,7 @@ from pipecat.turns.user_mute import (
|
|||
MuteUntilFirstBotCompleteUserMuteStrategy,
|
||||
)
|
||||
from pipecat.turns.user_start import (
|
||||
ExternalUserTurnStartStrategy,
|
||||
TranscriptionUserTurnStartStrategy,
|
||||
)
|
||||
from pipecat.turns.user_start.vad_user_turn_start_strategy import (
|
||||
|
|
@ -69,12 +70,12 @@ from pipecat.turns.user_start.vad_user_turn_start_strategy import (
|
|||
)
|
||||
from pipecat.turns.user_stop import (
|
||||
ExternalUserTurnStopStrategy,
|
||||
TranscriptionUserTurnStopStrategy,
|
||||
SpeechTimeoutUserTurnStopStrategy,
|
||||
TurnAnalyzerUserTurnStopStrategy,
|
||||
)
|
||||
from pipecat.turns.user_turn_strategies import UserTurnStrategies
|
||||
from pipecat.utils.context import set_current_run_id
|
||||
from pipecat.utils.enums import EndTaskReason
|
||||
from pipecat.utils.run_context import set_current_run_id
|
||||
from pipecat.utils.tracing.context_registry import ContextProviderRegistry
|
||||
|
||||
# Setup tracing if enabled
|
||||
|
|
@ -265,7 +266,6 @@ async def run_pipeline_vobiz(
|
|||
async def run_pipeline_cloudonix(
|
||||
websocket_client: WebSocket,
|
||||
stream_sid: str,
|
||||
call_sid: str,
|
||||
workflow_id: int,
|
||||
workflow_run_id: int,
|
||||
user_id: int,
|
||||
|
|
@ -274,10 +274,15 @@ async def run_pipeline_cloudonix(
|
|||
logger.debug(
|
||||
f"Running pipeline for Cloudonix connection with workflow_id: {workflow_id} and workflow_run_id: {workflow_run_id}"
|
||||
)
|
||||
set_current_run_id(workflow_run_id)
|
||||
|
||||
workflow_run = await db_client.get_workflow_run_by_id(workflow_run_id)
|
||||
call_id = workflow_run.gathered_context.get("call_id")
|
||||
if not call_id:
|
||||
logger.warning("call_id not found in gathered_context")
|
||||
raise Exception()
|
||||
|
||||
# Store call ID in cost_info for later cost calculation (provider-agnostic)
|
||||
cost_info = {"call_id": call_sid}
|
||||
cost_info = {"call_id": call_id}
|
||||
await db_client.update_workflow_run(workflow_run_id, cost_info=cost_info)
|
||||
|
||||
# Get workflow to extract all pipeline configurations
|
||||
|
|
@ -292,26 +297,18 @@ async def run_pipeline_cloudonix(
|
|||
"ambient_noise_configuration"
|
||||
]
|
||||
|
||||
# Retrieve session_token from workflow_run gathered_context
|
||||
workflow_run = await db_client.get_workflow_run(workflow_run_id)
|
||||
session_token = None
|
||||
if workflow_run and workflow_run.gathered_context:
|
||||
session_token = workflow_run.gathered_context.get("session_token")
|
||||
logger.debug(f"Retrieved session_token from workflow_run: {session_token}")
|
||||
|
||||
# Create audio configuration for Cloudonix
|
||||
audio_config = create_audio_config(WorkflowRunMode.CLOUDONIX.value)
|
||||
|
||||
transport = await create_cloudonix_transport(
|
||||
websocket_client,
|
||||
call_id,
|
||||
stream_sid,
|
||||
call_sid,
|
||||
workflow_run_id,
|
||||
audio_config,
|
||||
workflow.organization_id,
|
||||
vad_config,
|
||||
ambient_noise_config,
|
||||
session_token,
|
||||
)
|
||||
await _run_pipeline(
|
||||
transport,
|
||||
|
|
@ -580,7 +577,10 @@ async def _run_pipeline(
|
|||
|
||||
if is_deepgram_flux:
|
||||
user_turn_strategies = UserTurnStrategies(
|
||||
start=[VADUserTurnStartStrategy(), TranscriptionUserTurnStartStrategy()],
|
||||
start=[
|
||||
VADUserTurnStartStrategy(),
|
||||
ExternalUserTurnStartStrategy(enable_interruptions=True),
|
||||
],
|
||||
stop=[ExternalUserTurnStopStrategy()],
|
||||
)
|
||||
elif turn_stop_strategy == "turn_analyzer":
|
||||
|
|
@ -598,7 +598,7 @@ async def _run_pipeline(
|
|||
# Transcription-based (default): best for short 1-2 word responses
|
||||
user_turn_strategies = UserTurnStrategies(
|
||||
start=[VADUserTurnStartStrategy(), TranscriptionUserTurnStartStrategy()],
|
||||
stop=[TranscriptionUserTurnStopStrategy()],
|
||||
stop=[SpeechTimeoutUserTurnStopStrategy()],
|
||||
)
|
||||
|
||||
# Create user mute strategies
|
||||
|
|
|
|||
|
|
@ -30,7 +30,9 @@ if TYPE_CHECKING:
|
|||
from api.services.pipecat.audio_config import AudioConfig
|
||||
|
||||
|
||||
def create_stt_service(user_config, audio_config: "AudioConfig", keyterms: list[str] | None = None):
|
||||
def create_stt_service(
|
||||
user_config, audio_config: "AudioConfig", keyterms: list[str] | None = None
|
||||
):
|
||||
"""Create and return appropriate STT service based on user configuration
|
||||
|
||||
Args:
|
||||
|
|
@ -53,7 +55,7 @@ def create_stt_service(user_config, audio_config: "AudioConfig", keyterms: list[
|
|||
keyterm=keyterms or [],
|
||||
),
|
||||
should_interrupt=False, # Let UserAggregator take care of sending InterruptionFrame
|
||||
sample_rate=audio_config.transport_in_sample_rate
|
||||
sample_rate=audio_config.transport_in_sample_rate,
|
||||
)
|
||||
|
||||
# Other models than flux
|
||||
|
|
@ -64,21 +66,24 @@ def create_stt_service(user_config, audio_config: "AudioConfig", keyterms: list[
|
|||
profanity_filter=False,
|
||||
endpointing=100,
|
||||
model=user_config.stt.model,
|
||||
keyterm=keyterms or []
|
||||
keyterm=keyterms or [],
|
||||
)
|
||||
logger.debug(f"Using DeepGram Model - {user_config.stt.model}")
|
||||
return DeepgramSTTService(
|
||||
live_options=live_options,
|
||||
api_key=user_config.stt.api_key,
|
||||
should_interrupt=False, # Let UserAggregator take care of sending InterruptionFrame
|
||||
sample_rate=audio_config.transport_in_sample_rate
|
||||
sample_rate=audio_config.transport_in_sample_rate,
|
||||
)
|
||||
elif user_config.stt.provider == ServiceProviders.OPENAI.value:
|
||||
return OpenAISTTService(
|
||||
api_key=user_config.stt.api_key, model=user_config.stt.model
|
||||
)
|
||||
elif user_config.stt.provider == ServiceProviders.CARTESIA.value:
|
||||
return CartesiaSTTService(api_key=user_config.stt.api_key, sample_rate=audio_config.transport_in_sample_rate)
|
||||
return CartesiaSTTService(
|
||||
api_key=user_config.stt.api_key,
|
||||
sample_rate=audio_config.transport_in_sample_rate,
|
||||
)
|
||||
elif user_config.stt.provider == ServiceProviders.DOGRAH.value:
|
||||
base_url = MPS_API_URL.replace("http://", "ws://").replace("https://", "wss://")
|
||||
language = getattr(user_config.stt, "language", None) or "multi"
|
||||
|
|
@ -88,7 +93,7 @@ def create_stt_service(user_config, audio_config: "AudioConfig", keyterms: list[
|
|||
model=user_config.stt.model,
|
||||
language=language,
|
||||
keyterms=keyterms,
|
||||
sample_rate=audio_config.transport_in_sample_rate
|
||||
sample_rate=audio_config.transport_in_sample_rate,
|
||||
)
|
||||
elif user_config.stt.provider == ServiceProviders.SARVAM.value:
|
||||
# Map Sarvam language code to pipecat Language enum
|
||||
|
|
@ -112,7 +117,7 @@ def create_stt_service(user_config, audio_config: "AudioConfig", keyterms: list[
|
|||
api_key=user_config.stt.api_key,
|
||||
model=user_config.stt.model,
|
||||
params=SarvamSTTService.InputParams(language=pipecat_language),
|
||||
sample_rate=audio_config.transport_in_sample_rate
|
||||
sample_rate=audio_config.transport_in_sample_rate,
|
||||
)
|
||||
elif user_config.stt.provider == ServiceProviders.SPEECHMATICS.value:
|
||||
from pipecat.services.speechmatics.stt import (
|
||||
|
|
@ -138,7 +143,7 @@ def create_stt_service(user_config, audio_config: "AudioConfig", keyterms: list[
|
|||
operating_point=operating_point,
|
||||
additional_vocab=additional_vocab,
|
||||
),
|
||||
sample_rate=audio_config.transport_in_sample_rate
|
||||
sample_rate=audio_config.transport_in_sample_rate,
|
||||
)
|
||||
else:
|
||||
raise HTTPException(
|
||||
|
|
|
|||
|
|
@ -94,14 +94,13 @@ async def create_twilio_transport(
|
|||
|
||||
async def create_cloudonix_transport(
|
||||
websocket_client: WebSocket,
|
||||
call_id: str,
|
||||
stream_sid: str,
|
||||
call_sid: str,
|
||||
workflow_run_id: int,
|
||||
audio_config: AudioConfig,
|
||||
organization_id: int,
|
||||
vad_config: dict | None = None,
|
||||
ambient_noise_config: dict | None = None,
|
||||
session_token: str | None = None,
|
||||
):
|
||||
"""Create a transport for Cloudonix connections"""
|
||||
|
||||
|
|
@ -125,11 +124,10 @@ async def create_cloudonix_transport(
|
|||
from pipecat.serializers.cloudonix import CloudonixFrameSerializer
|
||||
|
||||
serializer = CloudonixFrameSerializer(
|
||||
call_id=call_id,
|
||||
stream_sid=stream_sid,
|
||||
call_sid=call_sid,
|
||||
domain_id=domain_id,
|
||||
bearer_token=bearer_token,
|
||||
session_token=session_token,
|
||||
)
|
||||
|
||||
return FastAPIWebsocketTransport(
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@ propagate through asyncio.create_task() calls.
|
|||
import asyncio
|
||||
from typing import Dict, Optional
|
||||
|
||||
from pipecat.utils.context import turn_var
|
||||
from pipecat.utils.run_context import turn_var
|
||||
|
||||
|
||||
class TurnContextManager:
|
||||
|
|
|
|||
|
|
@ -395,10 +395,6 @@ class CloudonixProvider(TelephonyProvider):
|
|||
await websocket.close(code=4400, reason="Expected connected event")
|
||||
return
|
||||
|
||||
logger.debug(
|
||||
f"Cloudonix WebSocket connected for workflow_run {workflow_run_id}"
|
||||
)
|
||||
|
||||
# Wait for "start" event with stream details
|
||||
start_msg = await websocket.receive_text()
|
||||
logger.debug(f"Received start message: {start_msg}")
|
||||
|
|
@ -418,9 +414,14 @@ class CloudonixProvider(TelephonyProvider):
|
|||
await websocket.close(code=4400, reason="Missing stream identifiers")
|
||||
return
|
||||
|
||||
logger.debug(
|
||||
f"Cloudonix WebSocket connected for workflow_run {workflow_run_id} "
|
||||
f"stream_sid: {stream_sid} call_sid: {call_sid}"
|
||||
)
|
||||
|
||||
# Run the Cloudonix pipeline
|
||||
await run_pipeline_cloudonix(
|
||||
websocket, stream_sid, call_sid, workflow_id, workflow_run_id, user_id
|
||||
websocket, stream_sid, workflow_id, workflow_run_id, user_id
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
|
|
|
|||
|
|
@ -110,7 +110,7 @@ class TwilioProvider(TelephonyProvider):
|
|||
return CallInitiationResult(
|
||||
call_id=response_data["sid"],
|
||||
status=response_data.get("status", "queued"),
|
||||
provider_metadata={}, # Twilio doesn't need to persist extra data
|
||||
provider_metadata={"call_id": response_data["sid"]},
|
||||
raw_response=response_data,
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -150,7 +150,7 @@ class VobizProvider(TelephonyProvider):
|
|||
return CallInitiationResult(
|
||||
call_id=call_id,
|
||||
status="queued", # Vobiz returns "message": "call fired"
|
||||
provider_metadata={},
|
||||
provider_metadata={"call_id": call_id},
|
||||
raw_response=response_data,
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -138,10 +138,8 @@ class VonageProvider(TelephonyProvider):
|
|||
call_id=response_data["uuid"],
|
||||
status=response_data.get("status", "started"),
|
||||
provider_metadata={
|
||||
"call_uuid": response_data[
|
||||
"uuid"
|
||||
] # Vonage needs UUID persisted for WebSocket
|
||||
},
|
||||
"call_uuid": response_data["uuid"]
|
||||
}, # Vonage needs UUID persisted for WebSocket
|
||||
raw_response=response_data,
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -23,7 +23,7 @@ from api.services.telephony.stasis_event_protocol import (
|
|||
parse_event,
|
||||
)
|
||||
from api.services.telephony.stasis_rtp_connection import StasisRTPConnection
|
||||
from pipecat.utils.context import set_current_run_id
|
||||
from pipecat.utils.run_context import set_current_run_id
|
||||
|
||||
|
||||
class WorkerEventSubscriber:
|
||||
|
|
|
|||
|
|
@ -116,6 +116,10 @@ def create_aggregation_correction_callback(engine: "PipecatEngine"):
|
|||
if corrupted in ref or len(alnum_ref) < len(alnum_corr) or len(alnum_corr) < 10:
|
||||
return corrupted
|
||||
|
||||
logger.debug(
|
||||
f"In correct_corrupted_aggregation: ref: {ref} corrupted: {corrupted}"
|
||||
)
|
||||
|
||||
# 2) Find where in `ref` we should start aligning.
|
||||
# We take the first N (N=10) characters of `corrupted`
|
||||
# and look for all their occurrences in `ref`.
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue