mirror of
https://github.com/dograh-hq/dograh.git
synced 2026-06-10 08:05:22 +02:00
Merge branch 'main' into feat/telnyx-telephony
This commit is contained in:
commit
9dc64456d8
39 changed files with 1071 additions and 313 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
|
@ -16,3 +16,4 @@ venv/
|
|||
.playwright-mcp
|
||||
coturn/
|
||||
*.wav
|
||||
dograh_pcm_cache/
|
||||
|
|
@ -64,7 +64,7 @@ class WorkflowRecordingClient(BaseDBClient):
|
|||
storage_key=storage_key,
|
||||
storage_backend=storage_backend,
|
||||
created_by=created_by,
|
||||
metadata=metadata or {},
|
||||
recording_metadata=metadata or {},
|
||||
)
|
||||
|
||||
session.add(recording)
|
||||
|
|
|
|||
|
|
@ -40,6 +40,38 @@ class PresignedUploadUrlResponse(BaseModel):
|
|||
router = APIRouter(prefix="/s3", tags=["s3"])
|
||||
|
||||
|
||||
def _extract_org_id_from_key(key: str) -> Optional[int]:
    """Try to extract an organization ID from a storage key.

    Matches keys of the form ``{prefix}/{org_id}/...`` where *org_id* is made
    up solely of ASCII decimal digits (``0``-``9``).

    Args:
        key: Storage object key, split on ``/``.

    Returns:
        The second path segment as an ``int`` when the key has at least three
        segments and that segment is all ASCII digits; ``None`` otherwise.
    """
    parts = key.split("/")
    if len(parts) < 3:
        return None

    candidate = parts[1]
    # str.isdigit() on its own also accepts characters such as "²" that int()
    # rejects with ValueError, and non-ASCII decimal digits that int() would
    # convert to a number not literally present in the key. Requiring ASCII
    # keeps the parsed ID identical to the text in the key.
    if candidate.isascii() and candidate.isdigit():
        return int(candidate)
    return None
|
||||
|
||||
|
||||
def _extract_legacy_workflow_run_id(key: str) -> Optional[int]:
    """Extract a workflow_run_id from legacy key formats.

    Supports:
    - ``transcripts/{run_id}.txt``
    - ``recordings/{run_id}.wav``

    Args:
        key: Storage object key to inspect.

    Returns:
        The run id as an ``int`` when the key matches one of the legacy
        patterns and the id part is all ASCII digits; ``None`` otherwise.
    """
    legacy_patterns = (("transcripts/", ".txt"), ("recordings/", ".wav"))
    for prefix, suffix in legacy_patterns:
        if key.startswith(prefix) and key.endswith(suffix):
            run_id_str = key[len(prefix) : -len(suffix)]
            break
    else:
        return None

    # str.isdigit() alone would let through characters such as "²" that make
    # int() raise ValueError, so the id is additionally required to be ASCII.
    if run_id_str.isascii() and run_id_str.isdigit():
        return int(run_id_str)
    return None
|
||||
|
||||
|
||||
# Keep for backward compat with file-metadata endpoint
|
||||
async def _validate_and_extract_workflow_run_id(
|
||||
key: str, allow_special_paths: bool = False
|
||||
) -> Optional[int]:
|
||||
|
|
@ -118,64 +150,68 @@ async def get_signed_url(
|
|||
key: Annotated[str, Query(description="S3 object key")],
|
||||
expires_in: int = 3600,
|
||||
inline: bool = False,
|
||||
storage_backend: Annotated[
|
||||
Optional[str],
|
||||
Query(
|
||||
description="Storage backend to use (e.g. 'minio', 's3'). "
|
||||
"When omitted the backend is inferred from the resource."
|
||||
),
|
||||
] = None,
|
||||
user=Depends(get_user),
|
||||
):
|
||||
"""Return a short-lived signed URL for a transcript or recording file stored on S3.
|
||||
"""Return a short-lived signed URL for a file stored on S3 / MinIO.
|
||||
|
||||
Access Control:
|
||||
* Keys that embed an organization ID (``{prefix}/{org_id}/...``) are
|
||||
authorized by matching the org_id against the requesting user's
|
||||
organization.
|
||||
* Legacy keys (``recordings/{run_id}.wav``, ``transcripts/{run_id}.txt``)
|
||||
are authorized via the workflow run they belong to.
|
||||
* Superusers can request any key.
|
||||
* Regular users can only request resources belonging to **their** workflow runs.
|
||||
"""
|
||||
|
||||
# Validate key and extract workflow_run_id (don't allow special paths for signed URLs)
|
||||
run_id = await _validate_and_extract_workflow_run_id(key, allow_special_paths=False)
|
||||
if run_id is None:
|
||||
raise HTTPException(status_code=400, detail="Invalid key format")
|
||||
# ------------------------------------------------------------------
|
||||
# 1. Authorize
|
||||
# ------------------------------------------------------------------
|
||||
workflow_run = None
|
||||
|
||||
# Authorize and get workflow run
|
||||
workflow_run = await _authorize_and_get_workflow_run(run_id, user)
|
||||
org_id = _extract_org_id_from_key(key)
|
||||
if org_id is not None:
|
||||
# Generic org-based auth
|
||||
if not user.is_superuser and org_id != user.selected_organization_id:
|
||||
raise HTTPException(status_code=403, detail="Access denied")
|
||||
else:
|
||||
# Legacy workflow-run-based auth
|
||||
run_id = _extract_legacy_workflow_run_id(key)
|
||||
if run_id is None:
|
||||
raise HTTPException(status_code=400, detail="Invalid key format")
|
||||
workflow_run = await _authorize_and_get_workflow_run(run_id, user)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# 3. Generate the signed URL using the correct storage backend
|
||||
# 2. Resolve storage backend
|
||||
# ------------------------------------------------------------------
|
||||
try:
|
||||
# Use the storage backend recorded when the file was uploaded
|
||||
if (
|
||||
if storage_backend:
|
||||
storage = get_storage_for_backend(storage_backend)
|
||||
elif (
|
||||
workflow_run
|
||||
and hasattr(workflow_run, "storage_backend")
|
||||
and workflow_run.storage_backend
|
||||
):
|
||||
backend = workflow_run.storage_backend
|
||||
storage = get_storage_for_backend(backend)
|
||||
logger.info(
|
||||
f"DOWNLOAD: Using stored {backend} (value: {backend}) for signed URL generation - workflow_run_id: {run_id}, key: {key}"
|
||||
)
|
||||
storage = get_storage_for_backend(workflow_run.storage_backend)
|
||||
else:
|
||||
# Fallback to current storage for legacy records without storage_backend
|
||||
storage = storage_fs
|
||||
current_backend = StorageBackend.get_current_backend()
|
||||
logger.warning(
|
||||
f"DOWNLOAD: No storage_backend found for workflow run {run_id}, falling back to current {current_backend.name} - key: {key}"
|
||||
)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# 3. Generate the signed URL
|
||||
# ------------------------------------------------------------------
|
||||
url = await storage.aget_signed_url(
|
||||
key, expiration=expires_in, force_inline=inline
|
||||
)
|
||||
if not url:
|
||||
raise HTTPException(status_code=500, detail="Failed to generate signed URL")
|
||||
|
||||
# Log successful URL generation
|
||||
backend_info = (
|
||||
f"stored {backend}"
|
||||
if workflow_run
|
||||
and hasattr(workflow_run, "storage_backend")
|
||||
and workflow_run.storage_backend
|
||||
else f"current {StorageBackend.get_current_backend().name}"
|
||||
)
|
||||
logger.info(
|
||||
f"Successfully generated signed URL using {backend_info} - expires in {expires_in}s"
|
||||
)
|
||||
|
||||
logger.info(f"Generated signed URL for key={key}, expires_in={expires_in}s")
|
||||
return {"url": url, "expires_in": expires_in}
|
||||
except ClientError as exc:
|
||||
logger.error(f"Error generating signed URL: {exc}")
|
||||
|
|
|
|||
|
|
@ -2,9 +2,10 @@
|
|||
|
||||
from typing import Annotated, Optional
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query
|
||||
from fastapi import APIRouter, Depends, File, Form, HTTPException, Query, UploadFile
|
||||
from loguru import logger
|
||||
|
||||
from api.constants import DEPLOYMENT_MODE
|
||||
from api.db import db_client
|
||||
from api.db.workflow_recording_client import generate_short_id
|
||||
from api.enums import StorageBackend
|
||||
|
|
@ -16,6 +17,7 @@ from api.schemas.workflow_recording import (
|
|||
RecordingUploadResponseSchema,
|
||||
)
|
||||
from api.services.auth.depends import get_user
|
||||
from api.services.mps_service_key_client import mps_service_key_client
|
||||
from api.services.storage import storage_fs
|
||||
|
||||
router = APIRouter(prefix="/workflow-recordings", tags=["workflow-recordings"])
|
||||
|
|
@ -216,3 +218,42 @@ async def delete_recording(
|
|||
raise HTTPException(
|
||||
status_code=500, detail="Failed to delete recording"
|
||||
) from exc
|
||||
|
||||
|
||||
@router.post(
    "/transcribe",
    summary="Transcribe an audio file",
)
async def transcribe_audio(
    file: UploadFile = File(...),
    language: str = Form("en"),
    user=Depends(get_user),
):
    """Transcribe an uploaded audio file using MPS STT.

    The whole upload is read into memory and forwarded to
    ``mps_service_key_client.transcribe_audio``. When ``DEPLOYMENT_MODE`` is
    ``"oss"`` the caller is identified by ``created_by`` (the user's
    ``provider_id`` as a string); otherwise by ``organization_id`` (the user's
    ``selected_organization_id``).

    Returns:
        Whatever the MPS client returns for the transcription request.

    Raises:
        HTTPException: 500 for any exception raised while reading the upload
            or calling the MPS client; the original exception is chained.
    """
    try:
        audio_data = await file.read()

        # The two deployment modes differ only in how the caller is identified.
        if DEPLOYMENT_MODE == "oss":
            caller_identity = {"created_by": str(user.provider_id)}
        else:
            caller_identity = {"organization_id": user.selected_organization_id}

        return await mps_service_key_client.transcribe_audio(
            audio_data=audio_data,
            filename=file.filename or "audio.wav",
            content_type=file.content_type or "audio/wav",
            language=language,
            **caller_identity,
        )

    except Exception as exc:
        logger.error(f"Error transcribing audio: {exc}")
        raise HTTPException(
            status_code=500, detail="Failed to transcribe audio"
        ) from exc
|
||||
|
|
|
|||
|
|
@ -40,6 +40,7 @@ class UserConfigurationValidator:
|
|||
ServiceProviders.SPEECHMATICS.value: self._check_speechmatics_api_key,
|
||||
ServiceProviders.CAMB.value: self._check_camb_api_key,
|
||||
ServiceProviders.AWS_BEDROCK.value: self._check_aws_bedrock_api_key,
|
||||
ServiceProviders.SELF_HOSTED.value: self._check_self_hosted_api_key,
|
||||
}
|
||||
|
||||
async def validate(self, configuration: UserConfiguration) -> APIKeyStatusResponse:
|
||||
|
|
@ -74,6 +75,20 @@ class UserConfigurationValidator:
|
|||
|
||||
provider = service_config.provider
|
||||
|
||||
# Self-hosted doesn't require an API key
|
||||
if provider == ServiceProviders.SELF_HOSTED.value:
|
||||
try:
|
||||
if not self._check_self_hosted_api_key(provider, service_config):
|
||||
return [
|
||||
{
|
||||
"model": service_name,
|
||||
"message": f"Invalid {provider} configuration",
|
||||
}
|
||||
]
|
||||
except ValueError as e:
|
||||
return [{"model": service_name, "message": str(e)}]
|
||||
return []
|
||||
|
||||
# AWS Bedrock uses AWS credentials instead of api_key
|
||||
if provider == ServiceProviders.AWS_BEDROCK.value:
|
||||
try:
|
||||
|
|
@ -163,7 +178,12 @@ class UserConfigurationValidator:
|
|||
|
||||
def _check_camb_api_key(self, model: str, api_key: str) -> bool:
|
||||
return True
|
||||
|
||||
|
||||
def _check_self_hosted_api_key(self, model: str, service_config) -> bool:
    """Validate a self-hosted LLM configuration.

    No API key is inspected; the only requirement is a usable ``base_url``.

    Args:
        model: Not used by this check.
        service_config: Configuration object whose ``base_url`` attribute is
            read (a missing attribute is treated like an empty value).

    Returns:
        ``True`` when ``base_url`` is present and not blank.

    Raises:
        ValueError: If ``base_url`` is missing, empty, or whitespace-only.
    """
    base_url = getattr(service_config, "base_url", None)
    # A whitespace-only string is truthy but cannot be a usable endpoint, so
    # it is rejected together with None and "".
    if not base_url or (isinstance(base_url, str) and not base_url.strip()):
        raise ValueError("base_url is required for self-hosted LLM")
    return True
|
||||
|
||||
def _check_aws_bedrock_api_key(self, model: str, service_config) -> bool:
|
||||
if not service_config.aws_access_key or not service_config.aws_secret_key:
|
||||
raise ValueError("AWS access key and secret key are required for Bedrock")
|
||||
|
|
|
|||
|
|
@ -27,6 +27,7 @@ class ServiceProviders(str, Enum):
|
|||
SPEECHMATICS = "speechmatics"
|
||||
CAMB = "camb"
|
||||
AWS_BEDROCK = "aws_bedrock"
|
||||
SELF_HOSTED = "self_hosted"
|
||||
|
||||
|
||||
class BaseServiceConfiguration(BaseModel):
|
||||
|
|
@ -40,6 +41,7 @@ class BaseServiceConfiguration(BaseModel):
|
|||
ServiceProviders.AZURE,
|
||||
ServiceProviders.DOGRAH,
|
||||
ServiceProviders.AWS_BEDROCK,
|
||||
ServiceProviders.SELF_HOSTED,
|
||||
# ServiceProviders.SARVAM,
|
||||
]
|
||||
api_key: str | list[str]
|
||||
|
|
@ -249,6 +251,22 @@ class AWSBedrockLLMConfiguration(BaseLLMConfiguration):
|
|||
api_key: str | list[str] | None = Field(default=None)
|
||||
|
||||
|
||||
SELF_HOSTED_LLM_MODELS = ["llama3", "mistral", "phi3", "qwen2", "gemma2", "deepseek-r1"]
|
||||
|
||||
|
||||
@register_llm
class SelfHostedLLMConfiguration(BaseLLMConfiguration):
    """LLM configuration for the ``SELF_HOSTED`` provider.

    Describes an OpenAI-compatible endpoint reached through ``base_url``.
    ``api_key`` may be omitted.
    """

    provider: Literal[ServiceProviders.SELF_HOSTED] = ServiceProviders.SELF_HOSTED

    # The field is a plain ``str``; SELF_HOSTED_LLM_MODELS is attached only as
    # JSON-schema examples.
    model: str = Field(
        default="llama3",
        json_schema_extra={"examples": SELF_HOSTED_LLM_MODELS},
    )

    base_url: str = Field(
        default="http://localhost:11434/v1",
        description="OpenAI-compatible endpoint (Ollama, vLLM, etc.)",
    )

    # Optional here: defaults to None when no key is supplied.
    api_key: str | list[str] | None = Field(default=None)
|
||||
|
||||
|
||||
LLMConfig = Annotated[
|
||||
Union[
|
||||
OpenAILLMService,
|
||||
|
|
@ -258,6 +276,7 @@ LLMConfig = Annotated[
|
|||
AzureLLMService,
|
||||
DograhLLMService,
|
||||
AWSBedrockLLMConfiguration,
|
||||
SelfHostedLLMConfiguration,
|
||||
],
|
||||
Field(discriminator="provider"),
|
||||
]
|
||||
|
|
@ -334,6 +353,12 @@ class CartesiaTTSConfiguration(BaseTTSConfiguration):
|
|||
)
|
||||
voice: str = Field(default="3faa81ae-d3d8-4ab1-9e44-e50e46d33c30")
|
||||
speed: float = Field(default=1.0, ge=0.6, le=1.5, description="Speed of the voice")
|
||||
volume: float = Field(
|
||||
default=1.0,
|
||||
ge=0.5,
|
||||
le=2.0,
|
||||
description="Volume multiplier for generated speech",
|
||||
)
|
||||
|
||||
|
||||
SARVAM_TTS_MODELS = ["bulbul:v2", "bulbul:v3"]
|
||||
|
|
|
|||
|
|
@ -351,6 +351,71 @@ class MPSServiceKeyClient:
|
|||
response=response,
|
||||
)
|
||||
|
||||
async def transcribe_audio(
    self,
    audio_data: bytes,
    filename: str = "audio.wav",
    content_type: str = "audio/wav",
    language: str = "en",
    model: str = "default",
    correlation_id: Optional[str] = None,
    organization_id: Optional[int] = None,
    created_by: Optional[str] = None,
) -> dict:
    """
    Transcribe an audio file via MPS STT API.

    Sends a multipart POST to ``{base_url}/api/v1/stt/transcribe`` with a
    60 second timeout.

    Args:
        audio_data: Raw audio bytes
        filename: Name of the audio file
        content_type: MIME type of the audio (e.g., audio/wav, audio/mp3)
        language: Language code for transcription (default: "en")
        model: Model tier name (default: "default")
        correlation_id: Optional correlation ID for tracking; only sent
            when truthy
        organization_id: Organization ID (for authenticated mode)
        created_by: User provider ID (for OSS mode)

    Returns:
        The decoded JSON body of the response.

    Raises:
        httpx.HTTPStatusError: If the response status is anything other
            than 200
    """
    async with httpx.AsyncClient(timeout=httpx.Timeout(60.0)) as client:
        upload = {"file": (filename, audio_data, content_type)}

        form_fields = {"language": language, "model": model}
        if correlation_id:
            form_fields["correlation_id"] = correlation_id

        request_headers = self._get_headers(organization_id, created_by)
        # Remove Content-Type so httpx sets the correct multipart boundary
        request_headers.pop("Content-Type", None)

        response = await client.post(
            f"{self.base_url}/api/v1/stt/transcribe",
            files=upload,
            data=form_fields,
            headers=request_headers,
        )

        # Guard clause: anything other than 200 is logged and raised.
        if response.status_code != 200:
            logger.error(
                f"Failed to transcribe audio: {response.status_code} - {response.text}"
            )
            raise httpx.HTTPStatusError(
                f"Failed to transcribe audio: {response.text}",
                request=response.request,
                response=response,
            )

        return response.json()
|
||||
|
||||
def validate_service_key(self, service_key: str) -> bool:
|
||||
"""
|
||||
Synchronously validate a Dograh service key by checking usage via MPS.
|
||||
|
|
|
|||
|
|
@ -165,49 +165,39 @@ class RealtimeFeedbackObserver(BaseObserver):
|
|||
frame = data.frame
|
||||
frame_direction = data.direction
|
||||
|
||||
logger.trace(f"{self} Received Frame: {frame} Direction: {frame_direction}")
|
||||
|
||||
# Handle pipeline termination - stop clock task
|
||||
if isinstance(frame, (EndFrame, CancelFrame, StopFrame)):
|
||||
await self._cancel_clock_task()
|
||||
return
|
||||
|
||||
# Handle interruptions - clear any queued bot text
|
||||
if isinstance(frame, InterruptionFrame):
|
||||
await self._handle_interruption()
|
||||
return
|
||||
|
||||
# Bot speaking state - WS only (ephemeral state signals, not persisted)
|
||||
if isinstance(frame, BotStartedSpeakingFrame):
|
||||
await self._send_ws(
|
||||
{"type": RealtimeFeedbackType.BOT_STARTED_SPEAKING.value, "payload": {}}
|
||||
)
|
||||
return
|
||||
if isinstance(frame, BotStoppedSpeakingFrame):
|
||||
await self._send_ws(
|
||||
{"type": RealtimeFeedbackType.BOT_STOPPED_SPEAKING.value, "payload": {}}
|
||||
)
|
||||
return
|
||||
|
||||
# User mute state - WS only (ephemeral state signals, not persisted)
|
||||
if isinstance(frame, UserMuteStartedFrame):
|
||||
await self._send_ws(
|
||||
{"type": RealtimeFeedbackType.USER_MUTE_STARTED.value, "payload": {}}
|
||||
)
|
||||
return
|
||||
if isinstance(frame, UserMuteStoppedFrame):
|
||||
await self._send_ws(
|
||||
{"type": RealtimeFeedbackType.USER_MUTE_STOPPED.value, "payload": {}}
|
||||
)
|
||||
return
|
||||
|
||||
# Skip already processed frames (frames can be observed multiple times)
|
||||
if frame.id in self._frames_seen:
|
||||
return
|
||||
self._frames_seen.add(frame.id)
|
||||
|
||||
logger.trace(f"{self} Received Frame: {frame} Direction: {frame_direction}")
|
||||
|
||||
# Handle pipeline termination - stop clock task
|
||||
if isinstance(frame, (EndFrame, CancelFrame, StopFrame)):
|
||||
await self._cancel_clock_task()
|
||||
# Handle interruptions - clear any queued bot text
|
||||
elif isinstance(frame, InterruptionFrame):
|
||||
await self._handle_interruption()
|
||||
# Bot speaking state - WS only (ephemeral state signals, not persisted)
|
||||
elif isinstance(frame, BotStartedSpeakingFrame):
|
||||
await self._send_ws(
|
||||
{"type": RealtimeFeedbackType.BOT_STARTED_SPEAKING.value, "payload": {}}
|
||||
)
|
||||
elif isinstance(frame, BotStoppedSpeakingFrame):
|
||||
await self._send_ws(
|
||||
{"type": RealtimeFeedbackType.BOT_STOPPED_SPEAKING.value, "payload": {}}
|
||||
)
|
||||
# User mute state - WS only (ephemeral state signals, not persisted)
|
||||
elif isinstance(frame, UserMuteStartedFrame):
|
||||
await self._send_ws(
|
||||
{"type": RealtimeFeedbackType.USER_MUTE_STARTED.value, "payload": {}}
|
||||
)
|
||||
elif isinstance(frame, UserMuteStoppedFrame):
|
||||
await self._send_ws(
|
||||
{"type": RealtimeFeedbackType.USER_MUTE_STOPPED.value, "payload": {}}
|
||||
)
|
||||
# Handle user transcriptions (interim) - WebSocket only
|
||||
if isinstance(frame, InterimTranscriptionFrame):
|
||||
elif isinstance(frame, InterimTranscriptionFrame):
|
||||
await self._send_ws(
|
||||
{
|
||||
"type": RealtimeFeedbackType.USER_TRANSCRIPTION.value,
|
||||
|
|
|
|||
|
|
@ -66,6 +66,7 @@ class RecordingRouterProcessor(FrameProcessor):
|
|||
self._frame_buffer: list[tuple[LLMTextFrame, FrameDirection]] = []
|
||||
self._mode: Optional[str] = None # None = detecting, "tts", "recording"
|
||||
self._recording_id_buffer = ""
|
||||
self._recording_playback_started = False
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Frame dispatch
|
||||
|
|
@ -99,9 +100,15 @@ class RecordingRouterProcessor(FrameProcessor):
|
|||
await self.push_frame(frame, direction)
|
||||
return
|
||||
|
||||
# --- Recording mode: accumulate recording_id silently ---
|
||||
# --- Recording mode: accumulate text and start playback ASAP ---
|
||||
if self._mode == "recording":
|
||||
self._recording_id_buffer += frame.text
|
||||
if not self._recording_playback_started:
|
||||
buf = self._recording_id_buffer.lstrip()
|
||||
if " " in buf:
|
||||
recording_id = buf.split()[0]
|
||||
self._recording_playback_started = True
|
||||
await self._play_recording(recording_id)
|
||||
return
|
||||
|
||||
# --- Detection mode: buffer until marker found ---
|
||||
|
|
@ -178,16 +185,21 @@ class RecordingRouterProcessor(FrameProcessor):
|
|||
self, frame: LLMFullResponseEndFrame, direction: FrameDirection
|
||||
):
|
||||
if self._mode == "recording":
|
||||
recording_id = self._recording_id_buffer.strip()
|
||||
if recording_id:
|
||||
# Push accumulated text as TTSTextFrame for UI feedback via observer
|
||||
full_text = self._recording_id_buffer.strip()
|
||||
if full_text:
|
||||
recording_id = full_text.split()[0]
|
||||
|
||||
# Push full text (marker + id + transcript) for assistant context
|
||||
await self.push_frame(
|
||||
TTSTextFrame(
|
||||
text=RECORDING_MARKER + self._recording_id_buffer,
|
||||
aggregated_by="recording_router",
|
||||
)
|
||||
)
|
||||
await self._play_recording(recording_id)
|
||||
|
||||
# Fallback: if response ended before a space arrived (no transcript)
|
||||
if not self._recording_playback_started:
|
||||
await self._play_recording(recording_id)
|
||||
else:
|
||||
logger.warning(
|
||||
"RecordingRouterProcessor: recording mode but empty recording_id"
|
||||
|
|
@ -256,3 +268,4 @@ class RecordingRouterProcessor(FrameProcessor):
|
|||
self._frame_buffer = []
|
||||
self._mode = None
|
||||
self._recording_id_buffer = ""
|
||||
self._recording_playback_started = False
|
||||
|
|
|
|||
|
|
@ -8,7 +8,11 @@ from api.services.configuration.registry import ServiceProviders
|
|||
from pipecat.services.aws.llm import AWSBedrockLLMService, AWSBedrockLLMSettings
|
||||
from pipecat.services.azure.llm import AzureLLMService, AzureLLMSettings
|
||||
from pipecat.services.cartesia.stt import CartesiaSTTService
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService, CartesiaTTSSettings, GenerationConfig
|
||||
from pipecat.services.cartesia.tts import (
|
||||
CartesiaTTSService,
|
||||
CartesiaTTSSettings,
|
||||
GenerationConfig,
|
||||
)
|
||||
from pipecat.services.deepgram.flux.stt import (
|
||||
DeepgramFluxSTTService,
|
||||
DeepgramFluxSTTSettings,
|
||||
|
|
@ -212,13 +216,19 @@ def create_tts_service(user_config, audio_config: "AudioConfig"):
|
|||
)
|
||||
elif user_config.tts.provider == ServiceProviders.CARTESIA.value:
|
||||
speed = getattr(user_config.tts, "speed", None)
|
||||
generation_config = GenerationConfig(speed=speed) if speed and speed != 1.0 else None
|
||||
generation_config = (
|
||||
GenerationConfig(speed=speed) if speed and speed != 1.0 else None
|
||||
)
|
||||
return CartesiaTTSService(
|
||||
api_key=user_config.tts.api_key,
|
||||
settings=CartesiaTTSSettings(
|
||||
voice=user_config.tts.voice,
|
||||
model=user_config.tts.model,
|
||||
**({"generation_config": generation_config} if generation_config else {}),
|
||||
**(
|
||||
{"generation_config": generation_config}
|
||||
if generation_config
|
||||
else {}
|
||||
),
|
||||
),
|
||||
text_filters=[xml_function_tag_filter],
|
||||
silence_time_s=1.0,
|
||||
|
|
@ -353,6 +363,12 @@ def create_llm_service_from_provider(
|
|||
aws_region=aws_region,
|
||||
settings=AWSBedrockLLMSettings(model=model),
|
||||
)
|
||||
elif provider == ServiceProviders.SELF_HOSTED.value:
|
||||
return OpenAILLMService(
|
||||
base_url=base_url or "http://localhost:11434/v1",
|
||||
api_key=api_key or "none",
|
||||
settings=OpenAILLMSettings(model=model),
|
||||
)
|
||||
else:
|
||||
raise HTTPException(status_code=400, detail=f"Invalid LLM provider {provider}")
|
||||
|
||||
|
|
@ -368,6 +384,8 @@ def create_llm_service(user_config):
|
|||
kwargs["base_url"] = user_config.llm.base_url
|
||||
elif provider == ServiceProviders.AZURE.value:
|
||||
kwargs["endpoint"] = user_config.llm.endpoint
|
||||
elif provider == ServiceProviders.SELF_HOSTED.value:
|
||||
kwargs["base_url"] = user_config.llm.base_url
|
||||
elif provider == ServiceProviders.AWS_BEDROCK.value:
|
||||
kwargs["aws_access_key"] = user_config.llm.aws_access_key
|
||||
kwargs["aws_secret_key"] = user_config.llm.aws_secret_key
|
||||
|
|
|
|||
|
|
@ -437,9 +437,7 @@ class PipecatEngine:
|
|||
|
||||
async def _do_extraction():
|
||||
try:
|
||||
logger.debug(
|
||||
f"Starting variable extraction for node: {node.name}"
|
||||
)
|
||||
logger.debug(f"Starting variable extraction for node: {node.name}")
|
||||
extracted_data = (
|
||||
await self._variable_extraction_manager._perform_extraction(
|
||||
extraction_variables, parent_context, extraction_prompt
|
||||
|
|
@ -454,7 +452,9 @@ class PipecatEngine:
|
|||
f"Variable extraction completed for node: {node.name}. Extracted: {extracted_data}"
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"Error during variable extraction for node {node.name}: {str(e)}")
|
||||
logger.error(
|
||||
f"Error during variable extraction for node {node.name}: {str(e)}"
|
||||
)
|
||||
|
||||
if run_in_background:
|
||||
logger.debug(
|
||||
|
|
@ -497,9 +497,7 @@ class PipecatEngine:
|
|||
logger.error(
|
||||
f"Pending extraction task '{task_name}' failed: {result}"
|
||||
)
|
||||
logger.debug(
|
||||
f"All pending extraction tasks completed in {elapsed:.2f}s"
|
||||
)
|
||||
logger.debug(f"All pending extraction tasks completed in {elapsed:.2f}s")
|
||||
except asyncio.TimeoutError:
|
||||
incomplete = [
|
||||
t.get_name() for t in self._pending_extraction_tasks if not t.done()
|
||||
|
|
|
|||
|
|
@ -34,13 +34,13 @@ You have two modes for responding:
|
|||
Example: ▸ Hello! How can I help you today?
|
||||
|
||||
2. PRE-RECORDED AUDIO (●): Play a pre-recorded audio message.
|
||||
Format: `●` followed by a space and ONLY the recording_id. Nothing else.
|
||||
Example: ● rec_greeting_01
|
||||
Format: `●` followed by a space followed by recording_id followed by provided transcript. Nothing else.
|
||||
Example: ● rec_greeting_01 [ Provided Transcript ]
|
||||
|
||||
RULES:
|
||||
- Your response MUST start with either `▸` or `●` as the very first character.
|
||||
- For `▸` (dynamic speech): Follow with a space and your full response text.
|
||||
- For `●` (pre-recorded audio): Follow with a space and ONLY the recording_id. No other text.
|
||||
- For `●` (pre-recorded audio): Follow with a space and the recording_id and the provided transcript. No other text.
|
||||
- Use `●` when a pre-recorded message matches the situation well.
|
||||
- Use `▸` when you need to generate a dynamic, contextual response.
|
||||
- NEVER mix modes in a single response. Choose one."""
|
||||
|
|
@ -77,11 +77,8 @@ def compose_system_prompt_for_node(
|
|||
|
||||
parts = [p for p in (global_prompt, formatted_node_prompt) if p]
|
||||
|
||||
if has_recordings:
|
||||
if has_recordings and "RECORDING_ID:" in formatted_node_prompt:
|
||||
parts.append(RECORDING_RESPONSE_MODE_INSTRUCTIONS)
|
||||
# TODO: Append per-node available recordings list here once
|
||||
# Node.recording_ids is populated. The list should include
|
||||
# recording_id and a short description so the LLM can choose.
|
||||
|
||||
return "\n\n".join(parts)
|
||||
|
||||
|
|
|
|||
|
|
@ -28,7 +28,9 @@ from api.utils.template_renderer import render_template
|
|||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
|
||||
|
||||
async def _run_llm_inference(llm, messages: list[dict], system_prompt: str) -> str | None:
|
||||
async def _run_llm_inference(
|
||||
llm, messages: list[dict], system_prompt: str
|
||||
) -> str | None:
|
||||
"""Run a one-shot LLM inference using the pipecat service."""
|
||||
context = LLMContext()
|
||||
context.set_messages(messages)
|
||||
|
|
@ -51,7 +53,10 @@ async def _generate_conversation_summary(
|
|||
]
|
||||
|
||||
try:
|
||||
summary = await _run_llm_inference(llm, messages, CONVERSATION_SUMMARY_SYSTEM_PROMPT) or ""
|
||||
summary = (
|
||||
await _run_llm_inference(llm, messages, CONVERSATION_SUMMARY_SYSTEM_PROMPT)
|
||||
or ""
|
||||
)
|
||||
|
||||
span_name = f"conversation-summary-before-{node_name}"
|
||||
add_qa_span_to_trace(parent_ctx, model, messages, summary, span_name)
|
||||
|
|
|
|||
|
|
@ -154,7 +154,12 @@ async def ensure_node_summaries(
|
|||
try:
|
||||
context = LLMContext()
|
||||
context.set_messages(messages)
|
||||
summary_text = await llm.run_inference(context, system_instruction=NODE_SUMMARY_SYSTEM_PROMPT) or ""
|
||||
summary_text = (
|
||||
await llm.run_inference(
|
||||
context, system_instruction=NODE_SUMMARY_SYSTEM_PROMPT
|
||||
)
|
||||
or ""
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to generate summary for node {node_id}: {e}")
|
||||
updated_summaries[node_id] = {"summary": ""}
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@ Covers:
|
|||
"""
|
||||
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
from pydantic import ValidationError
|
||||
|
|
@ -17,13 +17,12 @@ from pydantic import ValidationError
|
|||
from api.services.configuration.check_validity import UserConfigurationValidator
|
||||
from api.services.configuration.registry import (
|
||||
CAMB_TTS_MODELS,
|
||||
CambTTSConfiguration,
|
||||
REGISTRY,
|
||||
CambTTSConfiguration,
|
||||
ServiceProviders,
|
||||
ServiceType,
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 1. CambTTSConfiguration model tests
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
|
|||
|
|
@ -54,6 +54,7 @@
|
|||
"pages": [
|
||||
"voice-agent/introduction",
|
||||
"voice-agent/editing-a-workflow",
|
||||
"voice-agent/custom-recordings",
|
||||
"voice-agent/template-variables",
|
||||
{
|
||||
"group": "Tools",
|
||||
|
|
|
|||
79
docs/voice-agent/custom-recordings.mdx
Normal file
79
docs/voice-agent/custom-recordings.mdx
Normal file
|
|
@ -0,0 +1,79 @@
|
|||
---
|
||||
title: "Custom Recordings"
|
||||
description: "Build hybrid voice agents that combine pre-recorded audio with dynamic text generation for lower latency, reduced TTS costs, and natural-sounding conversations."
|
||||
---
|
||||
|
||||
Custom recordings allow you to build **hybrid voice agents** that use your own pre-recorded audio for key parts of the conversation, while falling back to LLM-generated speech (via a cloned voice) for dynamic responses. This gives you the best of both worlds — the emotional depth of real human speech and the flexibility of AI-generated dialogue.
|
||||
|
||||
<iframe
|
||||
width="560"
|
||||
height="315"
|
||||
src="https://www.youtube.com/embed/1uZqhG0_cIo"
|
||||
title="YouTube video player"
|
||||
frameborder="0"
|
||||
allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share"
|
||||
referrerpolicy="strict-origin-when-cross-origin"
|
||||
allowfullscreen>
|
||||
</iframe>
|
||||
|
||||
## Why use custom recordings?
|
||||
|
||||
- **Reduced TTS cost** — Pre-recorded audio is played directly, so you are not charged for TTS synthesis on those segments.
|
||||
- **Emotional variance** — Real recordings carry natural intonation and emotion that TTS cannot fully replicate.
|
||||
- **Lower latency** — Playing a pre-recorded clip is faster than synthesizing speech from text at runtime.
|
||||
|
||||
## Prerequisites
|
||||
|
||||
- A TTS provider that supports **voice cloning** (e.g., Cartesia, ElevenLabs, or Deepgram).
|
||||
- An API key for your chosen TTS provider, configured in [Voice settings](/configurations/voice).
|
||||
|
||||
## Step 1: Clone your voice
|
||||
|
||||
Clone your voice with your TTS provider so that dynamically generated speech sounds similar to your recordings. For example, with Cartesia:
|
||||
|
||||
1. Go to Cartesia and navigate to **Instant Clone**.
|
||||
2. Record a short audio clip (up to 10 seconds) of your voice.
|
||||
3. Give the clone a name and select your language.
|
||||
4. Copy the **Voice ID** — you will need it in the next step.
|
||||
|
||||
<Note>
|
||||
You can use any TTS provider that supports voice cloning. The steps will vary by provider, but the key output is always a **Voice ID** tied to your cloned voice.
|
||||
</Note>
|
||||
|
||||
## Step 2: Configure the cloned voice in Dograh
|
||||
|
||||
1. Go to your agent's **Model Configuration** in the Dograh dashboard.
|
||||
2. Under voice settings, select **Add Voice ID manually**.
|
||||
3. Paste the Voice ID from your cloned voice.
|
||||
4. Make sure the **provider** matches where you cloned your voice (e.g., Cartesia).
|
||||
5. Enter the provider's API key if you haven't already.
|
||||
6. Save the configuration.
|
||||
|
||||
## Step 3: Upload recordings
|
||||
|
||||
Navigate to your agent in the workflow builder and open the **Recordings** panel. You can either upload pre-recorded audio files or record directly in the browser.
|
||||
|
||||
For each recording:
|
||||
|
||||
1. Click **Record** (or upload a file).
|
||||
2. Speak the exact phrase you want the agent to use.
|
||||
3. Give the recording a descriptive name (e.g., `greeting`, `invitation`, `venue`).
|
||||
4. Verify the transcription is correct — edit it if needed.
|
||||
5. Click **Upload**.
|
||||
|
||||
<Warning>
|
||||
Recordings are scoped to a specific **provider and Voice ID**. If you change either, you will need to re-upload your recordings to ensure consistency between the recorded audio and the cloned voice used for dynamic responses.
|
||||
</Warning>
|
||||
|
||||
## Step 4: Build the workflow
|
||||
|
||||
Open your agent's workflow and write the conversation flow in natural language. To insert a recording, type **`@`** in the prompt editor — this will show a list of all available recordings scoped to your current Voice ID.
|
||||
|
||||
For any user question that falls outside your recordings, the agent automatically generates a dynamic response using the LLM, which is then synthesized using your cloned voice via TTS.
|
||||
|
||||
## Tips for best results
|
||||
|
||||
- **Record in a quiet environment** to improve audio quality and consistency with the cloned voice.
|
||||
- **Use pro cloning services** (when available) and provide more sample audio for a higher-quality voice clone.
|
||||
- **Keep recordings concise** — short, focused clips work best for specific conversation moments.
|
||||
- **Review call recordings** after testing to identify where the transition between pre-recorded and dynamic audio can be improved.
|
||||
2
pipecat
2
pipecat
|
|
@ -1 +1 @@
|
|||
Subproject commit 3f566a4ba1e112255cc7459735bdb4b716948d59
|
||||
Subproject commit 2e2171e2a64ec87b3964fbc2440b5291489912a8
|
||||
|
|
@ -48,6 +48,12 @@ new api route in backend, and wish to use it in the UI, generate the client usin
|
|||
npm run generate-client
|
||||
```
|
||||
|
||||
## Conventions
|
||||
|
||||
### File Uploads
|
||||
|
||||
Always use a hidden `<input type="file">` with a visible `<Button>` that triggers it via `fileInputRef.current?.click()`. Never use a visible `<Input type="file">` — the native file input styling is inconsistent and confusing. Show the selected filename next to or below the button.
|
||||
|
||||
## Development
|
||||
|
||||
```bash
|
||||
|
|
|
|||
|
|
@ -519,13 +519,17 @@ export default function RunsPage() {
|
|||
variant="outline"
|
||||
size="icon"
|
||||
onClick={() => {
|
||||
const filter = encodeURIComponent(
|
||||
`metadata;stringObject;attributes;contains;conversation.id,metadata;stringObject;attributes;contains;${run.id}`,
|
||||
);
|
||||
window.open(
|
||||
`${process.env.NEXT_PUBLIC_LANGFUSE_ENDPOINT}/project/${process.env.NEXT_PUBLIC_LANGFUSE_PROJECT_ID}/traces?search=&filter=${filter}&dateRange=All+time`,
|
||||
'_blank',
|
||||
);
|
||||
if (run.gathered_context?.trace_url) {
|
||||
window.open(String(run.gathered_context.trace_url), '_blank');
|
||||
} else {
|
||||
const filter = encodeURIComponent(
|
||||
`metadata;stringObject;attributes;contains;conversation.id,metadata;stringObject;attributes;contains;${run.id}`,
|
||||
);
|
||||
window.open(
|
||||
`${process.env.NEXT_PUBLIC_LANGFUSE_ENDPOINT}/project/${process.env.NEXT_PUBLIC_LANGFUSE_PROJECT_ID}/traces?search=&filter=${filter}&dateRange=All+time`,
|
||||
'_blank',
|
||||
);
|
||||
}
|
||||
}}
|
||||
>
|
||||
<Image
|
||||
|
|
|
|||
|
|
@ -14,6 +14,7 @@ import type { DocumentResponseSchema, RecordingResponseSchema, ToolResponse } fr
|
|||
import { FlowEdge, FlowNode, NodeType } from "@/components/flow/types";
|
||||
import { Button } from '@/components/ui/button';
|
||||
import { Tooltip, TooltipContent, TooltipProvider, TooltipTrigger } from '@/components/ui/tooltip';
|
||||
import { useUserConfig } from '@/context/UserConfigContext';
|
||||
import { WorkflowConfigurations } from '@/types/workflow-configurations';
|
||||
|
||||
import AddNodePanel from "../../../components/flow/AddNodePanel";
|
||||
|
|
@ -64,6 +65,11 @@ interface RenderWorkflowProps {
|
|||
}
|
||||
|
||||
function RenderWorkflow({ initialWorkflowName, workflowId, initialFlow, initialTemplateContextVariables, initialWorkflowConfigurations, user }: RenderWorkflowProps) {
|
||||
const { userConfig } = useUserConfig();
|
||||
const ttsProvider = (userConfig?.tts?.provider as string) ?? "";
|
||||
const ttsModel = (userConfig?.tts?.model as string) ?? "";
|
||||
const ttsVoiceId = (userConfig?.tts?.voice as string) ?? "";
|
||||
|
||||
const [isContextVarsDialogOpen, setIsContextVarsDialogOpen] = useState(false);
|
||||
const [isConfigurationsDialogOpen, setIsConfigurationsDialogOpen] = useState(false);
|
||||
const [isDictionaryDialogOpen, setIsDictionaryDialogOpen] = useState(false);
|
||||
|
|
@ -125,10 +131,15 @@ function RenderWorkflow({ initialWorkflowName, workflowId, initialFlow, initialT
|
|||
setTools(toolsResponse.data);
|
||||
}
|
||||
|
||||
// Fetch recordings for this workflow
|
||||
// Fetch recordings for this workflow filtered by active TTS config
|
||||
try {
|
||||
const recordingsResponse = await listRecordingsApiV1WorkflowRecordingsGet({
|
||||
query: { workflow_id: workflowId },
|
||||
query: {
|
||||
workflow_id: workflowId,
|
||||
tts_provider: ttsProvider || undefined,
|
||||
tts_model: ttsModel || undefined,
|
||||
tts_voice_id: ttsVoiceId || undefined,
|
||||
},
|
||||
});
|
||||
if (recordingsResponse.data) {
|
||||
setRecordings(recordingsResponse.data.recordings);
|
||||
|
|
@ -142,7 +153,7 @@ function RenderWorkflow({ initialWorkflowName, workflowId, initialFlow, initialT
|
|||
};
|
||||
|
||||
fetchData();
|
||||
}, [workflowId]);
|
||||
}, [workflowId, ttsProvider, ttsModel, ttsVoiceId]);
|
||||
|
||||
// Memoize defaultEdgeOptions to prevent unnecessary re-renders
|
||||
const defaultEdgeOptions = useMemo(() => ({
|
||||
|
|
|
|||
|
|
@ -1,11 +1,13 @@
|
|||
import { Loader2, Trash2Icon, Upload } from "lucide-react";
|
||||
import { Loader2, Mic, Pause, Play, Square, Trash2Icon, Upload } from "lucide-react";
|
||||
import { useCallback, useEffect, useRef, useState } from "react";
|
||||
|
||||
import {
|
||||
createRecordingApiV1WorkflowRecordingsPost,
|
||||
deleteRecordingApiV1WorkflowRecordingsRecordingIdDelete,
|
||||
getSignedUrlApiV1S3SignedUrlGet,
|
||||
getUploadUrlApiV1WorkflowRecordingsUploadUrlPost,
|
||||
listRecordingsApiV1WorkflowRecordingsGet,
|
||||
transcribeAudioApiV1WorkflowRecordingsTranscribePost,
|
||||
} from "@/client";
|
||||
import type { RecordingResponseSchema } from "@/client/types.gen";
|
||||
import { Button } from "@/components/ui/button";
|
||||
|
|
@ -18,6 +20,15 @@ import {
|
|||
} from "@/components/ui/dialog";
|
||||
import { Input } from "@/components/ui/input";
|
||||
import { Label } from "@/components/ui/label";
|
||||
import {
|
||||
Select,
|
||||
SelectContent,
|
||||
SelectItem,
|
||||
SelectTrigger,
|
||||
SelectValue,
|
||||
} from "@/components/ui/select";
|
||||
import { Textarea } from "@/components/ui/textarea";
|
||||
import { LANGUAGE_DISPLAY_NAMES } from "@/constants/languages";
|
||||
import { useUserConfig } from "@/context/UserConfigContext";
|
||||
|
||||
interface RecordingsDialogProps {
|
||||
|
|
@ -29,6 +40,8 @@ interface RecordingsDialogProps {
|
|||
|
||||
const MAX_FILE_SIZE = 5 * 1024 * 1024; // 5MB
|
||||
|
||||
type RecordingStep = "idle" | "naming" | "recording" | "transcribing";
|
||||
|
||||
export const RecordingsDialog = ({
|
||||
open,
|
||||
onOpenChange,
|
||||
|
|
@ -42,7 +55,18 @@ export const RecordingsDialog = ({
|
|||
const [transcript, setTranscript] = useState("");
|
||||
const [selectedFile, setSelectedFile] = useState<File | null>(null);
|
||||
const [error, setError] = useState<string | null>(null);
|
||||
const [language, setLanguage] = useState("multi");
|
||||
const [recordingStep, setRecordingStep] = useState<RecordingStep>("idle");
|
||||
const [recordingFilename, setRecordingFilename] = useState("");
|
||||
const [recordingDuration, setRecordingDuration] = useState(0);
|
||||
const [playingId, setPlayingId] = useState<string | null>(null);
|
||||
const audioRef = useRef<HTMLAudioElement | null>(null);
|
||||
const mediaRecorderRef = useRef<MediaRecorder | null>(null);
|
||||
const audioChunksRef = useRef<Blob[]>([]);
|
||||
const recordingTimerRef = useRef<ReturnType<typeof setInterval> | null>(null);
|
||||
const fileInputRef = useRef<HTMLInputElement>(null);
|
||||
const languageRef = useRef(language);
|
||||
languageRef.current = language;
|
||||
|
||||
const ttsProvider = (userConfig?.tts?.provider as string) ?? "";
|
||||
const ttsModel = (userConfig?.tts?.model as string) ?? "";
|
||||
|
|
@ -70,14 +94,128 @@ export const RecordingsDialog = ({
|
|||
}
|
||||
}, [workflowId, ttsProvider, ttsModel, ttsVoiceId, onRecordingsChange]);
|
||||
|
||||
// Cancels the elapsed-time ticker if one is active and forgets its handle.
const stopRecordingTimer = useCallback(() => {
    const activeTimer = recordingTimerRef.current;
    if (!activeTimer) return;
    clearInterval(activeTimer);
    recordingTimerRef.current = null;
}, []);
|
||||
|
||||
// Ends the current MediaRecorder session. A missing or already-inactive
// recorder is left alone, so calling this repeatedly is harmless.
const stopRecording = useCallback(() => {
    const recorder = mediaRecorderRef.current;
    if (!recorder || recorder.state === "inactive") return;
    recorder.stop();
}, []);
|
||||
|
||||
// Puts the record-in-browser flow back to its starting point:
// step "idle", empty recording name, zero elapsed seconds.
const resetRecordingState = useCallback(() => {
    setRecordingStep("idle");
    setRecordingFilename("");
    setRecordingDuration(0);
}, []);
|
||||
|
||||
// Pauses the preview audio (when there is one), drops the reference to it,
// and clears the "currently playing" marker used by the list rows.
const stopPlayback = useCallback(() => {
    audioRef.current?.pause();
    audioRef.current = null;
    setPlayingId(null);
}, []);
|
||||
|
||||
useEffect(() => {
|
||||
if (open) {
|
||||
fetchRecordings();
|
||||
setError(null);
|
||||
setTranscript("");
|
||||
setSelectedFile(null);
|
||||
setLanguage("multi");
|
||||
resetRecordingState();
|
||||
}
|
||||
}, [open, fetchRecordings]);
|
||||
}, [open, fetchRecordings, resetRecordingState]);
|
||||
|
||||
// While the dialog is not open, make sure nothing keeps running in the
// background: the recorder, its duration ticker, and any preview playback.
useEffect(() => {
    if (open) return;
    stopRecording();
    stopRecordingTimer();
    stopPlayback();
}, [open, stopRecording, stopRecordingTimer, stopPlayback]);
|
||||
|
||||
/**
 * Sends `file` to the transcription endpoint and pre-fills the transcript
 * field with the result.
 *
 * The language is read from `languageRef` rather than the `language` state so
 * that callers holding an older closure (e.g. the MediaRecorder `onstop`
 * handler) still use the most recently selected language.
 *
 * Failure is never fatal: the user is told auto-transcription failed and can
 * type the transcript by hand. The step always returns to "idle" afterwards.
 */
const transcribeFile = async (file: File) => {
    const failureMessage = "Auto-transcription failed. You can type the transcript manually.";
    setRecordingStep("transcribing");
    try {
        const result = await transcribeAudioApiV1WorkflowRecordingsTranscribePost({
            body: { file, language: languageRef.current },
        });
        // The generated client reports HTTP failures through `error` instead of
        // throwing, so a rejected request must be surfaced here as well.
        if (result.error) {
            setError(failureMessage);
            return;
        }
        const data = result.data as Record<string, unknown> | undefined;
        const text = data?.transcript;
        // Only accept a non-empty string; the response body is typed `unknown`.
        if (typeof text === "string" && text) {
            setTranscript(text);
        }
    } catch {
        // Network-level failure — user can still type manually.
        setError(failureMessage);
    } finally {
        setRecordingStep("idle");
    }
};
|
||||
|
||||
/**
 * Starts capturing microphone audio for a new recording.
 *
 * When the recorder stops, the captured chunks are assembled into a File named
 * after the user-entered recording name, checked against MAX_FILE_SIZE, made
 * the pending upload, and sent for auto-transcription.
 *
 * Permission failures and recorder start-up failures are reported separately,
 * and the microphone is always released if recording could not begin.
 */
const startRecording = async () => {
    let stream: MediaStream;
    try {
        stream = await navigator.mediaDevices.getUserMedia({ audio: true });
    } catch {
        setError("Microphone access denied. Please allow microphone permissions.");
        resetRecordingState();
        return;
    }

    try {
        const mediaRecorder = new MediaRecorder(stream);
        mediaRecorderRef.current = mediaRecorder;
        audioChunksRef.current = [];

        mediaRecorder.ondataavailable = (e) => {
            if (e.data.size > 0) audioChunksRef.current.push(e.data);
        };

        // Captured now so the name in effect when recording started is used.
        const filename = recordingFilename.trim() || "recording";
        mediaRecorder.onstop = () => {
            // Release the microphone as soon as capture ends.
            stream.getTracks().forEach((t) => t.stop());
            stopRecordingTimer();

            const mimeType = mediaRecorder.mimeType;
            const blob = new Blob(audioChunksRef.current, { type: mimeType });
            if (blob.size > MAX_FILE_SIZE) {
                setError(`Recording (${(blob.size / (1024 * 1024)).toFixed(1)}MB) exceeds the maximum allowed size of 5MB.`);
                resetRecordingState();
                return;
            }
            // Pick the extension that matches the container the browser produced;
            // ogg is handled explicitly so it is not mislabelled as mp4.
            let ext = "mp4";
            if (mimeType.includes("webm")) ext = "webm";
            else if (mimeType.includes("ogg")) ext = "ogg";
            const file = new File([blob], `${filename}.${ext}`, { type: mimeType });
            setSelectedFile(file);
            setError(null);
            transcribeFile(file);
        };

        mediaRecorder.start();
        setRecordingStep("recording");
        setRecordingDuration(0);
        setError(null);
        // Never leave an earlier interval running next to the new one.
        stopRecordingTimer();
        recordingTimerRef.current = setInterval(() => {
            setRecordingDuration((d) => d + 1);
        }, 1000);
    } catch {
        // The stream was granted but the recorder could not be started:
        // release the microphone instead of leaving it open.
        stream.getTracks().forEach((t) => t.stop());
        stopRecordingTimer();
        setError("Unable to start recording in this browser.");
        resetRecordingState();
    }
};
|
||||
|
||||
// Click handler for the "Stop" button; finalisation happens in the recorder's onstop callback.
const handleStopRecording = () => stopRecording();
|
||||
|
||||
/**
 * Handles a choice from the hidden file picker.
 *
 * A file above MAX_FILE_SIZE is rejected: an error is shown, the selection is
 * cleared and the native input is reset. Otherwise the file (or `null` when
 * nothing was chosen) becomes the pending upload, and a real file is sent for
 * auto-transcription.
 */
const handleFileSelect = (file: File | null) => {
    if (file && file.size > MAX_FILE_SIZE) {
        const sizeInMb = (file.size / (1024 * 1024)).toFixed(1);
        setError(`File size (${sizeInMb}MB) exceeds the maximum allowed size of 5MB.`);
        setSelectedFile(null);
        const picker = fileInputRef.current;
        if (picker) {
            picker.value = "";
        }
        return;
    }

    setError(null);
    setSelectedFile(file);
    if (file) {
        transcribeFile(file);
    }
};
|
||||
|
||||
const handleUpload = async () => {
|
||||
if (!selectedFile || !transcript.trim()) return;
|
||||
|
|
@ -137,6 +275,7 @@ export const RecordingsDialog = ({
|
|||
original_filename: selectedFile.name,
|
||||
file_size_bytes: selectedFile.size,
|
||||
mime_type: selectedFile.type,
|
||||
language,
|
||||
},
|
||||
},
|
||||
});
|
||||
|
|
@ -144,6 +283,8 @@ export const RecordingsDialog = ({
|
|||
// Reset form and refresh list
|
||||
setTranscript("");
|
||||
setSelectedFile(null);
|
||||
setLanguage("multi");
|
||||
resetRecordingState();
|
||||
if (fileInputRef.current) fileInputRef.current.value = "";
|
||||
await fetchRecordings();
|
||||
} catch (err) {
|
||||
|
|
@ -166,13 +307,44 @@ export const RecordingsDialog = ({
|
|||
}
|
||||
};
|
||||
|
||||
/**
 * Toggles preview playback for a recording.
 *
 * Clicking the row that is already playing stops it. Otherwise any current
 * playback is stopped, a signed URL is requested for the recording's storage
 * key, and the audio is played.
 *
 * If playback fails, the "playing" state is rolled back so the row does not
 * stay stuck on the pause icon. A rejection that only happened because this
 * audio was paused or replaced in the meantime is not reported as an error.
 */
const handlePlay = async (rec: RecordingResponseSchema) => {
    const wasPlayingThis = playingId === rec.recording_id;
    stopPlayback();
    if (wasPlayingThis) return;

    // Kept outside the try so the catch can tell which stage failed.
    let audio: HTMLAudioElement | null = null;
    try {
        const result = await getSignedUrlApiV1S3SignedUrlGet({
            query: {
                key: rec.storage_key,
                storage_backend: rec.storage_backend,
            },
        });
        if (!result.data?.url) {
            setError("Failed to get audio URL");
            return;
        }
        audio = new Audio(result.data.url);
        audio.onended = () => setPlayingId(null);
        audioRef.current = audio;
        setPlayingId(rec.recording_id);
        await audio.play();
    } catch {
        if (audio !== null) {
            // play() also rejects when the element is paused before it starts;
            // if this audio is no longer the active one, that is not a failure.
            if (audioRef.current !== audio) return;
            // Roll back the playing state this call set.
            stopPlayback();
        }
        setError("Failed to play recording");
    }
};
|
||||
|
||||
const isRecording = recordingStep === "recording";
|
||||
const isTranscribing = recordingStep === "transcribing";
|
||||
const isBusy = uploading || isRecording || isTranscribing;
|
||||
|
||||
return (
|
||||
<Dialog open={open} onOpenChange={onOpenChange}>
|
||||
<DialogContent className="max-w-lg max-h-[80vh] overflow-y-auto">
|
||||
<DialogHeader>
|
||||
<DialogTitle>Workflow Recordings</DialogTitle>
|
||||
<DialogDescription>
|
||||
Upload audio recordings for hybrid prompts. Recordings are
|
||||
Upload or record audio for hybrid prompts. Recordings are
|
||||
scoped to your current TTS configuration. Use{" "}
|
||||
<code className="text-xs bg-muted px-1 rounded">@</code> in
|
||||
prompt fields to insert them.
|
||||
|
|
@ -211,48 +383,158 @@ export const RecordingsDialog = ({
|
|||
|
||||
{/* Upload Section */}
|
||||
<div className="space-y-3 border rounded-md p-3">
|
||||
<Label className="text-sm font-medium">Upload New Recording</Label>
|
||||
<Label className="text-sm font-medium">Add New Recording</Label>
|
||||
|
||||
{/* Audio source: file picker or record */}
|
||||
<div>
|
||||
<Label className="text-xs text-muted-foreground">
|
||||
Audio File
|
||||
</Label>
|
||||
<Input
|
||||
ref={fileInputRef}
|
||||
type="file"
|
||||
accept="audio/*"
|
||||
onChange={(e) => {
|
||||
const file = e.target.files?.[0] ?? null;
|
||||
if (file && file.size > MAX_FILE_SIZE) {
|
||||
setError(
|
||||
`File size (${(file.size / (1024 * 1024)).toFixed(1)}MB) exceeds the maximum allowed size of 5MB.`
|
||||
);
|
||||
setSelectedFile(null);
|
||||
if (fileInputRef.current) fileInputRef.current.value = "";
|
||||
return;
|
||||
}
|
||||
setError(null);
|
||||
setSelectedFile(file);
|
||||
}}
|
||||
className="text-sm"
|
||||
/>
|
||||
<p className="text-xs text-muted-foreground mt-1">
|
||||
Max 5MB
|
||||
</p>
|
||||
<div className="flex gap-2">
|
||||
<input
|
||||
ref={fileInputRef}
|
||||
type="file"
|
||||
accept="audio/*"
|
||||
onChange={(e) => handleFileSelect(e.target.files?.[0] ?? null)}
|
||||
className="hidden"
|
||||
/>
|
||||
<Button
|
||||
type="button"
|
||||
variant="outline"
|
||||
size="sm"
|
||||
className="flex-1 justify-start text-sm font-normal"
|
||||
onClick={() => fileInputRef.current?.click()}
|
||||
disabled={isBusy}
|
||||
>
|
||||
<Upload className="w-4 h-4 mr-2 shrink-0" />
|
||||
{selectedFile && recordingStep !== "naming" ? (
|
||||
<span className="truncate">
|
||||
{selectedFile.name} ({(selectedFile.size / (1024 * 1024)).toFixed(1)}MB)
|
||||
</span>
|
||||
) : (
|
||||
<span className="text-muted-foreground">Choose audio file (max 5MB)</span>
|
||||
)}
|
||||
</Button>
|
||||
{recordingStep === "idle" && (
|
||||
<Button
|
||||
type="button"
|
||||
variant="outline"
|
||||
size="sm"
|
||||
onClick={() => setRecordingStep("naming")}
|
||||
disabled={uploading || isTranscribing}
|
||||
>
|
||||
<Mic className="w-4 h-4 mr-1" />
|
||||
Record
|
||||
</Button>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Recording: filename + start/stop */}
|
||||
{(recordingStep === "naming" || isRecording) && (
|
||||
<div className="space-y-2 rounded-md border border-dashed p-3 bg-muted/20">
|
||||
{recordingStep === "naming" && (
|
||||
<>
|
||||
<div>
|
||||
<Label className="text-xs text-muted-foreground">
|
||||
Recording Name
|
||||
</Label>
|
||||
<Input
|
||||
placeholder="e.g. greeting, hold-message"
|
||||
value={recordingFilename}
|
||||
onChange={(e) => setRecordingFilename(e.target.value)}
|
||||
autoFocus
|
||||
/>
|
||||
</div>
|
||||
<div className="flex gap-2">
|
||||
<Button
|
||||
size="sm"
|
||||
onClick={startRecording}
|
||||
disabled={!recordingFilename.trim()}
|
||||
>
|
||||
<Mic className="w-4 h-4 mr-1" />
|
||||
Start Recording
|
||||
</Button>
|
||||
<Button
|
||||
size="sm"
|
||||
variant="ghost"
|
||||
onClick={resetRecordingState}
|
||||
>
|
||||
Cancel
|
||||
</Button>
|
||||
</div>
|
||||
</>
|
||||
)}
|
||||
{isRecording && (
|
||||
<div className="flex items-center gap-3">
|
||||
<span className="relative flex h-3 w-3">
|
||||
<span className="animate-ping absolute inline-flex h-full w-full rounded-full bg-red-400 opacity-75" />
|
||||
<span className="relative inline-flex rounded-full h-3 w-3 bg-red-500" />
|
||||
</span>
|
||||
<span className="text-sm font-mono">
|
||||
{Math.floor(recordingDuration / 60)}:{(recordingDuration % 60).toString().padStart(2, "0")}
|
||||
</span>
|
||||
<span className="text-xs text-muted-foreground">{recordingFilename}</span>
|
||||
<Button
|
||||
size="sm"
|
||||
variant="destructive"
|
||||
onClick={handleStopRecording}
|
||||
className="ml-auto"
|
||||
>
|
||||
<Square className="w-4 h-4 mr-1" />
|
||||
Stop
|
||||
</Button>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Transcribing progress */}
|
||||
{isTranscribing && (
|
||||
<div className="flex items-center gap-2 text-sm text-muted-foreground">
|
||||
<Loader2 className="w-4 h-4 animate-spin" />
|
||||
Transcribing audio...
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Language */}
|
||||
<div>
|
||||
<Label className="text-xs text-muted-foreground">
|
||||
Language
|
||||
</Label>
|
||||
<Select value={language} onValueChange={setLanguage}>
|
||||
<SelectTrigger className="h-9 text-sm">
|
||||
<SelectValue />
|
||||
</SelectTrigger>
|
||||
<SelectContent>
|
||||
{Object.entries(LANGUAGE_DISPLAY_NAMES).map(([code, name]) => (
|
||||
<SelectItem key={code} value={code}>
|
||||
{name}
|
||||
</SelectItem>
|
||||
))}
|
||||
</SelectContent>
|
||||
</Select>
|
||||
</div>
|
||||
|
||||
{/* Transcript */}
|
||||
<div>
|
||||
<Label className="text-xs text-muted-foreground">
|
||||
Transcript
|
||||
</Label>
|
||||
<Input
|
||||
placeholder="What does this recording say?"
|
||||
<Textarea
|
||||
placeholder={isTranscribing ? "Transcribing..." : "What does this recording say?"}
|
||||
value={transcript}
|
||||
onChange={(e) => setTranscript(e.target.value)}
|
||||
disabled={isTranscribing}
|
||||
rows={3}
|
||||
className="resize-none text-sm"
|
||||
/>
|
||||
</div>
|
||||
|
||||
<Button
|
||||
size="sm"
|
||||
onClick={handleUpload}
|
||||
disabled={!selectedFile || !transcript.trim() || uploading}
|
||||
disabled={!selectedFile || !transcript.trim() || isBusy}
|
||||
>
|
||||
{uploading ? (
|
||||
<Loader2 className="w-4 h-4 mr-1 animate-spin" />
|
||||
|
|
@ -289,14 +571,25 @@ export const RecordingsDialog = ({
|
|||
>
|
||||
<div className="flex-1 min-w-0">
|
||||
<div className="flex items-center gap-2">
|
||||
<code className="text-xs bg-muted px-1.5 py-0.5 rounded font-mono">
|
||||
{rec.recording_id}
|
||||
<code className="text-xs bg-muted px-1.5 py-0.5 rounded font-mono truncate max-w-[300px]">
|
||||
{(rec.metadata?.original_filename as string) || rec.recording_id}
|
||||
</code>
|
||||
</div>
|
||||
<p className="text-sm text-muted-foreground mt-1 break-all line-clamp-2">
|
||||
{rec.transcript}
|
||||
</p>
|
||||
</div>
|
||||
<Button
|
||||
size="sm"
|
||||
variant="ghost"
|
||||
onClick={() => handlePlay(rec)}
|
||||
>
|
||||
{playingId === rec.recording_id ? (
|
||||
<Pause className="w-4 h-4" />
|
||||
) : (
|
||||
<Play className="w-4 h-4" />
|
||||
)}
|
||||
</Button>
|
||||
<Button
|
||||
size="sm"
|
||||
variant="ghost"
|
||||
|
|
|
|||
|
|
@ -363,7 +363,13 @@ export const useWorkflowState = ({
|
|||
// Save workflow function
|
||||
const saveWorkflow = useCallback(async (updateWorkflowDefinition: boolean = true) => {
|
||||
if (!user || !rfInstance.current) return;
|
||||
const flow = rfInstance.current.toObject();
|
||||
// Read nodes/edges from the Zustand store (synchronously up-to-date)
|
||||
// and viewport from the ReactFlow instance to build the flow object.
|
||||
// This avoids a race condition where rfInstance.toObject() may return
|
||||
// stale node data if React hasn't re-rendered yet after a store update.
|
||||
const { nodes: currentNodes, edges: currentEdges } = useWorkflowStore.getState();
|
||||
const viewport = rfInstance.current.getViewport();
|
||||
const flow = { nodes: currentNodes, edges: currentEdges, viewport };
|
||||
try {
|
||||
await updateWorkflowApiV1WorkflowWorkflowIdPut({
|
||||
path: {
|
||||
|
|
|
|||
|
|
@ -18,14 +18,12 @@ export const layoutNodes = (
|
|||
// Separate nodes by type
|
||||
const triggerNodes = nodes.filter(n => n.type === NodeType.TRIGGER);
|
||||
const webhookNodes = nodes.filter(n => n.type === NodeType.WEBHOOK);
|
||||
const globalNodes = nodes.filter(n => n.type === NodeType.GLOBAL_NODE || n.type === 'global');
|
||||
const qaNodes = nodes.filter(n => n.type === NodeType.QA);
|
||||
const globalNodes = nodes.filter(n => n.type === NodeType.GLOBAL_NODE);
|
||||
const workflowNodes = nodes.filter(n =>
|
||||
n.type === NodeType.START_CALL ||
|
||||
n.type === NodeType.AGENT_NODE ||
|
||||
n.type === NodeType.END_CALL ||
|
||||
n.type === 'startCall' ||
|
||||
n.type === 'agentNode' ||
|
||||
n.type === 'endCall'
|
||||
n.type === NodeType.END_CALL
|
||||
);
|
||||
|
||||
// If no workflow nodes, just return original nodes
|
||||
|
|
@ -161,12 +159,26 @@ export const layoutNodes = (
|
|||
};
|
||||
});
|
||||
|
||||
// Position QA nodes below webhook nodes on the right side
|
||||
const qaStartY = webhookNodes.length > 0
|
||||
? workflowCenterY - (webhookNodes.length * NODE_HEIGHT + (webhookNodes.length - 1) * VERTICAL_SPACING) / 2
|
||||
+ webhookNodes.length * (NODE_HEIGHT + VERTICAL_SPACING) + VERTICAL_SPACING
|
||||
: workflowCenterY;
|
||||
const positionedQaNodes = qaNodes.map((node, index) => ({
|
||||
...node,
|
||||
position: {
|
||||
x: webhookNodesX,
|
||||
y: qaStartY + index * (NODE_HEIGHT + VERTICAL_SPACING)
|
||||
}
|
||||
}));
|
||||
|
||||
// Combine all positioned nodes
|
||||
const allPositionedNodes = [
|
||||
...positionedTriggerNodes,
|
||||
...positionedGlobalNodes,
|
||||
...positionedWorkflowNodes,
|
||||
...positionedWebhookNodes
|
||||
...positionedWebhookNodes,
|
||||
...positionedQaNodes
|
||||
];
|
||||
|
||||
// Create a map for quick lookup
|
||||
|
|
|
|||
File diff suppressed because one or more lines are too long
|
|
@ -80,6 +80,11 @@ export type AuthUserResponse = {
|
|||
is_superuser: boolean;
|
||||
};
|
||||
|
||||
/**
 * Request body for the transcribe-audio call: the audio `file` to transcribe
 * and an optional `language` string.
 */
export type BodyTranscribeAudioApiV1WorkflowRecordingsTranscribePost = {
|
||||
file: Blob | File;
|
||||
language?: string;
|
||||
};
|
||||
|
||||
export type CallDispositionCodes = {
|
||||
disposition_codes?: Array<string>;
|
||||
};
|
||||
|
|
@ -4019,6 +4024,10 @@ export type GetSignedUrlApiV1S3SignedUrlGetData = {
|
|||
key: string;
|
||||
expires_in?: number;
|
||||
inline?: boolean;
|
||||
/**
|
||||
* Storage backend to use (e.g. 'minio', 's3'). When omitted the backend is inferred from the resource.
|
||||
*/
|
||||
storage_backend?: string | null;
|
||||
};
|
||||
url: '/api/v1/s3/signed-url';
|
||||
};
|
||||
|
|
@ -5568,6 +5577,37 @@ export type DeleteRecordingApiV1WorkflowRecordingsRecordingIdDeleteResponses = {
|
|||
200: unknown;
|
||||
};
|
||||
|
||||
/**
 * Full request shape for POST /api/v1/workflow-recordings/transcribe:
 * a required body, optional `authorization` / `X-API-Key` headers, and no
 * path or query parameters.
 */
export type TranscribeAudioApiV1WorkflowRecordingsTranscribePostData = {
|
||||
body: BodyTranscribeAudioApiV1WorkflowRecordingsTranscribePost;
|
||||
headers?: {
|
||||
authorization?: string | null;
|
||||
'X-API-Key'?: string | null;
|
||||
};
|
||||
path?: never;
|
||||
query?: never;
|
||||
url: '/api/v1/workflow-recordings/transcribe';
|
||||
};
|
||||
|
||||
/**
 * Error payload types of the transcribe-audio call, keyed by status code
 * (404 and 422).
 */
export type TranscribeAudioApiV1WorkflowRecordingsTranscribePostErrors = {
|
||||
/**
|
||||
* Not found
|
||||
*/
|
||||
404: unknown;
|
||||
/**
|
||||
* Validation Error
|
||||
*/
|
||||
422: HttpValidationError;
|
||||
};
|
||||
|
||||
/** Union of every payload type listed in TranscribeAudioApiV1WorkflowRecordingsTranscribePostErrors. */
export type TranscribeAudioApiV1WorkflowRecordingsTranscribePostError = TranscribeAudioApiV1WorkflowRecordingsTranscribePostErrors[keyof TranscribeAudioApiV1WorkflowRecordingsTranscribePostErrors];
|
||||
|
||||
/**
 * Success payload types of the transcribe-audio call. The 200 body is typed
 * `unknown`, so callers have to narrow it themselves before reading fields.
 */
export type TranscribeAudioApiV1WorkflowRecordingsTranscribePostResponses = {
|
||||
/**
|
||||
* Successful Response
|
||||
*/
|
||||
200: unknown;
|
||||
};
|
||||
|
||||
export type SignupApiV1AuthSignupPostData = {
|
||||
body: SignupRequest;
|
||||
path?: never;
|
||||
|
|
|
|||
|
|
@ -13,6 +13,7 @@ import { Label } from "@/components/ui/label";
|
|||
import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from "@/components/ui/select";
|
||||
import { Tabs, TabsContent, TabsList, TabsTrigger } from "@/components/ui/tabs";
|
||||
import { VoiceSelector } from "@/components/VoiceSelector";
|
||||
import { LANGUAGE_DISPLAY_NAMES } from "@/constants/languages";
|
||||
import { useUserConfig } from "@/context/UserConfigContext";
|
||||
|
||||
type ServiceSegment = "llm" | "tts" | "stt" | "embeddings";
|
||||
|
|
@ -46,105 +47,6 @@ const TAB_CONFIG: { key: ServiceSegment; label: string }[] = [
|
|||
{ key: "embeddings", label: "Embedding" },
|
||||
];
|
||||
|
||||
// Display names for language codes (Deepgram + Sarvam)
|
||||
const LANGUAGE_DISPLAY_NAMES: Record<string, string> = {
|
||||
"multi": "Multilingual (Auto-detect)",
|
||||
// Arabic
|
||||
"ar": "Arabic",
|
||||
"ar-AE": "Arabic (UAE)",
|
||||
"ar-SA": "Arabic (Saudi Arabia)",
|
||||
"ar-QA": "Arabic (Qatar)",
|
||||
"ar-KW": "Arabic (Kuwait)",
|
||||
"ar-SY": "Arabic (Syria)",
|
||||
"ar-LB": "Arabic (Lebanon)",
|
||||
"ar-PS": "Arabic (Palestine)",
|
||||
"ar-JO": "Arabic (Jordan)",
|
||||
"ar-EG": "Arabic (Egypt)",
|
||||
"ar-SD": "Arabic (Sudan)",
|
||||
"ar-TD": "Arabic (Chad)",
|
||||
"ar-MA": "Arabic (Morocco)",
|
||||
"ar-DZ": "Arabic (Algeria)",
|
||||
"ar-TN": "Arabic (Tunisia)",
|
||||
"ar-IQ": "Arabic (Iraq)",
|
||||
"ar-IR": "Arabic (Iran)",
|
||||
// Other languages
|
||||
"be": "Belarusian",
|
||||
"bn": "Bengali",
|
||||
"bs": "Bosnian",
|
||||
"bg": "Bulgarian",
|
||||
"ca": "Catalan",
|
||||
"cs": "Czech",
|
||||
"da": "Danish",
|
||||
"da-DK": "Danish (Denmark)",
|
||||
"de": "German",
|
||||
"de-CH": "German (Switzerland)",
|
||||
"el": "Greek",
|
||||
"en": "English",
|
||||
"en-US": "English (US)",
|
||||
"en-AU": "English (Australia)",
|
||||
"en-GB": "English (UK)",
|
||||
"en-IN": "English (India)",
|
||||
"en-NZ": "English (New Zealand)",
|
||||
"es": "Spanish",
|
||||
"es-419": "Spanish (Latin America)",
|
||||
"et": "Estonian",
|
||||
"fa": "Persian",
|
||||
"fi": "Finnish",
|
||||
"fr": "French",
|
||||
"fr-CA": "French (Canada)",
|
||||
"he": "Hebrew",
|
||||
"hi": "Hindi",
|
||||
"hr": "Croatian",
|
||||
"hu": "Hungarian",
|
||||
"id": "Indonesian",
|
||||
"it": "Italian",
|
||||
"ja": "Japanese",
|
||||
"kn": "Kannada",
|
||||
"ko": "Korean",
|
||||
"ko-KR": "Korean (South Korea)",
|
||||
"lt": "Lithuanian",
|
||||
"lv": "Latvian",
|
||||
"mk": "Macedonian",
|
||||
"mr": "Marathi",
|
||||
"ms": "Malay",
|
||||
"nl": "Dutch",
|
||||
"nl-BE": "Flemish",
|
||||
"no": "Norwegian",
|
||||
"pl": "Polish",
|
||||
"pt": "Portuguese",
|
||||
"pt-BR": "Portuguese (Brazil)",
|
||||
"pt-PT": "Portuguese (Portugal)",
|
||||
"ro": "Romanian",
|
||||
"ru": "Russian",
|
||||
"sk": "Slovak",
|
||||
"sl": "Slovenian",
|
||||
"sr": "Serbian",
|
||||
"sv": "Swedish",
|
||||
"sv-SE": "Swedish (Sweden)",
|
||||
"ta": "Tamil",
|
||||
"te": "Telugu",
|
||||
"th": "Thai",
|
||||
"tl": "Tagalog",
|
||||
"tr": "Turkish",
|
||||
"uk": "Ukrainian",
|
||||
"ur": "Urdu",
|
||||
"vi": "Vietnamese",
|
||||
"zh-CN": "Chinese (Simplified)",
|
||||
"zh-TW": "Chinese (Traditional)",
|
||||
// Sarvam Indian languages
|
||||
"bn-IN": "Bengali",
|
||||
"gu-IN": "Gujarati",
|
||||
"hi-IN": "Hindi",
|
||||
"kn-IN": "Kannada",
|
||||
"ml-IN": "Malayalam",
|
||||
"mr-IN": "Marathi",
|
||||
"od-IN": "Odia",
|
||||
"pa-IN": "Punjabi",
|
||||
"ta-IN": "Tamil",
|
||||
"te-IN": "Telugu",
|
||||
"as-IN": "Assamese",
|
||||
};
|
||||
|
||||
// Display names for Sarvam voices
|
||||
const VOICE_DISPLAY_NAMES: Record<string, string> = {
|
||||
"anushka": "Anushka (Female)",
|
||||
|
|
@ -236,11 +138,21 @@ export default function ServiceConfiguration() {
|
|||
}
|
||||
});
|
||||
selectedProviders[service] = userConfig?.[service]?.provider as string;
|
||||
// Fill in schema defaults for fields not present in userConfig
|
||||
const properties = response.data[service]?.[selectedProviders[service]]?.properties as Record<string, SchemaProperty>;
|
||||
if (properties) {
|
||||
Object.entries(properties).forEach(([field, schema]) => {
|
||||
const key = `${service}_${field}`;
|
||||
if (field !== "provider" && field !== "api_key" && schema.default !== undefined && !(key in defaultValues)) {
|
||||
defaultValues[key] = schema.default;
|
||||
}
|
||||
});
|
||||
}
|
||||
} else {
|
||||
const properties = response.data[service]?.[selectedProviders[service]]?.properties as Record<string, SchemaProperty>;
|
||||
if (properties) {
|
||||
Object.entries(properties).forEach(([field, schema]) => {
|
||||
if (field !== "provider" && schema.default) {
|
||||
if (field !== "provider" && schema.default !== undefined) {
|
||||
defaultValues[`${service}_${field}`] = schema.default;
|
||||
}
|
||||
});
|
||||
|
|
|
|||
|
|
@ -15,6 +15,7 @@ export interface MentionItem {
|
|||
id: string;
|
||||
name: string;
|
||||
description: string;
|
||||
filename: string;
|
||||
}
|
||||
|
||||
interface MentionTextareaProps {
|
||||
|
|
@ -46,6 +47,7 @@ export function MentionTextarea({
|
|||
id: r.recording_id,
|
||||
name: r.transcript,
|
||||
description: r.transcript,
|
||||
filename: (r.metadata?.original_filename as string) || r.recording_id,
|
||||
})),
|
||||
[recordings]
|
||||
);
|
||||
|
|
@ -195,7 +197,7 @@ export function MentionTextarea({
|
|||
>
|
||||
<div className="flex items-center gap-2">
|
||||
<code className="text-xs bg-muted px-1 py-0.5 rounded font-mono">
|
||||
{item.id}
|
||||
{item.filename}
|
||||
</code>
|
||||
<span className="font-medium truncate">{item.name}</span>
|
||||
</div>
|
||||
|
|
|
|||
|
|
@ -215,11 +215,7 @@ export default function CustomEdge(props: CustomEdgeProps) {
|
|||
const handleSaveEdgeData = useCallback(async (updatedData: FlowEdgeData) => {
|
||||
// Use the workflow store's updateEdge method to properly track history
|
||||
updateEdge(id, { data: updatedData });
|
||||
|
||||
// Save the workflow after updating edge data with a small delay to ensure state is updated
|
||||
setTimeout(async () => {
|
||||
await saveWorkflow();
|
||||
}, 100);
|
||||
await saveWorkflow();
|
||||
}, [id, updateEdge, saveWorkflow]);
|
||||
|
||||
return (
|
||||
|
|
|
|||
|
|
@ -89,10 +89,7 @@ export const AgentNode = memo(({ data, selected, id }: AgentNodeProps) => {
|
|||
document_uuids: documentUuids.length > 0 ? documentUuids : undefined,
|
||||
});
|
||||
setOpen(false);
|
||||
// Save the workflow after updating node data with a small delay to ensure state is updated
|
||||
setTimeout(async () => {
|
||||
await saveWorkflow();
|
||||
}, 100);
|
||||
await saveWorkflow();
|
||||
};
|
||||
|
||||
// Reset form state when dialog opens
|
||||
|
|
@ -127,27 +124,23 @@ export const AgentNode = memo(({ data, selected, id }: AgentNodeProps) => {
|
|||
}, [data, open]);
|
||||
|
||||
/**
 * Handle cleanup of stale document UUIDs.
 *
 * Removes every UUID listed in `staleUuids` from `data.document_uuids`,
 * hands the cleaned node data to `handleSaveNodeData`, then saves the workflow.
 *
 * @param staleUuids - Document UUIDs that should no longer be referenced.
 */
const handleStaleDocuments = useCallback(async (staleUuids: string[]) => {
    const cleanedUuids = (data.document_uuids ?? []).filter(uuid => !staleUuids.includes(uuid));
    handleSaveNodeData({
        ...data,
        // An empty list is stored as undefined rather than [].
        document_uuids: cleanedUuids.length > 0 ? cleanedUuids : undefined,
    });
    // Single awaited save; the former setTimeout(…, 100) save is superseded.
    await saveWorkflow();
}, [data, handleSaveNodeData, saveWorkflow]);
|
||||
|
||||
/**
 * Handle cleanup of stale tool UUIDs.
 *
 * Removes every UUID listed in `staleUuids` from `data.tool_uuids`,
 * hands the cleaned node data to `handleSaveNodeData`, then saves the workflow.
 *
 * @param staleUuids - Tool UUIDs that should no longer be referenced.
 */
const handleStaleTools = useCallback(async (staleUuids: string[]) => {
    const cleanedUuids = (data.tool_uuids ?? []).filter(uuid => !staleUuids.includes(uuid));
    handleSaveNodeData({
        ...data,
        // An empty list is stored as undefined rather than [].
        tool_uuids: cleanedUuids.length > 0 ? cleanedUuids : undefined,
    });
    // Single awaited save; the former setTimeout(…, 100) save is superseded.
    await saveWorkflow();
}, [data, handleSaveNodeData, saveWorkflow]);
|
||||
|
||||
return (
|
||||
|
|
|
|||
|
|
@ -75,10 +75,7 @@ export const EndCall = memo(({ data, selected, id }: EndCallNodeProps) => {
|
|||
add_global_prompt: addGlobalPrompt,
|
||||
});
|
||||
setOpen(false);
|
||||
// Save the workflow after updating node data with a small delay to ensure state is updated
|
||||
setTimeout(async () => {
|
||||
await saveWorkflow();
|
||||
}, 100);
|
||||
await saveWorkflow();
|
||||
};
|
||||
|
||||
// Reset form state when dialog opens
|
||||
|
|
|
|||
|
|
@ -52,10 +52,7 @@ export const GlobalNode = memo(({ data, selected, id }: GlobalNodeProps) => {
|
|||
name
|
||||
});
|
||||
setOpen(false);
|
||||
// Save the workflow after updating node data with a small delay to ensure state is updated
|
||||
setTimeout(async () => {
|
||||
await saveWorkflow();
|
||||
}, 100);
|
||||
await saveWorkflow();
|
||||
};
|
||||
|
||||
// Reset form state when dialog opens
|
||||
|
|
|
|||
|
|
@ -66,9 +66,7 @@ export const QANode = memo(({ data, selected, id }: QANodeProps) => {
|
|||
qa_sample_rate: qaSampleRate,
|
||||
});
|
||||
setOpen(false);
|
||||
setTimeout(async () => {
|
||||
await saveWorkflow();
|
||||
}, 100);
|
||||
await saveWorkflow();
|
||||
};
|
||||
|
||||
const resetFormState = () => {
|
||||
|
|
|
|||
|
|
@ -104,10 +104,7 @@ export const StartCall = memo(({ data, selected, id }: StartCallNodeProps) => {
|
|||
document_uuids: documentUuids.length > 0 ? documentUuids : undefined,
|
||||
});
|
||||
setOpen(false);
|
||||
// Save the workflow after updating node data with a small delay to ensure state is updated
|
||||
setTimeout(async () => {
|
||||
await saveWorkflow();
|
||||
}, 100);
|
||||
await saveWorkflow();
|
||||
};
|
||||
|
||||
// Reset form state when dialog opens
|
||||
|
|
@ -148,27 +145,23 @@ export const StartCall = memo(({ data, selected, id }: StartCallNodeProps) => {
|
|||
}, [data, open]);
|
||||
|
||||
/**
 * Handle cleanup of stale document UUIDs.
 *
 * Removes every UUID listed in `staleUuids` from `data.document_uuids`,
 * hands the cleaned node data to `handleSaveNodeData`, then saves the workflow.
 *
 * @param staleUuids - Document UUIDs that should no longer be referenced.
 */
const handleStaleDocuments = useCallback(async (staleUuids: string[]) => {
    const cleanedUuids = (data.document_uuids ?? []).filter(uuid => !staleUuids.includes(uuid));
    handleSaveNodeData({
        ...data,
        // An empty list is stored as undefined rather than [].
        document_uuids: cleanedUuids.length > 0 ? cleanedUuids : undefined,
    });
    // Single awaited save; the former setTimeout(…, 100) save is superseded.
    await saveWorkflow();
}, [data, handleSaveNodeData, saveWorkflow]);
|
||||
|
||||
/**
 * Handle cleanup of stale tool UUIDs.
 *
 * Removes every UUID listed in `staleUuids` from `data.tool_uuids`,
 * hands the cleaned node data to `handleSaveNodeData`, then saves the workflow.
 *
 * @param staleUuids - Tool UUIDs that should no longer be referenced.
 */
const handleStaleTools = useCallback(async (staleUuids: string[]) => {
    const cleanedUuids = (data.tool_uuids ?? []).filter(uuid => !staleUuids.includes(uuid));
    handleSaveNodeData({
        ...data,
        // An empty list is stored as undefined rather than [].
        tool_uuids: cleanedUuids.length > 0 ? cleanedUuids : undefined,
    });
    // Single awaited save; the former setTimeout(…, 100) save is superseded.
    await saveWorkflow();
}, [data, handleSaveNodeData, saveWorkflow]);
|
||||
|
||||
return (
|
||||
|
|
|
|||
|
|
@ -61,10 +61,7 @@ export const TriggerNode = memo(({ data, selected, id }: TriggerNodeProps) => {
|
|||
trigger_path: triggerPath,
|
||||
});
|
||||
setOpen(false);
|
||||
// Save the workflow after updating node data
|
||||
setTimeout(async () => {
|
||||
await saveWorkflow();
|
||||
}, 100);
|
||||
await saveWorkflow();
|
||||
};
|
||||
|
||||
// Reset form state when dialog opens
|
||||
|
|
|
|||
|
|
@ -86,9 +86,7 @@ export const WebhookNode = memo(({ data, selected, id }: WebhookNodeProps) => {
|
|||
payload_template: validation.parsed as Record<string, unknown>,
|
||||
});
|
||||
setOpen(false);
|
||||
setTimeout(async () => {
|
||||
await saveWorkflow();
|
||||
}, 100);
|
||||
await saveWorkflow();
|
||||
};
|
||||
|
||||
const handleOpenChange = (newOpen: boolean) => {
|
||||
|
|
|
|||
|
|
@ -71,12 +71,13 @@ export const NodeEditDialog = ({
|
|||
/**
 * Keyboard shortcut handler: Cmd+S / Ctrl+S triggers `handleSave`.
 *
 * The event's default action is cancelled and further listeners for the
 * same event are prevented from running before the save is invoked.
 */
const handleKeyDown = (e: KeyboardEvent) => {
    const hasModifier = e.metaKey || e.ctrlKey;
    const isSaveShortcut = hasModifier && e.key === 's';
    if (!isSaveShortcut) {
        return;
    }

    e.preventDefault();
    e.stopImmediatePropagation();
    handleSave();
};
|
||||
|
||||
window.addEventListener('keydown', handleKeyDown);
|
||||
return () => window.removeEventListener('keydown', handleKeyDown);
|
||||
window.addEventListener('keydown', handleKeyDown, true);
|
||||
return () => window.removeEventListener('keydown', handleKeyDown, true);
|
||||
}, [open, handleSave]);
|
||||
|
||||
return (
|
||||
|
|
|
|||
|
|
@ -1,22 +1,114 @@
|
|||
'use client';
|
||||
|
||||
import { PlusIcon } from 'lucide-react';
|
||||
import { Bot, ChevronDown, LayoutTemplate, PlusIcon } from 'lucide-react';
|
||||
import { useRouter } from 'next/navigation';
|
||||
import { useState } from 'react';
|
||||
import { toast } from 'sonner';
|
||||
|
||||
import { createWorkflowApiV1WorkflowCreateDefinitionPost } from '@/client/sdk.gen';
|
||||
import { Button } from "@/components/ui/button";
|
||||
import {
|
||||
DropdownMenu,
|
||||
DropdownMenuContent,
|
||||
DropdownMenuItem,
|
||||
DropdownMenuTrigger,
|
||||
} from "@/components/ui/dropdown-menu";
|
||||
import { useAuth } from '@/lib/auth';
|
||||
import logger from '@/lib/logger';
|
||||
import { getRandomId } from '@/lib/utils';
|
||||
|
||||
/**
 * Workflow definition sent to the create-workflow endpoint when the user
 * picks "Blank Canvas": a single `startCall` node with a default voice-agent
 * prompt, no edges, and a preset viewport.
 */
const BLANK_WORKFLOW_DEFINITION = {
    nodes: [
        {
            id: "1",
            type: "startCall",
            position: { x: 175, y: 60 },
            data: {
                // Default system prompt for the start node ("handing" typo corrected to "handling").
                prompt: "# Goal\nYou are a helpful agent who is handling a conversation over voice with a human. This is a voice conversation, so transcripts can be error prone.\n\n## Rules\n- Language: UK English but does not have to be correct english\n- Keep responses short and 2-3 sentences max\n- If you have to repeat something that you said in your previous two turns, then rephrase a bit while keeping the same meaning. Never repeat the exact same words as in your previous 2 responses.\n\n## Speech Handling\n- There could be multiple transcription errors. \n- Accept variations: yes/yeah/yep/aye, no/nah/nope\n- If user says \"sorry?\" or \"pardon me\" or \"can you repeat\" or \"what?\", they might not have heard you- so just repeat what you just said.\n\n### Flow\nStart by saying \"Hi\". Be polite and courteous. ",
                name: "start call",
                allow_interrupt: false,
                invalid: false,
                validationMessage: null,
                is_static: false,
                add_global_prompt: false,
                wait_for_user_response: false,
                detect_voicemail: true,
                delayed_start: false,
                is_start: true,
                selected_through_edge: false,
                hovered_through_edge: false,
                extraction_enabled: false,
                selected: false,
                dragging: false,
            },
        },
    ],
    edges: [],
    viewport: { x: 808, y: 269, zoom: 0.75 },
};
|
||||
|
||||
/**
 * "Create Agent" dropdown button.
 *
 * Offers two entries: "Use Agent Builder", which navigates to
 * `/workflow/create`, and "Blank Canvas", which creates a workflow from
 * `BLANK_WORKFLOW_DEFINITION` through the API and navigates to it.
 */
export function CreateWorkflowButton() {
    const router = useRouter();
    const { user, getAccessToken } = useAuth();
    // True while a blank workflow is being created; blocks re-entry and
    // switches the button label to "Creating...".
    const [isCreating, setIsCreating] = useState(false);

    /** Open the agent-builder page. */
    const handleAgentBuilder = () => {
        router.push('/workflow/create');
    };

    /** Create a blank workflow via the API and navigate to it on success. */
    const handleBlankCanvas = async () => {
        // Ignore clicks while a request is in flight or when no user is available.
        if (isCreating || !user) return;
        setIsCreating(true);

        try {
            const accessToken = await getAccessToken();
            const name = `Workflow-${getRandomId()}`;
            const response = await createWorkflowApiV1WorkflowCreateDefinitionPost({
                body: {
                    name,
                    workflow_definition: BLANK_WORKFLOW_DEFINITION as unknown as { [key: string]: unknown },
                },
                headers: {
                    'Authorization': `Bearer ${accessToken}`,
                },
            });

            const workflowId = response.data?.id;
            if (!workflowId) {
                // NOTE(review): the generated client presumably resolves (rather than
                // throws) on a non-2xx response — confirm. Either way, a response
                // without an id must not fail silently.
                logger.error('Error creating blank workflow: response did not contain a workflow id');
                toast.error('Failed to create workflow');
                return;
            }

            router.push(`/workflow/${workflowId}`);
        } catch (err) {
            logger.error(`Error creating blank workflow: ${err}`);
            toast.error('Failed to create workflow');
        } finally {
            // Runs on success, failure and the early return above.
            setIsCreating(false);
        }
    };

    return (
        <DropdownMenu>
            <DropdownMenuTrigger asChild>
                <Button disabled={isCreating}>
                    <PlusIcon className="w-4 h-4" />
                    {isCreating ? 'Creating...' : 'Create Agent'}
                    <ChevronDown className="w-4 h-4" />
                </Button>
            </DropdownMenuTrigger>
            <DropdownMenuContent align="end">
                <DropdownMenuItem onClick={handleAgentBuilder} className="cursor-pointer">
                    <Bot className="w-4 h-4 mr-2" />
                    <div>
                        <div className="font-medium">Use Agent Builder</div>
                        <div className="text-xs text-muted-foreground">AI generates a workflow from your description</div>
                    </div>
                </DropdownMenuItem>
                <DropdownMenuItem onClick={handleBlankCanvas} disabled={isCreating} className="cursor-pointer">
                    <LayoutTemplate className="w-4 h-4 mr-2" />
                    <div>
                        <div className="font-medium">Blank Canvas</div>
                        <div className="text-xs text-muted-foreground">Start from scratch with an empty workflow</div>
                    </div>
                </DropdownMenuItem>
            </DropdownMenuContent>
        </DropdownMenu>
    );
}
|
||||
|
|
|
|||
98
ui/src/constants/languages.ts
Normal file
98
ui/src/constants/languages.ts
Normal file
|
|
@ -0,0 +1,98 @@
|
|||
/**
 * Human-readable labels keyed by language code (Deepgram + Sarvam).
 *
 * Keys are either a bare language code ('en') or a region-qualified code
 * ('en-GB'); 'multi' is the auto-detect entry. Insertion order is preserved.
 */
export const LANGUAGE_DISPLAY_NAMES: Record<string, string> = {
    'multi': 'Multilingual (Auto-detect)',

    // --- Arabic and its regional variants ---
    'ar': 'Arabic',
    'ar-AE': 'Arabic (UAE)',
    'ar-SA': 'Arabic (Saudi Arabia)',
    'ar-QA': 'Arabic (Qatar)',
    'ar-KW': 'Arabic (Kuwait)',
    'ar-SY': 'Arabic (Syria)',
    'ar-LB': 'Arabic (Lebanon)',
    'ar-PS': 'Arabic (Palestine)',
    'ar-JO': 'Arabic (Jordan)',
    'ar-EG': 'Arabic (Egypt)',
    'ar-SD': 'Arabic (Sudan)',
    'ar-TD': 'Arabic (Chad)',
    'ar-MA': 'Arabic (Morocco)',
    'ar-DZ': 'Arabic (Algeria)',
    'ar-TN': 'Arabic (Tunisia)',
    'ar-IQ': 'Arabic (Iraq)',
    'ar-IR': 'Arabic (Iran)',

    // --- All other languages ---
    'be': 'Belarusian',
    'bn': 'Bengali',
    'bs': 'Bosnian',
    'bg': 'Bulgarian',
    'ca': 'Catalan',
    'cs': 'Czech',
    'da': 'Danish',
    'da-DK': 'Danish (Denmark)',
    'de': 'German',
    'de-CH': 'German (Switzerland)',
    'el': 'Greek',
    'en': 'English',
    'en-US': 'English (US)',
    'en-AU': 'English (Australia)',
    'en-GB': 'English (UK)',
    'en-IN': 'English (India)',
    'en-NZ': 'English (New Zealand)',
    'es': 'Spanish',
    'es-419': 'Spanish (Latin America)',
    'et': 'Estonian',
    'fa': 'Persian',
    'fi': 'Finnish',
    'fr': 'French',
    'fr-CA': 'French (Canada)',
    'he': 'Hebrew',
    'hi': 'Hindi',
    'hr': 'Croatian',
    'hu': 'Hungarian',
    'id': 'Indonesian',
    'it': 'Italian',
    'ja': 'Japanese',
    'kn': 'Kannada',
    'ko': 'Korean',
    'ko-KR': 'Korean (South Korea)',
    'lt': 'Lithuanian',
    'lv': 'Latvian',
    'mk': 'Macedonian',
    'mr': 'Marathi',
    'ms': 'Malay',
    'nl': 'Dutch',
    'nl-BE': 'Flemish',
    'no': 'Norwegian',
    'pl': 'Polish',
    'pt': 'Portuguese',
    'pt-BR': 'Portuguese (Brazil)',
    'pt-PT': 'Portuguese (Portugal)',
    'ro': 'Romanian',
    'ru': 'Russian',
    'sk': 'Slovak',
    'sl': 'Slovenian',
    'sr': 'Serbian',
    'sv': 'Swedish',
    'sv-SE': 'Swedish (Sweden)',
    'ta': 'Tamil',
    'te': 'Telugu',
    'th': 'Thai',
    'tl': 'Tagalog',
    'tr': 'Turkish',
    'uk': 'Ukrainian',
    'ur': 'Urdu',
    'vi': 'Vietnamese',
    'zh-CN': 'Chinese (Simplified)',
    'zh-TW': 'Chinese (Traditional)',

    // --- Sarvam Indian languages (region-qualified "-IN" codes) ---
    // Several share a label with the bare code above, e.g. 'hi' and 'hi-IN'
    // are both "Hindi".
    'bn-IN': 'Bengali',
    'gu-IN': 'Gujarati',
    'hi-IN': 'Hindi',
    'kn-IN': 'Kannada',
    'ml-IN': 'Malayalam',
    'mr-IN': 'Marathi',
    'od-IN': 'Odia',
    'pa-IN': 'Punjabi',
    'ta-IN': 'Tamil',
    'te-IN': 'Telugu',
    'as-IN': 'Assamese',
};
|
||||
Loading…
Add table
Add a link
Reference in a new issue