feat: persist split user and bot audio

This commit is contained in:
Abhishek Kumar 2026-06-16 15:19:49 +05:30
parent dd3f2e7323
commit 3d1886c450
30 changed files with 1322 additions and 253 deletions

View file

@ -0,0 +1,34 @@
"""add extra column in workflow runs
Revision ID: efe356f488f9
Revises: 384be6596b36
Create Date: 2026-06-16 12:24:30.081058
"""
from typing import Sequence, Union
import sqlalchemy as sa
from alembic import op
# revision identifiers, used by Alembic.
revision: str = "efe356f488f9"
down_revision: Union[str, None] = "384be6596b36"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Add the ``extra`` JSON column to the ``workflow_runs`` table.

    The column is declared NOT NULL with a server-side default of an empty
    JSON object (``'{}'::json``).
    """
    extra_column = sa.Column(
        "extra",
        sa.JSON(),
        server_default=sa.text("'{}'::json"),
        nullable=False,
    )
    op.add_column("workflow_runs", extra_column)
def downgrade() -> None:
    """Drop the ``extra`` column from the ``workflow_runs`` table."""
    table_name, column_name = "workflow_runs", "extra"
    op.drop_column(table_name, column_name)

View file

@ -10,6 +10,7 @@ from api.db.filters import apply_workflow_run_filters, get_workflow_run_order_cl
from api.db.models import CampaignModel, QueuedRunModel, WorkflowRunModel
from api.schemas.workflow import WorkflowRunResponseSchema
from api.services.workflow.run_usage_response import format_public_cost_info
from api.utils.recording_artifacts import get_recording_storage_key
class CampaignClient(BaseDBClient):
@ -45,9 +46,11 @@ class CampaignClient(BaseDBClient):
source_id=source_id,
created_by=user_id,
organization_id=organization_id,
retry_config=retry_config
if retry_config
else CampaignModel.retry_config.default.arg,
retry_config=(
retry_config
if retry_config
else CampaignModel.retry_config.default.arg
),
orchestrator_metadata=orchestrator_metadata,
telephony_configuration_id=telephony_configuration_id,
)
@ -216,6 +219,12 @@ class CampaignClient(BaseDBClient):
"is_completed": run.is_completed,
"recording_url": run.recording_url,
"transcript_url": run.transcript_url,
"user_recording_url": get_recording_storage_key(
run.extra, "user"
),
"bot_recording_url": get_recording_storage_key(
run.extra, "bot"
),
"cost_info": format_public_cost_info(
run.cost_info, run.usage_info
),
@ -270,9 +279,11 @@ class CampaignClient(BaseDBClient):
source_id=parent_campaign.source_id,
created_by=parent_campaign.created_by,
organization_id=parent_campaign.organization_id,
retry_config=retry_config
if retry_config
else CampaignModel.retry_config.default.arg,
retry_config=(
retry_config
if retry_config
else CampaignModel.retry_config.default.arg
),
orchestrator_metadata=child_meta,
rate_limit_per_second=parent_campaign.rate_limit_per_second,
total_rows=len(queued_runs_data),
@ -338,8 +349,7 @@ class CampaignClient(BaseDBClient):
# Retries create new queued_runs with suffixed source_uuids linked via
# parent_queued_run_id, so group by the ROOT queued_run using a
# recursive walk and pick the latest workflow_run across the tree.
sql = text(
f"""
sql = text(f"""
WITH RECURSIVE run_tree AS (
SELECT id AS root_id, id AS run_id
FROM queued_runs
@ -366,8 +376,7 @@ class CampaignClient(BaseDBClient):
JOIN latest_run_per_root lr ON lr.root_id = q0.id
WHERE q0.campaign_id = :cid
AND ({tag_filter})
"""
)
""")
async with self.async_session() as session:
result = await session.execute(sql, {"cid": campaign_id})

View file

@ -532,6 +532,9 @@ class WorkflowRunModel(Base):
is_completed = Column(Boolean, default=False)
recording_url = Column(String, nullable=True)
transcript_url = Column(String, nullable=True)
extra = Column(
JSON, nullable=False, default=dict, server_default=text("'{}'::json")
)
# Store storage backend as string enum (s3, minio)
storage_backend = Column(
Enum("s3", "minio", name="storage_backend"),

View file

@ -20,6 +20,7 @@ from api.db.models import (
)
from api.enums import OrganizationConfigurationKey
from api.schemas.ai_model_configuration import EffectiveAIModelConfiguration
from api.utils.recording_artifacts import get_recording_storage_key
class OrganizationUsageClient(BaseDBClient):
@ -226,6 +227,9 @@ class OrganizationUsageClient(BaseDBClient):
"call_duration_seconds": int(round(call_duration)),
"recording_url": run.recording_url,
"transcript_url": run.transcript_url,
"user_recording_url": get_recording_storage_key(run.extra, "user"),
"bot_recording_url": get_recording_storage_key(run.extra, "bot"),
"extra": run.extra,
"public_access_token": run.public_access_token,
"phone_number": phone_number,
"caller_number": caller_number,

View file

@ -17,6 +17,7 @@ from api.db.models import (
from api.enums import CallType, StorageBackend
from api.schemas.workflow import WorkflowRunResponseSchema
from api.services.workflow.run_usage_response import format_public_cost_info
from api.utils.recording_artifacts import get_recording_storage_key
class WorkflowRunClient(BaseDBClient):
@ -188,13 +189,19 @@ class WorkflowRunClient(BaseDBClient):
"workflow_name": run.workflow.name if run.workflow else None,
"user_id": run.workflow.user_id if run.workflow else None,
"organization_id": organization.id if organization else None,
"organization_name": organization.provider_id
if organization
else None,
"organization_name": (
organization.provider_id if organization else None
),
"mode": run.mode,
"is_completed": run.is_completed,
"recording_url": run.recording_url,
"transcript_url": run.transcript_url,
"user_recording_url": get_recording_storage_key(
run.extra, "user"
),
"bot_recording_url": get_recording_storage_key(
run.extra, "bot"
),
"usage_info": run.usage_info,
"cost_info": run.cost_info,
"initial_context": run.initial_context,
@ -313,6 +320,12 @@ class WorkflowRunClient(BaseDBClient):
"is_completed": run.is_completed,
"recording_url": run.recording_url,
"transcript_url": run.transcript_url,
"user_recording_url": get_recording_storage_key(
run.extra, "user"
),
"bot_recording_url": get_recording_storage_key(
run.extra, "bot"
),
"cost_info": format_public_cost_info(
run.cost_info, run.usage_info
),
@ -340,6 +353,7 @@ class WorkflowRunClient(BaseDBClient):
logs: dict | None = None,
state: str | None = None,
annotations: dict | None = None,
extra: dict | None = None,
) -> WorkflowRunModel:
async with self.async_session() as session:
# Use SELECT FOR UPDATE to lock the row during the update
@ -374,6 +388,8 @@ class WorkflowRunClient(BaseDBClient):
run.logs = {**run.logs, **logs}
if annotations:
run.annotations = {**run.annotations, **annotations}
if extra:
run.extra = {**run.extra, **extra}
if is_completed:
run.is_completed = is_completed
if state:

View file

@ -56,6 +56,7 @@ from api.services.configuration.masking import is_mask_of, mask_key, mask_user_c
from api.services.configuration.registry import (
DOGRAH_STT_LANGUAGES,
REGISTRY,
DograhTTSService,
ServiceProviders,
ServiceType,
)
@ -210,6 +211,13 @@ async def get_telephony_config_warnings(user: UserModel = Depends(get_user)):
# ---------------------------------------------------------------------------
def _dograh_allows_custom_voice() -> bool:
    """Report whether the Dograh TTS ``voice`` field allows custom input.

    Reads the ``allow_custom_input`` flag from the field's
    ``json_schema_extra``. Returns ``False`` when that extra is not a dict
    or the flag is absent.
    """
    schema_extra = DograhTTSService.model_fields["voice"].json_schema_extra
    if not isinstance(schema_extra, dict):
        return False
    return bool(schema_extra.get("allow_custom_input", False))
def _byok_provider_schemas(service_type: ServiceType) -> dict[str, dict]:
return {
provider: model_cls.model_json_schema()
@ -251,6 +259,7 @@ async def get_model_configuration_v2_defaults(
return {
"dograh": {
"voices": [DOGRAH_DEFAULT_VOICE],
"allow_custom_input": _dograh_allows_custom_voice(),
"speeds": list(DOGRAH_SPEED_OPTIONS),
"languages": DOGRAH_STT_LANGUAGES,
"defaults": {

View file

@ -14,6 +14,7 @@ from api.services.auth.depends import get_user, get_user_with_selected_organizat
from api.services.mps_service_key_client import mps_service_key_client
from api.services.reports import generate_usage_runs_report_csv
from api.utils.artifacts import artifact_url
from api.utils.recording_artifacts import has_recording_track
router = APIRouter(prefix="/organizations")
@ -99,8 +100,12 @@ class WorkflowRunUsageResponse(BaseModel):
call_duration_seconds: int
recording_url: Optional[str] = None
transcript_url: Optional[str] = None
user_recording_url: Optional[str] = None
bot_recording_url: Optional[str] = None
recording_public_url: Optional[str] = None
transcript_public_url: Optional[str] = None
user_recording_public_url: Optional[str] = None
bot_recording_public_url: Optional[str] = None
public_access_token: Optional[str] = None
phone_number: Optional[str] = Field(
default=None,
@ -308,14 +313,18 @@ async def get_billing_credits(
aggregation_key=entry.get("aggregation_key"),
usage_event_id=_optional_int(entry.get("usage_event_id")),
workflow_run_id=_optional_int(entry.get("workflow_run_id")),
workflow_id=workflow_ids_by_run_id.get(
_optional_int(entry.get("workflow_run_id"))
)
if entry.get("workflow_run_id") is not None
else None,
billable_quantity=float(entry["billable_quantity"])
if entry.get("billable_quantity") is not None
else None,
workflow_id=(
workflow_ids_by_run_id.get(
_optional_int(entry.get("workflow_run_id"))
)
if entry.get("workflow_run_id") is not None
else None
),
billable_quantity=(
float(entry["billable_quantity"])
if entry.get("billable_quantity") is not None
else None
),
quantity_unit=entry.get("quantity_unit"),
metadata=entry.get("metadata") or {},
created_at=str(entry["created_at"]),
@ -478,6 +487,17 @@ async def get_usage_history(
public_access_token, "transcript"
)
run["recording_public_url"] = artifact_url(public_access_token, "recording")
run["user_recording_public_url"] = (
artifact_url(public_access_token, "user_recording")
if has_recording_track(run.get("extra"), "user")
else None
)
run["bot_recording_public_url"] = (
artifact_url(public_access_token, "bot_recording")
if has_recording_track(run.get("extra"), "bot")
else None
)
run.pop("extra", None)
return {
"runs": runs,

View file

@ -6,14 +6,16 @@ post-call processing for runs that execute integrations, QA, or campaign
reporting.
"""
from typing import Literal
from fastapi import APIRouter, HTTPException, Query
from fastapi.responses import RedirectResponse
from loguru import logger
from api.db import db_client
from api.services.storage import get_storage_for_backend
from api.utils.recording_artifacts import (
get_recording_storage_backend,
get_recording_storage_key,
)
router = APIRouter(prefix="/public/download")
@ -21,7 +23,7 @@ router = APIRouter(prefix="/public/download")
@router.get("/workflow/{token}/{artifact_type}")
async def download_workflow_artifact(
token: str,
artifact_type: Literal["recording", "transcript"],
artifact_type: str,
inline: bool = Query(
default=False, description="Display inline in browser instead of download"
),
@ -36,13 +38,15 @@ async def download_workflow_artifact(
Args:
token: The public access token (UUID format)
artifact_type: Type of artifact - "recording" or "transcript"
artifact_type: Type of artifact - "recording", "transcript",
"user_recording", or "bot_recording"
inline: If true, sets Content-Disposition to inline for browser preview
Returns:
RedirectResponse to the signed URL (302 redirect)
Raises:
HTTPException 400: If artifact type is unsupported
HTTPException 404: If token is invalid or artifact not found
"""
# 1. Lookup workflow run by token
@ -52,10 +56,26 @@ async def download_workflow_artifact(
raise HTTPException(status_code=404, detail="Invalid or expired token")
# 2. Get file path based on artifact type
artifact_storage_backend = None
if artifact_type == "recording":
file_path = workflow_run.recording_url
else: # transcript
elif artifact_type == "transcript":
file_path = workflow_run.transcript_url
elif artifact_type == "user_recording":
file_path = get_recording_storage_key(workflow_run.extra, "user")
artifact_storage_backend = get_recording_storage_backend(
workflow_run.extra, "user"
)
elif artifact_type == "bot_recording":
file_path = get_recording_storage_key(workflow_run.extra, "bot")
artifact_storage_backend = get_recording_storage_backend(
workflow_run.extra, "bot"
)
else:
logger.warning(
f"Unsupported artifact type: type={artifact_type}, workflow_run_id={workflow_run.id}"
)
raise HTTPException(status_code=400, detail="Unsupported artifact type")
if not file_path:
logger.warning(
@ -68,7 +88,9 @@ async def download_workflow_artifact(
# 3. Get storage backend for this workflow run
try:
storage = get_storage_for_backend(workflow_run.storage_backend)
storage = get_storage_for_backend(
artifact_storage_backend or workflow_run.storage_backend
)
except ValueError as e:
logger.error(f"Invalid storage backend: {workflow_run.storage_backend}")
raise HTTPException(status_code=500, detail="Storage configuration error")

View file

@ -40,14 +40,22 @@ class PresignedUploadUrlResponse(BaseModel):
router = APIRouter(prefix="/s3", tags=["s3"])
ORG_SCOPED_STORAGE_PREFIXES = ("campaigns", "knowledge_base")
def _extract_org_id_from_key(key: str) -> Optional[int]:
"""Try to extract an organization ID from a storage key.
Matches keys of the form ``{prefix}/{org_id}/...`` where *org_id* is a
positive integer. Returns ``None`` when the pattern does not match.
Matches known org-scoped keys of the form ``{prefix}/{org_id}/...`` where
*org_id* is a positive integer. Returns ``None`` when the pattern does not
match.
"""
parts = key.split("/")
if len(parts) >= 3 and parts[1].isdigit():
if (
len(parts) >= 3
and parts[0] in ORG_SCOPED_STORAGE_PREFIXES
and parts[1].isdigit()
):
return int(parts[1])
return None
@ -58,15 +66,20 @@ def _extract_legacy_workflow_run_id(key: str) -> Optional[int]:
Supports:
- ``transcripts/{run_id}.txt``
- ``recordings/{run_id}.wav``
- ``recordings/{run_id}/user.wav``
- ``recordings/{run_id}/bot.wav``
Returns ``None`` when the key does not match a legacy pattern.
"""
if key.startswith("transcripts/") and key.endswith(".txt"):
run_id_str = key[len("transcripts/") : -4]
elif key.startswith("recordings/") and key.endswith(".wav"):
run_id_str = key[len("recordings/") : -4]
else:
return None
recording_match = re.fullmatch(
r"recordings/(\d+)(?:\.wav|/(?:user|bot)\.wav)", key
)
if not recording_match:
return None
run_id_str = recording_match.group(1)
return int(run_id_str) if run_id_str.isdigit() else None
@ -89,8 +102,13 @@ async def _validate_and_extract_workflow_run_id(
"""
if key.startswith("transcripts/") and key.endswith(".txt"):
run_id_str = key[len("transcripts/") : -4] # strip prefix & suffix
elif key.startswith("recordings/") and key.endswith(".wav"):
run_id_str = key[len("recordings/") : -4]
elif key.startswith("recordings/"):
run_id = _extract_legacy_workflow_run_id(key)
if run_id is None:
raise HTTPException(
status_code=400, detail="Invalid workflow_run_id in key"
)
return run_id
elif allow_special_paths and key.startswith("voicemail_detections/"):
return None # Skip validation for these paths
else:
@ -159,9 +177,9 @@ async def get_signed_url(
"""Return a short-lived signed URL for a file stored on S3 / MinIO.
Access Control:
* Keys that embed an organization ID (``{prefix}/{org_id}/...``) are
authorized by matching the org_id against the requesting user's
organization.
* Known org-scoped keys (for example ``campaigns/{org_id}/...`` and
``knowledge_base/{org_id}/...``) are authorized by matching the org_id
against the requesting user's organization.
* Legacy keys (``recordings/{run_id}.wav``, ``transcripts/{run_id}.txt``)
are authorized via the workflow run they belong to.
* Superusers can request any key.

View file

@ -19,7 +19,7 @@ import ipaddress
import os
from datetime import UTC, datetime
from enum import Enum
from typing import Dict, List, Optional
from typing import Dict, List, Optional, Set
from aiortc import RTCIceServer
from aiortc.sdp import candidate_from_sdp
@ -246,6 +246,74 @@ class SignalingManager:
def __init__(self):
self._connections: Dict[str, WebSocket] = {}
self._peer_connections: Dict[str, SmallWebRTCConnection] = {}
self._connection_peer_ids: Dict[str, Set[str]] = {}
self._peer_connection_owners: Dict[str, str] = {}
def _track_peer_connection(
    self, connection_id: str, pc_id: str, pc: SmallWebRTCConnection
) -> None:
    """Register ``pc`` under ``pc_id`` and record ``connection_id`` as its owner.

    Updates three maps: the peer connection itself, the pc_id -> owner
    lookup, and the owner -> set-of-pc_ids lookup.
    """
    owned_ids = self._connection_peer_ids.setdefault(connection_id, set())
    owned_ids.add(pc_id)
    self._peer_connection_owners[pc_id] = connection_id
    self._peer_connections[pc_id] = pc
def _forget_peer_connection(self, pc_id: str) -> Optional[str]:
connection_id = self._peer_connection_owners.pop(pc_id, None)
self._peer_connections.pop(pc_id, None)
if connection_id:
peer_ids = self._connection_peer_ids.get(connection_id)
if peer_ids is not None:
peer_ids.discard(pc_id)
if not peer_ids:
self._connection_peer_ids.pop(connection_id, None)
return connection_id
async def _send_json_if_connected(
    self, websocket: WebSocket, message: dict
) -> bool:
    """Send ``message`` as JSON when the WebSocket is still connected.

    Returns:
        ``True`` if the message was sent; ``False`` if the socket was not in
        the CONNECTED application state or the send raised (logged at debug).
    """
    if websocket.application_state != WebSocketState.CONNECTED:
        return False

    try:
        await websocket.send_json(message)
    except Exception as e:
        logger.debug(f"Failed to send signaling WebSocket message: {e}")
        return False
    else:
        return True
async def _close_websocket_if_connected(
    self, websocket: WebSocket, code: int = 1000, reason: str = ""
) -> None:
    """Close the WebSocket if it is still connected.

    A socket that is not in the CONNECTED application state is left alone,
    and any error raised by ``close`` is logged at debug level and swallowed.
    """
    still_connected = websocket.application_state == WebSocketState.CONNECTED
    if still_connected:
        try:
            await websocket.close(code=code, reason=reason)
        except Exception as e:
            logger.debug(f"Failed to close signaling WebSocket: {e}")
async def _notify_call_ended_and_close_websocket(
    self,
    websocket: WebSocket,
    workflow_run_id: int,
    pc_id: str,
    reason: str,
) -> None:
    """Send a ``call-ended`` message to the client, then close the WebSocket.

    Both steps go through the "if connected" helpers, so neither the send
    nor the close is attempted on a socket that is no longer connected.
    """
    call_ended_message = {
        "type": "call-ended",
        "payload": {
            "workflow_run_id": workflow_run_id,
            "pc_id": pc_id,
            "reason": reason,
        },
    }
    await self._send_json_if_connected(websocket, call_ended_message)
    await self._close_websocket_if_connected(
        websocket, code=1000, reason="call ended"
    )
async def handle_websocket(
self,
@ -257,35 +325,51 @@ class SignalingManager:
"""Handle WebSocket connection for signaling."""
await websocket.accept()
connection_id = f"{workflow_id}:{workflow_run_id}:{user.id}"
self._connections[connection_id] = websocket
connection_key = f"{connection_id}:{id(websocket)}"
self._connections[connection_key] = websocket
try:
while True:
message = await websocket.receive_json()
await self._handle_message(
websocket, message, workflow_id, workflow_run_id, user
websocket,
message,
workflow_id,
workflow_run_id,
user,
connection_key,
)
except WebSocketDisconnect:
logger.info(f"WebSocket disconnected for {connection_id}")
except Exception as e:
logger.error(f"WebSocket error for {connection_id}: {e}")
if websocket.application_state == WebSocketState.DISCONNECTED:
logger.info(f"WebSocket disconnected for {connection_id}")
else:
logger.error(f"WebSocket error for {connection_id}: {e}")
finally:
# Cleanup
self._connections.pop(connection_id, None)
self._connections.pop(connection_key, None)
peer_ids = list(self._connection_peer_ids.pop(connection_key, set()))
# Unregister WebSocket sender for real-time feedback
unregister_ws_sender(workflow_run_id)
# Clean up all peer connections for this workflow run
# Clean up peer connections owned by this WebSocket.
# Note: In a WebSocket-based signaling approach (vs HTTP PATCH),
# we maintain our own connection map instead of relying on
# SmallWebRTCRequestHandler's _pcs_map. This is suitable for
# multi-worker FastAPI deployments where state cannot be shared.
for pc_id in list(self._peer_connections.keys()):
for pc_id in peer_ids:
self._peer_connection_owners.pop(pc_id, None)
pc = self._peer_connections.pop(pc_id, None)
if pc:
await pc.disconnect()
logger.debug(f"Disconnected peer connection: {pc_id}")
try:
await pc.disconnect()
logger.debug(f"Disconnected peer connection: {pc_id}")
except Exception as e:
logger.debug(
f"Failed to disconnect peer connection {pc_id}: {e}"
)
async def _handle_message(
self,
@ -294,17 +378,20 @@ class SignalingManager:
workflow_id: int,
workflow_run_id: int,
user: UserModel,
connection_key: str,
):
"""Handle incoming WebSocket messages."""
msg_type = message.get("type")
payload = message.get("payload", {})
if msg_type == "offer":
await self._handle_offer(ws, payload, workflow_id, workflow_run_id, user)
await self._handle_offer(
ws, payload, workflow_id, workflow_run_id, user, connection_key
)
elif msg_type == "ice-candidate":
await self._handle_ice_candidate(ws, payload, workflow_run_id)
await self._handle_ice_candidate(payload, connection_key)
elif msg_type == "renegotiate":
await self._handle_renegotiation(ws, payload, workflow_id, workflow_run_id)
await self._handle_renegotiation(ws, payload, connection_key)
async def _handle_offer(
self,
@ -313,6 +400,7 @@ class SignalingManager:
workflow_id: int,
workflow_run_id: int,
user: UserModel,
connection_key: str,
):
"""Handle offer message and create answer with ICE trickling."""
pc_id = payload.get("pc_id")
@ -320,6 +408,15 @@ class SignalingManager:
type_ = payload.get("type")
call_context_vars = payload.get("call_context_vars", {})
if not pc_id or not sdp or not type_:
await ws.send_json(
{
"type": "error",
"payload": {"message": "Missing offer fields"},
}
)
return
# Set run context for logging and tracing. org_id must be set before
# pc.initialize() so that aiortc's internal tasks inherit it.
set_current_run_id(workflow_run_id)
@ -347,7 +444,16 @@ class SignalingManager:
)
return
if pc_id and pc_id in self._peer_connections:
if pc_id in self._peer_connections:
if self._peer_connection_owners.get(pc_id) != connection_key:
await ws.send_json(
{
"type": "error",
"payload": {"message": "Peer connection already owned"},
}
)
return
# Reuse existing connection
logger.info(f"Reusing existing connection for pc_id: {pc_id}")
pc = self._peer_connections[pc_id]
@ -379,7 +485,7 @@ class SignalingManager:
await pc.initialize(sdp=sdp, type=type_)
# Store peer connection using client's pc_id
self._peer_connections[pc_id] = pc
self._track_peer_connection(connection_key, pc_id, pc)
# Register WebSocket sender for real-time feedback
async def ws_sender(message: dict):
@ -392,7 +498,16 @@ class SignalingManager:
@pc.event_handler("closed")
async def handle_disconnected(webrtc_connection: SmallWebRTCConnection):
logger.info(f"PeerConnection closed: {webrtc_connection.pc_id}")
self._peer_connections.pop(webrtc_connection.pc_id, None)
owner_connection_id = self._forget_peer_connection(
webrtc_connection.pc_id
)
if owner_connection_id == connection_key:
await self._notify_call_ended_and_close_websocket(
ws,
workflow_run_id,
webrtc_connection.pc_id,
reason="peer_connection_closed",
)
# Start pipeline in background
asyncio.create_task(
@ -421,9 +536,7 @@ class SignalingManager:
}
)
async def _handle_ice_candidate(
self, ws: WebSocket, payload: dict, workflow_run_id: int
):
async def _handle_ice_candidate(self, payload: dict, connection_key: str):
"""Handle incoming ICE candidate from client.
Uses SmallWebRTC's native ICE trickling support via add_ice_candidate().
@ -442,6 +555,9 @@ class SignalingManager:
if not pc:
logger.warning(f"No peer connection found for pc_id: {pc_id}")
return
if self._peer_connection_owners.get(pc_id) != connection_key:
logger.warning(f"Ignoring ICE candidate for unowned pc_id: {pc_id}")
return
if candidate_data:
candidate_str = candidate_data.get("candidate", "")
@ -466,7 +582,7 @@ class SignalingManager:
logger.debug(f"End of ICE candidates for pc_id: {pc_id}")
async def _handle_renegotiation(
self, ws: WebSocket, payload: dict, workflow_id: int, workflow_run_id: int
self, ws: WebSocket, payload: dict, connection_key: str
):
"""Handle renegotiation request."""
pc_id = payload.get("pc_id")
@ -479,6 +595,11 @@ class SignalingManager:
{"type": "error", "payload": {"message": "Peer connection not found"}}
)
return
if self._peer_connection_owners.get(pc_id) != connection_key:
await ws.send_json(
{"type": "error", "payload": {"message": "Peer connection not found"}}
)
return
pc = self._peer_connections[pc_id]
await pc.renegotiate(sdp=sdp, type=type_, restart_pc=restart_pc)

View file

@ -60,6 +60,10 @@ from api.services.workflow.trigger_paths import (
)
from api.services.workflow.workflow_graph import WorkflowGraph
from api.utils.artifacts import artifact_url
from api.utils.recording_artifacts import (
get_recording_storage_key,
has_recording_track,
)
router = APIRouter(prefix="/workflow")
@ -1255,7 +1259,16 @@ async def get_workflow_run(
raise HTTPException(status_code=404, detail="Workflow run not found")
public_access_token = run.public_access_token
if (run.transcript_url or run.recording_url) and not public_access_token:
user_recording_url = get_recording_storage_key(run.extra, "user")
bot_recording_url = get_recording_storage_key(run.extra, "bot")
has_user_recording = has_recording_track(run.extra, "user")
has_bot_recording = has_recording_track(run.extra, "bot")
if (
run.transcript_url
or run.recording_url
or has_user_recording
or has_bot_recording
) and not public_access_token:
public_access_token = await db_client.ensure_public_access_token(run.id)
return {
@ -1266,8 +1279,20 @@ async def get_workflow_run(
"is_completed": run.is_completed,
"transcript_url": run.transcript_url,
"recording_url": run.recording_url,
"user_recording_url": user_recording_url,
"bot_recording_url": bot_recording_url,
"transcript_public_url": artifact_url(public_access_token, "transcript"),
"recording_public_url": artifact_url(public_access_token, "recording"),
"user_recording_public_url": (
artifact_url(public_access_token, "user_recording")
if has_user_recording
else None
),
"bot_recording_public_url": (
artifact_url(public_access_token, "bot_recording")
if has_bot_recording
else None
),
"public_access_token": public_access_token,
"cost_info": format_public_cost_info(run.cost_info, run.usage_info),
"usage_info": format_public_usage_info(run.usage_info),

View file

@ -15,8 +15,12 @@ class WorkflowRunResponseSchema(BaseModel):
is_completed: bool
transcript_url: str | None
recording_url: str | None
user_recording_url: str | None = None
bot_recording_url: str | None = None
transcript_public_url: str | None = None
recording_public_url: str | None = None
user_recording_public_url: str | None = None
bot_recording_public_url: str | None = None
public_access_token: str | None = None
cost_info: Dict[str, Any] | None
usage_info: Dict[str, Any] | None = None

View file

@ -944,6 +944,7 @@ class DograhTTSService(BaseTTSConfiguration):
voice: str = Field(
default="default",
description="Voice preset.",
json_schema_extra={"allow_custom_input": True},
)
speed: float = Field(default=1.0, ge=0.5, le=2.0, description="Speed of the voice.")

View file

@ -9,8 +9,8 @@ from api.services.integrations import IntegrationRuntimeSession
from api.services.pipecat.audio_config import AudioConfig
from api.services.pipecat.audio_playback import play_audio_loop
from api.services.pipecat.in_memory_buffers import (
InMemoryAudioBuffer,
InMemoryLogsBuffer,
InMemoryRecordingBuffers,
)
from api.services.pipecat.pipeline_metrics_aggregator import PipelineMetricsAggregator
from api.services.pipecat.tracing_config import get_trace_url
@ -40,11 +40,11 @@ async def _capture_call_event(
"workflow_run_id": workflow_run_id,
"workflow_id": workflow_run.workflow_id if workflow_run else None,
"call_type": workflow_run.mode if workflow_run else None,
"call_direction": (workflow_run.initial_context or {}).get(
"direction", "outbound"
)
if workflow_run
else None,
"call_direction": (
(workflow_run.initial_context or {}).get("direction", "outbound")
if workflow_run
else None
),
}
if extra_properties:
properties.update(extra_properties)
@ -73,7 +73,7 @@ def register_event_handlers(
"""Register all event handlers for transport and task events.
Returns:
in_memory_audio_buffer for use by other handlers.
In-memory recording buffers for use by other handlers.
"""
# Initialize in-memory buffers with proper audio configuration
sample_rate = audio_config.pipeline_sample_rate if audio_config else 16000
@ -84,7 +84,7 @@ def register_event_handlers(
f"with sample_rate={sample_rate}Hz, channels={num_channels}"
)
in_memory_audio_buffer = InMemoryAudioBuffer(
in_memory_audio_buffers = InMemoryRecordingBuffers(
workflow_run_id=workflow_run_id,
sample_rate=sample_rate,
num_channels=num_channels,
@ -363,14 +363,32 @@ def register_event_handlers(
# Write buffers to temp files and enqueue combined processing task
audio_temp_path = None
user_audio_temp_path = None
bot_audio_temp_path = None
transcript_temp_path = None
try:
if not in_memory_audio_buffer.is_empty:
audio_temp_path = await in_memory_audio_buffer.write_to_temp_file()
if not in_memory_audio_buffers.mixed.is_empty:
audio_temp_path = (
await in_memory_audio_buffers.mixed.write_to_temp_file()
)
else:
logger.debug("Audio buffer is empty, skipping upload")
if not in_memory_audio_buffers.user.is_empty:
user_audio_temp_path = (
await in_memory_audio_buffers.user.write_to_temp_file()
)
else:
logger.debug("User audio buffer is empty, skipping upload")
if not in_memory_audio_buffers.bot.is_empty:
bot_audio_temp_path = (
await in_memory_audio_buffers.bot.write_to_temp_file()
)
else:
logger.debug("Bot audio buffer is empty, skipping upload")
transcript_temp_path = in_memory_logs_buffer.write_transcript_to_temp_file()
if not transcript_temp_path:
logger.debug("No transcript events in logs buffer, skipping upload")
@ -385,16 +403,18 @@ def register_event_handlers(
workflow_run_id,
audio_temp_path,
transcript_temp_path,
user_audio_temp_path,
bot_audio_temp_path,
)
# Return the buffer so it can be passed to other handlers
return in_memory_audio_buffer
return in_memory_audio_buffers
def register_audio_data_handler(
audio_buffer: AudioBufferProcessor,
workflow_run_id,
in_memory_buffer: InMemoryAudioBuffer,
in_memory_buffers: InMemoryRecordingBuffers,
):
"""Register event handler for audio data"""
logger.info(f"Registering audio data handler for workflow run {workflow_run_id}")
@ -404,9 +424,19 @@ def register_audio_data_handler(
if not audio:
return
# Use in-memory buffer
try:
await in_memory_buffer.append(audio)
await in_memory_buffers.mixed.append(audio)
except MemoryError as e:
logger.error(f"Memory buffer full: {e}")
# Could implement overflow to disk here if needed
logger.error(f"Mixed audio buffer full: {e}")
@audio_buffer.event_handler("on_track_audio_data")
async def on_track_audio_data(
buffer, user_audio, bot_audio, sample_rate, num_channels
):
try:
if user_audio:
await in_memory_buffers.user.append(user_audio)
if bot_audio:
await in_memory_buffers.bot.append(bot_audio)
except MemoryError as e:
logger.error(f"Track audio buffer full: {e}")

View file

@ -75,6 +75,27 @@ class InMemoryAudioBuffer:
return self._total_size
class InMemoryRecordingBuffers:
    """Groups three audio buffers for one workflow run: mixed, user, and bot.

    The ``mixed`` buffer uses the caller-supplied channel count; the ``user``
    and ``bot`` buffers are always created with a single channel.
    """

    def __init__(self, workflow_run_id: int, sample_rate: int, num_channels: int = 1):
        # Settings shared by all three buffers.
        shared = {"workflow_run_id": workflow_run_id, "sample_rate": sample_rate}
        self.mixed = InMemoryAudioBuffer(num_channels=num_channels, **shared)
        self.user = InMemoryAudioBuffer(num_channels=1, **shared)
        self.bot = InMemoryAudioBuffer(num_channels=1, **shared)
class InMemoryLogsBuffer:
"""Buffer real-time feedback events in memory during a call, then save to workflow run logs."""

View file

@ -718,6 +718,8 @@ class TriggerNodeData(BaseNodeData):
"rsvp": "{{gathered_context.rsvp}}",
"duration": "{{cost_info.call_duration_seconds}}",
"recording_url": "{{recording_url}}",
"user_recording_url": "{{user_recording_url}}",
"bot_recording_url": "{{bot_recording_url}}",
"transcript_url": "{{transcript_url}}",
},
},

View file

@ -27,6 +27,7 @@ from api.services.workflow.dto import (
)
from api.services.workflow.qa import run_per_node_qa_analysis
from api.utils.credential_auth import build_auth_header
from api.utils.recording_artifacts import get_recording_storage_key
from api.utils.template_renderer import render_template
@ -339,6 +340,10 @@ def _build_render_context(
Returns:
Dict containing all fields available for template rendering
"""
extra = workflow_run.extra or {}
user_recording_key = get_recording_storage_key(extra, "user")
bot_recording_key = get_recording_storage_key(extra, "bot")
context = {
# Top-level fields
"workflow_run_id": workflow_run.id,
@ -353,6 +358,7 @@ def _build_render_context(
"cost_info": workflow_run.usage_info or {},
# Annotations (includes QA results)
"annotations": workflow_run.annotations or {},
"extra": extra,
}
# Add public download URLs if token is available
@ -366,9 +372,17 @@ def _build_render_context(
context["transcript_url"] = (
f"{base_url}/transcript" if workflow_run.transcript_url else None
)
context["user_recording_url"] = (
f"{base_url}/user_recording" if user_recording_key else None
)
context["bot_recording_url"] = (
f"{base_url}/bot_recording" if bot_recording_key else None
)
else:
context["recording_url"] = workflow_run.recording_url
context["transcript_url"] = workflow_run.transcript_url
context["user_recording_url"] = user_recording_key
context["bot_recording_url"] = bot_recording_key
return context

View file

@ -12,11 +12,51 @@ from api.services.workflow_run_billing import (
from api.tasks.run_integrations import run_integrations_post_workflow_run
def _recording_metadata(storage_key: str, storage_backend: str, track: str) -> dict:
return {
"storage_key": storage_key,
"storage_backend": storage_backend,
"format": "wav",
"track": track,
}
async def _upload_temp_file(
    workflow_run_id: int,
    temp_file_path: str,
    storage_key: str,
    label: str,
) -> bool:
    """Upload one temp file to storage and always try to delete it afterwards.

    Args:
        workflow_run_id: Run id, used only in the error log message.
        temp_file_path: Local path of the file to upload.
        storage_key: Destination key passed to ``storage_fs.aupload_file``.
        label: Human-readable artifact name used in log messages.

    Returns:
        ``True`` when the file existed and the upload finished without raising;
        ``False`` when the file was missing or any exception was caught.
    """
    uploaded = False
    try:
        if os.path.exists(temp_file_path):
            size_in_bytes = os.path.getsize(temp_file_path)
            logger.debug(f"{label} file size: {size_in_bytes} bytes")

            await storage_fs.aupload_file(temp_file_path, storage_key)
            logger.info(f"Successfully uploaded {label}: {storage_key}")
            uploaded = True
        else:
            logger.warning(f"{label} temp file not found: {temp_file_path}")
    except Exception as upload_error:
        # Upload failures are logged and reported through the return value
        # instead of being raised to the caller.
        logger.error(
            f"Error uploading {label} for workflow {workflow_run_id}: {upload_error}"
        )
    finally:
        # The temp file is removed whether or not the upload succeeded.
        if os.path.exists(temp_file_path):
            try:
                os.remove(temp_file_path)
                logger.debug(f"Cleaned up temp {label} file: {temp_file_path}")
            except Exception as cleanup_error:
                logger.warning(f"Failed to clean up temp {label} file: {cleanup_error}")
    return uploaded
async def process_workflow_completion(
_ctx,
workflow_run_id: int,
audio_temp_path: Optional[str] = None,
transcript_temp_path: Optional[str] = None,
user_audio_temp_path: Optional[str] = None,
bot_audio_temp_path: Optional[str] = None,
):
"""Process workflow completion: upload artifacts and run integrations.
@ -28,6 +68,8 @@ async def process_workflow_completion(
workflow_run_id: The workflow run ID
audio_temp_path: Optional path to temp audio file
transcript_temp_path: Optional path to temp transcript file
user_audio_temp_path: Optional path to temp user-track audio file
bot_audio_temp_path: Optional path to temp bot-track audio file
"""
run_id = str(workflow_run_id)
set_current_run_id(run_id)
@ -37,35 +79,55 @@ async def process_workflow_completion(
storage_backend = get_current_storage_backend()
# Step 1: Upload audio if provided
recordings_metadata: dict[str, dict] = {}
if audio_temp_path:
try:
if os.path.exists(audio_temp_path):
file_size = os.path.getsize(audio_temp_path)
logger.debug(f"Audio file size: {file_size} bytes")
recording_url = f"recordings/{workflow_run_id}.wav"
logger.info(
f"Uploading mixed audio to {storage_backend.name} - workflow_run_id: {workflow_run_id}"
)
if await _upload_temp_file(
workflow_run_id, audio_temp_path, recording_url, "mixed audio"
):
recordings_metadata["mixed"] = _recording_metadata(
recording_url, storage_backend.value, "mixed"
)
await db_client.update_workflow_run(
run_id=workflow_run_id,
recording_url=recording_url,
storage_backend=storage_backend.value,
)
recording_url = f"recordings/{workflow_run_id}.wav"
logger.info(
f"Uploading audio to {storage_backend.name} - workflow_run_id: {workflow_run_id}"
)
if user_audio_temp_path:
user_recording_url = f"recordings/{workflow_run_id}/user.wav"
logger.info(
f"Uploading user audio to {storage_backend.name} - workflow_run_id: {workflow_run_id}"
)
if await _upload_temp_file(
workflow_run_id, user_audio_temp_path, user_recording_url, "user audio"
):
recordings_metadata["user"] = _recording_metadata(
user_recording_url, storage_backend.value, "user"
)
await storage_fs.aupload_file(audio_temp_path, recording_url)
await db_client.update_workflow_run(
run_id=workflow_run_id,
recording_url=recording_url,
storage_backend=storage_backend.value,
)
logger.info(f"Successfully uploaded audio: {recording_url}")
else:
logger.warning(f"Audio temp file not found: {audio_temp_path}")
except Exception as e:
logger.error(f"Error uploading audio for workflow {workflow_run_id}: {e}")
finally:
if audio_temp_path and os.path.exists(audio_temp_path):
try:
os.remove(audio_temp_path)
logger.debug(f"Cleaned up temp audio file: {audio_temp_path}")
except Exception as e:
logger.warning(f"Failed to clean up temp audio file: {e}")
if bot_audio_temp_path:
bot_recording_url = f"recordings/{workflow_run_id}/bot.wav"
logger.info(
f"Uploading bot audio to {storage_backend.name} - workflow_run_id: {workflow_run_id}"
)
if await _upload_temp_file(
workflow_run_id, bot_audio_temp_path, bot_recording_url, "bot audio"
):
recordings_metadata["bot"] = _recording_metadata(
bot_recording_url, storage_backend.value, "bot"
)
if recordings_metadata:
await db_client.update_workflow_run(
run_id=workflow_run_id,
storage_backend=storage_backend.value,
extra={"recordings": recordings_metadata},
)
# Step 2: Upload transcript if provided
if transcript_temp_path:

View file

@ -0,0 +1,30 @@
from api.routes.s3_signed_url import (
_extract_legacy_workflow_run_id,
_extract_org_id_from_key,
)
def test_split_recording_keys_are_workflow_run_artifacts_not_org_keys():
    """Split-track keys map to their workflow run id and yield no org id."""
    split_track_keys = ("recordings/1855/user.wav", "recordings/1855/bot.wav")

    for key in split_track_keys:
        assert _extract_legacy_workflow_run_id(key) == 1855
    for key in split_track_keys:
        assert _extract_org_id_from_key(key) is None
def test_legacy_recording_keys_do_not_fall_through_to_org_scoped_auth():
    """Neither a legacy key nor an unrecognised nested key yields an org id."""
    legacy_key = "recordings/1855.wav"
    unrecognised_key = "recordings/1855/other.wav"

    assert _extract_legacy_workflow_run_id(legacy_key) == 1855
    assert _extract_legacy_workflow_run_id(unrecognised_key) is None
    for key in (legacy_key, unrecognised_key):
        assert _extract_org_id_from_key(key) is None
def test_known_org_scoped_keys_extract_org_id():
    """Known org-scoped prefixes expose their org id and no workflow run id."""
    expected_org_ids = {
        "campaigns/42/source.csv": 42,
        "knowledge_base/42/document/file.pdf": 42,
    }

    for key, org_id in expected_org_ids.items():
        assert _extract_org_id_from_key(key) == org_id
    assert _extract_legacy_workflow_run_id("campaigns/42/source.csv") is None
def test_unknown_numeric_prefix_is_not_treated_as_org_scoped():
    """A numeric segment under an unknown prefix is not read as an org id."""
    extracted_org_id = _extract_org_id_from_key("unknown/42/file.wav")
    assert extracted_org_id is None

View file

@ -0,0 +1,35 @@
from typing import Literal
RecordingTrack = Literal["mixed", "user", "bot"]
def get_recording_storage_key(extra: dict | None, track: RecordingTrack) -> str | None:
recordings = (extra or {}).get("recordings", {})
if not isinstance(recordings, dict):
return None
artifact = recordings.get(track)
if isinstance(artifact, str):
return artifact
if isinstance(artifact, dict):
storage_key = artifact.get("storage_key")
return storage_key if isinstance(storage_key, str) else None
return None
def get_recording_storage_backend(
extra: dict | None, track: RecordingTrack
) -> str | None:
recordings = (extra or {}).get("recordings", {})
if not isinstance(recordings, dict):
return None
artifact = recordings.get(track)
if isinstance(artifact, dict):
storage_backend = artifact.get("storage_backend")
return storage_backend if isinstance(storage_backend, str) else None
return None
def has_recording_track(extra: dict | None, track: RecordingTrack) -> bool:
return bool(get_recording_storage_key(extra, track))

File diff suppressed because one or more lines are too long

View file

@ -1,6 +1,6 @@
# generated by datamodel-codegen:
# filename: dograh-openapi-XXXXXX.json.mKgFDhNhca
# timestamp: 2026-06-09T10:10:10+00:00
# filename: dograh-openapi-XXXXXX.json.CuRRn2w89H
# timestamp: 2026-06-16T06:27:36+00:00
from __future__ import annotations

View file

@ -69,7 +69,7 @@ class Webhook(TypedNode):
Additional HTTP headers to include with the request.
"""
payload_template: dict[str, Any] = field(default_factory=lambda: {'call_id': '{{workflow_run_id}}', 'first_name': '{{initial_context.first_name}}', 'rsvp': '{{gathered_context.rsvp}}', 'duration': '{{cost_info.call_duration_seconds}}', 'recording_url': '{{recording_url}}', 'transcript_url': '{{transcript_url}}'})
payload_template: dict[str, Any] = field(default_factory=lambda: {'call_id': '{{workflow_run_id}}', 'first_name': '{{initial_context.first_name}}', 'rsvp': '{{gathered_context.rsvp}}', 'duration': '{{cost_info.call_duration_seconds}}', 'recording_url': '{{recording_url}}', 'user_recording_url': '{{user_recording_url}}', 'bot_recording_url': '{{bot_recording_url}}', 'transcript_url': '{{transcript_url}}'})
"""
JSON body of the request. Values are Jinja-rendered against the run
context `{{workflow_run_id}}`, `{{gathered_context.foo}}`,

View file

@ -26,10 +26,12 @@
stream: null,
sessionToken: null,
workflowRunId: null,
pcId: null,
connectionStatus: 'idle', // idle, connecting, connected, failed
audioElement: null,
turnCredentials: null, // TURN server credentials
callStartedAt: null, // Timestamp when call connected (for duration tracking)
gracefulDisconnect: false,
callbacks: {
onReady: null,
onCallStart: null,
@ -611,6 +613,7 @@
* Start voice call
*/
async function startCall() {
state.gracefulDisconnect = false;
updateStatus('connecting', 'Connecting...', 'Please wait while we establish the connection');
if (state.callbacks.onCallStart) {
@ -766,45 +769,69 @@
};
// Monitor connection state
state.pc.oniceconnectionstatechange = () => {
console.log('ICE connection state:', state.pc.iceConnectionState);
state.pc.oniceconnectionstatechange = handlePeerConnectionStateChange;
state.pc.onconnectionstatechange = handlePeerConnectionStateChange;
state.pc.onicecandidate = sendIceCandidate;
}
if (state.pc.iceConnectionState === 'connected' || state.pc.iceConnectionState === 'completed') {
const wasAlreadyConnected = state.callStartedAt !== null;
updateStatus('connected', 'Connected', 'Your voice call is now active');
if (!wasAlreadyConnected) {
state.callStartedAt = Date.now();
if (state.callbacks.onCallConnected) {
state.callbacks.onCallConnected({
agentId: state.config.workflowId || null,
token: state.config.token || null,
workflowRunId: state.workflowRunId || null
});
}
function handlePeerConnectionStateChange() {
const pc = state.pc;
if (!pc) return;
console.log('Peer connection state:', pc.connectionState, 'ICE:', pc.iceConnectionState);
if (pc.connectionState === 'connected' || pc.iceConnectionState === 'connected' || pc.iceConnectionState === 'completed') {
const wasAlreadyConnected = state.callStartedAt !== null;
updateStatus('connected', 'Connected', 'Your voice call is now active');
if (!wasAlreadyConnected) {
state.callStartedAt = Date.now();
if (state.callbacks.onCallConnected) {
state.callbacks.onCallConnected({
agentId: state.config.workflowId || null,
token: state.config.token || null,
workflowRunId: state.workflowRunId || null
});
}
} else if (state.pc.iceConnectionState === 'failed' || state.pc.iceConnectionState === 'disconnected') {
updateStatus('failed', 'Connection lost', 'The call has been disconnected');
stopCall();
}
};
return;
}
if (pc.connectionState === 'failed' || pc.iceConnectionState === 'failed') {
stopCall({
graceful: false,
status: 'failed',
text: 'Connection lost',
subtext: 'The call has been disconnected'
});
return;
}
if (
pc.connectionState === 'closed' ||
pc.connectionState === 'disconnected' ||
pc.iceConnectionState === 'closed' ||
pc.iceConnectionState === 'disconnected'
) {
stopCall({ graceful: true });
}
}
function sendIceCandidate(event) {
// Handle ICE candidates for trickling
state.pc.onicecandidate = (event) => {
if (state.ws && state.ws.readyState === WebSocket.OPEN) {
const message = {
type: 'ice-candidate',
payload: {
candidate: event.candidate ? {
candidate: event.candidate.candidate,
sdpMid: event.candidate.sdpMid,
sdpMLineIndex: event.candidate.sdpMLineIndex
} : null,
pc_id: state.pcId
}
};
state.ws.send(JSON.stringify(message));
}
};
if (state.ws && state.ws.readyState === WebSocket.OPEN) {
const message = {
type: 'ice-candidate',
payload: {
candidate: event.candidate ? {
candidate: event.candidate.candidate,
sdpMid: event.candidate.sdpMid,
sdpMLineIndex: event.candidate.sdpMLineIndex
} : null,
pc_id: state.pcId
}
};
state.ws.send(JSON.stringify(message));
}
}
/**
@ -828,9 +855,16 @@
reject(error);
};
state.ws.onclose = () => {
state.ws.onclose = (event) => {
console.log('WebSocket closed');
if (state.connectionStatus === 'connected') {
state.ws = null;
if (event.reason === 'call ended') {
stopCall({ graceful: true, closeWebSocket: false });
return;
}
if (state.connectionStatus === 'connected' && !state.gracefulDisconnect) {
updateStatus('failed', 'Connection lost', 'The call has been disconnected');
}
};
@ -882,6 +916,11 @@
updateStatus('failed', 'Server error', message.payload.message || 'An error occurred');
break;
case 'call-ended':
console.log('Call ended by server:', message.payload);
stopCall({ graceful: true });
break;
default:
console.warn('Unknown message type:', message.type);
}
@ -913,7 +952,15 @@
/**
* Stop voice call
*/
function stopCall() {
function stopCall(options = {}) {
const graceful = options.graceful !== false;
const closeWebSocket = options.closeWebSocket !== false;
const status = options.status || 'idle';
const text = options.text || 'Call ended';
const subtext = options.subtext || 'Click below to start a new call';
state.gracefulDisconnect = graceful;
// Fire onCallDisconnected only if the call had actually connected, with
// identifiers and duration. Must run before we clear callStartedAt.
if (state.callStartedAt && state.callbacks.onCallDisconnected) {
@ -927,15 +974,20 @@
}
state.callStartedAt = null;
updateStatus('idle', 'Call ended', 'Click below to start a new call');
updateStatus(status, text, subtext);
if (state.callbacks.onCallEnd) {
state.callbacks.onCallEnd();
}
// Close WebSocket
if (state.ws) {
state.ws.close();
if (closeWebSocket && state.ws) {
const ws = state.ws;
state.ws = null;
if (ws.readyState !== WebSocket.CLOSED && ws.readyState !== WebSocket.CLOSING) {
ws.close();
}
} else if (!closeWebSocket) {
state.ws = null;
}
@ -947,8 +999,11 @@
// Close peer connection
if (state.pc) {
state.pc.close();
const pc = state.pc;
state.pc = null;
if (pc.signalingState !== 'closed') {
pc.close();
}
}
// Clear audio

View file

@ -19,6 +19,16 @@ interface UseWebSocketRTCProps {
onNodeTransition?: (transition: ConversationNodeTransitionItem) => void;
}
type ConnectionStatus = 'idle' | 'connecting' | 'connected' | 'failed';
interface CleanupConnectionOptions {
graceful?: boolean;
status?: ConnectionStatus;
closeWebSocket?: boolean;
closePeerConnection?: boolean;
delayPeerClose?: boolean;
}
const HANDLED_SERVICE_ERROR_TYPES = new Set([
'quota_exceeded',
'insufficient_credits',
@ -27,7 +37,7 @@ const HANDLED_SERVICE_ERROR_TYPES = new Set([
]);
export const useWebSocketRTC = ({ workflowId, workflowRunId, accessToken, initialContextVariables, onNodeTransition }: UseWebSocketRTCProps) => {
const [connectionStatus, setConnectionStatus] = useState<'idle' | 'connecting' | 'connected' | 'failed'>('idle');
const [connectionStatus, setConnectionStatus] = useState<ConnectionStatus>('idle');
const [connectionActive, setConnectionActive] = useState(false);
const [isCompleted, setIsCompleted] = useState(false);
const [apiKeyModalOpen, setApiKeyModalOpen] = useState(false);
@ -62,11 +72,22 @@ export const useWebSocketRTC = ({ workflowId, workflowRunId, accessToken, initia
const wsRef = useRef<WebSocket | null>(null);
const timeStartRef = useRef<number | null>(null);
const onNodeTransitionRef = useRef(onNodeTransition);
const connectionActiveRef = useRef(connectionActive);
const isCompletedRef = useRef(isCompleted);
const gracefulDisconnectRef = useRef(false);
useEffect(() => {
onNodeTransitionRef.current = onNodeTransition;
}, [onNodeTransition]);
useEffect(() => {
connectionActiveRef.current = connectionActive;
}, [connectionActive]);
useEffect(() => {
isCompletedRef.current = isCompleted;
}, [isCompleted]);
// Generate a cryptographically secure unique ID
const generateSecureId = () => {
// Use Web Crypto API to generate random bytes
@ -95,6 +116,68 @@ export const useWebSocketRTC = ({ workflowId, workflowRunId, accessToken, initia
return `${wsUrl}/api/v1/ws/signaling/${workflowId}/${workflowRunId}?token=${accessToken}`;
}, [workflowId, workflowRunId, accessToken]);
// Stops all transceivers and outgoing tracks on `pc`, then closes it either
// immediately or after a 500 ms delay. `pcRef` is cleared only if it still
// points at this same peer connection when the close actually runs.
const closePeerConnection = useCallback((pc: RTCPeerConnection | null, delayClose = false) => {
    if (!pc) return;

    const transceivers = pc.getTransceivers ? pc.getTransceivers() : [];
    transceivers.forEach((transceiver) => {
        if (!transceiver.stop) return;
        try {
            transceiver.stop();
        } catch (e) {
            // Best-effort cleanup: a transceiver that refuses to stop must not
            // prevent the rest of the teardown.
            logger.debug('Failed to stop transceiver during cleanup:', e);
        }
    });

    pc.getSenders().forEach(({ track }) => {
        track?.stop();
    });

    const finalizeClose = () => {
        if (pcRef.current === pc) {
            pcRef.current = null;
        }
        if (pc.signalingState !== 'closed') {
            pc.close();
        }
    };

    if (!delayClose) {
        finalizeClose();
        return;
    }
    setTimeout(finalizeClose, 500);
}, []);
// Single teardown path for the call: records whether the disconnect was
// graceful, updates refs and React state, then closes the WebSocket and the
// peer connection unless the caller opts out of either.
const cleanupConnection = useCallback((options: CleanupConnectionOptions = {}) => {
    const graceful = options.graceful ?? true;
    // When no explicit status is given, a graceful end is 'idle' and anything
    // else is 'failed'.
    const status = options.status ?? (graceful ? 'idle' : 'failed');
    const shouldCloseWebSocket = options.closeWebSocket !== false;
    const shouldClosePeer = options.closePeerConnection !== false;

    // Refs are written synchronously so handlers firing before the next render
    // already observe the new values.
    gracefulDisconnectRef.current = graceful;
    connectionActiveRef.current = false;
    isCompletedRef.current = graceful;

    setConnectionActive(false);
    setIsCompleted(graceful);
    setConnectionStatus(status);

    if (shouldCloseWebSocket) {
        const socket = wsRef.current;
        const alreadyClosing =
            !socket ||
            socket.readyState === WebSocket.CLOSED ||
            socket.readyState === WebSocket.CLOSING;
        if (socket && !alreadyClosing) {
            socket.close();
        }
        wsRef.current = null;
    }

    if (shouldClosePeer) {
        closePeerConnection(pcRef.current, options.delayPeerClose ?? false);
    }
}, [closePeerConnection]);
const createPeerConnection = () => {
// Build ICE servers list
const iceServers: RTCIceServer[] = [];
@ -155,43 +238,36 @@ export const useWebSocketRTC = ({ workflowId, workflowRunId, accessToken, initia
}
});
pc.addEventListener('iceconnectionstatechange', () => {
logger.info(`ICE connection state changed: ${pc.iceConnectionState}`);
if (pc.iceConnectionState === 'connected' || pc.iceConnectionState === 'completed') {
const handlePeerStateChange = () => {
logger.info(`Peer connection state changed: ${pc.connectionState}; ICE: ${pc.iceConnectionState}`);
if (
pc.connectionState === 'connected' ||
pc.iceConnectionState === 'connected' ||
pc.iceConnectionState === 'completed'
) {
setConnectionStatus('connected');
} else if (pc.iceConnectionState === 'failed') {
setConnectionStatus('failed');
} else if (pc.iceConnectionState === 'disconnected') {
// Server-initiated disconnect - clean up gracefully
logger.info('Server initiated disconnect - cleaning up connection');
// Close WebSocket if still open
if (wsRef.current) {
wsRef.current.close();
wsRef.current = null;
}
// Mark as completed to trigger recording check
setConnectionActive(false);
setIsCompleted(true);
setConnectionStatus('idle');
// Clean up peer connection
if (pc.getTransceivers) {
pc.getTransceivers().forEach((transceiver) => {
if (transceiver.stop) {
transceiver.stop();
}
});
}
pc.getSenders().forEach((sender) => {
if (sender.track) {
sender.track.stop();
}
});
return;
}
});
if (pc.connectionState === 'failed' || pc.iceConnectionState === 'failed') {
cleanupConnection({ graceful: false, status: 'failed' });
return;
}
if (
pc.connectionState === 'closed' ||
pc.connectionState === 'disconnected' ||
pc.iceConnectionState === 'closed' ||
pc.iceConnectionState === 'disconnected'
) {
logger.info('Peer connection ended - cleaning up connection');
cleanupConnection({ graceful: true, status: 'idle' });
}
};
pc.addEventListener('iceconnectionstatechange', handlePeerStateChange);
pc.addEventListener('connectionstatechange', handlePeerStateChange);
pc.addEventListener('track', (evt) => {
if (evt.track.kind === 'audio' && audioRef.current) {
@ -221,11 +297,23 @@ export const useWebSocketRTC = ({ workflowId, workflowRunId, accessToken, initia
reject(error);
};
ws.onclose = () => {
ws.onclose = (event) => {
logger.info('WebSocket closed');
wsRef.current = null;
if (event.reason === 'call ended') {
cleanupConnection({
graceful: true,
status: 'idle',
closeWebSocket: false,
});
return;
}
// Don't set failed status if already completed (graceful disconnect)
if (connectionActive && !isCompleted) {
if (
connectionActiveRef.current &&
!isCompletedRef.current &&
!gracefulDisconnectRef.current
) {
setConnectionStatus('failed');
}
};
@ -245,6 +333,7 @@ export const useWebSocketRTC = ({ workflowId, workflowRunId, accessToken, initia
type: 'answer',
sdp: answer.sdp
});
connectionActiveRef.current = true;
setConnectionActive(true);
logger.info('Remote description set');
}
@ -281,25 +370,19 @@ export const useWebSocketRTC = ({ workflowId, workflowRunId, accessToken, initia
setApiKeyError(message.payload.message || 'Service quota exceeded');
setApiKeyModalOpen(true);
// Stop the connection gracefully
setConnectionStatus('failed');
setConnectionActive(false);
// Close WebSocket and peer connection
if (wsRef.current) {
wsRef.current.close();
wsRef.current = null;
}
if (pcRef.current) {
pcRef.current.close();
pcRef.current = null;
}
// Stop the connection and surface the handled service error.
cleanupConnection({ graceful: false, status: 'failed' });
} else {
// Log other errors as actual errors
logger.error('Server error:', message.payload);
}
break;
case 'call-ended':
logger.info('Call ended by server:', message.payload);
cleanupConnection({ graceful: true, status: 'idle' });
break;
case 'rtf-user-transcription': {
const transcription = message.payload;
@ -503,7 +586,7 @@ export const useWebSocketRTC = ({ workflowId, workflowRunId, accessToken, initia
}
};
});
}, [getWebSocketUrl, connectionActive, isCompleted]);
}, [getWebSocketUrl, cleanupConnection]);
const negotiate = async () => {
const pc = pcRef.current;
@ -552,7 +635,12 @@ export const useWebSocketRTC = ({ workflowId, workflowRunId, accessToken, initia
const start = async () => {
if (isStarting || !accessToken) return;
gracefulDisconnectRef.current = false;
connectionActiveRef.current = false;
isCompletedRef.current = false;
setIsStarting(true);
setConnectionActive(false);
setIsCompleted(false);
setConnectionStatus('connecting');
try {
@ -676,40 +764,7 @@ export const useWebSocketRTC = ({ workflowId, workflowRunId, accessToken, initia
};
const stop = () => {
setConnectionActive(false);
setIsCompleted(true);
setConnectionStatus('idle');
// Close WebSocket
if (wsRef.current) {
wsRef.current.close();
wsRef.current = null;
}
// Close peer connection
const pc = pcRef.current;
if (!pc) return;
if (pc.getTransceivers) {
pc.getTransceivers().forEach((transceiver) => {
if (transceiver.stop) {
transceiver.stop();
}
});
}
pc.getSenders().forEach((sender) => {
if (sender.track) {
sender.track.stop();
}
});
setTimeout(() => {
if (pcRef.current) {
pcRef.current.close();
pcRef.current = null;
}
}, 500);
cleanupConnection({ graceful: true, status: 'idle', delayPeerClose: true });
};
// Cleanup on unmount

View file

@ -1,6 +1,18 @@
'use client';
import { Check, Copy, ExternalLink, FileText, Video } from 'lucide-react';
import {
Bot,
Check,
Copy,
Download,
ExternalLink,
FileText,
Loader2,
Pause,
Play,
UserRound,
Video,
} from 'lucide-react';
import Link from 'next/link';
import { useParams } from 'next/navigation';
import posthog from 'posthog-js';
@ -18,13 +30,16 @@ import { PostHogEvent } from '@/constants/posthog-events';
import { WORKFLOW_RUN_MODES } from '@/constants/workflowRunModes';
import { useOnboarding } from '@/context/OnboardingContext';
import { useAuth } from '@/lib/auth';
import { downloadFile } from '@/lib/files';
import { downloadFile, getSignedUrl } from '@/lib/files';
import { cn } from '@/lib/utils';
interface WorkflowRunResponse {
mode: string;
is_completed: boolean;
transcript_url: string | null;
recording_url: string | null;
user_recording_url: string | null;
bot_recording_url: string | null;
cost_info: {
dograh_token_usage?: number | null;
call_duration_seconds?: number | null;
@ -36,6 +51,7 @@ interface WorkflowRunResponse {
}
const RUN_SHELL_HEIGHT_CLASS = "h-[calc(100svh-49px)] min-h-[calc(100svh-49px)] max-h-[calc(100svh-49px)]";
const WAVEFORM_BAR_COUNT = 96;
function formatDuration(seconds?: number | null) {
if (seconds == null || Number.isNaN(seconds)) return 'N/A';
@ -71,6 +87,309 @@ function MetricCard({ label, value }: { label: string; value: string }) {
);
}
/**
 * Reduce the first channel of `audioBuffer` to WAVEFORM_BAR_COUNT bar heights.
 *
 * Each bar is the RMS of its slice of samples, multiplied by 5 and clamped to
 * the range [0.08, 1]. A bar whose slice is empty comes out at 0.08.
 */
function buildWaveformPeaks(audioBuffer: AudioBuffer) {
    const samples = audioBuffer.getChannelData(0);
    const barWidth = Math.max(1, Math.floor(samples.length / WAVEFORM_BAR_COUNT));
    const peaks: number[] = [];

    for (let bar = 0; bar < WAVEFORM_BAR_COUNT; bar += 1) {
        const from = bar * barWidth;
        const to = Math.min(from + barWidth, samples.length);

        let energy = 0;
        for (let i = from; i < to; i += 1) {
            energy += samples[i] * samples[i];
        }

        // Math.max(1, ...) keeps the divisor positive when the slice is empty.
        const rms = Math.sqrt(energy / Math.max(1, to - from));
        peaks.push(Math.max(0.08, Math.min(1, rms * 5)));
    }

    return peaks;
}
/**
 * Download the audio at `url`, decode it and return its waveform bar heights.
 *
 * Returns `null` without downloading anything when the browser exposes no
 * AudioContext constructor. Throws when the HTTP response is not OK or when
 * the audio cannot be decoded; the caller is expected to handle that.
 */
async function loadWaveformPeaks(url: string) {
    // Resolve the constructor first so unsupported browsers skip the download.
    const AudioContextConstructor =
        window.AudioContext ||
        (window as typeof window & { webkitAudioContext?: typeof AudioContext })
            .webkitAudioContext;
    if (!AudioContextConstructor) return null;

    const response = await fetch(url);
    if (!response.ok) {
        // Fail with the HTTP status instead of handing an error body to the
        // decoder, which would only report an opaque decoding failure.
        throw new Error(`Failed to fetch audio for waveform (HTTP ${response.status})`);
    }
    const audioData = await response.arrayBuffer();

    const audioContext = new AudioContextConstructor();
    try {
        const decoded = await audioContext.decodeAudioData(audioData);
        return buildWaveformPeaks(decoded);
    } finally {
        // Release the context whether or not decoding succeeded.
        void audioContext.close();
    }
}
/**
 * One waveform row inside the split-track player.
 *
 * Renders one bar per entry in `peaks`, coloured by `track`, anchored to the
 * top or bottom of the container according to `position`. When `peaks` is null
 * a thin horizontal placeholder line is rendered instead.
 */
function WaveformLane({
    peaks,
    track,
    position,
}: {
    peaks: number[] | null;
    track: 'user' | 'bot';
    position: 'top' | 'bottom';
}) {
    const laneClassName = cn(
        'absolute left-3 right-3 flex gap-0.5',
        position === 'top' ? 'top-5 h-12 items-end' : 'bottom-5 h-12 items-start'
    );
    const barColorClass = track === 'user' ? 'bg-sky-500' : 'bg-emerald-500';

    const laneContent = peaks ? (
        peaks.map((peak, index) => (
            <span
                key={`${track}-${index}`}
                className={cn('min-h-1 flex-1 rounded-full opacity-85', barColorClass)}
                style={{ height: `${Math.round(peak * 100)}%` }}
            />
        ))
    ) : (
        <div className="my-auto h-px w-full bg-border" />
    );

    return <div className={laneClassName}>{laneContent}</div>;
}
/**
 * "Split Tracks" card: plays the user and bot recordings together through two
 * hidden <audio> elements, draws one waveform lane per track with a shared
 * progress marker, and offers a download button for each track.
 *
 * Props:
 * - userRecordingUrl / botRecordingUrl: values passed to getSignedUrl for
 *   playback and to downloadFile for the download buttons.
 */
function SplitTracksSection({
userRecordingUrl,
botRecordingUrl,
}: {
userRecordingUrl: string;
botRecordingUrl: string;
}) {
const userAudioRef = useRef<HTMLAudioElement | null>(null);
const botAudioRef = useRef<HTMLAudioElement | null>(null);
// Signed playback URLs; both stay null until the lookup below resolves.
const [signedUrls, setSignedUrls] = useState<{ user: string | null; bot: string | null }>({
user: null,
bot: null,
});
// Waveform bar heights per track; null renders the placeholder line.
const [peaks, setPeaks] = useState<{ user: number[] | null; bot: number[] | null }>({
user: null,
bot: null,
});
const [isLoading, setIsLoading] = useState(false);
const [isPlaying, setIsPlaying] = useState(false);
// Playback position as a fraction in [0, 1].
const [progress, setProgress] = useState(0);
// Whenever either recording prop changes: stop playback, reset all state, then
// resolve signed URLs and load waveform peaks for both tracks.
useEffect(() => {
// Cleared by the cleanup function so a stale async run cannot set state.
let isActive = true;
const userAudio = userAudioRef.current;
const botAudio = botAudioRef.current;
userAudio?.pause();
botAudio?.pause();
setSignedUrls({ user: null, bot: null });
setPeaks({ user: null, bot: null });
setIsPlaying(false);
setProgress(0);
setIsLoading(true);
async function loadTracks() {
try {
// NOTE(review): the meaning of the second argument (`true`) is defined by
// getSignedUrl in '@/lib/files' and is not visible here — confirm.
const [userUrl, botUrl] = await Promise.all([
getSignedUrl(userRecordingUrl, true),
getSignedUrl(botRecordingUrl, true),
]);
if (!isActive) return;
setSignedUrls({ user: userUrl, bot: botUrl });
// Waveforms are only loaded when both URLs resolved; the `finally` below
// still clears the loading flag on this early return.
if (!userUrl || !botUrl) return;
const [userPeaks, botPeaks] = await Promise.all([
loadWaveformPeaks(userUrl),
loadWaveformPeaks(botUrl),
]);
if (isActive) {
setPeaks({ user: userPeaks, bot: botPeaks });
}
} catch (error) {
// Any failure is only logged; state set before the failure is kept.
console.error('Error loading split track waveforms:', error);
} finally {
if (isActive) {
setIsLoading(false);
}
}
}
void loadTracks();
return () => {
isActive = false;
userAudio?.pause();
botAudio?.pause();
};
}, [userRecordingUrl, botRecordingUrl]);
// While playing, recompute `progress` on every animation frame from the larger
// currentTime and the larger finite duration of the two audio elements.
useEffect(() => {
if (!isPlaying) return;
let frameId: number;
const updateProgress = () => {
const userAudio = userAudioRef.current;
const botAudio = botAudioRef.current;
// Non-finite durations (e.g. NaN) count as 0.
const userDuration = Number.isFinite(userAudio?.duration) ? userAudio?.duration ?? 0 : 0;
const botDuration = Number.isFinite(botAudio?.duration) ? botAudio?.duration ?? 0 : 0;
const duration = Math.max(userDuration, botDuration);
const currentTime = Math.max(userAudio?.currentTime ?? 0, botAudio?.currentTime ?? 0);
setProgress(duration > 0 ? Math.min(1, currentTime / duration) : 0);
frameId = window.requestAnimationFrame(updateProgress);
};
frameId = window.requestAnimationFrame(updateProgress);
return () => window.cancelAnimationFrame(frameId);
}, [isPlaying]);
// Pause both elements and leave the playing state.
const pauseTracks = () => {
userAudioRef.current?.pause();
botAudioRef.current?.pause();
setIsPlaying(false);
};
// Shared `onEnded` handler: playback counts as finished only once both tracks
// have ended (a missing element counts as ended).
const handleTrackEnded = () => {
const userAudio = userAudioRef.current;
const botAudio = botAudioRef.current;
const userDone = !userAudio || userAudio.ended;
const botDone = !botAudio || botAudio.ended;
if (userDone && botDone) {
setIsPlaying(false);
setProgress(1);
}
};
// Play/pause toggle. Does nothing until both elements and both signed URLs
// are available.
const togglePlayback = async () => {
const userAudio = userAudioRef.current;
const botAudio = botAudioRef.current;
if (!userAudio || !botAudio || !signedUrls.user || !signedUrls.bot) return;
if (isPlaying) {
pauseTracks();
return;
}
const userDuration = Number.isFinite(userAudio.duration) ? userAudio.duration : 0;
const botDuration = Number.isFinite(botAudio.duration) ? botAudio.duration : 0;
const duration = Math.max(userDuration, botDuration);
const currentTime = Math.max(userAudio.currentTime, botAudio.currentTime);
// Restart from 0 when the position is within 0.1 s of the end; otherwise
// resume from the furthest-advanced track's position.
const startTime = duration > 0 && currentTime >= duration - 0.1 ? 0 : currentTime;
// Each track is set to the same start time, capped at its own duration when
// that duration is known (non-zero).
userAudio.currentTime = Math.min(startTime, userDuration || startTime);
botAudio.currentTime = Math.min(startTime, botDuration || startTime);
try {
await Promise.all([userAudio.play(), botAudio.play()]);
setIsPlaying(true);
} catch (error) {
// If either play() rejects, pause both so the tracks do not run alone.
pauseTracks();
console.error('Error playing split tracks:', error);
}
};
const canPlay = Boolean(signedUrls.user && signedUrls.bot);
// Percentage rounded to one decimal place, used as the marker's `left` offset.
const progressPercent = Math.round(progress * 1000) / 10;
return (
<Card className="border-border">
<audio
ref={userAudioRef}
src={signedUrls.user ?? undefined}
preload="metadata"
className="hidden"
onEnded={handleTrackEnded}
/>
<audio
ref={botAudioRef}
src={signedUrls.bot ?? undefined}
preload="metadata"
className="hidden"
onEnded={handleTrackEnded}
/>
<CardHeader className="pb-3">
<CardTitle className="text-lg">Split Tracks</CardTitle>
</CardHeader>
<CardContent className="space-y-4">
<div className="flex flex-wrap items-center justify-between gap-3">
<div className="flex items-center gap-2">
<span className="inline-flex items-center gap-1.5 text-sm font-medium text-sky-600">
<UserRound className="h-4 w-4" />
User
</span>
<span className="h-4 w-px bg-border" />
<span className="inline-flex items-center gap-1.5 text-sm font-medium text-emerald-600">
<Bot className="h-4 w-4" />
Bot
</span>
</div>
<div className="flex items-center gap-2">
<Button
type="button"
variant="outline"
size="sm"
onClick={() => downloadFile(userRecordingUrl)}
className="gap-2"
>
<Download className="h-4 w-4" />
User
</Button>
<Button
type="button"
variant="outline"
size="sm"
onClick={() => downloadFile(botRecordingUrl)}
className="gap-2"
>
<Download className="h-4 w-4" />
Bot
</Button>
</div>
</div>
<div className="flex items-center gap-4">
<Button
type="button"
size="icon"
variant={isPlaying ? 'default' : 'outline'}
onClick={togglePlayback}
disabled={!canPlay}
aria-label={isPlaying ? 'Pause split tracks' : 'Play split tracks'}
className="h-10 w-10 shrink-0"
>
{isPlaying ? <Pause className="h-4 w-4" /> : <Play className="h-4 w-4" />}
</Button>
<div className="relative h-36 min-w-0 flex-1 overflow-hidden rounded-lg border border-border/70 bg-background">
<div className="absolute left-3 right-3 top-1/2 h-px bg-border/80" />
<WaveformLane peaks={peaks.user} track="user" position="top" />
<WaveformLane peaks={peaks.bot} track="bot" position="bottom" />
{canPlay && (
<div className="pointer-events-none absolute inset-x-3 inset-y-3">
<div
className="absolute top-0 bottom-0 w-px bg-foreground/50"
style={{ left: `${progressPercent}%` }}
/>
</div>
)}
{isLoading && (
<div className="absolute inset-0 flex items-center justify-center bg-background/70 text-xs text-muted-foreground">
<Loader2 className="mr-2 h-3.5 w-3.5 animate-spin" />
Loading
</div>
)}
</div>
</div>
</CardContent>
</Card>
);
}
function RunMetricsSection({
costInfo,
logs,
@ -180,6 +499,8 @@ export default function WorkflowRunPage() {
is_completed: response.data?.is_completed ?? false,
transcript_url: response.data?.transcript_url ?? null,
recording_url: response.data?.recording_url ?? null,
user_recording_url: response.data?.user_recording_url ?? null,
bot_recording_url: response.data?.bot_recording_url ?? null,
cost_info: response.data?.cost_info ?? null,
initial_context: response.data?.initial_context as Record<string, string> | null ?? null,
gathered_context: response.data?.gathered_context as Record<string, string> | null ?? null,
@ -192,6 +513,7 @@ export default function WorkflowRunPage() {
run_id: Number(runId),
is_completed: runData.is_completed,
has_recording: !!runData.recording_url,
has_split_recordings: !!runData.user_recording_url && !!runData.bot_recording_url,
has_transcript: !!runData.transcript_url,
});
};
@ -201,6 +523,9 @@ export default function WorkflowRunPage() {
let returnValue = null;
const isTextChatRun = workflowRun?.mode === WORKFLOW_RUN_MODES.TEXTCHAT;
const showRunDetailsView = Boolean(workflowRun?.is_completed || isTextChatRun);
const userSplitRecordingUrl = workflowRun?.user_recording_url ?? null;
const botSplitRecordingUrl = workflowRun?.bot_recording_url ?? null;
const hasSplitTracks = Boolean(userSplitRecordingUrl && botSplitRecordingUrl);
if (isLoading) {
returnValue = (
@ -336,6 +661,13 @@ export default function WorkflowRunPage() {
gatheredContext={workflowRun?.gathered_context ?? null}
/>
{!isTextChatRun && hasSplitTracks && (
<SplitTracksSection
userRecordingUrl={userSplitRecordingUrl as string}
botRecordingUrl={botSplitRecordingUrl as string}
/>
)}
<div className="grid gap-6 md:grid-cols-2">
<ContextDisplay
title="Initial Context"

File diff suppressed because one or more lines are too long

View file

@ -1152,9 +1152,9 @@ export const getCampaignDefaultsApiV1OrganizationsCampaignDefaultsGet = <ThrowOn
* Return a short-lived signed URL for a file stored on S3 / MinIO.
*
* Access Control:
* * Keys that embed an organization ID (``{prefix}/{org_id}/...``) are
* authorized by matching the org_id against the requesting user's
* organization.
* * Known org-scoped keys (for example ``campaigns/{org_id}/...`` and
* ``knowledge_base/{org_id}/...``) are authorized by matching the org_id
* against the requesting user's organization.
* * Legacy keys (``recordings/{run_id}.wav``, ``transcripts/{run_id}.txt``)
* are authorized via the workflow run they belong to.
* * Superusers can request any key.
@ -1474,13 +1474,15 @@ export const initiateCallTestByWorkflowUuidApiV1PublicAgentTestWorkflowWorkflowU
*
* Args:
* token: The public access token (UUID format)
* artifact_type: Type of artifact - "recording" or "transcript"
* artifact_type: Type of artifact - "recording", "transcript",
* "user_recording", or "bot_recording"
* inline: If true, sets Content-Disposition to inline for browser preview
*
* Returns:
* RedirectResponse to the signed URL (302 redirect)
*
* Raises:
* HTTPException 400: If artifact type is unsupported
* HTTPException 404: If token is invalid or artifact not found
*/
export const downloadWorkflowArtifactApiV1PublicDownloadWorkflowTokenArtifactTypeGet = <ThrowOnError extends boolean = false>(
    options: Options<DownloadWorkflowArtifactApiV1PublicDownloadWorkflowTokenArtifactTypeGetData, ThrowOnError>,
) => {
    // Use the caller-supplied client when present; otherwise use the `client` in scope.
    const httpClient = options.client ?? client;
    return httpClient.get<
        DownloadWorkflowArtifactApiV1PublicDownloadWorkflowTokenArtifactTypeGetResponses,
        DownloadWorkflowArtifactApiV1PublicDownloadWorkflowTokenArtifactTypeGetErrors,
        ThrowOnError
    >({
        url: '/api/v1/public/download/workflow/{token}/{artifact_type}',
        ...options,
    });
};

View file

@ -499,6 +499,8 @@ export type ByokPipelineAiModelConfiguration = {
} & AwsBedrockLlmConfiguration) | ({
provider: 'speaches';
} & SpeachesLlmConfiguration) | ({
provider: 'huggingface';
} & HuggingFaceLlmConfiguration) | ({
provider: 'minimax';
} & MiniMaxLlmConfiguration) | ({
provider: 'sarvam';
@ -551,6 +553,8 @@ export type ByokPipelineAiModelConfiguration = {
} & SarvamSttConfiguration) | ({
provider: 'speaches';
} & SpeachesSttConfiguration) | ({
provider: 'huggingface';
} & HuggingFaceSttConfiguration) | ({
provider: 'assemblyai';
} & AssemblyAisttConfiguration) | ({
provider: 'gladia';
@ -613,6 +617,8 @@ export type ByokRealtimeAiModelConfiguration = {
} & AwsBedrockLlmConfiguration) | ({
provider: 'speaches';
} & SpeachesLlmConfiguration) | ({
provider: 'huggingface';
} & HuggingFaceLlmConfiguration) | ({
provider: 'minimax';
} & MiniMaxLlmConfiguration) | ({
provider: 'sarvam';
@ -2916,6 +2922,80 @@ export type HttpApiToolDefinition = {
config: HttpApiConfig;
};
/**
* Hugging Face
*
* Hosted Hugging Face Inference Providers API for usage-based inference.
*
* LLM settings for the `huggingface` provider. In this file the type is
* intersected with `{ provider: 'huggingface' }` inside the BYOK pipeline and
* realtime configuration unions, which is where `provider` becomes mandatory.
*/
export type HuggingFaceLlmConfiguration = {
/**
* Provider
*
* Literal discriminator for this provider. Optional on this type alone.
*/
provider?: 'huggingface';
/**
* Api Key
*
* Required. Either a single key or a list of keys.
* NOTE(review): how a list of keys is consumed (rotation, fallback, ...) is
* not visible in this file; confirm against the backend schema.
*/
api_key: string | Array<string>;
/**
* Model
*
* Hugging Face chat-completion model identifier, optionally with provider suffix.
* Optional here. NOTE(review): any server-side default is not visible in this file.
*/
model?: string;
/**
* Base Url
*
* Hugging Face OpenAI-compatible chat-completions router base URL.
* Optional here. NOTE(review): any server-side default is not visible in this file.
*/
base_url?: string;
/**
* Bill To
*
* Optional Hugging Face organization or user to bill using X-HF-Bill-To.
* May be omitted or explicitly `null`.
*/
bill_to?: string | null;
};
/**
* Hugging Face
*
* Hosted Hugging Face Inference Providers API for usage-based inference.
*
* Speech-to-text settings for the `huggingface` provider. In this file the
* type is intersected with `{ provider: 'huggingface' }` inside the BYOK
* pipeline configuration union, which is where `provider` becomes mandatory.
*/
export type HuggingFaceSttConfiguration = {
/**
* Provider
*
* Literal discriminator for this provider. Optional on this type alone.
*/
provider?: 'huggingface';
/**
* Api Key
*
* Required. Either a single key or a list of keys.
* NOTE(review): how a list of keys is consumed (rotation, fallback, ...) is
* not visible in this file; confirm against the backend schema.
*/
api_key: string | Array<string>;
/**
* Model
*
* Hugging Face ASR model identifier served through Inference Providers.
* Optional here. NOTE(review): any server-side default is not visible in this file.
*/
model?: string;
/**
* Base Url
*
* Hugging Face Inference Providers router base URL.
* Optional here. NOTE(review): any server-side default is not visible in this file.
*/
base_url?: string;
/**
* Bill To
*
* Optional Hugging Face organization or user to bill using X-HF-Bill-To.
* May be omitted or explicitly `null`.
*/
bill_to?: string | null;
/**
* Return Timestamps
*
* Request timestamp chunks when supported by the selected provider/model.
* Optional boolean flag.
*/
return_timestamps?: boolean;
};
/**
* ImpersonateRequest
*
@ -6360,6 +6440,14 @@ export type WorkflowRunResponseSchema = {
* Recording Url
*/
recording_url: string | null;
/**
* User Recording Url
*/
user_recording_url?: string | null;
/**
* Bot Recording Url
*/
bot_recording_url?: string | null;
/**
* Transcript Public Url
*/
@ -6368,6 +6456,14 @@ export type WorkflowRunResponseSchema = {
* Recording Public Url
*/
recording_public_url?: string | null;
/**
* User Recording Public Url
*/
user_recording_public_url?: string | null;
/**
* Bot Recording Public Url
*/
bot_recording_public_url?: string | null;
/**
* Public Access Token
*/
@ -6527,6 +6623,14 @@ export type WorkflowRunUsageResponse = {
* Transcript Url
*/
transcript_url?: string | null;
/**
* User Recording Url
*/
user_recording_url?: string | null;
/**
* Bot Recording Url
*/
bot_recording_url?: string | null;
/**
* Recording Public Url
*/
@ -6535,6 +6639,14 @@ export type WorkflowRunUsageResponse = {
* Transcript Public Url
*/
transcript_public_url?: string | null;
/**
* User Recording Public Url
*/
user_recording_public_url?: string | null;
/**
* Bot Recording Public Url
*/
bot_recording_public_url?: string | null;
/**
* Public Access Token
*/
@ -12331,7 +12443,7 @@ export type DownloadWorkflowArtifactApiV1PublicDownloadWorkflowTokenArtifactType
/**
* Artifact Type
*/
artifact_type: 'recording' | 'transcript';
artifact_type: string;
};
query?: {
/**

View file

@ -11,6 +11,7 @@ import {
type ServiceSegment,
} from "@/components/ServiceConfigurationForm";
import { Button } from "@/components/ui/button";
import { Checkbox } from "@/components/ui/checkbox";
import { Input } from "@/components/ui/input";
import { Label } from "@/components/ui/label";
import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from "@/components/ui/select";
@ -21,6 +22,7 @@ type ModelMode = "realtime" | "dograh" | "byok";
interface DograhDefaults {
voices: string[];
allow_custom_input?: boolean;
speeds: number[];
languages: string[];
defaults: {
@ -265,16 +267,21 @@ export function AIModelConfigurationV2Editor({
const [realtimeInitialConfig, setRealtimeInitialConfig] = useState<Record<string, unknown> | null>(null);
const [pipelineInitialConfig, setPipelineInitialConfig] = useState<Record<string, unknown> | null>(null);
const [isSavingDograh, setIsSavingDograh] = useState(false);
const [isCustomVoice, setIsCustomVoice] = useState(false);
const [error, setError] = useState<string | null>(null);
const allowCustomVoice = defaults.dograh.allow_custom_input ?? false;
useEffect(() => {
const rawConfiguration = asRecord(configuration);
const rawEffectiveConfiguration = asRecord(effectiveConfiguration);
setMode(preferredMode(rawConfiguration, rawEffectiveConfiguration));
setDograh(buildDograhState(defaults, rawConfiguration, rawEffectiveConfiguration));
const nextDograh = buildDograhState(defaults, rawConfiguration, rawEffectiveConfiguration);
setDograh(nextDograh);
setIsCustomVoice(allowCustomVoice && !defaults.dograh.voices.includes(nextDograh.voice));
setRealtimeInitialConfig(getByokInitialConfig(rawConfiguration, rawEffectiveConfiguration, true));
setPipelineInitialConfig(getByokInitialConfig(rawConfiguration, rawEffectiveConfiguration, false));
}, [configuration, defaults, effectiveConfiguration]);
}, [configuration, defaults, effectiveConfiguration, allowCustomVoice]);
const saveDograhConfiguration = async () => {
setIsSavingDograh(true);
@ -363,18 +370,44 @@ export function AIModelConfigurationV2Editor({
<div className="grid gap-4 sm:grid-cols-2">
<div className="space-y-2">
<Label>Voice</Label>
<Select value={dograh.voice} onValueChange={(voice) => setDograh({ ...dograh, voice })}>
<SelectTrigger className="w-full">
<SelectValue placeholder="Select voice" />
</SelectTrigger>
<SelectContent>
{defaults.dograh.voices.map((voice) => (
<SelectItem key={voice} value={voice}>
{voice}
</SelectItem>
))}
</SelectContent>
</Select>
{isCustomVoice ? (
<Input
placeholder="Enter voice"
value={dograh.voice}
onChange={(event) => setDograh({ ...dograh, voice: event.target.value })}
/>
) : (
<Select value={dograh.voice} onValueChange={(voice) => setDograh({ ...dograh, voice })}>
<SelectTrigger className="w-full">
<SelectValue placeholder="Select voice" />
</SelectTrigger>
<SelectContent>
{defaults.dograh.voices.map((voice) => (
<SelectItem key={voice} value={voice}>
{voice}
</SelectItem>
))}
</SelectContent>
</Select>
)}
{allowCustomVoice && (
<div className="flex items-center space-x-2">
<Checkbox
id="dograh-custom-voice"
checked={isCustomVoice}
onCheckedChange={(checked) => {
  // The callback value is wider than `boolean` (hence the previous type
  // assertion). Compare against `true` so that any non-boolean state,
  // e.g. "indeterminate", never switches custom mode on by being truthy.
  const custom = checked === true;
  setIsCustomVoice(custom);
  if (!custom) {
    // Leaving custom mode: restore the configured default voice so a
    // free-text value is not kept once the text input is hidden.
    setDograh({ ...dograh, voice: defaults.dograh.defaults.voice });
  }
}}
/>
<Label htmlFor="dograh-custom-voice" className="text-sm font-normal cursor-pointer">
Enter Custom Value
</Label>
</div>
)}
</div>
<div className="space-y-2">