Merge remote-tracking branch 'origin/main' into feat/user-onboarding

# Conflicts:
#	docs/api-reference/openapi.json
#	sdk/python/src/dograh_sdk/_generated_models.py
#	ui/src/client/index.ts
#	ui/src/components/AIModelConfigurationV2Editor.tsx
This commit is contained in:
Abhishek Kumar 2026-06-17 19:19:20 +05:30
commit 5559ed686f
44 changed files with 2155 additions and 321 deletions

View file

@ -84,6 +84,17 @@ An honest comparison on the axes that matter most to teams evaluating voice AI p
curl -o docker-compose.yaml https://raw.githubusercontent.com/dograh-hq/dograh/main/docker-compose.yaml && curl -o start_docker.sh https://raw.githubusercontent.com/dograh-hq/dograh/main/scripts/start_docker.sh && chmod +x start_docker.sh && ./start_docker.sh
```
> **⚡ Prefer an AI agent to set it up for you?**
> If you use **Claude Code** or **Codex**, install the official [Dograh setup skill](https://github.com/dograh-hq/dograh-plugins) and let your agent handle installation, configuration, and troubleshooting — it detects your OS, picks the right deploy path, runs Dograh's own setup scripts, and verifies the result.
>
> ```text
> # In Claude Code
> /plugin marketplace add dograh-hq/dograh-plugins
> /plugin install dograh@dograh
> ```
>
> Then start a new session and ask it to _"set up Dograh"_ (or run `/dograh-setup`). Codex is supported too — see the [plugin repo](https://github.com/dograh-hq/dograh-plugins#install).
> **Note**
> First startup may take 2-3 minutes to download all images. Once running, open http://localhost:3010 to create your first AI voice assistant!
> For common issues and solutions, see 🔧 **[Troubleshooting](docs/troubleshooting.md)**.

View file

@ -84,6 +84,17 @@
curl -o docker-compose.yaml https://raw.githubusercontent.com/dograh-hq/dograh/main/docker-compose.yaml && REGISTRY=ghcr.io/dograh-hq ENABLE_TELEMETRY=true docker compose up --pull always
```
> **⚡ 想让 AI 智能体帮你完成部署?**
> 如果你使用 **Claude Code** 或 **Codex**,可以安装官方的 [Dograh 部署技能(skill)](https://github.com/dograh-hq/dograh-plugins),让智能体替你完成安装、配置与排障——它会识别你的操作系统、选择合适的部署方式、运行 Dograh 自带的部署脚本并验证结果。
>
> ```text
> # 在 Claude Code 中
> /plugin marketplace add dograh-hq/dograh-plugins
> /plugin install dograh@dograh
> ```
>
> 然后开启一个新会话,让它 _"set up Dograh"_(或运行 `/dograh-setup`)。Codex 同样支持——详见[插件仓库](https://github.com/dograh-hq/dograh-plugins#install)。
> **提示**
> 首次启动需要 2-3 分钟拉取所有镜像。启动完成后,打开 http://localhost:3010 即可创建你的第一个 AI 语音助手!
> 常见问题及解决方案请参见 🔧 **[故障排查](docs/troubleshooting.md)**。

View file

@ -5,7 +5,7 @@ organization_configurations. Existing rows (the legacy v1 AI model
configuration blob) are backfilled with key MODEL_CONFIGURATION.
Revision ID: 91cc6ba3e1c7
Revises: 384be6596b36
Revises: efe356f488f9
Create Date: 2026-06-12 21:04:25.561529
"""
@ -17,7 +17,7 @@ from alembic import op
# revision identifiers, used by Alembic.
revision: str = "91cc6ba3e1c7"
down_revision: Union[str, None] = "384be6596b36"
down_revision: Union[str, None] = "efe356f488f9"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None

View file

@ -0,0 +1,34 @@
"""add extra column in workflow runs
Revision ID: efe356f488f9
Revises: 384be6596b36
Create Date: 2026-06-16 12:24:30.081058
"""
from typing import Sequence, Union
import sqlalchemy as sa
from alembic import op
# revision identifiers, used by Alembic.
revision: str = "efe356f488f9"
down_revision: Union[str, None] = "384be6596b36"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Add the non-nullable JSON ``extra`` column to ``workflow_runs``.

    The column is created with a server-side default of an empty JSON
    object (``'{}'::json``).
    """
    extra_column = sa.Column(
        "extra",
        sa.JSON(),
        nullable=False,
        server_default=sa.text("'{}'::json"),
    )
    op.add_column("workflow_runs", extra_column)
def downgrade() -> None:
    """Drop the ``extra`` column from ``workflow_runs``."""
    op.drop_column("workflow_runs", "extra")

View file

@ -10,6 +10,7 @@ from api.db.filters import apply_workflow_run_filters, get_workflow_run_order_cl
from api.db.models import CampaignModel, QueuedRunModel, WorkflowRunModel
from api.schemas.workflow import WorkflowRunResponseSchema
from api.services.workflow.run_usage_response import format_public_cost_info
from api.utils.recording_artifacts import get_recording_storage_key
class CampaignClient(BaseDBClient):
@ -45,9 +46,11 @@ class CampaignClient(BaseDBClient):
source_id=source_id,
created_by=user_id,
organization_id=organization_id,
retry_config=retry_config
if retry_config
else CampaignModel.retry_config.default.arg,
retry_config=(
retry_config
if retry_config
else CampaignModel.retry_config.default.arg
),
orchestrator_metadata=orchestrator_metadata,
telephony_configuration_id=telephony_configuration_id,
)
@ -216,6 +219,12 @@ class CampaignClient(BaseDBClient):
"is_completed": run.is_completed,
"recording_url": run.recording_url,
"transcript_url": run.transcript_url,
"user_recording_url": get_recording_storage_key(
run.extra, "user"
),
"bot_recording_url": get_recording_storage_key(
run.extra, "bot"
),
"cost_info": format_public_cost_info(
run.cost_info, run.usage_info
),
@ -270,9 +279,11 @@ class CampaignClient(BaseDBClient):
source_id=parent_campaign.source_id,
created_by=parent_campaign.created_by,
organization_id=parent_campaign.organization_id,
retry_config=retry_config
if retry_config
else CampaignModel.retry_config.default.arg,
retry_config=(
retry_config
if retry_config
else CampaignModel.retry_config.default.arg
),
orchestrator_metadata=child_meta,
rate_limit_per_second=parent_campaign.rate_limit_per_second,
total_rows=len(queued_runs_data),
@ -338,8 +349,7 @@ class CampaignClient(BaseDBClient):
# Retries create new queued_runs with suffixed source_uuids linked via
# parent_queued_run_id, so group by the ROOT queued_run using a
# recursive walk and pick the latest workflow_run across the tree.
sql = text(
f"""
sql = text(f"""
WITH RECURSIVE run_tree AS (
SELECT id AS root_id, id AS run_id
FROM queued_runs
@ -366,8 +376,7 @@ class CampaignClient(BaseDBClient):
JOIN latest_run_per_root lr ON lr.root_id = q0.id
WHERE q0.campaign_id = :cid
AND ({tag_filter})
"""
)
""")
async with self.async_session() as session:
result = await session.execute(sql, {"cid": campaign_id})

View file

@ -544,6 +544,9 @@ class WorkflowRunModel(Base):
is_completed = Column(Boolean, default=False)
recording_url = Column(String, nullable=True)
transcript_url = Column(String, nullable=True)
extra = Column(
JSON, nullable=False, default=dict, server_default=text("'{}'::json")
)
# Store storage backend as string enum (s3, minio)
storage_backend = Column(
Enum("s3", "minio", name="storage_backend"),

View file

@ -20,6 +20,7 @@ from api.db.models import (
)
from api.enums import OrganizationConfigurationKey, UserConfigurationKey
from api.schemas.ai_model_configuration import EffectiveAIModelConfiguration
from api.utils.recording_artifacts import get_recording_storage_key
class OrganizationUsageClient(BaseDBClient):
@ -226,6 +227,9 @@ class OrganizationUsageClient(BaseDBClient):
"call_duration_seconds": int(round(call_duration)),
"recording_url": run.recording_url,
"transcript_url": run.transcript_url,
"user_recording_url": get_recording_storage_key(run.extra, "user"),
"bot_recording_url": get_recording_storage_key(run.extra, "bot"),
"extra": run.extra,
"public_access_token": run.public_access_token,
"phone_number": phone_number,
"caller_number": caller_number,

View file

@ -17,6 +17,7 @@ from api.db.models import (
from api.enums import CallType, StorageBackend
from api.schemas.workflow import WorkflowRunResponseSchema
from api.services.workflow.run_usage_response import format_public_cost_info
from api.utils.recording_artifacts import get_recording_storage_key
class WorkflowRunClient(BaseDBClient):
@ -188,13 +189,19 @@ class WorkflowRunClient(BaseDBClient):
"workflow_name": run.workflow.name if run.workflow else None,
"user_id": run.workflow.user_id if run.workflow else None,
"organization_id": organization.id if organization else None,
"organization_name": organization.provider_id
if organization
else None,
"organization_name": (
organization.provider_id if organization else None
),
"mode": run.mode,
"is_completed": run.is_completed,
"recording_url": run.recording_url,
"transcript_url": run.transcript_url,
"user_recording_url": get_recording_storage_key(
run.extra, "user"
),
"bot_recording_url": get_recording_storage_key(
run.extra, "bot"
),
"usage_info": run.usage_info,
"cost_info": run.cost_info,
"initial_context": run.initial_context,
@ -313,6 +320,12 @@ class WorkflowRunClient(BaseDBClient):
"is_completed": run.is_completed,
"recording_url": run.recording_url,
"transcript_url": run.transcript_url,
"user_recording_url": get_recording_storage_key(
run.extra, "user"
),
"bot_recording_url": get_recording_storage_key(
run.extra, "bot"
),
"cost_info": format_public_cost_info(
run.cost_info, run.usage_info
),
@ -340,6 +353,7 @@ class WorkflowRunClient(BaseDBClient):
logs: dict | None = None,
state: str | None = None,
annotations: dict | None = None,
extra: dict | None = None,
) -> WorkflowRunModel:
async with self.async_session() as session:
# Use SELECT FOR UPDATE to lock the row during the update
@ -374,6 +388,8 @@ class WorkflowRunClient(BaseDBClient):
run.logs = {**run.logs, **logs}
if annotations:
run.annotations = {**run.annotations, **annotations}
if extra:
run.extra = {**run.extra, **extra}
if is_completed:
run.is_completed = is_completed
if state:

View file

@ -56,6 +56,7 @@ from api.services.configuration.masking import is_mask_of, mask_key, mask_user_c
from api.services.configuration.registry import (
DOGRAH_STT_LANGUAGES,
REGISTRY,
DograhTTSService,
ServiceProviders,
ServiceType,
)
@ -210,6 +211,13 @@ async def get_telephony_config_warnings(user: UserModel = Depends(get_user)):
# ---------------------------------------------------------------------------
def _dograh_allows_custom_voice() -> bool:
    """Report whether the Dograh TTS ``voice`` field allows custom input.

    Reads the ``allow_custom_input`` flag from the field's
    ``json_schema_extra``. Returns False when that attribute is not a dict
    or the flag is absent.
    """
    schema_extra = DograhTTSService.model_fields["voice"].json_schema_extra
    if not isinstance(schema_extra, dict):
        return False
    return bool(schema_extra.get("allow_custom_input", False))
def _byok_provider_schemas(service_type: ServiceType) -> dict[str, dict]:
return {
provider: model_cls.model_json_schema()
@ -251,6 +259,7 @@ async def get_model_configuration_v2_defaults(
return {
"dograh": {
"voices": [DOGRAH_DEFAULT_VOICE],
"allow_custom_input": _dograh_allows_custom_voice(),
"speeds": list(DOGRAH_SPEED_OPTIONS),
"languages": DOGRAH_STT_LANGUAGES,
"defaults": {

View file

@ -14,6 +14,7 @@ from api.services.auth.depends import get_user, get_user_with_selected_organizat
from api.services.mps_service_key_client import mps_service_key_client
from api.services.reports import generate_usage_runs_report_csv
from api.utils.artifacts import artifact_url
from api.utils.recording_artifacts import has_recording_track
router = APIRouter(prefix="/organizations")
@ -99,8 +100,12 @@ class WorkflowRunUsageResponse(BaseModel):
call_duration_seconds: int
recording_url: Optional[str] = None
transcript_url: Optional[str] = None
user_recording_url: Optional[str] = None
bot_recording_url: Optional[str] = None
recording_public_url: Optional[str] = None
transcript_public_url: Optional[str] = None
user_recording_public_url: Optional[str] = None
bot_recording_public_url: Optional[str] = None
public_access_token: Optional[str] = None
phone_number: Optional[str] = Field(
default=None,
@ -308,14 +313,18 @@ async def get_billing_credits(
aggregation_key=entry.get("aggregation_key"),
usage_event_id=_optional_int(entry.get("usage_event_id")),
workflow_run_id=_optional_int(entry.get("workflow_run_id")),
workflow_id=workflow_ids_by_run_id.get(
_optional_int(entry.get("workflow_run_id"))
)
if entry.get("workflow_run_id") is not None
else None,
billable_quantity=float(entry["billable_quantity"])
if entry.get("billable_quantity") is not None
else None,
workflow_id=(
workflow_ids_by_run_id.get(
_optional_int(entry.get("workflow_run_id"))
)
if entry.get("workflow_run_id") is not None
else None
),
billable_quantity=(
float(entry["billable_quantity"])
if entry.get("billable_quantity") is not None
else None
),
quantity_unit=entry.get("quantity_unit"),
metadata=entry.get("metadata") or {},
created_at=str(entry["created_at"]),
@ -478,6 +487,17 @@ async def get_usage_history(
public_access_token, "transcript"
)
run["recording_public_url"] = artifact_url(public_access_token, "recording")
run["user_recording_public_url"] = (
artifact_url(public_access_token, "user_recording")
if has_recording_track(run.get("extra"), "user")
else None
)
run["bot_recording_public_url"] = (
artifact_url(public_access_token, "bot_recording")
if has_recording_track(run.get("extra"), "bot")
else None
)
run.pop("extra", None)
return {
"runs": runs,

View file

@ -6,14 +6,16 @@ post-call processing for runs that execute integrations, QA, or campaign
reporting.
"""
from typing import Literal
from fastapi import APIRouter, HTTPException, Query
from fastapi.responses import RedirectResponse
from loguru import logger
from api.db import db_client
from api.services.storage import get_storage_for_backend
from api.utils.recording_artifacts import (
get_recording_storage_backend,
get_recording_storage_key,
)
router = APIRouter(prefix="/public/download")
@ -21,7 +23,7 @@ router = APIRouter(prefix="/public/download")
@router.get("/workflow/{token}/{artifact_type}")
async def download_workflow_artifact(
token: str,
artifact_type: Literal["recording", "transcript"],
artifact_type: str,
inline: bool = Query(
default=False, description="Display inline in browser instead of download"
),
@ -36,13 +38,15 @@ async def download_workflow_artifact(
Args:
token: The public access token (UUID format)
artifact_type: Type of artifact - "recording" or "transcript"
artifact_type: Type of artifact - "recording", "transcript",
"user_recording", or "bot_recording"
inline: If true, sets Content-Disposition to inline for browser preview
Returns:
RedirectResponse to the signed URL (302 redirect)
Raises:
HTTPException 400: If artifact type is unsupported
HTTPException 404: If token is invalid or artifact not found
"""
# 1. Lookup workflow run by token
@ -52,10 +56,26 @@ async def download_workflow_artifact(
raise HTTPException(status_code=404, detail="Invalid or expired token")
# 2. Get file path based on artifact type
artifact_storage_backend = None
if artifact_type == "recording":
file_path = workflow_run.recording_url
else: # transcript
elif artifact_type == "transcript":
file_path = workflow_run.transcript_url
elif artifact_type == "user_recording":
file_path = get_recording_storage_key(workflow_run.extra, "user")
artifact_storage_backend = get_recording_storage_backend(
workflow_run.extra, "user"
)
elif artifact_type == "bot_recording":
file_path = get_recording_storage_key(workflow_run.extra, "bot")
artifact_storage_backend = get_recording_storage_backend(
workflow_run.extra, "bot"
)
else:
logger.warning(
f"Unsupported artifact type: type={artifact_type}, workflow_run_id={workflow_run.id}"
)
raise HTTPException(status_code=400, detail="Unsupported artifact type")
if not file_path:
logger.warning(
@ -68,7 +88,9 @@ async def download_workflow_artifact(
# 3. Get storage backend for this workflow run
try:
storage = get_storage_for_backend(workflow_run.storage_backend)
storage = get_storage_for_backend(
artifact_storage_backend or workflow_run.storage_backend
)
except ValueError as e:
logger.error(f"Invalid storage backend: {workflow_run.storage_backend}")
raise HTTPException(status_code=500, detail="Storage configuration error")

View file

@ -40,14 +40,22 @@ class PresignedUploadUrlResponse(BaseModel):
router = APIRouter(prefix="/s3", tags=["s3"])
ORG_SCOPED_STORAGE_PREFIXES = ("campaigns", "knowledge_base")
def _extract_org_id_from_key(key: str) -> Optional[int]:
"""Try to extract an organization ID from a storage key.
Matches keys of the form ``{prefix}/{org_id}/...`` where *org_id* is a
positive integer. Returns ``None`` when the pattern does not match.
Matches known org-scoped keys of the form ``{prefix}/{org_id}/...`` where
*org_id* is a positive integer. Returns ``None`` when the pattern does not
match.
"""
parts = key.split("/")
if len(parts) >= 3 and parts[1].isdigit():
if (
len(parts) >= 3
and parts[0] in ORG_SCOPED_STORAGE_PREFIXES
and parts[1].isdigit()
):
return int(parts[1])
return None
@ -58,15 +66,20 @@ def _extract_legacy_workflow_run_id(key: str) -> Optional[int]:
Supports:
- ``transcripts/{run_id}.txt``
- ``recordings/{run_id}.wav``
- ``recordings/{run_id}/user.wav``
- ``recordings/{run_id}/bot.wav``
Returns ``None`` when the key does not match a legacy pattern.
"""
if key.startswith("transcripts/") and key.endswith(".txt"):
run_id_str = key[len("transcripts/") : -4]
elif key.startswith("recordings/") and key.endswith(".wav"):
run_id_str = key[len("recordings/") : -4]
else:
return None
recording_match = re.fullmatch(
r"recordings/(\d+)(?:\.wav|/(?:user|bot)\.wav)", key
)
if not recording_match:
return None
run_id_str = recording_match.group(1)
return int(run_id_str) if run_id_str.isdigit() else None
@ -89,8 +102,13 @@ async def _validate_and_extract_workflow_run_id(
"""
if key.startswith("transcripts/") and key.endswith(".txt"):
run_id_str = key[len("transcripts/") : -4] # strip prefix & suffix
elif key.startswith("recordings/") and key.endswith(".wav"):
run_id_str = key[len("recordings/") : -4]
elif key.startswith("recordings/"):
run_id = _extract_legacy_workflow_run_id(key)
if run_id is None:
raise HTTPException(
status_code=400, detail="Invalid workflow_run_id in key"
)
return run_id
elif allow_special_paths and key.startswith("voicemail_detections/"):
return None # Skip validation for these paths
else:
@ -159,9 +177,9 @@ async def get_signed_url(
"""Return a short-lived signed URL for a file stored on S3 / MinIO.
Access Control:
* Keys that embed an organization ID (``{prefix}/{org_id}/...``) are
authorized by matching the org_id against the requesting user's
organization.
* Known org-scoped keys (for example ``campaigns/{org_id}/...`` and
``knowledge_base/{org_id}/...``) are authorized by matching the org_id
against the requesting user's organization.
* Legacy keys (``recordings/{run_id}.wav``, ``transcripts/{run_id}.txt``)
are authorized via the workflow run they belong to.
* Superusers can request any key.

View file

@ -19,7 +19,7 @@ import ipaddress
import os
from datetime import UTC, datetime
from enum import Enum
from typing import Dict, List, Optional
from typing import Dict, List, Optional, Set
from aiortc import RTCIceServer
from aiortc.sdp import candidate_from_sdp
@ -246,6 +246,74 @@ class SignalingManager:
def __init__(self):
self._connections: Dict[str, WebSocket] = {}
self._peer_connections: Dict[str, SmallWebRTCConnection] = {}
self._connection_peer_ids: Dict[str, Set[str]] = {}
self._peer_connection_owners: Dict[str, str] = {}
def _track_peer_connection(
    self, connection_id: str, pc_id: str, pc: SmallWebRTCConnection
) -> None:
    """Register ``pc`` under ``pc_id`` and record ``connection_id`` as its owner.

    Updates all three bookkeeping maps: the peer connection itself, the
    pc_id -> owner lookup, and the owner -> set-of-pc_ids lookup.
    """
    owned_ids = self._connection_peer_ids.setdefault(connection_id, set())
    owned_ids.add(pc_id)
    self._peer_connection_owners[pc_id] = connection_id
    self._peer_connections[pc_id] = pc
def _forget_peer_connection(self, pc_id: str) -> Optional[str]:
connection_id = self._peer_connection_owners.pop(pc_id, None)
self._peer_connections.pop(pc_id, None)
if connection_id:
peer_ids = self._connection_peer_ids.get(connection_id)
if peer_ids is not None:
peer_ids.discard(pc_id)
if not peer_ids:
self._connection_peer_ids.pop(connection_id, None)
return connection_id
async def _send_json_if_connected(
    self, websocket: WebSocket, message: dict
) -> bool:
    """Send ``message`` as JSON when the socket is still connected.

    Returns True if the send completed. Returns False if the socket's
    application state is not CONNECTED or the send raised; a failed send is
    logged at debug level and not re-raised.
    """
    if websocket.application_state == WebSocketState.CONNECTED:
        try:
            await websocket.send_json(message)
        except Exception as e:
            logger.debug(f"Failed to send signaling WebSocket message: {e}")
        else:
            return True
    return False
async def _close_websocket_if_connected(
    self, websocket: WebSocket, code: int = 1000, reason: str = ""
) -> None:
    """Close ``websocket`` with ``code``/``reason`` if it is still connected.

    Does nothing when the application state is not CONNECTED. A failure
    while closing is logged at debug level and not re-raised.
    """
    if websocket.application_state == WebSocketState.CONNECTED:
        try:
            await websocket.close(code=code, reason=reason)
        except Exception as e:
            logger.debug(f"Failed to close signaling WebSocket: {e}")
async def _notify_call_ended_and_close_websocket(
    self,
    websocket: WebSocket,
    workflow_run_id: int,
    pc_id: str,
    reason: str,
) -> None:
    """Send a ``call-ended`` message to the client, then close the socket.

    Both steps go through the "if connected" helpers, so each is skipped
    when the socket is no longer connected.
    """
    call_ended_message = {
        "type": "call-ended",
        "payload": {
            "workflow_run_id": workflow_run_id,
            "pc_id": pc_id,
            "reason": reason,
        },
    }
    await self._send_json_if_connected(websocket, call_ended_message)
    await self._close_websocket_if_connected(
        websocket, code=1000, reason="call ended"
    )
async def handle_websocket(
self,
@ -257,35 +325,51 @@ class SignalingManager:
"""Handle WebSocket connection for signaling."""
await websocket.accept()
connection_id = f"{workflow_id}:{workflow_run_id}:{user.id}"
self._connections[connection_id] = websocket
connection_key = f"{connection_id}:{id(websocket)}"
self._connections[connection_key] = websocket
try:
while True:
message = await websocket.receive_json()
await self._handle_message(
websocket, message, workflow_id, workflow_run_id, user
websocket,
message,
workflow_id,
workflow_run_id,
user,
connection_key,
)
except WebSocketDisconnect:
logger.info(f"WebSocket disconnected for {connection_id}")
except Exception as e:
logger.error(f"WebSocket error for {connection_id}: {e}")
if websocket.application_state == WebSocketState.DISCONNECTED:
logger.info(f"WebSocket disconnected for {connection_id}")
else:
logger.error(f"WebSocket error for {connection_id}: {e}")
finally:
# Cleanup
self._connections.pop(connection_id, None)
self._connections.pop(connection_key, None)
peer_ids = list(self._connection_peer_ids.pop(connection_key, set()))
# Unregister WebSocket sender for real-time feedback
unregister_ws_sender(workflow_run_id)
# Clean up all peer connections for this workflow run
# Clean up peer connections owned by this WebSocket.
# Note: In a WebSocket-based signaling approach (vs HTTP PATCH),
# we maintain our own connection map instead of relying on
# SmallWebRTCRequestHandler's _pcs_map. This is suitable for
# multi-worker FastAPI deployments where state cannot be shared.
for pc_id in list(self._peer_connections.keys()):
for pc_id in peer_ids:
self._peer_connection_owners.pop(pc_id, None)
pc = self._peer_connections.pop(pc_id, None)
if pc:
await pc.disconnect()
logger.debug(f"Disconnected peer connection: {pc_id}")
try:
await pc.disconnect()
logger.debug(f"Disconnected peer connection: {pc_id}")
except Exception as e:
logger.debug(
f"Failed to disconnect peer connection {pc_id}: {e}"
)
async def _handle_message(
self,
@ -294,17 +378,20 @@ class SignalingManager:
workflow_id: int,
workflow_run_id: int,
user: UserModel,
connection_key: str,
):
"""Handle incoming WebSocket messages."""
msg_type = message.get("type")
payload = message.get("payload", {})
if msg_type == "offer":
await self._handle_offer(ws, payload, workflow_id, workflow_run_id, user)
await self._handle_offer(
ws, payload, workflow_id, workflow_run_id, user, connection_key
)
elif msg_type == "ice-candidate":
await self._handle_ice_candidate(ws, payload, workflow_run_id)
await self._handle_ice_candidate(payload, connection_key)
elif msg_type == "renegotiate":
await self._handle_renegotiation(ws, payload, workflow_id, workflow_run_id)
await self._handle_renegotiation(ws, payload, connection_key)
async def _handle_offer(
self,
@ -313,6 +400,7 @@ class SignalingManager:
workflow_id: int,
workflow_run_id: int,
user: UserModel,
connection_key: str,
):
"""Handle offer message and create answer with ICE trickling."""
pc_id = payload.get("pc_id")
@ -320,6 +408,15 @@ class SignalingManager:
type_ = payload.get("type")
call_context_vars = payload.get("call_context_vars", {})
if not pc_id or not sdp or not type_:
await ws.send_json(
{
"type": "error",
"payload": {"message": "Missing offer fields"},
}
)
return
# Set run context for logging and tracing. org_id must be set before
# pc.initialize() so that aiortc's internal tasks inherit it.
set_current_run_id(workflow_run_id)
@ -347,7 +444,16 @@ class SignalingManager:
)
return
if pc_id and pc_id in self._peer_connections:
if pc_id in self._peer_connections:
if self._peer_connection_owners.get(pc_id) != connection_key:
await ws.send_json(
{
"type": "error",
"payload": {"message": "Peer connection already owned"},
}
)
return
# Reuse existing connection
logger.info(f"Reusing existing connection for pc_id: {pc_id}")
pc = self._peer_connections[pc_id]
@ -379,7 +485,7 @@ class SignalingManager:
await pc.initialize(sdp=sdp, type=type_)
# Store peer connection using client's pc_id
self._peer_connections[pc_id] = pc
self._track_peer_connection(connection_key, pc_id, pc)
# Register WebSocket sender for real-time feedback
async def ws_sender(message: dict):
@ -392,7 +498,16 @@ class SignalingManager:
@pc.event_handler("closed")
async def handle_disconnected(webrtc_connection: SmallWebRTCConnection):
logger.info(f"PeerConnection closed: {webrtc_connection.pc_id}")
self._peer_connections.pop(webrtc_connection.pc_id, None)
owner_connection_id = self._forget_peer_connection(
webrtc_connection.pc_id
)
if owner_connection_id == connection_key:
await self._notify_call_ended_and_close_websocket(
ws,
workflow_run_id,
webrtc_connection.pc_id,
reason="peer_connection_closed",
)
# Start pipeline in background
asyncio.create_task(
@ -421,9 +536,7 @@ class SignalingManager:
}
)
async def _handle_ice_candidate(
self, ws: WebSocket, payload: dict, workflow_run_id: int
):
async def _handle_ice_candidate(self, payload: dict, connection_key: str):
"""Handle incoming ICE candidate from client.
Uses SmallWebRTC's native ICE trickling support via add_ice_candidate().
@ -442,6 +555,9 @@ class SignalingManager:
if not pc:
logger.warning(f"No peer connection found for pc_id: {pc_id}")
return
if self._peer_connection_owners.get(pc_id) != connection_key:
logger.warning(f"Ignoring ICE candidate for unowned pc_id: {pc_id}")
return
if candidate_data:
candidate_str = candidate_data.get("candidate", "")
@ -466,7 +582,7 @@ class SignalingManager:
logger.debug(f"End of ICE candidates for pc_id: {pc_id}")
async def _handle_renegotiation(
self, ws: WebSocket, payload: dict, workflow_id: int, workflow_run_id: int
self, ws: WebSocket, payload: dict, connection_key: str
):
"""Handle renegotiation request."""
pc_id = payload.get("pc_id")
@ -479,6 +595,11 @@ class SignalingManager:
{"type": "error", "payload": {"message": "Peer connection not found"}}
)
return
if self._peer_connection_owners.get(pc_id) != connection_key:
await ws.send_json(
{"type": "error", "payload": {"message": "Peer connection not found"}}
)
return
pc = self._peer_connections[pc_id]
await pc.renegotiate(sdp=sdp, type=type_, restart_pc=restart_pc)

View file

@ -60,6 +60,10 @@ from api.services.workflow.trigger_paths import (
)
from api.services.workflow.workflow_graph import WorkflowGraph
from api.utils.artifacts import artifact_url
from api.utils.recording_artifacts import (
get_recording_storage_key,
has_recording_track,
)
router = APIRouter(prefix="/workflow")
@ -1255,7 +1259,16 @@ async def get_workflow_run(
raise HTTPException(status_code=404, detail="Workflow run not found")
public_access_token = run.public_access_token
if (run.transcript_url or run.recording_url) and not public_access_token:
user_recording_url = get_recording_storage_key(run.extra, "user")
bot_recording_url = get_recording_storage_key(run.extra, "bot")
has_user_recording = has_recording_track(run.extra, "user")
has_bot_recording = has_recording_track(run.extra, "bot")
if (
run.transcript_url
or run.recording_url
or has_user_recording
or has_bot_recording
) and not public_access_token:
public_access_token = await db_client.ensure_public_access_token(run.id)
return {
@ -1266,8 +1279,20 @@ async def get_workflow_run(
"is_completed": run.is_completed,
"transcript_url": run.transcript_url,
"recording_url": run.recording_url,
"user_recording_url": user_recording_url,
"bot_recording_url": bot_recording_url,
"transcript_public_url": artifact_url(public_access_token, "transcript"),
"recording_public_url": artifact_url(public_access_token, "recording"),
"user_recording_public_url": (
artifact_url(public_access_token, "user_recording")
if has_user_recording
else None
),
"bot_recording_public_url": (
artifact_url(public_access_token, "bot_recording")
if has_bot_recording
else None
),
"public_access_token": public_access_token,
"cost_info": format_public_cost_info(run.cost_info, run.usage_info),
"usage_info": format_public_usage_info(run.usage_info),

View file

@ -15,8 +15,12 @@ class WorkflowRunResponseSchema(BaseModel):
is_completed: bool
transcript_url: str | None
recording_url: str | None
user_recording_url: str | None = None
bot_recording_url: str | None = None
transcript_public_url: str | None = None
recording_public_url: str | None = None
user_recording_public_url: str | None = None
bot_recording_public_url: str | None = None
public_access_token: str | None = None
cost_info: Dict[str, Any] | None
usage_info: Dict[str, Any] | None = None

View file

@ -49,6 +49,7 @@ class UserConfigurationValidator:
ServiceProviders.CAMB.value: self._check_camb_api_key,
ServiceProviders.AWS_BEDROCK.value: self._check_aws_bedrock_api_key,
ServiceProviders.SPEACHES.value: self._check_speaches_api_key,
ServiceProviders.HUGGINGFACE.value: self._check_huggingface_api_key,
ServiceProviders.GOOGLE_VERTEX.value: self._check_google_vertex_llm_api_key,
ServiceProviders.OPENAI_REALTIME.value: self._check_openai_api_key,
ServiceProviders.GROK_REALTIME.value: self._check_grok_realtime_api_key,
@ -60,6 +61,7 @@ class UserConfigurationValidator:
ServiceProviders.GLADIA.value: self._check_gladia_api_key,
ServiceProviders.RIME.value: self._check_rime_api_key,
ServiceProviders.MINIMAX.value: self._check_minimax_api_key,
ServiceProviders.SMALLEST.value: self._check_smallest_api_key,
}
async def validate(
@ -360,6 +362,14 @@ class UserConfigurationValidator:
raise ValueError("base_url is required for Speaches services")
return True
def _check_huggingface_api_key(self, model: str, api_key: str) -> bool:
if not api_key.startswith("hf_"):
raise ValueError(
"Invalid Hugging Face API token format. Use a token that starts with "
"'hf_' and has Inference Providers permission."
)
return True
def _check_google_vertex_realtime_api_key(self, model: str, service_config) -> bool:
if not getattr(service_config, "project_id", None):
raise ValueError("project_id is required for Google Vertex Realtime")
@ -389,6 +399,7 @@ class UserConfigurationValidator:
return True
def _check_minimax_api_key(self, model: str, api_key: str) -> bool:
# MiniMax doesn't publish a cheap key-validation endpoint; trust the key
# at save time and surface auth errors at first call (same as Rime/Sarvam).
return True
def _check_smallest_api_key(self, model: str, api_key: str) -> bool:
    # No format or remote check is performed here: every key is accepted.
    return True

View file

@ -68,6 +68,7 @@ class ServiceProviders(str, Enum):
CAMB = "camb"
AWS_BEDROCK = "aws_bedrock"
SPEACHES = "speaches"
HUGGINGFACE = "huggingface"
ASSEMBLYAI = "assemblyai"
GLADIA = "gladia"
RIME = "rime"
@ -79,6 +80,7 @@ class ServiceProviders(str, Enum):
GOOGLE_REALTIME = "google_realtime"
GOOGLE_VERTEX_REALTIME = "google_vertex_realtime"
AZURE_REALTIME = "azure_realtime"
SMALLEST = "smallest"
class BaseServiceConfiguration(BaseModel):
@ -94,6 +96,7 @@ class BaseServiceConfiguration(BaseModel):
ServiceProviders.DOGRAH,
ServiceProviders.AWS_BEDROCK,
ServiceProviders.SPEACHES,
ServiceProviders.HUGGINGFACE,
ServiceProviders.ASSEMBLYAI,
ServiceProviders.GLADIA,
ServiceProviders.RIME,
@ -106,6 +109,7 @@ class BaseServiceConfiguration(BaseModel):
ServiceProviders.GOOGLE_VERTEX_REALTIME,
ServiceProviders.AZURE_REALTIME,
ServiceProviders.SARVAM,
ServiceProviders.SMALLEST,
]
api_key: str | list[str]
@ -255,6 +259,11 @@ SPEACHES_PROVIDER_MODEL_CONFIG = provider_model_config(
),
provider_docs_url="https://github.com/speaches-ai/speaches",
)
HUGGINGFACE_PROVIDER_MODEL_CONFIG = provider_model_config(
"Hugging Face",
description="Hosted Hugging Face Inference Providers API for usage-based inference.",
provider_docs_url="https://huggingface.co/docs/inference-providers/en/index",
)
AZURE_SPEECH_PROVIDER_MODEL_CONFIG = provider_model_config(
"Azure Speech Services",
description="Azure Cognitive Services Speech — TTS and STT via the Azure Speech SDK.",
@ -471,6 +480,35 @@ class SpeachesLLMConfiguration(BaseLLMConfiguration):
)
# Example chat-completion model identifiers surfaced as schema suggestions for
# the ``model`` field below; the field also allows custom input.
HUGGINGFACE_LLM_MODELS = [
    "openai/gpt-oss-120b:cerebras",
    "deepseek-ai/DeepSeek-R1:fastest",
    "Qwen/Qwen3-Coder-480B-A35B-Instruct:fastest",
]


@register_llm
class HuggingFaceLLMConfiguration(BaseLLMConfiguration):
    # LLM settings for the Hugging Face provider, registered via @register_llm.
    model_config = HUGGINGFACE_PROVIDER_MODEL_CONFIG
    provider: Literal[ServiceProviders.HUGGINGFACE] = ServiceProviders.HUGGINGFACE

    # Model identifier; HUGGINGFACE_LLM_MODELS are examples, not an allow-list.
    model: str = Field(
        description="Hugging Face chat-completion model identifier, optionally with provider suffix.",
        json_schema_extra={
            "examples": HUGGINGFACE_LLM_MODELS,
            "allow_custom_input": True,
        },
        default="openai/gpt-oss-120b:cerebras",
    )

    # Router endpoint the LLM service is pointed at.
    base_url: str = Field(
        description="Hugging Face OpenAI-compatible chat-completions router base URL.",
        default="https://router.huggingface.co/v1",
    )

    # Optional billing target; None leaves it unset.
    bill_to: str | None = Field(
        description="Optional Hugging Face organization or user to bill using X-HF-Bill-To.",
        default=None,
    )
MINIMAX_MODELS = [
"MiniMax-M2.7",
"MiniMax-M2.7-highspeed",
@ -741,6 +779,7 @@ LLMConfig = Annotated[
DograhLLMService,
AWSBedrockLLMConfiguration,
SpeachesLLMConfiguration,
HuggingFaceLLMConfiguration,
MiniMaxLLMConfiguration,
SarvamLLMConfiguration,
],
@ -907,6 +946,7 @@ class DograhTTSService(BaseTTSConfiguration):
voice: str = Field(
default="default",
description="Voice preset.",
json_schema_extra={"allow_custom_input": True},
)
speed: float = Field(default=1.0, ge=0.5, le=2.0, description="Speed of the voice.")
@ -961,6 +1001,12 @@ class SarvamTTSConfiguration(BaseTTSConfiguration):
description="BCP-47 Indian-language code (e.g. hi-IN, en-IN).",
json_schema_extra={"examples": SARVAM_LANGUAGES},
)
speed: float = Field(
default=1.0,
ge=0.5,
le=2.0,
description="Speech speed multiplier.",
)
CAMB_TTS_MODELS = ["mars-flash", "mars-pro", "mars-instruct"]
@ -1120,6 +1166,80 @@ class AzureSpeechTTSConfiguration(BaseTTSConfiguration):
)
# Provider-level metadata for Smallest AI, built with provider_model_config.
# It is assigned as model_config on both the Smallest TTS and STT
# configuration classes in this module.
SMALLEST_PROVIDER_MODEL_CONFIG = provider_model_config(
"Smallest AI",
description="Smallest AI ultralow-latency TTS (Waves) and STT (Pulse) APIs.",
provider_docs_url="https://smallest.ai/docs",
)
# Model names offered as schema examples for the TTS ``model`` field.
SMALLEST_TTS_MODELS = ["lightning_v3.1", "lightning_v3.1_pro"]
# Voice IDs offered as schema examples for the TTS ``voice`` field (custom
# values are also allowed by that field).
SMALLEST_TTS_VOICES = [
"sophia",
"avery",
"liam",
"lucas",
"olivia",
"ryan",
"freya",
"william",
"devansh",
"arjun",
"niharika",
"maya",
"dhruv",
"mia",
"maithili",
]
# Language codes offered as schema examples for the TTS ``language`` field
# (custom values are also allowed by that field).
SMALLEST_TTS_LANGUAGES = [
"en",
"hi",
"fr",
"de",
"es",
"it",
"nl",
"pl",
"ru",
"ar",
"bn",
"gu",
"he",
"kn",
"mr",
"ta",
]
@register_tts
class SmallestAITTSConfiguration(BaseTTSConfiguration):
    # TTS settings for the Smallest AI provider, registered via @register_tts.
    model_config = SMALLEST_PROVIDER_MODEL_CONFIG
    provider: Literal[ServiceProviders.SMALLEST] = ServiceProviders.SMALLEST

    # TTS model name; SMALLEST_TTS_MODELS are offered as examples.
    model: str = Field(
        description=(
            "Smallest AI TTS model. lightning_v3.1_pro is the premium pool "
            "(American, British, Indian accents); lightning_v3.1 is the standard "
            "pool with 217 voices across 12 languages."
        ),
        json_schema_extra={"examples": SMALLEST_TTS_MODELS},
        default="lightning_v3.1",
    )

    # Voice ID; the examples are suggestions and custom input is allowed.
    voice: str = Field(
        description="Smallest AI voice ID.",
        json_schema_extra={"examples": SMALLEST_TTS_VOICES, "allow_custom_input": True},
        default="sophia",
    )

    # Synthesis language; the examples are suggestions and custom input is allowed.
    language: str = Field(
        description="ISO 639-1 language code for synthesis.",
        json_schema_extra={
            "examples": SMALLEST_TTS_LANGUAGES,
            "allow_custom_input": True,
        },
        default="en",
    )

    # Speed multiplier, validated to the inclusive range [0.5, 2.0].
    speed: float = Field(
        description="Speech speed multiplier (0.5 to 2.0).",
        default=1.0,
        ge=0.5,
        le=2.0,
    )
TTSConfig = Annotated[
Union[
DeepgramTTSConfiguration,
@ -1134,6 +1254,7 @@ TTSConfig = Annotated[
SpeachesTTSConfiguration,
MiniMaxTTSConfiguration,
AzureSpeechTTSConfiguration,
SmallestAITTSConfiguration,
],
Field(discriminator="provider"),
]
@ -1334,6 +1455,38 @@ class SpeachesSTTConfiguration(BaseSTTConfiguration):
)
# Example ASR model identifiers surfaced as schema suggestions for the
# ``model`` field below; the field also allows custom input.
HUGGINGFACE_STT_MODELS = [
    "openai/whisper-large-v3-turbo",
    "openai/whisper-large-v3",
]


@register_stt
class HuggingFaceSTTConfiguration(BaseSTTConfiguration):
    # STT settings for the Hugging Face provider, registered via @register_stt.
    model_config = HUGGINGFACE_PROVIDER_MODEL_CONFIG
    provider: Literal[ServiceProviders.HUGGINGFACE] = ServiceProviders.HUGGINGFACE

    # ASR model identifier; HUGGINGFACE_STT_MODELS are examples, not an allow-list.
    model: str = Field(
        description="Hugging Face ASR model identifier served through Inference Providers.",
        json_schema_extra={
            "examples": HUGGINGFACE_STT_MODELS,
            "allow_custom_input": True,
        },
        default="openai/whisper-large-v3-turbo",
    )

    # Router endpoint the STT service is pointed at.
    base_url: str = Field(
        description="Hugging Face Inference Providers router base URL.",
        default="https://router.huggingface.co/hf-inference",
    )

    # Optional billing target; None leaves it unset.
    bill_to: str | None = Field(
        description="Optional Hugging Face organization or user to bill using X-HF-Bill-To.",
        default=None,
    )

    # Off by default; only meaningful where the provider/model supports it.
    return_timestamps: bool = Field(
        description="Request timestamp chunks when supported by the selected provider/model.",
        default=False,
    )
ASSEMBLYAI_STT_MODELS = ["u3-rt-pro"]
ASSEMBLYAI_STT_LANGUAGES = ["en", "es", "de", "fr", "pt", "it"]
@ -1396,6 +1549,62 @@ class AzureSpeechSTTConfiguration(BaseSTTConfiguration):
)
# Model names offered as schema examples for the Smallest AI STT ``model`` field.
SMALLEST_STT_MODELS = ["pulse"]
# Language codes offered as schema examples for the Smallest AI STT
# ``language`` field (custom values are also allowed by that field).
SMALLEST_STT_LANGUAGES = [
"en",
"hi",
"fr",
"de",
"es",
"it",
"nl",
"pl",
"ru",
"pt",
"bn",
"gu",
"kn",
"ml",
"mr",
"ta",
"te",
"pa",
"or",
"bg",
"cs",
"da",
"et",
"fi",
"hu",
"lt",
"lv",
"mt",
"ro",
"sk",
"sv",
"uk",
]
@register_stt
class SmallestAISTTConfiguration(BaseSTTConfiguration):
    # STT settings for the Smallest AI provider, registered via @register_stt.
    model_config = SMALLEST_PROVIDER_MODEL_CONFIG
    provider: Literal[ServiceProviders.SMALLEST] = ServiceProviders.SMALLEST

    # STT model name; SMALLEST_STT_MODELS are offered as examples.
    model: str = Field(
        description="Smallest AI STT model. Supports 38 languages with real-time streaming.",
        json_schema_extra={"examples": SMALLEST_STT_MODELS},
        default="pulse",
    )

    # Transcription language; the examples are suggestions and custom input is allowed.
    language: str = Field(
        description="ISO 639-1 language code for transcription.",
        json_schema_extra={
            "examples": SMALLEST_STT_LANGUAGES,
            "allow_custom_input": True,
        },
        default="en",
    )
STTConfig = Annotated[
Union[
DeepgramSTTConfiguration,
@ -1406,9 +1615,11 @@ STTConfig = Annotated[
SpeechmaticsSTTConfiguration,
SarvamSTTConfiguration,
SpeachesSTTConfiguration,
HuggingFaceSTTConfiguration,
AssemblyAISTTConfiguration,
GladiaSTTConfiguration,
AzureSpeechSTTConfiguration,
SmallestAISTTConfiguration,
],
Field(discriminator="provider"),
]

View file

@ -9,8 +9,8 @@ from api.services.integrations import IntegrationRuntimeSession
from api.services.pipecat.audio_config import AudioConfig
from api.services.pipecat.audio_playback import play_audio_loop
from api.services.pipecat.in_memory_buffers import (
InMemoryAudioBuffer,
InMemoryLogsBuffer,
InMemoryRecordingBuffers,
)
from api.services.pipecat.pipeline_metrics_aggregator import PipelineMetricsAggregator
from api.services.pipecat.tracing_config import get_trace_url
@ -40,11 +40,11 @@ async def _capture_call_event(
"workflow_run_id": workflow_run_id,
"workflow_id": workflow_run.workflow_id if workflow_run else None,
"call_type": workflow_run.mode if workflow_run else None,
"call_direction": (workflow_run.initial_context or {}).get(
"direction", "outbound"
)
if workflow_run
else None,
"call_direction": (
(workflow_run.initial_context or {}).get("direction", "outbound")
if workflow_run
else None
),
}
if extra_properties:
properties.update(extra_properties)
@ -73,7 +73,7 @@ def register_event_handlers(
"""Register all event handlers for transport and task events.
Returns:
in_memory_audio_buffer for use by other handlers.
In-memory recording buffers for use by other handlers.
"""
# Initialize in-memory buffers with proper audio configuration
sample_rate = audio_config.pipeline_sample_rate if audio_config else 16000
@ -84,7 +84,7 @@ def register_event_handlers(
f"with sample_rate={sample_rate}Hz, channels={num_channels}"
)
in_memory_audio_buffer = InMemoryAudioBuffer(
in_memory_audio_buffers = InMemoryRecordingBuffers(
workflow_run_id=workflow_run_id,
sample_rate=sample_rate,
num_channels=num_channels,
@ -363,14 +363,32 @@ def register_event_handlers(
# Write buffers to temp files and enqueue combined processing task
audio_temp_path = None
user_audio_temp_path = None
bot_audio_temp_path = None
transcript_temp_path = None
try:
if not in_memory_audio_buffer.is_empty:
audio_temp_path = await in_memory_audio_buffer.write_to_temp_file()
if not in_memory_audio_buffers.mixed.is_empty:
audio_temp_path = (
await in_memory_audio_buffers.mixed.write_to_temp_file()
)
else:
logger.debug("Audio buffer is empty, skipping upload")
if not in_memory_audio_buffers.user.is_empty:
user_audio_temp_path = (
await in_memory_audio_buffers.user.write_to_temp_file()
)
else:
logger.debug("User audio buffer is empty, skipping upload")
if not in_memory_audio_buffers.bot.is_empty:
bot_audio_temp_path = (
await in_memory_audio_buffers.bot.write_to_temp_file()
)
else:
logger.debug("Bot audio buffer is empty, skipping upload")
transcript_temp_path = in_memory_logs_buffer.write_transcript_to_temp_file()
if not transcript_temp_path:
logger.debug("No transcript events in logs buffer, skipping upload")
@ -385,16 +403,18 @@ def register_event_handlers(
workflow_run_id,
audio_temp_path,
transcript_temp_path,
user_audio_temp_path,
bot_audio_temp_path,
)
# Return the buffer so it can be passed to other handlers
return in_memory_audio_buffer
return in_memory_audio_buffers
def register_audio_data_handler(
audio_buffer: AudioBufferProcessor,
workflow_run_id,
in_memory_buffer: InMemoryAudioBuffer,
in_memory_buffers: InMemoryRecordingBuffers,
):
"""Register event handler for audio data"""
logger.info(f"Registering audio data handler for workflow run {workflow_run_id}")
@ -404,9 +424,19 @@ def register_audio_data_handler(
if not audio:
return
# Use in-memory buffer
try:
await in_memory_buffer.append(audio)
await in_memory_buffers.mixed.append(audio)
except MemoryError as e:
logger.error(f"Memory buffer full: {e}")
# Could implement overflow to disk here if needed
logger.error(f"Mixed audio buffer full: {e}")
@audio_buffer.event_handler("on_track_audio_data")
async def on_track_audio_data(
buffer, user_audio, bot_audio, sample_rate, num_channels
):
try:
if user_audio:
await in_memory_buffers.user.append(user_audio)
if bot_audio:
await in_memory_buffers.bot.append(bot_audio)
except MemoryError as e:
logger.error(f"Track audio buffer full: {e}")

View file

@ -75,6 +75,27 @@ class InMemoryAudioBuffer:
return self._total_size
class InMemoryRecordingBuffers:
    """Bundle of three in-memory audio buffers for one workflow run.

    Attributes:
        mixed: Buffer for the combined recording, using the caller's channel count.
        user: Single-channel buffer for the user track.
        bot: Single-channel buffer for the bot track.
    """

    def __init__(self, workflow_run_id: int, sample_rate: int, num_channels: int = 1):
        # All three buffers share the run id and sample rate; only the channel
        # count differs (per-track buffers are always mono).
        shared = {"workflow_run_id": workflow_run_id, "sample_rate": sample_rate}
        self.mixed = InMemoryAudioBuffer(num_channels=num_channels, **shared)
        self.user = InMemoryAudioBuffer(num_channels=1, **shared)
        self.bot = InMemoryAudioBuffer(num_channels=1, **shared)
class InMemoryLogsBuffer:
"""Buffer real-time feedback events in memory during a call, then save to workflow run logs."""

View file

@ -39,8 +39,17 @@ from pipecat.services.google.vertex.llm import (
GoogleVertexLLMSettings,
)
from pipecat.services.groq.llm import GroqLLMService, GroqLLMSettings
from pipecat.services.huggingface.llm import (
HuggingFaceLLMService,
HuggingFaceLLMSettings,
)
from pipecat.services.huggingface.stt import (
HuggingFaceSTTService,
HuggingFaceSTTSettings,
)
from pipecat.services.minimax.llm import MiniMaxLLMService
from pipecat.services.minimax.tts import MiniMaxTTSSettings
from pipecat.services.openai._constants import OPENAI_SAMPLE_RATE
from pipecat.services.openai.base_llm import OpenAILLMSettings
from pipecat.services.openai.llm import OpenAILLMService
from pipecat.services.openai.stt import (
@ -53,6 +62,8 @@ from pipecat.services.rime.tts import RimeTTSService, RimeTTSSettings
from pipecat.services.sarvam.llm import SarvamLLMService, SarvamLLMSettings
from pipecat.services.sarvam.stt import SarvamSTTService, SarvamSTTSettings
from pipecat.services.sarvam.tts import SarvamTTSService, SarvamTTSSettings
from pipecat.services.smallest.stt import SmallestSTTService, SmallestSTTSettings
from pipecat.services.smallest.tts import SmallestTTSService, SmallestTTSSettings
from pipecat.services.speaches.llm import SpeachesLLMService, SpeachesLLMSettings
from pipecat.services.speaches.stt import SpeachesSTTService, SpeachesSTTSettings
from pipecat.services.speaches.tts import SpeachesTTSService, SpeachesTTSSettings
@ -218,6 +229,22 @@ def create_stt_service(
),
sample_rate=audio_config.transport_in_sample_rate,
)
elif user_config.stt.provider == ServiceProviders.HUGGINGFACE.value:
base_url = (
getattr(user_config.stt, "base_url", None)
or "https://router.huggingface.co/hf-inference"
)
_validate_runtime_service_url(base_url, "base_url")
return HuggingFaceSTTService(
api_key=user_config.stt.api_key,
base_url=base_url,
bill_to=getattr(user_config.stt, "bill_to", None),
settings=HuggingFaceSTTSettings(
model=user_config.stt.model,
return_timestamps=getattr(user_config.stt, "return_timestamps", False),
),
sample_rate=audio_config.transport_in_sample_rate,
)
elif user_config.stt.provider == ServiceProviders.ASSEMBLYAI.value:
language = getattr(user_config.stt, "language", None)
settings_kwargs = {"model": user_config.stt.model, "language": language}
@ -284,6 +311,20 @@ def create_stt_service(
settings=AzureSTTSettings(language=pipecat_language),
sample_rate=audio_config.transport_in_sample_rate,
)
elif user_config.stt.provider == ServiceProviders.SMALLEST.value:
language_code = getattr(user_config.stt, "language", None) or "en"
try:
pipecat_language = Language(language_code)
except ValueError:
pipecat_language = Language.EN
return SmallestSTTService(
api_key=user_config.stt.api_key,
settings=SmallestSTTSettings(
model=user_config.stt.model,
language=pipecat_language,
),
sample_rate=audio_config.transport_in_sample_rate,
)
else:
raise HTTPException(
status_code=400, detail=f"Invalid STT provider {user_config.stt.provider}"
@ -320,6 +361,7 @@ def create_tts_service(
kwargs["base_url"] = base_url
return OpenAITTSService(
api_key=user_config.tts.api_key,
sample_rate=OPENAI_SAMPLE_RATE,
settings=OpenAITTSSettings(model=user_config.tts.model),
text_filters=[xml_function_tag_filter],
skip_aggregator_types=["recording_router", "recording"],
@ -493,13 +535,17 @@ def create_tts_service(
pipecat_language = language_mapping.get(language, Language.HI)
voice = getattr(user_config.tts, "voice", None) or "anushka"
speed = getattr(user_config.tts, "speed", None)
settings_kwargs = {
"model": user_config.tts.model,
"voice": voice,
"language": pipecat_language,
}
if speed and speed != 1.0:
settings_kwargs["pace"] = speed
return SarvamTTSService(
api_key=user_config.tts.api_key,
settings=SarvamTTSSettings(
model=user_config.tts.model,
voice=voice,
language=pipecat_language,
),
settings=SarvamTTSSettings(**settings_kwargs),
text_filters=[xml_function_tag_filter],
skip_aggregator_types=["recording_router", "recording"],
silence_time_s=1.0,
@ -560,6 +606,28 @@ def create_tts_service(
skip_aggregator_types=["recording_router", "recording"],
silence_time_s=1.0,
)
elif user_config.tts.provider == ServiceProviders.SMALLEST.value:
language_code = getattr(user_config.tts, "language", None) or "en"
try:
pipecat_language = Language(language_code)
except ValueError:
pipecat_language = Language.EN
speed = getattr(user_config.tts, "speed", None)
model = user_config.tts.model.replace("lightning-v", "lightning_v")
settings_kwargs = SmallestTTSSettings(
model=model,
voice=user_config.tts.voice,
language=pipecat_language,
)
if speed and speed != 1.0:
settings_kwargs.speed = speed
return SmallestTTSService(
api_key=user_config.tts.api_key,
settings=settings_kwargs,
text_filters=[xml_function_tag_filter],
skip_aggregator_types=["recording_router", "recording"],
silence_time_s=1.0,
)
else:
raise HTTPException(
status_code=400, detail=f"Invalid TTS provider {user_config.tts.provider}"
@ -581,6 +649,7 @@ def create_llm_service_from_provider(
location: str | None = None,
credentials: str | None = None,
temperature: float | None = None,
bill_to: str | None = None,
):
"""Create an LLM service from explicit provider/model/api_key.
@ -663,6 +732,15 @@ def create_llm_service_from_provider(
api_key=api_key or "none",
settings=SpeachesLLMSettings(model=model),
)
elif provider == ServiceProviders.HUGGINGFACE.value:
base_url = base_url or "https://router.huggingface.co/v1"
_validate_runtime_service_url(base_url, "base_url")
return HuggingFaceLLMService(
api_key=api_key,
base_url=base_url,
bill_to=bill_to,
settings=HuggingFaceLLMSettings(model=model, temperature=0.1),
)
elif provider == ServiceProviders.MINIMAX.value:
base_url = base_url or "https://api.minimax.io/v1"
_validate_runtime_service_url(base_url, "base_url")
@ -875,6 +953,9 @@ def create_llm_service(user_config, correlation_id: str | None = None):
kwargs["endpoint"] = user_config.llm.endpoint
elif provider == ServiceProviders.SPEACHES.value:
kwargs["base_url"] = user_config.llm.base_url
elif provider == ServiceProviders.HUGGINGFACE.value:
kwargs["base_url"] = user_config.llm.base_url
kwargs["bill_to"] = user_config.llm.bill_to
elif provider == ServiceProviders.AWS_BEDROCK.value:
kwargs["aws_access_key"] = user_config.llm.aws_access_key
kwargs["aws_secret_key"] = user_config.llm.aws_secret_key

View file

@ -718,6 +718,8 @@ class TriggerNodeData(BaseNodeData):
"rsvp": "{{gathered_context.rsvp}}",
"duration": "{{cost_info.call_duration_seconds}}",
"recording_url": "{{recording_url}}",
"user_recording_url": "{{user_recording_url}}",
"bot_recording_url": "{{bot_recording_url}}",
"transcript_url": "{{transcript_url}}",
},
},

View file

@ -27,6 +27,7 @@ from api.services.workflow.dto import (
)
from api.services.workflow.qa import run_per_node_qa_analysis
from api.utils.credential_auth import build_auth_header
from api.utils.recording_artifacts import get_recording_storage_key
from api.utils.template_renderer import render_template
@ -339,6 +340,10 @@ def _build_render_context(
Returns:
Dict containing all fields available for template rendering
"""
extra = workflow_run.extra or {}
user_recording_key = get_recording_storage_key(extra, "user")
bot_recording_key = get_recording_storage_key(extra, "bot")
context = {
# Top-level fields
"workflow_run_id": workflow_run.id,
@ -353,6 +358,7 @@ def _build_render_context(
"cost_info": workflow_run.usage_info or {},
# Annotations (includes QA results)
"annotations": workflow_run.annotations or {},
"extra": extra,
}
# Add public download URLs if token is available
@ -366,9 +372,17 @@ def _build_render_context(
context["transcript_url"] = (
f"{base_url}/transcript" if workflow_run.transcript_url else None
)
context["user_recording_url"] = (
f"{base_url}/user_recording" if user_recording_key else None
)
context["bot_recording_url"] = (
f"{base_url}/bot_recording" if bot_recording_key else None
)
else:
context["recording_url"] = workflow_run.recording_url
context["transcript_url"] = workflow_run.transcript_url
context["user_recording_url"] = user_recording_key
context["bot_recording_url"] = bot_recording_key
return context

View file

@ -12,11 +12,51 @@ from api.services.workflow_run_billing import (
from api.tasks.run_integrations import run_integrations_post_workflow_run
def _recording_metadata(storage_key: str, storage_backend: str, track: str) -> dict:
return {
"storage_key": storage_key,
"storage_backend": storage_backend,
"format": "wav",
"track": track,
}
async def _upload_temp_file(
    workflow_run_id: int,
    temp_file_path: str,
    storage_key: str,
    label: str,
) -> bool:
    """Upload a temp file to storage and always remove it afterwards.

    Args:
        workflow_run_id: Run the artifact belongs to (used in error logs).
        temp_file_path: Local path of the file to upload.
        storage_key: Destination key passed to the storage layer.
        label: Human-readable artifact name used in log messages.

    Returns:
        True when the upload completed; False when the file was missing or
        the upload raised. Failures are logged, not propagated.
    """
    uploaded = False
    try:
        if os.path.exists(temp_file_path):
            size_bytes = os.path.getsize(temp_file_path)
            logger.debug(f"{label} file size: {size_bytes} bytes")
            await storage_fs.aupload_file(temp_file_path, storage_key)
            logger.info(f"Successfully uploaded {label}: {storage_key}")
            uploaded = True
        else:
            logger.warning(f"{label} temp file not found: {temp_file_path}")
    except Exception as upload_error:
        logger.error(
            f"Error uploading {label} for workflow {workflow_run_id}: {upload_error}"
        )
    finally:
        # Cleanup runs on success and failure alike so temp files never pile up.
        if os.path.exists(temp_file_path):
            try:
                os.remove(temp_file_path)
                logger.debug(f"Cleaned up temp {label} file: {temp_file_path}")
            except Exception as cleanup_error:
                logger.warning(f"Failed to clean up temp {label} file: {cleanup_error}")
    return uploaded
async def process_workflow_completion(
_ctx,
workflow_run_id: int,
audio_temp_path: Optional[str] = None,
transcript_temp_path: Optional[str] = None,
user_audio_temp_path: Optional[str] = None,
bot_audio_temp_path: Optional[str] = None,
):
"""Process workflow completion: upload artifacts and run integrations.
@ -28,6 +68,8 @@ async def process_workflow_completion(
workflow_run_id: The workflow run ID
audio_temp_path: Optional path to temp audio file
transcript_temp_path: Optional path to temp transcript file
user_audio_temp_path: Optional path to temp user-track audio file
bot_audio_temp_path: Optional path to temp bot-track audio file
"""
run_id = str(workflow_run_id)
set_current_run_id(run_id)
@ -37,35 +79,55 @@ async def process_workflow_completion(
storage_backend = get_current_storage_backend()
# Step 1: Upload audio if provided
recordings_metadata: dict[str, dict] = {}
if audio_temp_path:
try:
if os.path.exists(audio_temp_path):
file_size = os.path.getsize(audio_temp_path)
logger.debug(f"Audio file size: {file_size} bytes")
recording_url = f"recordings/{workflow_run_id}.wav"
logger.info(
f"Uploading mixed audio to {storage_backend.name} - workflow_run_id: {workflow_run_id}"
)
if await _upload_temp_file(
workflow_run_id, audio_temp_path, recording_url, "mixed audio"
):
recordings_metadata["mixed"] = _recording_metadata(
recording_url, storage_backend.value, "mixed"
)
await db_client.update_workflow_run(
run_id=workflow_run_id,
recording_url=recording_url,
storage_backend=storage_backend.value,
)
recording_url = f"recordings/{workflow_run_id}.wav"
logger.info(
f"Uploading audio to {storage_backend.name} - workflow_run_id: {workflow_run_id}"
)
if user_audio_temp_path:
user_recording_url = f"recordings/{workflow_run_id}/user.wav"
logger.info(
f"Uploading user audio to {storage_backend.name} - workflow_run_id: {workflow_run_id}"
)
if await _upload_temp_file(
workflow_run_id, user_audio_temp_path, user_recording_url, "user audio"
):
recordings_metadata["user"] = _recording_metadata(
user_recording_url, storage_backend.value, "user"
)
await storage_fs.aupload_file(audio_temp_path, recording_url)
await db_client.update_workflow_run(
run_id=workflow_run_id,
recording_url=recording_url,
storage_backend=storage_backend.value,
)
logger.info(f"Successfully uploaded audio: {recording_url}")
else:
logger.warning(f"Audio temp file not found: {audio_temp_path}")
except Exception as e:
logger.error(f"Error uploading audio for workflow {workflow_run_id}: {e}")
finally:
if audio_temp_path and os.path.exists(audio_temp_path):
try:
os.remove(audio_temp_path)
logger.debug(f"Cleaned up temp audio file: {audio_temp_path}")
except Exception as e:
logger.warning(f"Failed to clean up temp audio file: {e}")
if bot_audio_temp_path:
bot_recording_url = f"recordings/{workflow_run_id}/bot.wav"
logger.info(
f"Uploading bot audio to {storage_backend.name} - workflow_run_id: {workflow_run_id}"
)
if await _upload_temp_file(
workflow_run_id, bot_audio_temp_path, bot_recording_url, "bot audio"
):
recordings_metadata["bot"] = _recording_metadata(
bot_recording_url, storage_backend.value, "bot"
)
if recordings_metadata:
await db_client.update_workflow_run(
run_id=workflow_run_id,
storage_backend=storage_backend.value,
extra={"recordings": recordings_metadata},
)
# Step 2: Upload transcript if provided
if transcript_temp_path:

View file

@ -0,0 +1,131 @@
from types import SimpleNamespace
from unittest.mock import patch
from api.services.configuration.check_validity import UserConfigurationValidator
from api.services.configuration.registry import (
REGISTRY,
HuggingFaceLLMConfiguration,
HuggingFaceSTTConfiguration,
ServiceProviders,
ServiceType,
)
from api.services.pipecat.service_factory import (
create_llm_service,
create_stt_service,
)
def test_huggingface_stt_configuration_defaults_and_registry():
    """STT config exposes the hosted defaults and is registered for the provider."""
    stt_config = HuggingFaceSTTConfiguration(api_key="hf_test")

    assert stt_config.provider == ServiceProviders.HUGGINGFACE
    assert stt_config.model == "openai/whisper-large-v3-turbo"
    assert stt_config.base_url == "https://router.huggingface.co/hf-inference"
    assert stt_config.return_timestamps is False

    registered_cls = REGISTRY[ServiceType.STT][ServiceProviders.HUGGINGFACE]
    assert registered_cls is HuggingFaceSTTConfiguration
def test_huggingface_llm_configuration_defaults_and_registry():
    """LLM config exposes the router defaults and is registered for the provider."""
    llm_config = HuggingFaceLLMConfiguration(api_key="hf_test")

    assert llm_config.provider == ServiceProviders.HUGGINGFACE
    assert llm_config.model == "openai/gpt-oss-120b:cerebras"
    assert llm_config.base_url == "https://router.huggingface.co/v1"

    registered_cls = REGISTRY[ServiceType.LLM][ServiceProviders.HUGGINGFACE]
    assert registered_cls is HuggingFaceLLMConfiguration
def test_create_huggingface_llm_service_uses_openai_compatible_router():
    """The factory forwards key, router URL, billing target and settings."""
    llm_settings = SimpleNamespace(
        provider=ServiceProviders.HUGGINGFACE.value,
        api_key="hf_test",
        model="deepseek-ai/DeepSeek-R1:fastest",
        base_url="https://router.huggingface.co/v1",
        bill_to="demo-org",
    )
    user_config = SimpleNamespace(llm=llm_settings)

    patch_target = "api.services.pipecat.service_factory.HuggingFaceLLMService"
    with patch(patch_target) as service_cls:
        create_llm_service(user_config)

    assert service_cls.call_count == 1
    call_kwargs = service_cls.call_args.kwargs
    assert call_kwargs["api_key"] == "hf_test"
    assert call_kwargs["base_url"] == "https://router.huggingface.co/v1"
    assert call_kwargs["bill_to"] == "demo-org"
    assert call_kwargs["settings"].model == "deepseek-ai/DeepSeek-R1:fastest"
    assert call_kwargs["settings"].temperature == 0.1
def test_create_huggingface_stt_service_uses_hosted_defaults():
    """The factory forwards key, router URL, billing, sample rate and settings."""
    stt_settings = SimpleNamespace(
        provider=ServiceProviders.HUGGINGFACE.value,
        api_key="hf_test",
        model="openai/whisper-large-v3-turbo",
        base_url="https://router.huggingface.co/hf-inference",
        bill_to="demo-org",
        return_timestamps=True,
    )
    user_config = SimpleNamespace(stt=stt_settings)
    audio_config = SimpleNamespace(transport_in_sample_rate=16000)

    patch_target = "api.services.pipecat.service_factory.HuggingFaceSTTService"
    with patch(patch_target) as service_cls:
        create_stt_service(user_config, audio_config)

    assert service_cls.call_count == 1
    call_kwargs = service_cls.call_args.kwargs
    assert call_kwargs["api_key"] == "hf_test"
    assert call_kwargs["base_url"] == "https://router.huggingface.co/hf-inference"
    assert call_kwargs["bill_to"] == "demo-org"
    assert call_kwargs["sample_rate"] == 16000
    assert call_kwargs["settings"].model == "openai/whisper-large-v3-turbo"
    assert call_kwargs["settings"].return_timestamps is True
def test_validator_accepts_huggingface_stt_token_format():
    """An ``hf_``-prefixed token yields no errors for either the STT or LLM config."""
    validator = UserConfigurationValidator()

    stt_errors = validator._validate_service(
        HuggingFaceSTTConfiguration(api_key="hf_test"), "stt"
    )
    assert stt_errors == []

    llm_errors = validator._validate_service(
        HuggingFaceLLMConfiguration(api_key="hf_test"), "llm"
    )
    assert llm_errors == []
def test_validator_rejects_non_huggingface_token_format():
    """A token without the ``hf_`` prefix produces exactly one format error."""
    expected_message = (
        "Invalid Hugging Face API token format. Use a token that starts with "
        "'hf_' and has Inference Providers permission."
    )
    bad_config = HuggingFaceSTTConfiguration(api_key="not-hf-token")

    errors = UserConfigurationValidator()._validate_service(bad_config, "stt")

    assert errors == [{"model": "stt", "message": expected_message}]

View file

@ -0,0 +1,31 @@
from types import SimpleNamespace
from unittest.mock import patch
from pipecat.services.openai._constants import OPENAI_SAMPLE_RATE
from api.services.configuration.registry import ServiceProviders
from api.services.pipecat.service_factory import create_tts_service
def test_create_openai_tts_service_uses_openai_pcm_sample_rate():
    """OpenAI TTS is built with OPENAI_SAMPLE_RATE, not the transport rate."""
    tts_settings = SimpleNamespace(
        provider=ServiceProviders.OPENAI.value,
        api_key="test-key",
        model="gpt-4o-mini-tts",
        voice="alloy",
        base_url=None,
    )
    user_config = SimpleNamespace(tts=tts_settings)
    # Transport rates are deliberately set to 16000 on both sides.
    audio_config = SimpleNamespace(
        transport_out_sample_rate=16000,
        transport_in_sample_rate=16000,
    )

    patch_target = "api.services.pipecat.service_factory.OpenAITTSService"
    with patch(patch_target) as service_cls:
        create_tts_service(user_config, audio_config)

    assert service_cls.call_count == 1
    call_kwargs = service_cls.call_args.kwargs
    assert call_kwargs["sample_rate"] == OPENAI_SAMPLE_RATE
    assert call_kwargs["settings"].model == "gpt-4o-mini-tts"

View file

@ -0,0 +1,30 @@
from api.routes.s3_signed_url import (
_extract_legacy_workflow_run_id,
_extract_org_id_from_key,
)
def test_split_recording_keys_are_workflow_run_artifacts_not_org_keys():
    """Per-track keys resolve to a workflow run id and never to an org id."""
    user_key = "recordings/1855/user.wav"
    bot_key = "recordings/1855/bot.wav"

    assert _extract_legacy_workflow_run_id(user_key) == 1855
    assert _extract_legacy_workflow_run_id(bot_key) == 1855
    assert _extract_org_id_from_key(user_key) is None
    assert _extract_org_id_from_key(bot_key) is None
def test_legacy_recording_keys_do_not_fall_through_to_org_scoped_auth():
    """Flat legacy keys map to a run id; unknown track names map to nothing."""
    legacy_key = "recordings/1855.wav"
    unknown_track_key = "recordings/1855/other.wav"

    assert _extract_legacy_workflow_run_id(legacy_key) == 1855
    assert _extract_legacy_workflow_run_id(unknown_track_key) is None
    assert _extract_org_id_from_key(legacy_key) is None
    assert _extract_org_id_from_key(unknown_track_key) is None
def test_known_org_scoped_keys_extract_org_id():
    """Campaign and knowledge-base keys yield their org id, not a run id."""
    campaign_key = "campaigns/42/source.csv"
    knowledge_base_key = "knowledge_base/42/document/file.pdf"

    assert _extract_org_id_from_key(campaign_key) == 42
    assert _extract_org_id_from_key(knowledge_base_key) == 42
    assert _extract_legacy_workflow_run_id(campaign_key) is None
def test_unknown_numeric_prefix_is_not_treated_as_org_scoped():
    """A numeric segment under an unrecognised prefix is not read as an org id."""
    unknown_prefix_key = "unknown/42/file.wav"
    assert _extract_org_id_from_key(unknown_prefix_key) is None

View file

@ -7,6 +7,7 @@ from pipecat.transcriptions.language import Language
from api.services.configuration.registry import (
SarvamLLMConfiguration,
SarvamTTSConfiguration,
ServiceProviders,
)
from api.services.pipecat.audio_config import AudioConfig
@ -14,6 +15,7 @@ from api.services.pipecat.service_factory import (
create_llm_service,
create_llm_service_from_provider,
create_stt_service,
create_tts_service,
)
@ -112,3 +114,41 @@ class TestSarvamSTTServiceFactory:
kwargs = mock_service.call_args.kwargs
assert kwargs["settings"].language == expected_language
class TestSarvamTTSServiceFactory:
    """Covers Sarvam TTS configuration defaults and factory wiring."""

    def test_sarvam_tts_configuration_defaults(self):
        """A config built from only an API key carries the documented defaults."""
        tts_config = SarvamTTSConfiguration(api_key="test-key")

        assert tts_config.provider == ServiceProviders.SARVAM
        assert tts_config.model == "bulbul:v2"
        assert tts_config.voice == "anushka"
        assert tts_config.language == "hi-IN"
        assert tts_config.speed == 1.0

    def test_create_sarvam_tts_service_maps_speed_to_pace(self):
        """A non-default ``speed`` reaches the service settings as ``pace``."""
        tts_settings = SimpleNamespace(
            provider=ServiceProviders.SARVAM.value,
            api_key="test-key",
            model="bulbul:v2",
            voice="anushka",
            language="hi-IN",
            speed=1.25,
        )
        user_config = SimpleNamespace(tts=tts_settings)
        audio_config = AudioConfig(
            transport_in_sample_rate=16000, transport_out_sample_rate=16000
        )

        patch_target = "api.services.pipecat.service_factory.SarvamTTSService"
        with patch(patch_target) as service_cls:
            create_tts_service(user_config, audio_config)

        call_kwargs = service_cls.call_args.kwargs
        assert call_kwargs["api_key"] == "test-key"
        assert call_kwargs["settings"].model == "bulbul:v2"
        assert call_kwargs["settings"].voice == "anushka"
        assert call_kwargs["settings"].language == Language.HI
        assert call_kwargs["settings"].pace == 1.25

View file

@ -0,0 +1,80 @@
from types import SimpleNamespace
from unittest.mock import patch
from api.services.configuration.check_validity import UserConfigurationValidator
from api.services.configuration.registry import (
REGISTRY,
ServiceProviders,
ServiceType,
SmallestAISTTConfiguration,
SmallestAITTSConfiguration,
)
from api.services.pipecat.service_factory import create_tts_service
def test_smallest_tts_configuration_defaults_and_registry():
    """TTS config exposes its defaults and is registered for the provider."""
    tts_config = SmallestAITTSConfiguration(api_key="test-key")

    assert tts_config.provider == ServiceProviders.SMALLEST
    assert tts_config.model == "lightning_v3.1"
    assert tts_config.voice == "sophia"
    assert tts_config.language == "en"
    assert tts_config.speed == 1.0

    registered_cls = REGISTRY[ServiceType.TTS][ServiceProviders.SMALLEST]
    assert registered_cls is SmallestAITTSConfiguration
def test_smallest_stt_configuration_defaults_and_registry():
    """STT config exposes its defaults and is registered for the provider."""
    stt_config = SmallestAISTTConfiguration(api_key="test-key")

    assert stt_config.provider == ServiceProviders.SMALLEST
    assert stt_config.model == "pulse"
    assert stt_config.language == "en"

    registered_cls = REGISTRY[ServiceType.STT][ServiceProviders.SMALLEST]
    assert registered_cls is SmallestAISTTConfiguration
def test_validator_accepts_smallest_services():
    """Default Smallest TTS and STT configs validate without errors."""
    validator = UserConfigurationValidator()

    tts_errors = validator._validate_service(
        SmallestAITTSConfiguration(api_key="test-key"), "tts"
    )
    assert tts_errors == []

    stt_errors = validator._validate_service(
        SmallestAISTTConfiguration(api_key="test-key"), "stt"
    )
    assert stt_errors == []
def test_create_smallest_tts_service_normalizes_hyphenated_model_values():
    """A hyphenated model id reaches SmallestTTSService in underscore form."""
    tts_settings = SimpleNamespace(
        provider=ServiceProviders.SMALLEST.value,
        api_key="test-key",
        # Hyphenated on purpose: the factory is expected to rewrite it.
        model="lightning-v3.1",
        voice="sophia",
        language="en",
        speed=1.0,
    )
    user_config = SimpleNamespace(tts=tts_settings)
    audio_config = SimpleNamespace(transport_in_sample_rate=16000)

    patch_target = "api.services.pipecat.service_factory.SmallestTTSService"
    with patch(patch_target) as mock_service:
        create_tts_service(user_config, audio_config)

    assert mock_service.call_count == 1
    passed_settings = mock_service.call_args.kwargs["settings"]
    assert passed_settings.model == "lightning_v3.1"

View file

@ -0,0 +1,35 @@
from typing import Literal
# Names of the recording tracks accepted by the helpers in this module; each
# name is used as a key into the ``recordings`` mapping stored under ``extra``.
RecordingTrack = Literal["mixed", "user", "bot"]
def get_recording_storage_key(extra: dict | None, track: RecordingTrack) -> str | None:
    """Return the storage key stored for ``track`` in ``extra["recordings"]``.

    Two artifact shapes are understood: a plain string (the key itself) or a
    mapping carrying the key under ``"storage_key"``.

    Args:
        extra: Mapping that may contain a ``"recordings"`` entry, or ``None``.
        track: Name of the track to look up.

    Returns:
        The storage key string, or ``None`` when ``extra`` or ``recordings``
        is not a mapping, the track is absent, or the stored key is not a
        string.
    """
    # Malformed payloads (a list, a string, ...) are treated like a missing
    # recording instead of raising AttributeError on ``.get``.
    if not isinstance(extra, dict):
        return None
    recordings = extra.get("recordings")
    if not isinstance(recordings, dict):
        return None

    artifact = recordings.get(track)
    if isinstance(artifact, str):
        return artifact
    if isinstance(artifact, dict):
        storage_key = artifact.get("storage_key")
        if isinstance(storage_key, str):
            return storage_key
    return None
def get_recording_storage_backend(
    extra: dict | None, track: RecordingTrack
) -> str | None:
    """Return the storage backend stored for ``track`` in ``extra["recordings"]``.

    Only mapping-shaped artifacts carry a ``"storage_backend"`` entry; an
    artifact stored as a bare string has no backend information.

    Args:
        extra: Mapping that may contain a ``"recordings"`` entry, or ``None``.
        track: Name of the track to look up.

    Returns:
        The backend string, or ``None`` when ``extra`` or ``recordings`` is
        not a mapping, the artifact is not a mapping, or the stored backend is
        not a string.
    """
    # Malformed payloads (a list, a string, ...) are treated like a missing
    # recording instead of raising AttributeError on ``.get``.
    if not isinstance(extra, dict):
        return None
    recordings = extra.get("recordings")
    if not isinstance(recordings, dict):
        return None

    artifact = recordings.get(track)
    if not isinstance(artifact, dict):
        return None
    storage_backend = artifact.get("storage_backend")
    return storage_backend if isinstance(storage_backend, str) else None
def has_recording_track(extra: dict | None, track: RecordingTrack) -> bool:
    """Report whether a non-empty storage key is stored for ``track``."""
    storage_key = get_recording_storage_key(extra, track)
    return bool(storage_key)

File diff suppressed because one or more lines are too long

View file

@ -6,6 +6,23 @@ description: Set up the Dograh contributor environment with the devcontainer-fir
If the steps below do not work for you, please open an issue on [GitHub](https://github.com/dograh-hq/dograh/issues).
</Note>
<Tip>
**Using Claude Code or Codex?** Install the official Dograh setup skill and let your agent walk you through the contributor setup — it covers both the devcontainer and host-managed paths, runs Dograh's own scripts, and verifies the stack is healthy.
<CodeGroup>
```text Claude Code
/plugin marketplace add dograh-hq/dograh-plugins
/plugin install dograh@dograh
```
```text Codex
codex plugin marketplace add dograh-hq/dograh-plugins
codex plugin add dograh@dograh
```
</CodeGroup>
Start a new session, then ask it to *"set up Dograh for development"* (or run `/dograh-setup develop` in Claude Code). More at [dograh-hq/dograh-plugins](https://github.com/dograh-hq/dograh-plugins).
</Tip>
### Recommended: Devcontainer Setup
#### System Requirements

View file

@ -8,6 +8,23 @@ Dograh AI can be deployed using Docker in two main configurations. Choose the op
- **Option 1**: For local development and testing on your own machine
- **Option 2**: For remote server deployment with HTTPS (using an IP address). If you also have a custom domain, first deploy the Dograh stack on your server using the steps in this document, and then proceed to the [Custom Domain](deployment/custom-domain) section.
<Tip>
**Using Claude Code or Codex?** Install the official Dograh setup skill and let your agent drive either deployment below — it orients to your OS, picks local vs remote, runs Dograh's own setup scripts, and verifies the result with a built-in health check.
<CodeGroup>
```text Claude Code
/plugin marketplace add dograh-hq/dograh-plugins
/plugin install dograh@dograh
```
```text Codex
codex plugin marketplace add dograh-hq/dograh-plugins
codex plugin add dograh@dograh
```
</CodeGroup>
Start a new session, then ask it to *"set up Dograh"* (or run `/dograh-setup` in Claude Code). More at [dograh-hq/dograh-plugins](https://github.com/dograh-hq/dograh-plugins).
</Tip>
## Option 1: Local Docker Deployment
Watch the video tutorial below for a step-by-step walkthrough of setting up Dograh AI locally with Docker.

View file

@ -39,6 +39,23 @@ Invoke-WebRequest -OutFile start_docker.ps1 https://raw.githubusercontent.com/do
```
</CodeGroup>
<Tip>
**Using an AI coding agent?** If you work in **Claude Code** or **Codex**, install the official Dograh setup skill and let your agent handle installation, configuration, and troubleshooting for you. It orients to your OS, picks the right deploy path, runs Dograh's own setup scripts, and verifies the result. Install it once:
<CodeGroup>
```text Claude Code
/plugin marketplace add dograh-hq/dograh-plugins
/plugin install dograh@dograh
```
```text Codex
codex plugin marketplace add dograh-hq/dograh-plugins
codex plugin add dograh@dograh
```
</CodeGroup>
Then start a new session and ask it to *"set up Dograh"* (in Claude Code you can also run `/dograh-setup`). See [dograh-hq/dograh-plugins](https://github.com/dograh-hq/dograh-plugins) for details.
</Tip>
Please check [Prerequisites](getting-started/prerequisites) for the system requirements and [Troubleshooting](getting-started/troubleshooting) for common issues.
## Next Steps

@ -1 +1 @@
Subproject commit 0d64dc6e0e3e6b3c46cc66373e34b4f54f980268
Subproject commit 7992b83484da402f45816bb3555cd50ceeb0ec1a

View file

@ -1,6 +1,6 @@
# generated by datamodel-codegen:
# filename: dograh-openapi-XXXXXX.json.w5T4z8AeiH
# timestamp: 2026-06-12T16:16:24+00:00
# filename: dograh-openapi-XXXXXX.json.lMzKvoOMbD
# timestamp: 2026-06-17T13:44:53+00:00
from __future__ import annotations

View file

@ -69,7 +69,7 @@ class Webhook(TypedNode):
Additional HTTP headers to include with the request.
"""
payload_template: dict[str, Any] = field(default_factory=lambda: {'call_id': '{{workflow_run_id}}', 'first_name': '{{initial_context.first_name}}', 'rsvp': '{{gathered_context.rsvp}}', 'duration': '{{cost_info.call_duration_seconds}}', 'recording_url': '{{recording_url}}', 'transcript_url': '{{transcript_url}}'})
payload_template: dict[str, Any] = field(default_factory=lambda: {'call_id': '{{workflow_run_id}}', 'first_name': '{{initial_context.first_name}}', 'rsvp': '{{gathered_context.rsvp}}', 'duration': '{{cost_info.call_duration_seconds}}', 'recording_url': '{{recording_url}}', 'user_recording_url': '{{user_recording_url}}', 'bot_recording_url': '{{bot_recording_url}}', 'transcript_url': '{{transcript_url}}'})
"""
JSON body of the request. Values are Jinja-rendered against the run
context `{{workflow_run_id}}`, `{{gathered_context.foo}}`,

View file

@ -26,10 +26,12 @@
stream: null,
sessionToken: null,
workflowRunId: null,
pcId: null,
connectionStatus: 'idle', // idle, connecting, connected, failed
audioElement: null,
turnCredentials: null, // TURN server credentials
callStartedAt: null, // Timestamp when call connected (for duration tracking)
gracefulDisconnect: false,
callbacks: {
onReady: null,
onCallStart: null,
@ -611,6 +613,7 @@
* Start voice call
*/
async function startCall() {
state.gracefulDisconnect = false;
updateStatus('connecting', 'Connecting...', 'Please wait while we establish the connection');
if (state.callbacks.onCallStart) {
@ -766,45 +769,69 @@
};
// Monitor connection state
state.pc.oniceconnectionstatechange = () => {
console.log('ICE connection state:', state.pc.iceConnectionState);
state.pc.oniceconnectionstatechange = handlePeerConnectionStateChange;
state.pc.onconnectionstatechange = handlePeerConnectionStateChange;
state.pc.onicecandidate = sendIceCandidate;
}
if (state.pc.iceConnectionState === 'connected' || state.pc.iceConnectionState === 'completed') {
const wasAlreadyConnected = state.callStartedAt !== null;
updateStatus('connected', 'Connected', 'Your voice call is now active');
if (!wasAlreadyConnected) {
state.callStartedAt = Date.now();
if (state.callbacks.onCallConnected) {
state.callbacks.onCallConnected({
agentId: state.config.workflowId || null,
token: state.config.token || null,
workflowRunId: state.workflowRunId || null
});
}
function handlePeerConnectionStateChange() {
const pc = state.pc;
if (!pc) return;
console.log('Peer connection state:', pc.connectionState, 'ICE:', pc.iceConnectionState);
if (pc.connectionState === 'connected' || pc.iceConnectionState === 'connected' || pc.iceConnectionState === 'completed') {
const wasAlreadyConnected = state.callStartedAt !== null;
updateStatus('connected', 'Connected', 'Your voice call is now active');
if (!wasAlreadyConnected) {
state.callStartedAt = Date.now();
if (state.callbacks.onCallConnected) {
state.callbacks.onCallConnected({
agentId: state.config.workflowId || null,
token: state.config.token || null,
workflowRunId: state.workflowRunId || null
});
}
} else if (state.pc.iceConnectionState === 'failed' || state.pc.iceConnectionState === 'disconnected') {
updateStatus('failed', 'Connection lost', 'The call has been disconnected');
stopCall();
}
};
return;
}
if (pc.connectionState === 'failed' || pc.iceConnectionState === 'failed') {
stopCall({
graceful: false,
status: 'failed',
text: 'Connection lost',
subtext: 'The call has been disconnected'
});
return;
}
if (
pc.connectionState === 'closed' ||
pc.connectionState === 'disconnected' ||
pc.iceConnectionState === 'closed' ||
pc.iceConnectionState === 'disconnected'
) {
stopCall({ graceful: true });
}
}
function sendIceCandidate(event) {
// Handle ICE candidates for trickling
state.pc.onicecandidate = (event) => {
if (state.ws && state.ws.readyState === WebSocket.OPEN) {
const message = {
type: 'ice-candidate',
payload: {
candidate: event.candidate ? {
candidate: event.candidate.candidate,
sdpMid: event.candidate.sdpMid,
sdpMLineIndex: event.candidate.sdpMLineIndex
} : null,
pc_id: state.pcId
}
};
state.ws.send(JSON.stringify(message));
}
};
if (state.ws && state.ws.readyState === WebSocket.OPEN) {
const message = {
type: 'ice-candidate',
payload: {
candidate: event.candidate ? {
candidate: event.candidate.candidate,
sdpMid: event.candidate.sdpMid,
sdpMLineIndex: event.candidate.sdpMLineIndex
} : null,
pc_id: state.pcId
}
};
state.ws.send(JSON.stringify(message));
}
}
/**
@ -828,9 +855,16 @@
reject(error);
};
state.ws.onclose = () => {
state.ws.onclose = (event) => {
console.log('WebSocket closed');
if (state.connectionStatus === 'connected') {
state.ws = null;
if (event.reason === 'call ended') {
stopCall({ graceful: true, closeWebSocket: false });
return;
}
if (state.connectionStatus === 'connected' && !state.gracefulDisconnect) {
updateStatus('failed', 'Connection lost', 'The call has been disconnected');
}
};
@ -882,6 +916,11 @@
updateStatus('failed', 'Server error', message.payload.message || 'An error occurred');
break;
case 'call-ended':
console.log('Call ended by server:', message.payload);
stopCall({ graceful: true });
break;
default:
console.warn('Unknown message type:', message.type);
}
@ -913,7 +952,15 @@
/**
* Stop voice call
*/
function stopCall() {
function stopCall(options = {}) {
const graceful = options.graceful !== false;
const closeWebSocket = options.closeWebSocket !== false;
const status = options.status || 'idle';
const text = options.text || 'Call ended';
const subtext = options.subtext || 'Click below to start a new call';
state.gracefulDisconnect = graceful;
// Fire onCallDisconnected only if the call had actually connected, with
// identifiers and duration. Must run before we clear callStartedAt.
if (state.callStartedAt && state.callbacks.onCallDisconnected) {
@ -927,15 +974,20 @@
}
state.callStartedAt = null;
updateStatus('idle', 'Call ended', 'Click below to start a new call');
updateStatus(status, text, subtext);
if (state.callbacks.onCallEnd) {
state.callbacks.onCallEnd();
}
// Close WebSocket
if (state.ws) {
state.ws.close();
if (closeWebSocket && state.ws) {
const ws = state.ws;
state.ws = null;
if (ws.readyState !== WebSocket.CLOSED && ws.readyState !== WebSocket.CLOSING) {
ws.close();
}
} else if (!closeWebSocket) {
state.ws = null;
}
@ -947,8 +999,11 @@
// Close peer connection
if (state.pc) {
state.pc.close();
const pc = state.pc;
state.pc = null;
if (pc.signalingState !== 'closed') {
pc.close();
}
}
// Clear audio

View file

@ -5,6 +5,8 @@ import {
ChevronRight,
CircleDollarSign,
CreditCard,
ExternalLink,
Info,
RefreshCw,
} from "lucide-react";
import Link from "next/link";
@ -124,7 +126,8 @@ export default function BillingPage() {
);
const isBillingV2 = credits?.billing_version === "v2";
const canPurchaseCredits = isBillingV2 && config?.deploymentMode !== "oss";
const isOssMode = config?.deploymentMode === "oss";
const canPurchaseCredits = isBillingV2 && !isOssMode;
const totalQuota = credits?.total_quota ?? 0;
const remainingCredits = credits?.remaining_credits ?? 0;
const usedCredits = credits?.total_credits_used ?? 0;
@ -265,6 +268,36 @@ export default function BillingPage() {
</div>
</div>
{isOssMode && (
<div className="flex gap-3 rounded-lg border border-amber-200 bg-amber-50 p-4 dark:border-amber-900/50 dark:bg-amber-950/30">
<Info className="mt-0.5 h-4 w-4 flex-shrink-0 text-amber-600 dark:text-amber-400" />
<div className="text-sm text-amber-900 dark:text-amber-200">
<p className="font-medium">Credit purchases are unavailable in OSS mode</p>
<p className="mt-1">
You can&apos;t purchase credits from this self-hosted app. Sign up and
purchase credits at{" "}
<a
href="https://app.dograh.com"
target="_blank"
rel="noopener noreferrer"
className="inline-flex items-center gap-1 font-medium underline underline-offset-2"
>
app.dograh.com
<ExternalLink className="h-3 w-3" />
</a>
. Then add the generated service key in{" "}
<Link
href="/model-configurations"
className="font-medium underline underline-offset-2"
>
Model Configurations
</Link>
. Usage for that service key is visible in app.dograh.com.
</p>
</div>
</div>
)}
<div className="grid gap-4 md:grid-cols-2">
<Card>
<CardHeader className="pb-2">

View file

@ -19,6 +19,16 @@ interface UseWebSocketRTCProps {
onNodeTransition?: (transition: ConversationNodeTransitionItem) => void;
}
// Connection status values held in the hook's `connectionStatus` state.
type ConnectionStatus = 'idle' | 'connecting' | 'connected' | 'failed';
// Options accepted by `cleanupConnection`; every field is optional.
interface CleanupConnectionOptions {
// Whether the teardown is an expected end of call. Treated as true when omitted.
graceful?: boolean;
// Status to publish after cleanup. When omitted: 'idle' if graceful, otherwise 'failed'.
status?: ConnectionStatus;
// Pass false to leave the WebSocket alone (e.g. when it has already closed).
closeWebSocket?: boolean;
// Pass false to leave the peer connection untouched.
closePeerConnection?: boolean;
// When true, the peer connection's close() is deferred by a short timeout.
delayPeerClose?: boolean;
}
const HANDLED_SERVICE_ERROR_TYPES = new Set([
'quota_exceeded',
'insufficient_credits',
@ -27,7 +37,7 @@ const HANDLED_SERVICE_ERROR_TYPES = new Set([
]);
export const useWebSocketRTC = ({ workflowId, workflowRunId, accessToken, initialContextVariables, onNodeTransition }: UseWebSocketRTCProps) => {
const [connectionStatus, setConnectionStatus] = useState<'idle' | 'connecting' | 'connected' | 'failed'>('idle');
const [connectionStatus, setConnectionStatus] = useState<ConnectionStatus>('idle');
const [connectionActive, setConnectionActive] = useState(false);
const [isCompleted, setIsCompleted] = useState(false);
const [apiKeyModalOpen, setApiKeyModalOpen] = useState(false);
@ -62,11 +72,22 @@ export const useWebSocketRTC = ({ workflowId, workflowRunId, accessToken, initia
const wsRef = useRef<WebSocket | null>(null);
const timeStartRef = useRef<number | null>(null);
const onNodeTransitionRef = useRef(onNodeTransition);
const connectionActiveRef = useRef(connectionActive);
const isCompletedRef = useRef(isCompleted);
const gracefulDisconnectRef = useRef(false);
useEffect(() => {
onNodeTransitionRef.current = onNodeTransition;
}, [onNodeTransition]);
useEffect(() => {
connectionActiveRef.current = connectionActive;
}, [connectionActive]);
useEffect(() => {
isCompletedRef.current = isCompleted;
}, [isCompleted]);
// Generate a cryptographically secure unique ID
const generateSecureId = () => {
// Use Web Crypto API to generate random bytes
@ -95,6 +116,68 @@ export const useWebSocketRTC = ({ workflowId, workflowRunId, accessToken, initia
return `${wsUrl}/api/v1/ws/signaling/${workflowId}/${workflowRunId}?token=${accessToken}`;
}, [workflowId, workflowRunId, accessToken]);
/**
 * Tear down a peer connection: stop its transceivers and outgoing tracks,
 * then close it either immediately or after a 500 ms delay.
 *
 * @param pc - Connection to tear down; a null value is ignored.
 * @param delayClose - When true, defer the final close() by 500 ms.
 */
const closePeerConnection = useCallback((pc: RTCPeerConnection | null, delayClose = false) => {
    if (!pc) return;

    if (pc.getTransceivers) {
        for (const transceiver of pc.getTransceivers()) {
            if (!transceiver.stop) continue;
            try {
                transceiver.stop();
            } catch (e) {
                // A transceiver that refuses to stop must not abort the rest of the cleanup.
                logger.debug('Failed to stop transceiver during cleanup:', e);
            }
        }
    }

    for (const sender of pc.getSenders()) {
        sender.track?.stop();
    }

    const finalizeClose = () => {
        // Only clear the shared ref if it still points at this connection.
        if (pcRef.current === pc) {
            pcRef.current = null;
        }
        if (pc.signalingState !== 'closed') {
            pc.close();
        }
    };

    if (!delayClose) {
        finalizeClose();
        return;
    }
    setTimeout(finalizeClose, 500);
}, []);
/**
 * Single teardown path for a call: records whether the end was graceful,
 * updates the refs and React state, and closes the WebSocket and peer
 * connection unless the caller opts out.
 *
 * @param options - See CleanupConnectionOptions; all fields are optional.
 */
const cleanupConnection = useCallback((options: CleanupConnectionOptions = {}) => {
    const isGraceful = options.graceful ?? true;
    const nextStatus = options.status ?? (isGraceful ? 'idle' : 'failed');
    const shouldCloseSocket = options.closeWebSocket !== false;
    const shouldClosePeer = options.closePeerConnection !== false;

    // Refs are updated synchronously so event handlers that fire before the
    // next render already see the post-cleanup values.
    gracefulDisconnectRef.current = isGraceful;
    connectionActiveRef.current = false;
    isCompletedRef.current = isGraceful;

    setConnectionActive(false);
    setIsCompleted(isGraceful);
    setConnectionStatus(nextStatus);

    if (shouldCloseSocket) {
        const socket = wsRef.current;
        const alreadyClosing =
            !socket ||
            socket.readyState === WebSocket.CLOSED ||
            socket.readyState === WebSocket.CLOSING;
        if (!alreadyClosing) {
            socket.close();
        }
        wsRef.current = null;
    }

    if (shouldClosePeer) {
        closePeerConnection(pcRef.current, options.delayPeerClose ?? false);
    }
}, [closePeerConnection]);
const createPeerConnection = () => {
// Build ICE servers list
const iceServers: RTCIceServer[] = [];
@ -155,43 +238,36 @@ export const useWebSocketRTC = ({ workflowId, workflowRunId, accessToken, initia
}
});
pc.addEventListener('iceconnectionstatechange', () => {
logger.info(`ICE connection state changed: ${pc.iceConnectionState}`);
if (pc.iceConnectionState === 'connected' || pc.iceConnectionState === 'completed') {
const handlePeerStateChange = () => {
logger.info(`Peer connection state changed: ${pc.connectionState}; ICE: ${pc.iceConnectionState}`);
if (
pc.connectionState === 'connected' ||
pc.iceConnectionState === 'connected' ||
pc.iceConnectionState === 'completed'
) {
setConnectionStatus('connected');
} else if (pc.iceConnectionState === 'failed') {
setConnectionStatus('failed');
} else if (pc.iceConnectionState === 'disconnected') {
// Server-initiated disconnect - clean up gracefully
logger.info('Server initiated disconnect - cleaning up connection');
// Close WebSocket if still open
if (wsRef.current) {
wsRef.current.close();
wsRef.current = null;
}
// Mark as completed to trigger recording check
setConnectionActive(false);
setIsCompleted(true);
setConnectionStatus('idle');
// Clean up peer connection
if (pc.getTransceivers) {
pc.getTransceivers().forEach((transceiver) => {
if (transceiver.stop) {
transceiver.stop();
}
});
}
pc.getSenders().forEach((sender) => {
if (sender.track) {
sender.track.stop();
}
});
return;
}
});
if (pc.connectionState === 'failed' || pc.iceConnectionState === 'failed') {
cleanupConnection({ graceful: false, status: 'failed' });
return;
}
if (
pc.connectionState === 'closed' ||
pc.connectionState === 'disconnected' ||
pc.iceConnectionState === 'closed' ||
pc.iceConnectionState === 'disconnected'
) {
logger.info('Peer connection ended - cleaning up connection');
cleanupConnection({ graceful: true, status: 'idle' });
}
};
pc.addEventListener('iceconnectionstatechange', handlePeerStateChange);
pc.addEventListener('connectionstatechange', handlePeerStateChange);
pc.addEventListener('track', (evt) => {
if (evt.track.kind === 'audio' && audioRef.current) {
@ -221,11 +297,23 @@ export const useWebSocketRTC = ({ workflowId, workflowRunId, accessToken, initia
reject(error);
};
ws.onclose = () => {
ws.onclose = (event) => {
logger.info('WebSocket closed');
wsRef.current = null;
if (event.reason === 'call ended') {
cleanupConnection({
graceful: true,
status: 'idle',
closeWebSocket: false,
});
return;
}
// Don't set failed status if already completed (graceful disconnect)
if (connectionActive && !isCompleted) {
if (
connectionActiveRef.current &&
!isCompletedRef.current &&
!gracefulDisconnectRef.current
) {
setConnectionStatus('failed');
}
};
@ -245,6 +333,7 @@ export const useWebSocketRTC = ({ workflowId, workflowRunId, accessToken, initia
type: 'answer',
sdp: answer.sdp
});
connectionActiveRef.current = true;
setConnectionActive(true);
logger.info('Remote description set');
}
@ -281,25 +370,19 @@ export const useWebSocketRTC = ({ workflowId, workflowRunId, accessToken, initia
setApiKeyError(message.payload.message || 'Service quota exceeded');
setApiKeyModalOpen(true);
// Stop the connection gracefully
setConnectionStatus('failed');
setConnectionActive(false);
// Close WebSocket and peer connection
if (wsRef.current) {
wsRef.current.close();
wsRef.current = null;
}
if (pcRef.current) {
pcRef.current.close();
pcRef.current = null;
}
// Stop the connection and surface the handled service error.
cleanupConnection({ graceful: false, status: 'failed' });
} else {
// Log other errors as actual errors
logger.error('Server error:', message.payload);
}
break;
case 'call-ended':
logger.info('Call ended by server:', message.payload);
cleanupConnection({ graceful: true, status: 'idle' });
break;
case 'rtf-user-transcription': {
const transcription = message.payload;
@ -503,7 +586,7 @@ export const useWebSocketRTC = ({ workflowId, workflowRunId, accessToken, initia
}
};
});
}, [getWebSocketUrl, connectionActive, isCompleted]);
}, [getWebSocketUrl, cleanupConnection]);
const negotiate = async () => {
const pc = pcRef.current;
@ -552,7 +635,12 @@ export const useWebSocketRTC = ({ workflowId, workflowRunId, accessToken, initia
const start = async () => {
if (isStarting || !accessToken) return;
gracefulDisconnectRef.current = false;
connectionActiveRef.current = false;
isCompletedRef.current = false;
setIsStarting(true);
setConnectionActive(false);
setIsCompleted(false);
setConnectionStatus('connecting');
try {
@ -676,40 +764,7 @@ export const useWebSocketRTC = ({ workflowId, workflowRunId, accessToken, initia
};
const stop = () => {
setConnectionActive(false);
setIsCompleted(true);
setConnectionStatus('idle');
// Close WebSocket
if (wsRef.current) {
wsRef.current.close();
wsRef.current = null;
}
// Close peer connection
const pc = pcRef.current;
if (!pc) return;
if (pc.getTransceivers) {
pc.getTransceivers().forEach((transceiver) => {
if (transceiver.stop) {
transceiver.stop();
}
});
}
pc.getSenders().forEach((sender) => {
if (sender.track) {
sender.track.stop();
}
});
setTimeout(() => {
if (pcRef.current) {
pcRef.current.close();
pcRef.current = null;
}
}, 500);
cleanupConnection({ graceful: true, status: 'idle', delayPeerClose: true });
};
// Cleanup on unmount

View file

@ -1,6 +1,18 @@
'use client';
import { Check, Copy, ExternalLink, FileText, Video } from 'lucide-react';
import {
Bot,
Check,
Copy,
Download,
ExternalLink,
FileText,
Loader2,
Pause,
Play,
UserRound,
Video,
} from 'lucide-react';
import Link from 'next/link';
import { useParams } from 'next/navigation';
import posthog from 'posthog-js';
@ -18,13 +30,16 @@ import { PostHogEvent } from '@/constants/posthog-events';
import { WORKFLOW_RUN_MODES } from '@/constants/workflowRunModes';
import { useOnboarding } from '@/context/OnboardingContext';
import { useAuth } from '@/lib/auth';
import { downloadFile } from '@/lib/files';
import { downloadFile, getSignedUrl } from '@/lib/files';
import { cn } from '@/lib/utils';
interface WorkflowRunResponse {
mode: string;
is_completed: boolean;
transcript_url: string | null;
recording_url: string | null;
user_recording_url: string | null;
bot_recording_url: string | null;
cost_info: {
dograh_token_usage?: number | null;
call_duration_seconds?: number | null;
@ -36,6 +51,7 @@ interface WorkflowRunResponse {
}
const RUN_SHELL_HEIGHT_CLASS = "h-[calc(100svh-49px)] min-h-[calc(100svh-49px)] max-h-[calc(100svh-49px)]";
const WAVEFORM_BAR_COUNT = 96;
function formatDuration(seconds?: number | null) {
if (seconds == null || Number.isNaN(seconds)) return 'N/A';
@ -71,6 +87,309 @@ function MetricCard({ label, value }: { label: string; value: string }) {
);
}
/**
 * Reduce the first channel of a decoded audio buffer to WAVEFORM_BAR_COUNT
 * bar heights. Each bar is the RMS of its slice of samples, scaled by 5 and
 * clamped to the range [0.08, 1].
 */
function buildWaveformPeaks(audioBuffer: AudioBuffer) {
    const samples = audioBuffer.getChannelData(0);
    const barWidth = Math.max(1, Math.floor(samples.length / WAVEFORM_BAR_COUNT));
    const peaks: number[] = [];

    for (let bar = 0; bar < WAVEFORM_BAR_COUNT; bar += 1) {
        const from = bar * barWidth;
        const to = Math.min(from + barWidth, samples.length);

        let energy = 0;
        for (let i = from; i < to; i += 1) {
            energy += samples[i] * samples[i];
        }

        // Math.max(1, ...) keeps the divisor positive for slices past the end of the data.
        const rms = Math.sqrt(energy / Math.max(1, to - from));
        peaks.push(Math.max(0.08, Math.min(1, rms * 5)));
    }

    return peaks;
}
/**
 * Download an audio file and turn it into waveform bar heights.
 *
 * @param url - URL of the audio file to fetch and decode.
 * @returns The bar heights from buildWaveformPeaks, or null when the browser
 *   exposes neither AudioContext nor webkitAudioContext.
 * @throws Error when the HTTP response is not OK; also rejects if the fetch
 *   or the audio decoding fails.
 */
async function loadWaveformPeaks(url: string) {
    // Resolve the constructor first so an unsupported browser does not
    // download the whole recording only to discard it.
    const AudioContextConstructor =
        window.AudioContext ||
        (window as typeof window & { webkitAudioContext?: typeof AudioContext })
            .webkitAudioContext;
    if (!AudioContextConstructor) return null;

    const response = await fetch(url);
    if (!response.ok) {
        // Fail with the HTTP status instead of handing an error body to the decoder.
        throw new Error(`Failed to fetch recording audio (HTTP ${response.status})`);
    }
    const audioData = await response.arrayBuffer();

    const audioContext = new AudioContextConstructor();
    try {
        const decoded = await audioContext.decodeAudioData(audioData);
        return buildWaveformPeaks(decoded);
    } finally {
        // The context is only needed for decoding; release it either way.
        void audioContext.close();
    }
}
/**
 * One lane of the split-track waveform. Renders a bar per peak, or a thin
 * placeholder line while `peaks` is null. `position` selects the top or
 * bottom half of the container and `track` selects the bar colour.
 */
function WaveformLane({
    peaks,
    track,
    position,
}: {
    peaks: number[] | null;
    track: 'user' | 'bot';
    position: 'top' | 'bottom';
}) {
    const laneClassName = cn(
        'absolute left-3 right-3 flex gap-0.5',
        position === 'top' ? 'top-5 h-12 items-end' : 'bottom-5 h-12 items-start'
    );

    if (!peaks) {
        return (
            <div className={laneClassName}>
                <div className="my-auto h-px w-full bg-border" />
            </div>
        );
    }

    const barColorClass = track === 'user' ? 'bg-sky-500' : 'bg-emerald-500';

    return (
        <div className={laneClassName}>
            {peaks.map((peak, index) => (
                <span
                    key={`${track}-${index}`}
                    className={cn('min-h-1 flex-1 rounded-full opacity-85', barColorClass)}
                    style={{ height: `${Math.round(peak * 100)}%` }}
                />
            ))}
        </div>
    );
}
/**
 * Card that plays the user and bot recordings together from two hidden
 * <audio> elements, draws one waveform lane per track with a shared playhead,
 * and offers a download button for each track.
 *
 * Props:
 * - userRecordingUrl / botRecordingUrl: values passed to getSignedUrl (for
 *   playback and waveform loading) and to downloadFile (for the buttons).
 */
function SplitTracksSection({
userRecordingUrl,
botRecordingUrl,
}: {
userRecordingUrl: string;
botRecordingUrl: string;
}) {
const userAudioRef = useRef<HTMLAudioElement | null>(null);
const botAudioRef = useRef<HTMLAudioElement | null>(null);
// Signed URLs used as the <audio> sources; null until resolved.
const [signedUrls, setSignedUrls] = useState<{ user: string | null; bot: string | null }>({
user: null,
bot: null,
});
// Waveform bar heights per track; null renders the placeholder line.
const [peaks, setPeaks] = useState<{ user: number[] | null; bot: number[] | null }>({
user: null,
bot: null,
});
const [isLoading, setIsLoading] = useState(false);
const [isPlaying, setIsPlaying] = useState(false);
// Playhead position as a fraction in [0, 1].
const [progress, setProgress] = useState(0);
// Whenever either recording URL changes: stop playback, reset all state,
// then resolve the signed URLs and decode the waveforms.
useEffect(() => {
// Cleared by the cleanup function so a stale async load cannot update state.
let isActive = true;
const userAudio = userAudioRef.current;
const botAudio = botAudioRef.current;
userAudio?.pause();
botAudio?.pause();
setSignedUrls({ user: null, bot: null });
setPeaks({ user: null, bot: null });
setIsPlaying(false);
setProgress(0);
setIsLoading(true);
async function loadTracks() {
try {
const [userUrl, botUrl] = await Promise.all([
getSignedUrl(userRecordingUrl, true),
getSignedUrl(botRecordingUrl, true),
]);
if (!isActive) return;
setSignedUrls({ user: userUrl, bot: botUrl });
// Without both URLs there is nothing to decode; the finally block below
// still clears the loading flag.
if (!userUrl || !botUrl) return;
const [userPeaks, botPeaks] = await Promise.all([
loadWaveformPeaks(userUrl),
loadWaveformPeaks(botUrl),
]);
if (isActive) {
setPeaks({ user: userPeaks, bot: botPeaks });
}
} catch (error) {
// A failed load is logged only; the lanes keep their placeholder line.
console.error('Error loading split track waveforms:', error);
} finally {
if (isActive) {
setIsLoading(false);
}
}
}
void loadTracks();
return () => {
isActive = false;
userAudio?.pause();
botAudio?.pause();
};
}, [userRecordingUrl, botRecordingUrl]);
// While playing, recompute the playhead on every animation frame from
// whichever track is further along, relative to the longer duration.
useEffect(() => {
if (!isPlaying) return;
let frameId: number;
const updateProgress = () => {
const userAudio = userAudioRef.current;
const botAudio = botAudioRef.current;
// A non-finite duration (e.g. NaN) is treated as 0.
const userDuration = Number.isFinite(userAudio?.duration) ? userAudio?.duration ?? 0 : 0;
const botDuration = Number.isFinite(botAudio?.duration) ? botAudio?.duration ?? 0 : 0;
const duration = Math.max(userDuration, botDuration);
const currentTime = Math.max(userAudio?.currentTime ?? 0, botAudio?.currentTime ?? 0);
setProgress(duration > 0 ? Math.min(1, currentTime / duration) : 0);
frameId = window.requestAnimationFrame(updateProgress);
};
frameId = window.requestAnimationFrame(updateProgress);
return () => window.cancelAnimationFrame(frameId);
}, [isPlaying]);
// Pause both elements and mark playback as stopped.
const pauseTracks = () => {
userAudioRef.current?.pause();
botAudioRef.current?.pause();
setIsPlaying(false);
};
// Playback is finished only once both tracks have ended; a missing element
// counts as done.
const handleTrackEnded = () => {
const userAudio = userAudioRef.current;
const botAudio = botAudioRef.current;
const userDone = !userAudio || userAudio.ended;
const botDone = !botAudio || botAudio.ended;
if (userDone && botDone) {
setIsPlaying(false);
setProgress(1);
}
};
// Play/pause handler for the round button. Does nothing until both audio
// elements and both signed URLs are available.
const togglePlayback = async () => {
const userAudio = userAudioRef.current;
const botAudio = botAudioRef.current;
if (!userAudio || !botAudio || !signedUrls.user || !signedUrls.bot) return;
if (isPlaying) {
pauseTracks();
return;
}
const userDuration = Number.isFinite(userAudio.duration) ? userAudio.duration : 0;
const botDuration = Number.isFinite(botAudio.duration) ? botAudio.duration : 0;
const duration = Math.max(userDuration, botDuration);
const currentTime = Math.max(userAudio.currentTime, botAudio.currentTime);
// Within 0.1 s of the end of the longer track: restart from the beginning.
const startTime = duration > 0 && currentTime >= duration - 0.1 ? 0 : currentTime;
// Seek both tracks to the same position, clamped to each track's own
// duration (left unclamped while the duration is still unknown).
// NOTE(review): when one track is shorter and startTime is past its end, it
// is seeked to its end before play(); browsers may then restart it from 0 and
// desynchronise the tracks. Confirm with recordings of unequal length.
userAudio.currentTime = Math.min(startTime, userDuration || startTime);
botAudio.currentTime = Math.min(startTime, botDuration || startTime);
try {
await Promise.all([userAudio.play(), botAudio.play()]);
setIsPlaying(true);
} catch (error) {
// If either play() rejects, make sure neither track keeps playing.
pauseTracks();
console.error('Error playing split tracks:', error);
}
};
const canPlay = Boolean(signedUrls.user && signedUrls.bot);
// Playhead offset as a percentage rounded to one decimal place.
const progressPercent = Math.round(progress * 1000) / 10;
return (
<Card className="border-border">
<audio
ref={userAudioRef}
src={signedUrls.user ?? undefined}
preload="metadata"
className="hidden"
onEnded={handleTrackEnded}
/>
<audio
ref={botAudioRef}
src={signedUrls.bot ?? undefined}
preload="metadata"
className="hidden"
onEnded={handleTrackEnded}
/>
<CardHeader className="pb-3">
<CardTitle className="text-lg">Split Tracks</CardTitle>
</CardHeader>
<CardContent className="space-y-4">
<div className="flex flex-wrap items-center justify-between gap-3">
<div className="flex items-center gap-2">
<span className="inline-flex items-center gap-1.5 text-sm font-medium text-sky-600">
<UserRound className="h-4 w-4" />
User
</span>
<span className="h-4 w-px bg-border" />
<span className="inline-flex items-center gap-1.5 text-sm font-medium text-emerald-600">
<Bot className="h-4 w-4" />
Bot
</span>
</div>
<div className="flex items-center gap-2">
<Button
type="button"
variant="outline"
size="sm"
onClick={() => downloadFile(userRecordingUrl)}
className="gap-2"
>
<Download className="h-4 w-4" />
User
</Button>
<Button
type="button"
variant="outline"
size="sm"
onClick={() => downloadFile(botRecordingUrl)}
className="gap-2"
>
<Download className="h-4 w-4" />
Bot
</Button>
</div>
</div>
<div className="flex items-center gap-4">
<Button
type="button"
size="icon"
variant={isPlaying ? 'default' : 'outline'}
onClick={togglePlayback}
disabled={!canPlay}
aria-label={isPlaying ? 'Pause split tracks' : 'Play split tracks'}
className="h-10 w-10 shrink-0"
>
{isPlaying ? <Pause className="h-4 w-4" /> : <Play className="h-4 w-4" />}
</Button>
<div className="relative h-36 min-w-0 flex-1 overflow-hidden rounded-lg border border-border/70 bg-background">
<div className="absolute left-3 right-3 top-1/2 h-px bg-border/80" />
<WaveformLane peaks={peaks.user} track="user" position="top" />
<WaveformLane peaks={peaks.bot} track="bot" position="bottom" />
{canPlay && (
<div className="pointer-events-none absolute inset-x-3 inset-y-3">
<div
className="absolute top-0 bottom-0 w-px bg-foreground/50"
style={{ left: `${progressPercent}%` }}
/>
</div>
)}
{isLoading && (
<div className="absolute inset-0 flex items-center justify-center bg-background/70 text-xs text-muted-foreground">
<Loader2 className="mr-2 h-3.5 w-3.5 animate-spin" />
Loading
</div>
)}
</div>
</div>
</CardContent>
</Card>
);
}
function RunMetricsSection({
costInfo,
logs,
@ -180,6 +499,8 @@ export default function WorkflowRunPage() {
is_completed: response.data?.is_completed ?? false,
transcript_url: response.data?.transcript_url ?? null,
recording_url: response.data?.recording_url ?? null,
user_recording_url: response.data?.user_recording_url ?? null,
bot_recording_url: response.data?.bot_recording_url ?? null,
cost_info: response.data?.cost_info ?? null,
initial_context: response.data?.initial_context as Record<string, string> | null ?? null,
gathered_context: response.data?.gathered_context as Record<string, string> | null ?? null,
@ -192,6 +513,7 @@ export default function WorkflowRunPage() {
run_id: Number(runId),
is_completed: runData.is_completed,
has_recording: !!runData.recording_url,
has_split_recordings: !!runData.user_recording_url && !!runData.bot_recording_url,
has_transcript: !!runData.transcript_url,
});
};
@ -201,6 +523,9 @@ export default function WorkflowRunPage() {
let returnValue = null;
const isTextChatRun = workflowRun?.mode === WORKFLOW_RUN_MODES.TEXTCHAT;
const showRunDetailsView = Boolean(workflowRun?.is_completed || isTextChatRun);
const userSplitRecordingUrl = workflowRun?.user_recording_url ?? null;
const botSplitRecordingUrl = workflowRun?.bot_recording_url ?? null;
const hasSplitTracks = Boolean(userSplitRecordingUrl && botSplitRecordingUrl);
if (isLoading) {
returnValue = (
@ -336,6 +661,13 @@ export default function WorkflowRunPage() {
gatheredContext={workflowRun?.gathered_context ?? null}
/>
{!isTextChatRun && hasSplitTracks && (
<SplitTracksSection
userRecordingUrl={userSplitRecordingUrl as string}
botRecordingUrl={botSplitRecordingUrl as string}
/>
)}
<div className="grid gap-6 md:grid-cols-2">
<ContextDisplay
title="Initial Context"

File diff suppressed because one or more lines are too long

View file

@ -1169,9 +1169,9 @@ export const getCampaignDefaultsApiV1OrganizationsCampaignDefaultsGet = <ThrowOn
* Return a short-lived signed URL for a file stored on S3 / MinIO.
*
* Access Control:
* * Keys that embed an organization ID (``{prefix}/{org_id}/...``) are
* authorized by matching the org_id against the requesting user's
* organization.
* * Known org-scoped keys (for example ``campaigns/{org_id}/...`` and
* ``knowledge_base/{org_id}/...``) are authorized by matching the org_id
* against the requesting user's organization.
* * Legacy keys (``recordings/{run_id}.wav``, ``transcripts/{run_id}.txt``)
* are authorized via the workflow run they belong to.
* * Superusers can request any key.
@ -1491,13 +1491,15 @@ export const initiateCallTestByWorkflowUuidApiV1PublicAgentTestWorkflowWorkflowU
*
* Args:
* token: The public access token (UUID format)
* artifact_type: Type of artifact - "recording" or "transcript"
* artifact_type: Type of artifact - "recording", "transcript",
* "user_recording", or "bot_recording"
* inline: If true, sets Content-Disposition to inline for browser preview
*
* Returns:
* RedirectResponse to the signed URL (302 redirect)
*
* Raises:
* HTTPException 400: If artifact type is unsupported
* HTTPException 404: If token is invalid or artifact not found
*/
export const downloadWorkflowArtifactApiV1PublicDownloadWorkflowTokenArtifactTypeGet = <ThrowOnError extends boolean = false>(
    options: Options<DownloadWorkflowArtifactApiV1PublicDownloadWorkflowTokenArtifactTypeGetData, ThrowOnError>,
) => {
    // Use the client passed in the options when there is one; otherwise use the module-level `client`.
    const httpClient = options.client ?? client;

    // The caller's options are spread last, matching the original call's key order.
    return httpClient.get<
        DownloadWorkflowArtifactApiV1PublicDownloadWorkflowTokenArtifactTypeGetResponses,
        DownloadWorkflowArtifactApiV1PublicDownloadWorkflowTokenArtifactTypeGetErrors,
        ThrowOnError
    >({
        url: '/api/v1/public/download/workflow/{token}/{artifact_type}',
        ...options,
    });
};

View file

@ -147,13 +147,13 @@ export type AwsBedrockLlmConfiguration = {
/**
* Api Key
*
* Not used for Bedrock - authentication is via the AWS credentials above. Leave blank.
* Not used for Bedrock — authentication is via the AWS credentials above. Leave blank.
*/
api_key?: string | Array<string> | null;
/**
* Model
*
* Bedrock model ID - include the region inference-profile prefix (e.g. 'us.').
* Bedrock model ID — include the region inference-profile prefix (e.g. 'us.').
*/
model?: string;
/**
@ -344,7 +344,7 @@ export type AzureOpenAiEmbeddingsConfiguration = {
/**
* Azure OpenAI Realtime
*
* Azure OpenAI Realtime API - low-latency speech-to-speech conversations.
* Azure OpenAI Realtime API — low-latency speech-to-speech conversations.
*/
export type AzureRealtimeLlmConfiguration = {
/**
@ -384,7 +384,7 @@ export type AzureRealtimeLlmConfiguration = {
/**
* Azure Speech Services
*
* Azure Cognitive Services Speech - TTS and STT via the Azure Speech SDK.
* Azure Cognitive Services Speech — TTS and STT via the Azure Speech SDK.
*/
export type AzureSpeechSttConfiguration = {
/**
@ -418,7 +418,7 @@ export type AzureSpeechSttConfiguration = {
/**
* Azure Speech Services
*
* Azure Cognitive Services Speech - TTS and STT via the Azure Speech SDK.
* Azure Cognitive Services Speech — TTS and STT via the Azure Speech SDK.
*/
export type AzureSpeechTtsConfiguration = {
/**
@ -499,6 +499,8 @@ export type ByokPipelineAiModelConfiguration = {
} & AwsBedrockLlmConfiguration) | ({
provider: 'speaches';
} & SpeachesLlmConfiguration) | ({
provider: 'huggingface';
} & HuggingFaceLlmConfiguration) | ({
provider: 'minimax';
} & MiniMaxLlmConfiguration) | ({
provider: 'sarvam';
@ -530,7 +532,9 @@ export type ByokPipelineAiModelConfiguration = {
provider: 'minimax';
} & MiniMaxTtsConfiguration) | ({
provider: 'azure_speech';
} & AzureSpeechTtsConfiguration);
} & AzureSpeechTtsConfiguration) | ({
provider: 'smallest';
} & SmallestAittsConfiguration);
/**
* Stt
*/
@ -551,12 +555,16 @@ export type ByokPipelineAiModelConfiguration = {
} & SarvamSttConfiguration) | ({
provider: 'speaches';
} & SpeachesSttConfiguration) | ({
provider: 'huggingface';
} & HuggingFaceSttConfiguration) | ({
provider: 'assemblyai';
} & AssemblyAisttConfiguration) | ({
provider: 'gladia';
} & GladiaSttConfiguration) | ({
provider: 'azure_speech';
} & AzureSpeechSttConfiguration);
} & AzureSpeechSttConfiguration) | ({
provider: 'smallest';
} & SmallestAisttConfiguration);
/**
* Embeddings
*/
@ -613,6 +621,8 @@ export type ByokRealtimeAiModelConfiguration = {
} & AwsBedrockLlmConfiguration) | ({
provider: 'speaches';
} & SpeachesLlmConfiguration) | ({
provider: 'huggingface';
} & HuggingFaceLlmConfiguration) | ({
provider: 'minimax';
} & MiniMaxLlmConfiguration) | ({
provider: 'sarvam';
@ -2627,7 +2637,7 @@ export type GoogleVertexLlmConfiguration = {
/**
* Api Key
*
* Not used for Vertex AI - authentication is via the service account in `credentials` (or ADC). Leave blank.
* Not used for Vertex AI — authentication is via the service account in `credentials` (or ADC). Leave blank.
*/
api_key?: string | Array<string> | null;
/**
@ -2667,7 +2677,7 @@ export type GoogleVertexRealtimeLlmConfiguration = {
/**
* Api Key
*
* Not used for Vertex AI - authentication is via the service account in `credentials` (or ADC). Leave blank.
* Not used for Vertex AI — authentication is via the service account in `credentials` (or ADC). Leave blank.
*/
api_key?: string | Array<string> | null;
/**
@ -2916,6 +2926,80 @@ export type HttpApiToolDefinition = {
config: HttpApiConfig;
};
/**
 * Hugging Face
 *
 * Hosted Hugging Face Inference Providers API for usage-based inference.
 * This shape carries the chat-completion (LLM) settings.
 */
export type HuggingFaceLlmConfiguration = {
    /** Provider: optional literal tag, `'huggingface'` when set. */
    provider?: 'huggingface';
    /** Api Key: required; one key string or an array of key strings. */
    api_key: string | string[];
    /** Model: Hugging Face chat-completion model identifier, optionally with provider suffix. */
    model?: string;
    /** Base Url: Hugging Face OpenAI-compatible chat-completions router base URL. */
    base_url?: string;
    /** Bill To: optional Hugging Face organization or user to bill using X-HF-Bill-To. */
    bill_to?: string | null;
};
/**
 * Hugging Face
 *
 * Hosted Hugging Face Inference Providers API for usage-based inference.
 * This shape carries the speech-to-text (ASR) settings.
 */
export type HuggingFaceSttConfiguration = {
    /** Provider: optional literal tag, `'huggingface'` when set. */
    provider?: 'huggingface';
    /** Api Key: required; one key string or an array of key strings. */
    api_key: string | string[];
    /** Model: Hugging Face ASR model identifier served through Inference Providers. */
    model?: string;
    /** Base Url: Hugging Face Inference Providers router base URL. */
    base_url?: string;
    /** Bill To: optional Hugging Face organization or user to bill using X-HF-Bill-To. */
    bill_to?: string | null;
    /** Return Timestamps: request timestamp chunks when supported by the selected provider/model. */
    return_timestamps?: boolean;
};
/**
* ImpersonateRequest
*
@ -4725,6 +4809,12 @@ export type SarvamTtsConfiguration = {
* BCP-47 Indian-language code (e.g. hi-IN, en-IN).
*/
language?: string;
/**
* Speed
*
* Speech speed multiplier.
*/
speed?: number;
};
/**
@ -4823,6 +4913,74 @@ export type SignupRequest = {
name?: string | null;
};
/**
 * Smallest AI
 *
 * Smallest AI ultralow-latency TTS (Waves) and STT (Pulse) APIs.
 * This shape carries the speech-to-text settings.
 */
export type SmallestAisttConfiguration = {
    /** Provider: optional literal tag, `'smallest'` when set. */
    provider?: 'smallest';
    /** Api Key: required; one key string or an array of key strings. */
    api_key: string | string[];
    /** Model: Smallest AI STT model. Supports 38 languages with real-time streaming. */
    model?: string;
    /** Language: ISO 639-1 language code for transcription. */
    language?: string;
};
/**
 * Smallest AI
 *
 * Smallest AI ultralow-latency TTS (Waves) and STT (Pulse) APIs.
 * This shape carries the text-to-speech settings.
 */
export type SmallestAittsConfiguration = {
    /** Provider: optional literal tag, `'smallest'` when set. */
    provider?: 'smallest';
    /** Api Key: required; one key string or an array of key strings. */
    api_key: string | string[];
    /**
     * Model: Smallest AI TTS model. lightning_v3.1_pro is the premium pool
     * (American, British, Indian accents); lightning_v3.1 is the standard pool
     * with 217 voices across 12 languages.
     */
    model?: string;
    /** Voice: Smallest AI voice ID. */
    voice?: string;
    /** Language: ISO 639-1 language code for synthesis. */
    language?: string;
    /** Speed: speech speed multiplier (0.5 to 2.0). */
    speed?: number;
};
/**
* Local Models (Speaches)
*
@ -6415,6 +6573,14 @@ export type WorkflowRunResponseSchema = {
* Recording Url
*/
recording_url: string | null;
/**
* User Recording Url
*/
user_recording_url?: string | null;
/**
* Bot Recording Url
*/
bot_recording_url?: string | null;
/**
* Transcript Public Url
*/
@ -6423,6 +6589,14 @@ export type WorkflowRunResponseSchema = {
* Recording Public Url
*/
recording_public_url?: string | null;
/**
* User Recording Public Url
*/
user_recording_public_url?: string | null;
/**
* Bot Recording Public Url
*/
bot_recording_public_url?: string | null;
/**
* Public Access Token
*/
@ -6582,6 +6756,14 @@ export type WorkflowRunUsageResponse = {
* Transcript Url
*/
transcript_url?: string | null;
/**
* User Recording Url
*/
user_recording_url?: string | null;
/**
* Bot Recording Url
*/
bot_recording_url?: string | null;
/**
* Recording Public Url
*/
@ -6590,6 +6772,14 @@ export type WorkflowRunUsageResponse = {
* Transcript Public Url
*/
transcript_public_url?: string | null;
/**
* User Recording Public Url
*/
user_recording_public_url?: string | null;
/**
* Bot Recording Public Url
*/
bot_recording_public_url?: string | null;
/**
* Public Access Token
*/
@ -12464,7 +12654,7 @@ export type DownloadWorkflowArtifactApiV1PublicDownloadWorkflowTokenArtifactType
/**
* Artifact Type
*/
artifact_type: 'recording' | 'transcript';
artifact_type: string;
};
query?: {
/**

View file

@ -11,7 +11,7 @@ import {
type ServiceSegment,
} from "@/components/ServiceConfigurationForm";
import { Button } from "@/components/ui/button";
import { Card, CardContent } from "@/components/ui/card";
import { Checkbox } from "@/components/ui/checkbox";
import { Input } from "@/components/ui/input";
import { Label } from "@/components/ui/label";
import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from "@/components/ui/select";
@ -22,6 +22,7 @@ type ModelMode = "realtime" | "dograh" | "byok";
interface DograhDefaults {
voices: string[];
allow_custom_input?: boolean;
speeds: number[];
languages: string[];
defaults: {
@ -266,16 +267,21 @@ export function AIModelConfigurationV2Editor({
const [realtimeInitialConfig, setRealtimeInitialConfig] = useState<Record<string, unknown> | null>(null);
const [pipelineInitialConfig, setPipelineInitialConfig] = useState<Record<string, unknown> | null>(null);
const [isSavingDograh, setIsSavingDograh] = useState(false);
const [isCustomVoice, setIsCustomVoice] = useState(false);
const [error, setError] = useState<string | null>(null);
const allowCustomVoice = defaults.dograh.allow_custom_input ?? false;
useEffect(() => {
const rawConfiguration = asRecord(configuration);
const rawEffectiveConfiguration = asRecord(effectiveConfiguration);
setMode(preferredMode(rawConfiguration, rawEffectiveConfiguration));
setDograh(buildDograhState(defaults, rawConfiguration, rawEffectiveConfiguration));
const nextDograh = buildDograhState(defaults, rawConfiguration, rawEffectiveConfiguration);
setDograh(nextDograh);
setIsCustomVoice(allowCustomVoice && !defaults.dograh.voices.includes(nextDograh.voice));
setRealtimeInitialConfig(getByokInitialConfig(rawConfiguration, rawEffectiveConfiguration, true));
setPipelineInitialConfig(getByokInitialConfig(rawConfiguration, rawEffectiveConfiguration, false));
}, [configuration, defaults, effectiveConfiguration]);
}, [configuration, defaults, effectiveConfiguration, allowCustomVoice]);
const saveDograhConfiguration = async () => {
setIsSavingDograh(true);
@ -360,11 +366,17 @@ export function AIModelConfigurationV2Editor({
</TabsContent>
<TabsContent value="dograh" className="mt-0">
<Card>
<CardContent className="pt-6">
<div className="grid gap-4 sm:grid-cols-2">
<div className="space-y-2">
<Label>Voice</Label>
<div className="rounded-lg border p-5">
<div className="grid gap-4 sm:grid-cols-2">
<div className="space-y-2">
<Label>Voice</Label>
{isCustomVoice ? (
<Input
placeholder="Enter voice"
value={dograh.voice}
onChange={(event) => setDograh({ ...dograh, voice: event.target.value })}
/>
) : (
<Select value={dograh.voice} onValueChange={(voice) => setDograh({ ...dograh, voice })}>
<SelectTrigger className="w-full">
<SelectValue placeholder="Select voice" />
@ -377,64 +389,82 @@ export function AIModelConfigurationV2Editor({
))}
</SelectContent>
</Select>
</div>
<div className="space-y-2">
<Label>Speed</Label>
<Select
value={String(dograh.speed)}
onValueChange={(speed) => setDograh({ ...dograh, speed: Number(speed) })}
>
<SelectTrigger className="w-full">
<SelectValue placeholder="Select speed" />
</SelectTrigger>
<SelectContent>
{defaults.dograh.speeds.map((speed) => (
<SelectItem key={speed} value={String(speed)}>
{speed}x
</SelectItem>
))}
</SelectContent>
</Select>
</div>
<div className="space-y-2 sm:col-span-2">
<Label>Language</Label>
<Select value={dograh.language} onValueChange={(language) => setDograh({ ...dograh, language })}>
<SelectTrigger className="w-full">
<SelectValue placeholder="Select language" />
</SelectTrigger>
<SelectContent>
{defaults.dograh.languages.map((language) => (
<SelectItem key={language} value={language}>
{LANGUAGE_DISPLAY_NAMES[language] || language}
</SelectItem>
))}
</SelectContent>
</Select>
</div>
<div className="space-y-2 sm:col-span-2">
<Label htmlFor="dograh-api-key">API Key</Label>
<div className="relative">
<KeyRound className="pointer-events-none absolute left-3 top-1/2 h-4 w-4 -translate-y-1/2 text-muted-foreground" />
<Input
id="dograh-api-key"
className="pl-9"
value={dograh.api_key}
onChange={(event) => setDograh({ ...dograh, api_key: event.target.value })}
placeholder="Enter API key"
)}
{allowCustomVoice && (
<div className="flex items-center space-x-2">
<Checkbox
id="dograh-custom-voice"
checked={isCustomVoice}
onCheckedChange={(checked) => {
const custom = checked as boolean;
setIsCustomVoice(custom);
if (!custom) {
setDograh({ ...dograh, voice: defaults.dograh.defaults.voice });
}
}}
/>
<Label htmlFor="dograh-custom-voice" className="text-sm font-normal cursor-pointer">
Enter Custom Value
</Label>
</div>
</div>
)}
</div>
<Button type="button" className="mt-6 w-full" onClick={saveDograhConfiguration} disabled={isSavingDograh}>
<Save className="mr-2 h-4 w-4" />
{isSavingDograh ? "Saving..." : submitLabel}
</Button>
</CardContent>
</Card>
<div className="space-y-2">
<Label>Speed</Label>
<Select
value={String(dograh.speed)}
onValueChange={(speed) => setDograh({ ...dograh, speed: Number(speed) })}
>
<SelectTrigger className="w-full">
<SelectValue placeholder="Select speed" />
</SelectTrigger>
<SelectContent>
{defaults.dograh.speeds.map((speed) => (
<SelectItem key={speed} value={String(speed)}>
{speed}x
</SelectItem>
))}
</SelectContent>
</Select>
</div>
<div className="space-y-2 sm:col-span-2">
<Label>Language</Label>
<Select value={dograh.language} onValueChange={(language) => setDograh({ ...dograh, language })}>
<SelectTrigger className="w-full">
<SelectValue placeholder="Select language" />
</SelectTrigger>
<SelectContent>
{defaults.dograh.languages.map((language) => (
<SelectItem key={language} value={language}>
{LANGUAGE_DISPLAY_NAMES[language] || language}
</SelectItem>
))}
</SelectContent>
</Select>
</div>
<div className="space-y-2 sm:col-span-2">
<Label htmlFor="dograh-api-key">API Key</Label>
<div className="relative">
<KeyRound className="pointer-events-none absolute left-3 top-1/2 h-4 w-4 -translate-y-1/2 text-muted-foreground" />
<Input
id="dograh-api-key"
className="pl-9"
value={dograh.api_key}
onChange={(event) => setDograh({ ...dograh, api_key: event.target.value })}
placeholder="Enter API key"
/>
</div>
</div>
</div>
<Button type="button" className="mt-6 w-full" onClick={saveDograhConfiguration} disabled={isSavingDograh}>
<Save className="mr-2 h-4 w-4" />
{isSavingDograh ? "Saving..." : submitLabel}
</Button>
</div>
</TabsContent>
<TabsContent value="byok" className="mt-0">