feat: add recording audio option in tool and node transitions (#232)

* feat: allow uploading recording as part of node transition

* feat: allow recordings in tool transitions

* chore: fix tests
This commit is contained in:
Abhishek 2026-04-10 17:53:42 +05:30 committed by GitHub
parent 3f19a16e7f
commit 7c245051d2
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
54 changed files with 3575 additions and 640 deletions

View file

@ -0,0 +1,108 @@
"""dedup recordings to org-scoped unique audio
Revision ID: 3cd3155084a2
Revises: e7254d2c6c18
Create Date: 2026-04-10 12:00:00.000000
"""
from typing import Sequence, Union
import sqlalchemy as sa
from alembic import op
# Revision identifiers, used by Alembic to order this migration in the chain.
revision: str = "3cd3155084a2"
# Parent revision this migration is applied on top of.
down_revision: Union[str, None] = "e7254d2c6c18"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Collapse duplicate active recordings onto one canonical row per group.

    A group is every active row sharing the same organization, transcript and
    TTS provider/model/voice. The earliest row by ``created_at`` is kept as
    the canonical one. References to the other rows ("aliases") inside
    workflow JSON are rewritten to the canonical ``recording_id``, and the
    alias rows are soft-deleted (``is_active = false``). Returns early,
    touching nothing, when no duplicates exist.
    """
    conn = op.get_bind()
    # 1. Identify duplicate groups: same (org, transcript, tts config).
    #    Within each group the earliest row (by created_at) is canonical;
    #    every other row is an alias that will be remapped and soft-deleted.
    # NOTE(review): the join compares the grouping columns with `=`, which
    # never matches NULL values; this presumably relies on the tts_* columns
    # still being NOT NULL at this revision -- confirm.
    rows = conn.execute(
        sa.text("""
            WITH ranked AS (
                SELECT
                    recording_id,
                    organization_id,
                    transcript,
                    tts_provider,
                    tts_model,
                    tts_voice_id,
                    ROW_NUMBER() OVER (
                        PARTITION BY organization_id, transcript,
                                     tts_provider, tts_model, tts_voice_id
                        ORDER BY created_at ASC
                    ) AS rn
                FROM workflow_recordings
                WHERE is_active = true
            ),
            canonical AS (
                SELECT recording_id AS canonical_id,
                       organization_id, transcript,
                       tts_provider, tts_model, tts_voice_id
                FROM ranked
                WHERE rn = 1
            )
            SELECT r.recording_id AS alias_id, c.canonical_id
            FROM ranked r
            JOIN canonical c
                ON r.organization_id = c.organization_id
                AND r.transcript = c.transcript
                AND r.tts_provider = c.tts_provider
                AND r.tts_model = c.tts_model
                AND r.tts_voice_id = c.tts_voice_id
            WHERE r.rn > 1
        """)
    ).fetchall()
    # Nothing to remap or soft-delete when there are no alias rows.
    if not rows:
        return
    # 2. Replace alias recording_ids with canonical ones in workflow JSON.
    #    Both draft definitions (workflows.workflow_definition) and published
    #    versions (workflow_definitions.workflow_json) are updated.
    #    The rewrite is a textual REPLACE on the JSON cast to text, run once
    #    per alias against each table, and is not filtered by organization.
    # NOTE(review): the missing organization filter presumably relies on
    # recording_id still being globally unique at this revision -- confirm.
    for alias_id, canonical_id in rows:
        # Match the "RECORDING_ID: <id>" form rather than the bare ID.
        alias_pattern = f"RECORDING_ID: {alias_id}"
        canonical_pattern = f"RECORDING_ID: {canonical_id}"
        conn.execute(
            sa.text("""
                UPDATE workflows
                SET workflow_definition =
                    REPLACE(workflow_definition::text, :alias, :canonical)::json
                WHERE workflow_definition::text LIKE '%%' || :alias || '%%'
            """),
            {"alias": alias_pattern, "canonical": canonical_pattern},
        )
        conn.execute(
            sa.text("""
                UPDATE workflow_definitions
                SET workflow_json =
                    REPLACE(workflow_json::text, :alias, :canonical)::json
                WHERE workflow_json::text LIKE '%%' || :alias || '%%'
            """),
            {"alias": alias_pattern, "canonical": canonical_pattern},
        )
    # 3. Soft-delete every alias row.
    #    The first column of each result row is the alias recording_id.
    alias_ids = [r[0] for r in rows]
    conn.execute(
        sa.text("""
            UPDATE workflow_recordings
            SET is_active = false
            WHERE recording_id = ANY(:ids)
              AND is_active = true
        """),
        {"ids": alias_ids},
    )
def downgrade() -> None:
    """Intentionally do nothing; the deduplication is not reversed."""
    # Deduplication is a one-way data migration. The soft-deleted rows
    # still exist in the table; a manual restore is possible if needed.
    pass

View file

@ -0,0 +1,109 @@
"""make recordings org-scoped instead of workflow-scoped
Revision ID: 67a5cf3e09d0
Revises: 3cd3155084a2
Create Date: 2026-04-09 17:03:38.302041
"""
from typing import Sequence, Union
import sqlalchemy as sa
from alembic import op
# Revision identifiers, used by Alembic to order this migration in the chain.
revision: str = "67a5cf3e09d0"
# Parent revision: the recording deduplication migration.
down_revision: Union[str, None] = "3cd3155084a2"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Re-scope ``workflow_recordings`` from a workflow to an organization.

    Widens ``recording_id`` to 64 characters, makes ``workflow_id`` optional,
    swaps the unique index on ``recording_id`` for a plain lookup index plus
    a unique constraint on ``(recording_id, organization_id)``, and drops the
    workflow + TTS scope index.
    """
    table = "workflow_recordings"
    recording_id_index = "ix_workflow_recordings_recording_id"

    # Descriptive names need more room than the previous 16 characters.
    op.alter_column(
        table,
        "recording_id",
        existing_type=sa.VARCHAR(length=16),
        type_=sa.String(length=64),
        existing_nullable=False,
    )

    # A recording no longer has to belong to a workflow.
    op.alter_column(
        table,
        "workflow_id",
        existing_type=sa.Integer(),
        nullable=True,
    )

    # Uniqueness moves from the index to the (recording_id, organization_id)
    # constraint; the index is re-created non-unique and kept for lookups.
    op.drop_index(recording_id_index, table_name=table)
    op.create_index(
        recording_id_index,
        table,
        ["recording_id"],
        unique=False,
    )
    op.create_unique_constraint(
        "uq_workflow_recordings_recording_id_org",
        table,
        ["recording_id", "organization_id"],
    )

    # The workflow + TTS scope index is no longer relevant.
    op.drop_index("ix_workflow_recordings_tts_scope", table_name=table)
def downgrade() -> None:
    """Restore the workflow-scoped layout of ``workflow_recordings``.

    Undoes the upgrade in reverse order: re-creates the TTS scope index,
    drops the per-organization unique constraint, makes the ``recording_id``
    index unique again, requires ``workflow_id`` and narrows ``recording_id``
    back to 16 characters.
    """
    # NOTE(review): these steps presumably fail on data written after the
    # upgrade (NULL workflow_id, IDs longer than 16 characters, or the same
    # recording_id in two organizations) -- confirm before running.
    table = "workflow_recordings"
    recording_id_index = "ix_workflow_recordings_recording_id"

    # Bring back the workflow + TTS scope index.
    op.create_index(
        "ix_workflow_recordings_tts_scope",
        table,
        ["workflow_id", "tts_provider", "tts_model", "tts_voice_id"],
    )

    # Uniqueness returns from the org-scoped constraint to the index.
    op.drop_constraint(
        "uq_workflow_recordings_recording_id_org",
        table,
        type_="unique",
    )
    op.drop_index(recording_id_index, table_name=table)
    op.create_index(
        recording_id_index,
        table,
        ["recording_id"],
        unique=True,
    )

    # Every recording must belong to a workflow again.
    op.alter_column(
        table,
        "workflow_id",
        existing_type=sa.Integer(),
        nullable=False,
    )

    # Shrink recording_id back to its original width.
    op.alter_column(
        table,
        "recording_id",
        existing_type=sa.String(length=64),
        type_=sa.VARCHAR(length=16),
        existing_nullable=False,
    )

View file

@ -0,0 +1,42 @@
"""make tts columns nullable on workflow_recordings
Revision ID: a1b2c3d4e5f6
Revises: 67a5cf3e09d0
Create Date: 2026-04-10 12:00:00.000000
"""
from typing import Sequence, Union
import sqlalchemy as sa
from alembic import op
# Revision identifiers, used by Alembic to order this migration in the chain.
revision: str = "a1b2c3d4e5f6"
# Parent revision: the org-scoping migration.
down_revision: Union[str, None] = "67a5cf3e09d0"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Allow NULL in the three TTS columns of ``workflow_recordings``."""
    for column in ("tts_provider", "tts_model", "tts_voice_id"):
        op.alter_column(
            "workflow_recordings",
            column,
            existing_type=sa.String(),
            nullable=True,
        )
def downgrade() -> None:
    """Make the three TTS columns of ``workflow_recordings`` NOT NULL again.

    Columns are processed in the reverse of the upgrade order.
    """
    # NOTE(review): presumably fails if any row has NULL in one of these
    # columns by the time the downgrade runs -- confirm.
    for column in ("tts_voice_id", "tts_model", "tts_provider"):
        op.alter_column(
            "workflow_recordings",
            column,
            existing_type=sa.String(),
            nullable=False,
        )

View file

@ -1005,7 +1005,7 @@ class KnowledgeBaseDocumentModel(Base):
class WorkflowRecordingModel(Base):
"""Model for storing audio recordings scoped to a workflow and TTS configuration.
"""Model for storing audio recordings scoped to an organization.
Recordings are used in hybrid prompts where parts of the output are pre-recorded
audio rather than dynamically generated TTS.
@ -1015,21 +1015,21 @@ class WorkflowRecordingModel(Base):
id = Column(Integer, primary_key=True, index=True)
# Short globally unique ID (e.g. "xbhfha3k") used in prompts
recording_id = Column(String(16), unique=True, nullable=False, index=True)
# Descriptive ID used in prompts (unique per organization)
recording_id = Column(String(64), nullable=False, index=True)
# Scoping
workflow_id = Column(
Integer, ForeignKey("workflows.id", ondelete="CASCADE"), nullable=False
Integer, ForeignKey("workflows.id", ondelete="CASCADE"), nullable=True
)
organization_id = Column(
Integer, ForeignKey("organizations.id", ondelete="CASCADE"), nullable=False
)
# TTS configuration scope
tts_provider = Column(String, nullable=False)
tts_model = Column(String, nullable=False)
tts_voice_id = Column(String, nullable=False)
# TTS configuration metadata (optional, legacy)
tts_provider = Column(String, nullable=True)
tts_model = Column(String, nullable=True)
tts_voice_id = Column(String, nullable=True)
# Content
transcript = Column(Text, nullable=False)
@ -1062,16 +1062,14 @@ class WorkflowRecordingModel(Base):
# Indexes
__table_args__ = (
UniqueConstraint(
"recording_id",
"organization_id",
name="uq_workflow_recordings_recording_id_org",
),
Index("ix_workflow_recordings_workflow_id", "workflow_id"),
Index("ix_workflow_recordings_org_id", "organization_id"),
Index("ix_workflow_recordings_recording_id", "recording_id"),
Index(
"ix_workflow_recordings_tts_scope",
"workflow_id",
"tts_provider",
"tts_model",
"tts_voice_id",
),
)

View file

@ -5,7 +5,7 @@ import string
from typing import List, Optional
from loguru import logger
from sqlalchemy import func, select
from sqlalchemy import func, select, text
from api.db.base_client import BaseDBClient
from api.db.models import WorkflowRecordingModel
@ -23,30 +23,30 @@ class WorkflowRecordingClient(BaseDBClient):
async def create_recording(
self,
recording_id: str,
workflow_id: int,
organization_id: int,
tts_provider: str,
tts_model: str,
tts_voice_id: str,
transcript: str,
storage_key: str,
storage_backend: str,
created_by: int,
workflow_id: Optional[int] = None,
tts_provider: Optional[str] = None,
tts_model: Optional[str] = None,
tts_voice_id: Optional[str] = None,
metadata: Optional[dict] = None,
) -> WorkflowRecordingModel:
"""Create a new workflow recording record.
Args:
recording_id: Short unique recording identifier
workflow_id: ID of the workflow
organization_id: ID of the organization
tts_provider: TTS provider name
tts_model: TTS model name
tts_voice_id: TTS voice identifier
transcript: User-provided transcript
storage_key: S3/MinIO storage key
storage_backend: Storage backend (s3 or minio)
created_by: ID of the user
workflow_id: Optional workflow ID (legacy)
tts_provider: Optional TTS provider name
tts_model: Optional TTS model name
tts_voice_id: Optional TTS voice identifier
metadata: Optional extra metadata
Returns:
@ -71,25 +71,22 @@ class WorkflowRecordingClient(BaseDBClient):
await session.commit()
await session.refresh(recording)
logger.info(
f"Created recording {recording_id} for workflow {workflow_id}, "
f"org {organization_id}"
)
logger.info(f"Created recording {recording_id} for org {organization_id}")
return recording
async def get_recordings_for_workflow(
async def get_recordings(
self,
workflow_id: int,
organization_id: int,
workflow_id: Optional[int] = None,
tts_provider: Optional[str] = None,
tts_model: Optional[str] = None,
tts_voice_id: Optional[str] = None,
) -> List[WorkflowRecordingModel]:
"""Get recordings for a workflow, optionally filtered by TTS config.
"""Get recordings for an organization, optionally filtered.
Args:
workflow_id: ID of the workflow
organization_id: ID of the organization
workflow_id: Optional workflow ID filter
tts_provider: Optional TTS provider filter
tts_model: Optional TTS model filter
tts_voice_id: Optional TTS voice ID filter
@ -99,11 +96,12 @@ class WorkflowRecordingClient(BaseDBClient):
"""
async with self.async_session() as session:
query = select(WorkflowRecordingModel).where(
WorkflowRecordingModel.workflow_id == workflow_id,
WorkflowRecordingModel.organization_id == organization_id,
WorkflowRecordingModel.is_active == True,
)
if workflow_id is not None:
query = query.where(WorkflowRecordingModel.workflow_id == workflow_id)
if tts_provider:
query = query.where(WorkflowRecordingModel.tts_provider == tts_provider)
if tts_model:
@ -140,15 +138,37 @@ class WorkflowRecordingClient(BaseDBClient):
result = await session.execute(query)
return result.scalar_one_or_none()
async def has_active_recordings(
async def get_recording_by_id(
self,
workflow_id: int,
id: int,
organization_id: int,
) -> bool:
"""Check if a workflow has any active recordings.
) -> Optional[WorkflowRecordingModel]:
"""Get a recording by its integer primary key.
Args:
id: The primary key ID
organization_id: ID of the organization
Returns:
WorkflowRecordingModel if found, None otherwise
"""
async with self.async_session() as session:
query = select(WorkflowRecordingModel).where(
WorkflowRecordingModel.id == id,
WorkflowRecordingModel.organization_id == organization_id,
WorkflowRecordingModel.is_active == True,
)
result = await session.execute(query)
return result.scalar_one_or_none()
async def has_active_recordings(
self,
organization_id: int,
) -> bool:
"""Check if an organization has any active recordings.
Args:
workflow_id: ID of the workflow
organization_id: ID of the organization
Returns:
@ -159,7 +179,6 @@ class WorkflowRecordingClient(BaseDBClient):
select(func.count())
.select_from(WorkflowRecordingModel)
.where(
WorkflowRecordingModel.workflow_id == workflow_id,
WorkflowRecordingModel.organization_id == organization_id,
WorkflowRecordingModel.is_active == True,
)
@ -167,11 +186,14 @@ class WorkflowRecordingClient(BaseDBClient):
result = await session.execute(query)
return result.scalar_one() > 0
async def check_recording_id_exists(self, recording_id: str) -> bool:
"""Check if a recording ID already exists globally.
async def check_recording_id_exists(
self, recording_id: str, organization_id: int
) -> bool:
"""Check if a recording ID already exists within an organization.
Args:
recording_id: The short recording ID to check
recording_id: The recording ID to check
organization_id: ID of the organization
Returns:
True if exists, False otherwise
@ -179,10 +201,125 @@ class WorkflowRecordingClient(BaseDBClient):
async with self.async_session() as session:
query = select(WorkflowRecordingModel.id).where(
WorkflowRecordingModel.recording_id == recording_id,
WorkflowRecordingModel.organization_id == organization_id,
WorkflowRecordingModel.is_active == True,
)
result = await session.execute(query)
return result.scalar_one_or_none() is not None
async def update_recording_id(
    self,
    id: int,
    new_recording_id: str,
    organization_id: int,
) -> Optional[WorkflowRecordingModel]:
    """Rename a recording by assigning it a new ``recording_id``.

    Only an active recording with the given primary key inside the given
    organization is considered. The change is committed and the row is
    refreshed before it is returned.

    Args:
        id: Primary key ID of the recording
        new_recording_id: Recording ID to store on the row
        organization_id: ID of the organization that must own the recording

    Returns:
        The refreshed WorkflowRecordingModel, or None when no matching
        active recording exists
    """
    async with self.async_session() as session:
        lookup = select(WorkflowRecordingModel).where(
            WorkflowRecordingModel.id == id,
            WorkflowRecordingModel.organization_id == organization_id,
            WorkflowRecordingModel.is_active == True,
        )
        recording = (await session.execute(lookup)).scalar_one_or_none()
        if not recording:
            return None

        # Remember the previous value for the log line below.
        old_id = recording.recording_id
        recording.recording_id = new_recording_id
        await session.commit()
        await session.refresh(recording)

        logger.info(
            f"Updated recording ID {old_id} -> {new_recording_id}, "
            f"org {organization_id}"
        )
        return recording
async def replace_recording_id_in_workflows(
    self,
    old_id: str,
    new_id: str,
    organization_id: int,
) -> int:
    """Replace references to a recording ID in an organization's workflows.

    Rewrites ``RECORDING_ID: <old_id>`` to ``RECORDING_ID: <new_id>`` in both
    draft definitions (workflows.workflow_definition) and versioned
    definitions (workflow_definitions.workflow_json), skipping
    workflow_definitions with status 'legacy'. Both updates run in one
    session and are committed together.

    Only whole IDs are rewritten: a reference matches only when the
    character following ``old_id`` is not itself an ID character (letter,
    digit, hyphen or underscore). Renaming ``greet`` therefore leaves
    references to ``greeting`` untouched, and rows that merely contain a
    longer ID are neither rewritten nor counted.

    Args:
        old_id: The old recording ID to find
        new_id: The new recording ID to replace with
        organization_id: ID of the organization (scopes to org workflows)

    Returns:
        Total number of rows updated across both tables
    """
    import re  # function-scope import: only this method needs it

    # Match the exact pattern used in prompts: "RECORDING_ID: <id>".
    # The ID is escaped so it is always taken literally, and the negative
    # lookahead stops the match from ending in the middle of a longer ID.
    old_regex = "RECORDING_ID: " + re.escape(old_id) + r"(?![A-Za-z0-9_-])"
    # In regexp_replace's replacement text a backslash introduces escapes
    # such as \1 and \&, so literal backslashes must be doubled.
    new_replacement = f"RECORDING_ID: {new_id}".replace("\\", "\\\\")
    params = {
        "old_re": old_regex,
        "new_rep": new_replacement,
        "org_id": organization_id,
    }
    total = 0

    async with self.async_session() as session:
        # Update workflows.workflow_definition (draft definitions)
        result = await session.execute(
            text("""
                UPDATE workflows
                SET workflow_definition =
                    regexp_replace(
                        workflow_definition::text, :old_re, :new_rep, 'g'
                    )::json
                WHERE organization_id = :org_id
                  AND workflow_definition::text ~ :old_re
            """),
            params,
        )
        total += result.rowcount

        # Update workflow_definitions.workflow_json (versioned definitions)
        # Skip legacy definitions
        result = await session.execute(
            text("""
                UPDATE workflow_definitions wd
                SET workflow_json =
                    regexp_replace(
                        wd.workflow_json::text, :old_re, :new_rep, 'g'
                    )::json
                FROM workflows w
                WHERE wd.workflow_id = w.id
                  AND w.organization_id = :org_id
                  AND wd.status != 'legacy'
                  AND wd.workflow_json::text ~ :old_re
            """),
            params,
        )
        total += result.rowcount

        await session.commit()

    if total > 0:
        logger.info(
            f"Replaced recording ID '{old_id}' -> '{new_id}' "
            f"in {total} workflow definition(s), org {organization_id}"
        )
    return total
async def delete_recording(
self,
recording_id: str,

View file

@ -178,6 +178,11 @@ async def initiate_call(
workflow_run_id = request.workflow_run_id
if not workflow_run_id:
# Fetch workflow to merge template context variables (e.g. caller_number,
# called_number set in workflow settings for testing pre-call data fetch)
workflow = await db_client.get_workflow_by_id(request.workflow_id)
template_vars = (workflow.template_context_variables or {}) if workflow else {}
numeric_suffix = int(str(uuid.uuid4()).replace("-", "")[:8], 16) % 100000000
workflow_run_name = f"WR-TEL-OUT-{numeric_suffix:08d}"
workflow_run = await db_client.create_workflow_run(
@ -187,6 +192,7 @@ async def initiate_call(
user_id=user.id,
call_type=CallType.OUTBOUND,
initial_context={
**template_vars,
"phone_number": phone_number,
"called_number": phone_number,
"provider": provider.PROVIDER_NAME,

View file

@ -45,17 +45,29 @@ class HttpApiConfig(BaseModel):
timeout_ms: Optional[int] = Field(
default=5000, description="Request timeout in milliseconds"
)
customMessage: Optional[str] = Field(
default=None, description="Custom message to play after tool execution"
)
customMessageType: Optional[Literal["text", "audio"]] = Field(
default=None, description="Type of custom message: text or audio"
)
customMessageRecordingId: Optional[str] = Field(
default=None, description="Recording ID for audio custom message"
)
class EndCallConfig(BaseModel):
"""Configuration for End Call tools."""
messageType: Literal["none", "custom"] = Field(
messageType: Literal["none", "custom", "audio"] = Field(
default="none", description="Type of goodbye message"
)
customMessage: Optional[str] = Field(
default=None, description="Custom message to play before ending the call"
)
audioRecordingId: Optional[str] = Field(
default=None, description="Recording ID for audio goodbye message"
)
endCallReason: bool = Field(
default=False,
description="When enabled, LLM must provide a reason for ending the call. "
@ -74,12 +86,15 @@ class TransferCallConfig(BaseModel):
destination: str = Field(
description="Phone number or SIP endpoint to transfer the call to (E.164 format e.g., +1234567890, or SIP endpoint e.g., PJSIP/1234)"
)
messageType: Literal["none", "custom"] = Field(
messageType: Literal["none", "custom", "audio"] = Field(
default="none", description="Type of message to play before transfer"
)
customMessage: Optional[str] = Field(
default=None, description="Custom message to play before transferring the call"
)
audioRecordingId: Optional[str] = Field(
default=None, description="Recording ID for audio message before transfer"
)
timeout: int = Field(
default=30,
ge=5,

View file

@ -16,6 +16,7 @@ from api.schemas.workflow_recording import (
BatchRecordingUploadResponseSchema,
RecordingListResponseSchema,
RecordingResponseSchema,
RecordingUpdateRequestSchema,
RecordingUploadResponseSchema,
)
from api.services.auth.depends import get_user
@ -25,11 +26,11 @@ from api.services.storage import storage_fs
router = APIRouter(prefix="/workflow-recordings", tags=["workflow-recordings"])
async def _generate_unique_recording_id() -> str:
"""Generate a globally unique short recording ID."""
async def _generate_unique_recording_id(organization_id: int) -> str:
"""Generate a unique short recording ID within an organization."""
for _ in range(10):
rid = generate_short_id(8)
exists = await db_client.check_recording_id_exists(rid)
exists = await db_client.check_recording_id_exists(rid, organization_id)
if not exists:
return rid
raise HTTPException(
@ -69,11 +70,13 @@ async def get_upload_urls(
try:
items = []
for fd in request.files:
recording_id = await _generate_unique_recording_id()
recording_id = await _generate_unique_recording_id(
user.selected_organization_id
)
storage_key = (
f"recordings/{user.selected_organization_id}"
f"/{request.workflow_id}/{recording_id}"
f"/{recording_id}"
f"/{fd.filename}"
)
@ -100,7 +103,7 @@ async def get_upload_urls(
logger.info(
f"Generated {len(items)} recording upload URL(s), "
f"workflow {request.workflow_id}, org {user.selected_organization_id}"
f"org {user.selected_organization_id}"
)
return BatchRecordingUploadResponseSchema(items=items)
@ -131,22 +134,20 @@ async def create_recordings(
for rec_req in request.recordings:
recording = await db_client.create_recording(
recording_id=rec_req.recording_id,
workflow_id=rec_req.workflow_id,
organization_id=user.selected_organization_id,
tts_provider=rec_req.tts_provider,
tts_model=rec_req.tts_model,
tts_voice_id=rec_req.tts_voice_id,
transcript=rec_req.transcript,
storage_key=rec_req.storage_key,
storage_backend=backend.value,
created_by=user.id,
tts_provider=rec_req.tts_provider,
tts_model=rec_req.tts_model,
tts_voice_id=rec_req.tts_voice_id,
metadata=rec_req.metadata,
)
results.append(_build_response(recording))
logger.info(
f"Created {len(results)} recording(s) for "
f"workflow {request.recordings[0].workflow_id}"
f"Created {len(results)} recording(s) for org {user.selected_organization_id}"
)
return BatchRecordingCreateResponseSchema(recordings=results)
@ -163,10 +164,12 @@ async def create_recordings(
@router.get(
"/",
response_model=RecordingListResponseSchema,
summary="List recordings for a workflow",
summary="List recordings",
)
async def list_recordings(
workflow_id: Annotated[int, Query(description="Workflow ID")],
workflow_id: Annotated[
Optional[int], Query(description="Filter by workflow ID")
] = None,
tts_provider: Annotated[
Optional[str], Query(description="Filter by TTS provider")
] = None,
@ -178,11 +181,11 @@ async def list_recordings(
] = None,
user=Depends(get_user),
):
"""List recordings for a workflow, optionally filtered by TTS configuration."""
"""List recordings for the organization, optionally filtered."""
try:
recordings = await db_client.get_recordings_for_workflow(
workflow_id=workflow_id,
recordings = await db_client.get_recordings(
organization_id=user.selected_organization_id,
workflow_id=workflow_id,
tts_provider=tts_provider,
tts_model=tts_model,
tts_voice_id=tts_voice_id,
@ -233,6 +236,74 @@ async def delete_recording(
) from exc
@router.patch(
    "/{id}",
    response_model=RecordingResponseSchema,
    summary="Update a recording's Recording ID",
)
async def update_recording(
    id: int,
    request: RecordingUpdateRequestSchema,
    user=Depends(get_user),
):
    """Rename a recording and propagate the new ID to workflow definitions.

    Responds 400 for an empty ID, 404 when the recording is not found in the
    caller's organization, and 409 when the requested ID is already in use.
    A request that keeps the current ID returns the recording unchanged.
    Any unexpected error is logged and reported as a 500.
    """
    # NOTE(review): the rename and the workflow rewrite are two separate
    # database calls, so a failure in the second presumably leaves workflows
    # pointing at the old ID -- confirm whether that needs handling.
    try:
        requested_id = request.recording_id.strip()
        if not requested_id:
            raise HTTPException(status_code=400, detail="Recording ID cannot be empty")

        org_id = user.selected_organization_id

        current = await db_client.get_recording_by_id(id, org_id)
        if not current:
            raise HTTPException(status_code=404, detail="Recording not found")

        previous_id = current.recording_id
        if requested_id == previous_id:
            # Nothing to change; echo the recording back as-is.
            return _build_response(current)

        if await db_client.check_recording_id_exists(requested_id, org_id):
            raise HTTPException(
                status_code=409,
                detail=f"Recording ID '{requested_id}' is already in use",
            )

        renamed = await db_client.update_recording_id(
            id=id,
            new_recording_id=requested_id,
            organization_id=org_id,
        )
        if not renamed:
            raise HTTPException(status_code=404, detail="Recording not found")

        # Replace old recording ID in all non-legacy workflow definitions
        rewritten = await db_client.replace_recording_id_in_workflows(
            old_id=previous_id,
            new_id=requested_id,
            organization_id=org_id,
        )
        if rewritten:
            logger.info(
                f"Updated {rewritten} workflow definition(s) with new recording ID "
                f"'{previous_id}' -> '{requested_id}'"
            )

        return _build_response(renamed)
    except HTTPException:
        # Deliberate HTTP errors pass through untouched.
        raise
    except Exception as exc:
        logger.error(f"Error updating recording: {exc}")
        raise HTTPException(
            status_code=500, detail="Failed to update recording"
        ) from exc
@router.post(
"/transcribe",
summary="Transcribe an audio file",

View file

@ -32,7 +32,6 @@ class FileDescriptor(BaseModel):
class BatchRecordingUploadRequestSchema(BaseModel):
"""Request schema for getting presigned upload URLs for one or more files."""
workflow_id: int = Field(..., description="Workflow ID these recordings belong to")
files: List[FileDescriptor] = Field(
..., min_length=1, max_length=20, description="List of files to upload"
)
@ -50,10 +49,13 @@ class RecordingCreateRequestSchema(BaseModel):
"""Request schema for creating a recording record after upload."""
recording_id: str = Field(..., description="Short recording ID from upload step")
workflow_id: int = Field(..., description="Workflow ID")
tts_provider: str = Field(..., description="TTS provider (e.g. elevenlabs)")
tts_model: str = Field(..., description="TTS model name")
tts_voice_id: str = Field(..., description="TTS voice identifier")
tts_provider: Optional[str] = Field(
default=None, description="TTS provider (e.g. elevenlabs)"
)
tts_model: Optional[str] = Field(default=None, description="TTS model name")
tts_voice_id: Optional[str] = Field(
default=None, description="TTS voice identifier"
)
transcript: str = Field(
..., description="User-provided transcript of the recording"
)
@ -68,11 +70,11 @@ class RecordingResponseSchema(BaseModel):
id: int
recording_id: str
workflow_id: int
workflow_id: Optional[int] = None
organization_id: int
tts_provider: str
tts_model: str
tts_voice_id: str
tts_provider: Optional[str] = None
tts_model: Optional[str] = None
tts_voice_id: Optional[str] = None
transcript: str
storage_key: str
storage_backend: str
@ -98,6 +100,18 @@ class BatchRecordingCreateResponseSchema(BaseModel):
)
class RecordingUpdateRequestSchema(BaseModel):
    """Request schema for updating a recording's ID.

    Carries only the new ``recording_id``. The field constraints restrict it
    to 1-64 characters made of letters, digits, hyphens and underscores.
    """

    # New descriptive ID; required (no default), validated by length and pattern.
    recording_id: str = Field(
        ...,
        min_length=1,
        max_length=64,
        pattern=r"^[a-zA-Z0-9_-]+$",
        description="New descriptive recording ID (letters, numbers, hyphens, underscores only)",
    )
class RecordingListResponseSchema(BaseModel):
"""Response schema for list of recordings."""

View file

@ -200,7 +200,6 @@ class CampaignCallDispatcher:
# Merge context variables (queued_run context already includes retry info if applicable)
initial_context = {
**workflow.template_context_variables,
**queued_run.context_variables,
"campaign_id": campaign.id,
"provider": provider.PROVIDER_NAME,

View file

@ -0,0 +1,188 @@
"""Utilities for playing audio through the pipeline transport.
Provides one-shot and looping playback of raw PCM audio. All playback
should be routed through ``transport.output().queue_frame`` so the audio
reaches the caller without passing through STT (which would otherwise
generate phantom transcriptions).
"""
import asyncio
import uuid
from typing import Awaitable, Callable, Dict, Optional, Tuple
import numpy as np
from loguru import logger
from pipecat.frames.frames import (
Frame,
OutputAudioRawFrame,
TTSAudioRawFrame,
TTSStartedFrame,
TTSStoppedFrame,
TTSTextFrame,
)
try:
import soundfile as sf
except ModuleNotFoundError as e:
logger.error(f"Exception: {e}")
logger.error("In order to use audio playback, you need to `pip install soundfile`.")
raise Exception(f"Missing module: {e}")
# ---------------------------------------------------------------------------
# Audio file loading / caching
# ---------------------------------------------------------------------------
_audio_cache: Dict[Tuple[str, int], bytes] = {}


def load_audio_file(file_path: str, sample_rate: int) -> Optional[bytes]:
    """Read an audio file as mono PCM-16 bytes, memoising successful loads.

    Results are cached per ``(file_path, sample_rate)`` pair. The file is
    never resampled: when its own rate differs from ``sample_rate`` only a
    warning is logged and the samples are returned as stored. For
    multi-channel files only the first channel is kept. Failed loads are
    logged and not cached.

    Args:
        file_path: Path to a WAV audio file.
        sample_rate: Expected sample rate; part of the cache key.

    Returns:
        Raw PCM-16 bytes, or *None* on failure.
    """
    key = (file_path, sample_rate)
    cached = _audio_cache.get(key)
    if cached is not None:
        logger.debug(f"Using cached audio for {file_path} at {sample_rate}Hz")
        return cached

    try:
        logger.info(f"Loading audio from {file_path} at {sample_rate}Hz")
        samples, native_rate = sf.read(file_path, dtype="int16")
        logger.info(
            f"Audio file loaded - file sample_rate: {native_rate}, target: {sample_rate}"
        )

        # Downmix is not attempted: keep channel 0 of multi-channel audio.
        if samples.ndim > 1:
            samples = samples[:, 0]

        if native_rate != sample_rate:
            logger.warning(
                f"Audio file has sample rate {native_rate}, expected {sample_rate}"
            )

        pcm = samples.astype(np.int16).tobytes()
        _audio_cache[key] = pcm
        logger.info(f"Audio loaded: {len(samples)} samples at {sample_rate}Hz")
        return pcm
    except Exception as exc:
        logger.error(f"Failed to load audio file {file_path}: {exc}")
        return None
def clear_audio_cache() -> None:
    """Clear the audio file cache to free memory.

    Empties the module-level ``_audio_cache`` in place and logs that the
    cache was cleared.
    """
    _audio_cache.clear()
    logger.info("Audio cache cleared")
# ---------------------------------------------------------------------------
# Playback helpers
# ---------------------------------------------------------------------------
async def play_audio(
    audio_data: bytes,
    *,
    sample_rate: int,
    queue_frame: Callable[[Frame], Awaitable[None]],
    transcript: Optional[str] = None,
    append_to_context: bool = False,
) -> None:
    """Queue one playback of raw PCM-16 audio.

    Frames are queued in the order ``TTSStartedFrame``, an optional
    ``TTSTextFrame``, ``TTSAudioRawFrame``, ``TTSStoppedFrame``, all sharing
    one freshly generated context ID. The text frame is only sent when
    *transcript* is non-empty; it is tagged ``aggregated_by="recording"``
    and carries the *append_to_context* flag.

    Args:
        audio_data: Raw 16-bit mono PCM bytes.
        sample_rate: Pipeline sample rate (e.g. 16000).
        queue_frame: Frame sink -- typically ``transport.output().queue_frame``.
        transcript: Optional transcript of the recording.
        append_to_context: Value stored on the text frame's
            ``append_to_context`` attribute. Defaults to False.
    """
    context_id = str(uuid.uuid4())

    def playback_frames():
        # Yield lazily so each frame is built just before it is queued.
        yield TTSStartedFrame(context_id=context_id)
        if transcript:
            text_frame = TTSTextFrame(
                text=transcript, aggregated_by="recording", context_id=context_id
            )
            text_frame.append_to_context = append_to_context
            yield text_frame
        yield TTSAudioRawFrame(
            audio=audio_data,
            sample_rate=sample_rate,
            num_channels=1,
            context_id=context_id,
        )
        yield TTSStoppedFrame(context_id=context_id)

    for frame in playback_frames():
        await queue_frame(frame)
async def play_audio_loop(
    *,
    stop_event: asyncio.Event,
    sample_rate: int,
    queue_frame: Callable[[Frame], Awaitable[None]],
    audio_file: Optional[str] = None,
) -> None:
    """Repeat an audio clip until *stop_event* is set.

    The whole clip is queued as a single ``OutputAudioRawFrame``; the loop
    then waits for either the stop event or the clip duration plus 1.5
    seconds before queueing it again. If the clip cannot be loaded a warning
    is logged and nothing is played. Errors raised inside the loop are
    logged rather than propagated.

    Args:
        stop_event: Set this event to terminate the loop.
        sample_rate: Target sample rate for audio playback.
        queue_frame: Frame sink -- typically ``transport.output().queue_frame``.
        audio_file: Path to a WAV file. When *None* the default
            ``transfer_hold_ring_{sample_rate}.wav`` asset is used.
    """
    if audio_file is None:
        from api.constants import APP_ROOT_DIR

        audio_file = str(
            APP_ROOT_DIR / "assets" / f"transfer_hold_ring_{sample_rate}.wav"
        )

    pcm = load_audio_file(audio_file, sample_rate)
    if not pcm:
        logger.warning(f"Audio loop: failed to load {audio_file}, skipping")
        return

    # 16-bit PCM = 2 bytes per sample
    clip_seconds = (len(pcm) // 2) / sample_rate
    replay_timeout = clip_seconds + 1.5
    logger.debug(f"Audio loop: playing at {sample_rate}Hz")

    try:
        while not stop_event.is_set():
            await queue_frame(
                OutputAudioRawFrame(
                    audio=pcm,
                    sample_rate=sample_rate,
                    num_channels=1,
                )
            )
            try:
                await asyncio.wait_for(stop_event.wait(), timeout=replay_timeout)
            except asyncio.TimeoutError:
                # Clip finished without a stop request: queue it again.
                continue
            else:
                # Stop was requested while waiting.
                break
    except Exception as exc:
        logger.error(f"Audio loop error: {exc}")

    logger.debug("Audio loop: stopped")

View file

@ -6,6 +6,7 @@ from api.db import db_client
from api.enums import PostHogEvent, WorkflowRunState
from api.services.campaign.circuit_breaker import circuit_breaker
from api.services.pipecat.audio_config import AudioConfig
from api.services.pipecat.audio_playback import play_audio, play_audio_loop
from api.services.pipecat.in_memory_buffers import (
InMemoryAudioBuffer,
InMemoryLogsBuffer,
@ -16,8 +17,11 @@ from api.services.posthog_client import capture_event
from api.services.workflow.pipecat_engine import PipecatEngine
from api.tasks.arq import enqueue_job
from api.tasks.function_names import FunctionNames
from api.utils.hold_audio import play_hold_audio_loop
from pipecat.frames.frames import Frame, LLMContextFrame, TTSSpeakFrame
from pipecat.frames.frames import (
Frame,
LLMContextFrame,
TTSSpeakFrame,
)
from pipecat.pipeline.task import PipelineTask
from pipecat.processors.audio.audio_buffer_processor import AudioBufferProcessor
from pipecat.utils.enums import EndTaskReason
@ -64,6 +68,7 @@ def register_event_handlers(
pipeline_metrics_aggregator: PipelineMetricsAggregator,
audio_config=AudioConfig,
pre_call_fetch_task: asyncio.Task | None = None,
fetch_recording_audio=None,
user_provider_id: str | None = None,
):
"""Register all event handlers for transport and task events.
@ -123,7 +128,11 @@ def register_event_handlers(
stop_ringer = asyncio.Event()
sample_rate = audio_config.pipeline_sample_rate or 16000
ringer_task = asyncio.create_task(
play_hold_audio_loop(task, stop_ringer, sample_rate)
play_audio_loop(
stop_event=stop_ringer,
sample_rate=sample_rate,
queue_frame=transport.output().queue_frame,
)
)
try:
fetch_result = await pre_call_fetch_task
@ -151,12 +160,35 @@ def register_event_handlers(
# so that render_template() has the complete _call_context_vars.
await engine.set_node(engine.workflow.start_node_id)
greeting = engine.get_start_greeting()
if greeting:
logger.debug(
"Both pipeline_started and client_connected received - playing greeting via TTS"
)
await task.queue_frame(TTSSpeakFrame(greeting))
greeting_info = engine.get_start_greeting()
if greeting_info:
greeting_type, greeting_value = greeting_info
if (
greeting_type == "audio"
and greeting_value
and fetch_recording_audio
):
logger.debug(f"Playing audio greeting recording: {greeting_value}")
result = await fetch_recording_audio(
recording_pk=int(greeting_value)
)
if result:
await play_audio(
result.audio,
sample_rate=audio_config.pipeline_sample_rate or 16000,
queue_frame=transport.output().queue_frame,
transcript=result.transcript,
append_to_context=True,
)
else:
logger.warning(
f"Failed to fetch audio greeting {greeting_value}, "
"falling back to LLM generation"
)
await engine.llm.queue_frame(LLMContextFrame(engine.context))
else:
logger.debug("Playing text greeting via TTS")
await task.queue_frame(TTSSpeakFrame(greeting_value))
else:
logger.debug(
"Both pipeline_started and client_connected received - triggering initial LLM generation"

View file

@ -170,7 +170,10 @@ class RealtimeFeedbackObserver(BaseObserver):
frame_direction = data.direction
# Skip already processed frames (frames can be observed multiple times)
if frame.id in self._frames_seen:
if (
frame.id in self._frames_seen
or frame_direction != FrameDirection.DOWNSTREAM
):
return
self._frames_seen.add(frame.id)

View file

@ -7,7 +7,7 @@ subsequent plays (even from other workers) are instantaneous.
"""
import os
from typing import Awaitable, Callable, Optional
from typing import Awaitable, Callable, NamedTuple, Optional
import numpy as np
from loguru import logger
@ -22,14 +22,24 @@ from .audio_file_cache import (
write_cache_file,
)
class RecordingAudio(NamedTuple):
    """A recording's audio bytes together with its transcript, if there is one."""

    # Audio payload of the recording.
    audio: bytes
    # Transcript text; None when no transcript is available.
    transcript: Optional[str] = None
# ---------------------------------------------------------------------------
# Cache path helper
# ---------------------------------------------------------------------------
def _cache_path(organization_id: int, recording_id: str, sample_rate: int) -> str:
    """Return the on-disk path for a cached PCM file.

    The file name is ``{organization_id}_{recording_id}_{sample_rate}.pcm``
    inside ``CACHE_DIR``, so entries are separated per organization and per
    sample rate.

    Args:
        organization_id: Organization the recording belongs to.
        recording_id: Key identifying the recording. Callers in this module
            pass either the string recording ID or the stringified primary key.
        sample_rate: Sample rate of the cached PCM data.

    Returns:
        Path of the cache file (the file itself may not exist yet).
    """
    return os.path.join(
        CACHE_DIR, f"{organization_id}_{recording_id}_{sample_rate}.pcm"
    )
# ---------------------------------------------------------------------------
@ -40,54 +50,95 @@ def _cache_path(recording_id: str, sample_rate: int) -> str:
def create_recording_audio_fetcher(
    organization_id: int,
    pipeline_sample_rate: int,
) -> Callable[..., Awaitable[Optional[RecordingAudio]]]:
    """Create an async callback that fetches audio and transcript for a recording.

    The returned callable accepts **one** of two keyword arguments:

    - ``recording_pk`` -- the immutable integer primary key (used by
      dropdown-based selections: greeting, edges, tool configs).
    - ``recording_id`` -- the human-readable string ID (used by
      prompt-based ``RECORDING_ID: xxx`` references).

    When both are given, ``recording_pk`` takes precedence.

    Flow:
    1. Checks the filesystem cache (keyed by org + pk-or-recording_id +
       sample rate).
    2. On miss, looks up the recording in the DB.
    3. For pk-based requests, checks the cache file keyed by the recording's
       string ``recording_id`` (the key the download step writes under).
    4. Otherwise downloads the audio from S3/MinIO, converts to 16-bit mono
       PCM, trims silence, and caches the result on disk.

    Args:
        organization_id: Organization owning the recordings.
        pipeline_sample_rate: Target PCM sample rate for the pipeline.

    Returns:
        ``async (*, recording_pk=None, recording_id=None) -> Optional[RecordingAudio]``
        which resolves to ``None`` when neither key is given, the recording
        is not found, or the download/conversion yields nothing.
    """
    from api.db import db_client
    from api.services.storage import get_storage_for_backend

    # Resolve storage instances once per backend at creation time, not per fetch.
    _storage_cache: dict[str, object] = {}
    # Transcripts already looked up by this fetcher, keyed like the disk cache.
    _transcript_cache: dict[str, Optional[str]] = {}

    def _get_storage(backend: str):
        if backend not in _storage_cache:
            _storage_cache[backend] = get_storage_for_backend(backend)
        return _storage_cache[backend]

    async def _lookup_recording(
        cache_key: str,
        recording_pk: Optional[int],
        recording_id: Optional[str],
    ):
        """DB lookup with transcript caching."""
        if recording_pk is not None:
            recording = await db_client.get_recording_by_id(
                recording_pk, organization_id
            )
        else:
            recording = await db_client.get_recording_by_recording_id(
                recording_id, organization_id
            )
        if recording:
            _transcript_cache[cache_key] = recording.transcript or None
        return recording

    async def fetch(
        *,
        recording_pk: Optional[int] = None,
        recording_id: Optional[str] = None,
    ) -> Optional[RecordingAudio]:
        if recording_pk is None and recording_id is None:
            logger.warning("fetch called with neither recording_pk nor recording_id")
            return None

        # Use pk for cache key when available, otherwise recording_id
        cache_key = str(recording_pk) if recording_pk is not None else recording_id
        cached = _cache_path(organization_id, cache_key, pipeline_sample_rate)

        # 1. Serve from filesystem cache
        if os.path.exists(cached):
            logger.debug(f"Recording {cache_key} served from disk cache")
            audio = read_cached_file(cached)
            # Transcript may already be in memory from a prior fetch;
            # if not, do a lightweight DB lookup.
            if cache_key not in _transcript_cache:
                await _lookup_recording(cache_key, recording_pk, recording_id)
            return RecordingAudio(
                audio=audio, transcript=_transcript_cache.get(cache_key)
            )

        # 2. DB lookup
        recording = await _lookup_recording(cache_key, recording_pk, recording_id)
        if not recording:
            logger.warning(f"Recording {cache_key} not found in database")
            return None

        # 3. The download step stores its output under the recording's string
        #    recording_id, never under the pk. A pk-based request therefore
        #    misses in step 1 even when the audio is already on disk; check
        #    the recording_id-keyed file before downloading again.
        if recording_pk is not None:
            cached_by_id = _cache_path(
                recording.organization_id,
                recording.recording_id,
                pipeline_sample_rate,
            )
            if os.path.exists(cached_by_id):
                logger.debug(f"Recording {cache_key} served from disk cache")
                return RecordingAudio(
                    audio=read_cached_file(cached_by_id),
                    transcript=_transcript_cache.get(cache_key),
                )

        # 4. Download, convert, trim, and cache
        pcm_data = await _download_and_convert(
            recording, pipeline_sample_rate, _get_storage
        )
        if pcm_data is None:
            return None
        return RecordingAudio(
            audio=pcm_data, transcript=_transcript_cache.get(cache_key)
        )

    return fetch
@ -98,11 +149,10 @@ def create_recording_audio_fetcher(
async def warm_recording_cache(
workflow_id: int,
organization_id: int,
pipeline_sample_rate: int,
) -> None:
"""Pre-fetch all active recordings for a workflow into the disk cache.
"""Pre-fetch all active recordings for an organization into the disk cache.
Launched as a background ``asyncio.Task`` at pipeline startup so that
recordings are ready before the first playback request. Errors are logged
@ -112,9 +162,7 @@ async def warm_recording_cache(
from api.services.storage import get_storage_for_backend
try:
recordings = await db_client.get_recordings_for_workflow(
workflow_id, organization_id
)
recordings = await db_client.get_recordings(organization_id=organization_id)
if not recordings:
return
@ -122,15 +170,20 @@ async def warm_recording_cache(
uncached = [
r
for r in recordings
if not os.path.exists(_cache_path(r.recording_id, pipeline_sample_rate))
if not os.path.exists(
_cache_path(organization_id, str(r.id), pipeline_sample_rate)
)
and not os.path.exists(
_cache_path(organization_id, r.recording_id, pipeline_sample_rate)
)
]
if not uncached:
logger.debug(f"Recording cache already warm for workflow {workflow_id}")
logger.debug(f"Recording cache already warm for org {organization_id}")
return
logger.info(
f"Warming recording cache: {len(uncached)}/{len(recordings)} "
f"recording(s) for workflow {workflow_id}"
f"recording(s) for org {organization_id}"
)
# Resolve storage instances once per backend, not per recording
@ -156,7 +209,7 @@ async def warm_recording_cache(
f"Cache warm: error processing {recording.recording_id}"
)
logger.info(f"Recording cache warm complete for workflow {workflow_id}")
logger.info(f"Recording cache warm complete for org {organization_id}")
except Exception:
logger.exception("Recording cache warm failed")
@ -187,7 +240,11 @@ async def _download_and_convert(
pcm_data = _trim_silence(pcm_data, sample_rate)
# Write to disk cache
cached = _cache_path(recording.recording_id, sample_rate)
cached = _cache_path(
recording.organization_id,
recording.recording_id,
sample_rate,
)
write_cache_file(cached, pcm_data)
return pcm_data

View file

@ -17,6 +17,7 @@ from typing import Awaitable, Callable, Optional
from loguru import logger
from api.services.pipecat.recording_audio_cache import RecordingAudio
from api.services.workflow.pipecat_engine_context_composer import (
RECORDING_MARKER,
TTS_MARKER,
@ -48,14 +49,14 @@ class RecordingRouterProcessor(FrameProcessor):
Args:
audio_sample_rate: Pipeline sample rate for OutputAudioRawFrame.
fetch_recording_audio: Async callback that takes a recording_id and
returns raw 16-bit mono PCM bytes, or None on failure.
returns a RecordingAudio (audio + transcript), or None on failure.
"""
def __init__(
self,
*,
audio_sample_rate: int,
fetch_recording_audio: Callable[[str], Awaitable[Optional[bytes]]],
fetch_recording_audio: Callable[..., Awaitable[Optional[RecordingAudio]]],
**kwargs,
):
super().__init__(**kwargs)
@ -245,8 +246,8 @@ class RecordingRouterProcessor(FrameProcessor):
"""
logger.info(f"Playing pre-recorded audio: {recording_id}")
audio_data = await self._fetch_recording_audio(recording_id)
if not audio_data:
result = await self._fetch_recording_audio(recording_id=recording_id)
if not result:
logger.warning(
f"Failed to fetch recording {recording_id}, no audio will play"
)
@ -256,7 +257,7 @@ class RecordingRouterProcessor(FrameProcessor):
await self.push_frame(TTSStartedFrame(context_id=context_id))
await self.push_frame(
TTSAudioRawFrame(
audio=audio_data,
audio=result.audio,
sample_rate=self._audio_sample_rate,
num_channels=1,
context_id=context_id,
@ -264,10 +265,10 @@ class RecordingRouterProcessor(FrameProcessor):
)
await self.push_frame(TTSStoppedFrame(context_id=context_id))
duration_secs = len(audio_data) / (self._audio_sample_rate * 2)
duration_secs = len(result.audio) / (self._audio_sample_rate * 2)
logger.debug(
f"Finished pushing recording {recording_id} "
f"({len(audio_data)} bytes, {duration_secs:.1f}s)"
f"({len(result.audio)} bytes, {duration_secs:.1f}s)"
)
# ------------------------------------------------------------------

View file

@ -698,9 +698,7 @@ async def _run_pipeline(
# Check if the workflow has any active recordings so the engine can
# include recording response mode instructions in all node prompts.
has_recordings = await db_client.has_active_recordings(
workflow_id, workflow.organization_id
)
has_recordings = await db_client.has_active_recordings(workflow.organization_id)
context_compaction_enabled = (workflow.workflow_configurations or {}).get(
"context_compaction_enabled", False
@ -831,6 +829,14 @@ async def _run_pipeline(
voicemail_detector = None
recording_router = None
# Create recording audio fetcher (used by recording router, audio greetings,
# and audio transition speech)
fetch_audio = create_recording_audio_fetcher(
organization_id=workflow.organization_id,
pipeline_sample_rate=audio_config.pipeline_sample_rate,
)
engine.set_fetch_recording_audio(fetch_audio)
if not is_realtime:
# Create voicemail detector if enabled in workflow configurations
voicemail_config = (workflow.workflow_configurations or {}).get(
@ -871,10 +877,6 @@ async def _run_pipeline(
# Create recording router if workflow has active recordings
if has_recordings:
fetch_audio = create_recording_audio_fetcher(
organization_id=workflow.organization_id,
pipeline_sample_rate=audio_config.pipeline_sample_rate,
)
recording_router = RecordingRouterProcessor(
audio_sample_rate=audio_config.pipeline_sample_rate,
fetch_recording_audio=fetch_audio,
@ -883,7 +885,6 @@ async def _run_pipeline(
# before the first playback request.
asyncio.create_task(
warm_recording_cache(
workflow_id=workflow_id,
organization_id=workflow.organization_id,
pipeline_sample_rate=audio_config.pipeline_sample_rate,
)
@ -918,8 +919,9 @@ async def _run_pipeline(
# Create pipeline task with audio configuration
task = create_pipeline_task(pipeline, workflow_run_id, audio_config)
# Now set the task on the engine
# Now set the task and transport output on the engine
engine.set_task(task)
engine.set_transport_output(transport.output())
# Initialize the engine to set the initial context with
# System Prompt and Tools
@ -979,6 +981,7 @@ async def _run_pipeline(
pipeline_metrics_aggregator=pipeline_metrics_aggregator,
audio_config=audio_config,
pre_call_fetch_task=pre_call_fetch_task,
fetch_recording_audio=fetch_audio,
user_provider_id=user_provider_id,
)

View file

@ -230,7 +230,7 @@ def create_tts_service(user_config, audio_config: "AudioConfig"):
api_key=user_config.tts.api_key,
settings=DeepgramTTSSettings(voice=user_config.tts.voice),
text_filters=[xml_function_tag_filter],
skip_aggregator_types=["recording_router"],
skip_aggregator_types=["recording_router", "recording"],
silence_time_s=1.0,
)
elif user_config.tts.provider == ServiceProviders.OPENAI.value:
@ -238,7 +238,7 @@ def create_tts_service(user_config, audio_config: "AudioConfig"):
api_key=user_config.tts.api_key,
settings=OpenAITTSSettings(model=user_config.tts.model),
text_filters=[xml_function_tag_filter],
skip_aggregator_types=["recording_router"],
skip_aggregator_types=["recording_router", "recording"],
silence_time_s=1.0,
)
elif user_config.tts.provider == ServiceProviders.ELEVENLABS.value:
@ -258,7 +258,7 @@ def create_tts_service(user_config, audio_config: "AudioConfig"):
similarity_boost=0.75,
),
text_filters=[xml_function_tag_filter],
skip_aggregator_types=["recording_router"],
skip_aggregator_types=["recording_router", "recording"],
silence_time_s=1.0,
)
elif user_config.tts.provider == ServiceProviders.CARTESIA.value:
@ -284,7 +284,7 @@ def create_tts_service(user_config, audio_config: "AudioConfig"):
),
),
text_filters=[xml_function_tag_filter],
skip_aggregator_types=["recording_router"],
skip_aggregator_types=["recording_router", "recording"],
silence_time_s=1.0,
)
elif user_config.tts.provider == ServiceProviders.DOGRAH.value:
@ -299,7 +299,7 @@ def create_tts_service(user_config, audio_config: "AudioConfig"):
speed=user_config.tts.speed,
),
text_filters=[xml_function_tag_filter],
skip_aggregator_types=["recording_router"],
skip_aggregator_types=["recording_router", "recording"],
silence_time_s=1.0,
)
elif user_config.tts.provider == ServiceProviders.CAMB.value:
@ -312,7 +312,7 @@ def create_tts_service(user_config, audio_config: "AudioConfig"):
voice_id=voice_id,
model=user_config.tts.model,
text_filters=[xml_function_tag_filter],
skip_aggregator_types=["recording_router"],
skip_aggregator_types=["recording_router", "recording"],
)
# Set language directly as BCP-47 code (bypasses Language enum conversion)
tts._settings.language = language
@ -327,7 +327,7 @@ def create_tts_service(user_config, audio_config: "AudioConfig"):
speed=user_config.tts.speed,
),
text_filters=[xml_function_tag_filter],
skip_aggregator_types=["recording_router"],
skip_aggregator_types=["recording_router", "recording"],
silence_time_s=1.0,
)
elif user_config.tts.provider == ServiceProviders.RIME.value:
@ -352,7 +352,7 @@ def create_tts_service(user_config, audio_config: "AudioConfig"):
api_key=user_config.tts.api_key,
settings=RimeTTSSettings(**settings_kwargs),
text_filters=[xml_function_tag_filter],
skip_aggregator_types=["recording_router"],
skip_aggregator_types=["recording_router", "recording"],
silence_time_s=1.0,
)
elif user_config.tts.provider == ServiceProviders.SARVAM.value:
@ -382,7 +382,7 @@ def create_tts_service(user_config, audio_config: "AudioConfig"):
language=pipecat_language,
),
text_filters=[xml_function_tag_filter],
skip_aggregator_types=["recording_router"],
skip_aggregator_types=["recording_router", "recording"],
silence_time_s=1.0,
)
else:

View file

@ -54,6 +54,8 @@ class NodeDataDTO(BaseModel):
extraction_variables: Optional[list[ExtractionVariableDTO]] = None
add_global_prompt: bool = True
greeting: Optional[str] = None
greeting_type: Optional[str] = None # 'text' or 'audio'
greeting_recording_id: Optional[str] = None
wait_for_user_response: bool = False
wait_for_user_response_timeout: Optional[float] = None
detect_voicemail: bool = False
@ -102,6 +104,8 @@ class EdgeDataDTO(BaseModel):
label: str = Field(..., min_length=1)
condition: str = Field(..., min_length=1)
transition_speech: Optional[str] = None
transition_speech_type: Optional[str] = None # 'text' or 'audio'
transition_speech_recording_id: Optional[str] = None
class RFEdgeDTO(BaseModel):

View file

@ -1,14 +1,12 @@
"""Service for duplicating workflows including recordings."""
"""Service for duplicating workflows."""
import copy
import json
import posixpath
import uuid
from loguru import logger
from api.db import db_client
from api.db.workflow_recording_client import generate_short_id
from api.enums import StorageBackend
from api.services.storage import get_storage_for_backend, storage_fs
@ -41,22 +39,14 @@ def _regenerate_trigger_uuids(workflow_definition: dict) -> dict:
return updated_definition
async def _generate_unique_recording_id() -> str:
"""Generate a globally unique short recording ID."""
for _ in range(10):
rid = generate_short_id(8)
exists = await db_client.check_recording_id_exists(rid)
if not exists:
return rid
raise RuntimeError("Failed to generate unique recording ID")
async def duplicate_workflow(
workflow_id: int,
organization_id: int,
user_id: int,
):
"""Duplicate a workflow including its definition, config, recordings, and triggers.
"""Duplicate a workflow including its definition, config, and triggers.
Recordings are org-scoped and shared, so they are not duplicated.
Args:
workflow_id: The source workflow ID to duplicate
@ -130,29 +120,7 @@ async def duplicate_workflow(
organization_id=organization_id,
)
# 6. Copy recordings with new IDs and storage paths scoped to new workflow
recording_id_map = await _duplicate_recordings(
source_workflow_id=workflow_id,
new_workflow_id=new_workflow.id,
organization_id=organization_id,
user_id=user_id,
)
# 7. Replace old recording IDs with new ones in the workflow definition
if recording_id_map:
workflow_definition = _replace_recording_ids(
workflow_definition, recording_id_map
)
new_workflow = await db_client.update_workflow(
workflow_id=new_workflow.id,
name=None,
workflow_definition=workflow_definition,
template_context_variables=None,
workflow_configurations=None,
organization_id=organization_id,
)
# 8. Sync triggers for the new workflow
# 6. Sync triggers for the new workflow
if workflow_definition:
trigger_paths = _extract_trigger_paths(workflow_definition)
if trigger_paths:
@ -165,94 +133,6 @@ async def duplicate_workflow(
return new_workflow
async def _duplicate_recordings(
source_workflow_id: int,
new_workflow_id: int,
organization_id: int,
user_id: int,
) -> dict[str, str]:
"""Duplicate all recordings for a workflow.
Copies each recording file to a new storage path scoped under the new
workflow ID, and creates new DB records pointing to the copied files.
Returns:
Mapping of old_recording_id -> new_recording_id
"""
recordings = await db_client.get_recordings_for_workflow(
workflow_id=source_workflow_id,
organization_id=organization_id,
)
if not recordings:
return {}
recording_id_map: dict[str, str] = {}
for rec in recordings:
try:
new_recording_id = await _generate_unique_recording_id()
# Build new storage key: recordings/{org_id}/{new_workflow_id}/{new_recording_id}/{filename}
filename = posixpath.basename(rec.storage_key)
new_storage_key = (
f"recordings/{organization_id}"
f"/{new_workflow_id}/{new_recording_id}"
f"/{filename}"
)
copied = await _copy_storage_object(
rec.storage_key, new_storage_key, rec.storage_backend
)
if not copied:
logger.warning(
f"Failed to copy recording file {rec.recording_id}, skipping"
)
continue
await db_client.create_recording(
recording_id=new_recording_id,
workflow_id=new_workflow_id,
organization_id=organization_id,
tts_provider=rec.tts_provider,
tts_model=rec.tts_model,
tts_voice_id=rec.tts_voice_id,
transcript=rec.transcript,
storage_key=new_storage_key,
storage_backend=rec.storage_backend,
created_by=user_id,
metadata=copy.deepcopy(rec.recording_metadata),
)
recording_id_map[rec.recording_id] = new_recording_id
logger.info(
f"Duplicated recording {rec.recording_id} -> {new_recording_id}"
)
except Exception as e:
logger.error(f"Error duplicating recording {rec.recording_id}: {e}")
continue
return recording_id_map
def _replace_recording_ids(
workflow_definition: dict,
recording_id_map: dict[str, str],
) -> dict:
"""Replace old recording IDs with new ones throughout the workflow definition.
Uses JSON serialization to do a thorough find-and-replace across all
nested fields (node prompts, data, etc.).
"""
definition_str = json.dumps(workflow_definition)
for old_id, new_id in recording_id_map.items():
definition_str = definition_str.replace(old_id, new_id)
return json.loads(definition_str)
async def _copy_storage_object(
source_key: str, dest_key: str, storage_backend: str
) -> bool:

View file

@ -1,5 +1,6 @@
from typing import TYPE_CHECKING, Awaitable, Callable, Optional, Union
from api.services.pipecat.audio_playback import play_audio
from api.services.workflow.disposition_mapper import (
apply_disposition_mapping,
get_organization_id_from_workflow_run,
@ -114,6 +115,13 @@ class PipecatEngine:
# Audio configuration (set via set_audio_config from _run_pipeline)
self._audio_config = None
# Transport output processor for injecting audio directly into the
# output, bypassing STT (set via set_transport_output from _run_pipeline)
self._transport_output = None
# Recording audio fetcher (set via set_fetch_recording_audio from _run_pipeline)
self._fetch_recording_audio = None
# True when the workflow has active recordings; enables recording
# response mode instructions on all nodes for in-context learning.
self._has_recordings: bool = has_recordings
@ -191,6 +199,8 @@ class PipecatEngine:
name: str,
transition_to_node: str,
transition_speech: Optional[str] = None,
transition_speech_type: Optional[str] = None,
transition_speech_recording_id: Optional[str] = None,
):
async def transition_func(function_call_params: FunctionCallParams) -> None:
"""Inner function that handles the node change tool calls"""
@ -204,8 +214,34 @@ class PipecatEngine:
# Perform variable extraction before transitioning to new node
await self._perform_variable_extraction_if_needed(self._current_node)
# Queue transition speech before switching nodes
if transition_speech:
# Queue transition speech/audio before switching nodes
speech_type = transition_speech_type or "text"
if (
speech_type == "audio"
and transition_speech_recording_id
and self._fetch_recording_audio
):
logger.info(
f"Playing transition audio: {transition_speech_recording_id}"
)
self._queued_speech_mute_state = "waiting"
result = await self._fetch_recording_audio(
recording_pk=int(transition_speech_recording_id)
)
if result:
await play_audio(
result.audio,
sample_rate=self._audio_config.pipeline_sample_rate
if self._audio_config
else 16000,
queue_frame=self._transport_output.queue_frame,
transcript=result.transcript,
)
else:
logger.warning(
f"Failed to fetch transition audio {transition_speech_recording_id}"
)
elif transition_speech:
logger.info(f"Playing transition speech: {transition_speech}")
self._queued_speech_mute_state = "waiting"
await self.task.queue_frame(
@ -259,6 +295,8 @@ class PipecatEngine:
name: str,
transition_to_node: str,
transition_speech: Optional[str] = None,
transition_speech_type: Optional[str] = None,
transition_speech_recording_id: Optional[str] = None,
):
logger.debug(
f"Registering function {name} to transition to node {transition_to_node} with LLM"
@ -266,7 +304,11 @@ class PipecatEngine:
# Create transition function
transition_func = await self._create_transition_func(
name, transition_to_node, transition_speech
name,
transition_to_node,
transition_speech,
transition_speech_type,
transition_speech_recording_id,
)
# Register function with LLM
@ -442,6 +484,8 @@ class PipecatEngine:
outgoing_edge.get_function_name(),
outgoing_edge.target,
outgoing_edge.transition_speech,
outgoing_edge.data.transition_speech_type,
outgoing_edge.data.transition_speech_recording_id,
)
# Register custom tool handlers for this node
@ -533,11 +577,27 @@ class PipecatEngine:
# Setup LLM Context with Prompts and Functions
await self._setup_llm_context(node)
def get_start_greeting(self) -> Optional[tuple[str, Optional[str]]]:
    """Return the greeting info for the start node, or None if not configured.

    An audio greeting is returned only when the node's ``greeting_type`` is
    ``"audio"`` *and* a ``greeting_recording_id`` is set; otherwise the text
    greeting (if any) is used. A missing ``greeting_type`` is treated as
    ``"text"``.

    Returns:
        A tuple of (greeting_type, value) where:
        - ("text", rendered_text) for text greetings spoken via TTS
        - ("audio", recording_id) for pre-recorded audio greetings
        Or None if there is no start node or no greeting is configured.
    """
    start_node = self.workflow.nodes.get(self.workflow.start_node_id)
    if not start_node:
        return None

    greeting_type = start_node.greeting_type or "text"

    if greeting_type == "audio" and start_node.greeting_recording_id:
        return ("audio", start_node.greeting_recording_id)

    if start_node.greeting:
        # Text greetings are rendered through the engine's prompt formatter.
        return ("text", self._format_prompt(start_node.greeting))

    return None
async def _handle_end_node(self, node: Node) -> None:
@ -698,6 +758,18 @@ class PipecatEngine:
"""Set the audio configuration for the pipeline."""
self._audio_config = audio_config
def set_transport_output(self, transport_output) -> None:
    """Remember the transport output processor used for direct audio playback.

    Frames queued on this processor are intended to go straight to the
    caller, bypassing STT and the rest of the pipeline.
    """
    self._transport_output = transport_output
def set_fetch_recording_audio(self, fetch_fn) -> None:
    """Store the callback used to fetch recording audio."""
    self._fetch_recording_audio = fetch_fn
def set_mute_pipeline(self, mute: bool) -> None:
"""Set the pipeline mute state.

View file

@ -168,7 +168,6 @@ def create_aggregation_correction_callback(engine: "PipecatEngine"):
reference = engine._current_llm_generation_reference_text
if not reference:
logger.warning("No reference text available for aggregation correction")
return corrupted
# Apply the correction algorithm

View file

@ -16,6 +16,7 @@ from loguru import logger
from api.db import db_client
from api.enums import ToolCategory, WorkflowRunMode
from api.services.pipecat.audio_playback import play_audio, play_audio_loop
from api.services.telephony.call_transfer_manager import get_call_transfer_manager
from api.services.telephony.factory import get_telephony_provider
from api.services.telephony.transfer_event_protocol import TransferContext
@ -27,7 +28,6 @@ from api.services.workflow.tools.custom_tool import (
execute_http_tool,
tool_to_function_schema,
)
from api.utils.hold_audio import play_hold_audio_loop
from pipecat.adapters.schemas.function_schema import FunctionSchema
from pipecat.frames.frames import (
FunctionCallResultProperties,
@ -77,6 +77,45 @@ class CustomToolManager:
self._engine = engine
self._organization_id: Optional[int] = None
async def _play_config_message(
    self, config: dict, *, append_to_context: bool = False
) -> bool:
    """Play a message from tool config -- text or pre-recorded audio.

    ``config["messageType"]`` selects what happens:

    - ``"audio"``: the recording whose primary key is
      ``config["audioRecordingId"]`` is fetched through the engine's
      recording fetcher and played via ``play_audio`` on the engine's
      transport output.
    - ``"custom"``: ``config["customMessage"]`` is queued on the engine task
      as a ``TTSSpeakFrame``.
    - anything else (default ``"none"``): nothing is played.

    Args:
        config: Tool configuration dictionary.
        append_to_context: Forwarded to whichever playback path is used, so
            text and audio messages are treated the same way.

    Returns:
        True if a message was queued, False otherwise.
    """
    message_type = config.get("messageType", "none")

    if message_type == "audio":
        recording_pk = config.get("audioRecordingId")
        fetch_audio = self._engine._fetch_recording_audio
        if not (recording_pk and fetch_audio):
            return False

        result = await fetch_audio(recording_pk=int(recording_pk))
        if not result:
            logger.warning(f"Failed to fetch recording pk={recording_pk}")
            return False

        audio_config = self._engine._audio_config
        # Fall back to 16 kHz when there is no audio config or it carries no
        # pipeline sample rate.
        sample_rate = (
            audio_config.pipeline_sample_rate if audio_config else None
        ) or 16000
        await play_audio(
            result.audio,
            sample_rate=sample_rate,
            queue_frame=self._engine._transport_output.queue_frame,
            transcript=result.transcript,
            append_to_context=append_to_context,
        )
        return True

    if message_type == "custom":
        custom_message = config.get("customMessage", "")
        if custom_message:
            await self._engine.task.queue_frame(
                TTSSpeakFrame(custom_message, append_to_context=append_to_context)
            )
            return True

    return False
async def get_organization_id(self) -> Optional[int]:
"""Get and cache the organization ID from workflow run."""
if self._organization_id is None:
@ -250,9 +289,30 @@ class CustomToolManager:
try:
# Queue custom message before executing the API call
# Queue custom message (text or audio) before executing the API call
config = tool.definition.get("config", {}) if tool.definition else {}
custom_msg_type = config.get("customMessageType", "text")
custom_message = config.get("customMessage", "")
if custom_message:
if custom_msg_type == "audio":
recording_pk = config.get("customMessageRecordingId")
if recording_pk and self._engine._fetch_recording_audio:
logger.info(
f"Playing audio message before HTTP tool: pk={recording_pk}"
)
self._engine._queued_speech_mute_state = "waiting"
result = await self._engine._fetch_recording_audio(
recording_pk=int(recording_pk)
)
if result:
await play_audio(
result.audio,
sample_rate=self._engine._audio_config.pipeline_sample_rate
if self._engine._audio_config
else 16000,
queue_frame=self._engine._transport_output.queue_frame,
transcript=result.transcript,
)
elif custom_message:
logger.info(
f"Playing custom message before HTTP tool: {custom_message}"
)
@ -299,8 +359,6 @@ class CustomToolManager:
try:
# Get the end call configuration
config = tool.definition.get("config", {})
message_type = config.get("messageType", "none")
custom_message = config.get("customMessage", "")
# Handle end call reason if enabled
end_call_reason_enabled = config.get("endCallReason", False)
@ -322,10 +380,8 @@ class CustomToolManager:
properties=properties,
)
if message_type == "custom" and custom_message:
# Queue the custom message to be spoken
logger.info(f"Playing custom goodbye message: {custom_message}")
await self._engine.task.queue_frame(TTSSpeakFrame(custom_message))
played = await self._play_config_message(config)
if played:
# End the call after the message (not immediately)
await self._engine.end_call_with_reason(
EndTaskReason.END_CALL_TOOL_REASON.value,
@ -370,8 +426,6 @@ class CustomToolManager:
# Get the transfer call configuration
config = tool.definition.get("config", {})
destination = config.get("destination", "")
message_type = config.get("messageType", "none")
custom_message = config.get("customMessage", "")
timeout_seconds = config.get(
"timeout", 30
) # Default 30 seconds if not configured
@ -443,10 +497,9 @@ class CustomToolManager:
)
return
if message_type == "custom" and custom_message:
logger.info(f"Playing pre-transfer message: {custom_message}")
played = await self._play_config_message(config)
if played:
self._engine._queued_speech_mute_state = "waiting"
await self._engine.task.queue_frame(TTSSpeakFrame(custom_message))
# Get organization ID for provider configuration
organization_id = await self.get_organization_id()
@ -537,10 +590,10 @@ class CustomToolManager:
# Start hold music as background task
hold_music_task = asyncio.create_task(
play_hold_audio_loop(
self._engine.task,
hold_music_stop_event,
sample_rate,
play_audio_loop(
stop_event=hold_music_stop_event,
sample_rate=sample_rate,
queue_frame=self._engine._transport_output.queue_frame,
)
)

View file

@ -77,6 +77,8 @@ class Node:
self.extraction_variables = data.extraction_variables
self.add_global_prompt = data.add_global_prompt
self.greeting = data.greeting
self.greeting_type = data.greeting_type
self.greeting_recording_id = data.greeting_recording_id
self.detect_voicemail = data.detect_voicemail
self.delayed_start = data.delayed_start
self.delayed_start_duration = data.delayed_start_duration

View file

@ -13,6 +13,7 @@ from typing import Optional
import pytest
from api.services.pipecat.recording_audio_cache import RecordingAudio
from api.services.pipecat.recording_router_processor import (
RecordingRouterProcessor,
)
@ -37,9 +38,9 @@ from pipecat.tests import run_test
FAKE_AUDIO = b"\x00\x01" * 8000 # 1 second of 16-bit mono @ 16 kHz
async def _fake_fetch(recording_id: str) -> Optional[bytes]:
async def _fake_fetch(recording_id: str) -> Optional[RecordingAudio]:
"""Stub that returns fake PCM audio for any recording_id."""
return FAKE_AUDIO
return RecordingAudio(audio=FAKE_AUDIO)
def _make_processor(**kwargs) -> RecordingRouterProcessor:
@ -189,7 +190,7 @@ class TestMixedMarkerSuppression:
async def tracking_fetch(recording_id: str):
fetched_ids.append(recording_id)
return FAKE_AUDIO
return RecordingAudio(audio=FAKE_AUDIO)
processor = _make_processor(fetch=tracking_fetch)

View file

@ -0,0 +1,603 @@
"""Tests for text and audio playback in greetings, transitions, and tool messages.
Verifies that:
- Text mode produces TTSSpeakFrame
- Audio mode produces TTSStartedFrame -> TTSAudioRawFrame -> TTSStoppedFrame
- Covers: start node greetings, edge transition speech, tool config messages
"""
import asyncio
from typing import Any, Dict, List
from unittest.mock import AsyncMock, Mock, patch
import pytest
from api.services.pipecat.recording_audio_cache import RecordingAudio
from api.services.workflow.dto import (
EdgeDataDTO,
NodeDataDTO,
NodeType,
Position,
ReactFlowDTO,
RFEdgeDTO,
RFNodeDTO,
)
from api.services.workflow.pipecat_engine import PipecatEngine
from api.services.workflow.pipecat_engine_custom_tools import CustomToolManager
from api.services.workflow.workflow import WorkflowGraph
from pipecat.frames.frames import (
Frame,
LLMContextFrame,
TTSAudioRawFrame,
TTSSpeakFrame,
TTSStartedFrame,
TTSStoppedFrame,
)
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.llm_context import LLMContext
from pipecat.processors.aggregators.llm_response_universal import (
LLMAssistantAggregatorParams,
LLMContextAggregatorPair,
)
from pipecat.tests import MockLLMService, MockTTSService
from pipecat.tests.mock_transport import MockTransport
from pipecat.transports.base_transport import TransportParams
# ─── Constants ──────────────────────────────────────────────────
START_PROMPT = "Start Call System Prompt"
END_PROMPT = "End Call System Prompt"
TEXT_GREETING = "Hello, welcome to our service!"
TEXT_TRANSITION = "Thank you for calling, goodbye!"
AUDIO_GREETING_ID = "rec-greeting-001"
AUDIO_TRANSITION_ID = "101"
FAKE_PCM_AUDIO = b"\x00\x01" * 1000 # Fake 16-bit mono PCM data
# ─── Fixtures ───────────────────────────────────────────────────
@pytest.fixture
def text_workflow() -> WorkflowGraph:
    """Build a two-node (start -> end) graph that speaks via TTS text.

    The start node carries a text greeting and the single edge between the
    nodes carries text transition speech.
    """
    start_node = RFNodeDTO(
        id="start",
        type=NodeType.startNode,
        position=Position(x=0, y=0),
        data=NodeDataDTO(
            name="Start Call",
            prompt=START_PROMPT,
            is_start=True,
            allow_interrupt=False,
            add_global_prompt=False,
            greeting=TEXT_GREETING,
            greeting_type="text",
            extraction_enabled=False,
        ),
    )
    end_node = RFNodeDTO(
        id="end",
        type=NodeType.endNode,
        position=Position(x=0, y=200),
        data=NodeDataDTO(
            name="End Call",
            prompt=END_PROMPT,
            is_end=True,
            allow_interrupt=False,
            add_global_prompt=False,
            extraction_enabled=False,
        ),
    )
    end_call_edge = RFEdgeDTO(
        id="start-end",
        source="start",
        target="end",
        data=EdgeDataDTO(
            label="End Call",
            condition="When the user says end the call",
            transition_speech=TEXT_TRANSITION,
            transition_speech_type="text",
        ),
    )
    return WorkflowGraph(
        ReactFlowDTO(nodes=[start_node, end_node], edges=[end_call_edge])
    )
@pytest.fixture
def audio_workflow() -> WorkflowGraph:
    """Build a two-node (start -> end) graph that speaks via recordings.

    The start node is configured with an audio greeting recording and the
    single edge is configured with an audio transition recording.
    """
    start_node = RFNodeDTO(
        id="start",
        type=NodeType.startNode,
        position=Position(x=0, y=0),
        data=NodeDataDTO(
            name="Start Call",
            prompt=START_PROMPT,
            is_start=True,
            allow_interrupt=False,
            add_global_prompt=False,
            greeting_type="audio",
            greeting_recording_id=AUDIO_GREETING_ID,
            extraction_enabled=False,
        ),
    )
    end_node = RFNodeDTO(
        id="end",
        type=NodeType.endNode,
        position=Position(x=0, y=200),
        data=NodeDataDTO(
            name="End Call",
            prompt=END_PROMPT,
            is_end=True,
            allow_interrupt=False,
            add_global_prompt=False,
            extraction_enabled=False,
        ),
    )
    end_call_edge = RFEdgeDTO(
        id="start-end",
        source="start",
        target="end",
        data=EdgeDataDTO(
            label="End Call",
            condition="When the user says end the call",
            transition_speech_type="audio",
            transition_speech_recording_id=AUDIO_TRANSITION_ID,
        ),
    )
    return WorkflowGraph(
        ReactFlowDTO(nodes=[start_node, end_node], edges=[end_call_edge])
    )
# ─── Pipeline Helper ────────────────────────────────────────────
async def run_pipeline_and_capture_frames(
    workflow: WorkflowGraph,
    functions: List[Dict[str, Any]],
    fetch_recording_audio=None,
    num_text_steps: int = 1,
) -> tuple[MockLLMService, LLMContext, list[Frame]]:
    """Drive a mock pipeline through one tool-call turn and record queued frames.

    The mock LLM first emits the given function calls and then
    ``num_text_steps`` text responses. Every frame passed to
    ``task.queue_frame`` is recorded; when ``fetch_recording_audio`` is
    supplied, frames passed to the transport output's ``queue_frame`` are
    recorded as well.

    Args:
        workflow: Workflow graph the engine should run.
        functions: Function-call descriptions for the first mock LLM step.
        fetch_recording_audio: Optional recording fetch callback to install
            on the engine.
        num_text_steps: Number of text steps following the function calls.

    Returns:
        Tuple of (llm, context, list of captured frames).
    """
    # Mock LLM script: one function-call step followed by text steps.
    function_call_chunks = MockLLMService.create_multiple_function_call_chunks(
        functions
    )
    scripted_steps = MockLLMService.create_multi_step_responses(
        function_call_chunks, num_text_steps=num_text_steps, step_prefix="Response"
    )
    llm = MockLLMService(mock_steps=scripted_steps, chunk_delay=0.001)
    tts = MockTTSService(mock_audio_duration_ms=40, frame_delay=0)

    transport_params = TransportParams(
        audio_in_enabled=True,
        audio_out_enabled=True,
        audio_in_sample_rate=16000,
        audio_out_sample_rate=16000,
    )
    mock_transport = MockTransport(params=transport_params)

    context = LLMContext()
    context_aggregator = LLMContextAggregatorPair(
        context,
        assistant_params=LLMAssistantAggregatorParams(expect_stripped_words=True),
    )

    engine = PipecatEngine(
        llm=llm,
        context=context,
        workflow=workflow,
        call_context_vars={"customer_name": "Test User"},
        workflow_run_id=1,
    )

    transport_output = mock_transport.output()
    if fetch_recording_audio:
        engine.set_fetch_recording_audio(fetch_recording_audio)
    engine.set_transport_output(transport_output)

    stages = [llm, tts, transport_output, context_aggregator.assistant()]
    task = PipelineTask(Pipeline(stages), params=PipelineParams(), enable_rtvi=False)
    engine.set_task(task)

    # Wrap both queue_frame entry points so every frame the engine queues is
    # recorded (audio playback goes through the transport output).
    queued_frames: list[Frame] = []
    forward_to_task = task.queue_frame

    async def record_task_frame(frame):
        queued_frames.append(frame)
        await forward_to_task(frame)

    task.queue_frame = record_task_frame

    if fetch_recording_audio:
        forward_to_transport = transport_output.queue_frame

        async def record_transport_frame(frame, *args, **kwargs):
            queued_frames.append(frame)
            await forward_to_transport(frame, *args, **kwargs)

        transport_output.queue_frame = record_transport_frame

    org_id_patch = patch(
        "api.services.workflow.pipecat_engine.get_organization_id_from_workflow_run",
        new_callable=AsyncMock,
        return_value=1,
    )
    disposition_patch = patch(
        "api.services.workflow.pipecat_engine.apply_disposition_mapping",
        new_callable=AsyncMock,
        return_value="completed",
    )
    with org_id_patch, disposition_patch:
        runner = PipelineRunner()

        async def start_engine():
            # Short pause before initialising, as in the runner/engine race
            # the pipeline is started alongside this coroutine.
            await asyncio.sleep(0.01)
            await engine.initialize()
            await engine.set_node(engine.workflow.start_node_id)
            await engine.llm.queue_frame(LLMContextFrame(engine.context))

        await asyncio.gather(runner.run(task), start_engine())

    return llm, context, queued_frames
# ─── Tests: Start Greeting ──────────────────────────────────────
class TestStartGreeting:
    """Checks the value returned by PipecatEngine.get_start_greeting()."""

    @staticmethod
    def _start_end_graph(**start_overrides) -> WorkflowGraph:
        """Return a minimal start -> end graph; extra kwargs go to the start node data."""
        start_node = RFNodeDTO(
            id="start",
            type=NodeType.startNode,
            position=Position(x=0, y=0),
            data=NodeDataDTO(
                name="Start",
                prompt="Prompt",
                is_start=True,
                add_global_prompt=False,
                extraction_enabled=False,
                **start_overrides,
            ),
        )
        end_node = RFNodeDTO(
            id="end",
            type=NodeType.endNode,
            position=Position(x=0, y=200),
            data=NodeDataDTO(
                name="End",
                prompt="End",
                is_end=True,
                add_global_prompt=False,
                extraction_enabled=False,
            ),
        )
        edge = RFEdgeDTO(
            id="e",
            source="start",
            target="end",
            data=EdgeDataDTO(label="End", condition="End"),
        )
        return WorkflowGraph(ReactFlowDTO(nodes=[start_node, end_node], edges=[edge]))

    def test_text_greeting_returns_text_tuple(self, text_workflow: WorkflowGraph):
        """A text greeting yields the pair ('text', greeting text)."""
        engine = PipecatEngine(
            workflow=text_workflow,
            call_context_vars={},
            workflow_run_id=1,
        )

        assert engine.get_start_greeting() == ("text", TEXT_GREETING)

    def test_audio_greeting_returns_audio_tuple(self, audio_workflow: WorkflowGraph):
        """An audio greeting yields the pair ('audio', recording id)."""
        engine = PipecatEngine(
            workflow=audio_workflow,
            call_context_vars={},
            workflow_run_id=1,
        )

        assert engine.get_start_greeting() == ("audio", AUDIO_GREETING_ID)

    def test_no_greeting_returns_none(self):
        """A start node without any greeting settings yields None."""
        engine = PipecatEngine(
            workflow=self._start_end_graph(),
            call_context_vars={},
            workflow_run_id=1,
        )

        assert engine.get_start_greeting() is None

    def test_text_greeting_renders_template_variables(self):
        """{{variable}} placeholders in a text greeting are filled from call context."""
        graph = self._start_end_graph(
            greeting="Hello {{customer_name}}!",
            greeting_type="text",
        )
        engine = PipecatEngine(
            workflow=graph,
            call_context_vars={"customer_name": "Alice"},
            workflow_run_id=1,
        )

        assert engine.get_start_greeting() == ("text", "Hello Alice!")
# ─── Tests: Transition Speech (Pipeline) ────────────────────────
class TestTransitionSpeech:
    """End-to-end pipeline checks for speech played on an edge transition."""

    @pytest.mark.asyncio
    async def test_text_transition_queues_tts_speak_frame(
        self, text_workflow: WorkflowGraph
    ):
        """A text transition queues exactly one TTSSpeakFrame carrying the message."""
        end_call = {
            "name": "end_call",
            "arguments": {},
            "tool_call_id": "call_transition",
        }

        llm, _context, queued_frames = await run_pipeline_and_capture_frames(
            workflow=text_workflow,
            functions=[end_call],
            num_text_steps=2,
        )

        # Two generations are expected: one on the start node, one on the end node.
        assert llm.get_current_step() == 2

        tts_speak_frames = [f for f in queued_frames if isinstance(f, TTSSpeakFrame)]
        matching = [f for f in tts_speak_frames if f.text == TEXT_TRANSITION]
        assert len(matching) == 1, (
            f"Expected one TTSSpeakFrame with text '{TEXT_TRANSITION}', "
            f"got: {[f.text for f in tts_speak_frames]}"
        )

        # A text transition must not queue any raw audio.
        assert not [f for f in queued_frames if isinstance(f, TTSAudioRawFrame)]

    @pytest.mark.asyncio
    async def test_audio_transition_queues_audio_frames(
        self, audio_workflow: WorkflowGraph
    ):
        """An audio transition queues TTSStarted, TTSAudioRaw and TTSStopped frames."""
        end_call = {
            "name": "end_call",
            "arguments": {},
            "tool_call_id": "call_transition",
        }
        mock_fetch = AsyncMock(return_value=RecordingAudio(audio=FAKE_PCM_AUDIO))

        llm, _context, queued_frames = await run_pipeline_and_capture_frames(
            workflow=audio_workflow,
            functions=[end_call],
            fetch_recording_audio=mock_fetch,
            num_text_steps=2,
        )

        assert llm.get_current_step() == 2

        # The recording id configured on the edge is passed as an integer pk.
        mock_fetch.assert_called_once_with(recording_pk=int(AUDIO_TRANSITION_ID))

        def frames_of(frame_type):
            return [f for f in queued_frames if isinstance(f, frame_type)]

        started = frames_of(TTSStartedFrame)
        audio = frames_of(TTSAudioRawFrame)
        stopped = frames_of(TTSStoppedFrame)

        assert len(started) >= 1, (
            f"Expected TTSStartedFrame. "
            f"Frame types: {[type(f).__name__ for f in queued_frames]}"
        )
        assert len(audio) >= 1, "Expected TTSAudioRawFrame"
        assert len(stopped) >= 1, "Expected TTSStoppedFrame"

        # Payload and format of the first raw audio frame.
        first_audio = audio[0]
        assert first_audio.audio == FAKE_PCM_AUDIO
        assert first_audio.sample_rate == 16000
        assert first_audio.num_channels == 1

        # All three frames share one context id.
        ctx_id = started[0].context_id
        assert ctx_id is not None
        assert first_audio.context_id == ctx_id
        assert stopped[0].context_id == ctx_id

        # An audio transition must not fall back to TTS text.
        assert not frames_of(TTSSpeakFrame)
# ─── Tests: Tool Config Messages ────────────────────────────────
class TestPlayConfigMessage:
    """Direct tests of CustomToolManager._play_config_message against a mock engine."""

    @pytest.fixture
    def mock_engine(self):
        """Provide a Mock engine whose queue_frame hooks record frames.

        Both ``task.queue_frame`` and ``_transport_output.queue_frame`` append
        to ``engine._queued_frames``.
        """
        recorded = []

        async def record_frame(frame):
            recorded.append(frame)

        engine = Mock()
        engine._workflow_run_id = 1
        engine._call_context_vars = {}
        engine._fetch_recording_audio = None
        engine._audio_config = None
        engine.llm = Mock()
        engine._queued_frames = recorded

        engine.task = Mock()
        engine.task.queue_frame = record_frame

        # Audio playback is queued on the transport output rather than the task.
        engine._transport_output = Mock()
        engine._transport_output.queue_frame = record_frame
        return engine

    @pytest.mark.asyncio
    async def test_custom_text_queues_tts_speak_frame(self, mock_engine):
        """messageType='custom' queues a single TTSSpeakFrame with the text."""
        manager = CustomToolManager(mock_engine)

        result = await manager._play_config_message(
            {"messageType": "custom", "customMessage": "Ending your call now."}
        )

        assert result is True
        frames = mock_engine._queued_frames
        assert len(frames) == 1
        (only_frame,) = frames
        assert isinstance(only_frame, TTSSpeakFrame)
        assert only_frame.text == "Ending your call now."

    @pytest.mark.asyncio
    async def test_audio_queues_started_raw_stopped_frames(self, mock_engine):
        """messageType='audio' queues TTSStarted, TTSAudioRaw, TTSStopped in order."""
        mock_fetch = AsyncMock(return_value=RecordingAudio(audio=FAKE_PCM_AUDIO))
        mock_engine._fetch_recording_audio = mock_fetch
        manager = CustomToolManager(mock_engine)

        result = await manager._play_config_message(
            {"messageType": "audio", "audioRecordingId": "201"}
        )

        assert result is True
        mock_fetch.assert_called_once_with(recording_pk=201)

        frames = mock_engine._queued_frames
        assert len(frames) == 3
        started, raw, stopped = frames
        assert isinstance(started, TTSStartedFrame)
        assert isinstance(raw, TTSAudioRawFrame)
        assert isinstance(stopped, TTSStoppedFrame)

        # Payload and format of the raw audio frame.
        assert raw.audio == FAKE_PCM_AUDIO
        assert raw.sample_rate == 16000
        assert raw.num_channels == 1

        # The three frames share one context id.
        ctx_id = started.context_id
        assert ctx_id is not None
        assert raw.context_id == ctx_id
        assert stopped.context_id == ctx_id

    @pytest.mark.asyncio
    async def test_none_message_type_returns_false(self, mock_engine):
        """messageType='none' queues nothing and reports False."""
        manager = CustomToolManager(mock_engine)

        result = await manager._play_config_message({"messageType": "none"})

        assert result is False
        assert len(mock_engine._queued_frames) == 0

    @pytest.mark.asyncio
    async def test_audio_without_fetch_callback_returns_false(self, mock_engine):
        """Audio config with no fetch callback on the engine reports False."""
        mock_engine._fetch_recording_audio = None
        manager = CustomToolManager(mock_engine)

        result = await manager._play_config_message(
            {"messageType": "audio", "audioRecordingId": "301"}
        )

        assert result is False
        assert len(mock_engine._queued_frames) == 0

    @pytest.mark.asyncio
    async def test_audio_with_failed_fetch_returns_false(self, mock_engine):
        """Audio config whose fetch callback returns None reports False."""
        mock_fetch = AsyncMock(return_value=None)
        mock_engine._fetch_recording_audio = mock_fetch
        manager = CustomToolManager(mock_engine)

        result = await manager._play_config_message(
            {"messageType": "audio", "audioRecordingId": "301"}
        )

        assert result is False
        mock_fetch.assert_called_once_with(recording_pk=301)
        assert len(mock_engine._queued_frames) == 0

    @pytest.mark.asyncio
    async def test_custom_empty_message_returns_false(self, mock_engine):
        """messageType='custom' with an empty message queues nothing and reports False."""
        manager = CustomToolManager(mock_engine)

        result = await manager._play_config_message(
            {"messageType": "custom", "customMessage": ""}
        )

        assert result is False
        assert len(mock_engine._queued_frames) == 0

View file

@ -1,151 +0,0 @@
"""
Hold audio utility for loading, caching, and playing hold music files.
This module provides functionality to load hold music audio files at specific sample rates
with caching to improve performance during multiple calls, and a reusable loop that queues
audio frames until a stop event is set.
"""
import asyncio
from typing import Dict, Optional, Tuple
import numpy as np
from loguru import logger
from pipecat.frames.frames import OutputAudioRawFrame
try:
import soundfile as sf
except ModuleNotFoundError as e:
logger.error(f"Exception: {e}")
logger.error("In order to use hold audio, you need to `pip install soundfile`.")
raise Exception(f"Missing module: {e}")
# Global cache for loaded hold music data
_hold_audio_cache: Dict[Tuple[str, int], np.ndarray] = {}
def load_hold_audio(file_path: str, sample_rate: int) -> Optional[bytes]:
    """Load a hold-music file as mono PCM16 at ``sample_rate``, with caching.

    Decoded samples are cached per ``(file_path, sample_rate)`` so repeated
    calls do not read the file again. Multi-channel files are reduced to
    their first channel.

    When the file's native rate differs from ``sample_rate`` the samples are
    linearly resampled to the requested rate. Previously the audio was
    returned at the file's own rate; because the playback loop in this module
    labels its frames with ``sample_rate``, mismatched files were played at
    the wrong speed and pitch.

    Args:
        file_path: Path to the hold music audio file.
        sample_rate: Target sample rate in Hz.

    Returns:
        Audio data as bytes (PCM16), or None if loading failed.
    """
    cache_key = (file_path, sample_rate)

    # Check cache first
    cached = _hold_audio_cache.get(cache_key)
    if cached is not None:
        logger.debug(f"Using cached hold audio for {file_path} at {sample_rate}Hz")
        return cached.tobytes()

    try:
        logger.info(f"Loading hold audio from {file_path} at {sample_rate}Hz")

        # Load audio file
        sound, file_sample_rate = sf.read(file_path, dtype="int16")
        logger.info(
            f"Audio file loaded - file sample_rate: {file_sample_rate}, target: {sample_rate}"
        )

        # Ensure mono audio (take first channel if stereo)
        if len(sound.shape) > 1:
            sound = sound[:, 0]

        # Resample if needed
        if file_sample_rate != sample_rate:
            logger.warning(
                f"Hold music file has sample rate {file_sample_rate}, expected {sample_rate}"
            )
            if sound.size:
                # Linear interpolation onto the target time grid. No
                # anti-alias filter is applied, which is acceptable for a
                # hold tone but not high fidelity.
                source_len = sound.shape[0]
                target_len = max(
                    1, int(round(source_len * sample_rate / file_sample_rate))
                )
                source_positions = np.arange(source_len, dtype=np.float64)
                # Position of each output sample, measured in input samples;
                # np.interp clamps positions past the last input sample.
                target_positions = np.arange(target_len, dtype=np.float64) * (
                    file_sample_rate / sample_rate
                )
                sound = np.rint(
                    np.interp(target_positions, source_positions, sound)
                )

        # Convert to int16 and cache
        audio_data = sound.astype(np.int16)
        _hold_audio_cache[cache_key] = audio_data

        logger.info(
            f"Hold audio loaded successfully: {len(audio_data)} samples at {sample_rate}Hz"
        )
        return audio_data.tobytes()
    except Exception as e:
        # Best-effort loader: any failure (missing file, decode error, bad
        # sample rate) is logged and reported to the caller as None.
        logger.error(f"Failed to load hold audio file {file_path}: {e}")
        return None
def clear_hold_audio_cache():
    """Empty the module-level hold-audio cache so its memory can be reclaimed."""
    # The dict is emptied in place (never rebound), so no ``global``
    # declaration is required here.
    _hold_audio_cache.clear()
    logger.info("Hold audio cache cleared")
def get_cache_info() -> Dict[str, int]:
    """Summarise the current contents of the hold-audio cache.

    Returns:
        A dict with ``cached_files`` (number of cache entries) and
        ``total_cache_size`` (sum of ``len()`` over the cached arrays).
    """
    entry_lengths = [len(samples) for samples in _hold_audio_cache.values()]
    return {
        "cached_files": len(entry_lengths),
        "total_cache_size": sum(entry_lengths),
    }
async def play_hold_audio_loop(
    task,
    stop_event: asyncio.Event,
    sample_rate: int = 16000,
    hold_music_file: Optional[str] = None,
) -> None:
    """Queue hold/ring-back audio repeatedly until *stop_event* is set.

    Shared helper for call-transfer hold music and the pre-call data fetch
    ringer. The caller owns the ``asyncio.Event`` and sets it to end playback.
    Each iteration queues the whole clip as one frame and then waits for the
    event for the clip's duration plus 1.5 seconds before queuing it again.

    Args:
        task: A ``PipelineTask`` (or anything with ``queue_frame``).
        stop_event: Set this event to terminate the loop.
        sample_rate: Target sample rate for audio playback.
        hold_music_file: Path to a WAV file. When *None* the default
            ``transfer_hold_ring_{sample_rate}.wav`` asset is used.
    """
    if hold_music_file is None:
        # Imported lazily, only when the default asset path is needed.
        from api.constants import APP_ROOT_DIR

        default_asset = (
            APP_ROOT_DIR / "assets" / f"transfer_hold_ring_{sample_rate}.wav"
        )
        hold_music_file = str(default_asset)

    pcm = load_hold_audio(hold_music_file, sample_rate)
    if not pcm:
        logger.warning(f"Hold audio loop: failed to load {hold_music_file}, skipping")
        return

    # 16-bit PCM = 2 bytes per sample; wait one clip length plus a 1.5 s gap.
    clip_seconds = (len(pcm) // 2) / sample_rate
    wait_timeout = clip_seconds + 1.5

    logger.debug(f"Hold audio loop: playing at {sample_rate}Hz")

    try:
        while not stop_event.is_set():
            await task.queue_frame(
                OutputAudioRawFrame(
                    audio=pcm,
                    sample_rate=sample_rate,
                    num_channels=1,
                )
            )
            try:
                await asyncio.wait_for(stop_event.wait(), timeout=wait_timeout)
            except asyncio.TimeoutError:
                # Not stopped yet: play the clip again.
                continue
            # The event was set while waiting.
            break
    except Exception as e:
        logger.error(f"Hold audio loop: error: {e}")

    logger.debug("Hold audio loop: stopped")

View file

@ -6,15 +6,6 @@ tag: "NEW"
Custom recordings allow you to build **hybrid voice agents** that use your own pre-recorded audio for key parts of the conversation, while falling back to LLM-generated speech (via a cloned voice) for dynamic responses. This gives you the best of both worlds — the emotional depth of real human speech and the flexibility of AI-generated dialogue.
<iframe
className="w-full aspect-video rounded-xl"
src="https://www.youtube.com/embed/1uZqhG0_cIo"
title="Dograh Twilio Setup"
allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture"
allowFullScreen
></iframe>
## Why use custom recordings?
- **Reduced TTS cost** — Pre-recorded audio is played directly, so you are not charged for TTS synthesis on those segments.
@ -50,23 +41,20 @@ You can use any TTS provider that supports voice cloning. The steps will vary by
## Step 3: Upload recordings
Navigate to your agent in the workflow builder and open the **Recordings** panel. You can either upload pre-recorded audio files or record directly in the browser.
Navigate to the **Recordings** page in the Dograh dashboard. Recordings are shared across all agents in your organization. You can either upload pre-recorded audio files or record directly in the browser.
For each recording:
1. Click **Record** (or upload a file).
2. Speak the exact phrase you want the agent to use.
3. Give the recording a descriptive name (e.g., `greeting`, `invitation`, `venue`).
4. Verify the transcription is correct — edit it if needed.
5. Click **Upload**.
1. Click **Upload Recording**.
2. Choose an audio file or click **Record** to record in the browser.
3. Verify the transcription is correct — edit it if needed.
4. Click **Upload**.
<Warning>
Recordings are scoped to a specific **provider and Voice ID**. If you change either, you will need to re-upload your recordings to ensure consistency between the recorded audio and the cloned voice used for dynamic responses.
</Warning>
You can rename a recording's ID at any time by clicking the edit icon next to it in the recordings list.
## Step 4: Build the workflow
Open your agent's workflow and write the conversation flow in natural language. To insert a recording, type **`@`** in the prompt editor — this will show a list of all available recordings scoped to your current Voice ID.
Open your agent's workflow and write the conversation flow in natural language. To insert a recording, type **`@`** in the prompt editor — this will show a list of all available recordings in your organization.
For any user question that falls outside your recordings, the agent automatically generates a dynamic response using the LLM, which is then synthesized using your cloned voice via TTS.

@ -1 +1 @@
Subproject commit 002c095b2f15a11d4a6ffd85b86592821aa5cd62
Subproject commit 5a2e4c89118264bfdfe91db059b01e226609f060

View file

@ -3,25 +3,19 @@
import { useRef, useState } from 'react';
import { toast } from 'sonner';
import { getPresignedUploadUrlApiV1S3PresignedUploadUrlPost } from '@/client/sdk.gen';
import { Button } from '@/components/ui/button';
import { Label } from '@/components/ui/label';
import logger from '@/lib/logger';
interface CsvUploadSelectorProps {
accessToken: string;
onFileUploaded: (fileKey: string, fileName: string) => void;
selectedFileName?: string;
}
interface PresignedUploadUrlResponse {
upload_url: string;
file_key: string;
expires_in: number;
}
const MAX_FILE_SIZE = 10 * 1024 * 1024; // 10MB
export default function CsvUploadSelector({ accessToken, onFileUploaded, selectedFileName }: CsvUploadSelectorProps) {
export default function CsvUploadSelector({ onFileUploaded, selectedFileName }: CsvUploadSelectorProps) {
const [uploading, setUploading] = useState(false);
const [uploadProgress, setUploadProgress] = useState(0);
const fileInputRef = useRef<HTMLInputElement>(null);
@ -48,25 +42,18 @@ export default function CsvUploadSelector({ accessToken, onFileUploaded, selecte
try {
// Step 1: Request presigned upload URL
logger.info('Requesting presigned upload URL for:', file.name);
const presignedResponse = await fetch('/api/v1/s3/presigned-upload-url', {
method: 'POST',
headers: {
'Authorization': `Bearer ${accessToken}`,
'Content-Type': 'application/json',
},
body: JSON.stringify({
const { data: presignedData, error } = await getPresignedUploadUrlApiV1S3PresignedUploadUrlPost({
body: {
file_name: file.name,
file_size: file.size,
content_type: 'text/csv',
}),
},
});
if (!presignedResponse.ok) {
const error = await presignedResponse.json();
throw new Error(error.detail || 'Failed to get upload URL');
if (error || !presignedData) {
throw new Error('Failed to get upload URL');
}
const presignedData: PresignedUploadUrlResponse = await presignedResponse.json();
logger.info('Received presigned URL, uploading file...');
// Step 2: Upload file directly to S3/MinIO

View file

@ -415,7 +415,6 @@ export default function NewCampaignPage() {
/>
) : (
<CsvUploadSelector
accessToken={userAccessToken}
onFileUploaded={handleFileUploaded}
selectedFileName={selectedFileName}
/>

View file

@ -1,11 +1,18 @@
"use client";
import { ExternalLink } from "lucide-react";
import { ExternalLink, Upload } from "lucide-react";
import { useEffect, useState } from "react";
import { Button } from "@/components/ui/button";
import { Card, CardContent, CardDescription, CardHeader, CardTitle } from "@/components/ui/card";
import {
Dialog,
DialogContent,
DialogDescription,
DialogHeader,
DialogTitle,
} from "@/components/ui/dialog";
import { Skeleton } from "@/components/ui/skeleton";
import { Tabs, TabsContent, TabsList, TabsTrigger } from "@/components/ui/tabs";
import { useAuth } from "@/lib/auth";
import DocumentList from "./DocumentList";
@ -14,6 +21,7 @@ import DocumentUpload from "./DocumentUpload";
export default function FilesPage() {
const { user, redirectToLogin, loading } = useAuth();
const [refreshKey, setRefreshKey] = useState(0);
const [isUploadOpen, setIsUploadOpen] = useState(false);
// Redirect if not authenticated
useEffect(() => {
@ -23,8 +31,8 @@ export default function FilesPage() {
}, [loading, user, redirectToLogin]);
const handleUploadSuccess = () => {
// Trigger refresh of document list
setRefreshKey(prev => prev + 1);
setIsUploadOpen(false);
};
if (loading || !user) {
@ -50,44 +58,37 @@ export default function FilesPage() {
</p>
</div>
<Tabs defaultValue="all" className="space-y-6">
<TabsList>
<TabsTrigger value="all">All Files</TabsTrigger>
<TabsTrigger value="upload">Upload New</TabsTrigger>
</TabsList>
<TabsContent value="all" className="space-y-4">
<Card>
<CardHeader>
<Card>
<CardHeader>
<div className="flex justify-between items-center">
<div>
<CardTitle>Your Documents</CardTitle>
<CardDescription>
View and manage your uploaded documents
Documents shared across all agents in your organization
</CardDescription>
</CardHeader>
<CardContent>
<DocumentList
refreshTrigger={refreshKey}
/>
</CardContent>
</Card>
</TabsContent>
</div>
<Button onClick={() => setIsUploadOpen(true)}>
<Upload className="w-4 h-4 mr-2" />
Upload Document
</Button>
</div>
</CardHeader>
<CardContent>
<DocumentList refreshTrigger={refreshKey} />
</CardContent>
</Card>
<TabsContent value="upload" className="space-y-4">
<Card>
<CardHeader>
<CardTitle>Upload Document</CardTitle>
<CardDescription>
Upload a PDF or document file to add to your knowledge base
</CardDescription>
</CardHeader>
<CardContent>
<DocumentUpload
onUploadSuccess={handleUploadSuccess}
/>
</CardContent>
</Card>
</TabsContent>
</Tabs>
<Dialog open={isUploadOpen} onOpenChange={setIsUploadOpen}>
<DialogContent>
<DialogHeader>
<DialogTitle>Upload Document</DialogTitle>
<DialogDescription>
Upload a PDF or document file to add to your knowledge base
</DialogDescription>
</DialogHeader>
<DocumentUpload onUploadSuccess={handleUploadSuccess} />
</DialogContent>
</Dialog>
</div>
);
}

View file

@ -0,0 +1,323 @@
"use client";
import { AudioLines, Check, Pause, Pencil, Play, RefreshCw, Search, Trash2, X } from "lucide-react";
import { useCallback, useEffect, useState } from "react";
import { toast } from "sonner";
import {
deleteRecordingApiV1WorkflowRecordingsRecordingIdDelete,
listRecordingsApiV1WorkflowRecordingsGet,
updateRecordingApiV1WorkflowRecordingsIdPatch,
} from "@/client/sdk.gen";
import type { RecordingResponseSchema } from "@/client/types.gen";
import { Button } from "@/components/ui/button";
import { Input } from "@/components/ui/input";
import { Skeleton } from "@/components/ui/skeleton";
import { useAudioPlayback } from "@/hooks/useAudioPlayback";
import logger from "@/lib/logger";
/**
 * Searchable list of the organization's audio recordings.
 *
 * Fetches the recordings on mount and whenever `refreshKey` changes. Each row
 * can be played/paused, deleted, or have its `recording_id` renamed inline.
 *
 * @param refreshKey - Optional counter; changing its value triggers a re-fetch.
 */
export default function RecordingsList({ refreshKey }: { refreshKey?: number }) {
    const [recordings, setRecordings] = useState<RecordingResponseSchema[]>([]);
    const [isLoading, setIsLoading] = useState(true);
    const [searchQuery, setSearchQuery] = useState("");
    const [error, setError] = useState<string | null>(null);

    // Inline edit state
    const [editingId, setEditingId] = useState<string | null>(null);
    const [editValue, setEditValue] = useState("");
    const [editError, setEditError] = useState<string | null>(null);
    // True while a rename request is in flight; blocks duplicate submissions
    // (e.g. pressing Enter repeatedly).
    const [isSaving, setIsSaving] = useState(false);

    const { playingId, toggle: togglePlayback, stop: stopPlayback } = useAudioPlayback();

    // Loads the recordings list; failures are surfaced through `error`.
    const fetchRecordings = useCallback(async () => {
        try {
            setIsLoading(true);
            setError(null);
            const response = await listRecordingsApiV1WorkflowRecordingsGet({
                query: {},
            });
            if (response.error || !response.data) {
                throw new Error("Failed to fetch recordings");
            }
            setRecordings(response.data.recordings);
        } catch (err) {
            setError(err instanceof Error ? err.message : "Failed to fetch recordings");
            logger.error("Error fetching recordings:", err);
        } finally {
            setIsLoading(false);
        }
    }, []);

    useEffect(() => {
        fetchRecordings();
    }, [fetchRecordings, refreshKey]);

    const startEditing = (rec: RecordingResponseSchema) => {
        setEditingId(rec.recording_id);
        setEditValue(rec.recording_id);
        setEditError(null);
    };

    const cancelEditing = () => {
        setEditingId(null);
        setEditValue("");
        setEditError(null);
    };

    const handleDelete = async (recordingId: string) => {
        if (!confirm("Are you sure you want to delete this recording?")) return;
        try {
            const response = await deleteRecordingApiV1WorkflowRecordingsRecordingIdDelete({
                path: { recording_id: recordingId },
            });
            if (response.error) {
                throw new Error("Failed to delete recording");
            }
            // Don't keep playing or editing a recording that no longer exists.
            if (playingId === recordingId) stopPlayback();
            if (editingId === recordingId) cancelEditing();
            toast.success("Recording deleted");
            fetchRecordings();
        } catch (err) {
            toast.error(err instanceof Error ? err.message : "Failed to delete recording");
            logger.error("Error deleting recording:", err);
        }
    };

    const handlePlay = async (rec: RecordingResponseSchema) => {
        try {
            await togglePlayback(rec.recording_id, rec.storage_key, rec.storage_backend);
        } catch {
            toast.error("Failed to play recording");
        }
    };

    // Validates the edited ID client-side, then PATCHes it. Validation and
    // server errors are shown inline via `editError`.
    const saveRecordingId = async (rec: RecordingResponseSchema) => {
        if (isSaving) return;
        const newId = editValue.trim();
        if (!newId) {
            setEditError("ID cannot be empty");
            return;
        }
        if (!/^[a-zA-Z0-9_-]+$/.test(newId)) {
            setEditError("Only letters, numbers, hyphens, and underscores");
            return;
        }
        if (newId === rec.recording_id) {
            cancelEditing();
            return;
        }
        setEditError(null);
        setIsSaving(true);
        try {
            const response = await updateRecordingApiV1WorkflowRecordingsIdPatch({
                path: { id: rec.id },
                body: { recording_id: newId },
            });
            if (response.error) {
                // `detail` is not guaranteed to be a string (it may be a
                // structured validation payload); only show it when it is.
                const detail = (response.error as { detail?: unknown })?.detail;
                throw new Error(
                    typeof detail === "string" && detail ? detail : "Failed to update recording ID"
                );
            }
            // Playback state is keyed by recording_id, so the old key would
            // no longer match any row after the rename.
            if (playingId === rec.recording_id) stopPlayback();
            toast.success(`Recording ID updated to "${newId}". All workflow references have been updated.`);
            cancelEditing();
            fetchRecordings();
        } catch (err) {
            setEditError(err instanceof Error ? err.message : "Failed to update recording ID");
        } finally {
            setIsSaving(false);
        }
    };

    const formatDate = (dateString: string): string => {
        const date = new Date(dateString);
        return date.toLocaleDateString() + " " + date.toLocaleTimeString();
    };

    // Case-insensitive match against filename, transcript, or recording ID.
    const filteredRecordings = recordings.filter((rec) => {
        if (!searchQuery) return true;
        const q = searchQuery.toLowerCase();
        const filename = (rec.metadata?.original_filename as string) || "";
        return (
            filename.toLowerCase().includes(q) ||
            rec.transcript.toLowerCase().includes(q) ||
            rec.recording_id.toLowerCase().includes(q)
        );
    });

    // Skeleton only for the very first load; later refreshes keep the list visible.
    if (isLoading && recordings.length === 0) {
        return (
            <div className="space-y-4">
                {[1, 2, 3].map((i) => (
                    <div key={i} className="flex items-center justify-between p-4 border rounded-lg">
                        <div className="space-y-2 flex-1">
                            <Skeleton className="h-4 w-48" />
                            <Skeleton className="h-3 w-64" />
                        </div>
                        <Skeleton className="h-8 w-24" />
                    </div>
                ))}
            </div>
        );
    }

    if (error) {
        // The toolbar (and its Refresh button) is not rendered in this state,
        // so offer an explicit way to retry.
        return (
            <div className="flex items-center justify-between gap-4 p-4 bg-destructive/10 border border-destructive/20 rounded-lg text-destructive">
                <span>{error}</span>
                <Button
                    variant="outline"
                    size="sm"
                    onClick={() => fetchRecordings()}
                    disabled={isLoading}
                >
                    <RefreshCw className={`h-4 w-4 mr-2 ${isLoading ? "animate-spin" : ""}`} />
                    Retry
                </Button>
            </div>
        );
    }

    return (
        <div className="space-y-4">
            {/* Search and Refresh */}
            <div className="flex items-center gap-4">
                <div className="relative flex-1">
                    <Search className="absolute left-3 top-1/2 transform -translate-y-1/2 h-4 w-4 text-muted-foreground" />
                    <Input
                        placeholder="Search by filename, transcript, or ID..."
                        value={searchQuery}
                        onChange={(e) => setSearchQuery(e.target.value)}
                        className="pl-10"
                    />
                </div>
                <Button
                    variant="outline"
                    size="icon"
                    aria-label="Refresh recordings"
                    onClick={() => { stopPlayback(); fetchRecordings(); }}
                    disabled={isLoading}
                >
                    <RefreshCw className={`h-4 w-4 ${isLoading ? "animate-spin" : ""}`} />
                </Button>
            </div>

            {/* Results count */}
            <div className="text-sm text-muted-foreground">
                {filteredRecordings.length} recording{filteredRecordings.length !== 1 ? "s" : ""}
                {searchQuery && ` matching "${searchQuery}"`}
            </div>

            {/* Recordings List */}
            {filteredRecordings.length === 0 ? (
                <div className="text-center py-12">
                    <AudioLines className="w-12 h-12 text-muted-foreground mx-auto mb-4" />
                    <p className="text-muted-foreground">
                        {searchQuery
                            ? "No recordings match your search"
                            : "No recordings yet"}
                    </p>
                </div>
            ) : (
                <div className="space-y-3">
                    {filteredRecordings.map((rec) => {
                        const filename = (rec.metadata?.original_filename as string) || "";
                        const isEditing = editingId === rec.recording_id;
                        const isPlaying = playingId === rec.recording_id;
                        return (
                            <div
                                key={rec.recording_id}
                                className="flex items-center justify-between p-4 border rounded-lg hover:bg-muted/50 transition-colors"
                            >
                                <div className="flex items-center gap-4 flex-1 min-w-0">
                                    <div className="w-10 h-10 rounded-lg bg-primary/10 flex items-center justify-center shrink-0">
                                        <AudioLines className="w-5 h-5 text-primary" />
                                    </div>
                                    <div className="flex-1 min-w-0">
                                        {/* Recording ID (editable) */}
                                        <div className="flex items-center gap-2 mb-1">
                                            {isEditing ? (
                                                <div className="flex items-center gap-1 flex-wrap">
                                                    <Input
                                                        value={editValue}
                                                        onChange={(e) => { setEditValue(e.target.value); setEditError(null); }}
                                                        onKeyDown={(e) => {
                                                            if (e.key === "Enter") saveRecordingId(rec);
                                                            if (e.key === "Escape") cancelEditing();
                                                        }}
                                                        className={`h-7 text-sm font-mono w-48 ${editError ? "border-destructive" : ""}`}
                                                        maxLength={64}
                                                        autoFocus
                                                    />
                                                    <Button
                                                        variant="ghost"
                                                        size="sm"
                                                        className="h-7 w-7 p-0"
                                                        aria-label="Save recording ID"
                                                        onClick={() => saveRecordingId(rec)}
                                                        disabled={isSaving}
                                                    >
                                                        <Check className="w-3.5 h-3.5" />
                                                    </Button>
                                                    <Button
                                                        variant="ghost"
                                                        size="sm"
                                                        className="h-7 w-7 p-0"
                                                        aria-label="Cancel editing"
                                                        onClick={cancelEditing}
                                                        disabled={isSaving}
                                                    >
                                                        <X className="w-3.5 h-3.5" />
                                                    </Button>
                                                    {editError && (
                                                        <span className="text-xs text-destructive">{editError}</span>
                                                    )}
                                                </div>
                                            ) : (
                                                <div className="flex items-center gap-1.5">
                                                    <code className="text-sm font-mono bg-muted px-1.5 py-0.5 rounded truncate max-w-[250px]">
                                                        {rec.recording_id}
                                                    </code>
                                                    <Button
                                                        variant="ghost"
                                                        size="sm"
                                                        className="h-6 px-1.5 text-xs text-muted-foreground gap-1"
                                                        onClick={() => startEditing(rec)}
                                                    >
                                                        <Pencil className="w-3 h-3" />
                                                        Edit ID
                                                    </Button>
                                                </div>
                                            )}
                                        </div>

                                        {/* Filename */}
                                        {filename && (
                                            <p className="text-xs text-muted-foreground mb-0.5 truncate max-w-[300px]">
                                                {filename}
                                            </p>
                                        )}

                                        {/* Transcript */}
                                        <p className="text-sm text-muted-foreground line-clamp-1 mb-1">
                                            {rec.transcript}
                                        </p>
                                        <div className="flex items-center gap-3 text-xs text-muted-foreground flex-wrap">
                                            <span>{formatDate(rec.created_at)}</span>
                                        </div>
                                    </div>
                                </div>
                                <div className="flex items-center gap-1 shrink-0 ml-2">
                                    <Button
                                        variant="ghost"
                                        size="sm"
                                        aria-label={isPlaying ? "Pause recording" : "Play recording"}
                                        onClick={() => handlePlay(rec)}
                                    >
                                        {isPlaying ? (
                                            <Pause className="w-4 h-4" />
                                        ) : (
                                            <Play className="w-4 h-4" />
                                        )}
                                    </Button>
                                    <Button
                                        variant="ghost"
                                        size="sm"
                                        aria-label="Delete recording"
                                        onClick={() => handleDelete(rec.recording_id)}
                                        className="text-destructive hover:text-destructive/90"
                                    >
                                        <Trash2 className="w-4 h-4" />
                                    </Button>
                                </div>
                            </div>
                        );
                    })}
                </div>
            )}
        </div>
    );
}

View file

@ -0,0 +1,465 @@
"use client";
import { Loader2, Mic, Square, Upload, X } from "lucide-react";
import { useCallback, useEffect, useRef, useState } from "react";
import {
createRecordingsApiV1WorkflowRecordingsPost,
getUploadUrlsApiV1WorkflowRecordingsUploadUrlPost,
transcribeAudioApiV1WorkflowRecordingsTranscribePost,
} from "@/client";
import type { RecordingUploadResponseSchema } from "@/client/types.gen";
import { Button } from "@/components/ui/button";
import {
Dialog,
DialogContent,
DialogDescription,
DialogHeader,
DialogTitle,
} from "@/components/ui/dialog";
import { Input } from "@/components/ui/input";
import { Label } from "@/components/ui/label";
import {
Select,
SelectContent,
SelectItem,
SelectTrigger,
SelectValue,
} from "@/components/ui/select";
import { Textarea } from "@/components/ui/textarea";
import { LANGUAGE_DISPLAY_NAMES } from "@/constants/languages";
/** Props for the recordings upload dialog. */
interface RecordingsUploadDialogProps {
// Whether the dialog is currently shown (controlled by the parent).
open: boolean;
// Called when the dialog requests to open or close.
onOpenChange: (open: boolean) => void;
// Invoked after the upload flow finishes, just before the dialog closes itself.
onUploadComplete?: () => void;
}
// Per-file size cap, applied both to picked files and to in-browser recordings.
const MAX_FILE_SIZE = 5 * 1024 * 1024; // 5MB
/** A file queued in the dialog that has not been uploaded yet. */
interface PendingFile {
// Client-side identifier of the form "pending-<n>"; used as the React key.
id: string;
file: File;
// Transcript text; filled by auto-transcription and editable by the user.
transcript: string;
// True while the auto-transcription request for this file is in flight.
isTranscribing: boolean;
// Per-file error message (set when auto-transcription fails).
error?: string;
}
// Module-level counter used to mint unique PendingFile ids.
let pendingFileCounter = 0;
/**
 * Dialog for adding audio recordings, either by picking files or by recording
 * from the microphone.
 *
 * Each queued file is auto-transcribed; the user can edit the transcript
 * before uploading. Upload is a three-step flow: request upload URLs, PUT each
 * file to its URL, then create the recording entries. Only files with a
 * non-empty transcript are uploaded.
 *
 * @param open - Whether the dialog is shown.
 * @param onOpenChange - Called when the dialog should open or close.
 * @param onUploadComplete - Called after all recordings were created successfully.
 */
export const RecordingsUploadDialog = ({
    open,
    onOpenChange,
    onUploadComplete,
}: RecordingsUploadDialogProps) => {
    const [uploading, setUploading] = useState(false);
    const [pendingFiles, setPendingFiles] = useState<PendingFile[]>([]);
    const [error, setError] = useState<string | null>(null);
    const [language, setLanguage] = useState("multi");
    const [recordingStep, setRecordingStep] = useState<"idle" | "naming" | "recording">("idle");
    const [recordingFilename, setRecordingFilename] = useState("");
    const [recordingDuration, setRecordingDuration] = useState(0);

    const mediaRecorderRef = useRef<MediaRecorder | null>(null);
    const audioChunksRef = useRef<Blob[]>([]);
    const recordingTimerRef = useRef<ReturnType<typeof setInterval> | null>(null);
    const fileInputRef = useRef<HTMLInputElement>(null);
    // Set when a recording is stopped because the dialog closed or unmounted,
    // so the recorder's stop handler drops the audio instead of queueing it.
    const discardRecordingRef = useRef(false);
    // Mirror of `language` so async callbacks read the latest selection.
    const languageRef = useRef(language);
    languageRef.current = language;

    const stopRecordingTimer = useCallback(() => {
        if (recordingTimerRef.current) {
            clearInterval(recordingTimerRef.current);
            recordingTimerRef.current = null;
        }
    }, []);

    const stopRecording = useCallback(() => {
        if (mediaRecorderRef.current && mediaRecorderRef.current.state !== "inactive") {
            mediaRecorderRef.current.stop();
        }
    }, []);

    const resetRecordingState = useCallback(() => {
        setRecordingStep("idle");
        setRecordingFilename("");
        setRecordingDuration(0);
    }, []);

    // Start from a clean slate every time the dialog opens.
    useEffect(() => {
        if (open) {
            setError(null);
            setPendingFiles([]);
            setLanguage("multi");
            resetRecordingState();
        }
    }, [open, resetRecordingState]);

    // Closing the dialog aborts any in-progress recording. The audio is
    // discarded: the pending list is cleared on the next open anyway, so
    // queueing it would only trigger a wasted transcription request.
    useEffect(() => {
        if (!open) {
            discardRecordingRef.current = true;
            stopRecording();
            stopRecordingTimer();
        }
    }, [open, stopRecording, stopRecordingTimer]);

    // Release the microphone and the timer if the component unmounts mid-recording.
    useEffect(() => {
        return () => {
            discardRecordingRef.current = true;
            stopRecording();
            stopRecordingTimer();
        };
    }, [stopRecording, stopRecordingTimer]);

    // Marks a pending file as no longer transcribing and flags the failure.
    const markTranscriptionFailed = (pendingId: string) => {
        setPendingFiles((prev) =>
            prev.map((p) =>
                p.id === pendingId
                    ? { ...p, isTranscribing: false, error: "Auto-transcription failed" }
                    : p
            )
        );
    };

    // Requests an automatic transcript for one pending file and stores it.
    const transcribeFile = async (pendingId: string, file: File) => {
        setPendingFiles((prev) =>
            prev.map((p) => (p.id === pendingId ? { ...p, isTranscribing: true } : p))
        );
        try {
            const currentLang = languageRef.current;
            const result = await transcribeAudioApiV1WorkflowRecordingsTranscribePost({
                body: { file, language: currentLang },
            });
            // The client reports HTTP failures via `error` rather than by
            // throwing, so this must be checked explicitly.
            if (result.error) {
                markTranscriptionFailed(pendingId);
                return;
            }
            const data = result.data as Record<string, unknown> | undefined;
            if (data?.transcript) {
                setPendingFiles((prev) =>
                    prev.map((p) =>
                        p.id === pendingId ? { ...p, transcript: data.transcript as string, isTranscribing: false } : p
                    )
                );
            } else {
                // No transcript returned: leave the field empty for manual entry.
                setPendingFiles((prev) =>
                    prev.map((p) => (p.id === pendingId ? { ...p, isTranscribing: false } : p))
                );
            }
        } catch {
            markTranscriptionFailed(pendingId);
        }
    };

    // Queues files that fit the size limit and starts transcribing them.
    // Oversized files are skipped and reported together.
    const addPendingFiles = (files: File[]) => {
        const valid: PendingFile[] = [];
        const skipped: string[] = [];
        for (const file of files) {
            if (file.size > MAX_FILE_SIZE) {
                skipped.push(`${file.name} (${(file.size / (1024 * 1024)).toFixed(1)}MB) exceeds 5MB limit — skipped.`);
                continue;
            }
            const id = `pending-${++pendingFileCounter}`;
            valid.push({ id, file, transcript: "", isTranscribing: false });
        }
        // Keep the skip warnings visible even when other files were accepted.
        if (skipped.length > 0) setError(skipped.join(" "));
        if (valid.length === 0) return;

        setPendingFiles((prev) => [...prev, ...valid]);
        if (skipped.length === 0) setError(null);
        for (const pf of valid) {
            transcribeFile(pf.id, pf.file);
        }
    };

    const removePendingFile = (pendingId: string) => {
        setPendingFiles((prev) => prev.filter((p) => p.id !== pendingId));
    };

    const updateTranscript = (pendingId: string, transcript: string) => {
        setPendingFiles((prev) =>
            prev.map((p) => (p.id === pendingId ? { ...p, transcript } : p))
        );
    };

    // Asks for microphone access and starts recording. When the recorder
    // stops, the captured audio is queued as a pending file.
    const startRecording = async () => {
        let stream: MediaStream | null = null;
        try {
            stream = await navigator.mediaDevices.getUserMedia({ audio: true });
            const activeStream = stream;
            const mediaRecorder = new MediaRecorder(activeStream);
            mediaRecorderRef.current = mediaRecorder;
            audioChunksRef.current = [];
            discardRecordingRef.current = false;

            mediaRecorder.ondataavailable = (e) => {
                if (e.data.size > 0) audioChunksRef.current.push(e.data);
            };

            const filename = recordingFilename.trim() || "recording";
            mediaRecorder.onstop = () => {
                activeStream.getTracks().forEach((t) => t.stop());
                stopRecordingTimer();
                if (discardRecordingRef.current) {
                    // Stopped by close/unmount rather than by the user.
                    resetRecordingState();
                    return;
                }
                const blob = new Blob(audioChunksRef.current, { type: mediaRecorder.mimeType });
                if (blob.size > MAX_FILE_SIZE) {
                    setError(`Recording (${(blob.size / (1024 * 1024)).toFixed(1)}MB) exceeds the maximum allowed size of 5MB.`);
                    resetRecordingState();
                    return;
                }
                const ext = mediaRecorder.mimeType.includes("webm") ? "webm" : "mp4";
                const file = new File([blob], `${filename}.${ext}`, { type: mediaRecorder.mimeType });
                resetRecordingState();
                addPendingFiles([file]);
            };

            mediaRecorder.start();
            setRecordingStep("recording");
            setRecordingDuration(0);
            setError(null);
            recordingTimerRef.current = setInterval(() => {
                setRecordingDuration((d) => d + 1);
            }, 1000);
        } catch {
            // If the stream was granted but the recorder could not be set up,
            // release the microphone so it does not stay active.
            const streamAcquired = stream !== null;
            stream?.getTracks().forEach((t) => t.stop());
            setError(
                streamAcquired
                    ? "Could not start recording in this browser."
                    : "Microphone access denied. Please allow microphone permissions."
            );
            resetRecordingState();
        }
    };

    const handleFileSelect = (fileList: FileList | null) => {
        if (!fileList || fileList.length === 0) return;
        addPendingFiles(Array.from(fileList));
        // Reset the input so picking the same file again fires onChange.
        if (fileInputRef.current) fileInputRef.current.value = "";
    };

    // Uploads every pending file that has a transcript, then creates the
    // recording entries. Any failure leaves the dialog open with an error.
    const handleUpload = async () => {
        const ready = pendingFiles.filter((p) => p.transcript.trim() && !p.isTranscribing);
        if (ready.length === 0) return;

        setUploading(true);
        setError(null);
        try {
            const uploadUrlResponse = await getUploadUrlsApiV1WorkflowRecordingsUploadUrlPost({
                body: {
                    files: ready.map((p) => ({
                        filename: p.file.name,
                        mime_type: p.file.type || "audio/wav",
                        file_size: p.file.size,
                    })),
                },
            });
            if (uploadUrlResponse.error || !uploadUrlResponse.data?.items) {
                throw new Error("Failed to get upload URLs");
            }
            const items = uploadUrlResponse.data.items;
            // Items are matched to files by position below, so the counts must agree.
            if (items.length !== ready.length) {
                throw new Error("Failed to get upload URLs");
            }

            await Promise.all(
                items.map(async (item: RecordingUploadResponseSchema, idx: number) => {
                    const file = ready[idx].file;
                    const uploadResponse = await fetch(item.upload_url, {
                        method: "PUT",
                        body: file,
                        headers: { "Content-Type": file.type || "audio/wav" },
                    });
                    if (!uploadResponse.ok) {
                        throw new Error(`File upload failed for ${file.name}`);
                    }
                })
            );

            const createResponse = await createRecordingsApiV1WorkflowRecordingsPost({
                body: {
                    recordings: items.map((item: RecordingUploadResponseSchema, idx: number) => ({
                        recording_id: item.recording_id,
                        transcript: ready[idx].transcript.trim(),
                        storage_key: item.storage_key,
                        metadata: {
                            original_filename: ready[idx].file.name,
                            file_size_bytes: ready[idx].file.size,
                            mime_type: ready[idx].file.type,
                            language,
                        },
                    })),
                },
            });
            // Without this check a rejected create call would be treated as
            // success and the pending files would be discarded.
            if (createResponse.error) {
                const detail = (createResponse.error as { detail?: unknown })?.detail;
                throw new Error(
                    typeof detail === "string" && detail ? detail : "Failed to save recordings"
                );
            }

            setPendingFiles([]);
            setLanguage("multi");
            resetRecordingState();
            if (fileInputRef.current) fileInputRef.current.value = "";
            onUploadComplete?.();
            onOpenChange(false);
        } catch (err) {
            setError(err instanceof Error ? err.message : "Failed to upload recordings");
        } finally {
            setUploading(false);
        }
    };

    const isRecording = recordingStep === "recording";
    const anyTranscribing = pendingFiles.some((p) => p.isTranscribing);
    const readyCount = pendingFiles.filter((p) => p.transcript.trim() && !p.isTranscribing).length;
    const isBusy = uploading || isRecording || anyTranscribing;

    return (
        <Dialog open={open} onOpenChange={onOpenChange}>
            <DialogContent className="max-w-lg max-h-[80vh] overflow-y-auto">
                <DialogHeader>
                    <DialogTitle>Upload Recordings</DialogTitle>
                    <DialogDescription>
                        Upload or record audio files. Use{" "}
                        <code className="text-xs bg-muted px-1 rounded">@</code> in
                        prompt fields to insert them into your agents.
                    </DialogDescription>
                </DialogHeader>

                {error && (
                    <div className="text-sm text-destructive bg-destructive/10 rounded-md p-2">
                        {error}
                    </div>
                )}

                {/* Upload Section */}
                <div className="space-y-3">
                    {/* Audio source: file picker or record */}
                    <div>
                        <Label className="text-xs text-muted-foreground">Audio Files</Label>
                        <div className="flex gap-2">
                            <input
                                ref={fileInputRef}
                                type="file"
                                accept="audio/*"
                                multiple
                                onChange={(e) => handleFileSelect(e.target.files)}
                                className="hidden"
                            />
                            <Button
                                type="button"
                                variant="outline"
                                size="sm"
                                className="flex-1 justify-start text-sm font-normal"
                                onClick={() => fileInputRef.current?.click()}
                                disabled={isBusy}
                            >
                                <Upload className="w-4 h-4 mr-2 shrink-0" />
                                <span className="text-muted-foreground">Choose audio files (max 5MB each)</span>
                            </Button>
                            {recordingStep === "idle" && (
                                <Button
                                    type="button"
                                    variant="outline"
                                    size="sm"
                                    onClick={() => setRecordingStep("naming")}
                                    disabled={uploading || anyTranscribing}
                                >
                                    <Mic className="w-4 h-4 mr-1" />
                                    Record
                                </Button>
                            )}
                        </div>
                    </div>

                    {/* Recording: filename + start/stop */}
                    {(recordingStep === "naming" || isRecording) && (
                        <div className="space-y-2 rounded-md border border-dashed p-3 bg-muted/20">
                            {recordingStep === "naming" && (
                                <>
                                    <div>
                                        <Label className="text-xs text-muted-foreground">Recording Name</Label>
                                        <Input
                                            placeholder="e.g. greeting, hold-message"
                                            value={recordingFilename}
                                            onChange={(e) => setRecordingFilename(e.target.value)}
                                            autoFocus
                                        />
                                    </div>
                                    <div className="flex gap-2">
                                        <Button size="sm" onClick={startRecording} disabled={!recordingFilename.trim()}>
                                            <Mic className="w-4 h-4 mr-1" />
                                            Start Recording
                                        </Button>
                                        <Button size="sm" variant="ghost" onClick={resetRecordingState}>
                                            Cancel
                                        </Button>
                                    </div>
                                </>
                            )}
                            {isRecording && (
                                <div className="flex items-center gap-3">
                                    <span className="relative flex h-3 w-3">
                                        <span className="animate-ping absolute inline-flex h-full w-full rounded-full bg-red-400 opacity-75" />
                                        <span className="relative inline-flex rounded-full h-3 w-3 bg-red-500" />
                                    </span>
                                    <span className="text-sm font-mono">
                                        {Math.floor(recordingDuration / 60)}:{(recordingDuration % 60).toString().padStart(2, "0")}
                                    </span>
                                    <span className="text-xs text-muted-foreground">{recordingFilename}</span>
                                    <Button
                                        size="sm"
                                        variant="destructive"
                                        onClick={() => stopRecording()}
                                        className="ml-auto"
                                    >
                                        <Square className="w-4 h-4 mr-1" />
                                        Stop
                                    </Button>
                                </div>
                            )}
                        </div>
                    )}

                    {/* Pending files list */}
                    {pendingFiles.length > 0 && (
                        <div className="space-y-2">
                            <Label className="text-xs text-muted-foreground">
                                Pending ({pendingFiles.length} file{pendingFiles.length !== 1 ? "s" : ""})
                            </Label>
                            {pendingFiles.map((pf) => (
                                <div key={pf.id} className="rounded-md border p-2 space-y-1.5 bg-muted/10">
                                    <div className="flex items-center gap-2">
                                        <code className="text-xs bg-muted px-1.5 py-0.5 rounded font-mono truncate flex-1">
                                            {pf.file.name} ({(pf.file.size / (1024 * 1024)).toFixed(1)}MB)
                                        </code>
                                        {pf.isTranscribing && (
                                            <Loader2 className="w-3.5 h-3.5 animate-spin text-muted-foreground shrink-0" />
                                        )}
                                        <Button
                                            size="sm"
                                            variant="ghost"
                                            className="h-6 w-6 p-0 shrink-0"
                                            aria-label="Remove file"
                                            onClick={() => removePendingFile(pf.id)}
                                            disabled={uploading}
                                        >
                                            <X className="w-3.5 h-3.5" />
                                        </Button>
                                    </div>
                                    {pf.error && (
                                        <p className="text-xs text-destructive">{pf.error}</p>
                                    )}
                                    <Textarea
                                        placeholder={pf.isTranscribing ? "Transcribing..." : "What does this recording say?"}
                                        value={pf.transcript}
                                        onChange={(e) => updateTranscript(pf.id, e.target.value)}
                                        disabled={pf.isTranscribing}
                                        rows={2}
                                        className="resize-none text-sm"
                                    />
                                </div>
                            ))}
                        </div>
                    )}

                    {/* Language */}
                    <div>
                        <Label className="text-xs text-muted-foreground">Language</Label>
                        <Select value={language} onValueChange={setLanguage}>
                            <SelectTrigger className="h-9 text-sm">
                                <SelectValue />
                            </SelectTrigger>
                            <SelectContent>
                                {Object.entries(LANGUAGE_DISPLAY_NAMES).map(([code, name]) => (
                                    <SelectItem key={code} value={code}>
                                        {name}
                                    </SelectItem>
                                ))}
                            </SelectContent>
                        </Select>
                    </div>

                    <Button
                        size="sm"
                        onClick={handleUpload}
                        disabled={readyCount === 0 || isBusy}
                    >
                        {uploading ? (
                            <Loader2 className="w-4 h-4 mr-1 animate-spin" />
                        ) : (
                            <Upload className="w-4 h-4 mr-1" />
                        )}
                        {uploading
                            ? "Uploading..."
                            : `Upload ${readyCount} Recording${readyCount !== 1 ? "s" : ""}`}
                    </Button>
                </div>
            </DialogContent>
        </Dialog>
    );
};

View file

@ -0,0 +1,77 @@
"use client";
import { ExternalLink, Upload } from "lucide-react";
import { useEffect, useState } from "react";
import { Button } from "@/components/ui/button";
import { Card, CardContent, CardDescription, CardHeader, CardTitle } from "@/components/ui/card";
import { Skeleton } from "@/components/ui/skeleton";
import { useAuth } from "@/lib/auth";
import RecordingsList from "./RecordingsList";
import { RecordingsUploadDialog } from "./RecordingsUploadDialog";
/**
 * Recordings management page.
 *
 * Sends unauthenticated visitors to login, shows a skeleton while auth is
 * resolving, and otherwise renders the recordings list with an upload dialog.
 * A completed upload bumps a counter that makes the list re-fetch.
 */
export default function RecordingsPage() {
    const auth = useAuth();
    const { user, redirectToLogin, loading } = auth;
    const [uploadDialogOpen, setUploadDialogOpen] = useState(false);
    const [listVersion, setListVersion] = useState(0);

    useEffect(() => {
        // Nothing to do while auth is still loading or a user is present.
        if (loading || user) return;
        redirectToLogin();
    }, [loading, user, redirectToLogin]);

    const openUploadDialog = () => setUploadDialogOpen(true);
    const reloadRecordings = () => setListVersion((version) => version + 1);

    const isReady = !loading && Boolean(user);
    if (!isReady) {
        // Placeholder shown until auth resolves (or the redirect takes over).
        return (
            <div className="container mx-auto px-4 py-8">
                <div className="space-y-4">
                    <Skeleton className="h-12 w-64" />
                    <Skeleton className="h-64 w-full" />
                </div>
            </div>
        );
    }

    return (
        <div className="container mx-auto px-4 py-8">
            {/* Page heading and usage hint */}
            <div className="mb-8">
                <h1 className="text-3xl font-bold mb-2">Recordings</h1>
                <p className="text-muted-foreground">
                    Manage audio recordings for your organization. Use{" "}
                    <code className="rounded bg-muted px-1 text-xs">@</code> in prompt fields to insert them,
                    or as transition messages in tool calls.{" "}
                    <a href="https://docs.dograh.com/voice-agent/pre-recorded-audio" target="_blank" rel="noopener noreferrer" className="inline-flex items-center gap-0.5 underline">
                        Learn more <ExternalLink className="h-3 w-3" />
                    </a>
                </p>
            </div>

            {/* Recordings list with upload entry point */}
            <Card>
                <CardHeader>
                    <div className="flex justify-between items-center">
                        <div>
                            <CardTitle>All Recordings</CardTitle>
                            <CardDescription>
                                Audio recordings shared across all agents in your organization
                            </CardDescription>
                        </div>
                        <Button onClick={openUploadDialog}>
                            <Upload className="w-4 h-4 mr-2" />
                            Upload Recording
                        </Button>
                    </div>
                </CardHeader>
                <CardContent>
                    <RecordingsList refreshKey={listVersion} />
                </CardContent>
            </Card>

            <RecordingsUploadDialog
                open={uploadDialogOpen}
                onOpenChange={setUploadDialogOpen}
                onUploadComplete={reloadRecordings}
            />
        </div>
    );
}

View file

@ -2,6 +2,8 @@
import { AlertCircle } from "lucide-react";
import type { RecordingResponseSchema } from "@/client/types.gen";
import { RecordingSelect } from "@/components/flow/TextOrAudioInput";
import { Card, CardContent, CardDescription, CardHeader, CardTitle } from "@/components/ui/card";
import { Input } from "@/components/ui/input";
import { Label } from "@/components/ui/label";
@ -20,6 +22,9 @@ export interface EndCallToolConfigProps {
onMessageTypeChange: (messageType: EndCallMessageType) => void;
customMessage: string;
onCustomMessageChange: (message: string) => void;
audioRecordingId: string;
onAudioRecordingIdChange: (id: string) => void;
recordings?: RecordingResponseSchema[];
endCallReason: boolean;
onEndCallReasonChange: (enabled: boolean) => void;
endCallReasonDescription: string;
@ -35,6 +40,9 @@ export function EndCallToolConfig({
onMessageTypeChange,
customMessage,
onCustomMessageChange,
audioRecordingId,
onAudioRecordingIdChange,
recordings = [],
endCallReason,
onEndCallReasonChange,
endCallReasonDescription,
@ -148,6 +156,24 @@ export function EndCallToolConfig({
/>
</div>
)}
<div className="flex items-start space-x-3 p-3 border rounded-lg hover:bg-muted/50">
<RadioGroupItem value="audio" id="audio" className="mt-1" />
<label htmlFor="audio" className="flex-1 space-y-2 cursor-pointer">
<span className="font-medium">Pre-recorded Audio</span>
<p className="text-xs text-muted-foreground">
Play a pre-recorded audio file before disconnecting
</p>
</label>
</div>
{messageType === "audio" && (
<div className="pl-8">
<RecordingSelect
value={audioRecordingId}
onChange={onAudioRecordingIdChange}
recordings={recordings}
/>
</div>
)}
</RadioGroup>
</div>
</CardContent>

View file

@ -2,6 +2,8 @@
import { AlertCircle } from "lucide-react";
import type { RecordingResponseSchema } from "@/client/types.gen";
import { TextOrAudioInput } from "@/components/flow/TextOrAudioInput";
import {
CredentialSelector,
type HttpMethod,
@ -37,6 +39,11 @@ export interface HttpApiToolConfigProps {
onTimeoutMsChange: (timeout: number) => void;
customMessage: string;
onCustomMessageChange: (message: string) => void;
customMessageType: 'text' | 'audio';
onCustomMessageTypeChange: (type: 'text' | 'audio') => void;
customMessageRecordingId: string;
onCustomMessageRecordingIdChange: (id: string) => void;
recordings?: RecordingResponseSchema[];
}
export function HttpApiToolConfig({
@ -58,6 +65,11 @@ export function HttpApiToolConfig({
onTimeoutMsChange,
customMessage,
onCustomMessageChange,
customMessageType,
onCustomMessageTypeChange,
customMessageRecordingId,
onCustomMessageRecordingIdChange,
recordings = [],
}: HttpApiToolConfigProps) {
return (
<Card>
@ -136,18 +148,28 @@ export function HttpApiToolConfig({
<div className="grid gap-2 pt-4 border-t">
<Label>Custom Message</Label>
<Label className="text-xs text-muted-foreground">
Optional message the AI will speak before executing this tool (e.g., &quot;Let me look that up for you&quot;)
Optional message the AI will speak or play before executing this tool.
</Label>
<div className="flex items-start gap-2 rounded-md bg-amber-50 p-2 text-xs text-amber-700 border border-amber-200">
<AlertCircle className="h-3.5 w-3.5 mt-0.5 shrink-0" />
<span>This text is spoken as-is. For multilingual workflows, choose your phrasing carefully.</span>
</div>
<Textarea
value={customMessage}
onChange={(e) => onCustomMessageChange(e.target.value)}
placeholder="e.g., Let me check that for you, one moment please."
rows={2}
/>
<TextOrAudioInput
type={customMessageType}
onTypeChange={onCustomMessageTypeChange}
recordingId={customMessageRecordingId}
onRecordingIdChange={onCustomMessageRecordingIdChange}
recordings={recordings}
>
<>
<div className="flex items-start gap-2 rounded-md bg-amber-50 p-2 text-xs text-amber-700 border border-amber-200">
<AlertCircle className="h-3.5 w-3.5 mt-0.5 shrink-0" />
<span>This text is spoken as-is. For multilingual workflows, choose your phrasing carefully.</span>
</div>
<Textarea
value={customMessage}
onChange={(e) => onCustomMessageChange(e.target.value)}
placeholder="e.g., Let me check that for you, one moment please."
rows={2}
/>
</>
</TextOrAudioInput>
</div>
</TabsContent>

View file

@ -3,6 +3,8 @@
import { AlertCircle } from "lucide-react";
import {useState } from "react";
import type { RecordingResponseSchema } from "@/client/types.gen";
import { RecordingSelect } from "@/components/flow/TextOrAudioInput";
import { Card, CardContent, CardDescription, CardHeader, CardTitle } from "@/components/ui/card";
import { Input } from "@/components/ui/input";
import { Label } from "@/components/ui/label";
@ -22,6 +24,9 @@ export interface TransferCallToolConfigProps {
onMessageTypeChange: (messageType: EndCallMessageType) => void;
customMessage: string;
onCustomMessageChange: (message: string) => void;
audioRecordingId: string;
onAudioRecordingIdChange: (id: string) => void;
recordings?: RecordingResponseSchema[];
timeout?: number; // Make optional to match API type
onTimeoutChange: (timeout: number) => void;
}
@ -37,6 +42,9 @@ export function TransferCallToolConfig({
onMessageTypeChange,
customMessage,
onCustomMessageChange,
audioRecordingId,
onAudioRecordingIdChange,
recordings = [],
timeout,
onTimeoutChange,
}: TransferCallToolConfigProps) {
@ -181,6 +189,24 @@ export function TransferCallToolConfig({
/>
</div>
)}
<div className="flex items-start space-x-3 p-3 border rounded-lg hover:bg-muted/50">
<RadioGroupItem value="audio" id="audio" className="mt-1" />
<label htmlFor="audio" className="flex-1 space-y-2 cursor-pointer">
<span className="font-medium">Pre-recorded Audio</span>
<p className="text-xs text-muted-foreground">
Play a pre-recorded audio file before transferring
</p>
</label>
</div>
{messageType === "audio" && (
<div className="pl-8">
<RecordingSelect
value={audioRecordingId}
onChange={onAudioRecordingIdChange}
recordings={recordings}
/>
</div>
)}
</RadioGroup>
</div>

View file

@ -6,9 +6,10 @@ import { useCallback, useEffect, useState } from "react";
import {
getToolApiV1ToolsToolUuidGet,
listRecordingsApiV1WorkflowRecordingsGet,
updateToolApiV1ToolsToolUuidPut,
} from "@/client/sdk.gen";
import type { ToolResponse, TransferCallConfig as APITransferCallConfig } from "@/client/types.gen";
import type { RecordingResponseSchema, ToolResponse, TransferCallConfig as APITransferCallConfig } from "@/client/types.gen";
import type { EndCallConfig } from "@/client/types.gen";
import { type HttpMethod, type KeyValueItem, type ToolParameter, validateUrl } from "@/components/http";
import { Button } from "@/components/ui/button";
@ -75,6 +76,7 @@ export default function ToolDetailPage() {
const [endCallMessageType, setEndCallMessageType] = useState<EndCallMessageType>("none");
const [endCallReason, setEndCallReason] = useState(false);
const [endCallReasonDescription, setEndCallReasonDescription] = useState("");
const [audioRecordingId, setAudioRecordingId] = useState("");
const handleEndCallReasonChange = (enabled: boolean) => {
setEndCallReason(enabled);
@ -87,6 +89,14 @@ export default function ToolDetailPage() {
const [transferDestination, setTransferDestination] = useState("");
const [transferMessageType, setTransferMessageType] = useState<EndCallMessageType>("none");
const [transferTimeout, setTransferTimeout] = useState(30);
const [transferAudioRecordingId, setTransferAudioRecordingId] = useState("");
// HTTP API form state - custom message type
const [customMessageType, setCustomMessageType] = useState<'text' | 'audio'>('text');
const [customMessageRecordingId, setCustomMessageRecordingId] = useState("");
// Org-level recordings for audio dropdowns
const [recordings, setRecordings] = useState<RecordingResponseSchema[]>([]);
// Redirect if not authenticated
useEffect(() => {
@ -132,11 +142,14 @@ export default function ToolDetailPage() {
if (config) {
setEndCallMessageType(config.messageType || "none");
setCustomMessage(config.customMessage || "");
// eslint-disable-next-line @typescript-eslint/no-explicit-any
setAudioRecordingId((config as any).audioRecordingId || "");
setEndCallReason(config.endCallReason ?? false);
setEndCallReasonDescription(config.endCallReasonDescription || "");
} else {
setEndCallMessageType("none");
setCustomMessage("");
setAudioRecordingId("");
setEndCallReason(false);
setEndCallReasonDescription("");
}
@ -147,11 +160,14 @@ export default function ToolDetailPage() {
setTransferDestination(config.destination || "");
setTransferMessageType(config.messageType || "none");
setCustomMessage(config.customMessage || "");
// eslint-disable-next-line @typescript-eslint/no-explicit-any
setTransferAudioRecordingId((config as any).audioRecordingId || "");
setTransferTimeout(config.timeout ?? 30);
} else {
setTransferDestination("");
setTransferMessageType("none");
setCustomMessage("");
setTransferAudioRecordingId("");
setTransferTimeout(30);
}
} else {
@ -163,6 +179,10 @@ export default function ToolDetailPage() {
setCredentialUuid(config.credential_uuid || "");
setTimeoutMs(config.timeout_ms || 5000);
setCustomMessage(config.customMessage || "");
// eslint-disable-next-line @typescript-eslint/no-explicit-any
setCustomMessageType((config as any).customMessageType || "text");
// eslint-disable-next-line @typescript-eslint/no-explicit-any
setCustomMessageRecordingId((config as any).customMessageRecordingId || "");
// Convert headers object to array
if (config.headers) {
@ -193,9 +213,24 @@ export default function ToolDetailPage() {
}
};
// Loads the org-level recordings used by the audio dropdowns. Best-effort:
// a failed request simply leaves the list as it was.
const fetchRecordings = useCallback(async () => {
    // Hold off until auth has resolved to a signed-in user.
    const isSignedIn = !loading && Boolean(user);
    if (!isSignedIn) return;

    try {
        const { data } = await listRecordingsApiV1WorkflowRecordingsGet({
            query: {},
        });
        if (data) {
            setRecordings(data.recordings);
        }
    } catch {
        // Non-critical — dropdowns will show "No recordings available"
    }
}, [loading, user]);
useEffect(() => {
fetchTool();
}, [fetchTool]);
fetchRecordings();
}, [fetchTool, fetchRecordings]);
const handleSave = async () => {
if (!tool) return;
@ -259,6 +294,7 @@ export default function ToolDetailPage() {
config: {
messageType: endCallMessageType,
customMessage: endCallMessageType === "custom" ? customMessage : undefined,
audioRecordingId: endCallMessageType === "audio" ? audioRecordingId || undefined : undefined,
endCallReason,
endCallReasonDescription: endCallReason ? endCallReasonDescription || undefined : undefined,
},
@ -276,6 +312,7 @@ export default function ToolDetailPage() {
destination: transferDestination,
messageType: transferMessageType,
customMessage: transferMessageType === "custom" ? customMessage : undefined,
audioRecordingId: transferMessageType === "audio" ? transferAudioRecordingId || undefined : undefined,
timeout: transferTimeout,
},
},
@ -306,7 +343,9 @@ export default function ToolDetailPage() {
parameters:
validParameters.length > 0 ? validParameters : undefined,
timeout_ms: timeoutMs,
customMessage: customMessage || undefined,
customMessage: customMessageType === 'text' ? (customMessage || undefined) : undefined,
customMessageType,
customMessageRecordingId: customMessageType === 'audio' ? (customMessageRecordingId || undefined) : undefined,
},
},
};
@ -490,6 +529,9 @@ const data = await response.json();`;
onMessageTypeChange={setEndCallMessageType}
customMessage={customMessage}
onCustomMessageChange={setCustomMessage}
audioRecordingId={audioRecordingId}
onAudioRecordingIdChange={setAudioRecordingId}
recordings={recordings}
endCallReason={endCallReason}
onEndCallReasonChange={handleEndCallReasonChange}
endCallReasonDescription={endCallReasonDescription}
@ -507,6 +549,9 @@ const data = await response.json();`;
onMessageTypeChange={setTransferMessageType}
customMessage={customMessage}
onCustomMessageChange={setCustomMessage}
audioRecordingId={transferAudioRecordingId}
onAudioRecordingIdChange={setTransferAudioRecordingId}
recordings={recordings}
timeout={transferTimeout}
onTimeoutChange={setTransferTimeout}
/>
@ -530,6 +575,11 @@ const data = await response.json();`;
onTimeoutMsChange={setTimeoutMs}
customMessage={customMessage}
onCustomMessageChange={setCustomMessage}
customMessageType={customMessageType}
onCustomMessageTypeChange={setCustomMessageType}
customMessageRecordingId={customMessageRecordingId}
onCustomMessageRecordingIdChange={setCustomMessageRecordingId}
recordings={recordings}
/>
)}

View file

@ -14,7 +14,7 @@ import type {
export type ToolCategory = "http_api" | "end_call" | "transfer_call" | "calculator" | "native" | "integration";
export type EndCallMessageType = "none" | "custom";
export type EndCallMessageType = "none" | "custom" | "audio";
export interface ToolCategoryConfig {
value: ToolCategory;

View file

@ -15,7 +15,6 @@ import type { DocumentResponseSchema, RecordingResponseSchema, ToolResponse } fr
import { FlowEdge, FlowNode, NodeType } from "@/components/flow/types";
import { Button } from '@/components/ui/button';
import { Tooltip, TooltipContent, TooltipProvider, TooltipTrigger } from '@/components/ui/tooltip';
import { useUserConfig } from '@/context/UserConfigContext';
import { WorkflowConfigurations } from '@/types/workflow-configurations';
import AddNodePanel from "../../../components/flow/AddNodePanel";
@ -64,12 +63,6 @@ interface RenderWorkflowProps {
function RenderWorkflow({ initialWorkflowName, workflowId, initialFlow, initialTemplateContextVariables, initialWorkflowConfigurations, initialVersionNumber, initialVersionStatus, user }: RenderWorkflowProps) {
const router = useRouter();
const { userConfig } = useUserConfig();
const ttsOverrides = initialWorkflowConfigurations?.model_overrides?.tts;
const ttsProvider = ttsOverrides?.provider ?? (userConfig?.tts?.provider as string) ?? "";
const ttsModel = ttsOverrides?.model ?? (userConfig?.tts?.model as string) ?? "";
const ttsVoiceId = ttsOverrides?.voice ?? (userConfig?.tts?.voice as string) ?? "";
const [isPhoneCallDialogOpen, setIsPhoneCallDialogOpen] = useState(false);
const [isVersionPanelOpen, setIsVersionPanelOpen] = useState(false);
const [versions, setVersions] = useState<WorkflowVersion[]>([]);
@ -245,15 +238,10 @@ function RenderWorkflow({ initialWorkflowName, workflowId, initialFlow, initialT
setTools(toolsResponse.data);
}
// Fetch recordings for this workflow filtered by active TTS config
// Fetch org-level recordings
try {
const recordingsResponse = await listRecordingsApiV1WorkflowRecordingsGet({
query: {
workflow_id: workflowId,
tts_provider: ttsProvider || undefined,
tts_model: ttsModel || undefined,
tts_voice_id: ttsVoiceId || undefined,
},
query: {},
});
if (recordingsResponse.data) {
setRecordings(recordingsResponse.data.recordings);
@ -267,7 +255,7 @@ function RenderWorkflow({ initialWorkflowName, workflowId, initialFlow, initialT
};
fetchData();
}, [workflowId, ttsProvider, ttsModel, ttsVoiceId]);
}, [workflowId]);
// Memoize defaultEdgeOptions to prevent unnecessary re-renders
const defaultEdgeOptions = useMemo(() => ({

View file

@ -62,7 +62,6 @@ let pendingFileCounter = 0;
export const RecordingsDialog = ({
open,
onOpenChange,
workflowId,
onRecordingsChange,
ttsOverrides,
}: RecordingsDialogProps) => {
@ -89,12 +88,10 @@ export const RecordingsDialog = ({
const ttsVoiceId = ttsOverrides?.voice ?? (userConfig?.tts?.voice as string) ?? "";
const fetchRecordings = useCallback(async () => {
if (!workflowId) return;
setLoading(true);
try {
const result = await listRecordingsApiV1WorkflowRecordingsGet({
query: {
workflow_id: workflowId,
tts_provider: ttsProvider || undefined,
tts_model: ttsModel || undefined,
tts_voice_id: ttsVoiceId || undefined,
@ -108,7 +105,7 @@ export const RecordingsDialog = ({
} finally {
setLoading(false);
}
}, [workflowId, ttsProvider, ttsModel, ttsVoiceId, onRecordingsChange]);
}, [ttsProvider, ttsModel, ttsVoiceId, onRecordingsChange]);
const stopRecordingTimer = useCallback(() => {
if (recordingTimerRef.current) {
@ -277,7 +274,6 @@ export const RecordingsDialog = ({
const uploadUrlResponse =
await getUploadUrlsApiV1WorkflowRecordingsUploadUrlPost({
body: {
workflow_id: workflowId,
files: ready.map((p) => ({
filename: p.file.name,
mime_type: p.file.type || "audio/wav",
@ -314,7 +310,6 @@ export const RecordingsDialog = ({
body: {
recordings: items.map((item: RecordingUploadResponseSchema, idx: number) => ({
recording_id: item.recording_id,
workflow_id: workflowId,
tts_provider: ttsProvider,
tts_model: ttsModel,
tts_voice_id: ttsVoiceId,

View file

@ -446,6 +446,17 @@ export const useWebSocketRTC = ({ workflowId, workflowRunId, accessToken, initia
if (!firstBotSpeechCompletedRef.current) {
firstBotSpeechCompletedRef.current = true;
}
// Finalize the last bot message so "speaking..." indicator is removed
setFeedbackMessages(prev => {
const lastIdx = prev.length - 1;
const last = prev[lastIdx];
if (last && last.type === 'bot-text' && !last.final) {
const updated = [...prev];
updated[lastIdx] = { ...last, final: true };
return updated;
}
return prev;
});
break;
case 'rtf-user-mute-started':

View file

@ -1,6 +1,7 @@
"use client";
import { ArrowLeft, BookA, Brain, ExternalLink, Loader2, Mic, Pause, PhoneOff, Play, Rocket, Settings, Trash2Icon, Upload, Variable, X } from "lucide-react";
import Link from "next/link";
import { useParams, useRouter } from "next/navigation";
import { useEffect, useMemo, useRef, useState } from "react";
@ -19,6 +20,7 @@ import { Separator } from "@/components/ui/separator";
import { Switch } from "@/components/ui/switch";
import { Textarea } from "@/components/ui/textarea";
import { SETTINGS_DOCUMENTATION_URLS } from "@/constants/documentation";
import { UnsavedChangesProvider, useUnsavedChanges, useUnsavedChangesContext } from "@/context/UnsavedChangesContext";
import { useAudioPlayback } from "@/hooks/useAudioPlayback";
import { useAuth } from "@/lib/auth";
import logger from "@/lib/logger";
@ -32,7 +34,6 @@ import {
} from "@/types/workflow-configurations";
import { EmbedDialog } from "../components/EmbedDialog";
import { RecordingsDialog } from "../components/RecordingsDialog";
import { useWorkflowState } from "../hooks/useWorkflowState";
// ---------------------------------------------------------------------------
@ -113,6 +114,21 @@ function GeneralSection({
const ambientFileInputRef = useRef<HTMLInputElement>(null);
const { playingId, toggle: togglePlayback } = useAudioPlayback();
const isDirty = useMemo(() => {
const initAmbient = workflowConfigurations.ambient_noise_configuration || DEFAULT_AMBIENT_NOISE_CONFIG;
return (
name !== workflowName ||
JSON.stringify(ambientNoiseConfig) !== JSON.stringify(initAmbient) ||
maxCallDuration !== (workflowConfigurations.max_call_duration || 600) ||
maxUserIdleTimeout !== (workflowConfigurations.max_user_idle_timeout || 10) ||
smartTurnStopSecs !== (workflowConfigurations.smart_turn_stop_secs || 2) ||
turnStopStrategy !== (workflowConfigurations.turn_stop_strategy || "transcription") ||
contextCompactionEnabled !== (workflowConfigurations.context_compaction_enabled ?? false)
);
}, [name, workflowName, ambientNoiseConfig, maxCallDuration, maxUserIdleTimeout, smartTurnStopSecs, turnStopStrategy, contextCompactionEnabled, workflowConfigurations]);
useUnsavedChanges("general", isDirty);
const handleAmbientFileUpload = async (file: File) => {
if (file.size > MAX_AMBIENT_NOISE_FILE_SIZE) {
setAudioUploadError(`File too large (${(file.size / (1024 * 1024)).toFixed(1)}MB). Maximum is 10MB.`);
@ -463,8 +479,9 @@ function GeneralSection({
</div>
</div>
</CardContent>
<CardFooter className="justify-end border-t pt-6">
<Button onClick={handleSave} disabled={isSaving}>
<CardFooter className="justify-end gap-3 border-t pt-6">
{isDirty && <span className="text-xs text-muted-foreground">Unsaved changes</span>}
<Button onClick={handleSave} disabled={isSaving || !isDirty}>
{isSaving ? "Saving..." : "Save General Settings"}
</Button>
</CardFooter>
@ -488,6 +505,13 @@ function TemplateVariablesSection({
const [newValue, setNewValue] = useState("");
const [isSaving, setIsSaving] = useState(false);
const isDirty = useMemo(() => {
const pendingVars = newKey && newValue ? { ...contextVars, [newKey]: newValue } : contextVars;
return JSON.stringify(pendingVars) !== JSON.stringify(templateContextVariables);
}, [contextVars, newKey, newValue, templateContextVariables]);
useUnsavedChanges("variables", isDirty);
const handleAdd = () => {
if (newKey && newValue) {
setContextVars((prev) => ({ ...prev, [newKey]: newValue }));
@ -578,8 +602,9 @@ function TemplateVariablesSection({
</Button>
</div>
</CardContent>
<CardFooter className="justify-end border-t pt-6">
<Button onClick={handleSave} disabled={isSaving}>
<CardFooter className="justify-end gap-3 border-t pt-6">
{isDirty && <span className="text-xs text-muted-foreground">Unsaved changes</span>}
<Button onClick={handleSave} disabled={isSaving || !isDirty}>
{isSaving ? "Saving..." : "Save Variables"}
</Button>
</CardFooter>
@ -601,6 +626,10 @@ function DictionarySection({
const [dictionaryValue, setDictionaryValue] = useState(dictionary);
const [isSaving, setIsSaving] = useState(false);
const isDirty = dictionaryValue !== dictionary;
useUnsavedChanges("dictionary", isDirty);
const handleSave = async () => {
setIsSaving(true);
try {
@ -633,8 +662,9 @@ function DictionarySection({
className="resize-none"
/>
</CardContent>
<CardFooter className="justify-end border-t pt-6">
<Button onClick={handleSave} disabled={isSaving}>
<CardFooter className="justify-end gap-3 border-t pt-6">
{isDirty && <span className="text-xs text-muted-foreground">Unsaved changes</span>}
<Button onClick={handleSave} disabled={isSaving || !isDirty}>
{isSaving ? "Saving..." : "Save Dictionary"}
</Button>
</CardFooter>
@ -669,6 +699,24 @@ function VoicemailSection({
const [longSpeechTimeout, setLongSpeechTimeout] = useState(getConfig().long_speech_timeout);
const [isSaving, setIsSaving] = useState(false);
const isDirty = useMemo(() => {
const init = {
...DEFAULT_VOICEMAIL_DETECTION_CONFIGURATION,
...workflowConfigurations.voicemail_detection,
};
return (
enabled !== init.enabled ||
useWorkflowLlm !== init.use_workflow_llm ||
provider !== (init.provider || "openai") ||
model !== (init.model || "gpt-4.1") ||
apiKey !== (init.api_key || "") ||
systemPrompt !== (init.system_prompt || DEFAULT_VOICEMAIL_SYSTEM_PROMPT) ||
longSpeechTimeout !== init.long_speech_timeout
);
}, [enabled, useWorkflowLlm, provider, model, apiKey, systemPrompt, longSpeechTimeout, workflowConfigurations]);
useUnsavedChanges("voicemail", isDirty);
const handleSave = async () => {
setIsSaving(true);
try {
@ -772,8 +820,9 @@ function VoicemailSection({
</>
)}
</CardContent>
<CardFooter className="justify-end border-t pt-6">
<Button onClick={handleSave} disabled={isSaving}>
<CardFooter className="justify-end gap-3 border-t pt-6">
{isDirty && <span className="text-xs text-muted-foreground">Unsaved changes</span>}
<Button onClick={handleSave} disabled={isSaving || !isDirty}>
{isSaving ? "Saving..." : "Save Voicemail Settings"}
</Button>
</CardFooter>
@ -849,9 +898,23 @@ function WorkflowSettingsContent({
workflow: WorkflowResponse;
user: { id: string; email?: string };
}) {
const router = useRouter();
return (
<UnsavedChangesProvider>
<WorkflowSettingsInner workflow={workflow} user={user} />
</UnsavedChangesProvider>
);
}
function WorkflowSettingsInner({
workflow,
user,
}: {
workflow: WorkflowResponse;
user: { id: string; email?: string };
}) {
const router = useRouter();
const { dirtySections, confirmNavigate } = useUnsavedChangesContext();
const [isRecordingsDialogOpen, setIsRecordingsDialogOpen] = useState(false);
const [isEmbedDialogOpen, setIsEmbedDialogOpen] = useState(false);
const [activeSection, setActiveSection] = useState("general");
@ -921,7 +984,7 @@ function WorkflowSettingsContent({
<Button
variant="ghost"
size="icon"
onClick={() => router.push(`/workflow/${workflowId}`)}
onClick={() => confirmNavigate(() => router.push(`/workflow/${workflowId}`))}
>
<ArrowLeft className="h-4 w-4" />
</Button>
@ -993,7 +1056,7 @@ function WorkflowSettingsContent({
onSave={saveWorkflowConfigurations}
/>
{/* Recordings (dialog trigger) */}
{/* Recordings moved to org-level page */}
<Card id="recordings">
<CardHeader>
<CardTitle className="flex items-center gap-2 text-base">
@ -1001,15 +1064,17 @@ function WorkflowSettingsContent({
Recordings
</CardTitle>
<CardDescription>
Upload or record audio for hybrid prompts. Use{" "}
<code className="rounded bg-muted px-1 text-xs">@</code> in prompt fields to
insert them.{" "}
Recordings are now managed at the organization level and shared across all agents.
Use <code className="rounded bg-muted px-1 text-xs">@</code> in prompt fields to insert them.{" "}
<a href={SETTINGS_DOCUMENTATION_URLS.recordings} target="_blank" rel="noopener noreferrer" className="inline-flex items-center gap-0.5 underline">Learn more <ExternalLink className="h-3 w-3" /></a>
</CardDescription>
</CardHeader>
<CardFooter className="border-t pt-6">
<Button variant="outline" onClick={() => setIsRecordingsDialogOpen(true)}>
Manage Recordings
<Button variant="outline" asChild>
<Link href="/recordings">
Go to Recordings
<ExternalLink className="ml-2 h-4 w-4" />
</Link>
</Button>
</CardFooter>
</Card>
@ -1047,13 +1112,16 @@ function WorkflowSettingsContent({
<a
key={item.id}
href={`#${item.id}`}
className={`block rounded-md px-2 py-1 text-sm transition-colors hover:text-foreground ${
className={`flex items-center gap-1.5 rounded-md px-2 py-1 text-sm transition-colors hover:text-foreground ${
activeSection === item.id
? "font-medium text-foreground"
: "text-muted-foreground"
}`}
>
{item.label}
{dirtySections.has(item.id) && (
<span className="h-1.5 w-1.5 rounded-full bg-orange-500" />
)}
</a>
))}
</div>
@ -1061,12 +1129,6 @@ function WorkflowSettingsContent({
</div>
{/* Dialogs for complex sections */}
<RecordingsDialog
open={isRecordingsDialogOpen}
onOpenChange={setIsRecordingsDialogOpen}
workflowId={workflowId}
ttsOverrides={workflowConfigurations?.model_overrides?.tts}
/>
<EmbedDialog
open={isEmbedDialogOpen}
onOpenChange={setIsEmbedDialogOpen}

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -269,12 +269,6 @@ export type BatchRecordingCreateResponseSchema = {
* Request schema for getting presigned upload URLs for one or more files.
*/
export type BatchRecordingUploadRequestSchema = {
/**
* Workflow Id
*
* Workflow ID these recordings belong to
*/
workflow_id: number;
/**
* Files
*
@ -1572,13 +1566,19 @@ export type EndCallConfig = {
*
* Type of goodbye message
*/
messageType?: 'none' | 'custom';
messageType?: 'none' | 'custom' | 'audio';
/**
* Custommessage
*
* Custom message to play before ending the call
*/
customMessage?: string | null;
/**
* Audiorecordingid
*
* Recording ID for audio goodbye message
*/
audioRecordingId?: string | null;
/**
* Endcallreason
*
@ -1739,6 +1739,24 @@ export type HttpApiConfig = {
* Request timeout in milliseconds
*/
timeout_ms?: number | null;
/**
* Custommessage
*
* Custom message to play after tool execution
*/
customMessage?: string | null;
/**
* Custommessagetype
*
* Type of custom message: text or audio
*/
customMessageType?: 'text' | 'audio' | null;
/**
* Custommessagerecordingid
*
* Recording ID for audio custom message
*/
customMessageRecordingId?: string | null;
};
/**
@ -2102,30 +2120,24 @@ export type RecordingCreateRequestSchema = {
* Short recording ID from upload step
*/
recording_id: string;
/**
* Workflow Id
*
* Workflow ID
*/
workflow_id: number;
/**
* Tts Provider
*
* TTS provider (e.g. elevenlabs)
*/
tts_provider: string;
tts_provider?: string | null;
/**
* Tts Model
*
* TTS model name
*/
tts_model: string;
tts_model?: string | null;
/**
* Tts Voice Id
*
* TTS voice identifier
*/
tts_voice_id: string;
tts_voice_id?: string | null;
/**
* Transcript
*
@ -2181,7 +2193,7 @@ export type RecordingResponseSchema = {
/**
* Workflow Id
*/
workflow_id: number;
workflow_id?: number | null;
/**
* Organization Id
*/
@ -2189,15 +2201,15 @@ export type RecordingResponseSchema = {
/**
* Tts Provider
*/
tts_provider: string;
tts_provider?: string | null;
/**
* Tts Model
*/
tts_model: string;
tts_model?: string | null;
/**
* Tts Voice Id
*/
tts_voice_id: string;
tts_voice_id?: string | null;
/**
* Transcript
*/
@ -2230,6 +2242,20 @@ export type RecordingResponseSchema = {
is_active: boolean;
};
/**
* RecordingUpdateRequestSchema
*
* Request schema for updating a recording's ID.
*/
export type RecordingUpdateRequestSchema = {
/**
* Recording Id
*
* New descriptive recording ID (letters, numbers, hyphens, underscores only)
*/
recording_id: string;
};
/**
* RecordingUploadResponseSchema
*
@ -2814,13 +2840,19 @@ export type TransferCallConfig = {
*
* Type of message to play before transfer
*/
messageType?: 'none' | 'custom';
messageType?: 'none' | 'custom' | 'audio';
/**
* Custommessage
*
* Custom message to play before transferring the call
*/
customMessage?: string | null;
/**
* Audiorecordingid
*
* Recording ID for audio message before transfer
*/
audioRecordingId?: string | null;
/**
* Timeout
*
@ -8885,13 +8917,13 @@ export type ListRecordingsApiV1WorkflowRecordingsGetData = {
'X-API-Key'?: string | null;
};
path?: never;
query: {
query?: {
/**
* Workflow Id
*
* Workflow ID
* Filter by workflow ID
*/
workflow_id: number;
workflow_id?: number | null;
/**
* Tts Provider
*
@ -9017,6 +9049,50 @@ export type DeleteRecordingApiV1WorkflowRecordingsRecordingIdDeleteResponses = {
200: unknown;
};
export type UpdateRecordingApiV1WorkflowRecordingsIdPatchData = {
body: RecordingUpdateRequestSchema;
headers?: {
/**
* Authorization
*/
authorization?: string | null;
/**
* X-Api-Key
*/
'X-API-Key'?: string | null;
};
path: {
/**
* Id
*/
id: number;
};
query?: never;
url: '/api/v1/workflow-recordings/{id}';
};
export type UpdateRecordingApiV1WorkflowRecordingsIdPatchErrors = {
/**
* Not found
*/
404: unknown;
/**
* Validation Error
*/
422: HttpValidationError;
};
export type UpdateRecordingApiV1WorkflowRecordingsIdPatchError = UpdateRecordingApiV1WorkflowRecordingsIdPatchErrors[keyof UpdateRecordingApiV1WorkflowRecordingsIdPatchErrors];
export type UpdateRecordingApiV1WorkflowRecordingsIdPatchResponses = {
/**
* Successful Response
*/
200: RecordingResponseSchema;
};
export type UpdateRecordingApiV1WorkflowRecordingsIdPatchResponse = UpdateRecordingApiV1WorkflowRecordingsIdPatchResponses[keyof UpdateRecordingApiV1WorkflowRecordingsIdPatchResponses];
export type TranscribeAudioApiV1WorkflowRecordingsTranscribePostData = {
body: BodyTranscribeAudioApiV1WorkflowRecordingsTranscribePost;
headers?: {

View file

@ -0,0 +1,212 @@
import { Check, ChevronDown, Pause, Play, Search } from "lucide-react";
import { useId, useMemo, useState } from "react";

import type { RecordingResponseSchema } from "@/client/types.gen";
import { Button } from "@/components/ui/button";
import { Input } from "@/components/ui/input";
import { Label } from "@/components/ui/label";
import { Popover, PopoverContentInline, PopoverTrigger } from "@/components/ui/popover";
import { RadioGroup, RadioGroupItem } from "@/components/ui/radio-group";
import { useAudioPlayback } from "@/hooks/useAudioPlayback";
import { cn } from "@/lib/utils";
/** Props for the TextOrAudioInput text/audio toggle. */
interface TextOrAudioInputProps {
/** Currently selected mode; controls which radio is checked and what is rendered below the radios. */
type: 'text' | 'audio';
/** Called with the newly chosen mode when the user switches radios. */
onTypeChange: (type: 'text' | 'audio') => void;
/** Value forwarded to the recording dropdown; matched there against the stringified `id` of each recording. */
recordingId: string;
/** Called with the stringified `id` of the recording the user picks in the dropdown. */
onRecordingIdChange: (id: string) => void;
/** Recordings offered in the dropdown; the component defaults this to an empty list when omitted. */
recordings?: RecordingResponseSchema[];
/** Rendered when type === 'text' */
children: React.ReactNode;
}
/**
 * Radio toggle between a caller-supplied text input and a recording picker.
 *
 * Renders a two-option radio group ("Text" / "Audio"). When `type` is
 * 'text' the `children` are rendered as-is; otherwise a `RecordingSelect`
 * bound to `recordingId` / `onRecordingIdChange` is shown.
 *
 * @param type                Active mode.
 * @param onTypeChange        Invoked with the new mode when a radio is chosen.
 * @param recordingId         Selected recording value passed to the dropdown.
 * @param onRecordingIdChange Invoked when a recording is picked.
 * @param recordings          Recordings to list; defaults to an empty array.
 * @param children            Content rendered in text mode.
 */
export function TextOrAudioInput({
    type,
    onTypeChange,
    recordingId,
    onRecordingIdChange,
    recordings = [],
    children,
}: TextOrAudioInputProps) {
    // Ids must be unique per mounted instance: with fixed ids, a second
    // instance on the same page would produce duplicate DOM ids and its
    // labels would point at the first instance's radios.
    const idPrefix = useId();
    const textOptionId = `${idPrefix}-text`;
    const audioOptionId = `${idPrefix}-audio`;

    return (
        <>
            <RadioGroup
                value={type}
                onValueChange={(value) => onTypeChange(value as 'text' | 'audio')}
                className="flex items-center gap-4"
            >
                <div className="flex items-center gap-2">
                    <RadioGroupItem value="text" id={textOptionId} />
                    <Label htmlFor={textOptionId} className="font-normal cursor-pointer">Text</Label>
                </div>
                <div className="flex items-center gap-2">
                    <RadioGroupItem value="audio" id={audioOptionId} />
                    <Label htmlFor={audioOptionId} className="font-normal cursor-pointer">Audio</Label>
                </div>
            </RadioGroup>
            {type === 'text' ? (
                children
            ) : (
                <RecordingSelect
                    value={recordingId}
                    onChange={onRecordingIdChange}
                    recordings={recordings}
                />
            )}
        </>
    );
}
/** Props for the RecordingSelect dropdown. */
interface RecordingSelectProps {
/** Stringified `id` of the selected recording; when no recording matches, the placeholder is shown. */
value: string;
/** Called with `String(recording.id)` of the row the user clicks. */
onChange: (id: string) => void;
/** Recordings to list; an empty array renders the "No recordings available" state. */
recordings: RecordingResponseSchema[];
}
/**
 * Searchable dropdown to select a pre-recorded audio file, with a per-row
 * play/pause preview button.
 *
 * Exported so callers that only need the dropdown (e.g. tool configs with
 * their own none/custom/audio radio) can use it directly.
 *
 * `value` is matched against `String(recording.id)`; `onChange` receives the
 * same stringified id for the row the user picks. The search box filters on
 * recording id, transcript and `metadata.original_filename`
 * (case-insensitive substring match).
 */
export function RecordingSelect({ value, onChange, recordings }: RecordingSelectProps) {
    // Maximum transcript characters shown in the collapsed trigger button.
    const TRANSCRIPT_PREVIEW_LENGTH = 75;

    const [open, setOpen] = useState(false);
    const [search, setSearch] = useState("");
    const { playingId, toggle, stop } = useAudioPlayback();

    const selected = recordings.find((r) => String(r.id) === value);

    const filtered = useMemo(() => {
        if (!search) return recordings;
        const q = search.toLowerCase();
        return recordings.filter((r) =>
            r.recording_id.toLowerCase().includes(q) ||
            r.transcript.toLowerCase().includes(q) ||
            ((r.metadata?.original_filename as string) || "").toLowerCase().includes(q)
        );
    }, [recordings, search]);

    const handleSelect = (rec: RecordingResponseSchema) => {
        // NOTE(review): stop() presumably halts any preview still playing — confirm in useAudioPlayback.
        stop();
        onChange(String(rec.id));
        setOpen(false);
    };

    const handlePlay = async (e: React.MouseEvent, rec: RecordingResponseSchema) => {
        // The button sits inside the clickable row; keep the click from also selecting it.
        e.stopPropagation();
        try {
            await toggle(rec.recording_id, rec.storage_key, rec.storage_backend);
        } catch {
            // Ignore playback errors: preview is best-effort and must not block selection
        }
    };

    return (
        <div className="space-y-2">
            <Label className="text-xs text-muted-foreground">
                Select a pre-recorded audio file to play.
            </Label>
            <Popover
                modal
                open={open}
                onOpenChange={(v) => {
                    // On close, stop the preview and reset the search box.
                    if (!v) { stop(); setSearch(""); }
                    setOpen(v);
                }}
            >
                <PopoverTrigger asChild>
                    <Button
                        variant="outline"
                        role="combobox"
                        aria-expanded={open}
                        className="w-full justify-between h-auto min-h-9 font-normal"
                    >
                        {selected ? (
                            <span className="flex items-center gap-2 text-left">
                                <code className="text-xs bg-muted px-1 py-0.5 rounded font-mono shrink-0">
                                    {selected.recording_id}
                                </code>
                                <span className="text-sm">
                                    {/* Mark truncated transcripts with an ellipsis so the cut is visible. */}
                                    {selected.transcript.length > TRANSCRIPT_PREVIEW_LENGTH
                                        ? `${selected.transcript.slice(0, TRANSCRIPT_PREVIEW_LENGTH)}…`
                                        : selected.transcript}
                                </span>
                            </span>
                        ) : (
                            <span className="text-muted-foreground">Select a recording</span>
                        )}
                        <ChevronDown className="ml-2 h-4 w-4 shrink-0 opacity-50" />
                    </Button>
                </PopoverTrigger>
                <PopoverContentInline
                    className="w-[var(--radix-popover-trigger-width)] p-0"
                    align="start"
                >
                    {recordings.length === 0 ? (
                        <div className="p-3 text-sm text-muted-foreground text-center">
                            No recordings available
                        </div>
                    ) : (
                        <div>
                            <div className="p-2 border-b">
                                <div className="relative">
                                    <Search className="absolute left-2.5 top-1/2 -translate-y-1/2 h-3.5 w-3.5 text-muted-foreground" />
                                    <Input
                                        placeholder="Search by ID, transcript, or filename..."
                                        value={search}
                                        onChange={(e) => setSearch(e.target.value)}
                                        className="h-8 pl-8 text-sm"
                                        autoFocus
                                    />
                                </div>
                            </div>
                            <div className="max-h-56 overflow-y-auto">
                                {filtered.length === 0 ? (
                                    <div className="p-3 text-sm text-muted-foreground text-center">
                                        No recordings match &ldquo;{search}&rdquo;
                                    </div>
                                ) : filtered.map((r) => {
                                    const filename = (r.metadata?.original_filename as string) || "";
                                    const isSelected = String(r.id) === value;
                                    const isPlaying = playingId === r.recording_id;
                                    return (
                                        <div
                                            key={r.id}
                                            className={cn(
                                                "flex items-center gap-2 px-3 py-2 cursor-pointer hover:bg-accent transition-colors",
                                                isSelected && "bg-accent"
                                            )}
                                            onClick={() => handleSelect(r)}
                                        >
                                            <Check className={cn(
                                                "h-4 w-4 shrink-0",
                                                isSelected ? "opacity-100" : "opacity-0"
                                            )} />
                                            <code className="text-xs bg-muted px-1 py-0.5 rounded font-mono shrink-0">
                                                {r.recording_id}
                                            </code>
                                            {filename && (
                                                <span className="text-xs text-muted-foreground shrink-0 max-w-[100px] truncate">
                                                    {filename}
                                                </span>
                                            )}
                                            <span className="text-xs text-muted-foreground bg-muted/50 px-1.5 py-0.5 rounded truncate flex-1 min-w-0">
                                                {r.transcript}
                                            </span>
                                            <Button
                                                type="button"
                                                variant="ghost"
                                                size="sm"
                                                className="h-7 w-7 p-0 shrink-0"
                                                // Icon-only button: give it an accessible name for assistive tech.
                                                aria-label={isPlaying ? `Pause ${r.recording_id}` : `Play ${r.recording_id}`}
                                                onClick={(e) => handlePlay(e, r)}
                                            >
                                                {isPlaying ? (
                                                    <Pause className="h-3.5 w-3.5" />
                                                ) : (
                                                    <Play className="h-3.5 w-3.5" />
                                                )}
                                            </Button>
                                        </div>
                                    );
                                })}
                            </div>
                        </div>
                    )}
                </PopoverContentInline>
            </Popover>
        </div>
    );
}

View file

@ -4,6 +4,7 @@ import { useCallback, useEffect, useState } from 'react';
import { useWorkflow, useWorkflowOptional } from "@/app/workflow/[workflowId]/contexts/WorkflowContext";
import { useWorkflowStore } from "@/app/workflow/[workflowId]/stores/workflowStore";
import { TextOrAudioInput } from "@/components/flow/TextOrAudioInput";
import { Button } from "@/components/ui/button";
import { Dialog, DialogContent, DialogFooter, DialogHeader, DialogTitle } from "@/components/ui/dialog";
import { Input } from "@/components/ui/input";
@ -24,9 +25,12 @@ interface EdgeDetailsDialogProps {
const EdgeDetailsDialog = ({ open, onOpenChange, data, onSave }: EdgeDetailsDialogProps) => {
const readOnly = useWorkflowOptional()?.readOnly ?? false;
const { recordings } = useWorkflow();
const [condition, setCondition] = useState(data?.condition ?? '');
const [label, setLabel] = useState(data?.label ?? '');
const [transitionSpeech, setTransitionSpeech] = useState(data?.transition_speech ?? '');
const [transitionSpeechType, setTransitionSpeechType] = useState<'text' | 'audio'>(data?.transition_speech_type ?? 'text');
const [transitionSpeechRecordingId, setTransitionSpeechRecordingId] = useState(data?.transition_speech_recording_id ?? '');
// Update form state when data changes (e.g., from undo/redo)
useEffect(() => {
@ -34,13 +38,21 @@ const EdgeDetailsDialog = ({ open, onOpenChange, data, onSave }: EdgeDetailsDial
setCondition(data?.condition ?? '');
setLabel(data?.label ?? '');
setTransitionSpeech(data?.transition_speech ?? '');
setTransitionSpeechType(data?.transition_speech_type ?? 'text');
setTransitionSpeechRecordingId(data?.transition_speech_recording_id ?? '');
}
}, [data, open]);
const handleSave = useCallback(() => {
onSave({ condition: condition, label: label, transition_speech: transitionSpeech || undefined });
onSave({
condition,
label,
transition_speech: transitionSpeechType === 'text' ? (transitionSpeech || undefined) : undefined,
transition_speech_type: transitionSpeechType,
transition_speech_recording_id: transitionSpeechType === 'audio' ? (transitionSpeechRecordingId || undefined) : undefined,
});
onOpenChange(false);
}, [condition, label, transitionSpeech, onSave, onOpenChange]);
}, [condition, label, transitionSpeech, transitionSpeechType, transitionSpeechRecordingId, onSave, onOpenChange]);
// Handle Cmd+S / Ctrl+S keyboard shortcut to save
useEffect(() => {
@ -60,7 +72,7 @@ const EdgeDetailsDialog = ({ open, onOpenChange, data, onSave }: EdgeDetailsDial
return (
<Dialog open={open} onOpenChange={onOpenChange}>
<DialogContent>
<DialogContent className="max-h-[85vh] flex flex-col">
<DialogHeader>
<DialogTitle>Edit Condition</DialogTitle>
{data?.invalid && data.validationMessage && (
@ -70,7 +82,7 @@ const EdgeDetailsDialog = ({ open, onOpenChange, data, onSave }: EdgeDetailsDial
</div>
)}
</DialogHeader>
<div className="grid gap-4 py-4">
<div className="grid gap-4 py-4 overflow-y-auto">
<div className="grid gap-2">
<Label>Condition Label</Label>
<Label className="text-xs text-muted-foreground">
@ -99,18 +111,28 @@ const EdgeDetailsDialog = ({ open, onOpenChange, data, onSave }: EdgeDetailsDial
<div className="grid gap-2">
<Label>Transition Speech</Label>
<Label className="text-xs text-muted-foreground">
Optional text the assistant will speak right before transitioning to the node.
This text will not be attached in Conversation Context. Use this as simple filler to reduce latency.
Optional text or audio the assistant will play right before transitioning to the node.
This will not be attached in Conversation Context. Use this as simple filler to reduce latency.
</Label>
<div className="flex items-start gap-2 rounded-md bg-amber-50 p-2 text-xs text-amber-700 border border-amber-200">
<AlertCircle className="h-3.5 w-3.5 mt-0.5 shrink-0" />
<span>This text is spoken as-is. For multilingual workflows, choose your phrasing carefully.</span>
</div>
<Textarea
value={transitionSpeech}
placeholder="e.g. Let me transfer you to our billing department..."
onChange={(e) => setTransitionSpeech(e.target.value)}
/>
<TextOrAudioInput
type={transitionSpeechType}
onTypeChange={setTransitionSpeechType}
recordingId={transitionSpeechRecordingId}
onRecordingIdChange={setTransitionSpeechRecordingId}
recordings={recordings ?? []}
>
<>
<div className="flex items-start gap-2 rounded-md bg-amber-50 p-2 text-xs text-amber-700 border border-amber-200">
<AlertCircle className="h-3.5 w-3.5 mt-0.5 shrink-0" />
<span>This text is spoken as-is. For multilingual workflows, choose your phrasing carefully.</span>
</div>
<Textarea
value={transitionSpeech}
placeholder="e.g. Let me transfer you to our billing department..."
onChange={(e) => setTransitionSpeech(e.target.value)}
/>
</>
</TextOrAudioInput>
</div>
</div>
<DialogFooter>

View file

@ -8,6 +8,7 @@ import type { RecordingResponseSchema } from "@/client/types.gen";
import { DocumentBadges } from "@/components/flow/DocumentBadges";
import { DocumentSelector } from "@/components/flow/DocumentSelector";
import { MentionTextarea } from "@/components/flow/MentionTextarea";
import { TextOrAudioInput } from "@/components/flow/TextOrAudioInput";
import { ToolBadges } from "@/components/flow/ToolBadges";
import { ToolSelector } from "@/components/flow/ToolSelector";
import { ExtractionVariable, FlowNodeData } from "@/components/flow/types";
@ -26,8 +27,12 @@ import { useNodeHandlers } from "./common/useNodeHandlers";
interface StartCallEditFormProps {
nodeData: FlowNodeData;
greetingType: 'text' | 'audio';
setGreetingType: (value: 'text' | 'audio') => void;
greeting: string;
setGreeting: (value: string) => void;
greetingRecordingId: string;
setGreetingRecordingId: (value: string) => void;
prompt: string;
setPrompt: (value: string) => void;
name: string;
@ -73,7 +78,9 @@ export const StartCall = memo(({ data, selected, id }: StartCallNodeProps) => {
const { saveWorkflow, tools, documents, recordings } = useWorkflow();
// Form state
const [greetingType, setGreetingType] = useState<'text' | 'audio'>(data.greeting_type ?? "text");
const [greeting, setGreeting] = useState(data.greeting ?? "");
const [greetingRecordingId, setGreetingRecordingId] = useState(data.greeting_recording_id ?? "");
const [prompt, setPrompt] = useState(data.prompt ?? "");
const [name, setName] = useState(data.name);
const [allowInterrupt, setAllowInterrupt] = useState(data.allow_interrupt ?? true);
@ -109,7 +116,9 @@ export const StartCall = memo(({ data, selected, id }: StartCallNodeProps) => {
handleSaveNodeData({
...data,
greeting: greeting || undefined,
greeting_type: greetingType,
greeting: greetingType === 'text' ? (greeting || undefined) : undefined,
greeting_recording_id: greetingType === 'audio' ? (greetingRecordingId || undefined) : undefined,
prompt,
name,
allow_interrupt: allowInterrupt,
@ -132,7 +141,9 @@ export const StartCall = memo(({ data, selected, id }: StartCallNodeProps) => {
// Reset form state when dialog opens
const handleOpenChange = (newOpen: boolean) => {
if (newOpen) {
setGreetingType(data.greeting_type ?? "text");
setGreeting(data.greeting ?? "");
setGreetingRecordingId(data.greeting_recording_id ?? "");
setPrompt(data.prompt ?? "");
setName(data.name);
setAllowInterrupt(data.allow_interrupt ?? true);
@ -154,7 +165,9 @@ export const StartCall = memo(({ data, selected, id }: StartCallNodeProps) => {
// Update form state when data changes (e.g., from undo/redo)
useEffect(() => {
if (open) {
setGreetingType(data.greeting_type ?? "text");
setGreeting(data.greeting ?? "");
setGreetingRecordingId(data.greeting_recording_id ?? "");
setPrompt(data.prompt ?? "");
setName(data.name);
setAllowInterrupt(data.allow_interrupt ?? true);
@ -247,8 +260,12 @@ export const StartCall = memo(({ data, selected, id }: StartCallNodeProps) => {
{open && (
<StartCallEditForm
nodeData={data}
greetingType={greetingType}
setGreetingType={setGreetingType}
greeting={greeting}
setGreeting={setGreeting}
greetingRecordingId={greetingRecordingId}
setGreetingRecordingId={setGreetingRecordingId}
prompt={prompt}
setPrompt={setPrompt}
name={name}
@ -288,8 +305,12 @@ export const StartCall = memo(({ data, selected, id }: StartCallNodeProps) => {
});
const StartCallEditForm = ({
greetingType,
setGreetingType,
greeting,
setGreeting,
greetingRecordingId,
setGreetingRecordingId,
prompt,
setPrompt,
name,
@ -362,15 +383,22 @@ const StartCallEditForm = ({
<Label>Greeting</Label>
<Label className="text-xs text-muted-foreground">
Optional greeting message played via TTS when the call starts. If set, this will be spoken directly instead of generating a response from the LLM. Supports template variables like {"{{variable_name}}"}.
Optional greeting played when the call starts. Choose between a text message (spoken via TTS) or a pre-recorded audio file.
</Label>
<MentionTextarea
value={greeting}
onChange={setGreeting}
className="min-h-[60px] max-h-[200px] resize-none overflow-y-auto"
placeholder="e.g. Hello {{first_name}}, this is Sarah calling from Acme Corp."
<TextOrAudioInput
type={greetingType}
onTypeChange={setGreetingType}
recordingId={greetingRecordingId}
onRecordingIdChange={setGreetingRecordingId}
recordings={recordings}
/>
>
<Textarea
value={greeting}
onChange={(e) => setGreeting(e.target.value)}
className="min-h-[60px] max-h-[200px] resize-none overflow-y-auto"
placeholder="e.g. Hello {{first_name}}, this is Sarah calling from Acme Corp."
/>
</TextOrAudioInput>
<Label>Prompt</Label>
<Label className="text-xs text-muted-foreground">

View file

@ -24,6 +24,8 @@ export type FlowNodeData = {
extraction_variables?: ExtractionVariable[];
add_global_prompt?: boolean;
greeting?: string;
greeting_type?: 'text' | 'audio';
greeting_recording_id?: string;
wait_for_user_greeting?: boolean;
detect_voicemail?: boolean;
delayed_start?: boolean;
@ -79,6 +81,8 @@ export type FlowEdgeData = {
condition: string;
label: string;
transition_speech?: string;
transition_speech_type?: 'text' | 'audio';
transition_speech_recording_id?: string;
invalid?: boolean;
validationMessage?: string | null;
}

View file

@ -2,6 +2,7 @@
import type { Team } from "@stackframe/stack";
import {
AudioLines,
Brain,
ChevronLeft,
ChevronRight,
@ -135,6 +136,11 @@ export function AppSidebar() {
url: "/files",
icon: Database,
},
{
title: "Recordings",
url: "/recordings",
icon: AudioLines,
},
// {
// title: "Integrations",
// url: "/integrations",

View file

@ -56,6 +56,23 @@ function DialogContent({
<DialogOverlay />
<DialogPrimitive.Content
onOpenAutoFocus={e => e.preventDefault()}
onCloseAutoFocus={() => {
document.body.style.pointerEvents = "";
}}
onPointerDownOutside={(e) => {
// Prevent the Dialog from closing when the user clicks inside a
// portaled Radix Popover/DropdownMenu rendered on top of this Dialog.
const target = e.target as HTMLElement;
if (target.closest('[data-radix-popper-content-wrapper]')) {
e.preventDefault();
}
}}
onInteractOutside={(e) => {
const target = e.target as HTMLElement;
if (target.closest('[data-radix-popper-content-wrapper]')) {
e.preventDefault();
}
}}
data-slot="dialog-content"
className={cn(
"bg-background data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 data-[state=closed]:zoom-out-95 data-[state=open]:zoom-in-95 fixed top-[50%] left-[50%] z-50 grid w-full max-w-[calc(100%-2rem)] translate-x-[-50%] translate-y-[-50%] gap-4 rounded-lg border p-6 shadow-lg duration-200 sm:max-w-lg",

View file

@ -17,6 +17,9 @@ function PopoverTrigger({
return <PopoverPrimitive.Trigger data-slot="popover-trigger" {...props} />
}
const popoverContentClass =
"bg-popover text-popover-foreground data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 data-[state=closed]:zoom-out-95 data-[state=open]:zoom-in-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 z-50 w-72 origin-(--radix-popover-content-transform-origin) rounded-md border p-4 shadow-md outline-hidden"
function PopoverContent({
className,
align = "center",
@ -29,20 +32,38 @@ function PopoverContent({
data-slot="popover-content"
align={align}
sideOffset={sideOffset}
className={cn(
"bg-popover text-popover-foreground data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 data-[state=closed]:zoom-out-95 data-[state=open]:zoom-in-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 z-50 w-72 origin-(--radix-popover-content-transform-origin) rounded-md border p-4 shadow-md outline-hidden",
className
)}
className={cn(popoverContentClass, className)}
{...props}
/>
</PopoverPrimitive.Portal>
)
}
/**
 * Portal-free variant of PopoverContent: the content element is rendered in
 * place within the DOM tree rather than inside a Portal, which avoids
 * focus-trap conflicts when the popover is used inside a Dialog.
 *
 * Accepts the same props as `PopoverPrimitive.Content`; `align` defaults to
 * "center" and `sideOffset` to 4. A caller-supplied `className` is merged
 * with the shared popover styling.
 */
function PopoverContentInline(
  props: React.ComponentProps<typeof PopoverPrimitive.Content>
) {
  const { className, align = "center", sideOffset = 4, ...contentProps } = props
  const mergedClassName = cn(popoverContentClass, className)

  return (
    <PopoverPrimitive.Content
      data-slot="popover-content"
      align={align}
      sideOffset={sideOffset}
      className={mergedClassName}
      {...contentProps}
    />
  )
}
/**
 * Thin wrapper around `PopoverPrimitive.Anchor` that tags the element with
 * `data-slot="popover-anchor"` and forwards every prop unchanged.
 */
function PopoverAnchor(
  props: React.ComponentProps<typeof PopoverPrimitive.Anchor>
) {
  const anchorProps = { ...props }
  return <PopoverPrimitive.Anchor data-slot="popover-anchor" {...anchorProps} />
}
export { Popover, PopoverAnchor,PopoverContent, PopoverTrigger }
export { Popover, PopoverAnchor, PopoverContent, PopoverContentInline, PopoverTrigger }

View file

@ -0,0 +1,269 @@
"use client";
import { createContext, useCallback, useContext, useEffect, useLayoutEffect, useRef, useState } from "react";
import {
AlertDialog,
AlertDialogAction,
AlertDialogCancel,
AlertDialogContent,
AlertDialogDescription,
AlertDialogFooter,
AlertDialogHeader,
AlertDialogTitle,
} from "@/components/ui/alert-dialog";
// ---------------------------------------------------------------------------
// Context
// ---------------------------------------------------------------------------
/**
 * Shape of the value exposed by UnsavedChangesContext to descendants of
 * UnsavedChangesProvider.
 */
interface UnsavedChangesContextValue {
/** Record a section's dirty state: the id is tracked while `isDirty` is true and dropped when it is false. */
register: (id: string, isDirty: boolean) => void;
/** Stop tracking a section id (no-op when the id is not currently tracked). */
unregister: (id: string) => void;
/** True when at least one registered section is currently dirty. */
hasDirtyChanges: boolean;
/** Ids of the sections that are currently dirty. */
dirtySections: Set<string>;
/** Wrap programmatic navigation (e.g. router.push) to guard against unsaved changes. */
confirmNavigate: (navigate: () => void) => void;
}
// Defaults to null so the hooks can detect usage outside of a provider.
const UnsavedChangesContext = createContext<UnsavedChangesContextValue | null>(null);
// ---------------------------------------------------------------------------
// Provider
// ---------------------------------------------------------------------------
/**
 * Wraps a page to guard against accidental navigation when sections have
 * unsaved changes. Intercepts:
 *
 * - In-app link clicks (document-level click capture on `<a>` tags)
 * - Browser back / forward (`popstate` with history-state tracking)
 *
 * Programmatic navigation is not intercepted automatically; callers wrap it
 * with `confirmNavigate` from the context.
 *
 * Sections register via the `useUnsavedChanges` hook.
 */
export function UnsavedChangesProvider({ children }: { children: React.ReactNode }) {
  const [dirtySections, setDirtySections] = useState<Set<string>>(new Set());
  const [showDialog, setShowDialog] = useState(false);
  // Navigation callback waiting for the user's answer in the dialog.
  const pendingNavigate = useRef<(() => void) | null>(null);

  const hasDirtyChanges = dirtySections.size > 0;
  // Mirror the dirty flag in a ref so the long-lived DOM listeners below
  // read the latest value without having to be re-subscribed.
  const hasDirtyRef = useRef(hasDirtyChanges);
  hasDirtyRef.current = hasDirtyChanges;

  // -- Section registration ------------------------------------------------

  const register = useCallback((id: string, isDirty: boolean) => {
    setDirtySections((prev) => {
      // Already in the requested state: return the same Set so React can
      // bail out instead of re-rendering the whole subtree.
      if (prev.has(id) === isDirty) return prev;
      const next = new Set(prev);
      if (isDirty) next.add(id);
      else next.delete(id);
      return next;
    });
  }, []);

  const unregister = useCallback((id: string) => {
    setDirtySections((prev) => {
      if (!prev.has(id)) return prev;
      const next = new Set(prev);
      next.delete(id);
      return next;
    });
  }, []);

  // -- Helper: prompt or proceed -------------------------------------------

  const askOrProceed = useCallback((proceed: () => void) => {
    if (!hasDirtyRef.current) {
      proceed();
      return;
    }
    pendingNavigate.current = proceed;
    // Open the dialog on the next tick, after the triggering event has
    // finished dispatching.
    setTimeout(() => setShowDialog(true), 0);
  }, []);

  // -- 1. Intercept <a> clicks in capture phase -----------------------------
  //
  // Next.js <Link> renders <a> tags. By listening in the capture phase we
  // intercept the click before React / Next.js processes it. If the user
  // confirms, we navigate programmatically via window.location.
  useEffect(() => {
    // Matches any href that starts with a URL scheme (http:, https:,
    // mailto:, tel:, ...). Such links either leave the app or do not
    // unload the page at all, so they are not guarded here.
    const hasScheme = /^[a-z][a-z0-9+.-]*:/i;

    const handleClick = (e: MouseEvent) => {
      if (!hasDirtyRef.current) return;

      const target = e.target;
      if (!(target instanceof Element)) return;

      const link = target.closest("a[href]") as HTMLAnchorElement | null;
      if (!link) return;

      const href = link.getAttribute("href");
      if (!href) return;

      // Skip external / scheme-qualified and protocol-relative links
      if (hasScheme.test(href) || href.startsWith("//")) return;

      // Skip hash-only links (in-page anchors)
      if (href.startsWith("#")) return;

      // Skip links that open in a new tab/window
      if (link.target && link.target !== "_self") return;

      // Skip download links
      if (link.hasAttribute("download")) return;

      // Skip if modifier keys are held (Ctrl+click, Cmd+click, etc.)
      if (e.metaKey || e.ctrlKey || e.shiftKey || e.altKey) return;

      // Skip non-left clicks
      if (e.button !== 0) return;

      // Block the navigation and ask the user
      e.preventDefault();
      e.stopPropagation();
      e.stopImmediatePropagation();

      askOrProceed(() => {
        // Navigate after user confirms
        window.location.href = href;
      });
    };

    // Capture phase so we fire before React / Next.js handlers
    document.addEventListener("click", handleClick, true);
    return () => document.removeEventListener("click", handleClick, true);
  }, [askOrProceed]);

  // -- 2. Browser back / forward (`popstate`) ------------------------------
  //
  // When the browser fires popstate the URL has already changed. We undo
  // the traversal with history.go(-delta), then show the dialog. If the
  // user confirms, we replay it with history.go(delta).
  useLayoutEffect(() => {
    // Track our own history stack index so we can correctly reverse
    // back/forward regardless of how many entries deep we are.
    let stackIndex = (history.state?.__unsaved_guard_index as number) ?? 0;

    // Keep the exact references that were installed before we patched, so
    // cleanup restores them as-is rather than leaving a wrapper behind.
    const previousPushState = history.pushState;
    const previousReplaceState = history.replaceState;

    // Augment pushState to track stack depth
    history.pushState = function (state, unused, url) {
      stackIndex++;
      const augmented = { ...state, __unsaved_guard_index: stackIndex };
      return previousPushState.call(history, augmented, unused, url);
    };

    // replaceState keeps the current depth but must carry the index along
    history.replaceState = function (state, unused, url) {
      const augmented = { ...state, __unsaved_guard_index: stackIndex };
      return previousReplaceState.call(history, augmented, unused, url);
    };

    // Write initial index if not present
    if (history.state?.__unsaved_guard_index == null) {
      previousReplaceState.call(
        history,
        { ...history.state, __unsaved_guard_index: stackIndex },
        "",
        location.href,
      );
    }

    const handlePopState = (e: PopStateEvent) => {
      if (!hasDirtyRef.current) {
        // Not dirty — accept navigation, update our tracked index
        stackIndex = (e.state?.__unsaved_guard_index as number) ?? stackIndex;
        return;
      }

      const nextIndex = (e.state?.__unsaved_guard_index as number) ?? 0;
      const delta = nextIndex - stackIndex;
      // delta === 0 is the popstate produced by our own history.go() calls
      // below (the undo, or the replay after stackIndex was updated).
      if (delta === 0) return;

      // Undo the navigation the browser already did
      history.go(-delta);

      askOrProceed(() => {
        // User confirmed — replay the navigation
        stackIndex = nextIndex;
        history.go(delta);
      });
    };

    window.addEventListener("popstate", handlePopState);
    return () => {
      history.pushState = previousPushState;
      history.replaceState = previousReplaceState;
      window.removeEventListener("popstate", handlePopState);
    };
  }, [askOrProceed]);

  // -- Dialog handlers -----------------------------------------------------

  const handleConfirm = useCallback(() => {
    setShowDialog(false);
    // Clear the pending callback before invoking it so it runs at most once.
    const nav = pendingNavigate.current;
    pendingNavigate.current = null;
    nav?.();
  }, []);

  const handleCancel = useCallback(() => {
    setShowDialog(false);
    pendingNavigate.current = null;
  }, []);

  // -- Render --------------------------------------------------------------

  return (
    <UnsavedChangesContext.Provider
      value={{ register, unregister, hasDirtyChanges, dirtySections, confirmNavigate: askOrProceed }}
    >
      {children}
      <AlertDialog open={showDialog} onOpenChange={(open) => { if (!open) handleCancel(); }}>
        <AlertDialogContent>
          <AlertDialogHeader>
            <AlertDialogTitle>Unsaved changes</AlertDialogTitle>
            <AlertDialogDescription>
              You have unsaved changes that will be lost. Are you sure you want to leave?
            </AlertDialogDescription>
          </AlertDialogHeader>
          <AlertDialogFooter>
            <AlertDialogCancel onClick={handleCancel}>Stay on page</AlertDialogCancel>
            <AlertDialogAction onClick={handleConfirm}>Discard changes</AlertDialogAction>
          </AlertDialogFooter>
        </AlertDialogContent>
      </AlertDialog>
    </UnsavedChangesContext.Provider>
  );
}
// ---------------------------------------------------------------------------
// Hooks
// ---------------------------------------------------------------------------
/**
 * Report a section's dirty flag to the nearest UnsavedChangesProvider.
 *
 * The flag is re-sent whenever `sectionId` or `isDirty` changes, and the
 * section is unregistered when the component unmounts or `sectionId`
 * changes. Throws when no provider is present above the caller.
 *
 * @example
 * useUnsavedChanges("general", isDirty);
 */
export function useUnsavedChanges(sectionId: string, isDirty: boolean) {
  const context = useContext(UnsavedChangesContext);
  if (context === null) {
    throw new Error("useUnsavedChanges must be used within UnsavedChangesProvider");
  }

  const registerSection = context.register;
  const unregisterSection = context.unregister;

  // Push the latest dirty flag to the provider.
  useEffect(() => {
    registerSection(sectionId, isDirty);
  }, [sectionId, isDirty, registerSection]);

  // Separate effect so that a change of `isDirty` alone never unregisters.
  useEffect(() => {
    return () => {
      unregisterSection(sectionId);
    };
  }, [sectionId, unregisterSection]);
}
/**
 * Return the full unsaved-changes context value (e.g. to read
 * `dirtySections` or call `confirmNavigate`).
 *
 * Throws when called outside of an UnsavedChangesProvider.
 */
export function useUnsavedChangesContext() {
  const context = useContext(UnsavedChangesContext);
  if (context === null) {
    throw new Error("useUnsavedChangesContext must be used within UnsavedChangesProvider");
  }
  return context;
}