mirror of
https://github.com/dograh-hq/dograh.git
synced 2026-06-07 07:55:16 +02:00
feat: add recording audio option in tool and node transitions (#232)
* feat: allow uploading recording as part of node transition * feat: allow recordings in tool transitions * chore: fix tests
This commit is contained in:
parent
3f19a16e7f
commit
7c245051d2
54 changed files with 3575 additions and 640 deletions
108
api/alembic/versions/3cd3155084a2_dedup_org_scoped_recordings.py
Normal file
108
api/alembic/versions/3cd3155084a2_dedup_org_scoped_recordings.py
Normal file
|
|
@ -0,0 +1,108 @@
|
|||
"""dedup recordings to org-scoped unique audio
|
||||
|
||||
Revision ID: 3cd3155084a2
|
||||
Revises: e7254d2c6c18
|
||||
Create Date: 2026-04-10 12:00:00.000000
|
||||
|
||||
"""
|
||||
|
||||
from typing import Sequence, Union
|
||||
|
||||
import sqlalchemy as sa
|
||||
from alembic import op
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision: str = "3cd3155084a2"
|
||||
down_revision: Union[str, None] = "e7254d2c6c18"
|
||||
branch_labels: Union[str, Sequence[str], None] = None
|
||||
depends_on: Union[str, Sequence[str], None] = None
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
conn = op.get_bind()
|
||||
|
||||
# 1. Identify duplicate groups: same (org, transcript, tts config).
|
||||
# Within each group the earliest row (by created_at) is canonical;
|
||||
# every other row is an alias that will be remapped and soft-deleted.
|
||||
rows = conn.execute(
|
||||
sa.text("""
|
||||
WITH ranked AS (
|
||||
SELECT
|
||||
recording_id,
|
||||
organization_id,
|
||||
transcript,
|
||||
tts_provider,
|
||||
tts_model,
|
||||
tts_voice_id,
|
||||
ROW_NUMBER() OVER (
|
||||
PARTITION BY organization_id, transcript,
|
||||
tts_provider, tts_model, tts_voice_id
|
||||
ORDER BY created_at ASC
|
||||
) AS rn
|
||||
FROM workflow_recordings
|
||||
WHERE is_active = true
|
||||
),
|
||||
canonical AS (
|
||||
SELECT recording_id AS canonical_id,
|
||||
organization_id, transcript,
|
||||
tts_provider, tts_model, tts_voice_id
|
||||
FROM ranked
|
||||
WHERE rn = 1
|
||||
)
|
||||
SELECT r.recording_id AS alias_id, c.canonical_id
|
||||
FROM ranked r
|
||||
JOIN canonical c
|
||||
ON r.organization_id = c.organization_id
|
||||
AND r.transcript = c.transcript
|
||||
AND r.tts_provider = c.tts_provider
|
||||
AND r.tts_model = c.tts_model
|
||||
AND r.tts_voice_id = c.tts_voice_id
|
||||
WHERE r.rn > 1
|
||||
""")
|
||||
).fetchall()
|
||||
|
||||
if not rows:
|
||||
return
|
||||
|
||||
# 2. Replace alias recording_ids with canonical ones in workflow JSON.
|
||||
# Both draft definitions (workflows.workflow_definition) and published
|
||||
# versions (workflow_definitions.workflow_json) are updated.
|
||||
for alias_id, canonical_id in rows:
|
||||
alias_pattern = f"RECORDING_ID: {alias_id}"
|
||||
canonical_pattern = f"RECORDING_ID: {canonical_id}"
|
||||
conn.execute(
|
||||
sa.text("""
|
||||
UPDATE workflows
|
||||
SET workflow_definition =
|
||||
REPLACE(workflow_definition::text, :alias, :canonical)::json
|
||||
WHERE workflow_definition::text LIKE '%%' || :alias || '%%'
|
||||
"""),
|
||||
{"alias": alias_pattern, "canonical": canonical_pattern},
|
||||
)
|
||||
conn.execute(
|
||||
sa.text("""
|
||||
UPDATE workflow_definitions
|
||||
SET workflow_json =
|
||||
REPLACE(workflow_json::text, :alias, :canonical)::json
|
||||
WHERE workflow_json::text LIKE '%%' || :alias || '%%'
|
||||
"""),
|
||||
{"alias": alias_pattern, "canonical": canonical_pattern},
|
||||
)
|
||||
|
||||
# 3. Soft-delete every alias row.
|
||||
alias_ids = [r[0] for r in rows]
|
||||
conn.execute(
|
||||
sa.text("""
|
||||
UPDATE workflow_recordings
|
||||
SET is_active = false
|
||||
WHERE recording_id = ANY(:ids)
|
||||
AND is_active = true
|
||||
"""),
|
||||
{"ids": alias_ids},
|
||||
)
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
# Deduplication is a one-way data migration. The soft-deleted rows
|
||||
# still exist in the table; a manual restore is possible if needed.
|
||||
pass
|
||||
109
api/alembic/versions/67a5cf3e09d0_unique_recording_id_per_org.py
Normal file
109
api/alembic/versions/67a5cf3e09d0_unique_recording_id_per_org.py
Normal file
|
|
@ -0,0 +1,109 @@
|
|||
"""make recordings org-scoped instead of workflow-scoped
|
||||
|
||||
Revision ID: 67a5cf3e09d0
|
||||
Revises: 3cd3155084a2
|
||||
Create Date: 2026-04-09 17:03:38.302041
|
||||
|
||||
"""
|
||||
|
||||
from typing import Sequence, Union
|
||||
|
||||
import sqlalchemy as sa
|
||||
from alembic import op
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision: str = "67a5cf3e09d0"
|
||||
down_revision: Union[str, None] = "3cd3155084a2"
|
||||
branch_labels: Union[str, Sequence[str], None] = None
|
||||
depends_on: Union[str, Sequence[str], None] = None
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
# 1. Widen recording_id from 16 to 64 chars for descriptive names
|
||||
op.alter_column(
|
||||
"workflow_recordings",
|
||||
"recording_id",
|
||||
existing_type=sa.VARCHAR(length=16),
|
||||
type_=sa.String(length=64),
|
||||
existing_nullable=False,
|
||||
)
|
||||
|
||||
# 2. Make workflow_id nullable — recordings are now org-scoped
|
||||
op.alter_column(
|
||||
"workflow_recordings",
|
||||
"workflow_id",
|
||||
existing_type=sa.Integer(),
|
||||
nullable=True,
|
||||
)
|
||||
|
||||
# 3. Drop the old globally-unique index on recording_id
|
||||
op.drop_index(
|
||||
"ix_workflow_recordings_recording_id",
|
||||
table_name="workflow_recordings",
|
||||
)
|
||||
|
||||
# 4. Re-create as non-unique index (for lookups)
|
||||
op.create_index(
|
||||
"ix_workflow_recordings_recording_id",
|
||||
"workflow_recordings",
|
||||
["recording_id"],
|
||||
unique=False,
|
||||
)
|
||||
|
||||
# 5. Add unique constraint (recording_id, organization_id)
|
||||
op.create_unique_constraint(
|
||||
"uq_workflow_recordings_recording_id_org",
|
||||
"workflow_recordings",
|
||||
["recording_id", "organization_id"],
|
||||
)
|
||||
|
||||
# 6. Drop the workflow+TTS scope index (no longer relevant)
|
||||
op.drop_index(
|
||||
"ix_workflow_recordings_tts_scope",
|
||||
table_name="workflow_recordings",
|
||||
)
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
# Re-create the TTS scope index
|
||||
op.create_index(
|
||||
"ix_workflow_recordings_tts_scope",
|
||||
"workflow_recordings",
|
||||
["workflow_id", "tts_provider", "tts_model", "tts_voice_id"],
|
||||
)
|
||||
|
||||
# Drop the org-scoped unique constraint
|
||||
op.drop_constraint(
|
||||
"uq_workflow_recordings_recording_id_org",
|
||||
"workflow_recordings",
|
||||
type_="unique",
|
||||
)
|
||||
|
||||
# Drop non-unique index and re-create as unique
|
||||
op.drop_index(
|
||||
"ix_workflow_recordings_recording_id",
|
||||
table_name="workflow_recordings",
|
||||
)
|
||||
op.create_index(
|
||||
"ix_workflow_recordings_recording_id",
|
||||
"workflow_recordings",
|
||||
["recording_id"],
|
||||
unique=True,
|
||||
)
|
||||
|
||||
# Make workflow_id NOT NULL again
|
||||
op.alter_column(
|
||||
"workflow_recordings",
|
||||
"workflow_id",
|
||||
existing_type=sa.Integer(),
|
||||
nullable=False,
|
||||
)
|
||||
|
||||
# Revert recording_id width
|
||||
op.alter_column(
|
||||
"workflow_recordings",
|
||||
"recording_id",
|
||||
existing_type=sa.String(length=64),
|
||||
type_=sa.VARCHAR(length=16),
|
||||
existing_nullable=False,
|
||||
)
|
||||
|
|
@ -0,0 +1,42 @@
|
|||
"""make tts columns nullable on workflow_recordings
|
||||
|
||||
Revision ID: a1b2c3d4e5f6
|
||||
Revises: 67a5cf3e09d0
|
||||
Create Date: 2026-04-10 12:00:00.000000
|
||||
|
||||
"""
|
||||
|
||||
from typing import Sequence, Union
|
||||
|
||||
import sqlalchemy as sa
|
||||
from alembic import op
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision: str = "a1b2c3d4e5f6"
|
||||
down_revision: str = "67a5cf3e09d0"
|
||||
branch_labels: Union[str, Sequence[str], None] = None
|
||||
depends_on: Union[str, Sequence[str], None] = None
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
op.alter_column(
|
||||
"workflow_recordings", "tts_provider", existing_type=sa.String(), nullable=True
|
||||
)
|
||||
op.alter_column(
|
||||
"workflow_recordings", "tts_model", existing_type=sa.String(), nullable=True
|
||||
)
|
||||
op.alter_column(
|
||||
"workflow_recordings", "tts_voice_id", existing_type=sa.String(), nullable=True
|
||||
)
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
op.alter_column(
|
||||
"workflow_recordings", "tts_voice_id", existing_type=sa.String(), nullable=False
|
||||
)
|
||||
op.alter_column(
|
||||
"workflow_recordings", "tts_model", existing_type=sa.String(), nullable=False
|
||||
)
|
||||
op.alter_column(
|
||||
"workflow_recordings", "tts_provider", existing_type=sa.String(), nullable=False
|
||||
)
|
||||
|
|
@ -1005,7 +1005,7 @@ class KnowledgeBaseDocumentModel(Base):
|
|||
|
||||
|
||||
class WorkflowRecordingModel(Base):
|
||||
"""Model for storing audio recordings scoped to a workflow and TTS configuration.
|
||||
"""Model for storing audio recordings scoped to an organization.
|
||||
|
||||
Recordings are used in hybrid prompts where parts of the output are pre-recorded
|
||||
audio rather than dynamically generated TTS.
|
||||
|
|
@ -1015,21 +1015,21 @@ class WorkflowRecordingModel(Base):
|
|||
|
||||
id = Column(Integer, primary_key=True, index=True)
|
||||
|
||||
# Short globally unique ID (e.g. "xbhfha3k") used in prompts
|
||||
recording_id = Column(String(16), unique=True, nullable=False, index=True)
|
||||
# Descriptive ID used in prompts (unique per organization)
|
||||
recording_id = Column(String(64), nullable=False, index=True)
|
||||
|
||||
# Scoping
|
||||
workflow_id = Column(
|
||||
Integer, ForeignKey("workflows.id", ondelete="CASCADE"), nullable=False
|
||||
Integer, ForeignKey("workflows.id", ondelete="CASCADE"), nullable=True
|
||||
)
|
||||
organization_id = Column(
|
||||
Integer, ForeignKey("organizations.id", ondelete="CASCADE"), nullable=False
|
||||
)
|
||||
|
||||
# TTS configuration scope
|
||||
tts_provider = Column(String, nullable=False)
|
||||
tts_model = Column(String, nullable=False)
|
||||
tts_voice_id = Column(String, nullable=False)
|
||||
# TTS configuration metadata (optional, legacy)
|
||||
tts_provider = Column(String, nullable=True)
|
||||
tts_model = Column(String, nullable=True)
|
||||
tts_voice_id = Column(String, nullable=True)
|
||||
|
||||
# Content
|
||||
transcript = Column(Text, nullable=False)
|
||||
|
|
@ -1062,16 +1062,14 @@ class WorkflowRecordingModel(Base):
|
|||
|
||||
# Indexes
|
||||
__table_args__ = (
|
||||
UniqueConstraint(
|
||||
"recording_id",
|
||||
"organization_id",
|
||||
name="uq_workflow_recordings_recording_id_org",
|
||||
),
|
||||
Index("ix_workflow_recordings_workflow_id", "workflow_id"),
|
||||
Index("ix_workflow_recordings_org_id", "organization_id"),
|
||||
Index("ix_workflow_recordings_recording_id", "recording_id"),
|
||||
Index(
|
||||
"ix_workflow_recordings_tts_scope",
|
||||
"workflow_id",
|
||||
"tts_provider",
|
||||
"tts_model",
|
||||
"tts_voice_id",
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@ import string
|
|||
from typing import List, Optional
|
||||
|
||||
from loguru import logger
|
||||
from sqlalchemy import func, select
|
||||
from sqlalchemy import func, select, text
|
||||
|
||||
from api.db.base_client import BaseDBClient
|
||||
from api.db.models import WorkflowRecordingModel
|
||||
|
|
@ -23,30 +23,30 @@ class WorkflowRecordingClient(BaseDBClient):
|
|||
async def create_recording(
|
||||
self,
|
||||
recording_id: str,
|
||||
workflow_id: int,
|
||||
organization_id: int,
|
||||
tts_provider: str,
|
||||
tts_model: str,
|
||||
tts_voice_id: str,
|
||||
transcript: str,
|
||||
storage_key: str,
|
||||
storage_backend: str,
|
||||
created_by: int,
|
||||
workflow_id: Optional[int] = None,
|
||||
tts_provider: Optional[str] = None,
|
||||
tts_model: Optional[str] = None,
|
||||
tts_voice_id: Optional[str] = None,
|
||||
metadata: Optional[dict] = None,
|
||||
) -> WorkflowRecordingModel:
|
||||
"""Create a new workflow recording record.
|
||||
|
||||
Args:
|
||||
recording_id: Short unique recording identifier
|
||||
workflow_id: ID of the workflow
|
||||
organization_id: ID of the organization
|
||||
tts_provider: TTS provider name
|
||||
tts_model: TTS model name
|
||||
tts_voice_id: TTS voice identifier
|
||||
transcript: User-provided transcript
|
||||
storage_key: S3/MinIO storage key
|
||||
storage_backend: Storage backend (s3 or minio)
|
||||
created_by: ID of the user
|
||||
workflow_id: Optional workflow ID (legacy)
|
||||
tts_provider: Optional TTS provider name
|
||||
tts_model: Optional TTS model name
|
||||
tts_voice_id: Optional TTS voice identifier
|
||||
metadata: Optional extra metadata
|
||||
|
||||
Returns:
|
||||
|
|
@ -71,25 +71,22 @@ class WorkflowRecordingClient(BaseDBClient):
|
|||
await session.commit()
|
||||
await session.refresh(recording)
|
||||
|
||||
logger.info(
|
||||
f"Created recording {recording_id} for workflow {workflow_id}, "
|
||||
f"org {organization_id}"
|
||||
)
|
||||
logger.info(f"Created recording {recording_id} for org {organization_id}")
|
||||
return recording
|
||||
|
||||
async def get_recordings_for_workflow(
|
||||
async def get_recordings(
|
||||
self,
|
||||
workflow_id: int,
|
||||
organization_id: int,
|
||||
workflow_id: Optional[int] = None,
|
||||
tts_provider: Optional[str] = None,
|
||||
tts_model: Optional[str] = None,
|
||||
tts_voice_id: Optional[str] = None,
|
||||
) -> List[WorkflowRecordingModel]:
|
||||
"""Get recordings for a workflow, optionally filtered by TTS config.
|
||||
"""Get recordings for an organization, optionally filtered.
|
||||
|
||||
Args:
|
||||
workflow_id: ID of the workflow
|
||||
organization_id: ID of the organization
|
||||
workflow_id: Optional workflow ID filter
|
||||
tts_provider: Optional TTS provider filter
|
||||
tts_model: Optional TTS model filter
|
||||
tts_voice_id: Optional TTS voice ID filter
|
||||
|
|
@ -99,11 +96,12 @@ class WorkflowRecordingClient(BaseDBClient):
|
|||
"""
|
||||
async with self.async_session() as session:
|
||||
query = select(WorkflowRecordingModel).where(
|
||||
WorkflowRecordingModel.workflow_id == workflow_id,
|
||||
WorkflowRecordingModel.organization_id == organization_id,
|
||||
WorkflowRecordingModel.is_active == True,
|
||||
)
|
||||
|
||||
if workflow_id is not None:
|
||||
query = query.where(WorkflowRecordingModel.workflow_id == workflow_id)
|
||||
if tts_provider:
|
||||
query = query.where(WorkflowRecordingModel.tts_provider == tts_provider)
|
||||
if tts_model:
|
||||
|
|
@ -140,15 +138,37 @@ class WorkflowRecordingClient(BaseDBClient):
|
|||
result = await session.execute(query)
|
||||
return result.scalar_one_or_none()
|
||||
|
||||
async def has_active_recordings(
|
||||
async def get_recording_by_id(
|
||||
self,
|
||||
workflow_id: int,
|
||||
id: int,
|
||||
organization_id: int,
|
||||
) -> bool:
|
||||
"""Check if a workflow has any active recordings.
|
||||
) -> Optional[WorkflowRecordingModel]:
|
||||
"""Get a recording by its integer primary key.
|
||||
|
||||
Args:
|
||||
id: The primary key ID
|
||||
organization_id: ID of the organization
|
||||
|
||||
Returns:
|
||||
WorkflowRecordingModel if found, None otherwise
|
||||
"""
|
||||
async with self.async_session() as session:
|
||||
query = select(WorkflowRecordingModel).where(
|
||||
WorkflowRecordingModel.id == id,
|
||||
WorkflowRecordingModel.organization_id == organization_id,
|
||||
WorkflowRecordingModel.is_active == True,
|
||||
)
|
||||
|
||||
result = await session.execute(query)
|
||||
return result.scalar_one_or_none()
|
||||
|
||||
async def has_active_recordings(
|
||||
self,
|
||||
organization_id: int,
|
||||
) -> bool:
|
||||
"""Check if an organization has any active recordings.
|
||||
|
||||
Args:
|
||||
workflow_id: ID of the workflow
|
||||
organization_id: ID of the organization
|
||||
|
||||
Returns:
|
||||
|
|
@ -159,7 +179,6 @@ class WorkflowRecordingClient(BaseDBClient):
|
|||
select(func.count())
|
||||
.select_from(WorkflowRecordingModel)
|
||||
.where(
|
||||
WorkflowRecordingModel.workflow_id == workflow_id,
|
||||
WorkflowRecordingModel.organization_id == organization_id,
|
||||
WorkflowRecordingModel.is_active == True,
|
||||
)
|
||||
|
|
@ -167,11 +186,14 @@ class WorkflowRecordingClient(BaseDBClient):
|
|||
result = await session.execute(query)
|
||||
return result.scalar_one() > 0
|
||||
|
||||
async def check_recording_id_exists(self, recording_id: str) -> bool:
|
||||
"""Check if a recording ID already exists globally.
|
||||
async def check_recording_id_exists(
|
||||
self, recording_id: str, organization_id: int
|
||||
) -> bool:
|
||||
"""Check if a recording ID already exists within an organization.
|
||||
|
||||
Args:
|
||||
recording_id: The short recording ID to check
|
||||
recording_id: The recording ID to check
|
||||
organization_id: ID of the organization
|
||||
|
||||
Returns:
|
||||
True if exists, False otherwise
|
||||
|
|
@ -179,10 +201,125 @@ class WorkflowRecordingClient(BaseDBClient):
|
|||
async with self.async_session() as session:
|
||||
query = select(WorkflowRecordingModel.id).where(
|
||||
WorkflowRecordingModel.recording_id == recording_id,
|
||||
WorkflowRecordingModel.organization_id == organization_id,
|
||||
WorkflowRecordingModel.is_active == True,
|
||||
)
|
||||
result = await session.execute(query)
|
||||
return result.scalar_one_or_none() is not None
|
||||
|
||||
async def update_recording_id(
|
||||
self,
|
||||
id: int,
|
||||
new_recording_id: str,
|
||||
organization_id: int,
|
||||
) -> Optional[WorkflowRecordingModel]:
|
||||
"""Update the recording_id of a recording.
|
||||
|
||||
Args:
|
||||
id: Primary key ID of the recording
|
||||
new_recording_id: New recording ID
|
||||
organization_id: ID of the organization
|
||||
|
||||
Returns:
|
||||
Updated WorkflowRecordingModel if found, None otherwise
|
||||
"""
|
||||
async with self.async_session() as session:
|
||||
query = select(WorkflowRecordingModel).where(
|
||||
WorkflowRecordingModel.id == id,
|
||||
WorkflowRecordingModel.organization_id == organization_id,
|
||||
WorkflowRecordingModel.is_active == True,
|
||||
)
|
||||
result = await session.execute(query)
|
||||
recording = result.scalar_one_or_none()
|
||||
|
||||
if not recording:
|
||||
return None
|
||||
|
||||
old_id = recording.recording_id
|
||||
recording.recording_id = new_recording_id
|
||||
await session.commit()
|
||||
await session.refresh(recording)
|
||||
|
||||
logger.info(
|
||||
f"Updated recording ID {old_id} -> {new_recording_id}, "
|
||||
f"org {organization_id}"
|
||||
)
|
||||
return recording
|
||||
|
||||
async def replace_recording_id_in_workflows(
|
||||
self,
|
||||
old_id: str,
|
||||
new_id: str,
|
||||
organization_id: int,
|
||||
) -> int:
|
||||
"""Replace all occurrences of a recording ID in workflow definitions.
|
||||
|
||||
Updates both draft definitions (workflows.workflow_definition) and
|
||||
versioned definitions (workflow_definitions.workflow_json), skipping
|
||||
workflow_definitions with status 'legacy'.
|
||||
|
||||
Args:
|
||||
old_id: The old recording ID to find
|
||||
new_id: The new recording ID to replace with
|
||||
organization_id: ID of the organization (scopes to org workflows)
|
||||
|
||||
Returns:
|
||||
Total number of rows updated across both tables
|
||||
"""
|
||||
# Match the exact pattern used in prompts: "RECORDING_ID: <id>"
|
||||
old_pattern = f"RECORDING_ID: {old_id}"
|
||||
new_pattern = f"RECORDING_ID: {new_id}"
|
||||
|
||||
total = 0
|
||||
async with self.async_session() as session:
|
||||
# Update workflows.workflow_definition (draft definitions)
|
||||
result = await session.execute(
|
||||
text("""
|
||||
UPDATE workflows
|
||||
SET workflow_definition =
|
||||
REPLACE(workflow_definition::text, :old_pat, :new_pat)::json
|
||||
WHERE organization_id = :org_id
|
||||
AND workflow_definition::text LIKE '%%' || :old_pat || '%%'
|
||||
"""),
|
||||
{
|
||||
"old_pat": old_pattern,
|
||||
"new_pat": new_pattern,
|
||||
"org_id": organization_id,
|
||||
},
|
||||
)
|
||||
total += result.rowcount
|
||||
|
||||
# Update workflow_definitions.workflow_json (versioned definitions)
|
||||
# Skip legacy definitions
|
||||
result = await session.execute(
|
||||
text("""
|
||||
UPDATE workflow_definitions wd
|
||||
SET workflow_json =
|
||||
REPLACE(wd.workflow_json::text, :old_pat, :new_pat)::json
|
||||
FROM workflows w
|
||||
WHERE wd.workflow_id = w.id
|
||||
AND w.organization_id = :org_id
|
||||
AND wd.status != 'legacy'
|
||||
AND wd.workflow_json::text LIKE '%%' || :old_pat || '%%'
|
||||
"""),
|
||||
{
|
||||
"old_pat": old_pattern,
|
||||
"new_pat": new_pattern,
|
||||
"org_id": organization_id,
|
||||
},
|
||||
)
|
||||
total += result.rowcount
|
||||
|
||||
await session.commit()
|
||||
|
||||
if total > 0:
|
||||
logger.info(
|
||||
f"Replaced recording ID '{old_id}' -> '{new_id}' "
|
||||
f"in {total} workflow definition(s), org {organization_id}"
|
||||
)
|
||||
|
||||
return total
|
||||
|
||||
async def delete_recording(
|
||||
self,
|
||||
recording_id: str,
|
||||
|
|
|
|||
|
|
@ -178,6 +178,11 @@ async def initiate_call(
|
|||
workflow_run_id = request.workflow_run_id
|
||||
|
||||
if not workflow_run_id:
|
||||
# Fetch workflow to merge template context variables (e.g. caller_number,
|
||||
# called_number set in workflow settings for testing pre-call data fetch)
|
||||
workflow = await db_client.get_workflow_by_id(request.workflow_id)
|
||||
template_vars = (workflow.template_context_variables or {}) if workflow else {}
|
||||
|
||||
numeric_suffix = int(str(uuid.uuid4()).replace("-", "")[:8], 16) % 100000000
|
||||
workflow_run_name = f"WR-TEL-OUT-{numeric_suffix:08d}"
|
||||
workflow_run = await db_client.create_workflow_run(
|
||||
|
|
@ -187,6 +192,7 @@ async def initiate_call(
|
|||
user_id=user.id,
|
||||
call_type=CallType.OUTBOUND,
|
||||
initial_context={
|
||||
**template_vars,
|
||||
"phone_number": phone_number,
|
||||
"called_number": phone_number,
|
||||
"provider": provider.PROVIDER_NAME,
|
||||
|
|
|
|||
|
|
@ -45,17 +45,29 @@ class HttpApiConfig(BaseModel):
|
|||
timeout_ms: Optional[int] = Field(
|
||||
default=5000, description="Request timeout in milliseconds"
|
||||
)
|
||||
customMessage: Optional[str] = Field(
|
||||
default=None, description="Custom message to play after tool execution"
|
||||
)
|
||||
customMessageType: Optional[Literal["text", "audio"]] = Field(
|
||||
default=None, description="Type of custom message: text or audio"
|
||||
)
|
||||
customMessageRecordingId: Optional[str] = Field(
|
||||
default=None, description="Recording ID for audio custom message"
|
||||
)
|
||||
|
||||
|
||||
class EndCallConfig(BaseModel):
|
||||
"""Configuration for End Call tools."""
|
||||
|
||||
messageType: Literal["none", "custom"] = Field(
|
||||
messageType: Literal["none", "custom", "audio"] = Field(
|
||||
default="none", description="Type of goodbye message"
|
||||
)
|
||||
customMessage: Optional[str] = Field(
|
||||
default=None, description="Custom message to play before ending the call"
|
||||
)
|
||||
audioRecordingId: Optional[str] = Field(
|
||||
default=None, description="Recording ID for audio goodbye message"
|
||||
)
|
||||
endCallReason: bool = Field(
|
||||
default=False,
|
||||
description="When enabled, LLM must provide a reason for ending the call. "
|
||||
|
|
@ -74,12 +86,15 @@ class TransferCallConfig(BaseModel):
|
|||
destination: str = Field(
|
||||
description="Phone number or SIP endpoint to transfer the call to (E.164 format e.g., +1234567890, or SIP endpoint e.g., PJSIP/1234)"
|
||||
)
|
||||
messageType: Literal["none", "custom"] = Field(
|
||||
messageType: Literal["none", "custom", "audio"] = Field(
|
||||
default="none", description="Type of message to play before transfer"
|
||||
)
|
||||
customMessage: Optional[str] = Field(
|
||||
default=None, description="Custom message to play before transferring the call"
|
||||
)
|
||||
audioRecordingId: Optional[str] = Field(
|
||||
default=None, description="Recording ID for audio message before transfer"
|
||||
)
|
||||
timeout: int = Field(
|
||||
default=30,
|
||||
ge=5,
|
||||
|
|
|
|||
|
|
@ -16,6 +16,7 @@ from api.schemas.workflow_recording import (
|
|||
BatchRecordingUploadResponseSchema,
|
||||
RecordingListResponseSchema,
|
||||
RecordingResponseSchema,
|
||||
RecordingUpdateRequestSchema,
|
||||
RecordingUploadResponseSchema,
|
||||
)
|
||||
from api.services.auth.depends import get_user
|
||||
|
|
@ -25,11 +26,11 @@ from api.services.storage import storage_fs
|
|||
router = APIRouter(prefix="/workflow-recordings", tags=["workflow-recordings"])
|
||||
|
||||
|
||||
async def _generate_unique_recording_id() -> str:
|
||||
"""Generate a globally unique short recording ID."""
|
||||
async def _generate_unique_recording_id(organization_id: int) -> str:
|
||||
"""Generate a unique short recording ID within an organization."""
|
||||
for _ in range(10):
|
||||
rid = generate_short_id(8)
|
||||
exists = await db_client.check_recording_id_exists(rid)
|
||||
exists = await db_client.check_recording_id_exists(rid, organization_id)
|
||||
if not exists:
|
||||
return rid
|
||||
raise HTTPException(
|
||||
|
|
@ -69,11 +70,13 @@ async def get_upload_urls(
|
|||
try:
|
||||
items = []
|
||||
for fd in request.files:
|
||||
recording_id = await _generate_unique_recording_id()
|
||||
recording_id = await _generate_unique_recording_id(
|
||||
user.selected_organization_id
|
||||
)
|
||||
|
||||
storage_key = (
|
||||
f"recordings/{user.selected_organization_id}"
|
||||
f"/{request.workflow_id}/{recording_id}"
|
||||
f"/{recording_id}"
|
||||
f"/{fd.filename}"
|
||||
)
|
||||
|
||||
|
|
@ -100,7 +103,7 @@ async def get_upload_urls(
|
|||
|
||||
logger.info(
|
||||
f"Generated {len(items)} recording upload URL(s), "
|
||||
f"workflow {request.workflow_id}, org {user.selected_organization_id}"
|
||||
f"org {user.selected_organization_id}"
|
||||
)
|
||||
|
||||
return BatchRecordingUploadResponseSchema(items=items)
|
||||
|
|
@ -131,22 +134,20 @@ async def create_recordings(
|
|||
for rec_req in request.recordings:
|
||||
recording = await db_client.create_recording(
|
||||
recording_id=rec_req.recording_id,
|
||||
workflow_id=rec_req.workflow_id,
|
||||
organization_id=user.selected_organization_id,
|
||||
tts_provider=rec_req.tts_provider,
|
||||
tts_model=rec_req.tts_model,
|
||||
tts_voice_id=rec_req.tts_voice_id,
|
||||
transcript=rec_req.transcript,
|
||||
storage_key=rec_req.storage_key,
|
||||
storage_backend=backend.value,
|
||||
created_by=user.id,
|
||||
tts_provider=rec_req.tts_provider,
|
||||
tts_model=rec_req.tts_model,
|
||||
tts_voice_id=rec_req.tts_voice_id,
|
||||
metadata=rec_req.metadata,
|
||||
)
|
||||
results.append(_build_response(recording))
|
||||
|
||||
logger.info(
|
||||
f"Created {len(results)} recording(s) for "
|
||||
f"workflow {request.recordings[0].workflow_id}"
|
||||
f"Created {len(results)} recording(s) for org {user.selected_organization_id}"
|
||||
)
|
||||
|
||||
return BatchRecordingCreateResponseSchema(recordings=results)
|
||||
|
|
@ -163,10 +164,12 @@ async def create_recordings(
|
|||
@router.get(
|
||||
"/",
|
||||
response_model=RecordingListResponseSchema,
|
||||
summary="List recordings for a workflow",
|
||||
summary="List recordings",
|
||||
)
|
||||
async def list_recordings(
|
||||
workflow_id: Annotated[int, Query(description="Workflow ID")],
|
||||
workflow_id: Annotated[
|
||||
Optional[int], Query(description="Filter by workflow ID")
|
||||
] = None,
|
||||
tts_provider: Annotated[
|
||||
Optional[str], Query(description="Filter by TTS provider")
|
||||
] = None,
|
||||
|
|
@ -178,11 +181,11 @@ async def list_recordings(
|
|||
] = None,
|
||||
user=Depends(get_user),
|
||||
):
|
||||
"""List recordings for a workflow, optionally filtered by TTS configuration."""
|
||||
"""List recordings for the organization, optionally filtered."""
|
||||
try:
|
||||
recordings = await db_client.get_recordings_for_workflow(
|
||||
workflow_id=workflow_id,
|
||||
recordings = await db_client.get_recordings(
|
||||
organization_id=user.selected_organization_id,
|
||||
workflow_id=workflow_id,
|
||||
tts_provider=tts_provider,
|
||||
tts_model=tts_model,
|
||||
tts_voice_id=tts_voice_id,
|
||||
|
|
@ -233,6 +236,74 @@ async def delete_recording(
|
|||
) from exc
|
||||
|
||||
|
||||
@router.patch(
|
||||
"/{id}",
|
||||
response_model=RecordingResponseSchema,
|
||||
summary="Update a recording's Recording ID",
|
||||
)
|
||||
async def update_recording(
|
||||
id: int,
|
||||
request: RecordingUpdateRequestSchema,
|
||||
user=Depends(get_user),
|
||||
):
|
||||
"""Update the recording_id (descriptive name) of a recording."""
|
||||
try:
|
||||
new_id = request.recording_id.strip()
|
||||
if not new_id:
|
||||
raise HTTPException(status_code=400, detail="Recording ID cannot be empty")
|
||||
|
||||
existing = await db_client.get_recording_by_id(
|
||||
id, user.selected_organization_id
|
||||
)
|
||||
if not existing:
|
||||
raise HTTPException(status_code=404, detail="Recording not found")
|
||||
|
||||
if new_id == existing.recording_id:
|
||||
return _build_response(existing)
|
||||
|
||||
exists = await db_client.check_recording_id_exists(
|
||||
new_id, user.selected_organization_id
|
||||
)
|
||||
if exists:
|
||||
raise HTTPException(
|
||||
status_code=409,
|
||||
detail=f"Recording ID '{new_id}' is already in use",
|
||||
)
|
||||
|
||||
old_id = existing.recording_id
|
||||
|
||||
recording = await db_client.update_recording_id(
|
||||
id=id,
|
||||
new_recording_id=new_id,
|
||||
organization_id=user.selected_organization_id,
|
||||
)
|
||||
|
||||
if not recording:
|
||||
raise HTTPException(status_code=404, detail="Recording not found")
|
||||
|
||||
# Replace old recording ID in all non-legacy workflow definitions
|
||||
updated = await db_client.replace_recording_id_in_workflows(
|
||||
old_id=old_id,
|
||||
new_id=new_id,
|
||||
organization_id=user.selected_organization_id,
|
||||
)
|
||||
if updated:
|
||||
logger.info(
|
||||
f"Updated {updated} workflow definition(s) with new recording ID "
|
||||
f"'{old_id}' -> '{new_id}'"
|
||||
)
|
||||
|
||||
return _build_response(recording)
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as exc:
|
||||
logger.error(f"Error updating recording: {exc}")
|
||||
raise HTTPException(
|
||||
status_code=500, detail="Failed to update recording"
|
||||
) from exc
|
||||
|
||||
|
||||
@router.post(
|
||||
"/transcribe",
|
||||
summary="Transcribe an audio file",
|
||||
|
|
|
|||
|
|
@ -32,7 +32,6 @@ class FileDescriptor(BaseModel):
|
|||
class BatchRecordingUploadRequestSchema(BaseModel):
|
||||
"""Request schema for getting presigned upload URLs for one or more files."""
|
||||
|
||||
workflow_id: int = Field(..., description="Workflow ID these recordings belong to")
|
||||
files: List[FileDescriptor] = Field(
|
||||
..., min_length=1, max_length=20, description="List of files to upload"
|
||||
)
|
||||
|
|
@ -50,10 +49,13 @@ class RecordingCreateRequestSchema(BaseModel):
|
|||
"""Request schema for creating a recording record after upload."""
|
||||
|
||||
recording_id: str = Field(..., description="Short recording ID from upload step")
|
||||
workflow_id: int = Field(..., description="Workflow ID")
|
||||
tts_provider: str = Field(..., description="TTS provider (e.g. elevenlabs)")
|
||||
tts_model: str = Field(..., description="TTS model name")
|
||||
tts_voice_id: str = Field(..., description="TTS voice identifier")
|
||||
tts_provider: Optional[str] = Field(
|
||||
default=None, description="TTS provider (e.g. elevenlabs)"
|
||||
)
|
||||
tts_model: Optional[str] = Field(default=None, description="TTS model name")
|
||||
tts_voice_id: Optional[str] = Field(
|
||||
default=None, description="TTS voice identifier"
|
||||
)
|
||||
transcript: str = Field(
|
||||
..., description="User-provided transcript of the recording"
|
||||
)
|
||||
|
|
@ -68,11 +70,11 @@ class RecordingResponseSchema(BaseModel):
|
|||
|
||||
id: int
|
||||
recording_id: str
|
||||
workflow_id: int
|
||||
workflow_id: Optional[int] = None
|
||||
organization_id: int
|
||||
tts_provider: str
|
||||
tts_model: str
|
||||
tts_voice_id: str
|
||||
tts_provider: Optional[str] = None
|
||||
tts_model: Optional[str] = None
|
||||
tts_voice_id: Optional[str] = None
|
||||
transcript: str
|
||||
storage_key: str
|
||||
storage_backend: str
|
||||
|
|
@ -98,6 +100,18 @@ class BatchRecordingCreateResponseSchema(BaseModel):
|
|||
)
|
||||
|
||||
|
||||
class RecordingUpdateRequestSchema(BaseModel):
|
||||
"""Request schema for updating a recording's ID."""
|
||||
|
||||
recording_id: str = Field(
|
||||
...,
|
||||
min_length=1,
|
||||
max_length=64,
|
||||
pattern=r"^[a-zA-Z0-9_-]+$",
|
||||
description="New descriptive recording ID (letters, numbers, hyphens, underscores only)",
|
||||
)
|
||||
|
||||
|
||||
class RecordingListResponseSchema(BaseModel):
|
||||
"""Response schema for list of recordings."""
|
||||
|
||||
|
|
|
|||
|
|
@ -200,7 +200,6 @@ class CampaignCallDispatcher:
|
|||
|
||||
# Merge context variables (queued_run context already includes retry info if applicable)
|
||||
initial_context = {
|
||||
**workflow.template_context_variables,
|
||||
**queued_run.context_variables,
|
||||
"campaign_id": campaign.id,
|
||||
"provider": provider.PROVIDER_NAME,
|
||||
|
|
|
|||
188
api/services/pipecat/audio_playback.py
Normal file
188
api/services/pipecat/audio_playback.py
Normal file
|
|
@ -0,0 +1,188 @@
|
|||
"""Utilities for playing audio through the pipeline transport.
|
||||
|
||||
Provides one-shot and looping playback of raw PCM audio. All playback
|
||||
should be routed through ``transport.output().queue_frame`` so the audio
|
||||
reaches the caller without passing through STT (which would otherwise
|
||||
generate phantom transcriptions).
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import uuid
|
||||
from typing import Awaitable, Callable, Dict, Optional, Tuple
|
||||
|
||||
import numpy as np
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.frames.frames import (
|
||||
Frame,
|
||||
OutputAudioRawFrame,
|
||||
TTSAudioRawFrame,
|
||||
TTSStartedFrame,
|
||||
TTSStoppedFrame,
|
||||
TTSTextFrame,
|
||||
)
|
||||
|
||||
try:
|
||||
import soundfile as sf
|
||||
except ModuleNotFoundError as e:
|
||||
logger.error(f"Exception: {e}")
|
||||
logger.error("In order to use audio playback, you need to `pip install soundfile`.")
|
||||
raise Exception(f"Missing module: {e}")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Audio file loading / caching
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_audio_cache: Dict[Tuple[str, int], bytes] = {}
|
||||
|
||||
|
||||
def load_audio_file(file_path: str, sample_rate: int) -> Optional[bytes]:
|
||||
"""Load an audio file as PCM-16 bytes, caching the result.
|
||||
|
||||
Args:
|
||||
file_path: Path to a WAV audio file.
|
||||
sample_rate: Target sample rate (used as cache key; no resampling
|
||||
is performed here).
|
||||
|
||||
Returns:
|
||||
Raw PCM-16 bytes, or *None* on failure.
|
||||
"""
|
||||
cache_key = (file_path, sample_rate)
|
||||
if cache_key in _audio_cache:
|
||||
logger.debug(f"Using cached audio for {file_path} at {sample_rate}Hz")
|
||||
return _audio_cache[cache_key]
|
||||
|
||||
try:
|
||||
logger.info(f"Loading audio from {file_path} at {sample_rate}Hz")
|
||||
sound, file_sample_rate = sf.read(file_path, dtype="int16")
|
||||
logger.info(
|
||||
f"Audio file loaded - file sample_rate: {file_sample_rate}, target: {sample_rate}"
|
||||
)
|
||||
|
||||
# Ensure mono (take first channel if stereo)
|
||||
if len(sound.shape) > 1:
|
||||
sound = sound[:, 0]
|
||||
|
||||
if file_sample_rate != sample_rate:
|
||||
logger.warning(
|
||||
f"Audio file has sample rate {file_sample_rate}, expected {sample_rate}"
|
||||
)
|
||||
|
||||
audio_bytes = sound.astype(np.int16).tobytes()
|
||||
_audio_cache[cache_key] = audio_bytes
|
||||
logger.info(f"Audio loaded: {len(sound)} samples at {sample_rate}Hz")
|
||||
return audio_bytes
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to load audio file {file_path}: {e}")
|
||||
return None
|
||||
|
||||
|
||||
def clear_audio_cache() -> None:
|
||||
"""Clear the audio file cache to free memory."""
|
||||
_audio_cache.clear()
|
||||
logger.info("Audio cache cleared")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Playback helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
async def play_audio(
|
||||
audio_data: bytes,
|
||||
*,
|
||||
sample_rate: int,
|
||||
queue_frame: Callable[[Frame], Awaitable[None]],
|
||||
transcript: Optional[str] = None,
|
||||
append_to_context: bool = False,
|
||||
) -> None:
|
||||
"""Play raw PCM-16 audio once.
|
||||
|
||||
Pushes ``TTSStarted -> TTSAudioRaw -> TTSStopped`` so downstream
|
||||
processors (audio buffer, context aggregators) handle the audio
|
||||
correctly.
|
||||
|
||||
When *transcript* is provided a ``TTSTextFrame`` is also pushed so
|
||||
that observers (e.g. ``RealtimeFeedbackObserver``) can relay the
|
||||
spoken text to the UI.
|
||||
|
||||
Args:
|
||||
audio_data: Raw 16-bit mono PCM bytes.
|
||||
sample_rate: Pipeline sample rate (e.g. 16000).
|
||||
queue_frame: Frame sink -- typically ``transport.output().queue_frame``.
|
||||
transcript: Optional transcript of the recording.
|
||||
append_to_context: Whether the transcript should be appended to
|
||||
the LLM assistant context. Defaults to False.
|
||||
"""
|
||||
context_id = str(uuid.uuid4())
|
||||
await queue_frame(TTSStartedFrame(context_id=context_id))
|
||||
if transcript:
|
||||
tts_text = TTSTextFrame(
|
||||
text=transcript, aggregated_by="recording", context_id=context_id
|
||||
)
|
||||
tts_text.append_to_context = append_to_context
|
||||
await queue_frame(tts_text)
|
||||
await queue_frame(
|
||||
TTSAudioRawFrame(
|
||||
audio=audio_data,
|
||||
sample_rate=sample_rate,
|
||||
num_channels=1,
|
||||
context_id=context_id,
|
||||
)
|
||||
)
|
||||
await queue_frame(TTSStoppedFrame(context_id=context_id))
|
||||
|
||||
|
||||
async def play_audio_loop(
|
||||
*,
|
||||
stop_event: asyncio.Event,
|
||||
sample_rate: int,
|
||||
queue_frame: Callable[[Frame], Awaitable[None]],
|
||||
audio_file: Optional[str] = None,
|
||||
) -> None:
|
||||
"""Play audio in a loop until *stop_event* is set.
|
||||
|
||||
Used for hold music during call transfers and ringers during
|
||||
pre-call data fetches.
|
||||
|
||||
Args:
|
||||
stop_event: Set this event to terminate the loop.
|
||||
sample_rate: Target sample rate for audio playback.
|
||||
queue_frame: Frame sink -- typically ``transport.output().queue_frame``.
|
||||
audio_file: Path to a WAV file. When *None* the default
|
||||
``transfer_hold_ring_{sample_rate}.wav`` asset is used.
|
||||
"""
|
||||
if audio_file is None:
|
||||
from api.constants import APP_ROOT_DIR
|
||||
|
||||
audio_file = str(
|
||||
APP_ROOT_DIR / "assets" / f"transfer_hold_ring_{sample_rate}.wav"
|
||||
)
|
||||
|
||||
audio_data = load_audio_file(audio_file, sample_rate)
|
||||
if not audio_data:
|
||||
logger.warning(f"Audio loop: failed to load {audio_file}, skipping")
|
||||
return
|
||||
|
||||
num_samples = len(audio_data) // 2 # 16-bit PCM = 2 bytes per sample
|
||||
duration = num_samples / sample_rate
|
||||
|
||||
logger.debug(f"Audio loop: playing at {sample_rate}Hz")
|
||||
try:
|
||||
while not stop_event.is_set():
|
||||
frame = OutputAudioRawFrame(
|
||||
audio=audio_data,
|
||||
sample_rate=sample_rate,
|
||||
num_channels=1,
|
||||
)
|
||||
await queue_frame(frame)
|
||||
try:
|
||||
await asyncio.wait_for(stop_event.wait(), timeout=duration + 1.5)
|
||||
break
|
||||
except asyncio.TimeoutError:
|
||||
pass
|
||||
except Exception as e:
|
||||
logger.error(f"Audio loop error: {e}")
|
||||
logger.debug("Audio loop: stopped")
|
||||
|
|
@ -6,6 +6,7 @@ from api.db import db_client
|
|||
from api.enums import PostHogEvent, WorkflowRunState
|
||||
from api.services.campaign.circuit_breaker import circuit_breaker
|
||||
from api.services.pipecat.audio_config import AudioConfig
|
||||
from api.services.pipecat.audio_playback import play_audio, play_audio_loop
|
||||
from api.services.pipecat.in_memory_buffers import (
|
||||
InMemoryAudioBuffer,
|
||||
InMemoryLogsBuffer,
|
||||
|
|
@ -16,8 +17,11 @@ from api.services.posthog_client import capture_event
|
|||
from api.services.workflow.pipecat_engine import PipecatEngine
|
||||
from api.tasks.arq import enqueue_job
|
||||
from api.tasks.function_names import FunctionNames
|
||||
from api.utils.hold_audio import play_hold_audio_loop
|
||||
from pipecat.frames.frames import Frame, LLMContextFrame, TTSSpeakFrame
|
||||
from pipecat.frames.frames import (
|
||||
Frame,
|
||||
LLMContextFrame,
|
||||
TTSSpeakFrame,
|
||||
)
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.processors.audio.audio_buffer_processor import AudioBufferProcessor
|
||||
from pipecat.utils.enums import EndTaskReason
|
||||
|
|
@ -64,6 +68,7 @@ def register_event_handlers(
|
|||
pipeline_metrics_aggregator: PipelineMetricsAggregator,
|
||||
audio_config=AudioConfig,
|
||||
pre_call_fetch_task: asyncio.Task | None = None,
|
||||
fetch_recording_audio=None,
|
||||
user_provider_id: str | None = None,
|
||||
):
|
||||
"""Register all event handlers for transport and task events.
|
||||
|
|
@ -123,7 +128,11 @@ def register_event_handlers(
|
|||
stop_ringer = asyncio.Event()
|
||||
sample_rate = audio_config.pipeline_sample_rate or 16000
|
||||
ringer_task = asyncio.create_task(
|
||||
play_hold_audio_loop(task, stop_ringer, sample_rate)
|
||||
play_audio_loop(
|
||||
stop_event=stop_ringer,
|
||||
sample_rate=sample_rate,
|
||||
queue_frame=transport.output().queue_frame,
|
||||
)
|
||||
)
|
||||
try:
|
||||
fetch_result = await pre_call_fetch_task
|
||||
|
|
@ -151,12 +160,35 @@ def register_event_handlers(
|
|||
# so that render_template() has the complete _call_context_vars.
|
||||
await engine.set_node(engine.workflow.start_node_id)
|
||||
|
||||
greeting = engine.get_start_greeting()
|
||||
if greeting:
|
||||
logger.debug(
|
||||
"Both pipeline_started and client_connected received - playing greeting via TTS"
|
||||
)
|
||||
await task.queue_frame(TTSSpeakFrame(greeting))
|
||||
greeting_info = engine.get_start_greeting()
|
||||
if greeting_info:
|
||||
greeting_type, greeting_value = greeting_info
|
||||
if (
|
||||
greeting_type == "audio"
|
||||
and greeting_value
|
||||
and fetch_recording_audio
|
||||
):
|
||||
logger.debug(f"Playing audio greeting recording: {greeting_value}")
|
||||
result = await fetch_recording_audio(
|
||||
recording_pk=int(greeting_value)
|
||||
)
|
||||
if result:
|
||||
await play_audio(
|
||||
result.audio,
|
||||
sample_rate=audio_config.pipeline_sample_rate or 16000,
|
||||
queue_frame=transport.output().queue_frame,
|
||||
transcript=result.transcript,
|
||||
append_to_context=True,
|
||||
)
|
||||
else:
|
||||
logger.warning(
|
||||
f"Failed to fetch audio greeting {greeting_value}, "
|
||||
"falling back to LLM generation"
|
||||
)
|
||||
await engine.llm.queue_frame(LLMContextFrame(engine.context))
|
||||
else:
|
||||
logger.debug("Playing text greeting via TTS")
|
||||
await task.queue_frame(TTSSpeakFrame(greeting_value))
|
||||
else:
|
||||
logger.debug(
|
||||
"Both pipeline_started and client_connected received - triggering initial LLM generation"
|
||||
|
|
|
|||
|
|
@ -170,7 +170,10 @@ class RealtimeFeedbackObserver(BaseObserver):
|
|||
frame_direction = data.direction
|
||||
|
||||
# Skip already processed frames (frames can be observed multiple times)
|
||||
if frame.id in self._frames_seen:
|
||||
if (
|
||||
frame.id in self._frames_seen
|
||||
or frame_direction != FrameDirection.DOWNSTREAM
|
||||
):
|
||||
return
|
||||
self._frames_seen.add(frame.id)
|
||||
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@ subsequent plays (even from other workers) are instantaneous.
|
|||
"""
|
||||
|
||||
import os
|
||||
from typing import Awaitable, Callable, Optional
|
||||
from typing import Awaitable, Callable, NamedTuple, Optional
|
||||
|
||||
import numpy as np
|
||||
from loguru import logger
|
||||
|
|
@ -22,14 +22,24 @@ from .audio_file_cache import (
|
|||
write_cache_file,
|
||||
)
|
||||
|
||||
|
||||
class RecordingAudio(NamedTuple):
|
||||
"""Audio bytes paired with the recording's transcript (when available)."""
|
||||
|
||||
audio: bytes
|
||||
transcript: Optional[str] = None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Cache path helper
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _cache_path(recording_id: str, sample_rate: int) -> str:
|
||||
def _cache_path(organization_id: int, recording_id: str, sample_rate: int) -> str:
|
||||
"""Return the on-disk path for a cached PCM file."""
|
||||
return os.path.join(CACHE_DIR, f"{recording_id}_{sample_rate}.pcm")
|
||||
return os.path.join(
|
||||
CACHE_DIR, f"{organization_id}_{recording_id}_{sample_rate}.pcm"
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
@ -40,54 +50,95 @@ def _cache_path(recording_id: str, sample_rate: int) -> str:
|
|||
def create_recording_audio_fetcher(
|
||||
organization_id: int,
|
||||
pipeline_sample_rate: int,
|
||||
) -> Callable[[str], Awaitable[Optional[bytes]]]:
|
||||
"""Create an async callback that returns raw PCM bytes for a recording_id.
|
||||
) -> Callable[..., Awaitable[Optional[bytes]]]:
|
||||
"""Create an async callback that returns raw PCM bytes for a recording.
|
||||
|
||||
The returned callable:
|
||||
1. Checks the filesystem cache (keyed by ``recording_id`` + sample rate).
|
||||
2. On miss, looks up the recording in the DB, downloads the audio file
|
||||
from S3/MinIO, converts it to 16-bit mono PCM at *pipeline_sample_rate*,
|
||||
trims leading/trailing silence, caches the result on disk, and returns it.
|
||||
The returned callable accepts **one** of two keyword arguments:
|
||||
- ``recording_pk`` – the immutable integer primary key (used by
|
||||
dropdown-based selections: greeting, edges, tool configs).
|
||||
- ``recording_id`` – the human-readable string ID (used by
|
||||
prompt-based ``RECORDING_ID: xxx`` references).
|
||||
|
||||
Flow:
|
||||
1. Checks the filesystem cache (keyed by org + pk + sample rate).
|
||||
2. On miss, looks up the recording in the DB, downloads the audio
|
||||
from S3/MinIO, converts to 16-bit mono PCM, trims silence, and
|
||||
caches the result on disk.
|
||||
|
||||
Args:
|
||||
organization_id: Organization owning the recordings.
|
||||
pipeline_sample_rate: Target PCM sample rate for the pipeline.
|
||||
|
||||
Returns:
|
||||
``async (recording_id: str) -> Optional[bytes]``
|
||||
"""
|
||||
from api.db import db_client
|
||||
from api.services.storage import get_storage_for_backend
|
||||
|
||||
# Resolve storage instances once per backend at creation time, not per fetch.
|
||||
_storage_cache: dict[str, object] = {}
|
||||
_transcript_cache: dict[str, Optional[str]] = {}
|
||||
|
||||
def _get_storage(backend: str):
|
||||
if backend not in _storage_cache:
|
||||
_storage_cache[backend] = get_storage_for_backend(backend)
|
||||
return _storage_cache[backend]
|
||||
|
||||
async def fetch(recording_id: str) -> Optional[bytes]:
|
||||
cached = _cache_path(recording_id, pipeline_sample_rate)
|
||||
async def _lookup_recording(
|
||||
cache_key: str,
|
||||
recording_pk: Optional[int],
|
||||
recording_id: Optional[str],
|
||||
):
|
||||
"""DB lookup with transcript caching."""
|
||||
if recording_pk is not None:
|
||||
recording = await db_client.get_recording_by_id(
|
||||
recording_pk, organization_id
|
||||
)
|
||||
else:
|
||||
recording = await db_client.get_recording_by_recording_id(
|
||||
recording_id, organization_id
|
||||
)
|
||||
if recording:
|
||||
_transcript_cache[cache_key] = recording.transcript or None
|
||||
return recording
|
||||
|
||||
async def fetch(
|
||||
*,
|
||||
recording_pk: Optional[int] = None,
|
||||
recording_id: Optional[str] = None,
|
||||
) -> Optional[RecordingAudio]:
|
||||
if recording_pk is None and recording_id is None:
|
||||
logger.warning("fetch called with neither recording_pk nor recording_id")
|
||||
return None
|
||||
|
||||
# Use pk for cache key when available, otherwise recording_id
|
||||
cache_key = str(recording_pk) if recording_pk is not None else recording_id
|
||||
cached = _cache_path(organization_id, cache_key, pipeline_sample_rate)
|
||||
|
||||
# 1. Serve from filesystem cache
|
||||
if os.path.exists(cached):
|
||||
logger.debug(f"Recording {recording_id} served from disk cache")
|
||||
return read_cached_file(cached)
|
||||
logger.debug(f"Recording {cache_key} served from disk cache")
|
||||
audio = read_cached_file(cached)
|
||||
# Transcript may already be in memory from a prior fetch;
|
||||
# if not, do a lightweight DB lookup.
|
||||
if cache_key not in _transcript_cache:
|
||||
await _lookup_recording(cache_key, recording_pk, recording_id)
|
||||
return RecordingAudio(
|
||||
audio=audio, transcript=_transcript_cache.get(cache_key)
|
||||
)
|
||||
|
||||
# 2. DB lookup
|
||||
recording = await db_client.get_recording_by_recording_id(
|
||||
recording_id, organization_id
|
||||
)
|
||||
recording = await _lookup_recording(cache_key, recording_pk, recording_id)
|
||||
|
||||
if not recording:
|
||||
logger.warning(f"Recording {recording_id} not found in database")
|
||||
logger.warning(f"Recording {cache_key} not found in database")
|
||||
return None
|
||||
|
||||
# 3. Download, convert, trim, and cache
|
||||
pcm_data = await _download_and_convert(
|
||||
recording, pipeline_sample_rate, _get_storage
|
||||
)
|
||||
return pcm_data
|
||||
if pcm_data is None:
|
||||
return None
|
||||
return RecordingAudio(
|
||||
audio=pcm_data, transcript=_transcript_cache.get(cache_key)
|
||||
)
|
||||
|
||||
return fetch
|
||||
|
||||
|
|
@ -98,11 +149,10 @@ def create_recording_audio_fetcher(
|
|||
|
||||
|
||||
async def warm_recording_cache(
|
||||
workflow_id: int,
|
||||
organization_id: int,
|
||||
pipeline_sample_rate: int,
|
||||
) -> None:
|
||||
"""Pre-fetch all active recordings for a workflow into the disk cache.
|
||||
"""Pre-fetch all active recordings for an organization into the disk cache.
|
||||
|
||||
Launched as a background ``asyncio.Task`` at pipeline startup so that
|
||||
recordings are ready before the first playback request. Errors are logged
|
||||
|
|
@ -112,9 +162,7 @@ async def warm_recording_cache(
|
|||
from api.services.storage import get_storage_for_backend
|
||||
|
||||
try:
|
||||
recordings = await db_client.get_recordings_for_workflow(
|
||||
workflow_id, organization_id
|
||||
)
|
||||
recordings = await db_client.get_recordings(organization_id=organization_id)
|
||||
if not recordings:
|
||||
return
|
||||
|
||||
|
|
@ -122,15 +170,20 @@ async def warm_recording_cache(
|
|||
uncached = [
|
||||
r
|
||||
for r in recordings
|
||||
if not os.path.exists(_cache_path(r.recording_id, pipeline_sample_rate))
|
||||
if not os.path.exists(
|
||||
_cache_path(organization_id, str(r.id), pipeline_sample_rate)
|
||||
)
|
||||
and not os.path.exists(
|
||||
_cache_path(organization_id, r.recording_id, pipeline_sample_rate)
|
||||
)
|
||||
]
|
||||
if not uncached:
|
||||
logger.debug(f"Recording cache already warm for workflow {workflow_id}")
|
||||
logger.debug(f"Recording cache already warm for org {organization_id}")
|
||||
return
|
||||
|
||||
logger.info(
|
||||
f"Warming recording cache: {len(uncached)}/{len(recordings)} "
|
||||
f"recording(s) for workflow {workflow_id}"
|
||||
f"recording(s) for org {organization_id}"
|
||||
)
|
||||
|
||||
# Resolve storage instances once per backend, not per recording
|
||||
|
|
@ -156,7 +209,7 @@ async def warm_recording_cache(
|
|||
f"Cache warm: error processing {recording.recording_id}"
|
||||
)
|
||||
|
||||
logger.info(f"Recording cache warm complete for workflow {workflow_id}")
|
||||
logger.info(f"Recording cache warm complete for org {organization_id}")
|
||||
except Exception:
|
||||
logger.exception("Recording cache warm failed")
|
||||
|
||||
|
|
@ -187,7 +240,11 @@ async def _download_and_convert(
|
|||
pcm_data = _trim_silence(pcm_data, sample_rate)
|
||||
|
||||
# Write to disk cache
|
||||
cached = _cache_path(recording.recording_id, sample_rate)
|
||||
cached = _cache_path(
|
||||
recording.organization_id,
|
||||
recording.recording_id,
|
||||
sample_rate,
|
||||
)
|
||||
write_cache_file(cached, pcm_data)
|
||||
|
||||
return pcm_data
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@ from typing import Awaitable, Callable, Optional
|
|||
|
||||
from loguru import logger
|
||||
|
||||
from api.services.pipecat.recording_audio_cache import RecordingAudio
|
||||
from api.services.workflow.pipecat_engine_context_composer import (
|
||||
RECORDING_MARKER,
|
||||
TTS_MARKER,
|
||||
|
|
@ -48,14 +49,14 @@ class RecordingRouterProcessor(FrameProcessor):
|
|||
Args:
|
||||
audio_sample_rate: Pipeline sample rate for OutputAudioRawFrame.
|
||||
fetch_recording_audio: Async callback that takes a recording_id and
|
||||
returns raw 16-bit mono PCM bytes, or None on failure.
|
||||
returns a RecordingAudio (audio + transcript), or None on failure.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
*,
|
||||
audio_sample_rate: int,
|
||||
fetch_recording_audio: Callable[[str], Awaitable[Optional[bytes]]],
|
||||
fetch_recording_audio: Callable[..., Awaitable[Optional[RecordingAudio]]],
|
||||
**kwargs,
|
||||
):
|
||||
super().__init__(**kwargs)
|
||||
|
|
@ -245,8 +246,8 @@ class RecordingRouterProcessor(FrameProcessor):
|
|||
"""
|
||||
logger.info(f"Playing pre-recorded audio: {recording_id}")
|
||||
|
||||
audio_data = await self._fetch_recording_audio(recording_id)
|
||||
if not audio_data:
|
||||
result = await self._fetch_recording_audio(recording_id=recording_id)
|
||||
if not result:
|
||||
logger.warning(
|
||||
f"Failed to fetch recording {recording_id}, no audio will play"
|
||||
)
|
||||
|
|
@ -256,7 +257,7 @@ class RecordingRouterProcessor(FrameProcessor):
|
|||
await self.push_frame(TTSStartedFrame(context_id=context_id))
|
||||
await self.push_frame(
|
||||
TTSAudioRawFrame(
|
||||
audio=audio_data,
|
||||
audio=result.audio,
|
||||
sample_rate=self._audio_sample_rate,
|
||||
num_channels=1,
|
||||
context_id=context_id,
|
||||
|
|
@ -264,10 +265,10 @@ class RecordingRouterProcessor(FrameProcessor):
|
|||
)
|
||||
await self.push_frame(TTSStoppedFrame(context_id=context_id))
|
||||
|
||||
duration_secs = len(audio_data) / (self._audio_sample_rate * 2)
|
||||
duration_secs = len(result.audio) / (self._audio_sample_rate * 2)
|
||||
logger.debug(
|
||||
f"Finished pushing recording {recording_id} "
|
||||
f"({len(audio_data)} bytes, {duration_secs:.1f}s)"
|
||||
f"({len(result.audio)} bytes, {duration_secs:.1f}s)"
|
||||
)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
|
|
|
|||
|
|
@ -698,9 +698,7 @@ async def _run_pipeline(
|
|||
|
||||
# Check if the workflow has any active recordings so the engine can
|
||||
# include recording response mode instructions in all node prompts.
|
||||
has_recordings = await db_client.has_active_recordings(
|
||||
workflow_id, workflow.organization_id
|
||||
)
|
||||
has_recordings = await db_client.has_active_recordings(workflow.organization_id)
|
||||
|
||||
context_compaction_enabled = (workflow.workflow_configurations or {}).get(
|
||||
"context_compaction_enabled", False
|
||||
|
|
@ -831,6 +829,14 @@ async def _run_pipeline(
|
|||
voicemail_detector = None
|
||||
recording_router = None
|
||||
|
||||
# Create recording audio fetcher (used by recording router, audio greetings,
|
||||
# and audio transition speech)
|
||||
fetch_audio = create_recording_audio_fetcher(
|
||||
organization_id=workflow.organization_id,
|
||||
pipeline_sample_rate=audio_config.pipeline_sample_rate,
|
||||
)
|
||||
engine.set_fetch_recording_audio(fetch_audio)
|
||||
|
||||
if not is_realtime:
|
||||
# Create voicemail detector if enabled in workflow configurations
|
||||
voicemail_config = (workflow.workflow_configurations or {}).get(
|
||||
|
|
@ -871,10 +877,6 @@ async def _run_pipeline(
|
|||
|
||||
# Create recording router if workflow has active recordings
|
||||
if has_recordings:
|
||||
fetch_audio = create_recording_audio_fetcher(
|
||||
organization_id=workflow.organization_id,
|
||||
pipeline_sample_rate=audio_config.pipeline_sample_rate,
|
||||
)
|
||||
recording_router = RecordingRouterProcessor(
|
||||
audio_sample_rate=audio_config.pipeline_sample_rate,
|
||||
fetch_recording_audio=fetch_audio,
|
||||
|
|
@ -883,7 +885,6 @@ async def _run_pipeline(
|
|||
# before the first playback request.
|
||||
asyncio.create_task(
|
||||
warm_recording_cache(
|
||||
workflow_id=workflow_id,
|
||||
organization_id=workflow.organization_id,
|
||||
pipeline_sample_rate=audio_config.pipeline_sample_rate,
|
||||
)
|
||||
|
|
@ -918,8 +919,9 @@ async def _run_pipeline(
|
|||
# Create pipeline task with audio configuration
|
||||
task = create_pipeline_task(pipeline, workflow_run_id, audio_config)
|
||||
|
||||
# Now set the task on the engine
|
||||
# Now set the task and transport output on the engine
|
||||
engine.set_task(task)
|
||||
engine.set_transport_output(transport.output())
|
||||
|
||||
# Initialize the engine to set the initial context with
|
||||
# System Prompt and Tools
|
||||
|
|
@ -979,6 +981,7 @@ async def _run_pipeline(
|
|||
pipeline_metrics_aggregator=pipeline_metrics_aggregator,
|
||||
audio_config=audio_config,
|
||||
pre_call_fetch_task=pre_call_fetch_task,
|
||||
fetch_recording_audio=fetch_audio,
|
||||
user_provider_id=user_provider_id,
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -230,7 +230,7 @@ def create_tts_service(user_config, audio_config: "AudioConfig"):
|
|||
api_key=user_config.tts.api_key,
|
||||
settings=DeepgramTTSSettings(voice=user_config.tts.voice),
|
||||
text_filters=[xml_function_tag_filter],
|
||||
skip_aggregator_types=["recording_router"],
|
||||
skip_aggregator_types=["recording_router", "recording"],
|
||||
silence_time_s=1.0,
|
||||
)
|
||||
elif user_config.tts.provider == ServiceProviders.OPENAI.value:
|
||||
|
|
@ -238,7 +238,7 @@ def create_tts_service(user_config, audio_config: "AudioConfig"):
|
|||
api_key=user_config.tts.api_key,
|
||||
settings=OpenAITTSSettings(model=user_config.tts.model),
|
||||
text_filters=[xml_function_tag_filter],
|
||||
skip_aggregator_types=["recording_router"],
|
||||
skip_aggregator_types=["recording_router", "recording"],
|
||||
silence_time_s=1.0,
|
||||
)
|
||||
elif user_config.tts.provider == ServiceProviders.ELEVENLABS.value:
|
||||
|
|
@ -258,7 +258,7 @@ def create_tts_service(user_config, audio_config: "AudioConfig"):
|
|||
similarity_boost=0.75,
|
||||
),
|
||||
text_filters=[xml_function_tag_filter],
|
||||
skip_aggregator_types=["recording_router"],
|
||||
skip_aggregator_types=["recording_router", "recording"],
|
||||
silence_time_s=1.0,
|
||||
)
|
||||
elif user_config.tts.provider == ServiceProviders.CARTESIA.value:
|
||||
|
|
@ -284,7 +284,7 @@ def create_tts_service(user_config, audio_config: "AudioConfig"):
|
|||
),
|
||||
),
|
||||
text_filters=[xml_function_tag_filter],
|
||||
skip_aggregator_types=["recording_router"],
|
||||
skip_aggregator_types=["recording_router", "recording"],
|
||||
silence_time_s=1.0,
|
||||
)
|
||||
elif user_config.tts.provider == ServiceProviders.DOGRAH.value:
|
||||
|
|
@ -299,7 +299,7 @@ def create_tts_service(user_config, audio_config: "AudioConfig"):
|
|||
speed=user_config.tts.speed,
|
||||
),
|
||||
text_filters=[xml_function_tag_filter],
|
||||
skip_aggregator_types=["recording_router"],
|
||||
skip_aggregator_types=["recording_router", "recording"],
|
||||
silence_time_s=1.0,
|
||||
)
|
||||
elif user_config.tts.provider == ServiceProviders.CAMB.value:
|
||||
|
|
@ -312,7 +312,7 @@ def create_tts_service(user_config, audio_config: "AudioConfig"):
|
|||
voice_id=voice_id,
|
||||
model=user_config.tts.model,
|
||||
text_filters=[xml_function_tag_filter],
|
||||
skip_aggregator_types=["recording_router"],
|
||||
skip_aggregator_types=["recording_router", "recording"],
|
||||
)
|
||||
# Set language directly as BCP-47 code (bypasses Language enum conversion)
|
||||
tts._settings.language = language
|
||||
|
|
@ -327,7 +327,7 @@ def create_tts_service(user_config, audio_config: "AudioConfig"):
|
|||
speed=user_config.tts.speed,
|
||||
),
|
||||
text_filters=[xml_function_tag_filter],
|
||||
skip_aggregator_types=["recording_router"],
|
||||
skip_aggregator_types=["recording_router", "recording"],
|
||||
silence_time_s=1.0,
|
||||
)
|
||||
elif user_config.tts.provider == ServiceProviders.RIME.value:
|
||||
|
|
@ -352,7 +352,7 @@ def create_tts_service(user_config, audio_config: "AudioConfig"):
|
|||
api_key=user_config.tts.api_key,
|
||||
settings=RimeTTSSettings(**settings_kwargs),
|
||||
text_filters=[xml_function_tag_filter],
|
||||
skip_aggregator_types=["recording_router"],
|
||||
skip_aggregator_types=["recording_router", "recording"],
|
||||
silence_time_s=1.0,
|
||||
)
|
||||
elif user_config.tts.provider == ServiceProviders.SARVAM.value:
|
||||
|
|
@ -382,7 +382,7 @@ def create_tts_service(user_config, audio_config: "AudioConfig"):
|
|||
language=pipecat_language,
|
||||
),
|
||||
text_filters=[xml_function_tag_filter],
|
||||
skip_aggregator_types=["recording_router"],
|
||||
skip_aggregator_types=["recording_router", "recording"],
|
||||
silence_time_s=1.0,
|
||||
)
|
||||
else:
|
||||
|
|
|
|||
|
|
@ -54,6 +54,8 @@ class NodeDataDTO(BaseModel):
|
|||
extraction_variables: Optional[list[ExtractionVariableDTO]] = None
|
||||
add_global_prompt: bool = True
|
||||
greeting: Optional[str] = None
|
||||
greeting_type: Optional[str] = None # 'text' or 'audio'
|
||||
greeting_recording_id: Optional[str] = None
|
||||
wait_for_user_response: bool = False
|
||||
wait_for_user_response_timeout: Optional[float] = None
|
||||
detect_voicemail: bool = False
|
||||
|
|
@ -102,6 +104,8 @@ class EdgeDataDTO(BaseModel):
|
|||
label: str = Field(..., min_length=1)
|
||||
condition: str = Field(..., min_length=1)
|
||||
transition_speech: Optional[str] = None
|
||||
transition_speech_type: Optional[str] = None # 'text' or 'audio'
|
||||
transition_speech_recording_id: Optional[str] = None
|
||||
|
||||
|
||||
class RFEdgeDTO(BaseModel):
|
||||
|
|
|
|||
|
|
@ -1,14 +1,12 @@
|
|||
"""Service for duplicating workflows including recordings."""
|
||||
"""Service for duplicating workflows."""
|
||||
|
||||
import copy
|
||||
import json
|
||||
import posixpath
|
||||
import uuid
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from api.db import db_client
|
||||
from api.db.workflow_recording_client import generate_short_id
|
||||
from api.enums import StorageBackend
|
||||
from api.services.storage import get_storage_for_backend, storage_fs
|
||||
|
||||
|
|
@ -41,22 +39,14 @@ def _regenerate_trigger_uuids(workflow_definition: dict) -> dict:
|
|||
return updated_definition
|
||||
|
||||
|
||||
async def _generate_unique_recording_id() -> str:
|
||||
"""Generate a globally unique short recording ID."""
|
||||
for _ in range(10):
|
||||
rid = generate_short_id(8)
|
||||
exists = await db_client.check_recording_id_exists(rid)
|
||||
if not exists:
|
||||
return rid
|
||||
raise RuntimeError("Failed to generate unique recording ID")
|
||||
|
||||
|
||||
async def duplicate_workflow(
|
||||
workflow_id: int,
|
||||
organization_id: int,
|
||||
user_id: int,
|
||||
):
|
||||
"""Duplicate a workflow including its definition, config, recordings, and triggers.
|
||||
"""Duplicate a workflow including its definition, config, and triggers.
|
||||
|
||||
Recordings are org-scoped and shared, so they are not duplicated.
|
||||
|
||||
Args:
|
||||
workflow_id: The source workflow ID to duplicate
|
||||
|
|
@ -130,29 +120,7 @@ async def duplicate_workflow(
|
|||
organization_id=organization_id,
|
||||
)
|
||||
|
||||
# 6. Copy recordings with new IDs and storage paths scoped to new workflow
|
||||
recording_id_map = await _duplicate_recordings(
|
||||
source_workflow_id=workflow_id,
|
||||
new_workflow_id=new_workflow.id,
|
||||
organization_id=organization_id,
|
||||
user_id=user_id,
|
||||
)
|
||||
|
||||
# 7. Replace old recording IDs with new ones in the workflow definition
|
||||
if recording_id_map:
|
||||
workflow_definition = _replace_recording_ids(
|
||||
workflow_definition, recording_id_map
|
||||
)
|
||||
new_workflow = await db_client.update_workflow(
|
||||
workflow_id=new_workflow.id,
|
||||
name=None,
|
||||
workflow_definition=workflow_definition,
|
||||
template_context_variables=None,
|
||||
workflow_configurations=None,
|
||||
organization_id=organization_id,
|
||||
)
|
||||
|
||||
# 8. Sync triggers for the new workflow
|
||||
# 6. Sync triggers for the new workflow
|
||||
if workflow_definition:
|
||||
trigger_paths = _extract_trigger_paths(workflow_definition)
|
||||
if trigger_paths:
|
||||
|
|
@ -165,94 +133,6 @@ async def duplicate_workflow(
|
|||
return new_workflow
|
||||
|
||||
|
||||
async def _duplicate_recordings(
|
||||
source_workflow_id: int,
|
||||
new_workflow_id: int,
|
||||
organization_id: int,
|
||||
user_id: int,
|
||||
) -> dict[str, str]:
|
||||
"""Duplicate all recordings for a workflow.
|
||||
|
||||
Copies each recording file to a new storage path scoped under the new
|
||||
workflow ID, and creates new DB records pointing to the copied files.
|
||||
|
||||
Returns:
|
||||
Mapping of old_recording_id -> new_recording_id
|
||||
"""
|
||||
recordings = await db_client.get_recordings_for_workflow(
|
||||
workflow_id=source_workflow_id,
|
||||
organization_id=organization_id,
|
||||
)
|
||||
|
||||
if not recordings:
|
||||
return {}
|
||||
|
||||
recording_id_map: dict[str, str] = {}
|
||||
|
||||
for rec in recordings:
|
||||
try:
|
||||
new_recording_id = await _generate_unique_recording_id()
|
||||
|
||||
# Build new storage key: recordings/{org_id}/{new_workflow_id}/{new_recording_id}/{filename}
|
||||
filename = posixpath.basename(rec.storage_key)
|
||||
new_storage_key = (
|
||||
f"recordings/{organization_id}"
|
||||
f"/{new_workflow_id}/{new_recording_id}"
|
||||
f"/{filename}"
|
||||
)
|
||||
|
||||
copied = await _copy_storage_object(
|
||||
rec.storage_key, new_storage_key, rec.storage_backend
|
||||
)
|
||||
if not copied:
|
||||
logger.warning(
|
||||
f"Failed to copy recording file {rec.recording_id}, skipping"
|
||||
)
|
||||
continue
|
||||
|
||||
await db_client.create_recording(
|
||||
recording_id=new_recording_id,
|
||||
workflow_id=new_workflow_id,
|
||||
organization_id=organization_id,
|
||||
tts_provider=rec.tts_provider,
|
||||
tts_model=rec.tts_model,
|
||||
tts_voice_id=rec.tts_voice_id,
|
||||
transcript=rec.transcript,
|
||||
storage_key=new_storage_key,
|
||||
storage_backend=rec.storage_backend,
|
||||
created_by=user_id,
|
||||
metadata=copy.deepcopy(rec.recording_metadata),
|
||||
)
|
||||
|
||||
recording_id_map[rec.recording_id] = new_recording_id
|
||||
logger.info(
|
||||
f"Duplicated recording {rec.recording_id} -> {new_recording_id}"
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error duplicating recording {rec.recording_id}: {e}")
|
||||
continue
|
||||
|
||||
return recording_id_map
|
||||
|
||||
|
||||
def _replace_recording_ids(
|
||||
workflow_definition: dict,
|
||||
recording_id_map: dict[str, str],
|
||||
) -> dict:
|
||||
"""Replace old recording IDs with new ones throughout the workflow definition.
|
||||
|
||||
Uses JSON serialization to do a thorough find-and-replace across all
|
||||
nested fields (node prompts, data, etc.).
|
||||
"""
|
||||
definition_str = json.dumps(workflow_definition)
|
||||
|
||||
for old_id, new_id in recording_id_map.items():
|
||||
definition_str = definition_str.replace(old_id, new_id)
|
||||
|
||||
return json.loads(definition_str)
|
||||
|
||||
|
||||
async def _copy_storage_object(
|
||||
source_key: str, dest_key: str, storage_backend: str
|
||||
) -> bool:
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
from typing import TYPE_CHECKING, Awaitable, Callable, Optional, Union
|
||||
|
||||
from api.services.pipecat.audio_playback import play_audio
|
||||
from api.services.workflow.disposition_mapper import (
|
||||
apply_disposition_mapping,
|
||||
get_organization_id_from_workflow_run,
|
||||
|
|
@ -114,6 +115,13 @@ class PipecatEngine:
|
|||
# Audio configuration (set via set_audio_config from _run_pipeline)
|
||||
self._audio_config = None
|
||||
|
||||
# Transport output processor for injecting audio directly into the
|
||||
# output, bypassing STT (set via set_transport_output from _run_pipeline)
|
||||
self._transport_output = None
|
||||
|
||||
# Recording audio fetcher (set via set_fetch_recording_audio from _run_pipeline)
|
||||
self._fetch_recording_audio = None
|
||||
|
||||
# True when the workflow has active recordings; enables recording
|
||||
# response mode instructions on all nodes for in-context learning.
|
||||
self._has_recordings: bool = has_recordings
|
||||
|
|
@ -191,6 +199,8 @@ class PipecatEngine:
|
|||
name: str,
|
||||
transition_to_node: str,
|
||||
transition_speech: Optional[str] = None,
|
||||
transition_speech_type: Optional[str] = None,
|
||||
transition_speech_recording_id: Optional[str] = None,
|
||||
):
|
||||
async def transition_func(function_call_params: FunctionCallParams) -> None:
|
||||
"""Inner function that handles the node change tool calls"""
|
||||
|
|
@ -204,8 +214,34 @@ class PipecatEngine:
|
|||
# Perform variable extraction before transitioning to new node
|
||||
await self._perform_variable_extraction_if_needed(self._current_node)
|
||||
|
||||
# Queue transition speech before switching nodes
|
||||
if transition_speech:
|
||||
# Queue transition speech/audio before switching nodes
|
||||
speech_type = transition_speech_type or "text"
|
||||
if (
|
||||
speech_type == "audio"
|
||||
and transition_speech_recording_id
|
||||
and self._fetch_recording_audio
|
||||
):
|
||||
logger.info(
|
||||
f"Playing transition audio: {transition_speech_recording_id}"
|
||||
)
|
||||
self._queued_speech_mute_state = "waiting"
|
||||
result = await self._fetch_recording_audio(
|
||||
recording_pk=int(transition_speech_recording_id)
|
||||
)
|
||||
if result:
|
||||
await play_audio(
|
||||
result.audio,
|
||||
sample_rate=self._audio_config.pipeline_sample_rate
|
||||
if self._audio_config
|
||||
else 16000,
|
||||
queue_frame=self._transport_output.queue_frame,
|
||||
transcript=result.transcript,
|
||||
)
|
||||
else:
|
||||
logger.warning(
|
||||
f"Failed to fetch transition audio {transition_speech_recording_id}"
|
||||
)
|
||||
elif transition_speech:
|
||||
logger.info(f"Playing transition speech: {transition_speech}")
|
||||
self._queued_speech_mute_state = "waiting"
|
||||
await self.task.queue_frame(
|
||||
|
|
@ -259,6 +295,8 @@ class PipecatEngine:
|
|||
name: str,
|
||||
transition_to_node: str,
|
||||
transition_speech: Optional[str] = None,
|
||||
transition_speech_type: Optional[str] = None,
|
||||
transition_speech_recording_id: Optional[str] = None,
|
||||
):
|
||||
logger.debug(
|
||||
f"Registering function {name} to transition to node {transition_to_node} with LLM"
|
||||
|
|
@ -266,7 +304,11 @@ class PipecatEngine:
|
|||
|
||||
# Create transition function
|
||||
transition_func = await self._create_transition_func(
|
||||
name, transition_to_node, transition_speech
|
||||
name,
|
||||
transition_to_node,
|
||||
transition_speech,
|
||||
transition_speech_type,
|
||||
transition_speech_recording_id,
|
||||
)
|
||||
|
||||
# Register function with LLM
|
||||
|
|
@ -442,6 +484,8 @@ class PipecatEngine:
|
|||
outgoing_edge.get_function_name(),
|
||||
outgoing_edge.target,
|
||||
outgoing_edge.transition_speech,
|
||||
outgoing_edge.data.transition_speech_type,
|
||||
outgoing_edge.data.transition_speech_recording_id,
|
||||
)
|
||||
|
||||
# Register custom tool handlers for this node
|
||||
|
|
@ -533,11 +577,27 @@ class PipecatEngine:
|
|||
# Setup LLM Context with Prompts and Functions
|
||||
await self._setup_llm_context(node)
|
||||
|
||||
def get_start_greeting(self) -> Optional[str]:
|
||||
"""Return the rendered greeting for the start node, or None if not configured."""
|
||||
def get_start_greeting(self) -> Optional[tuple[str, Optional[str]]]:
|
||||
"""Return the greeting info for the start node, or None if not configured.
|
||||
|
||||
Returns:
|
||||
A tuple of (greeting_type, value) where:
|
||||
- ("text", rendered_text) for text greetings spoken via TTS
|
||||
- ("audio", recording_id) for pre-recorded audio greetings
|
||||
Or None if no greeting is configured.
|
||||
"""
|
||||
start_node = self.workflow.nodes.get(self.workflow.start_node_id)
|
||||
if start_node and start_node.greeting:
|
||||
return self._format_prompt(start_node.greeting)
|
||||
if not start_node:
|
||||
return None
|
||||
|
||||
greeting_type = start_node.greeting_type or "text"
|
||||
|
||||
if greeting_type == "audio" and start_node.greeting_recording_id:
|
||||
return ("audio", start_node.greeting_recording_id)
|
||||
|
||||
if start_node.greeting:
|
||||
return ("text", self._format_prompt(start_node.greeting))
|
||||
|
||||
return None
|
||||
|
||||
async def _handle_end_node(self, node: Node) -> None:
|
||||
|
|
@ -698,6 +758,18 @@ class PipecatEngine:
|
|||
"""Set the audio configuration for the pipeline."""
|
||||
self._audio_config = audio_config
|
||||
|
||||
def set_transport_output(self, transport_output) -> None:
|
||||
"""Set the transport output processor for direct audio playback.
|
||||
|
||||
Audio queued here bypasses STT and the rest of the pipeline,
|
||||
going straight to the caller.
|
||||
"""
|
||||
self._transport_output = transport_output
|
||||
|
||||
def set_fetch_recording_audio(self, fetch_fn) -> None:
|
||||
"""Set the recording audio fetcher callback."""
|
||||
self._fetch_recording_audio = fetch_fn
|
||||
|
||||
def set_mute_pipeline(self, mute: bool) -> None:
|
||||
"""Set the pipeline mute state.
|
||||
|
||||
|
|
|
|||
|
|
@ -168,7 +168,6 @@ def create_aggregation_correction_callback(engine: "PipecatEngine"):
|
|||
reference = engine._current_llm_generation_reference_text
|
||||
|
||||
if not reference:
|
||||
logger.warning("No reference text available for aggregation correction")
|
||||
return corrupted
|
||||
|
||||
# Apply the correction algorithm
|
||||
|
|
|
|||
|
|
@ -16,6 +16,7 @@ from loguru import logger
|
|||
|
||||
from api.db import db_client
|
||||
from api.enums import ToolCategory, WorkflowRunMode
|
||||
from api.services.pipecat.audio_playback import play_audio, play_audio_loop
|
||||
from api.services.telephony.call_transfer_manager import get_call_transfer_manager
|
||||
from api.services.telephony.factory import get_telephony_provider
|
||||
from api.services.telephony.transfer_event_protocol import TransferContext
|
||||
|
|
@ -27,7 +28,6 @@ from api.services.workflow.tools.custom_tool import (
|
|||
execute_http_tool,
|
||||
tool_to_function_schema,
|
||||
)
|
||||
from api.utils.hold_audio import play_hold_audio_loop
|
||||
from pipecat.adapters.schemas.function_schema import FunctionSchema
|
||||
from pipecat.frames.frames import (
|
||||
FunctionCallResultProperties,
|
||||
|
|
@ -77,6 +77,45 @@ class CustomToolManager:
|
|||
self._engine = engine
|
||||
self._organization_id: Optional[int] = None
|
||||
|
||||
async def _play_config_message(
|
||||
self, config: dict, *, append_to_context: bool = False
|
||||
) -> bool:
|
||||
"""Play a message from tool config — text or pre-recorded audio.
|
||||
|
||||
Returns True if a message was queued, False otherwise.
|
||||
"""
|
||||
message_type = config.get("messageType", "none")
|
||||
|
||||
if message_type == "audio":
|
||||
recording_pk = config.get("audioRecordingId")
|
||||
if recording_pk and self._engine._fetch_recording_audio:
|
||||
result = await self._engine._fetch_recording_audio(
|
||||
recording_pk=int(recording_pk)
|
||||
)
|
||||
if result:
|
||||
await play_audio(
|
||||
result.audio,
|
||||
sample_rate=self._engine._audio_config.pipeline_sample_rate
|
||||
if self._engine._audio_config
|
||||
else 16000,
|
||||
queue_frame=self._engine._transport_output.queue_frame,
|
||||
transcript=result.transcript,
|
||||
)
|
||||
return True
|
||||
else:
|
||||
logger.warning(f"Failed to fetch recording pk={recording_pk}")
|
||||
return False
|
||||
|
||||
if message_type == "custom":
|
||||
custom_message = config.get("customMessage", "")
|
||||
if custom_message:
|
||||
await self._engine.task.queue_frame(
|
||||
TTSSpeakFrame(custom_message, append_to_context=append_to_context)
|
||||
)
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
async def get_organization_id(self) -> Optional[int]:
|
||||
"""Get and cache the organization ID from workflow run."""
|
||||
if self._organization_id is None:
|
||||
|
|
@ -250,9 +289,30 @@ class CustomToolManager:
|
|||
|
||||
try:
|
||||
# Queue custom message before executing the API call
|
||||
# Queue custom message (text or audio) before executing the API call
|
||||
config = tool.definition.get("config", {}) if tool.definition else {}
|
||||
custom_msg_type = config.get("customMessageType", "text")
|
||||
custom_message = config.get("customMessage", "")
|
||||
if custom_message:
|
||||
if custom_msg_type == "audio":
|
||||
recording_pk = config.get("customMessageRecordingId")
|
||||
if recording_pk and self._engine._fetch_recording_audio:
|
||||
logger.info(
|
||||
f"Playing audio message before HTTP tool: pk={recording_pk}"
|
||||
)
|
||||
self._engine._queued_speech_mute_state = "waiting"
|
||||
result = await self._engine._fetch_recording_audio(
|
||||
recording_pk=int(recording_pk)
|
||||
)
|
||||
if result:
|
||||
await play_audio(
|
||||
result.audio,
|
||||
sample_rate=self._engine._audio_config.pipeline_sample_rate
|
||||
if self._engine._audio_config
|
||||
else 16000,
|
||||
queue_frame=self._engine._transport_output.queue_frame,
|
||||
transcript=result.transcript,
|
||||
)
|
||||
elif custom_message:
|
||||
logger.info(
|
||||
f"Playing custom message before HTTP tool: {custom_message}"
|
||||
)
|
||||
|
|
@ -299,8 +359,6 @@ class CustomToolManager:
|
|||
try:
|
||||
# Get the end call configuration
|
||||
config = tool.definition.get("config", {})
|
||||
message_type = config.get("messageType", "none")
|
||||
custom_message = config.get("customMessage", "")
|
||||
|
||||
# Handle end call reason if enabled
|
||||
end_call_reason_enabled = config.get("endCallReason", False)
|
||||
|
|
@ -322,10 +380,8 @@ class CustomToolManager:
|
|||
properties=properties,
|
||||
)
|
||||
|
||||
if message_type == "custom" and custom_message:
|
||||
# Queue the custom message to be spoken
|
||||
logger.info(f"Playing custom goodbye message: {custom_message}")
|
||||
await self._engine.task.queue_frame(TTSSpeakFrame(custom_message))
|
||||
played = await self._play_config_message(config)
|
||||
if played:
|
||||
# End the call after the message (not immediately)
|
||||
await self._engine.end_call_with_reason(
|
||||
EndTaskReason.END_CALL_TOOL_REASON.value,
|
||||
|
|
@ -370,8 +426,6 @@ class CustomToolManager:
|
|||
# Get the transfer call configuration
|
||||
config = tool.definition.get("config", {})
|
||||
destination = config.get("destination", "")
|
||||
message_type = config.get("messageType", "none")
|
||||
custom_message = config.get("customMessage", "")
|
||||
timeout_seconds = config.get(
|
||||
"timeout", 30
|
||||
) # Default 30 seconds if not configured
|
||||
|
|
@ -443,10 +497,9 @@ class CustomToolManager:
|
|||
)
|
||||
return
|
||||
|
||||
if message_type == "custom" and custom_message:
|
||||
logger.info(f"Playing pre-transfer message: {custom_message}")
|
||||
played = await self._play_config_message(config)
|
||||
if played:
|
||||
self._engine._queued_speech_mute_state = "waiting"
|
||||
await self._engine.task.queue_frame(TTSSpeakFrame(custom_message))
|
||||
|
||||
# Get organization ID for provider configuration
|
||||
organization_id = await self.get_organization_id()
|
||||
|
|
@ -537,10 +590,10 @@ class CustomToolManager:
|
|||
|
||||
# Start hold music as background task
|
||||
hold_music_task = asyncio.create_task(
|
||||
play_hold_audio_loop(
|
||||
self._engine.task,
|
||||
hold_music_stop_event,
|
||||
sample_rate,
|
||||
play_audio_loop(
|
||||
stop_event=hold_music_stop_event,
|
||||
sample_rate=sample_rate,
|
||||
queue_frame=self._engine._transport_output.queue_frame,
|
||||
)
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -77,6 +77,8 @@ class Node:
|
|||
self.extraction_variables = data.extraction_variables
|
||||
self.add_global_prompt = data.add_global_prompt
|
||||
self.greeting = data.greeting
|
||||
self.greeting_type = data.greeting_type
|
||||
self.greeting_recording_id = data.greeting_recording_id
|
||||
self.detect_voicemail = data.detect_voicemail
|
||||
self.delayed_start = data.delayed_start
|
||||
self.delayed_start_duration = data.delayed_start_duration
|
||||
|
|
|
|||
|
|
@ -13,6 +13,7 @@ from typing import Optional
|
|||
|
||||
import pytest
|
||||
|
||||
from api.services.pipecat.recording_audio_cache import RecordingAudio
|
||||
from api.services.pipecat.recording_router_processor import (
|
||||
RecordingRouterProcessor,
|
||||
)
|
||||
|
|
@ -37,9 +38,9 @@ from pipecat.tests import run_test
|
|||
FAKE_AUDIO = b"\x00\x01" * 8000 # 1 second of 16-bit mono @ 16 kHz
|
||||
|
||||
|
||||
async def _fake_fetch(recording_id: str) -> Optional[bytes]:
|
||||
async def _fake_fetch(recording_id: str) -> Optional[RecordingAudio]:
|
||||
"""Stub that returns fake PCM audio for any recording_id."""
|
||||
return FAKE_AUDIO
|
||||
return RecordingAudio(audio=FAKE_AUDIO)
|
||||
|
||||
|
||||
def _make_processor(**kwargs) -> RecordingRouterProcessor:
|
||||
|
|
@ -189,7 +190,7 @@ class TestMixedMarkerSuppression:
|
|||
|
||||
async def tracking_fetch(recording_id: str):
|
||||
fetched_ids.append(recording_id)
|
||||
return FAKE_AUDIO
|
||||
return RecordingAudio(audio=FAKE_AUDIO)
|
||||
|
||||
processor = _make_processor(fetch=tracking_fetch)
|
||||
|
||||
|
|
|
|||
603
api/tests/test_text_and_audio_playback.py
Normal file
603
api/tests/test_text_and_audio_playback.py
Normal file
|
|
@ -0,0 +1,603 @@
|
|||
"""Tests for text and audio playback in greetings, transitions, and tool messages.
|
||||
|
||||
Verifies that:
|
||||
- Text mode produces TTSSpeakFrame
|
||||
- Audio mode produces TTSStartedFrame -> TTSAudioRawFrame -> TTSStoppedFrame
|
||||
- Covers: start node greetings, edge transition speech, tool config messages
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
from typing import Any, Dict, List
|
||||
from unittest.mock import AsyncMock, Mock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from api.services.pipecat.recording_audio_cache import RecordingAudio
|
||||
from api.services.workflow.dto import (
|
||||
EdgeDataDTO,
|
||||
NodeDataDTO,
|
||||
NodeType,
|
||||
Position,
|
||||
ReactFlowDTO,
|
||||
RFEdgeDTO,
|
||||
RFNodeDTO,
|
||||
)
|
||||
from api.services.workflow.pipecat_engine import PipecatEngine
|
||||
from api.services.workflow.pipecat_engine_custom_tools import CustomToolManager
|
||||
from api.services.workflow.workflow import WorkflowGraph
|
||||
from pipecat.frames.frames import (
|
||||
Frame,
|
||||
LLMContextFrame,
|
||||
TTSAudioRawFrame,
|
||||
TTSSpeakFrame,
|
||||
TTSStartedFrame,
|
||||
TTSStoppedFrame,
|
||||
)
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import (
|
||||
LLMAssistantAggregatorParams,
|
||||
LLMContextAggregatorPair,
|
||||
)
|
||||
from pipecat.tests import MockLLMService, MockTTSService
|
||||
from pipecat.tests.mock_transport import MockTransport
|
||||
from pipecat.transports.base_transport import TransportParams
|
||||
|
||||
# ─── Constants ──────────────────────────────────────────────────
|
||||
|
||||
START_PROMPT = "Start Call System Prompt"
|
||||
END_PROMPT = "End Call System Prompt"
|
||||
TEXT_GREETING = "Hello, welcome to our service!"
|
||||
TEXT_TRANSITION = "Thank you for calling, goodbye!"
|
||||
AUDIO_GREETING_ID = "rec-greeting-001"
|
||||
AUDIO_TRANSITION_ID = "101"
|
||||
FAKE_PCM_AUDIO = b"\x00\x01" * 1000 # Fake 16-bit mono PCM data
|
||||
|
||||
|
||||
# ─── Fixtures ───────────────────────────────────────────────────
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def text_workflow() -> WorkflowGraph:
|
||||
"""Start->End workflow with text greeting and text transition speech."""
|
||||
dto = ReactFlowDTO(
|
||||
nodes=[
|
||||
RFNodeDTO(
|
||||
id="start",
|
||||
type=NodeType.startNode,
|
||||
position=Position(x=0, y=0),
|
||||
data=NodeDataDTO(
|
||||
name="Start Call",
|
||||
prompt=START_PROMPT,
|
||||
is_start=True,
|
||||
allow_interrupt=False,
|
||||
add_global_prompt=False,
|
||||
greeting=TEXT_GREETING,
|
||||
greeting_type="text",
|
||||
extraction_enabled=False,
|
||||
),
|
||||
),
|
||||
RFNodeDTO(
|
||||
id="end",
|
||||
type=NodeType.endNode,
|
||||
position=Position(x=0, y=200),
|
||||
data=NodeDataDTO(
|
||||
name="End Call",
|
||||
prompt=END_PROMPT,
|
||||
is_end=True,
|
||||
allow_interrupt=False,
|
||||
add_global_prompt=False,
|
||||
extraction_enabled=False,
|
||||
),
|
||||
),
|
||||
],
|
||||
edges=[
|
||||
RFEdgeDTO(
|
||||
id="start-end",
|
||||
source="start",
|
||||
target="end",
|
||||
data=EdgeDataDTO(
|
||||
label="End Call",
|
||||
condition="When the user says end the call",
|
||||
transition_speech=TEXT_TRANSITION,
|
||||
transition_speech_type="text",
|
||||
),
|
||||
),
|
||||
],
|
||||
)
|
||||
return WorkflowGraph(dto)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def audio_workflow() -> WorkflowGraph:
|
||||
"""Start->End workflow with audio greeting and audio transition speech."""
|
||||
dto = ReactFlowDTO(
|
||||
nodes=[
|
||||
RFNodeDTO(
|
||||
id="start",
|
||||
type=NodeType.startNode,
|
||||
position=Position(x=0, y=0),
|
||||
data=NodeDataDTO(
|
||||
name="Start Call",
|
||||
prompt=START_PROMPT,
|
||||
is_start=True,
|
||||
allow_interrupt=False,
|
||||
add_global_prompt=False,
|
||||
greeting_type="audio",
|
||||
greeting_recording_id=AUDIO_GREETING_ID,
|
||||
extraction_enabled=False,
|
||||
),
|
||||
),
|
||||
RFNodeDTO(
|
||||
id="end",
|
||||
type=NodeType.endNode,
|
||||
position=Position(x=0, y=200),
|
||||
data=NodeDataDTO(
|
||||
name="End Call",
|
||||
prompt=END_PROMPT,
|
||||
is_end=True,
|
||||
allow_interrupt=False,
|
||||
add_global_prompt=False,
|
||||
extraction_enabled=False,
|
||||
),
|
||||
),
|
||||
],
|
||||
edges=[
|
||||
RFEdgeDTO(
|
||||
id="start-end",
|
||||
source="start",
|
||||
target="end",
|
||||
data=EdgeDataDTO(
|
||||
label="End Call",
|
||||
condition="When the user says end the call",
|
||||
transition_speech_type="audio",
|
||||
transition_speech_recording_id=AUDIO_TRANSITION_ID,
|
||||
),
|
||||
),
|
||||
],
|
||||
)
|
||||
return WorkflowGraph(dto)
|
||||
|
||||
|
||||
# ─── Pipeline Helper ────────────────────────────────────────────
|
||||
|
||||
|
||||
async def run_pipeline_and_capture_frames(
|
||||
workflow: WorkflowGraph,
|
||||
functions: List[Dict[str, Any]],
|
||||
fetch_recording_audio=None,
|
||||
num_text_steps: int = 1,
|
||||
) -> tuple[MockLLMService, LLMContext, list[Frame]]:
|
||||
"""Run a pipeline with mock tool calls and capture frames queued via task.queue_frame.
|
||||
|
||||
Returns:
|
||||
Tuple of (llm, context, list of captured frames).
|
||||
"""
|
||||
first_step_chunks = MockLLMService.create_multiple_function_call_chunks(functions)
|
||||
mock_steps = MockLLMService.create_multi_step_responses(
|
||||
first_step_chunks, num_text_steps=num_text_steps, step_prefix="Response"
|
||||
)
|
||||
|
||||
llm = MockLLMService(mock_steps=mock_steps, chunk_delay=0.001)
|
||||
tts = MockTTSService(mock_audio_duration_ms=40, frame_delay=0)
|
||||
mock_transport = MockTransport(
|
||||
params=TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
audio_in_sample_rate=16000,
|
||||
audio_out_sample_rate=16000,
|
||||
),
|
||||
)
|
||||
|
||||
context = LLMContext()
|
||||
assistant_params = LLMAssistantAggregatorParams(expect_stripped_words=True)
|
||||
context_aggregator = LLMContextAggregatorPair(
|
||||
context, assistant_params=assistant_params
|
||||
)
|
||||
|
||||
engine = PipecatEngine(
|
||||
llm=llm,
|
||||
context=context,
|
||||
workflow=workflow,
|
||||
call_context_vars={"customer_name": "Test User"},
|
||||
workflow_run_id=1,
|
||||
)
|
||||
|
||||
transport_output = mock_transport.output()
|
||||
|
||||
if fetch_recording_audio:
|
||||
engine.set_fetch_recording_audio(fetch_recording_audio)
|
||||
engine.set_transport_output(transport_output)
|
||||
|
||||
pipeline = Pipeline([llm, tts, transport_output, context_aggregator.assistant()])
|
||||
task = PipelineTask(pipeline, params=PipelineParams(), enable_rtvi=False)
|
||||
engine.set_task(task)
|
||||
|
||||
# Spy on task.queue_frame and transport_output.queue_frame to capture
|
||||
# all frames queued by the engine (audio transitions go via transport output)
|
||||
queued_frames: list[Frame] = []
|
||||
original_queue_frame = task.queue_frame
|
||||
|
||||
async def capturing_queue_frame(frame):
|
||||
queued_frames.append(frame)
|
||||
await original_queue_frame(frame)
|
||||
|
||||
task.queue_frame = capturing_queue_frame
|
||||
|
||||
if fetch_recording_audio:
|
||||
original_transport_queue = transport_output.queue_frame
|
||||
|
||||
async def _spy_transport_queue(frame, *args, **kwargs):
|
||||
queued_frames.append(frame)
|
||||
await original_transport_queue(frame, *args, **kwargs)
|
||||
|
||||
transport_output.queue_frame = _spy_transport_queue
|
||||
|
||||
with (
|
||||
patch(
|
||||
"api.services.workflow.pipecat_engine.get_organization_id_from_workflow_run",
|
||||
new_callable=AsyncMock,
|
||||
return_value=1,
|
||||
),
|
||||
patch(
|
||||
"api.services.workflow.pipecat_engine.apply_disposition_mapping",
|
||||
new_callable=AsyncMock,
|
||||
return_value="completed",
|
||||
),
|
||||
):
|
||||
runner = PipelineRunner()
|
||||
|
||||
async def run():
|
||||
await runner.run(task)
|
||||
|
||||
async def initialize():
|
||||
await asyncio.sleep(0.01)
|
||||
await engine.initialize()
|
||||
await engine.set_node(engine.workflow.start_node_id)
|
||||
await engine.llm.queue_frame(LLMContextFrame(engine.context))
|
||||
|
||||
await asyncio.gather(run(), initialize())
|
||||
|
||||
return llm, context, queued_frames
|
||||
|
||||
|
||||
# ─── Tests: Start Greeting ──────────────────────────────────────
|
||||
|
||||
|
||||
class TestStartGreeting:
|
||||
"""Unit tests for PipecatEngine.get_start_greeting()."""
|
||||
|
||||
def test_text_greeting_returns_text_tuple(self, text_workflow: WorkflowGraph):
|
||||
"""Text greeting config should return ('text', rendered_text)."""
|
||||
engine = PipecatEngine(
|
||||
workflow=text_workflow,
|
||||
call_context_vars={},
|
||||
workflow_run_id=1,
|
||||
)
|
||||
result = engine.get_start_greeting()
|
||||
assert result == ("text", TEXT_GREETING)
|
||||
|
||||
def test_audio_greeting_returns_audio_tuple(self, audio_workflow: WorkflowGraph):
|
||||
"""Audio greeting config should return ('audio', recording_id)."""
|
||||
engine = PipecatEngine(
|
||||
workflow=audio_workflow,
|
||||
call_context_vars={},
|
||||
workflow_run_id=1,
|
||||
)
|
||||
result = engine.get_start_greeting()
|
||||
assert result == ("audio", AUDIO_GREETING_ID)
|
||||
|
||||
def test_no_greeting_returns_none(self):
|
||||
"""No greeting configured should return None."""
|
||||
dto = ReactFlowDTO(
|
||||
nodes=[
|
||||
RFNodeDTO(
|
||||
id="start",
|
||||
type=NodeType.startNode,
|
||||
position=Position(x=0, y=0),
|
||||
data=NodeDataDTO(
|
||||
name="Start",
|
||||
prompt="Prompt",
|
||||
is_start=True,
|
||||
add_global_prompt=False,
|
||||
extraction_enabled=False,
|
||||
),
|
||||
),
|
||||
RFNodeDTO(
|
||||
id="end",
|
||||
type=NodeType.endNode,
|
||||
position=Position(x=0, y=200),
|
||||
data=NodeDataDTO(
|
||||
name="End",
|
||||
prompt="End",
|
||||
is_end=True,
|
||||
add_global_prompt=False,
|
||||
extraction_enabled=False,
|
||||
),
|
||||
),
|
||||
],
|
||||
edges=[
|
||||
RFEdgeDTO(
|
||||
id="e",
|
||||
source="start",
|
||||
target="end",
|
||||
data=EdgeDataDTO(label="End", condition="End"),
|
||||
),
|
||||
],
|
||||
)
|
||||
engine = PipecatEngine(
|
||||
workflow=WorkflowGraph(dto),
|
||||
call_context_vars={},
|
||||
workflow_run_id=1,
|
||||
)
|
||||
assert engine.get_start_greeting() is None
|
||||
|
||||
def test_text_greeting_renders_template_variables(self):
|
||||
"""Text greeting with {{variable}} placeholders should be rendered."""
|
||||
dto = ReactFlowDTO(
|
||||
nodes=[
|
||||
RFNodeDTO(
|
||||
id="start",
|
||||
type=NodeType.startNode,
|
||||
position=Position(x=0, y=0),
|
||||
data=NodeDataDTO(
|
||||
name="Start",
|
||||
prompt="Prompt",
|
||||
is_start=True,
|
||||
add_global_prompt=False,
|
||||
greeting="Hello {{customer_name}}!",
|
||||
greeting_type="text",
|
||||
extraction_enabled=False,
|
||||
),
|
||||
),
|
||||
RFNodeDTO(
|
||||
id="end",
|
||||
type=NodeType.endNode,
|
||||
position=Position(x=0, y=200),
|
||||
data=NodeDataDTO(
|
||||
name="End",
|
||||
prompt="End",
|
||||
is_end=True,
|
||||
add_global_prompt=False,
|
||||
extraction_enabled=False,
|
||||
),
|
||||
),
|
||||
],
|
||||
edges=[
|
||||
RFEdgeDTO(
|
||||
id="e",
|
||||
source="start",
|
||||
target="end",
|
||||
data=EdgeDataDTO(label="End", condition="End"),
|
||||
),
|
||||
],
|
||||
)
|
||||
engine = PipecatEngine(
|
||||
workflow=WorkflowGraph(dto),
|
||||
call_context_vars={"customer_name": "Alice"},
|
||||
workflow_run_id=1,
|
||||
)
|
||||
result = engine.get_start_greeting()
|
||||
assert result == ("text", "Hello Alice!")
|
||||
|
||||
|
||||
# ─── Tests: Transition Speech (Pipeline) ────────────────────────
|
||||
|
||||
|
||||
class TestTransitionSpeech:
|
||||
"""Pipeline tests for edge transition speech (text and audio)."""
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_text_transition_queues_tts_speak_frame(
|
||||
self, text_workflow: WorkflowGraph
|
||||
):
|
||||
"""Text transition speech should queue a TTSSpeakFrame with the message."""
|
||||
functions = [
|
||||
{
|
||||
"name": "end_call",
|
||||
"arguments": {},
|
||||
"tool_call_id": "call_transition",
|
||||
},
|
||||
]
|
||||
|
||||
llm, context, queued_frames = await run_pipeline_and_capture_frames(
|
||||
workflow=text_workflow,
|
||||
functions=functions,
|
||||
num_text_steps=2,
|
||||
)
|
||||
|
||||
# Pipeline completes: 1st gen on StartNode, 2nd gen on EndNode
|
||||
assert llm.get_current_step() == 2
|
||||
|
||||
# Verify TTSSpeakFrame was queued with the transition speech text
|
||||
tts_speak_frames = [f for f in queued_frames if isinstance(f, TTSSpeakFrame)]
|
||||
transition_frames = [f for f in tts_speak_frames if f.text == TEXT_TRANSITION]
|
||||
assert len(transition_frames) == 1, (
|
||||
f"Expected one TTSSpeakFrame with text '{TEXT_TRANSITION}', "
|
||||
f"got: {[f.text for f in tts_speak_frames]}"
|
||||
)
|
||||
|
||||
# No raw audio frames should be queued for text transition
|
||||
audio_raw = [f for f in queued_frames if isinstance(f, TTSAudioRawFrame)]
|
||||
assert len(audio_raw) == 0
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_audio_transition_queues_audio_frames(
|
||||
self, audio_workflow: WorkflowGraph
|
||||
):
|
||||
"""Audio transition speech should queue TTSStarted + TTSAudioRaw + TTSStopped."""
|
||||
functions = [
|
||||
{
|
||||
"name": "end_call",
|
||||
"arguments": {},
|
||||
"tool_call_id": "call_transition",
|
||||
},
|
||||
]
|
||||
|
||||
mock_fetch = AsyncMock(return_value=RecordingAudio(audio=FAKE_PCM_AUDIO))
|
||||
|
||||
llm, context, queued_frames = await run_pipeline_and_capture_frames(
|
||||
workflow=audio_workflow,
|
||||
functions=functions,
|
||||
fetch_recording_audio=mock_fetch,
|
||||
num_text_steps=2,
|
||||
)
|
||||
|
||||
# Pipeline completes
|
||||
assert llm.get_current_step() == 2
|
||||
|
||||
# Verify fetch was called with the correct recording ID
|
||||
mock_fetch.assert_called_once_with(recording_pk=int(AUDIO_TRANSITION_ID))
|
||||
|
||||
# Verify the three-frame audio sequence was queued
|
||||
started = [f for f in queued_frames if isinstance(f, TTSStartedFrame)]
|
||||
audio = [f for f in queued_frames if isinstance(f, TTSAudioRawFrame)]
|
||||
stopped = [f for f in queued_frames if isinstance(f, TTSStoppedFrame)]
|
||||
|
||||
assert len(started) >= 1, (
|
||||
f"Expected TTSStartedFrame. "
|
||||
f"Frame types: {[type(f).__name__ for f in queued_frames]}"
|
||||
)
|
||||
assert len(audio) >= 1, "Expected TTSAudioRawFrame"
|
||||
assert len(stopped) >= 1, "Expected TTSStoppedFrame"
|
||||
|
||||
# Verify audio content
|
||||
assert audio[0].audio == FAKE_PCM_AUDIO
|
||||
assert audio[0].sample_rate == 16000
|
||||
assert audio[0].num_channels == 1
|
||||
|
||||
# Verify context_id consistency across the three frames
|
||||
ctx_id = started[0].context_id
|
||||
assert ctx_id is not None
|
||||
assert audio[0].context_id == ctx_id
|
||||
assert stopped[0].context_id == ctx_id
|
||||
|
||||
# No TTSSpeakFrame should be queued for audio transition
|
||||
speak = [f for f in queued_frames if isinstance(f, TTSSpeakFrame)]
|
||||
assert len(speak) == 0
|
||||
|
||||
|
||||
# ─── Tests: Tool Config Messages ────────────────────────────────
|
||||
|
||||
|
||||
class TestPlayConfigMessage:
|
||||
"""Unit tests for CustomToolManager._play_config_message."""
|
||||
|
||||
@pytest.fixture
|
||||
def mock_engine(self):
|
||||
"""Create a mock engine with frame capture on task.queue_frame."""
|
||||
engine = Mock()
|
||||
engine._workflow_run_id = 1
|
||||
engine._call_context_vars = {}
|
||||
engine._fetch_recording_audio = None
|
||||
engine._audio_config = None
|
||||
engine.task = Mock()
|
||||
engine.llm = Mock()
|
||||
|
||||
# Capture frames queued via task.queue_frame
|
||||
engine._queued_frames = []
|
||||
|
||||
async def mock_queue_frame(frame):
|
||||
engine._queued_frames.append(frame)
|
||||
|
||||
engine.task.queue_frame = mock_queue_frame
|
||||
|
||||
# Also capture frames queued via transport_output.queue_frame (audio playback)
|
||||
engine._transport_output = Mock()
|
||||
engine._transport_output.queue_frame = mock_queue_frame
|
||||
return engine
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_custom_text_queues_tts_speak_frame(self, mock_engine):
|
||||
"""messageType='custom' queues TTSSpeakFrame with the message text."""
|
||||
manager = CustomToolManager(mock_engine)
|
||||
config = {"messageType": "custom", "customMessage": "Ending your call now."}
|
||||
|
||||
result = await manager._play_config_message(config)
|
||||
|
||||
assert result is True
|
||||
frames = mock_engine._queued_frames
|
||||
assert len(frames) == 1
|
||||
assert isinstance(frames[0], TTSSpeakFrame)
|
||||
assert frames[0].text == "Ending your call now."
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_audio_queues_started_raw_stopped_frames(self, mock_engine):
|
||||
"""messageType='audio' queues TTSStarted + TTSAudioRaw + TTSStopped."""
|
||||
mock_fetch = AsyncMock(return_value=RecordingAudio(audio=FAKE_PCM_AUDIO))
|
||||
mock_engine._fetch_recording_audio = mock_fetch
|
||||
|
||||
manager = CustomToolManager(mock_engine)
|
||||
config = {"messageType": "audio", "audioRecordingId": "201"}
|
||||
|
||||
result = await manager._play_config_message(config)
|
||||
|
||||
assert result is True
|
||||
mock_fetch.assert_called_once_with(recording_pk=201)
|
||||
|
||||
frames = mock_engine._queued_frames
|
||||
assert len(frames) == 3
|
||||
assert isinstance(frames[0], TTSStartedFrame)
|
||||
assert isinstance(frames[1], TTSAudioRawFrame)
|
||||
assert isinstance(frames[2], TTSStoppedFrame)
|
||||
|
||||
# Verify audio content
|
||||
assert frames[1].audio == FAKE_PCM_AUDIO
|
||||
assert frames[1].sample_rate == 16000
|
||||
assert frames[1].num_channels == 1
|
||||
|
||||
# Context IDs should match across all three frames
|
||||
ctx_id = frames[0].context_id
|
||||
assert ctx_id is not None
|
||||
assert frames[1].context_id == ctx_id
|
||||
assert frames[2].context_id == ctx_id
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_none_message_type_returns_false(self, mock_engine):
|
||||
"""messageType='none' returns False without queuing frames."""
|
||||
manager = CustomToolManager(mock_engine)
|
||||
result = await manager._play_config_message({"messageType": "none"})
|
||||
|
||||
assert result is False
|
||||
assert len(mock_engine._queued_frames) == 0
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_audio_without_fetch_callback_returns_false(self, mock_engine):
|
||||
"""Audio without fetch_recording_audio callback returns False."""
|
||||
mock_engine._fetch_recording_audio = None
|
||||
|
||||
manager = CustomToolManager(mock_engine)
|
||||
config = {"messageType": "audio", "audioRecordingId": "301"}
|
||||
|
||||
result = await manager._play_config_message(config)
|
||||
|
||||
assert result is False
|
||||
assert len(mock_engine._queued_frames) == 0
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_audio_with_failed_fetch_returns_false(self, mock_engine):
|
||||
"""Audio with fetch returning None returns False."""
|
||||
mock_fetch = AsyncMock(return_value=None)
|
||||
mock_engine._fetch_recording_audio = mock_fetch
|
||||
|
||||
manager = CustomToolManager(mock_engine)
|
||||
config = {"messageType": "audio", "audioRecordingId": "301"}
|
||||
|
||||
result = await manager._play_config_message(config)
|
||||
|
||||
assert result is False
|
||||
mock_fetch.assert_called_once_with(recording_pk=301)
|
||||
assert len(mock_engine._queued_frames) == 0
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_custom_empty_message_returns_false(self, mock_engine):
|
||||
"""messageType='custom' with empty message returns False."""
|
||||
manager = CustomToolManager(mock_engine)
|
||||
config = {"messageType": "custom", "customMessage": ""}
|
||||
|
||||
result = await manager._play_config_message(config)
|
||||
|
||||
assert result is False
|
||||
assert len(mock_engine._queued_frames) == 0
|
||||
|
|
@ -1,151 +0,0 @@
|
|||
"""
|
||||
Hold audio utility for loading, caching, and playing hold music files.
|
||||
|
||||
This module provides functionality to load hold music audio files at specific sample rates
|
||||
with caching to improve performance during multiple calls, and a reusable loop that queues
|
||||
audio frames until a stop event is set.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
from typing import Dict, Optional, Tuple
|
||||
|
||||
import numpy as np
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.frames.frames import OutputAudioRawFrame
|
||||
|
||||
try:
|
||||
import soundfile as sf
|
||||
except ModuleNotFoundError as e:
|
||||
logger.error(f"Exception: {e}")
|
||||
logger.error("In order to use hold audio, you need to `pip install soundfile`.")
|
||||
raise Exception(f"Missing module: {e}")
|
||||
|
||||
|
||||
# Global cache for loaded hold music data
|
||||
_hold_audio_cache: Dict[Tuple[str, int], np.ndarray] = {}
|
||||
|
||||
|
||||
def load_hold_audio(file_path: str, sample_rate: int) -> Optional[bytes]:
|
||||
"""Load hold music audio file at the specified sample rate with caching.
|
||||
|
||||
Args:
|
||||
file_path: Path to the hold music audio file
|
||||
sample_rate: Target sample rate (8000 or 16000 Hz supported)
|
||||
|
||||
Returns:
|
||||
Audio data as bytes (PCM16) or None if loading failed
|
||||
"""
|
||||
cache_key = (file_path, sample_rate)
|
||||
|
||||
# Check cache first
|
||||
if cache_key in _hold_audio_cache:
|
||||
logger.debug(f"Using cached hold audio for {file_path} at {sample_rate}Hz")
|
||||
audio_data = _hold_audio_cache[cache_key]
|
||||
return audio_data.tobytes()
|
||||
|
||||
try:
|
||||
logger.info(f"Loading hold audio from {file_path} at {sample_rate}Hz")
|
||||
|
||||
# Load audio file
|
||||
sound, file_sample_rate = sf.read(file_path, dtype="int16")
|
||||
logger.info(
|
||||
f"Audio file loaded - file sample_rate: {file_sample_rate}, target: {sample_rate}"
|
||||
)
|
||||
|
||||
# Ensure mono audio (take first channel if stereo)
|
||||
if len(sound.shape) > 1:
|
||||
sound = sound[:, 0]
|
||||
|
||||
# Resample if needed
|
||||
if file_sample_rate != sample_rate:
|
||||
logger.warning(
|
||||
f"Hold music file has sample rate {file_sample_rate}, expected {sample_rate}"
|
||||
)
|
||||
# For now, we'll use the audio as-is and let the transport handle resampling
|
||||
# In a production system, you might want to use librosa or scipy for proper resampling
|
||||
|
||||
# Convert to int16 and cache
|
||||
audio_data = sound.astype(np.int16)
|
||||
_hold_audio_cache[cache_key] = audio_data
|
||||
|
||||
logger.info(
|
||||
f"Hold audio loaded successfully: {len(audio_data)} samples at {sample_rate}Hz"
|
||||
)
|
||||
return audio_data.tobytes()
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to load hold audio file {file_path}: {e}")
|
||||
return None
|
||||
|
||||
|
||||
def clear_hold_audio_cache():
|
||||
"""Clear the hold audio cache to free memory."""
|
||||
global _hold_audio_cache
|
||||
_hold_audio_cache.clear()
|
||||
logger.info("Hold audio cache cleared")
|
||||
|
||||
|
||||
def get_cache_info() -> Dict[str, int]:
|
||||
"""Get information about the current cache state.
|
||||
|
||||
Returns:
|
||||
Dictionary with cache statistics
|
||||
"""
|
||||
return {
|
||||
"cached_files": len(_hold_audio_cache),
|
||||
"total_cache_size": sum(len(data) for data in _hold_audio_cache.values()),
|
||||
}
|
||||
|
||||
|
||||
async def play_hold_audio_loop(
|
||||
task,
|
||||
stop_event: asyncio.Event,
|
||||
sample_rate: int = 16000,
|
||||
hold_music_file: Optional[str] = None,
|
||||
) -> None:
|
||||
"""Play hold/ring-back audio in a loop until *stop_event* is set.
|
||||
|
||||
This is a shared helper used by call-transfer hold music and the
|
||||
pre-call data fetch ringer. The caller is responsible for creating
|
||||
the ``asyncio.Event`` and setting it when playback should stop.
|
||||
|
||||
Args:
|
||||
task: A ``PipelineTask`` (or anything with ``queue_frame``).
|
||||
stop_event: Set this event to terminate the loop.
|
||||
sample_rate: Target sample rate for audio playback.
|
||||
hold_music_file: Path to a WAV file. When *None* the default
|
||||
``transfer_hold_ring_{sample_rate}.wav`` asset is used.
|
||||
"""
|
||||
if hold_music_file is None:
|
||||
from api.constants import APP_ROOT_DIR
|
||||
|
||||
hold_music_file = str(
|
||||
APP_ROOT_DIR / "assets" / f"transfer_hold_ring_{sample_rate}.wav"
|
||||
)
|
||||
|
||||
hold_audio_data = load_hold_audio(hold_music_file, sample_rate)
|
||||
if not hold_audio_data:
|
||||
logger.warning(f"Hold audio loop: failed to load {hold_music_file}, skipping")
|
||||
return
|
||||
|
||||
num_samples = len(hold_audio_data) // 2 # 16-bit PCM = 2 bytes per sample
|
||||
duration = num_samples / sample_rate
|
||||
|
||||
logger.debug(f"Hold audio loop: playing at {sample_rate}Hz")
|
||||
try:
|
||||
while not stop_event.is_set():
|
||||
frame = OutputAudioRawFrame(
|
||||
audio=hold_audio_data,
|
||||
sample_rate=sample_rate,
|
||||
num_channels=1,
|
||||
)
|
||||
await task.queue_frame(frame)
|
||||
try:
|
||||
await asyncio.wait_for(stop_event.wait(), timeout=duration + 1.5)
|
||||
break
|
||||
except asyncio.TimeoutError:
|
||||
pass
|
||||
except Exception as e:
|
||||
logger.error(f"Hold audio loop: error: {e}")
|
||||
logger.debug("Hold audio loop: stopped")
|
||||
|
|
@ -6,15 +6,6 @@ tag: "NEW"
|
|||
|
||||
Custom recordings allow you to build **hybrid voice agents** that use your own pre-recorded audio for key parts of the conversation, while falling back to LLM-generated speech (via a cloned voice) for dynamic responses. This gives you the best of both worlds — the emotional depth of real human speech and the flexibility of AI-generated dialogue.
|
||||
|
||||
<iframe
|
||||
className="w-full aspect-video rounded-xl"
|
||||
src="https://www.youtube.com/embed/1uZqhG0_cIo"
|
||||
title="Dograh Twilio Setup"
|
||||
allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture"
|
||||
allowFullScreen
|
||||
></iframe>
|
||||
|
||||
|
||||
## Why use custom recordings?
|
||||
|
||||
- **Reduced TTS cost** — Pre-recorded audio is played directly, so you are not charged for TTS synthesis on those segments.
|
||||
|
|
@ -50,23 +41,20 @@ You can use any TTS provider that supports voice cloning. The steps will vary by
|
|||
|
||||
## Step 3: Upload recordings
|
||||
|
||||
Navigate to your agent in the workflow builder and open the **Recordings** panel. You can either upload pre-recorded audio files or record directly in the browser.
|
||||
Navigate to the **Recordings** page in the Dograh dashboard. Recordings are shared across all agents in your organization. You can either upload pre-recorded audio files or record directly in the browser.
|
||||
|
||||
For each recording:
|
||||
|
||||
1. Click **Record** (or upload a file).
|
||||
2. Speak the exact phrase you want the agent to use.
|
||||
3. Give the recording a descriptive name (e.g., `greeting`, `invitation`, `venue`).
|
||||
4. Verify the transcription is correct — edit it if needed.
|
||||
5. Click **Upload**.
|
||||
1. Click **Upload Recording**.
|
||||
2. Choose an audio file or click **Record** to record in the browser.
|
||||
3. Verify the transcription is correct — edit it if needed.
|
||||
4. Click **Upload**.
|
||||
|
||||
<Warning>
|
||||
Recordings are scoped to a specific **provider and Voice ID**. If you change either, you will need to re-upload your recordings to ensure consistency between the recorded audio and the cloned voice used for dynamic responses.
|
||||
</Warning>
|
||||
You can rename a recording's ID at any time by clicking the edit icon next to it in the recordings list.
|
||||
|
||||
## Step 4: Build the workflow
|
||||
|
||||
Open your agent's workflow and write the conversation flow in natural language. To insert a recording, type **`@`** in the prompt editor — this will show a list of all available recordings scoped to your current Voice ID.
|
||||
Open your agent's workflow and write the conversation flow in natural language. To insert a recording, type **`@`** in the prompt editor — this will show a list of all available recordings in your organization.
|
||||
|
||||
For any user question that falls outside your recordings, the agent automatically generates a dynamic response using the LLM, which is then synthesized using your cloned voice via TTS.
|
||||
|
||||
|
|
|
|||
2
pipecat
2
pipecat
|
|
@ -1 +1 @@
|
|||
Subproject commit 002c095b2f15a11d4a6ffd85b86592821aa5cd62
|
||||
Subproject commit 5a2e4c89118264bfdfe91db059b01e226609f060
|
||||
|
|
@ -3,25 +3,19 @@
|
|||
import { useRef, useState } from 'react';
|
||||
import { toast } from 'sonner';
|
||||
|
||||
import { getPresignedUploadUrlApiV1S3PresignedUploadUrlPost } from '@/client/sdk.gen';
|
||||
import { Button } from '@/components/ui/button';
|
||||
import { Label } from '@/components/ui/label';
|
||||
import logger from '@/lib/logger';
|
||||
|
||||
interface CsvUploadSelectorProps {
|
||||
accessToken: string;
|
||||
onFileUploaded: (fileKey: string, fileName: string) => void;
|
||||
selectedFileName?: string;
|
||||
}
|
||||
|
||||
interface PresignedUploadUrlResponse {
|
||||
upload_url: string;
|
||||
file_key: string;
|
||||
expires_in: number;
|
||||
}
|
||||
|
||||
const MAX_FILE_SIZE = 10 * 1024 * 1024; // 10MB
|
||||
|
||||
export default function CsvUploadSelector({ accessToken, onFileUploaded, selectedFileName }: CsvUploadSelectorProps) {
|
||||
export default function CsvUploadSelector({ onFileUploaded, selectedFileName }: CsvUploadSelectorProps) {
|
||||
const [uploading, setUploading] = useState(false);
|
||||
const [uploadProgress, setUploadProgress] = useState(0);
|
||||
const fileInputRef = useRef<HTMLInputElement>(null);
|
||||
|
|
@ -48,25 +42,18 @@ export default function CsvUploadSelector({ accessToken, onFileUploaded, selecte
|
|||
try {
|
||||
// Step 1: Request presigned upload URL
|
||||
logger.info('Requesting presigned upload URL for:', file.name);
|
||||
const presignedResponse = await fetch('/api/v1/s3/presigned-upload-url', {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Authorization': `Bearer ${accessToken}`,
|
||||
'Content-Type': 'application/json',
|
||||
},
|
||||
body: JSON.stringify({
|
||||
const { data: presignedData, error } = await getPresignedUploadUrlApiV1S3PresignedUploadUrlPost({
|
||||
body: {
|
||||
file_name: file.name,
|
||||
file_size: file.size,
|
||||
content_type: 'text/csv',
|
||||
}),
|
||||
},
|
||||
});
|
||||
|
||||
if (!presignedResponse.ok) {
|
||||
const error = await presignedResponse.json();
|
||||
throw new Error(error.detail || 'Failed to get upload URL');
|
||||
if (error || !presignedData) {
|
||||
throw new Error('Failed to get upload URL');
|
||||
}
|
||||
|
||||
const presignedData: PresignedUploadUrlResponse = await presignedResponse.json();
|
||||
logger.info('Received presigned URL, uploading file...');
|
||||
|
||||
// Step 2: Upload file directly to S3/MinIO
|
||||
|
|
|
|||
|
|
@ -415,7 +415,6 @@ export default function NewCampaignPage() {
|
|||
/>
|
||||
) : (
|
||||
<CsvUploadSelector
|
||||
accessToken={userAccessToken}
|
||||
onFileUploaded={handleFileUploaded}
|
||||
selectedFileName={selectedFileName}
|
||||
/>
|
||||
|
|
|
|||
|
|
@ -1,11 +1,18 @@
|
|||
"use client";
|
||||
|
||||
import { ExternalLink } from "lucide-react";
|
||||
import { ExternalLink, Upload } from "lucide-react";
|
||||
import { useEffect, useState } from "react";
|
||||
|
||||
import { Button } from "@/components/ui/button";
|
||||
import { Card, CardContent, CardDescription, CardHeader, CardTitle } from "@/components/ui/card";
|
||||
import {
|
||||
Dialog,
|
||||
DialogContent,
|
||||
DialogDescription,
|
||||
DialogHeader,
|
||||
DialogTitle,
|
||||
} from "@/components/ui/dialog";
|
||||
import { Skeleton } from "@/components/ui/skeleton";
|
||||
import { Tabs, TabsContent, TabsList, TabsTrigger } from "@/components/ui/tabs";
|
||||
import { useAuth } from "@/lib/auth";
|
||||
|
||||
import DocumentList from "./DocumentList";
|
||||
|
|
@ -14,6 +21,7 @@ import DocumentUpload from "./DocumentUpload";
|
|||
export default function FilesPage() {
|
||||
const { user, redirectToLogin, loading } = useAuth();
|
||||
const [refreshKey, setRefreshKey] = useState(0);
|
||||
const [isUploadOpen, setIsUploadOpen] = useState(false);
|
||||
|
||||
// Redirect if not authenticated
|
||||
useEffect(() => {
|
||||
|
|
@ -23,8 +31,8 @@ export default function FilesPage() {
|
|||
}, [loading, user, redirectToLogin]);
|
||||
|
||||
const handleUploadSuccess = () => {
|
||||
// Trigger refresh of document list
|
||||
setRefreshKey(prev => prev + 1);
|
||||
setIsUploadOpen(false);
|
||||
};
|
||||
|
||||
if (loading || !user) {
|
||||
|
|
@ -50,44 +58,37 @@ export default function FilesPage() {
|
|||
</p>
|
||||
</div>
|
||||
|
||||
<Tabs defaultValue="all" className="space-y-6">
|
||||
<TabsList>
|
||||
<TabsTrigger value="all">All Files</TabsTrigger>
|
||||
<TabsTrigger value="upload">Upload New</TabsTrigger>
|
||||
</TabsList>
|
||||
|
||||
<TabsContent value="all" className="space-y-4">
|
||||
<Card>
|
||||
<CardHeader>
|
||||
<Card>
|
||||
<CardHeader>
|
||||
<div className="flex justify-between items-center">
|
||||
<div>
|
||||
<CardTitle>Your Documents</CardTitle>
|
||||
<CardDescription>
|
||||
View and manage your uploaded documents
|
||||
Documents shared across all agents in your organization
|
||||
</CardDescription>
|
||||
</CardHeader>
|
||||
<CardContent>
|
||||
<DocumentList
|
||||
refreshTrigger={refreshKey}
|
||||
/>
|
||||
</CardContent>
|
||||
</Card>
|
||||
</TabsContent>
|
||||
</div>
|
||||
<Button onClick={() => setIsUploadOpen(true)}>
|
||||
<Upload className="w-4 h-4 mr-2" />
|
||||
Upload Document
|
||||
</Button>
|
||||
</div>
|
||||
</CardHeader>
|
||||
<CardContent>
|
||||
<DocumentList refreshTrigger={refreshKey} />
|
||||
</CardContent>
|
||||
</Card>
|
||||
|
||||
<TabsContent value="upload" className="space-y-4">
|
||||
<Card>
|
||||
<CardHeader>
|
||||
<CardTitle>Upload Document</CardTitle>
|
||||
<CardDescription>
|
||||
Upload a PDF or document file to add to your knowledge base
|
||||
</CardDescription>
|
||||
</CardHeader>
|
||||
<CardContent>
|
||||
<DocumentUpload
|
||||
onUploadSuccess={handleUploadSuccess}
|
||||
/>
|
||||
</CardContent>
|
||||
</Card>
|
||||
</TabsContent>
|
||||
</Tabs>
|
||||
<Dialog open={isUploadOpen} onOpenChange={setIsUploadOpen}>
|
||||
<DialogContent>
|
||||
<DialogHeader>
|
||||
<DialogTitle>Upload Document</DialogTitle>
|
||||
<DialogDescription>
|
||||
Upload a PDF or document file to add to your knowledge base
|
||||
</DialogDescription>
|
||||
</DialogHeader>
|
||||
<DocumentUpload onUploadSuccess={handleUploadSuccess} />
|
||||
</DialogContent>
|
||||
</Dialog>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
|
|
|||
323
ui/src/app/recordings/RecordingsList.tsx
Normal file
323
ui/src/app/recordings/RecordingsList.tsx
Normal file
|
|
@ -0,0 +1,323 @@
|
|||
"use client";
|
||||
|
||||
import { AudioLines, Check, Pause, Pencil, Play, RefreshCw, Search, Trash2, X } from "lucide-react";
|
||||
import { useCallback, useEffect, useState } from "react";
|
||||
import { toast } from "sonner";
|
||||
|
||||
import {
|
||||
deleteRecordingApiV1WorkflowRecordingsRecordingIdDelete,
|
||||
listRecordingsApiV1WorkflowRecordingsGet,
|
||||
updateRecordingApiV1WorkflowRecordingsIdPatch,
|
||||
} from "@/client/sdk.gen";
|
||||
import type { RecordingResponseSchema } from "@/client/types.gen";
|
||||
import { Button } from "@/components/ui/button";
|
||||
import { Input } from "@/components/ui/input";
|
||||
import { Skeleton } from "@/components/ui/skeleton";
|
||||
import { useAudioPlayback } from "@/hooks/useAudioPlayback";
|
||||
import logger from "@/lib/logger";
|
||||
|
||||
export default function RecordingsList({ refreshKey }: { refreshKey?: number }) {
|
||||
const [recordings, setRecordings] = useState<RecordingResponseSchema[]>([]);
|
||||
const [isLoading, setIsLoading] = useState(true);
|
||||
const [searchQuery, setSearchQuery] = useState("");
|
||||
const [error, setError] = useState<string | null>(null);
|
||||
|
||||
// Inline edit state
|
||||
const [editingId, setEditingId] = useState<string | null>(null);
|
||||
const [editValue, setEditValue] = useState("");
|
||||
const [editError, setEditError] = useState<string | null>(null);
|
||||
|
||||
const { playingId, toggle: togglePlayback, stop: stopPlayback } = useAudioPlayback();
|
||||
|
||||
const fetchRecordings = useCallback(async () => {
|
||||
try {
|
||||
setIsLoading(true);
|
||||
setError(null);
|
||||
|
||||
const response = await listRecordingsApiV1WorkflowRecordingsGet({
|
||||
query: {},
|
||||
});
|
||||
|
||||
if (response.error || !response.data) {
|
||||
throw new Error("Failed to fetch recordings");
|
||||
}
|
||||
|
||||
setRecordings(response.data.recordings);
|
||||
} catch (err) {
|
||||
setError(err instanceof Error ? err.message : "Failed to fetch recordings");
|
||||
logger.error("Error fetching recordings:", err);
|
||||
} finally {
|
||||
setIsLoading(false);
|
||||
}
|
||||
}, []);
|
||||
|
||||
useEffect(() => {
|
||||
fetchRecordings();
|
||||
}, [fetchRecordings, refreshKey]);
|
||||
|
||||
const handleDelete = async (recordingId: string) => {
|
||||
if (!confirm("Are you sure you want to delete this recording?")) return;
|
||||
|
||||
try {
|
||||
const response = await deleteRecordingApiV1WorkflowRecordingsRecordingIdDelete({
|
||||
path: { recording_id: recordingId },
|
||||
});
|
||||
|
||||
if (response.error) {
|
||||
throw new Error("Failed to delete recording");
|
||||
}
|
||||
|
||||
toast.success("Recording deleted");
|
||||
fetchRecordings();
|
||||
} catch (err) {
|
||||
toast.error(err instanceof Error ? err.message : "Failed to delete recording");
|
||||
logger.error("Error deleting recording:", err);
|
||||
}
|
||||
};
|
||||
|
||||
const handlePlay = async (rec: RecordingResponseSchema) => {
|
||||
try {
|
||||
await togglePlayback(rec.recording_id, rec.storage_key, rec.storage_backend);
|
||||
} catch {
|
||||
toast.error("Failed to play recording");
|
||||
}
|
||||
};
|
||||
|
||||
const startEditing = (rec: RecordingResponseSchema) => {
|
||||
setEditingId(rec.recording_id);
|
||||
setEditValue(rec.recording_id);
|
||||
setEditError(null);
|
||||
};
|
||||
|
||||
const cancelEditing = () => {
|
||||
setEditingId(null);
|
||||
setEditValue("");
|
||||
setEditError(null);
|
||||
};
|
||||
|
||||
const saveRecordingId = async (rec: RecordingResponseSchema) => {
|
||||
const newId = editValue.trim();
|
||||
if (!newId) {
|
||||
setEditError("ID cannot be empty");
|
||||
return;
|
||||
}
|
||||
if (!/^[a-zA-Z0-9_-]+$/.test(newId)) {
|
||||
setEditError("Only letters, numbers, hyphens, and underscores");
|
||||
return;
|
||||
}
|
||||
if (newId === rec.recording_id) {
|
||||
cancelEditing();
|
||||
return;
|
||||
}
|
||||
|
||||
setEditError(null);
|
||||
try {
|
||||
const response = await updateRecordingApiV1WorkflowRecordingsIdPatch({
|
||||
path: { id: rec.id },
|
||||
body: { recording_id: newId },
|
||||
});
|
||||
|
||||
if (response.error) {
|
||||
const errData = response.error as { detail?: string };
|
||||
throw new Error(errData?.detail || "Failed to update recording ID");
|
||||
}
|
||||
|
||||
toast.success(`Recording ID updated to "${newId}". All workflow references have been updated.`);
|
||||
cancelEditing();
|
||||
fetchRecordings();
|
||||
} catch (err) {
|
||||
setEditError(err instanceof Error ? err.message : "Failed to update recording ID");
|
||||
}
|
||||
};
|
||||
|
||||
const formatDate = (dateString: string): string => {
|
||||
const date = new Date(dateString);
|
||||
return date.toLocaleDateString() + " " + date.toLocaleTimeString();
|
||||
};
|
||||
|
||||
const filteredRecordings = recordings.filter((rec) => {
|
||||
if (!searchQuery) return true;
|
||||
const q = searchQuery.toLowerCase();
|
||||
const filename = (rec.metadata?.original_filename as string) || "";
|
||||
return (
|
||||
filename.toLowerCase().includes(q) ||
|
||||
rec.transcript.toLowerCase().includes(q) ||
|
||||
rec.recording_id.toLowerCase().includes(q)
|
||||
);
|
||||
});
|
||||
|
||||
if (isLoading && recordings.length === 0) {
|
||||
return (
|
||||
<div className="space-y-4">
|
||||
{[1, 2, 3].map((i) => (
|
||||
<div key={i} className="flex items-center justify-between p-4 border rounded-lg">
|
||||
<div className="space-y-2 flex-1">
|
||||
<Skeleton className="h-4 w-48" />
|
||||
<Skeleton className="h-3 w-64" />
|
||||
</div>
|
||||
<Skeleton className="h-8 w-24" />
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
if (error) {
|
||||
return (
|
||||
<div className="p-4 bg-destructive/10 border border-destructive/20 rounded-lg text-destructive">
|
||||
{error}
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
return (
|
||||
<div className="space-y-4">
|
||||
{/* Search and Refresh */}
|
||||
<div className="flex items-center gap-4">
|
||||
<div className="relative flex-1">
|
||||
<Search className="absolute left-3 top-1/2 transform -translate-y-1/2 h-4 w-4 text-muted-foreground" />
|
||||
<Input
|
||||
placeholder="Search by filename, transcript, or ID..."
|
||||
value={searchQuery}
|
||||
onChange={(e) => setSearchQuery(e.target.value)}
|
||||
className="pl-10"
|
||||
/>
|
||||
</div>
|
||||
<Button
|
||||
variant="outline"
|
||||
size="icon"
|
||||
onClick={() => { stopPlayback(); fetchRecordings(); }}
|
||||
disabled={isLoading}
|
||||
>
|
||||
<RefreshCw className={`h-4 w-4 ${isLoading ? "animate-spin" : ""}`} />
|
||||
</Button>
|
||||
</div>
|
||||
|
||||
{/* Results count */}
|
||||
<div className="text-sm text-muted-foreground">
|
||||
{filteredRecordings.length} recording{filteredRecordings.length !== 1 ? "s" : ""}
|
||||
{searchQuery && ` matching "${searchQuery}"`}
|
||||
</div>
|
||||
|
||||
{/* Recordings List */}
|
||||
{filteredRecordings.length === 0 ? (
|
||||
<div className="text-center py-12">
|
||||
<AudioLines className="w-12 h-12 text-muted-foreground mx-auto mb-4" />
|
||||
<p className="text-muted-foreground">
|
||||
{searchQuery
|
||||
? "No recordings match your search"
|
||||
: "No recordings yet"}
|
||||
</p>
|
||||
</div>
|
||||
) : (
|
||||
<div className="space-y-3">
|
||||
{filteredRecordings.map((rec) => {
|
||||
const filename = (rec.metadata?.original_filename as string) || "";
|
||||
const isEditing = editingId === rec.recording_id;
|
||||
|
||||
return (
|
||||
<div
|
||||
key={rec.recording_id}
|
||||
className="flex items-center justify-between p-4 border rounded-lg hover:bg-muted/50 transition-colors"
|
||||
>
|
||||
<div className="flex items-center gap-4 flex-1 min-w-0">
|
||||
<div className="w-10 h-10 rounded-lg bg-primary/10 flex items-center justify-center shrink-0">
|
||||
<AudioLines className="w-5 h-5 text-primary" />
|
||||
</div>
|
||||
<div className="flex-1 min-w-0">
|
||||
{/* Recording ID (editable) */}
|
||||
<div className="flex items-center gap-2 mb-1">
|
||||
{isEditing ? (
|
||||
<div className="flex items-center gap-1 flex-wrap">
|
||||
<Input
|
||||
value={editValue}
|
||||
onChange={(e) => { setEditValue(e.target.value); setEditError(null); }}
|
||||
onKeyDown={(e) => {
|
||||
if (e.key === "Enter") saveRecordingId(rec);
|
||||
if (e.key === "Escape") cancelEditing();
|
||||
}}
|
||||
className={`h-7 text-sm font-mono w-48 ${editError ? "border-destructive" : ""}`}
|
||||
maxLength={64}
|
||||
autoFocus
|
||||
/>
|
||||
<Button
|
||||
variant="ghost"
|
||||
size="sm"
|
||||
className="h-7 w-7 p-0"
|
||||
onClick={() => saveRecordingId(rec)}
|
||||
>
|
||||
<Check className="w-3.5 h-3.5" />
|
||||
</Button>
|
||||
<Button
|
||||
variant="ghost"
|
||||
size="sm"
|
||||
className="h-7 w-7 p-0"
|
||||
onClick={cancelEditing}
|
||||
>
|
||||
<X className="w-3.5 h-3.5" />
|
||||
</Button>
|
||||
{editError && (
|
||||
<span className="text-xs text-destructive">{editError}</span>
|
||||
)}
|
||||
</div>
|
||||
) : (
|
||||
<div className="flex items-center gap-1.5">
|
||||
<code className="text-sm font-mono bg-muted px-1.5 py-0.5 rounded truncate max-w-[250px]">
|
||||
{rec.recording_id}
|
||||
</code>
|
||||
<Button
|
||||
variant="ghost"
|
||||
size="sm"
|
||||
className="h-6 px-1.5 text-xs text-muted-foreground gap-1"
|
||||
onClick={() => startEditing(rec)}
|
||||
>
|
||||
<Pencil className="w-3 h-3" />
|
||||
Edit ID
|
||||
</Button>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
{/* Filename */}
|
||||
{filename && (
|
||||
<p className="text-xs text-muted-foreground mb-0.5 truncate max-w-[300px]">
|
||||
{filename}
|
||||
</p>
|
||||
)}
|
||||
{/* Transcript */}
|
||||
<p className="text-sm text-muted-foreground line-clamp-1 mb-1">
|
||||
{rec.transcript}
|
||||
</p>
|
||||
<div className="flex items-center gap-3 text-xs text-muted-foreground flex-wrap">
|
||||
<span>{formatDate(rec.created_at)}</span>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div className="flex items-center gap-1 shrink-0 ml-2">
|
||||
<Button
|
||||
variant="ghost"
|
||||
size="sm"
|
||||
onClick={() => handlePlay(rec)}
|
||||
>
|
||||
{playingId === rec.recording_id ? (
|
||||
<Pause className="w-4 h-4" />
|
||||
) : (
|
||||
<Play className="w-4 h-4" />
|
||||
)}
|
||||
</Button>
|
||||
<Button
|
||||
variant="ghost"
|
||||
size="sm"
|
||||
onClick={() => handleDelete(rec.recording_id)}
|
||||
className="text-destructive hover:text-destructive/90"
|
||||
>
|
||||
<Trash2 className="w-4 h-4" />
|
||||
</Button>
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
})}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
}
|
||||
465
ui/src/app/recordings/RecordingsUploadDialog.tsx
Normal file
465
ui/src/app/recordings/RecordingsUploadDialog.tsx
Normal file
|
|
@ -0,0 +1,465 @@
|
|||
"use client";
|
||||
|
||||
import { Loader2, Mic, Square, Upload, X } from "lucide-react";
|
||||
import { useCallback, useEffect, useRef, useState } from "react";
|
||||
|
||||
import {
|
||||
createRecordingsApiV1WorkflowRecordingsPost,
|
||||
getUploadUrlsApiV1WorkflowRecordingsUploadUrlPost,
|
||||
transcribeAudioApiV1WorkflowRecordingsTranscribePost,
|
||||
} from "@/client";
|
||||
import type { RecordingUploadResponseSchema } from "@/client/types.gen";
|
||||
import { Button } from "@/components/ui/button";
|
||||
import {
|
||||
Dialog,
|
||||
DialogContent,
|
||||
DialogDescription,
|
||||
DialogHeader,
|
||||
DialogTitle,
|
||||
} from "@/components/ui/dialog";
|
||||
import { Input } from "@/components/ui/input";
|
||||
import { Label } from "@/components/ui/label";
|
||||
import {
|
||||
Select,
|
||||
SelectContent,
|
||||
SelectItem,
|
||||
SelectTrigger,
|
||||
SelectValue,
|
||||
} from "@/components/ui/select";
|
||||
import { Textarea } from "@/components/ui/textarea";
|
||||
import { LANGUAGE_DISPLAY_NAMES } from "@/constants/languages";
|
||||
interface RecordingsUploadDialogProps {
|
||||
open: boolean;
|
||||
onOpenChange: (open: boolean) => void;
|
||||
onUploadComplete?: () => void;
|
||||
}
|
||||
|
||||
const MAX_FILE_SIZE = 5 * 1024 * 1024; // 5MB
|
||||
|
||||
interface PendingFile {
|
||||
id: string;
|
||||
file: File;
|
||||
transcript: string;
|
||||
isTranscribing: boolean;
|
||||
error?: string;
|
||||
}
|
||||
|
||||
let pendingFileCounter = 0;
|
||||
|
||||
export const RecordingsUploadDialog = ({
|
||||
open,
|
||||
onOpenChange,
|
||||
onUploadComplete,
|
||||
}: RecordingsUploadDialogProps) => {
|
||||
const [uploading, setUploading] = useState(false);
|
||||
const [pendingFiles, setPendingFiles] = useState<PendingFile[]>([]);
|
||||
const [error, setError] = useState<string | null>(null);
|
||||
const [language, setLanguage] = useState("multi");
|
||||
const [recordingStep, setRecordingStep] = useState<"idle" | "naming" | "recording">("idle");
|
||||
const [recordingFilename, setRecordingFilename] = useState("");
|
||||
const [recordingDuration, setRecordingDuration] = useState(0);
|
||||
const mediaRecorderRef = useRef<MediaRecorder | null>(null);
|
||||
const audioChunksRef = useRef<Blob[]>([]);
|
||||
const recordingTimerRef = useRef<ReturnType<typeof setInterval> | null>(null);
|
||||
const fileInputRef = useRef<HTMLInputElement>(null);
|
||||
const languageRef = useRef(language);
|
||||
languageRef.current = language;
|
||||
|
||||
const stopRecordingTimer = useCallback(() => {
|
||||
if (recordingTimerRef.current) {
|
||||
clearInterval(recordingTimerRef.current);
|
||||
recordingTimerRef.current = null;
|
||||
}
|
||||
}, []);
|
||||
|
||||
const stopRecording = useCallback(() => {
|
||||
if (mediaRecorderRef.current && mediaRecorderRef.current.state !== "inactive") {
|
||||
mediaRecorderRef.current.stop();
|
||||
}
|
||||
}, []);
|
||||
|
||||
const resetRecordingState = useCallback(() => {
|
||||
setRecordingStep("idle");
|
||||
setRecordingFilename("");
|
||||
setRecordingDuration(0);
|
||||
}, []);
|
||||
|
||||
useEffect(() => {
|
||||
if (open) {
|
||||
setError(null);
|
||||
setPendingFiles([]);
|
||||
setLanguage("multi");
|
||||
resetRecordingState();
|
||||
}
|
||||
}, [open, resetRecordingState]);
|
||||
|
||||
useEffect(() => {
|
||||
if (!open) {
|
||||
stopRecording();
|
||||
stopRecordingTimer();
|
||||
}
|
||||
}, [open, stopRecording, stopRecordingTimer]);
|
||||
|
||||
const transcribeFile = async (pendingId: string, file: File) => {
|
||||
setPendingFiles((prev) =>
|
||||
prev.map((p) => (p.id === pendingId ? { ...p, isTranscribing: true } : p))
|
||||
);
|
||||
try {
|
||||
const currentLang = languageRef.current;
|
||||
const result = await transcribeAudioApiV1WorkflowRecordingsTranscribePost({
|
||||
body: { file, language: currentLang },
|
||||
});
|
||||
const data = result.data as Record<string, unknown> | undefined;
|
||||
if (data?.transcript) {
|
||||
setPendingFiles((prev) =>
|
||||
prev.map((p) =>
|
||||
p.id === pendingId ? { ...p, transcript: data.transcript as string, isTranscribing: false } : p
|
||||
)
|
||||
);
|
||||
} else {
|
||||
setPendingFiles((prev) =>
|
||||
prev.map((p) => (p.id === pendingId ? { ...p, isTranscribing: false } : p))
|
||||
);
|
||||
}
|
||||
} catch {
|
||||
setPendingFiles((prev) =>
|
||||
prev.map((p) =>
|
||||
p.id === pendingId
|
||||
? { ...p, isTranscribing: false, error: "Auto-transcription failed" }
|
||||
: p
|
||||
)
|
||||
);
|
||||
}
|
||||
};
|
||||
|
||||
const addPendingFiles = (files: File[]) => {
|
||||
const valid: PendingFile[] = [];
|
||||
for (const file of files) {
|
||||
if (file.size > MAX_FILE_SIZE) {
|
||||
setError(`${file.name} (${(file.size / (1024 * 1024)).toFixed(1)}MB) exceeds 5MB limit — skipped.`);
|
||||
continue;
|
||||
}
|
||||
const id = `pending-${++pendingFileCounter}`;
|
||||
valid.push({ id, file, transcript: "", isTranscribing: false });
|
||||
}
|
||||
if (valid.length === 0) return;
|
||||
setPendingFiles((prev) => [...prev, ...valid]);
|
||||
setError(null);
|
||||
for (const pf of valid) {
|
||||
transcribeFile(pf.id, pf.file);
|
||||
}
|
||||
};
|
||||
|
||||
const removePendingFile = (pendingId: string) => {
|
||||
setPendingFiles((prev) => prev.filter((p) => p.id !== pendingId));
|
||||
};
|
||||
|
||||
const updateTranscript = (pendingId: string, transcript: string) => {
|
||||
setPendingFiles((prev) =>
|
||||
prev.map((p) => (p.id === pendingId ? { ...p, transcript } : p))
|
||||
);
|
||||
};
|
||||
|
||||
const startRecording = async () => {
|
||||
try {
|
||||
const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
|
||||
const mediaRecorder = new MediaRecorder(stream);
|
||||
mediaRecorderRef.current = mediaRecorder;
|
||||
audioChunksRef.current = [];
|
||||
|
||||
mediaRecorder.ondataavailable = (e) => {
|
||||
if (e.data.size > 0) audioChunksRef.current.push(e.data);
|
||||
};
|
||||
|
||||
const filename = recordingFilename.trim() || "recording";
|
||||
mediaRecorder.onstop = () => {
|
||||
stream.getTracks().forEach((t) => t.stop());
|
||||
stopRecordingTimer();
|
||||
|
||||
const blob = new Blob(audioChunksRef.current, { type: mediaRecorder.mimeType });
|
||||
if (blob.size > MAX_FILE_SIZE) {
|
||||
setError(`Recording (${(blob.size / (1024 * 1024)).toFixed(1)}MB) exceeds the maximum allowed size of 5MB.`);
|
||||
resetRecordingState();
|
||||
return;
|
||||
}
|
||||
const ext = mediaRecorder.mimeType.includes("webm") ? "webm" : "mp4";
|
||||
const file = new File([blob], `${filename}.${ext}`, { type: mediaRecorder.mimeType });
|
||||
resetRecordingState();
|
||||
addPendingFiles([file]);
|
||||
};
|
||||
|
||||
mediaRecorder.start();
|
||||
setRecordingStep("recording");
|
||||
setRecordingDuration(0);
|
||||
setError(null);
|
||||
recordingTimerRef.current = setInterval(() => {
|
||||
setRecordingDuration((d) => d + 1);
|
||||
}, 1000);
|
||||
} catch {
|
||||
setError("Microphone access denied. Please allow microphone permissions.");
|
||||
resetRecordingState();
|
||||
}
|
||||
};
|
||||
|
||||
const handleFileSelect = (fileList: FileList | null) => {
|
||||
if (!fileList || fileList.length === 0) return;
|
||||
addPendingFiles(Array.from(fileList));
|
||||
if (fileInputRef.current) fileInputRef.current.value = "";
|
||||
};
|
||||
|
||||
const handleUpload = async () => {
|
||||
const ready = pendingFiles.filter((p) => p.transcript.trim() && !p.isTranscribing);
|
||||
if (ready.length === 0) return;
|
||||
|
||||
setUploading(true);
|
||||
setError(null);
|
||||
|
||||
try {
|
||||
const uploadUrlResponse = await getUploadUrlsApiV1WorkflowRecordingsUploadUrlPost({
|
||||
body: {
|
||||
files: ready.map((p) => ({
|
||||
filename: p.file.name,
|
||||
mime_type: p.file.type || "audio/wav",
|
||||
file_size: p.file.size,
|
||||
})),
|
||||
},
|
||||
});
|
||||
|
||||
if (!uploadUrlResponse.data?.items) {
|
||||
throw new Error("Failed to get upload URLs");
|
||||
}
|
||||
|
||||
const items = uploadUrlResponse.data.items;
|
||||
|
||||
await Promise.all(
|
||||
items.map(async (item: RecordingUploadResponseSchema, idx: number) => {
|
||||
const file = ready[idx].file;
|
||||
const uploadResponse = await fetch(item.upload_url, {
|
||||
method: "PUT",
|
||||
body: file,
|
||||
headers: { "Content-Type": file.type || "audio/wav" },
|
||||
});
|
||||
if (!uploadResponse.ok) {
|
||||
throw new Error(`File upload failed for ${file.name}`);
|
||||
}
|
||||
})
|
||||
);
|
||||
|
||||
await createRecordingsApiV1WorkflowRecordingsPost({
|
||||
body: {
|
||||
recordings: items.map((item: RecordingUploadResponseSchema, idx: number) => ({
|
||||
recording_id: item.recording_id,
|
||||
transcript: ready[idx].transcript.trim(),
|
||||
storage_key: item.storage_key,
|
||||
metadata: {
|
||||
original_filename: ready[idx].file.name,
|
||||
file_size_bytes: ready[idx].file.size,
|
||||
mime_type: ready[idx].file.type,
|
||||
language,
|
||||
},
|
||||
})),
|
||||
},
|
||||
});
|
||||
|
||||
setPendingFiles([]);
|
||||
setLanguage("multi");
|
||||
resetRecordingState();
|
||||
if (fileInputRef.current) fileInputRef.current.value = "";
|
||||
onUploadComplete?.();
|
||||
onOpenChange(false);
|
||||
} catch (err) {
|
||||
setError(err instanceof Error ? err.message : "Failed to upload recordings");
|
||||
} finally {
|
||||
setUploading(false);
|
||||
}
|
||||
};
|
||||
|
||||
const isRecording = recordingStep === "recording";
|
||||
const anyTranscribing = pendingFiles.some((p) => p.isTranscribing);
|
||||
const readyCount = pendingFiles.filter((p) => p.transcript.trim() && !p.isTranscribing).length;
|
||||
const isBusy = uploading || isRecording || anyTranscribing;
|
||||
|
||||
return (
|
||||
<Dialog open={open} onOpenChange={onOpenChange}>
|
||||
<DialogContent className="max-w-lg max-h-[80vh] overflow-y-auto">
|
||||
<DialogHeader>
|
||||
<DialogTitle>Upload Recordings</DialogTitle>
|
||||
<DialogDescription>
|
||||
Upload or record audio files. Use{" "}
|
||||
<code className="text-xs bg-muted px-1 rounded">@</code> in
|
||||
prompt fields to insert them into your agents.
|
||||
</DialogDescription>
|
||||
</DialogHeader>
|
||||
|
||||
{error && (
|
||||
<div className="text-sm text-destructive bg-destructive/10 rounded-md p-2">
|
||||
{error}
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Upload Section */}
|
||||
<div className="space-y-3">
|
||||
{/* Audio source: file picker or record */}
|
||||
<div>
|
||||
<Label className="text-xs text-muted-foreground">Audio Files</Label>
|
||||
<div className="flex gap-2">
|
||||
<input
|
||||
ref={fileInputRef}
|
||||
type="file"
|
||||
accept="audio/*"
|
||||
multiple
|
||||
onChange={(e) => handleFileSelect(e.target.files)}
|
||||
className="hidden"
|
||||
/>
|
||||
<Button
|
||||
type="button"
|
||||
variant="outline"
|
||||
size="sm"
|
||||
className="flex-1 justify-start text-sm font-normal"
|
||||
onClick={() => fileInputRef.current?.click()}
|
||||
disabled={isBusy}
|
||||
>
|
||||
<Upload className="w-4 h-4 mr-2 shrink-0" />
|
||||
<span className="text-muted-foreground">Choose audio files (max 5MB each)</span>
|
||||
</Button>
|
||||
{recordingStep === "idle" && (
|
||||
<Button
|
||||
type="button"
|
||||
variant="outline"
|
||||
size="sm"
|
||||
onClick={() => setRecordingStep("naming")}
|
||||
disabled={uploading || anyTranscribing}
|
||||
>
|
||||
<Mic className="w-4 h-4 mr-1" />
|
||||
Record
|
||||
</Button>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Recording: filename + start/stop */}
|
||||
{(recordingStep === "naming" || isRecording) && (
|
||||
<div className="space-y-2 rounded-md border border-dashed p-3 bg-muted/20">
|
||||
{recordingStep === "naming" && (
|
||||
<>
|
||||
<div>
|
||||
<Label className="text-xs text-muted-foreground">Recording Name</Label>
|
||||
<Input
|
||||
placeholder="e.g. greeting, hold-message"
|
||||
value={recordingFilename}
|
||||
onChange={(e) => setRecordingFilename(e.target.value)}
|
||||
autoFocus
|
||||
/>
|
||||
</div>
|
||||
<div className="flex gap-2">
|
||||
<Button size="sm" onClick={startRecording} disabled={!recordingFilename.trim()}>
|
||||
<Mic className="w-4 h-4 mr-1" />
|
||||
Start Recording
|
||||
</Button>
|
||||
<Button size="sm" variant="ghost" onClick={resetRecordingState}>
|
||||
Cancel
|
||||
</Button>
|
||||
</div>
|
||||
</>
|
||||
)}
|
||||
{isRecording && (
|
||||
<div className="flex items-center gap-3">
|
||||
<span className="relative flex h-3 w-3">
|
||||
<span className="animate-ping absolute inline-flex h-full w-full rounded-full bg-red-400 opacity-75" />
|
||||
<span className="relative inline-flex rounded-full h-3 w-3 bg-red-500" />
|
||||
</span>
|
||||
<span className="text-sm font-mono">
|
||||
{Math.floor(recordingDuration / 60)}:{(recordingDuration % 60).toString().padStart(2, "0")}
|
||||
</span>
|
||||
<span className="text-xs text-muted-foreground">{recordingFilename}</span>
|
||||
<Button
|
||||
size="sm"
|
||||
variant="destructive"
|
||||
onClick={() => stopRecording()}
|
||||
className="ml-auto"
|
||||
>
|
||||
<Square className="w-4 h-4 mr-1" />
|
||||
Stop
|
||||
</Button>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Pending files list */}
|
||||
{pendingFiles.length > 0 && (
|
||||
<div className="space-y-2">
|
||||
<Label className="text-xs text-muted-foreground">
|
||||
Pending ({pendingFiles.length} file{pendingFiles.length !== 1 ? "s" : ""})
|
||||
</Label>
|
||||
{pendingFiles.map((pf) => (
|
||||
<div key={pf.id} className="rounded-md border p-2 space-y-1.5 bg-muted/10">
|
||||
<div className="flex items-center gap-2">
|
||||
<code className="text-xs bg-muted px-1.5 py-0.5 rounded font-mono truncate flex-1">
|
||||
{pf.file.name} ({(pf.file.size / (1024 * 1024)).toFixed(1)}MB)
|
||||
</code>
|
||||
{pf.isTranscribing && (
|
||||
<Loader2 className="w-3.5 h-3.5 animate-spin text-muted-foreground shrink-0" />
|
||||
)}
|
||||
<Button
|
||||
size="sm"
|
||||
variant="ghost"
|
||||
className="h-6 w-6 p-0 shrink-0"
|
||||
onClick={() => removePendingFile(pf.id)}
|
||||
disabled={uploading}
|
||||
>
|
||||
<X className="w-3.5 h-3.5" />
|
||||
</Button>
|
||||
</div>
|
||||
{pf.error && (
|
||||
<p className="text-xs text-destructive">{pf.error}</p>
|
||||
)}
|
||||
<Textarea
|
||||
placeholder={pf.isTranscribing ? "Transcribing..." : "What does this recording say?"}
|
||||
value={pf.transcript}
|
||||
onChange={(e) => updateTranscript(pf.id, e.target.value)}
|
||||
disabled={pf.isTranscribing}
|
||||
rows={2}
|
||||
className="resize-none text-sm"
|
||||
/>
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Language */}
|
||||
<div>
|
||||
<Label className="text-xs text-muted-foreground">Language</Label>
|
||||
<Select value={language} onValueChange={setLanguage}>
|
||||
<SelectTrigger className="h-9 text-sm">
|
||||
<SelectValue />
|
||||
</SelectTrigger>
|
||||
<SelectContent>
|
||||
{Object.entries(LANGUAGE_DISPLAY_NAMES).map(([code, name]) => (
|
||||
<SelectItem key={code} value={code}>
|
||||
{name}
|
||||
</SelectItem>
|
||||
))}
|
||||
</SelectContent>
|
||||
</Select>
|
||||
</div>
|
||||
|
||||
<Button
|
||||
size="sm"
|
||||
onClick={handleUpload}
|
||||
disabled={readyCount === 0 || isBusy}
|
||||
>
|
||||
{uploading ? (
|
||||
<Loader2 className="w-4 h-4 mr-1 animate-spin" />
|
||||
) : (
|
||||
<Upload className="w-4 h-4 mr-1" />
|
||||
)}
|
||||
{uploading
|
||||
? "Uploading..."
|
||||
: `Upload ${readyCount} Recording${readyCount !== 1 ? "s" : ""}`}
|
||||
</Button>
|
||||
</div>
|
||||
</DialogContent>
|
||||
</Dialog>
|
||||
);
|
||||
};
|
||||
77
ui/src/app/recordings/page.tsx
Normal file
77
ui/src/app/recordings/page.tsx
Normal file
|
|
@ -0,0 +1,77 @@
|
|||
"use client";
|
||||
|
||||
import { ExternalLink, Upload } from "lucide-react";
|
||||
import { useEffect, useState } from "react";
|
||||
|
||||
import { Button } from "@/components/ui/button";
|
||||
import { Card, CardContent, CardDescription, CardHeader, CardTitle } from "@/components/ui/card";
|
||||
import { Skeleton } from "@/components/ui/skeleton";
|
||||
import { useAuth } from "@/lib/auth";
|
||||
|
||||
import RecordingsList from "./RecordingsList";
|
||||
import { RecordingsUploadDialog } from "./RecordingsUploadDialog";
|
||||
|
||||
export default function RecordingsPage() {
|
||||
const { user, redirectToLogin, loading } = useAuth();
|
||||
const [isUploadOpen, setIsUploadOpen] = useState(false);
|
||||
const [refreshKey, setRefreshKey] = useState(0);
|
||||
|
||||
useEffect(() => {
|
||||
if (!loading && !user) {
|
||||
redirectToLogin();
|
||||
}
|
||||
}, [loading, user, redirectToLogin]);
|
||||
|
||||
if (loading || !user) {
|
||||
return (
|
||||
<div className="container mx-auto px-4 py-8">
|
||||
<div className="space-y-4">
|
||||
<Skeleton className="h-12 w-64" />
|
||||
<Skeleton className="h-64 w-full" />
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
return (
|
||||
<div className="container mx-auto px-4 py-8">
|
||||
<div className="mb-8">
|
||||
<h1 className="text-3xl font-bold mb-2">Recordings</h1>
|
||||
<p className="text-muted-foreground">
|
||||
Manage audio recordings for your organization. Use{" "}
|
||||
<code className="rounded bg-muted px-1 text-xs">@</code> in prompt fields to insert them,
|
||||
or as transition messages in tool calls.{" "}
|
||||
<a href="https://docs.dograh.com/voice-agent/pre-recorded-audio" target="_blank" rel="noopener noreferrer" className="inline-flex items-center gap-0.5 underline">
|
||||
Learn more <ExternalLink className="h-3 w-3" />
|
||||
</a>
|
||||
</p>
|
||||
</div>
|
||||
|
||||
<Card>
|
||||
<CardHeader>
|
||||
<div className="flex justify-between items-center">
|
||||
<div>
|
||||
<CardTitle>All Recordings</CardTitle>
|
||||
<CardDescription>
|
||||
Audio recordings shared across all agents in your organization
|
||||
</CardDescription>
|
||||
</div>
|
||||
<Button onClick={() => setIsUploadOpen(true)}>
|
||||
<Upload className="w-4 h-4 mr-2" />
|
||||
Upload Recording
|
||||
</Button>
|
||||
</div>
|
||||
</CardHeader>
|
||||
<CardContent>
|
||||
<RecordingsList refreshKey={refreshKey} />
|
||||
</CardContent>
|
||||
</Card>
|
||||
|
||||
<RecordingsUploadDialog
|
||||
open={isUploadOpen}
|
||||
onOpenChange={setIsUploadOpen}
|
||||
onUploadComplete={() => setRefreshKey((k) => k + 1)}
|
||||
/>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
|
@ -2,6 +2,8 @@
|
|||
|
||||
import { AlertCircle } from "lucide-react";
|
||||
|
||||
import type { RecordingResponseSchema } from "@/client/types.gen";
|
||||
import { RecordingSelect } from "@/components/flow/TextOrAudioInput";
|
||||
import { Card, CardContent, CardDescription, CardHeader, CardTitle } from "@/components/ui/card";
|
||||
import { Input } from "@/components/ui/input";
|
||||
import { Label } from "@/components/ui/label";
|
||||
|
|
@ -20,6 +22,9 @@ export interface EndCallToolConfigProps {
|
|||
onMessageTypeChange: (messageType: EndCallMessageType) => void;
|
||||
customMessage: string;
|
||||
onCustomMessageChange: (message: string) => void;
|
||||
audioRecordingId: string;
|
||||
onAudioRecordingIdChange: (id: string) => void;
|
||||
recordings?: RecordingResponseSchema[];
|
||||
endCallReason: boolean;
|
||||
onEndCallReasonChange: (enabled: boolean) => void;
|
||||
endCallReasonDescription: string;
|
||||
|
|
@ -35,6 +40,9 @@ export function EndCallToolConfig({
|
|||
onMessageTypeChange,
|
||||
customMessage,
|
||||
onCustomMessageChange,
|
||||
audioRecordingId,
|
||||
onAudioRecordingIdChange,
|
||||
recordings = [],
|
||||
endCallReason,
|
||||
onEndCallReasonChange,
|
||||
endCallReasonDescription,
|
||||
|
|
@ -148,6 +156,24 @@ export function EndCallToolConfig({
|
|||
/>
|
||||
</div>
|
||||
)}
|
||||
<div className="flex items-start space-x-3 p-3 border rounded-lg hover:bg-muted/50">
|
||||
<RadioGroupItem value="audio" id="audio" className="mt-1" />
|
||||
<label htmlFor="audio" className="flex-1 space-y-2 cursor-pointer">
|
||||
<span className="font-medium">Pre-recorded Audio</span>
|
||||
<p className="text-xs text-muted-foreground">
|
||||
Play a pre-recorded audio file before disconnecting
|
||||
</p>
|
||||
</label>
|
||||
</div>
|
||||
{messageType === "audio" && (
|
||||
<div className="pl-8">
|
||||
<RecordingSelect
|
||||
value={audioRecordingId}
|
||||
onChange={onAudioRecordingIdChange}
|
||||
recordings={recordings}
|
||||
/>
|
||||
</div>
|
||||
)}
|
||||
</RadioGroup>
|
||||
</div>
|
||||
</CardContent>
|
||||
|
|
|
|||
|
|
@ -2,6 +2,8 @@
|
|||
|
||||
import { AlertCircle } from "lucide-react";
|
||||
|
||||
import type { RecordingResponseSchema } from "@/client/types.gen";
|
||||
import { TextOrAudioInput } from "@/components/flow/TextOrAudioInput";
|
||||
import {
|
||||
CredentialSelector,
|
||||
type HttpMethod,
|
||||
|
|
@ -37,6 +39,11 @@ export interface HttpApiToolConfigProps {
|
|||
onTimeoutMsChange: (timeout: number) => void;
|
||||
customMessage: string;
|
||||
onCustomMessageChange: (message: string) => void;
|
||||
customMessageType: 'text' | 'audio';
|
||||
onCustomMessageTypeChange: (type: 'text' | 'audio') => void;
|
||||
customMessageRecordingId: string;
|
||||
onCustomMessageRecordingIdChange: (id: string) => void;
|
||||
recordings?: RecordingResponseSchema[];
|
||||
}
|
||||
|
||||
export function HttpApiToolConfig({
|
||||
|
|
@ -58,6 +65,11 @@ export function HttpApiToolConfig({
|
|||
onTimeoutMsChange,
|
||||
customMessage,
|
||||
onCustomMessageChange,
|
||||
customMessageType,
|
||||
onCustomMessageTypeChange,
|
||||
customMessageRecordingId,
|
||||
onCustomMessageRecordingIdChange,
|
||||
recordings = [],
|
||||
}: HttpApiToolConfigProps) {
|
||||
return (
|
||||
<Card>
|
||||
|
|
@ -136,18 +148,28 @@ export function HttpApiToolConfig({
|
|||
<div className="grid gap-2 pt-4 border-t">
|
||||
<Label>Custom Message</Label>
|
||||
<Label className="text-xs text-muted-foreground">
|
||||
Optional message the AI will speak before executing this tool (e.g., "Let me look that up for you")
|
||||
Optional message the AI will speak or play before executing this tool.
|
||||
</Label>
|
||||
<div className="flex items-start gap-2 rounded-md bg-amber-50 p-2 text-xs text-amber-700 border border-amber-200">
|
||||
<AlertCircle className="h-3.5 w-3.5 mt-0.5 shrink-0" />
|
||||
<span>This text is spoken as-is. For multilingual workflows, choose your phrasing carefully.</span>
|
||||
</div>
|
||||
<Textarea
|
||||
value={customMessage}
|
||||
onChange={(e) => onCustomMessageChange(e.target.value)}
|
||||
placeholder="e.g., Let me check that for you, one moment please."
|
||||
rows={2}
|
||||
/>
|
||||
<TextOrAudioInput
|
||||
type={customMessageType}
|
||||
onTypeChange={onCustomMessageTypeChange}
|
||||
recordingId={customMessageRecordingId}
|
||||
onRecordingIdChange={onCustomMessageRecordingIdChange}
|
||||
recordings={recordings}
|
||||
>
|
||||
<>
|
||||
<div className="flex items-start gap-2 rounded-md bg-amber-50 p-2 text-xs text-amber-700 border border-amber-200">
|
||||
<AlertCircle className="h-3.5 w-3.5 mt-0.5 shrink-0" />
|
||||
<span>This text is spoken as-is. For multilingual workflows, choose your phrasing carefully.</span>
|
||||
</div>
|
||||
<Textarea
|
||||
value={customMessage}
|
||||
onChange={(e) => onCustomMessageChange(e.target.value)}
|
||||
placeholder="e.g., Let me check that for you, one moment please."
|
||||
rows={2}
|
||||
/>
|
||||
</>
|
||||
</TextOrAudioInput>
|
||||
</div>
|
||||
</TabsContent>
|
||||
|
||||
|
|
|
|||
|
|
@ -3,6 +3,8 @@
|
|||
import { AlertCircle } from "lucide-react";
|
||||
import {useState } from "react";
|
||||
|
||||
import type { RecordingResponseSchema } from "@/client/types.gen";
|
||||
import { RecordingSelect } from "@/components/flow/TextOrAudioInput";
|
||||
import { Card, CardContent, CardDescription, CardHeader, CardTitle } from "@/components/ui/card";
|
||||
import { Input } from "@/components/ui/input";
|
||||
import { Label } from "@/components/ui/label";
|
||||
|
|
@ -22,6 +24,9 @@ export interface TransferCallToolConfigProps {
|
|||
onMessageTypeChange: (messageType: EndCallMessageType) => void;
|
||||
customMessage: string;
|
||||
onCustomMessageChange: (message: string) => void;
|
||||
audioRecordingId: string;
|
||||
onAudioRecordingIdChange: (id: string) => void;
|
||||
recordings?: RecordingResponseSchema[];
|
||||
timeout?: number; // Make optional to match API type
|
||||
onTimeoutChange: (timeout: number) => void;
|
||||
}
|
||||
|
|
@ -37,6 +42,9 @@ export function TransferCallToolConfig({
|
|||
onMessageTypeChange,
|
||||
customMessage,
|
||||
onCustomMessageChange,
|
||||
audioRecordingId,
|
||||
onAudioRecordingIdChange,
|
||||
recordings = [],
|
||||
timeout,
|
||||
onTimeoutChange,
|
||||
}: TransferCallToolConfigProps) {
|
||||
|
|
@ -181,6 +189,24 @@ export function TransferCallToolConfig({
|
|||
/>
|
||||
</div>
|
||||
)}
|
||||
<div className="flex items-start space-x-3 p-3 border rounded-lg hover:bg-muted/50">
|
||||
<RadioGroupItem value="audio" id="audio" className="mt-1" />
|
||||
<label htmlFor="audio" className="flex-1 space-y-2 cursor-pointer">
|
||||
<span className="font-medium">Pre-recorded Audio</span>
|
||||
<p className="text-xs text-muted-foreground">
|
||||
Play a pre-recorded audio file before transferring
|
||||
</p>
|
||||
</label>
|
||||
</div>
|
||||
{messageType === "audio" && (
|
||||
<div className="pl-8">
|
||||
<RecordingSelect
|
||||
value={audioRecordingId}
|
||||
onChange={onAudioRecordingIdChange}
|
||||
recordings={recordings}
|
||||
/>
|
||||
</div>
|
||||
)}
|
||||
</RadioGroup>
|
||||
</div>
|
||||
|
||||
|
|
|
|||
|
|
@ -6,9 +6,10 @@ import { useCallback, useEffect, useState } from "react";
|
|||
|
||||
import {
|
||||
getToolApiV1ToolsToolUuidGet,
|
||||
listRecordingsApiV1WorkflowRecordingsGet,
|
||||
updateToolApiV1ToolsToolUuidPut,
|
||||
} from "@/client/sdk.gen";
|
||||
import type { ToolResponse, TransferCallConfig as APITransferCallConfig } from "@/client/types.gen";
|
||||
import type { RecordingResponseSchema, ToolResponse, TransferCallConfig as APITransferCallConfig } from "@/client/types.gen";
|
||||
import type { EndCallConfig } from "@/client/types.gen";
|
||||
import { type HttpMethod, type KeyValueItem, type ToolParameter, validateUrl } from "@/components/http";
|
||||
import { Button } from "@/components/ui/button";
|
||||
|
|
@ -75,6 +76,7 @@ export default function ToolDetailPage() {
|
|||
const [endCallMessageType, setEndCallMessageType] = useState<EndCallMessageType>("none");
|
||||
const [endCallReason, setEndCallReason] = useState(false);
|
||||
const [endCallReasonDescription, setEndCallReasonDescription] = useState("");
|
||||
const [audioRecordingId, setAudioRecordingId] = useState("");
|
||||
|
||||
const handleEndCallReasonChange = (enabled: boolean) => {
|
||||
setEndCallReason(enabled);
|
||||
|
|
@ -87,6 +89,14 @@ export default function ToolDetailPage() {
|
|||
const [transferDestination, setTransferDestination] = useState("");
|
||||
const [transferMessageType, setTransferMessageType] = useState<EndCallMessageType>("none");
|
||||
const [transferTimeout, setTransferTimeout] = useState(30);
|
||||
const [transferAudioRecordingId, setTransferAudioRecordingId] = useState("");
|
||||
|
||||
// HTTP API form state - custom message type
|
||||
const [customMessageType, setCustomMessageType] = useState<'text' | 'audio'>('text');
|
||||
const [customMessageRecordingId, setCustomMessageRecordingId] = useState("");
|
||||
|
||||
// Org-level recordings for audio dropdowns
|
||||
const [recordings, setRecordings] = useState<RecordingResponseSchema[]>([]);
|
||||
|
||||
// Redirect if not authenticated
|
||||
useEffect(() => {
|
||||
|
|
@ -132,11 +142,14 @@ export default function ToolDetailPage() {
|
|||
if (config) {
|
||||
setEndCallMessageType(config.messageType || "none");
|
||||
setCustomMessage(config.customMessage || "");
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
setAudioRecordingId((config as any).audioRecordingId || "");
|
||||
setEndCallReason(config.endCallReason ?? false);
|
||||
setEndCallReasonDescription(config.endCallReasonDescription || "");
|
||||
} else {
|
||||
setEndCallMessageType("none");
|
||||
setCustomMessage("");
|
||||
setAudioRecordingId("");
|
||||
setEndCallReason(false);
|
||||
setEndCallReasonDescription("");
|
||||
}
|
||||
|
|
@ -147,11 +160,14 @@ export default function ToolDetailPage() {
|
|||
setTransferDestination(config.destination || "");
|
||||
setTransferMessageType(config.messageType || "none");
|
||||
setCustomMessage(config.customMessage || "");
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
setTransferAudioRecordingId((config as any).audioRecordingId || "");
|
||||
setTransferTimeout(config.timeout ?? 30);
|
||||
} else {
|
||||
setTransferDestination("");
|
||||
setTransferMessageType("none");
|
||||
setCustomMessage("");
|
||||
setTransferAudioRecordingId("");
|
||||
setTransferTimeout(30);
|
||||
}
|
||||
} else {
|
||||
|
|
@ -163,6 +179,10 @@ export default function ToolDetailPage() {
|
|||
setCredentialUuid(config.credential_uuid || "");
|
||||
setTimeoutMs(config.timeout_ms || 5000);
|
||||
setCustomMessage(config.customMessage || "");
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
setCustomMessageType((config as any).customMessageType || "text");
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
setCustomMessageRecordingId((config as any).customMessageRecordingId || "");
|
||||
|
||||
// Convert headers object to array
|
||||
if (config.headers) {
|
||||
|
|
@ -193,9 +213,24 @@ export default function ToolDetailPage() {
|
|||
}
|
||||
};
|
||||
|
||||
const fetchRecordings = useCallback(async () => {
|
||||
if (loading || !user) return;
|
||||
try {
|
||||
const response = await listRecordingsApiV1WorkflowRecordingsGet({
|
||||
query: {},
|
||||
});
|
||||
if (response.data) {
|
||||
setRecordings(response.data.recordings);
|
||||
}
|
||||
} catch {
|
||||
// Non-critical — dropdowns will show "No recordings available"
|
||||
}
|
||||
}, [loading, user]);
|
||||
|
||||
useEffect(() => {
|
||||
fetchTool();
|
||||
}, [fetchTool]);
|
||||
fetchRecordings();
|
||||
}, [fetchTool, fetchRecordings]);
|
||||
|
||||
const handleSave = async () => {
|
||||
if (!tool) return;
|
||||
|
|
@ -259,6 +294,7 @@ export default function ToolDetailPage() {
|
|||
config: {
|
||||
messageType: endCallMessageType,
|
||||
customMessage: endCallMessageType === "custom" ? customMessage : undefined,
|
||||
audioRecordingId: endCallMessageType === "audio" ? audioRecordingId || undefined : undefined,
|
||||
endCallReason,
|
||||
endCallReasonDescription: endCallReason ? endCallReasonDescription || undefined : undefined,
|
||||
},
|
||||
|
|
@ -276,6 +312,7 @@ export default function ToolDetailPage() {
|
|||
destination: transferDestination,
|
||||
messageType: transferMessageType,
|
||||
customMessage: transferMessageType === "custom" ? customMessage : undefined,
|
||||
audioRecordingId: transferMessageType === "audio" ? transferAudioRecordingId || undefined : undefined,
|
||||
timeout: transferTimeout,
|
||||
},
|
||||
},
|
||||
|
|
@ -306,7 +343,9 @@ export default function ToolDetailPage() {
|
|||
parameters:
|
||||
validParameters.length > 0 ? validParameters : undefined,
|
||||
timeout_ms: timeoutMs,
|
||||
customMessage: customMessage || undefined,
|
||||
customMessage: customMessageType === 'text' ? (customMessage || undefined) : undefined,
|
||||
customMessageType,
|
||||
customMessageRecordingId: customMessageType === 'audio' ? (customMessageRecordingId || undefined) : undefined,
|
||||
},
|
||||
},
|
||||
};
|
||||
|
|
@ -490,6 +529,9 @@ const data = await response.json();`;
|
|||
onMessageTypeChange={setEndCallMessageType}
|
||||
customMessage={customMessage}
|
||||
onCustomMessageChange={setCustomMessage}
|
||||
audioRecordingId={audioRecordingId}
|
||||
onAudioRecordingIdChange={setAudioRecordingId}
|
||||
recordings={recordings}
|
||||
endCallReason={endCallReason}
|
||||
onEndCallReasonChange={handleEndCallReasonChange}
|
||||
endCallReasonDescription={endCallReasonDescription}
|
||||
|
|
@ -507,6 +549,9 @@ const data = await response.json();`;
|
|||
onMessageTypeChange={setTransferMessageType}
|
||||
customMessage={customMessage}
|
||||
onCustomMessageChange={setCustomMessage}
|
||||
audioRecordingId={transferAudioRecordingId}
|
||||
onAudioRecordingIdChange={setTransferAudioRecordingId}
|
||||
recordings={recordings}
|
||||
timeout={transferTimeout}
|
||||
onTimeoutChange={setTransferTimeout}
|
||||
/>
|
||||
|
|
@ -530,6 +575,11 @@ const data = await response.json();`;
|
|||
onTimeoutMsChange={setTimeoutMs}
|
||||
customMessage={customMessage}
|
||||
onCustomMessageChange={setCustomMessage}
|
||||
customMessageType={customMessageType}
|
||||
onCustomMessageTypeChange={setCustomMessageType}
|
||||
customMessageRecordingId={customMessageRecordingId}
|
||||
onCustomMessageRecordingIdChange={setCustomMessageRecordingId}
|
||||
recordings={recordings}
|
||||
/>
|
||||
)}
|
||||
|
||||
|
|
|
|||
|
|
@ -14,7 +14,7 @@ import type {
|
|||
|
||||
export type ToolCategory = "http_api" | "end_call" | "transfer_call" | "calculator" | "native" | "integration";
|
||||
|
||||
export type EndCallMessageType = "none" | "custom";
|
||||
export type EndCallMessageType = "none" | "custom" | "audio";
|
||||
|
||||
export interface ToolCategoryConfig {
|
||||
value: ToolCategory;
|
||||
|
|
|
|||
|
|
@ -15,7 +15,6 @@ import type { DocumentResponseSchema, RecordingResponseSchema, ToolResponse } fr
|
|||
import { FlowEdge, FlowNode, NodeType } from "@/components/flow/types";
|
||||
import { Button } from '@/components/ui/button';
|
||||
import { Tooltip, TooltipContent, TooltipProvider, TooltipTrigger } from '@/components/ui/tooltip';
|
||||
import { useUserConfig } from '@/context/UserConfigContext';
|
||||
import { WorkflowConfigurations } from '@/types/workflow-configurations';
|
||||
|
||||
import AddNodePanel from "../../../components/flow/AddNodePanel";
|
||||
|
|
@ -64,12 +63,6 @@ interface RenderWorkflowProps {
|
|||
|
||||
function RenderWorkflow({ initialWorkflowName, workflowId, initialFlow, initialTemplateContextVariables, initialWorkflowConfigurations, initialVersionNumber, initialVersionStatus, user }: RenderWorkflowProps) {
|
||||
const router = useRouter();
|
||||
const { userConfig } = useUserConfig();
|
||||
const ttsOverrides = initialWorkflowConfigurations?.model_overrides?.tts;
|
||||
const ttsProvider = ttsOverrides?.provider ?? (userConfig?.tts?.provider as string) ?? "";
|
||||
const ttsModel = ttsOverrides?.model ?? (userConfig?.tts?.model as string) ?? "";
|
||||
const ttsVoiceId = ttsOverrides?.voice ?? (userConfig?.tts?.voice as string) ?? "";
|
||||
|
||||
const [isPhoneCallDialogOpen, setIsPhoneCallDialogOpen] = useState(false);
|
||||
const [isVersionPanelOpen, setIsVersionPanelOpen] = useState(false);
|
||||
const [versions, setVersions] = useState<WorkflowVersion[]>([]);
|
||||
|
|
@ -245,15 +238,10 @@ function RenderWorkflow({ initialWorkflowName, workflowId, initialFlow, initialT
|
|||
setTools(toolsResponse.data);
|
||||
}
|
||||
|
||||
// Fetch recordings for this workflow filtered by active TTS config
|
||||
// Fetch org-level recordings
|
||||
try {
|
||||
const recordingsResponse = await listRecordingsApiV1WorkflowRecordingsGet({
|
||||
query: {
|
||||
workflow_id: workflowId,
|
||||
tts_provider: ttsProvider || undefined,
|
||||
tts_model: ttsModel || undefined,
|
||||
tts_voice_id: ttsVoiceId || undefined,
|
||||
},
|
||||
query: {},
|
||||
});
|
||||
if (recordingsResponse.data) {
|
||||
setRecordings(recordingsResponse.data.recordings);
|
||||
|
|
@ -267,7 +255,7 @@ function RenderWorkflow({ initialWorkflowName, workflowId, initialFlow, initialT
|
|||
};
|
||||
|
||||
fetchData();
|
||||
}, [workflowId, ttsProvider, ttsModel, ttsVoiceId]);
|
||||
}, [workflowId]);
|
||||
|
||||
// Memoize defaultEdgeOptions to prevent unnecessary re-renders
|
||||
const defaultEdgeOptions = useMemo(() => ({
|
||||
|
|
|
|||
|
|
@ -62,7 +62,6 @@ let pendingFileCounter = 0;
|
|||
export const RecordingsDialog = ({
|
||||
open,
|
||||
onOpenChange,
|
||||
workflowId,
|
||||
onRecordingsChange,
|
||||
ttsOverrides,
|
||||
}: RecordingsDialogProps) => {
|
||||
|
|
@ -89,12 +88,10 @@ export const RecordingsDialog = ({
|
|||
const ttsVoiceId = ttsOverrides?.voice ?? (userConfig?.tts?.voice as string) ?? "";
|
||||
|
||||
const fetchRecordings = useCallback(async () => {
|
||||
if (!workflowId) return;
|
||||
setLoading(true);
|
||||
try {
|
||||
const result = await listRecordingsApiV1WorkflowRecordingsGet({
|
||||
query: {
|
||||
workflow_id: workflowId,
|
||||
tts_provider: ttsProvider || undefined,
|
||||
tts_model: ttsModel || undefined,
|
||||
tts_voice_id: ttsVoiceId || undefined,
|
||||
|
|
@ -108,7 +105,7 @@ export const RecordingsDialog = ({
|
|||
} finally {
|
||||
setLoading(false);
|
||||
}
|
||||
}, [workflowId, ttsProvider, ttsModel, ttsVoiceId, onRecordingsChange]);
|
||||
}, [ttsProvider, ttsModel, ttsVoiceId, onRecordingsChange]);
|
||||
|
||||
const stopRecordingTimer = useCallback(() => {
|
||||
if (recordingTimerRef.current) {
|
||||
|
|
@ -277,7 +274,6 @@ export const RecordingsDialog = ({
|
|||
const uploadUrlResponse =
|
||||
await getUploadUrlsApiV1WorkflowRecordingsUploadUrlPost({
|
||||
body: {
|
||||
workflow_id: workflowId,
|
||||
files: ready.map((p) => ({
|
||||
filename: p.file.name,
|
||||
mime_type: p.file.type || "audio/wav",
|
||||
|
|
@ -314,7 +310,6 @@ export const RecordingsDialog = ({
|
|||
body: {
|
||||
recordings: items.map((item: RecordingUploadResponseSchema, idx: number) => ({
|
||||
recording_id: item.recording_id,
|
||||
workflow_id: workflowId,
|
||||
tts_provider: ttsProvider,
|
||||
tts_model: ttsModel,
|
||||
tts_voice_id: ttsVoiceId,
|
||||
|
|
|
|||
|
|
@ -446,6 +446,17 @@ export const useWebSocketRTC = ({ workflowId, workflowRunId, accessToken, initia
|
|||
if (!firstBotSpeechCompletedRef.current) {
|
||||
firstBotSpeechCompletedRef.current = true;
|
||||
}
|
||||
// Finalize the last bot message so "speaking..." indicator is removed
|
||||
setFeedbackMessages(prev => {
|
||||
const lastIdx = prev.length - 1;
|
||||
const last = prev[lastIdx];
|
||||
if (last && last.type === 'bot-text' && !last.final) {
|
||||
const updated = [...prev];
|
||||
updated[lastIdx] = { ...last, final: true };
|
||||
return updated;
|
||||
}
|
||||
return prev;
|
||||
});
|
||||
break;
|
||||
|
||||
case 'rtf-user-mute-started':
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
"use client";
|
||||
|
||||
import { ArrowLeft, BookA, Brain, ExternalLink, Loader2, Mic, Pause, PhoneOff, Play, Rocket, Settings, Trash2Icon, Upload, Variable, X } from "lucide-react";
|
||||
import Link from "next/link";
|
||||
import { useParams, useRouter } from "next/navigation";
|
||||
import { useEffect, useMemo, useRef, useState } from "react";
|
||||
|
||||
|
|
@ -19,6 +20,7 @@ import { Separator } from "@/components/ui/separator";
|
|||
import { Switch } from "@/components/ui/switch";
|
||||
import { Textarea } from "@/components/ui/textarea";
|
||||
import { SETTINGS_DOCUMENTATION_URLS } from "@/constants/documentation";
|
||||
import { UnsavedChangesProvider, useUnsavedChanges, useUnsavedChangesContext } from "@/context/UnsavedChangesContext";
|
||||
import { useAudioPlayback } from "@/hooks/useAudioPlayback";
|
||||
import { useAuth } from "@/lib/auth";
|
||||
import logger from "@/lib/logger";
|
||||
|
|
@ -32,7 +34,6 @@ import {
|
|||
} from "@/types/workflow-configurations";
|
||||
|
||||
import { EmbedDialog } from "../components/EmbedDialog";
|
||||
import { RecordingsDialog } from "../components/RecordingsDialog";
|
||||
import { useWorkflowState } from "../hooks/useWorkflowState";
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
|
|
@ -113,6 +114,21 @@ function GeneralSection({
|
|||
const ambientFileInputRef = useRef<HTMLInputElement>(null);
|
||||
const { playingId, toggle: togglePlayback } = useAudioPlayback();
|
||||
|
||||
const isDirty = useMemo(() => {
|
||||
const initAmbient = workflowConfigurations.ambient_noise_configuration || DEFAULT_AMBIENT_NOISE_CONFIG;
|
||||
return (
|
||||
name !== workflowName ||
|
||||
JSON.stringify(ambientNoiseConfig) !== JSON.stringify(initAmbient) ||
|
||||
maxCallDuration !== (workflowConfigurations.max_call_duration || 600) ||
|
||||
maxUserIdleTimeout !== (workflowConfigurations.max_user_idle_timeout || 10) ||
|
||||
smartTurnStopSecs !== (workflowConfigurations.smart_turn_stop_secs || 2) ||
|
||||
turnStopStrategy !== (workflowConfigurations.turn_stop_strategy || "transcription") ||
|
||||
contextCompactionEnabled !== (workflowConfigurations.context_compaction_enabled ?? false)
|
||||
);
|
||||
}, [name, workflowName, ambientNoiseConfig, maxCallDuration, maxUserIdleTimeout, smartTurnStopSecs, turnStopStrategy, contextCompactionEnabled, workflowConfigurations]);
|
||||
|
||||
useUnsavedChanges("general", isDirty);
|
||||
|
||||
const handleAmbientFileUpload = async (file: File) => {
|
||||
if (file.size > MAX_AMBIENT_NOISE_FILE_SIZE) {
|
||||
setAudioUploadError(`File too large (${(file.size / (1024 * 1024)).toFixed(1)}MB). Maximum is 10MB.`);
|
||||
|
|
@ -463,8 +479,9 @@ function GeneralSection({
|
|||
</div>
|
||||
</div>
|
||||
</CardContent>
|
||||
<CardFooter className="justify-end border-t pt-6">
|
||||
<Button onClick={handleSave} disabled={isSaving}>
|
||||
<CardFooter className="justify-end gap-3 border-t pt-6">
|
||||
{isDirty && <span className="text-xs text-muted-foreground">Unsaved changes</span>}
|
||||
<Button onClick={handleSave} disabled={isSaving || !isDirty}>
|
||||
{isSaving ? "Saving..." : "Save General Settings"}
|
||||
</Button>
|
||||
</CardFooter>
|
||||
|
|
@ -488,6 +505,13 @@ function TemplateVariablesSection({
|
|||
const [newValue, setNewValue] = useState("");
|
||||
const [isSaving, setIsSaving] = useState(false);
|
||||
|
||||
const isDirty = useMemo(() => {
|
||||
const pendingVars = newKey && newValue ? { ...contextVars, [newKey]: newValue } : contextVars;
|
||||
return JSON.stringify(pendingVars) !== JSON.stringify(templateContextVariables);
|
||||
}, [contextVars, newKey, newValue, templateContextVariables]);
|
||||
|
||||
useUnsavedChanges("variables", isDirty);
|
||||
|
||||
const handleAdd = () => {
|
||||
if (newKey && newValue) {
|
||||
setContextVars((prev) => ({ ...prev, [newKey]: newValue }));
|
||||
|
|
@ -578,8 +602,9 @@ function TemplateVariablesSection({
|
|||
</Button>
|
||||
</div>
|
||||
</CardContent>
|
||||
<CardFooter className="justify-end border-t pt-6">
|
||||
<Button onClick={handleSave} disabled={isSaving}>
|
||||
<CardFooter className="justify-end gap-3 border-t pt-6">
|
||||
{isDirty && <span className="text-xs text-muted-foreground">Unsaved changes</span>}
|
||||
<Button onClick={handleSave} disabled={isSaving || !isDirty}>
|
||||
{isSaving ? "Saving..." : "Save Variables"}
|
||||
</Button>
|
||||
</CardFooter>
|
||||
|
|
@ -601,6 +626,10 @@ function DictionarySection({
|
|||
const [dictionaryValue, setDictionaryValue] = useState(dictionary);
|
||||
const [isSaving, setIsSaving] = useState(false);
|
||||
|
||||
const isDirty = dictionaryValue !== dictionary;
|
||||
|
||||
useUnsavedChanges("dictionary", isDirty);
|
||||
|
||||
const handleSave = async () => {
|
||||
setIsSaving(true);
|
||||
try {
|
||||
|
|
@ -633,8 +662,9 @@ function DictionarySection({
|
|||
className="resize-none"
|
||||
/>
|
||||
</CardContent>
|
||||
<CardFooter className="justify-end border-t pt-6">
|
||||
<Button onClick={handleSave} disabled={isSaving}>
|
||||
<CardFooter className="justify-end gap-3 border-t pt-6">
|
||||
{isDirty && <span className="text-xs text-muted-foreground">Unsaved changes</span>}
|
||||
<Button onClick={handleSave} disabled={isSaving || !isDirty}>
|
||||
{isSaving ? "Saving..." : "Save Dictionary"}
|
||||
</Button>
|
||||
</CardFooter>
|
||||
|
|
@ -669,6 +699,24 @@ function VoicemailSection({
|
|||
const [longSpeechTimeout, setLongSpeechTimeout] = useState(getConfig().long_speech_timeout);
|
||||
const [isSaving, setIsSaving] = useState(false);
|
||||
|
||||
const isDirty = useMemo(() => {
|
||||
const init = {
|
||||
...DEFAULT_VOICEMAIL_DETECTION_CONFIGURATION,
|
||||
...workflowConfigurations.voicemail_detection,
|
||||
};
|
||||
return (
|
||||
enabled !== init.enabled ||
|
||||
useWorkflowLlm !== init.use_workflow_llm ||
|
||||
provider !== (init.provider || "openai") ||
|
||||
model !== (init.model || "gpt-4.1") ||
|
||||
apiKey !== (init.api_key || "") ||
|
||||
systemPrompt !== (init.system_prompt || DEFAULT_VOICEMAIL_SYSTEM_PROMPT) ||
|
||||
longSpeechTimeout !== init.long_speech_timeout
|
||||
);
|
||||
}, [enabled, useWorkflowLlm, provider, model, apiKey, systemPrompt, longSpeechTimeout, workflowConfigurations]);
|
||||
|
||||
useUnsavedChanges("voicemail", isDirty);
|
||||
|
||||
const handleSave = async () => {
|
||||
setIsSaving(true);
|
||||
try {
|
||||
|
|
@ -772,8 +820,9 @@ function VoicemailSection({
|
|||
</>
|
||||
)}
|
||||
</CardContent>
|
||||
<CardFooter className="justify-end border-t pt-6">
|
||||
<Button onClick={handleSave} disabled={isSaving}>
|
||||
<CardFooter className="justify-end gap-3 border-t pt-6">
|
||||
{isDirty && <span className="text-xs text-muted-foreground">Unsaved changes</span>}
|
||||
<Button onClick={handleSave} disabled={isSaving || !isDirty}>
|
||||
{isSaving ? "Saving..." : "Save Voicemail Settings"}
|
||||
</Button>
|
||||
</CardFooter>
|
||||
|
|
@ -849,9 +898,23 @@ function WorkflowSettingsContent({
|
|||
workflow: WorkflowResponse;
|
||||
user: { id: string; email?: string };
|
||||
}) {
|
||||
const router = useRouter();
|
||||
return (
|
||||
<UnsavedChangesProvider>
|
||||
<WorkflowSettingsInner workflow={workflow} user={user} />
|
||||
</UnsavedChangesProvider>
|
||||
);
|
||||
}
|
||||
|
||||
function WorkflowSettingsInner({
|
||||
workflow,
|
||||
user,
|
||||
}: {
|
||||
workflow: WorkflowResponse;
|
||||
user: { id: string; email?: string };
|
||||
}) {
|
||||
const router = useRouter();
|
||||
const { dirtySections, confirmNavigate } = useUnsavedChangesContext();
|
||||
|
||||
const [isRecordingsDialogOpen, setIsRecordingsDialogOpen] = useState(false);
|
||||
const [isEmbedDialogOpen, setIsEmbedDialogOpen] = useState(false);
|
||||
const [activeSection, setActiveSection] = useState("general");
|
||||
|
||||
|
|
@ -921,7 +984,7 @@ function WorkflowSettingsContent({
|
|||
<Button
|
||||
variant="ghost"
|
||||
size="icon"
|
||||
onClick={() => router.push(`/workflow/${workflowId}`)}
|
||||
onClick={() => confirmNavigate(() => router.push(`/workflow/${workflowId}`))}
|
||||
>
|
||||
<ArrowLeft className="h-4 w-4" />
|
||||
</Button>
|
||||
|
|
@ -993,7 +1056,7 @@ function WorkflowSettingsContent({
|
|||
onSave={saveWorkflowConfigurations}
|
||||
/>
|
||||
|
||||
{/* Recordings (dialog trigger) */}
|
||||
{/* Recordings – moved to org-level page */}
|
||||
<Card id="recordings">
|
||||
<CardHeader>
|
||||
<CardTitle className="flex items-center gap-2 text-base">
|
||||
|
|
@ -1001,15 +1064,17 @@ function WorkflowSettingsContent({
|
|||
Recordings
|
||||
</CardTitle>
|
||||
<CardDescription>
|
||||
Upload or record audio for hybrid prompts. Use{" "}
|
||||
<code className="rounded bg-muted px-1 text-xs">@</code> in prompt fields to
|
||||
insert them.{" "}
|
||||
Recordings are now managed at the organization level and shared across all agents.
|
||||
Use <code className="rounded bg-muted px-1 text-xs">@</code> in prompt fields to insert them.{" "}
|
||||
<a href={SETTINGS_DOCUMENTATION_URLS.recordings} target="_blank" rel="noopener noreferrer" className="inline-flex items-center gap-0.5 underline">Learn more <ExternalLink className="h-3 w-3" /></a>
|
||||
</CardDescription>
|
||||
</CardHeader>
|
||||
<CardFooter className="border-t pt-6">
|
||||
<Button variant="outline" onClick={() => setIsRecordingsDialogOpen(true)}>
|
||||
Manage Recordings
|
||||
<Button variant="outline" asChild>
|
||||
<Link href="/recordings">
|
||||
Go to Recordings
|
||||
<ExternalLink className="ml-2 h-4 w-4" />
|
||||
</Link>
|
||||
</Button>
|
||||
</CardFooter>
|
||||
</Card>
|
||||
|
|
@ -1047,13 +1112,16 @@ function WorkflowSettingsContent({
|
|||
<a
|
||||
key={item.id}
|
||||
href={`#${item.id}`}
|
||||
className={`block rounded-md px-2 py-1 text-sm transition-colors hover:text-foreground ${
|
||||
className={`flex items-center gap-1.5 rounded-md px-2 py-1 text-sm transition-colors hover:text-foreground ${
|
||||
activeSection === item.id
|
||||
? "font-medium text-foreground"
|
||||
: "text-muted-foreground"
|
||||
}`}
|
||||
>
|
||||
{item.label}
|
||||
{dirtySections.has(item.id) && (
|
||||
<span className="h-1.5 w-1.5 rounded-full bg-orange-500" />
|
||||
)}
|
||||
</a>
|
||||
))}
|
||||
</div>
|
||||
|
|
@ -1061,12 +1129,6 @@ function WorkflowSettingsContent({
|
|||
</div>
|
||||
|
||||
{/* Dialogs for complex sections */}
|
||||
<RecordingsDialog
|
||||
open={isRecordingsDialogOpen}
|
||||
onOpenChange={setIsRecordingsDialogOpen}
|
||||
workflowId={workflowId}
|
||||
ttsOverrides={workflowConfigurations?.model_overrides?.tts}
|
||||
/>
|
||||
<EmbedDialog
|
||||
open={isEmbedDialogOpen}
|
||||
onOpenChange={setIsEmbedDialogOpen}
|
||||
|
|
|
|||
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
|
@ -269,12 +269,6 @@ export type BatchRecordingCreateResponseSchema = {
|
|||
* Request schema for getting presigned upload URLs for one or more files.
|
||||
*/
|
||||
export type BatchRecordingUploadRequestSchema = {
|
||||
/**
|
||||
* Workflow Id
|
||||
*
|
||||
* Workflow ID these recordings belong to
|
||||
*/
|
||||
workflow_id: number;
|
||||
/**
|
||||
* Files
|
||||
*
|
||||
|
|
@ -1572,13 +1566,19 @@ export type EndCallConfig = {
|
|||
*
|
||||
* Type of goodbye message
|
||||
*/
|
||||
messageType?: 'none' | 'custom';
|
||||
messageType?: 'none' | 'custom' | 'audio';
|
||||
/**
|
||||
* Custommessage
|
||||
*
|
||||
* Custom message to play before ending the call
|
||||
*/
|
||||
customMessage?: string | null;
|
||||
/**
|
||||
* Audiorecordingid
|
||||
*
|
||||
* Recording ID for audio goodbye message
|
||||
*/
|
||||
audioRecordingId?: string | null;
|
||||
/**
|
||||
* Endcallreason
|
||||
*
|
||||
|
|
@ -1739,6 +1739,24 @@ export type HttpApiConfig = {
|
|||
* Request timeout in milliseconds
|
||||
*/
|
||||
timeout_ms?: number | null;
|
||||
/**
|
||||
* Custommessage
|
||||
*
|
||||
* Custom message to play after tool execution
|
||||
*/
|
||||
customMessage?: string | null;
|
||||
/**
|
||||
* Custommessagetype
|
||||
*
|
||||
* Type of custom message: text or audio
|
||||
*/
|
||||
customMessageType?: 'text' | 'audio' | null;
|
||||
/**
|
||||
* Custommessagerecordingid
|
||||
*
|
||||
* Recording ID for audio custom message
|
||||
*/
|
||||
customMessageRecordingId?: string | null;
|
||||
};
|
||||
|
||||
/**
|
||||
|
|
@ -2102,30 +2120,24 @@ export type RecordingCreateRequestSchema = {
|
|||
* Short recording ID from upload step
|
||||
*/
|
||||
recording_id: string;
|
||||
/**
|
||||
* Workflow Id
|
||||
*
|
||||
* Workflow ID
|
||||
*/
|
||||
workflow_id: number;
|
||||
/**
|
||||
* Tts Provider
|
||||
*
|
||||
* TTS provider (e.g. elevenlabs)
|
||||
*/
|
||||
tts_provider: string;
|
||||
tts_provider?: string | null;
|
||||
/**
|
||||
* Tts Model
|
||||
*
|
||||
* TTS model name
|
||||
*/
|
||||
tts_model: string;
|
||||
tts_model?: string | null;
|
||||
/**
|
||||
* Tts Voice Id
|
||||
*
|
||||
* TTS voice identifier
|
||||
*/
|
||||
tts_voice_id: string;
|
||||
tts_voice_id?: string | null;
|
||||
/**
|
||||
* Transcript
|
||||
*
|
||||
|
|
@ -2181,7 +2193,7 @@ export type RecordingResponseSchema = {
|
|||
/**
|
||||
* Workflow Id
|
||||
*/
|
||||
workflow_id: number;
|
||||
workflow_id?: number | null;
|
||||
/**
|
||||
* Organization Id
|
||||
*/
|
||||
|
|
@ -2189,15 +2201,15 @@ export type RecordingResponseSchema = {
|
|||
/**
|
||||
* Tts Provider
|
||||
*/
|
||||
tts_provider: string;
|
||||
tts_provider?: string | null;
|
||||
/**
|
||||
* Tts Model
|
||||
*/
|
||||
tts_model: string;
|
||||
tts_model?: string | null;
|
||||
/**
|
||||
* Tts Voice Id
|
||||
*/
|
||||
tts_voice_id: string;
|
||||
tts_voice_id?: string | null;
|
||||
/**
|
||||
* Transcript
|
||||
*/
|
||||
|
|
@ -2230,6 +2242,20 @@ export type RecordingResponseSchema = {
|
|||
is_active: boolean;
|
||||
};
|
||||
|
||||
/**
|
||||
* RecordingUpdateRequestSchema
|
||||
*
|
||||
* Request schema for updating a recording's ID.
|
||||
*/
|
||||
export type RecordingUpdateRequestSchema = {
|
||||
/**
|
||||
* Recording Id
|
||||
*
|
||||
* New descriptive recording ID (letters, numbers, hyphens, underscores only)
|
||||
*/
|
||||
recording_id: string;
|
||||
};
|
||||
|
||||
/**
|
||||
* RecordingUploadResponseSchema
|
||||
*
|
||||
|
|
@ -2814,13 +2840,19 @@ export type TransferCallConfig = {
|
|||
*
|
||||
* Type of message to play before transfer
|
||||
*/
|
||||
messageType?: 'none' | 'custom';
|
||||
messageType?: 'none' | 'custom' | 'audio';
|
||||
/**
|
||||
* Custommessage
|
||||
*
|
||||
* Custom message to play before transferring the call
|
||||
*/
|
||||
customMessage?: string | null;
|
||||
/**
|
||||
* Audiorecordingid
|
||||
*
|
||||
* Recording ID for audio message before transfer
|
||||
*/
|
||||
audioRecordingId?: string | null;
|
||||
/**
|
||||
* Timeout
|
||||
*
|
||||
|
|
@ -8885,13 +8917,13 @@ export type ListRecordingsApiV1WorkflowRecordingsGetData = {
|
|||
'X-API-Key'?: string | null;
|
||||
};
|
||||
path?: never;
|
||||
query: {
|
||||
query?: {
|
||||
/**
|
||||
* Workflow Id
|
||||
*
|
||||
* Workflow ID
|
||||
* Filter by workflow ID
|
||||
*/
|
||||
workflow_id: number;
|
||||
workflow_id?: number | null;
|
||||
/**
|
||||
* Tts Provider
|
||||
*
|
||||
|
|
@ -9017,6 +9049,50 @@ export type DeleteRecordingApiV1WorkflowRecordingsRecordingIdDeleteResponses = {
|
|||
200: unknown;
|
||||
};
|
||||
|
||||
export type UpdateRecordingApiV1WorkflowRecordingsIdPatchData = {
|
||||
body: RecordingUpdateRequestSchema;
|
||||
headers?: {
|
||||
/**
|
||||
* Authorization
|
||||
*/
|
||||
authorization?: string | null;
|
||||
/**
|
||||
* X-Api-Key
|
||||
*/
|
||||
'X-API-Key'?: string | null;
|
||||
};
|
||||
path: {
|
||||
/**
|
||||
* Id
|
||||
*/
|
||||
id: number;
|
||||
};
|
||||
query?: never;
|
||||
url: '/api/v1/workflow-recordings/{id}';
|
||||
};
|
||||
|
||||
export type UpdateRecordingApiV1WorkflowRecordingsIdPatchErrors = {
|
||||
/**
|
||||
* Not found
|
||||
*/
|
||||
404: unknown;
|
||||
/**
|
||||
* Validation Error
|
||||
*/
|
||||
422: HttpValidationError;
|
||||
};
|
||||
|
||||
export type UpdateRecordingApiV1WorkflowRecordingsIdPatchError = UpdateRecordingApiV1WorkflowRecordingsIdPatchErrors[keyof UpdateRecordingApiV1WorkflowRecordingsIdPatchErrors];
|
||||
|
||||
export type UpdateRecordingApiV1WorkflowRecordingsIdPatchResponses = {
|
||||
/**
|
||||
* Successful Response
|
||||
*/
|
||||
200: RecordingResponseSchema;
|
||||
};
|
||||
|
||||
export type UpdateRecordingApiV1WorkflowRecordingsIdPatchResponse = UpdateRecordingApiV1WorkflowRecordingsIdPatchResponses[keyof UpdateRecordingApiV1WorkflowRecordingsIdPatchResponses];
|
||||
|
||||
export type TranscribeAudioApiV1WorkflowRecordingsTranscribePostData = {
|
||||
body: BodyTranscribeAudioApiV1WorkflowRecordingsTranscribePost;
|
||||
headers?: {
|
||||
|
|
|
|||
212
ui/src/components/flow/TextOrAudioInput.tsx
Normal file
212
ui/src/components/flow/TextOrAudioInput.tsx
Normal file
|
|
@ -0,0 +1,212 @@
|
|||
import { Check, ChevronDown, Pause, Play, Search } from "lucide-react";
|
||||
import { useMemo, useState } from "react";
|
||||
|
||||
import type { RecordingResponseSchema } from "@/client/types.gen";
|
||||
import { Button } from "@/components/ui/button";
|
||||
import { Input } from "@/components/ui/input";
|
||||
import { Label } from "@/components/ui/label";
|
||||
import { Popover, PopoverContentInline, PopoverTrigger } from "@/components/ui/popover";
|
||||
import { RadioGroup, RadioGroupItem } from "@/components/ui/radio-group";
|
||||
import { useAudioPlayback } from "@/hooks/useAudioPlayback";
|
||||
import { cn } from "@/lib/utils";
|
||||
|
||||
interface TextOrAudioInputProps {
|
||||
type: 'text' | 'audio';
|
||||
onTypeChange: (type: 'text' | 'audio') => void;
|
||||
recordingId: string;
|
||||
onRecordingIdChange: (id: string) => void;
|
||||
recordings?: RecordingResponseSchema[];
|
||||
/** Rendered when type === 'text' */
|
||||
children: React.ReactNode;
|
||||
}
|
||||
|
||||
export function TextOrAudioInput({
|
||||
type,
|
||||
onTypeChange,
|
||||
recordingId,
|
||||
onRecordingIdChange,
|
||||
recordings = [],
|
||||
children,
|
||||
}: TextOrAudioInputProps) {
|
||||
return (
|
||||
<>
|
||||
<RadioGroup
|
||||
value={type}
|
||||
onValueChange={(value) => onTypeChange(value as 'text' | 'audio')}
|
||||
className="flex items-center gap-4"
|
||||
>
|
||||
<div className="flex items-center gap-2">
|
||||
<RadioGroupItem value="text" id="toa-text" />
|
||||
<Label htmlFor="toa-text" className="font-normal cursor-pointer">Text</Label>
|
||||
</div>
|
||||
<div className="flex items-center gap-2">
|
||||
<RadioGroupItem value="audio" id="toa-audio" />
|
||||
<Label htmlFor="toa-audio" className="font-normal cursor-pointer">Audio</Label>
|
||||
</div>
|
||||
</RadioGroup>
|
||||
{type === 'text' ? (
|
||||
children
|
||||
) : (
|
||||
<RecordingSelect
|
||||
value={recordingId}
|
||||
onChange={onRecordingIdChange}
|
||||
recordings={recordings}
|
||||
/>
|
||||
)}
|
||||
</>
|
||||
);
|
||||
}
|
||||
|
||||
interface RecordingSelectProps {
|
||||
value: string;
|
||||
onChange: (id: string) => void;
|
||||
recordings: RecordingResponseSchema[];
|
||||
}
|
||||
|
||||
/**
|
||||
* Dropdown to select a pre-recorded audio file.
|
||||
* Re-exported so callers that only need the dropdown (e.g. tool configs with
|
||||
* their own none/custom/audio radio) can use it directly.
|
||||
*/
|
||||
export function RecordingSelect({ value, onChange, recordings }: RecordingSelectProps) {
|
||||
const [open, setOpen] = useState(false);
|
||||
const [search, setSearch] = useState("");
|
||||
const { playingId, toggle, stop } = useAudioPlayback();
|
||||
|
||||
const selected = recordings.find((r) => String(r.id) === value);
|
||||
|
||||
const filtered = useMemo(() => {
|
||||
if (!search) return recordings;
|
||||
const q = search.toLowerCase();
|
||||
return recordings.filter((r) =>
|
||||
r.recording_id.toLowerCase().includes(q) ||
|
||||
r.transcript.toLowerCase().includes(q) ||
|
||||
((r.metadata?.original_filename as string) || "").toLowerCase().includes(q)
|
||||
);
|
||||
}, [recordings, search]);
|
||||
|
||||
const handleSelect = (rec: RecordingResponseSchema) => {
|
||||
stop();
|
||||
onChange(String(rec.id));
|
||||
setOpen(false);
|
||||
};
|
||||
|
||||
const handlePlay = async (e: React.MouseEvent, rec: RecordingResponseSchema) => {
|
||||
e.stopPropagation();
|
||||
try {
|
||||
await toggle(rec.recording_id, rec.storage_key, rec.storage_backend);
|
||||
} catch {
|
||||
// Ignore playback errors
|
||||
}
|
||||
};
|
||||
|
||||
return (
|
||||
<div className="space-y-2">
|
||||
<Label className="text-xs text-muted-foreground">
|
||||
Select a pre-recorded audio file to play.
|
||||
</Label>
|
||||
<Popover modal open={open} onOpenChange={(v) => { if (!v) { stop(); setSearch(""); } setOpen(v); }}>
|
||||
<PopoverTrigger asChild>
|
||||
<Button
|
||||
variant="outline"
|
||||
role="combobox"
|
||||
aria-expanded={open}
|
||||
className="w-full justify-between h-auto min-h-9 font-normal"
|
||||
>
|
||||
{selected ? (
|
||||
<span className="flex items-center gap-2 text-left">
|
||||
<code className="text-xs bg-muted px-1 py-0.5 rounded font-mono shrink-0">
|
||||
{selected.recording_id}
|
||||
</code>
|
||||
<span className="text-sm">
|
||||
{selected.transcript.length > 75
|
||||
? `${selected.transcript.slice(0, 75)}…`
|
||||
: selected.transcript}
|
||||
</span>
|
||||
</span>
|
||||
) : (
|
||||
<span className="text-muted-foreground">Select a recording</span>
|
||||
)}
|
||||
<ChevronDown className="ml-2 h-4 w-4 shrink-0 opacity-50" />
|
||||
</Button>
|
||||
</PopoverTrigger>
|
||||
<PopoverContentInline
|
||||
className="w-[var(--radix-popover-trigger-width)] p-0"
|
||||
align="start"
|
||||
>
|
||||
{recordings.length === 0 ? (
|
||||
<div className="p-3 text-sm text-muted-foreground text-center">
|
||||
No recordings available
|
||||
</div>
|
||||
) : (
|
||||
<div>
|
||||
<div className="p-2 border-b">
|
||||
<div className="relative">
|
||||
<Search className="absolute left-2.5 top-1/2 -translate-y-1/2 h-3.5 w-3.5 text-muted-foreground" />
|
||||
<Input
|
||||
placeholder="Search by ID, transcript, or filename..."
|
||||
value={search}
|
||||
onChange={(e) => setSearch(e.target.value)}
|
||||
className="h-8 pl-8 text-sm"
|
||||
autoFocus
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
<div className="max-h-56 overflow-y-auto">
|
||||
{filtered.length === 0 ? (
|
||||
<div className="p-3 text-sm text-muted-foreground text-center">
|
||||
No recordings match “{search}”
|
||||
</div>
|
||||
) : filtered.map((r) => {
|
||||
const filename = (r.metadata?.original_filename as string) || "";
|
||||
const isSelected = String(r.id) === value;
|
||||
const isPlaying = playingId === r.recording_id;
|
||||
|
||||
return (
|
||||
<div
|
||||
key={r.id}
|
||||
className={cn(
|
||||
"flex items-center gap-2 px-3 py-2 cursor-pointer hover:bg-accent transition-colors",
|
||||
isSelected && "bg-accent"
|
||||
)}
|
||||
onClick={() => handleSelect(r)}
|
||||
>
|
||||
<Check className={cn(
|
||||
"h-4 w-4 shrink-0",
|
||||
isSelected ? "opacity-100" : "opacity-0"
|
||||
)} />
|
||||
<code className="text-xs bg-muted px-1 py-0.5 rounded font-mono shrink-0">
|
||||
{r.recording_id}
|
||||
</code>
|
||||
{filename && (
|
||||
<span className="text-xs text-muted-foreground shrink-0 max-w-[100px] truncate">
|
||||
{filename}
|
||||
</span>
|
||||
)}
|
||||
<span className="text-xs text-muted-foreground bg-muted/50 px-1.5 py-0.5 rounded truncate flex-1 min-w-0">
|
||||
{r.transcript}
|
||||
</span>
|
||||
<Button
|
||||
type="button"
|
||||
variant="ghost"
|
||||
size="sm"
|
||||
className="h-7 w-7 p-0 shrink-0"
|
||||
onClick={(e) => handlePlay(e, r)}
|
||||
>
|
||||
{isPlaying ? (
|
||||
<Pause className="h-3.5 w-3.5" />
|
||||
) : (
|
||||
<Play className="h-3.5 w-3.5" />
|
||||
)}
|
||||
</Button>
|
||||
</div>
|
||||
);
|
||||
})}
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
</PopoverContentInline>
|
||||
</Popover>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
|
@ -4,6 +4,7 @@ import { useCallback, useEffect, useState } from 'react';
|
|||
|
||||
import { useWorkflow, useWorkflowOptional } from "@/app/workflow/[workflowId]/contexts/WorkflowContext";
|
||||
import { useWorkflowStore } from "@/app/workflow/[workflowId]/stores/workflowStore";
|
||||
import { TextOrAudioInput } from "@/components/flow/TextOrAudioInput";
|
||||
import { Button } from "@/components/ui/button";
|
||||
import { Dialog, DialogContent, DialogFooter, DialogHeader, DialogTitle } from "@/components/ui/dialog";
|
||||
import { Input } from "@/components/ui/input";
|
||||
|
|
@ -24,9 +25,12 @@ interface EdgeDetailsDialogProps {
|
|||
|
||||
const EdgeDetailsDialog = ({ open, onOpenChange, data, onSave }: EdgeDetailsDialogProps) => {
|
||||
const readOnly = useWorkflowOptional()?.readOnly ?? false;
|
||||
const { recordings } = useWorkflow();
|
||||
const [condition, setCondition] = useState(data?.condition ?? '');
|
||||
const [label, setLabel] = useState(data?.label ?? '');
|
||||
const [transitionSpeech, setTransitionSpeech] = useState(data?.transition_speech ?? '');
|
||||
const [transitionSpeechType, setTransitionSpeechType] = useState<'text' | 'audio'>(data?.transition_speech_type ?? 'text');
|
||||
const [transitionSpeechRecordingId, setTransitionSpeechRecordingId] = useState(data?.transition_speech_recording_id ?? '');
|
||||
|
||||
// Update form state when data changes (e.g., from undo/redo)
|
||||
useEffect(() => {
|
||||
|
|
@ -34,13 +38,21 @@ const EdgeDetailsDialog = ({ open, onOpenChange, data, onSave }: EdgeDetailsDial
|
|||
setCondition(data?.condition ?? '');
|
||||
setLabel(data?.label ?? '');
|
||||
setTransitionSpeech(data?.transition_speech ?? '');
|
||||
setTransitionSpeechType(data?.transition_speech_type ?? 'text');
|
||||
setTransitionSpeechRecordingId(data?.transition_speech_recording_id ?? '');
|
||||
}
|
||||
}, [data, open]);
|
||||
|
||||
const handleSave = useCallback(() => {
|
||||
onSave({ condition: condition, label: label, transition_speech: transitionSpeech || undefined });
|
||||
onSave({
|
||||
condition,
|
||||
label,
|
||||
transition_speech: transitionSpeechType === 'text' ? (transitionSpeech || undefined) : undefined,
|
||||
transition_speech_type: transitionSpeechType,
|
||||
transition_speech_recording_id: transitionSpeechType === 'audio' ? (transitionSpeechRecordingId || undefined) : undefined,
|
||||
});
|
||||
onOpenChange(false);
|
||||
}, [condition, label, transitionSpeech, onSave, onOpenChange]);
|
||||
}, [condition, label, transitionSpeech, transitionSpeechType, transitionSpeechRecordingId, onSave, onOpenChange]);
|
||||
|
||||
// Handle Cmd+S / Ctrl+S keyboard shortcut to save
|
||||
useEffect(() => {
|
||||
|
|
@ -60,7 +72,7 @@ const EdgeDetailsDialog = ({ open, onOpenChange, data, onSave }: EdgeDetailsDial
|
|||
|
||||
return (
|
||||
<Dialog open={open} onOpenChange={onOpenChange}>
|
||||
<DialogContent>
|
||||
<DialogContent className="max-h-[85vh] flex flex-col">
|
||||
<DialogHeader>
|
||||
<DialogTitle>Edit Condition</DialogTitle>
|
||||
{data?.invalid && data.validationMessage && (
|
||||
|
|
@ -70,7 +82,7 @@ const EdgeDetailsDialog = ({ open, onOpenChange, data, onSave }: EdgeDetailsDial
|
|||
</div>
|
||||
)}
|
||||
</DialogHeader>
|
||||
<div className="grid gap-4 py-4">
|
||||
<div className="grid gap-4 py-4 overflow-y-auto">
|
||||
<div className="grid gap-2">
|
||||
<Label>Condition Label</Label>
|
||||
<Label className="text-xs text-muted-foreground">
|
||||
|
|
@ -99,18 +111,28 @@ const EdgeDetailsDialog = ({ open, onOpenChange, data, onSave }: EdgeDetailsDial
|
|||
<div className="grid gap-2">
|
||||
<Label>Transition Speech</Label>
|
||||
<Label className="text-xs text-muted-foreground">
|
||||
Optional text the assistant will speak right before transitioning to the node.
|
||||
This text will not be attached in Conversation Context. Use this as simple filler to reduce latency.
|
||||
Optional text or audio the assistant will play right before transitioning to the node.
|
||||
This will not be attached in Conversation Context. Use this as simple filler to reduce latency.
|
||||
</Label>
|
||||
<div className="flex items-start gap-2 rounded-md bg-amber-50 p-2 text-xs text-amber-700 border border-amber-200">
|
||||
<AlertCircle className="h-3.5 w-3.5 mt-0.5 shrink-0" />
|
||||
<span>This text is spoken as-is. For multilingual workflows, choose your phrasing carefully.</span>
|
||||
</div>
|
||||
<Textarea
|
||||
value={transitionSpeech}
|
||||
placeholder="e.g. Let me transfer you to our billing department..."
|
||||
onChange={(e) => setTransitionSpeech(e.target.value)}
|
||||
/>
|
||||
<TextOrAudioInput
|
||||
type={transitionSpeechType}
|
||||
onTypeChange={setTransitionSpeechType}
|
||||
recordingId={transitionSpeechRecordingId}
|
||||
onRecordingIdChange={setTransitionSpeechRecordingId}
|
||||
recordings={recordings ?? []}
|
||||
>
|
||||
<>
|
||||
<div className="flex items-start gap-2 rounded-md bg-amber-50 p-2 text-xs text-amber-700 border border-amber-200">
|
||||
<AlertCircle className="h-3.5 w-3.5 mt-0.5 shrink-0" />
|
||||
<span>This text is spoken as-is. For multilingual workflows, choose your phrasing carefully.</span>
|
||||
</div>
|
||||
<Textarea
|
||||
value={transitionSpeech}
|
||||
placeholder="e.g. Let me transfer you to our billing department..."
|
||||
onChange={(e) => setTransitionSpeech(e.target.value)}
|
||||
/>
|
||||
</>
|
||||
</TextOrAudioInput>
|
||||
</div>
|
||||
</div>
|
||||
<DialogFooter>
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@ import type { RecordingResponseSchema } from "@/client/types.gen";
|
|||
import { DocumentBadges } from "@/components/flow/DocumentBadges";
|
||||
import { DocumentSelector } from "@/components/flow/DocumentSelector";
|
||||
import { MentionTextarea } from "@/components/flow/MentionTextarea";
|
||||
import { TextOrAudioInput } from "@/components/flow/TextOrAudioInput";
|
||||
import { ToolBadges } from "@/components/flow/ToolBadges";
|
||||
import { ToolSelector } from "@/components/flow/ToolSelector";
|
||||
import { ExtractionVariable, FlowNodeData } from "@/components/flow/types";
|
||||
|
|
@ -26,8 +27,12 @@ import { useNodeHandlers } from "./common/useNodeHandlers";
|
|||
|
||||
interface StartCallEditFormProps {
|
||||
nodeData: FlowNodeData;
|
||||
greetingType: 'text' | 'audio';
|
||||
setGreetingType: (value: 'text' | 'audio') => void;
|
||||
greeting: string;
|
||||
setGreeting: (value: string) => void;
|
||||
greetingRecordingId: string;
|
||||
setGreetingRecordingId: (value: string) => void;
|
||||
prompt: string;
|
||||
setPrompt: (value: string) => void;
|
||||
name: string;
|
||||
|
|
@ -73,7 +78,9 @@ export const StartCall = memo(({ data, selected, id }: StartCallNodeProps) => {
|
|||
const { saveWorkflow, tools, documents, recordings } = useWorkflow();
|
||||
|
||||
// Form state
|
||||
const [greetingType, setGreetingType] = useState<'text' | 'audio'>(data.greeting_type ?? "text");
|
||||
const [greeting, setGreeting] = useState(data.greeting ?? "");
|
||||
const [greetingRecordingId, setGreetingRecordingId] = useState(data.greeting_recording_id ?? "");
|
||||
const [prompt, setPrompt] = useState(data.prompt ?? "");
|
||||
const [name, setName] = useState(data.name);
|
||||
const [allowInterrupt, setAllowInterrupt] = useState(data.allow_interrupt ?? true);
|
||||
|
|
@ -109,7 +116,9 @@ export const StartCall = memo(({ data, selected, id }: StartCallNodeProps) => {
|
|||
|
||||
handleSaveNodeData({
|
||||
...data,
|
||||
greeting: greeting || undefined,
|
||||
greeting_type: greetingType,
|
||||
greeting: greetingType === 'text' ? (greeting || undefined) : undefined,
|
||||
greeting_recording_id: greetingType === 'audio' ? (greetingRecordingId || undefined) : undefined,
|
||||
prompt,
|
||||
name,
|
||||
allow_interrupt: allowInterrupt,
|
||||
|
|
@ -132,7 +141,9 @@ export const StartCall = memo(({ data, selected, id }: StartCallNodeProps) => {
|
|||
// Reset form state when dialog opens
|
||||
const handleOpenChange = (newOpen: boolean) => {
|
||||
if (newOpen) {
|
||||
setGreetingType(data.greeting_type ?? "text");
|
||||
setGreeting(data.greeting ?? "");
|
||||
setGreetingRecordingId(data.greeting_recording_id ?? "");
|
||||
setPrompt(data.prompt ?? "");
|
||||
setName(data.name);
|
||||
setAllowInterrupt(data.allow_interrupt ?? true);
|
||||
|
|
@ -154,7 +165,9 @@ export const StartCall = memo(({ data, selected, id }: StartCallNodeProps) => {
|
|||
// Update form state when data changes (e.g., from undo/redo)
|
||||
useEffect(() => {
|
||||
if (open) {
|
||||
setGreetingType(data.greeting_type ?? "text");
|
||||
setGreeting(data.greeting ?? "");
|
||||
setGreetingRecordingId(data.greeting_recording_id ?? "");
|
||||
setPrompt(data.prompt ?? "");
|
||||
setName(data.name);
|
||||
setAllowInterrupt(data.allow_interrupt ?? true);
|
||||
|
|
@ -247,8 +260,12 @@ export const StartCall = memo(({ data, selected, id }: StartCallNodeProps) => {
|
|||
{open && (
|
||||
<StartCallEditForm
|
||||
nodeData={data}
|
||||
greetingType={greetingType}
|
||||
setGreetingType={setGreetingType}
|
||||
greeting={greeting}
|
||||
setGreeting={setGreeting}
|
||||
greetingRecordingId={greetingRecordingId}
|
||||
setGreetingRecordingId={setGreetingRecordingId}
|
||||
prompt={prompt}
|
||||
setPrompt={setPrompt}
|
||||
name={name}
|
||||
|
|
@ -288,8 +305,12 @@ export const StartCall = memo(({ data, selected, id }: StartCallNodeProps) => {
|
|||
});
|
||||
|
||||
const StartCallEditForm = ({
|
||||
greetingType,
|
||||
setGreetingType,
|
||||
greeting,
|
||||
setGreeting,
|
||||
greetingRecordingId,
|
||||
setGreetingRecordingId,
|
||||
prompt,
|
||||
setPrompt,
|
||||
name,
|
||||
|
|
@ -362,15 +383,22 @@ const StartCallEditForm = ({
|
|||
|
||||
<Label>Greeting</Label>
|
||||
<Label className="text-xs text-muted-foreground">
|
||||
Optional greeting message played via TTS when the call starts. If set, this will be spoken directly instead of generating a response from the LLM. Supports template variables like {"{{variable_name}}"}.
|
||||
Optional greeting played when the call starts. Choose between a text message (spoken via TTS) or a pre-recorded audio file.
|
||||
</Label>
|
||||
<MentionTextarea
|
||||
value={greeting}
|
||||
onChange={setGreeting}
|
||||
className="min-h-[60px] max-h-[200px] resize-none overflow-y-auto"
|
||||
placeholder="e.g. Hello {{first_name}}, this is Sarah calling from Acme Corp."
|
||||
<TextOrAudioInput
|
||||
type={greetingType}
|
||||
onTypeChange={setGreetingType}
|
||||
recordingId={greetingRecordingId}
|
||||
onRecordingIdChange={setGreetingRecordingId}
|
||||
recordings={recordings}
|
||||
/>
|
||||
>
|
||||
<Textarea
|
||||
value={greeting}
|
||||
onChange={(e) => setGreeting(e.target.value)}
|
||||
className="min-h-[60px] max-h-[200px] resize-none overflow-y-auto"
|
||||
placeholder="e.g. Hello {{first_name}}, this is Sarah calling from Acme Corp."
|
||||
/>
|
||||
</TextOrAudioInput>
|
||||
|
||||
<Label>Prompt</Label>
|
||||
<Label className="text-xs text-muted-foreground">
|
||||
|
|
|
|||
|
|
@ -24,6 +24,8 @@ export type FlowNodeData = {
|
|||
extraction_variables?: ExtractionVariable[];
|
||||
add_global_prompt?: boolean;
|
||||
greeting?: string;
|
||||
greeting_type?: 'text' | 'audio';
|
||||
greeting_recording_id?: string;
|
||||
wait_for_user_greeting?: boolean;
|
||||
detect_voicemail?: boolean;
|
||||
delayed_start?: boolean;
|
||||
|
|
@ -79,6 +81,8 @@ export type FlowEdgeData = {
|
|||
condition: string;
|
||||
label: string;
|
||||
transition_speech?: string;
|
||||
transition_speech_type?: 'text' | 'audio';
|
||||
transition_speech_recording_id?: string;
|
||||
invalid?: boolean;
|
||||
validationMessage?: string | null;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@
|
|||
|
||||
import type { Team } from "@stackframe/stack";
|
||||
import {
|
||||
AudioLines,
|
||||
Brain,
|
||||
ChevronLeft,
|
||||
ChevronRight,
|
||||
|
|
@ -135,6 +136,11 @@ export function AppSidebar() {
|
|||
url: "/files",
|
||||
icon: Database,
|
||||
},
|
||||
{
|
||||
title: "Recordings",
|
||||
url: "/recordings",
|
||||
icon: AudioLines,
|
||||
},
|
||||
// {
|
||||
// title: "Integrations",
|
||||
// url: "/integrations",
|
||||
|
|
|
|||
|
|
@ -56,6 +56,23 @@ function DialogContent({
|
|||
<DialogOverlay />
|
||||
<DialogPrimitive.Content
|
||||
onOpenAutoFocus={e => e.preventDefault()}
|
||||
onCloseAutoFocus={() => {
|
||||
document.body.style.pointerEvents = "";
|
||||
}}
|
||||
onPointerDownOutside={(e) => {
|
||||
// Prevent the Dialog from closing when the user clicks inside a
|
||||
// portaled Radix Popover/DropdownMenu rendered on top of this Dialog.
|
||||
const target = e.target as HTMLElement;
|
||||
if (target.closest('[data-radix-popper-content-wrapper]')) {
|
||||
e.preventDefault();
|
||||
}
|
||||
}}
|
||||
onInteractOutside={(e) => {
|
||||
const target = e.target as HTMLElement;
|
||||
if (target.closest('[data-radix-popper-content-wrapper]')) {
|
||||
e.preventDefault();
|
||||
}
|
||||
}}
|
||||
data-slot="dialog-content"
|
||||
className={cn(
|
||||
"bg-background data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 data-[state=closed]:zoom-out-95 data-[state=open]:zoom-in-95 fixed top-[50%] left-[50%] z-50 grid w-full max-w-[calc(100%-2rem)] translate-x-[-50%] translate-y-[-50%] gap-4 rounded-lg border p-6 shadow-lg duration-200 sm:max-w-lg",
|
||||
|
|
|
|||
|
|
@ -17,6 +17,9 @@ function PopoverTrigger({
|
|||
return <PopoverPrimitive.Trigger data-slot="popover-trigger" {...props} />
|
||||
}
|
||||
|
||||
const popoverContentClass =
|
||||
"bg-popover text-popover-foreground data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 data-[state=closed]:zoom-out-95 data-[state=open]:zoom-in-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 z-50 w-72 origin-(--radix-popover-content-transform-origin) rounded-md border p-4 shadow-md outline-hidden"
|
||||
|
||||
function PopoverContent({
|
||||
className,
|
||||
align = "center",
|
||||
|
|
@ -29,20 +32,38 @@ function PopoverContent({
|
|||
data-slot="popover-content"
|
||||
align={align}
|
||||
sideOffset={sideOffset}
|
||||
className={cn(
|
||||
"bg-popover text-popover-foreground data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 data-[state=closed]:zoom-out-95 data-[state=open]:zoom-in-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 z-50 w-72 origin-(--radix-popover-content-transform-origin) rounded-md border p-4 shadow-md outline-hidden",
|
||||
className
|
||||
)}
|
||||
className={cn(popoverContentClass, className)}
|
||||
{...props}
|
||||
/>
|
||||
</PopoverPrimitive.Portal>
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* PopoverContent without a Portal wrapper. Renders inline in the DOM tree,
|
||||
* which avoids focus-trap conflicts when used inside a Dialog.
|
||||
*/
|
||||
function PopoverContentInline({
|
||||
className,
|
||||
align = "center",
|
||||
sideOffset = 4,
|
||||
...props
|
||||
}: React.ComponentProps<typeof PopoverPrimitive.Content>) {
|
||||
return (
|
||||
<PopoverPrimitive.Content
|
||||
data-slot="popover-content"
|
||||
align={align}
|
||||
sideOffset={sideOffset}
|
||||
className={cn(popoverContentClass, className)}
|
||||
{...props}
|
||||
/>
|
||||
)
|
||||
}
|
||||
|
||||
function PopoverAnchor({
|
||||
...props
|
||||
}: React.ComponentProps<typeof PopoverPrimitive.Anchor>) {
|
||||
return <PopoverPrimitive.Anchor data-slot="popover-anchor" {...props} />
|
||||
}
|
||||
|
||||
export { Popover, PopoverAnchor,PopoverContent, PopoverTrigger }
|
||||
export { Popover, PopoverAnchor, PopoverContent, PopoverContentInline, PopoverTrigger }
|
||||
|
|
|
|||
269
ui/src/context/UnsavedChangesContext.tsx
Normal file
269
ui/src/context/UnsavedChangesContext.tsx
Normal file
|
|
@ -0,0 +1,269 @@
|
|||
"use client";
|
||||
|
||||
import { createContext, useCallback, useContext, useEffect, useLayoutEffect, useRef, useState } from "react";
|
||||
|
||||
import {
|
||||
AlertDialog,
|
||||
AlertDialogAction,
|
||||
AlertDialogCancel,
|
||||
AlertDialogContent,
|
||||
AlertDialogDescription,
|
||||
AlertDialogFooter,
|
||||
AlertDialogHeader,
|
||||
AlertDialogTitle,
|
||||
} from "@/components/ui/alert-dialog";
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Context
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
interface UnsavedChangesContextValue {
|
||||
register: (id: string, isDirty: boolean) => void;
|
||||
unregister: (id: string) => void;
|
||||
hasDirtyChanges: boolean;
|
||||
dirtySections: Set<string>;
|
||||
/** Wrap programmatic navigation (e.g. router.push) to guard against unsaved changes. */
|
||||
confirmNavigate: (navigate: () => void) => void;
|
||||
}
|
||||
|
||||
const UnsavedChangesContext = createContext<UnsavedChangesContextValue | null>(null);
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Provider
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Wraps a page to guard against accidental navigation when sections have
|
||||
* unsaved changes. Intercepts:
|
||||
*
|
||||
* - Browser back / forward (`popstate` with history-state tracking)
|
||||
* - In-app link clicks (document-level click capture on `<a>` tags)
|
||||
*
|
||||
* Sections register via the `useUnsavedChanges` hook.
|
||||
*/
|
||||
export function UnsavedChangesProvider({ children }: { children: React.ReactNode }) {
|
||||
const [dirtySections, setDirtySections] = useState<Set<string>>(new Set());
|
||||
const [showDialog, setShowDialog] = useState(false);
|
||||
const pendingNavigate = useRef<(() => void) | null>(null);
|
||||
|
||||
const hasDirtyChanges = dirtySections.size > 0;
|
||||
const hasDirtyRef = useRef(hasDirtyChanges);
|
||||
hasDirtyRef.current = hasDirtyChanges;
|
||||
|
||||
// -- Section registration ------------------------------------------------
|
||||
|
||||
const register = useCallback((id: string, isDirty: boolean) => {
|
||||
setDirtySections((prev) => {
|
||||
const next = new Set(prev);
|
||||
if (isDirty) next.add(id);
|
||||
else next.delete(id);
|
||||
return next;
|
||||
});
|
||||
}, []);
|
||||
|
||||
const unregister = useCallback((id: string) => {
|
||||
setDirtySections((prev) => {
|
||||
if (!prev.has(id)) return prev;
|
||||
const next = new Set(prev);
|
||||
next.delete(id);
|
||||
return next;
|
||||
});
|
||||
}, []);
|
||||
|
||||
// -- Helper: prompt or proceed -------------------------------------------
|
||||
|
||||
const askOrProceed = useCallback((proceed: () => void) => {
|
||||
if (!hasDirtyRef.current) {
|
||||
proceed();
|
||||
return;
|
||||
}
|
||||
pendingNavigate.current = proceed;
|
||||
setTimeout(() => setShowDialog(true), 0);
|
||||
}, []);
|
||||
|
||||
// -- 1. Intercept <a> clicks in capture phase -----------------------------
|
||||
//
|
||||
// Next.js <Link> renders <a> tags. By listening in the capture phase we
|
||||
// intercept the click before React / Next.js processes it. If the user
|
||||
// confirms, we navigate programmatically via window.location.
|
||||
|
||||
useEffect(() => {
|
||||
const handleClick = (e: MouseEvent) => {
|
||||
if (!hasDirtyRef.current) return;
|
||||
|
||||
const target = e.target as HTMLElement;
|
||||
const link = target.closest("a[href]") as HTMLAnchorElement | null;
|
||||
if (!link) return;
|
||||
|
||||
const href = link.getAttribute("href");
|
||||
if (!href) return;
|
||||
|
||||
// Skip external links
|
||||
if (href.startsWith("http://") || href.startsWith("https://") || href.startsWith("//")) return;
|
||||
// Skip hash-only links (in-page anchors)
|
||||
if (href.startsWith("#")) return;
|
||||
// Skip links that open in a new tab/window
|
||||
if (link.target && link.target !== "_self") return;
|
||||
// Skip download links
|
||||
if (link.hasAttribute("download")) return;
|
||||
// Skip if modifier keys are held (Ctrl+click, Cmd+click, etc.)
|
||||
if (e.metaKey || e.ctrlKey || e.shiftKey || e.altKey) return;
|
||||
// Skip non-left clicks
|
||||
if (e.button !== 0) return;
|
||||
|
||||
// Block the navigation and ask the user
|
||||
e.preventDefault();
|
||||
e.stopPropagation();
|
||||
e.stopImmediatePropagation();
|
||||
|
||||
askOrProceed(() => {
|
||||
// Navigate after user confirms
|
||||
window.location.href = href;
|
||||
});
|
||||
};
|
||||
|
||||
// Capture phase so we fire before React / Next.js handlers
|
||||
document.addEventListener("click", handleClick, true);
|
||||
return () => document.removeEventListener("click", handleClick, true);
|
||||
}, [askOrProceed]);
|
||||
|
||||
// -- 3. Browser back / forward (`popstate`) ------------------------------
|
||||
//
|
||||
// When the browser fires popstate the URL has already changed. We push
|
||||
// the current page back onto the stack to "undo" the navigation, then
|
||||
// show the dialog. If confirmed, we call history.back() for real.
|
||||
|
||||
useLayoutEffect(() => {
|
||||
// Track our own history stack index so we can correctly reverse
|
||||
// back/forward regardless of how many entries deep we are.
|
||||
let stackIndex = (history.state?.__unsaved_guard_index as number) ?? 0;
|
||||
|
||||
const originalPushState = history.pushState.bind(history);
|
||||
const originalReplaceState = history.replaceState.bind(history);
|
||||
|
||||
// Augment pushState to track stack depth
|
||||
history.pushState = function (state, unused, url) {
|
||||
stackIndex++;
|
||||
const augmented = { ...state, __unsaved_guard_index: stackIndex };
|
||||
return originalPushState(augmented, unused, url);
|
||||
};
|
||||
|
||||
history.replaceState = function (state, unused, url) {
|
||||
const augmented = { ...state, __unsaved_guard_index: stackIndex };
|
||||
return originalReplaceState(augmented, unused, url);
|
||||
};
|
||||
|
||||
// Write initial index if not present
|
||||
if (history.state?.__unsaved_guard_index == null) {
|
||||
originalReplaceState(
|
||||
{ ...history.state, __unsaved_guard_index: stackIndex },
|
||||
"",
|
||||
location.href,
|
||||
);
|
||||
}
|
||||
|
||||
const handlePopState = (e: PopStateEvent) => {
|
||||
if (!hasDirtyRef.current) {
|
||||
// Not dirty — accept navigation, update our tracked index
|
||||
stackIndex = (e.state?.__unsaved_guard_index as number) ?? stackIndex;
|
||||
return;
|
||||
}
|
||||
|
||||
const nextIndex = (e.state?.__unsaved_guard_index as number) ?? 0;
|
||||
const delta = nextIndex - stackIndex;
|
||||
|
||||
if (delta === 0) return;
|
||||
|
||||
// Undo the navigation the browser already did
|
||||
history.go(-delta);
|
||||
|
||||
askOrProceed(() => {
|
||||
// User confirmed — replay the navigation
|
||||
stackIndex = nextIndex;
|
||||
history.go(delta);
|
||||
});
|
||||
};
|
||||
|
||||
window.addEventListener("popstate", handlePopState);
|
||||
|
||||
return () => {
|
||||
history.pushState = originalPushState;
|
||||
history.replaceState = originalReplaceState;
|
||||
window.removeEventListener("popstate", handlePopState);
|
||||
};
|
||||
}, [askOrProceed]);
|
||||
|
||||
// -- Dialog handlers -----------------------------------------------------
|
||||
|
||||
const handleConfirm = useCallback(() => {
|
||||
setShowDialog(false);
|
||||
const nav = pendingNavigate.current;
|
||||
pendingNavigate.current = null;
|
||||
nav?.();
|
||||
}, []);
|
||||
|
||||
const handleCancel = useCallback(() => {
|
||||
setShowDialog(false);
|
||||
pendingNavigate.current = null;
|
||||
}, []);
|
||||
|
||||
// -- Render --------------------------------------------------------------
|
||||
|
||||
return (
|
||||
<UnsavedChangesContext.Provider
|
||||
value={{ register, unregister, hasDirtyChanges, dirtySections, confirmNavigate: askOrProceed }}
|
||||
>
|
||||
{children}
|
||||
|
||||
<AlertDialog open={showDialog} onOpenChange={(open) => { if (!open) handleCancel(); }}>
|
||||
<AlertDialogContent>
|
||||
<AlertDialogHeader>
|
||||
<AlertDialogTitle>Unsaved changes</AlertDialogTitle>
|
||||
<AlertDialogDescription>
|
||||
You have unsaved changes that will be lost. Are you sure you want to leave?
|
||||
</AlertDialogDescription>
|
||||
</AlertDialogHeader>
|
||||
<AlertDialogFooter>
|
||||
<AlertDialogCancel onClick={handleCancel}>Stay on page</AlertDialogCancel>
|
||||
<AlertDialogAction onClick={handleConfirm}>Discard changes</AlertDialogAction>
|
||||
</AlertDialogFooter>
|
||||
</AlertDialogContent>
|
||||
</AlertDialog>
|
||||
</UnsavedChangesContext.Provider>
|
||||
);
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Hooks
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Register a section's dirty state with the nearest UnsavedChangesProvider.
|
||||
* Automatically unregisters on unmount.
|
||||
*
|
||||
* @example
|
||||
* useUnsavedChanges("general", isDirty);
|
||||
*/
|
||||
export function useUnsavedChanges(sectionId: string, isDirty: boolean) {
|
||||
const ctx = useContext(UnsavedChangesContext);
|
||||
if (!ctx) throw new Error("useUnsavedChanges must be used within UnsavedChangesProvider");
|
||||
|
||||
const { register, unregister } = ctx;
|
||||
|
||||
useEffect(() => {
|
||||
register(sectionId, isDirty);
|
||||
}, [sectionId, isDirty, register]);
|
||||
|
||||
useEffect(() => {
|
||||
return () => unregister(sectionId);
|
||||
}, [sectionId, unregister]);
|
||||
}
|
||||
|
||||
/**
|
||||
* Access the unsaved-changes context directly (e.g. for dirtySections).
|
||||
*/
|
||||
export function useUnsavedChangesContext() {
|
||||
const ctx = useContext(UnsavedChangesContext);
|
||||
if (!ctx) throw new Error("useUnsavedChangesContext must be used within UnsavedChangesProvider");
|
||||
return ctx;
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue