feat: allow uploading recording as part of node transition

This commit is contained in:
Abhishek Kumar 2026-04-10 11:54:00 +05:30
parent bb5f56bfb7
commit 65c76ca7ff
36 changed files with 2255 additions and 201 deletions

View file

@ -0,0 +1,70 @@
"""unique recording id per org and workflow
Revision ID: 67a5cf3e09d0
Revises: e7254d2c6c18
Create Date: 2026-04-09 17:03:38.302041
"""
from typing import Sequence, Union
import sqlalchemy as sa
from alembic import op
# revision identifiers, used by Alembic.
# `revision` is this migration's own ID; `down_revision` is the migration it
# applies on top of (matches the "Revises:" line in the module docstring).
revision: str = "67a5cf3e09d0"
down_revision: Union[str, None] = "e7254d2c6c18"
# No branch label and no cross-branch dependency are declared for this revision.
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Scope ``recording_id`` uniqueness to (recording, organization, workflow).

    Steps, in order:
      1. Widen ``workflow_recordings.recording_id`` from 16 to 64 characters.
      2. Drop the existing index on ``recording_id`` (the globally-unique one).
      3. Re-create an index of the same name as a plain, non-unique index.
      4. Add a composite unique constraint over
         ``(recording_id, organization_id, workflow_id)``.
    """
    table_name = "workflow_recordings"
    lookup_index = "ix_workflow_recordings_recording_id"

    # 1. Longer column so descriptive names fit.
    op.alter_column(
        table_name,
        "recording_id",
        existing_type=sa.VARCHAR(length=16),
        type_=sa.String(length=64),
        existing_nullable=False,
    )

    # 2 + 3. Swap the unique index for a non-unique one used only for lookups.
    op.drop_index(op.f(lookup_index), table_name=table_name)
    op.create_index(
        lookup_index,
        table_name,
        ["recording_id"],
        unique=False,
    )

    # 4. Uniqueness is now enforced per organization and workflow.
    op.create_unique_constraint(
        "uq_workflow_recordings_recording_id_org_wf",
        table_name,
        ["recording_id", "organization_id", "workflow_id"],
    )
def downgrade() -> None:
    """Reverse :func:`upgrade`, restoring the unique 16-character column.

    Steps, in order:
      1. Drop the composite unique constraint.
      2. Drop the non-unique ``recording_id`` index.
      3. Re-create the index of the same name as a unique index.
      4. Narrow ``recording_id`` back from 64 to 16 characters.

    NOTE(review): steps 3 and 4 presumably fail if rows written after the
    upgrade reuse a ``recording_id`` across workflows or hold values longer
    than 16 characters -- confirm the data is compatible before downgrading.
    """
    table_name = "workflow_recordings"
    lookup_index = "ix_workflow_recordings_recording_id"

    # 1. Remove the per-(org, workflow) uniqueness rule.
    op.drop_constraint(
        "uq_workflow_recordings_recording_id_org_wf",
        table_name,
        type_="unique",
    )

    # 2 + 3. Swap the plain lookup index back to a unique one.
    op.drop_index(lookup_index, table_name=table_name)
    op.create_index(
        op.f(lookup_index),
        table_name,
        ["recording_id"],
        unique=True,
    )

    # 4. Restore the original column width.
    op.alter_column(
        table_name,
        "recording_id",
        existing_type=sa.String(length=64),
        type_=sa.VARCHAR(length=16),
        existing_nullable=False,
    )

View file

@ -1015,8 +1015,8 @@ class WorkflowRecordingModel(Base):
id = Column(Integer, primary_key=True, index=True) id = Column(Integer, primary_key=True, index=True)
# Short globally unique ID (e.g. "xbhfha3k") used in prompts # Descriptive ID used in prompts (unique per organization and workflow)
recording_id = Column(String(16), unique=True, nullable=False, index=True) recording_id = Column(String(64), nullable=False, index=True)
# Scoping # Scoping
workflow_id = Column( workflow_id = Column(
@ -1062,6 +1062,12 @@ class WorkflowRecordingModel(Base):
# Indexes # Indexes
__table_args__ = ( __table_args__ = (
UniqueConstraint(
"recording_id",
"organization_id",
"workflow_id",
name="uq_workflow_recordings_recording_id_org_wf",
),
Index("ix_workflow_recordings_workflow_id", "workflow_id"), Index("ix_workflow_recordings_workflow_id", "workflow_id"),
Index("ix_workflow_recordings_org_id", "organization_id"), Index("ix_workflow_recordings_org_id", "organization_id"),
Index("ix_workflow_recordings_recording_id", "recording_id"), Index("ix_workflow_recordings_recording_id", "recording_id"),

View file

@ -77,19 +77,19 @@ class WorkflowRecordingClient(BaseDBClient):
) )
return recording return recording
async def get_recordings_for_workflow( async def get_recordings(
self, self,
workflow_id: int,
organization_id: int, organization_id: int,
workflow_id: Optional[int] = None,
tts_provider: Optional[str] = None, tts_provider: Optional[str] = None,
tts_model: Optional[str] = None, tts_model: Optional[str] = None,
tts_voice_id: Optional[str] = None, tts_voice_id: Optional[str] = None,
) -> List[WorkflowRecordingModel]: ) -> List[WorkflowRecordingModel]:
"""Get recordings for a workflow, optionally filtered by TTS config. """Get recordings for an organization, optionally filtered by workflow and TTS config.
Args: Args:
workflow_id: ID of the workflow
organization_id: ID of the organization organization_id: ID of the organization
workflow_id: Optional workflow ID filter
tts_provider: Optional TTS provider filter tts_provider: Optional TTS provider filter
tts_model: Optional TTS model filter tts_model: Optional TTS model filter
tts_voice_id: Optional TTS voice ID filter tts_voice_id: Optional TTS voice ID filter
@ -99,11 +99,12 @@ class WorkflowRecordingClient(BaseDBClient):
""" """
async with self.async_session() as session: async with self.async_session() as session:
query = select(WorkflowRecordingModel).where( query = select(WorkflowRecordingModel).where(
WorkflowRecordingModel.workflow_id == workflow_id,
WorkflowRecordingModel.organization_id == organization_id, WorkflowRecordingModel.organization_id == organization_id,
WorkflowRecordingModel.is_active == True, WorkflowRecordingModel.is_active == True,
) )
if workflow_id is not None:
query = query.where(WorkflowRecordingModel.workflow_id == workflow_id)
if tts_provider: if tts_provider:
query = query.where(WorkflowRecordingModel.tts_provider == tts_provider) query = query.where(WorkflowRecordingModel.tts_provider == tts_provider)
if tts_model: if tts_model:
@ -120,12 +121,14 @@ class WorkflowRecordingClient(BaseDBClient):
self, self,
recording_id: str, recording_id: str,
organization_id: int, organization_id: int,
workflow_id: int,
) -> Optional[WorkflowRecordingModel]: ) -> Optional[WorkflowRecordingModel]:
"""Get a recording by its short ID. """Get a recording by its string recording_id (unique per org + workflow).
Args: Args:
recording_id: The short unique recording ID recording_id: The descriptive recording ID
organization_id: ID of the organization organization_id: ID of the organization
workflow_id: ID of the workflow
Returns: Returns:
WorkflowRecordingModel if found, None otherwise WorkflowRecordingModel if found, None otherwise
@ -134,6 +137,31 @@ class WorkflowRecordingClient(BaseDBClient):
query = select(WorkflowRecordingModel).where( query = select(WorkflowRecordingModel).where(
WorkflowRecordingModel.recording_id == recording_id, WorkflowRecordingModel.recording_id == recording_id,
WorkflowRecordingModel.organization_id == organization_id, WorkflowRecordingModel.organization_id == organization_id,
WorkflowRecordingModel.workflow_id == workflow_id,
WorkflowRecordingModel.is_active == True,
)
result = await session.execute(query)
return result.scalar_one_or_none()
async def get_recording_by_id(
self,
id: int,
organization_id: int,
) -> Optional[WorkflowRecordingModel]:
"""Get a recording by its integer primary key.
Args:
id: The primary key ID
organization_id: ID of the organization
Returns:
WorkflowRecordingModel if found, None otherwise
"""
async with self.async_session() as session:
query = select(WorkflowRecordingModel).where(
WorkflowRecordingModel.id == id,
WorkflowRecordingModel.organization_id == organization_id,
WorkflowRecordingModel.is_active == True, WorkflowRecordingModel.is_active == True,
) )
@ -167,11 +195,15 @@ class WorkflowRecordingClient(BaseDBClient):
result = await session.execute(query) result = await session.execute(query)
return result.scalar_one() > 0 return result.scalar_one() > 0
async def check_recording_id_exists(self, recording_id: str) -> bool: async def check_recording_id_exists(
"""Check if a recording ID already exists globally. self, recording_id: str, organization_id: int, workflow_id: int
) -> bool:
"""Check if a recording ID already exists within an organization and workflow.
Args: Args:
recording_id: The short recording ID to check recording_id: The recording ID to check
organization_id: ID of the organization
workflow_id: ID of the workflow
Returns: Returns:
True if exists, False otherwise True if exists, False otherwise
@ -179,10 +211,52 @@ class WorkflowRecordingClient(BaseDBClient):
async with self.async_session() as session: async with self.async_session() as session:
query = select(WorkflowRecordingModel.id).where( query = select(WorkflowRecordingModel.id).where(
WorkflowRecordingModel.recording_id == recording_id, WorkflowRecordingModel.recording_id == recording_id,
WorkflowRecordingModel.organization_id == organization_id,
WorkflowRecordingModel.workflow_id == workflow_id,
WorkflowRecordingModel.is_active == True,
) )
result = await session.execute(query) result = await session.execute(query)
return result.scalar_one_or_none() is not None return result.scalar_one_or_none() is not None
async def update_recording_id(
    self,
    id: int,
    new_recording_id: str,
    organization_id: int,
) -> Optional[WorkflowRecordingModel]:
    """Rename a recording by replacing its ``recording_id``.

    The row is located by primary key, restricted to the given organization
    and to active recordings. This method does not itself check whether the
    new ID is already in use.

    Args:
        id: Primary key ID of the recording
        new_recording_id: Value to store as the new recording ID
        organization_id: ID of the organization that must own the row

    Returns:
        The refreshed WorkflowRecordingModel, or None when no active row
        matches ``id`` within the organization.
    """
    # NOTE(review): the on-disk PCM cache appears to be keyed by recording_id,
    # so a rename may leave the old cache file behind -- confirm whether it
    # needs invalidating.
    async with self.async_session() as session:
        lookup = select(WorkflowRecordingModel).where(
            WorkflowRecordingModel.id == id,
            WorkflowRecordingModel.organization_id == organization_id,
            WorkflowRecordingModel.is_active == True,
        )
        recording = (await session.execute(lookup)).scalar_one_or_none()
        if not recording:
            return None

        # Capture the previous value before overwriting it, for the log line.
        old_id = recording.recording_id
        recording.recording_id = new_recording_id

        await session.commit()
        await session.refresh(recording)

        logger.info(
            f"Updated recording ID {old_id} -> {new_recording_id}, "
            f"org {organization_id}"
        )
        return recording
async def delete_recording( async def delete_recording(
self, self,
recording_id: str, recording_id: str,

View file

@ -178,6 +178,11 @@ async def initiate_call(
workflow_run_id = request.workflow_run_id workflow_run_id = request.workflow_run_id
if not workflow_run_id: if not workflow_run_id:
# Fetch workflow to merge template context variables (e.g. caller_number,
# called_number set in workflow settings for testing pre-call data fetch)
workflow = await db_client.get_workflow_by_id(request.workflow_id)
template_vars = (workflow.template_context_variables or {}) if workflow else {}
numeric_suffix = int(str(uuid.uuid4()).replace("-", "")[:8], 16) % 100000000 numeric_suffix = int(str(uuid.uuid4()).replace("-", "")[:8], 16) % 100000000
workflow_run_name = f"WR-TEL-OUT-{numeric_suffix:08d}" workflow_run_name = f"WR-TEL-OUT-{numeric_suffix:08d}"
workflow_run = await db_client.create_workflow_run( workflow_run = await db_client.create_workflow_run(
@ -187,6 +192,7 @@ async def initiate_call(
user_id=user.id, user_id=user.id,
call_type=CallType.OUTBOUND, call_type=CallType.OUTBOUND,
initial_context={ initial_context={
**template_vars,
"phone_number": phone_number, "phone_number": phone_number,
"called_number": phone_number, "called_number": phone_number,
"provider": provider.PROVIDER_NAME, "provider": provider.PROVIDER_NAME,

View file

@ -16,6 +16,7 @@ from api.schemas.workflow_recording import (
BatchRecordingUploadResponseSchema, BatchRecordingUploadResponseSchema,
RecordingListResponseSchema, RecordingListResponseSchema,
RecordingResponseSchema, RecordingResponseSchema,
RecordingUpdateRequestSchema,
RecordingUploadResponseSchema, RecordingUploadResponseSchema,
) )
from api.services.auth.depends import get_user from api.services.auth.depends import get_user
@ -25,11 +26,13 @@ from api.services.storage import storage_fs
router = APIRouter(prefix="/workflow-recordings", tags=["workflow-recordings"]) router = APIRouter(prefix="/workflow-recordings", tags=["workflow-recordings"])
async def _generate_unique_recording_id() -> str: async def _generate_unique_recording_id(organization_id: int, workflow_id: int) -> str:
"""Generate a globally unique short recording ID.""" """Generate a unique short recording ID within an organization and workflow."""
for _ in range(10): for _ in range(10):
rid = generate_short_id(8) rid = generate_short_id(8)
exists = await db_client.check_recording_id_exists(rid) exists = await db_client.check_recording_id_exists(
rid, organization_id, workflow_id
)
if not exists: if not exists:
return rid return rid
raise HTTPException( raise HTTPException(
@ -69,7 +72,9 @@ async def get_upload_urls(
try: try:
items = [] items = []
for fd in request.files: for fd in request.files:
recording_id = await _generate_unique_recording_id() recording_id = await _generate_unique_recording_id(
user.selected_organization_id, request.workflow_id
)
storage_key = ( storage_key = (
f"recordings/{user.selected_organization_id}" f"recordings/{user.selected_organization_id}"
@ -163,10 +168,12 @@ async def create_recordings(
@router.get( @router.get(
"/", "/",
response_model=RecordingListResponseSchema, response_model=RecordingListResponseSchema,
summary="List recordings for a workflow", summary="List recordings",
) )
async def list_recordings( async def list_recordings(
workflow_id: Annotated[int, Query(description="Workflow ID")], workflow_id: Annotated[
Optional[int], Query(description="Filter by workflow ID")
] = None,
tts_provider: Annotated[ tts_provider: Annotated[
Optional[str], Query(description="Filter by TTS provider") Optional[str], Query(description="Filter by TTS provider")
] = None, ] = None,
@ -178,11 +185,11 @@ async def list_recordings(
] = None, ] = None,
user=Depends(get_user), user=Depends(get_user),
): ):
"""List recordings for a workflow, optionally filtered by TTS configuration.""" """List recordings for the organization, optionally filtered by workflow and TTS configuration."""
try: try:
recordings = await db_client.get_recordings_for_workflow( recordings = await db_client.get_recordings(
workflow_id=workflow_id,
organization_id=user.selected_organization_id, organization_id=user.selected_organization_id,
workflow_id=workflow_id,
tts_provider=tts_provider, tts_provider=tts_provider,
tts_model=tts_model, tts_model=tts_model,
tts_voice_id=tts_voice_id, tts_voice_id=tts_voice_id,
@ -233,6 +240,62 @@ async def delete_recording(
) from exc ) from exc
@router.patch(
    "/{id}",
    response_model=RecordingResponseSchema,
    summary="Update a recording's Recording ID",
)
async def update_recording(
    id: int,
    request: RecordingUpdateRequestSchema,
    user=Depends(get_user),
):
    """Update the recording_id (descriptive name) of a recording."""
    # Implementation notes live in comments because FastAPI publishes the
    # docstring as the endpoint description.
    #
    # Responses:
    #   200 - the recording, renamed (or unchanged when the ID is the same)
    #   400 - the new ID is empty after stripping whitespace
    #   404 - no active recording with this primary key in the user's org
    #   409 - the new ID is already used within the recording's workflow
    #   500 - any other failure
    #
    # Imported locally so this handler does not depend on a module-level import.
    from sqlalchemy.exc import IntegrityError

    try:
        new_id = request.recording_id.strip()
        if not new_id:
            raise HTTPException(status_code=400, detail="Recording ID cannot be empty")

        # Look up by integer PK — globally unique, no ambiguity
        existing = await db_client.get_recording_by_id(
            id, user.selected_organization_id
        )
        if not existing:
            raise HTTPException(status_code=404, detail="Recording not found")

        # Renaming to the current value is a no-op; skip the write.
        if new_id == existing.recording_id:
            return _build_response(existing)

        # Check if the new ID is already taken within this org + workflow
        exists = await db_client.check_recording_id_exists(
            new_id, user.selected_organization_id, existing.workflow_id
        )
        if exists:
            raise HTTPException(
                status_code=409,
                detail=f"Recording ID '{new_id}' is already in use in this workflow",
            )

        try:
            recording = await db_client.update_recording_id(
                id=id,
                new_recording_id=new_id,
                organization_id=user.selected_organization_id,
            )
        except IntegrityError as exc:
            # The check above and this write are not atomic, and the check only
            # considers active rows, so the database's unique constraint can
            # still reject the rename. Report that as the same conflict the
            # pre-check would have produced instead of a generic 500.
            logger.warning(
                f"Recording ID conflict while renaming recording {id}: {exc}"
            )
            raise HTTPException(
                status_code=409,
                detail=f"Recording ID '{new_id}' is already in use in this workflow",
            ) from exc

        # The row can disappear (or be deactivated) between lookup and update.
        if not recording:
            raise HTTPException(status_code=404, detail="Recording not found")

        return _build_response(recording)
    except HTTPException:
        # Preserve deliberate HTTP errors raised above.
        raise
    except Exception as exc:
        logger.error(f"Error updating recording: {exc}")
        raise HTTPException(
            status_code=500, detail="Failed to update recording"
        ) from exc
@router.post( @router.post(
"/transcribe", "/transcribe",
summary="Transcribe an audio file", summary="Transcribe an audio file",

View file

@ -98,6 +98,17 @@ class BatchRecordingCreateResponseSchema(BaseModel):
) )
# Body of the request that renames a recording: it carries only the new
# recording_id value.
class RecordingUpdateRequestSchema(BaseModel):
"""Request schema for updating a recording's ID."""
# Required field (no default); length is limited to 1-64 characters by the
# Field constraints below.
recording_id: str = Field(
...,
min_length=1,
max_length=64,
description="New descriptive recording ID",
)
class RecordingListResponseSchema(BaseModel): class RecordingListResponseSchema(BaseModel):
"""Response schema for list of recordings.""" """Response schema for list of recordings."""

View file

@ -200,7 +200,6 @@ class CampaignCallDispatcher:
# Merge context variables (queued_run context already includes retry info if applicable) # Merge context variables (queued_run context already includes retry info if applicable)
initial_context = { initial_context = {
**workflow.template_context_variables,
**queued_run.context_variables, **queued_run.context_variables,
"campaign_id": campaign.id, "campaign_id": campaign.id,
"provider": provider.PROVIDER_NAME, "provider": provider.PROVIDER_NAME,

View file

@ -11,12 +11,17 @@ from api.services.pipecat.in_memory_buffers import (
InMemoryLogsBuffer, InMemoryLogsBuffer,
) )
from api.services.pipecat.pipeline_metrics_aggregator import PipelineMetricsAggregator from api.services.pipecat.pipeline_metrics_aggregator import PipelineMetricsAggregator
from api.services.pipecat.recording_playback import queue_recording_audio
from api.services.pipecat.tracing_config import get_trace_url from api.services.pipecat.tracing_config import get_trace_url
from api.services.workflow.pipecat_engine import PipecatEngine from api.services.workflow.pipecat_engine import PipecatEngine
from api.tasks.arq import enqueue_job from api.tasks.arq import enqueue_job
from api.tasks.function_names import FunctionNames from api.tasks.function_names import FunctionNames
from api.utils.hold_audio import play_hold_audio_loop from api.utils.hold_audio import play_hold_audio_loop
from pipecat.frames.frames import Frame, LLMContextFrame, TTSSpeakFrame from pipecat.frames.frames import (
Frame,
LLMContextFrame,
TTSSpeakFrame,
)
from pipecat.pipeline.task import PipelineTask from pipecat.pipeline.task import PipelineTask
from pipecat.processors.audio.audio_buffer_processor import AudioBufferProcessor from pipecat.processors.audio.audio_buffer_processor import AudioBufferProcessor
from pipecat.utils.enums import EndTaskReason from pipecat.utils.enums import EndTaskReason
@ -32,6 +37,7 @@ def register_event_handlers(
pipeline_metrics_aggregator: PipelineMetricsAggregator, pipeline_metrics_aggregator: PipelineMetricsAggregator,
audio_config=AudioConfig, audio_config=AudioConfig,
pre_call_fetch_task: asyncio.Task | None = None, pre_call_fetch_task: asyncio.Task | None = None,
fetch_recording_audio=None,
): ):
"""Register all event handlers for transport and task events. """Register all event handlers for transport and task events.
@ -112,12 +118,31 @@ def register_event_handlers(
# so that render_template() has the complete _call_context_vars. # so that render_template() has the complete _call_context_vars.
await engine.set_node(engine.workflow.start_node_id) await engine.set_node(engine.workflow.start_node_id)
greeting = engine.get_start_greeting() greeting_info = engine.get_start_greeting()
if greeting: if greeting_info:
logger.debug( greeting_type, greeting_value = greeting_info
"Both pipeline_started and client_connected received - playing greeting via TTS" if (
) greeting_type == "audio"
await task.queue_frame(TTSSpeakFrame(greeting)) and greeting_value
and fetch_recording_audio
):
logger.debug(f"Playing audio greeting recording: {greeting_value}")
audio_data = await fetch_recording_audio(greeting_value)
if audio_data:
await queue_recording_audio(
audio_data,
sample_rate=audio_config.pipeline_sample_rate or 16000,
queue_frame=task.queue_frame,
)
else:
logger.warning(
f"Failed to fetch audio greeting {greeting_value}, "
"falling back to LLM generation"
)
await engine.llm.queue_frame(LLMContextFrame(engine.context))
else:
logger.debug("Playing text greeting via TTS")
await task.queue_frame(TTSSpeakFrame(greeting_value))
else: else:
logger.debug( logger.debug(
"Both pipeline_started and client_connected received - triggering initial LLM generation" "Both pipeline_started and client_connected received - triggering initial LLM generation"

View file

@ -27,9 +27,13 @@ from .audio_file_cache import (
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
def _cache_path(recording_id: str, sample_rate: int) -> str: def _cache_path(
organization_id: int, workflow_id: int, recording_id: str, sample_rate: int
) -> str:
"""Return the on-disk path for a cached PCM file.""" """Return the on-disk path for a cached PCM file."""
return os.path.join(CACHE_DIR, f"{recording_id}_{sample_rate}.pcm") return os.path.join(
CACHE_DIR, f"{organization_id}_{workflow_id}_{recording_id}_{sample_rate}.pcm"
)
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
@ -39,18 +43,20 @@ def _cache_path(recording_id: str, sample_rate: int) -> str:
def create_recording_audio_fetcher( def create_recording_audio_fetcher(
organization_id: int, organization_id: int,
workflow_id: int,
pipeline_sample_rate: int, pipeline_sample_rate: int,
) -> Callable[[str], Awaitable[Optional[bytes]]]: ) -> Callable[[str], Awaitable[Optional[bytes]]]:
"""Create an async callback that returns raw PCM bytes for a recording_id. """Create an async callback that returns raw PCM bytes for a recording_id.
The returned callable: The returned callable:
1. Checks the filesystem cache (keyed by ``recording_id`` + sample rate). 1. Checks the filesystem cache (keyed by org/workflow/recording + sample rate).
2. On miss, looks up the recording in the DB, downloads the audio file 2. On miss, looks up the recording in the DB, downloads the audio file
from S3/MinIO, converts it to 16-bit mono PCM at *pipeline_sample_rate*, from S3/MinIO, converts it to 16-bit mono PCM at *pipeline_sample_rate*,
trims leading/trailing silence, caches the result on disk, and returns it. trims leading/trailing silence, caches the result on disk, and returns it.
Args: Args:
organization_id: Organization owning the recordings. organization_id: Organization owning the recordings.
workflow_id: Workflow the recordings belong to.
pipeline_sample_rate: Target PCM sample rate for the pipeline. pipeline_sample_rate: Target PCM sample rate for the pipeline.
Returns: Returns:
@ -68,7 +74,9 @@ def create_recording_audio_fetcher(
return _storage_cache[backend] return _storage_cache[backend]
async def fetch(recording_id: str) -> Optional[bytes]: async def fetch(recording_id: str) -> Optional[bytes]:
cached = _cache_path(recording_id, pipeline_sample_rate) cached = _cache_path(
organization_id, workflow_id, recording_id, pipeline_sample_rate
)
# 1. Serve from filesystem cache # 1. Serve from filesystem cache
if os.path.exists(cached): if os.path.exists(cached):
@ -77,7 +85,7 @@ def create_recording_audio_fetcher(
# 2. DB lookup # 2. DB lookup
recording = await db_client.get_recording_by_recording_id( recording = await db_client.get_recording_by_recording_id(
recording_id, organization_id recording_id, organization_id, workflow_id
) )
if not recording: if not recording:
logger.warning(f"Recording {recording_id} not found in database") logger.warning(f"Recording {recording_id} not found in database")
@ -112,8 +120,8 @@ async def warm_recording_cache(
from api.services.storage import get_storage_for_backend from api.services.storage import get_storage_for_backend
try: try:
recordings = await db_client.get_recordings_for_workflow( recordings = await db_client.get_recordings(
workflow_id, organization_id organization_id=organization_id, workflow_id=workflow_id
) )
if not recordings: if not recordings:
return return
@ -122,7 +130,11 @@ async def warm_recording_cache(
uncached = [ uncached = [
r r
for r in recordings for r in recordings
if not os.path.exists(_cache_path(r.recording_id, pipeline_sample_rate)) if not os.path.exists(
_cache_path(
organization_id, workflow_id, r.recording_id, pipeline_sample_rate
)
)
] ]
if not uncached: if not uncached:
logger.debug(f"Recording cache already warm for workflow {workflow_id}") logger.debug(f"Recording cache already warm for workflow {workflow_id}")
@ -187,7 +199,12 @@ async def _download_and_convert(
pcm_data = _trim_silence(pcm_data, sample_rate) pcm_data = _trim_silence(pcm_data, sample_rate)
# Write to disk cache # Write to disk cache
cached = _cache_path(recording.recording_id, sample_rate) cached = _cache_path(
recording.organization_id,
recording.workflow_id,
recording.recording_id,
sample_rate,
)
write_cache_file(cached, pcm_data) write_cache_file(cached, pcm_data)
return pcm_data return pcm_data

View file

@ -0,0 +1,41 @@
"""Shared helper for pushing pre-recorded audio frames into a pipeline."""
import uuid
from typing import Awaitable, Callable
from pipecat.frames.frames import (
Frame,
TTSAudioRawFrame,
TTSStartedFrame,
TTSStoppedFrame,
)
async def queue_recording_audio(
    audio_data: bytes,
    *,
    sample_rate: int,
    queue_frame: Callable[[Frame], Awaitable[None]],
) -> None:
    """Queue one pre-recorded clip as a started / audio / stopped frame triple.

    All three frames carry the same freshly generated ``context_id`` and are
    handed to ``queue_frame`` one after another, each call awaited before the
    next frame is built. The whole clip is sent as a single
    ``TTSAudioRawFrame`` with ``num_channels=1``.

    Args:
        audio_data: Raw 16-bit mono PCM bytes.
        sample_rate: Pipeline sample rate (e.g. 16000).
        queue_frame: Async callable that accepts a frame, typically
            ``task.queue_frame``.
    """
    # One ID ties the three frames of this playback together.
    playback_id = str(uuid.uuid4())

    start_marker = TTSStartedFrame(context_id=playback_id)
    await queue_frame(start_marker)

    pcm_frame = TTSAudioRawFrame(
        audio=audio_data,
        sample_rate=sample_rate,
        num_channels=1,
        context_id=playback_id,
    )
    await queue_frame(pcm_frame)

    stop_marker = TTSStoppedFrame(context_id=playback_id)
    await queue_frame(stop_marker)

View file

@ -828,6 +828,15 @@ async def _run_pipeline(
voicemail_detector = None voicemail_detector = None
recording_router = None recording_router = None
# Create recording audio fetcher (used by recording router, audio greetings,
# and audio transition speech)
fetch_audio = create_recording_audio_fetcher(
organization_id=workflow.organization_id,
workflow_id=workflow_id,
pipeline_sample_rate=audio_config.pipeline_sample_rate,
)
engine.set_fetch_recording_audio(fetch_audio)
if not is_realtime: if not is_realtime:
# Create voicemail detector if enabled in workflow configurations # Create voicemail detector if enabled in workflow configurations
voicemail_config = (workflow.workflow_configurations or {}).get( voicemail_config = (workflow.workflow_configurations or {}).get(
@ -868,10 +877,6 @@ async def _run_pipeline(
# Create recording router if workflow has active recordings # Create recording router if workflow has active recordings
if has_recordings: if has_recordings:
fetch_audio = create_recording_audio_fetcher(
organization_id=workflow.organization_id,
pipeline_sample_rate=audio_config.pipeline_sample_rate,
)
recording_router = RecordingRouterProcessor( recording_router = RecordingRouterProcessor(
audio_sample_rate=audio_config.pipeline_sample_rate, audio_sample_rate=audio_config.pipeline_sample_rate,
fetch_recording_audio=fetch_audio, fetch_recording_audio=fetch_audio,
@ -973,6 +978,7 @@ async def _run_pipeline(
pipeline_metrics_aggregator=pipeline_metrics_aggregator, pipeline_metrics_aggregator=pipeline_metrics_aggregator,
audio_config=audio_config, audio_config=audio_config,
pre_call_fetch_task=pre_call_fetch_task, pre_call_fetch_task=pre_call_fetch_task,
fetch_recording_audio=fetch_audio,
) )
register_audio_data_handler(audio_buffer, workflow_run_id, in_memory_audio_buffer) register_audio_data_handler(audio_buffer, workflow_run_id, in_memory_audio_buffer)

View file

@ -54,6 +54,8 @@ class NodeDataDTO(BaseModel):
extraction_variables: Optional[list[ExtractionVariableDTO]] = None extraction_variables: Optional[list[ExtractionVariableDTO]] = None
add_global_prompt: bool = True add_global_prompt: bool = True
greeting: Optional[str] = None greeting: Optional[str] = None
greeting_type: Optional[str] = None # 'text' or 'audio'
greeting_recording_id: Optional[str] = None
wait_for_user_response: bool = False wait_for_user_response: bool = False
wait_for_user_response_timeout: Optional[float] = None wait_for_user_response_timeout: Optional[float] = None
detect_voicemail: bool = False detect_voicemail: bool = False
@ -102,6 +104,8 @@ class EdgeDataDTO(BaseModel):
label: str = Field(..., min_length=1) label: str = Field(..., min_length=1)
condition: str = Field(..., min_length=1) condition: str = Field(..., min_length=1)
transition_speech: Optional[str] = None transition_speech: Optional[str] = None
transition_speech_type: Optional[str] = None # 'text' or 'audio'
transition_speech_recording_id: Optional[str] = None
class RFEdgeDTO(BaseModel): class RFEdgeDTO(BaseModel):

View file

@ -1,14 +1,12 @@
"""Service for duplicating workflows including recordings.""" """Service for duplicating workflows including recordings."""
import copy import copy
import json
import posixpath import posixpath
import uuid import uuid
from loguru import logger from loguru import logger
from api.db import db_client from api.db import db_client
from api.db.workflow_recording_client import generate_short_id
from api.enums import StorageBackend from api.enums import StorageBackend
from api.services.storage import get_storage_for_backend, storage_fs from api.services.storage import get_storage_for_backend, storage_fs
@ -41,16 +39,6 @@ def _regenerate_trigger_uuids(workflow_definition: dict) -> dict:
return updated_definition return updated_definition
async def _generate_unique_recording_id() -> str:
"""Generate a globally unique short recording ID."""
for _ in range(10):
rid = generate_short_id(8)
exists = await db_client.check_recording_id_exists(rid)
if not exists:
return rid
raise RuntimeError("Failed to generate unique recording ID")
async def duplicate_workflow( async def duplicate_workflow(
workflow_id: int, workflow_id: int,
organization_id: int, organization_id: int,
@ -130,29 +118,15 @@ async def duplicate_workflow(
organization_id=organization_id, organization_id=organization_id,
) )
# 6. Copy recordings with new IDs and storage paths scoped to new workflow # 6. Copy recordings (recording_ids are preserved since they're scoped per workflow)
recording_id_map = await _duplicate_recordings( await _duplicate_recordings(
source_workflow_id=workflow_id, source_workflow_id=workflow_id,
new_workflow_id=new_workflow.id, new_workflow_id=new_workflow.id,
organization_id=organization_id, organization_id=organization_id,
user_id=user_id, user_id=user_id,
) )
# 7. Replace old recording IDs with new ones in the workflow definition # 7. Sync triggers for the new workflow
if recording_id_map:
workflow_definition = _replace_recording_ids(
workflow_definition, recording_id_map
)
new_workflow = await db_client.update_workflow(
workflow_id=new_workflow.id,
name=None,
workflow_definition=workflow_definition,
template_context_variables=None,
workflow_configurations=None,
organization_id=organization_id,
)
# 8. Sync triggers for the new workflow
if workflow_definition: if workflow_definition:
trigger_paths = _extract_trigger_paths(workflow_definition) trigger_paths = _extract_trigger_paths(workflow_definition)
if trigger_paths: if trigger_paths:
@ -170,34 +144,28 @@ async def _duplicate_recordings(
new_workflow_id: int, new_workflow_id: int,
organization_id: int, organization_id: int,
user_id: int, user_id: int,
) -> dict[str, str]: ) -> None:
"""Duplicate all recordings for a workflow. """Duplicate all recordings for a workflow.
Copies each recording file to a new storage path scoped under the new Copies each recording file to a new storage path scoped under the new
workflow ID, and creates new DB records pointing to the copied files. workflow ID. Recording IDs are preserved since they are unique per
(org, workflow).
Returns:
Mapping of old_recording_id -> new_recording_id
""" """
recordings = await db_client.get_recordings_for_workflow( recordings = await db_client.get_recordings(
workflow_id=source_workflow_id, workflow_id=source_workflow_id,
organization_id=organization_id, organization_id=organization_id,
) )
if not recordings: if not recordings:
return {} return
recording_id_map: dict[str, str] = {}
for rec in recordings: for rec in recordings:
try: try:
new_recording_id = await _generate_unique_recording_id() # Build new storage key: recordings/{org_id}/{new_workflow_id}/{recording_id}/{filename}
# Build new storage key: recordings/{org_id}/{new_workflow_id}/{new_recording_id}/{filename}
filename = posixpath.basename(rec.storage_key) filename = posixpath.basename(rec.storage_key)
new_storage_key = ( new_storage_key = (
f"recordings/{organization_id}" f"recordings/{organization_id}"
f"/{new_workflow_id}/{new_recording_id}" f"/{new_workflow_id}/{rec.recording_id}"
f"/{filename}" f"/{filename}"
) )
@ -211,7 +179,7 @@ async def _duplicate_recordings(
continue continue
await db_client.create_recording( await db_client.create_recording(
recording_id=new_recording_id, recording_id=rec.recording_id,
workflow_id=new_workflow_id, workflow_id=new_workflow_id,
organization_id=organization_id, organization_id=organization_id,
tts_provider=rec.tts_provider, tts_provider=rec.tts_provider,
@ -224,34 +192,12 @@ async def _duplicate_recordings(
metadata=copy.deepcopy(rec.recording_metadata), metadata=copy.deepcopy(rec.recording_metadata),
) )
recording_id_map[rec.recording_id] = new_recording_id logger.info(f"Duplicated recording {rec.recording_id}")
logger.info(
f"Duplicated recording {rec.recording_id} -> {new_recording_id}"
)
except Exception as e: except Exception as e:
logger.error(f"Error duplicating recording {rec.recording_id}: {e}") logger.error(f"Error duplicating recording {rec.recording_id}: {e}")
continue continue
return recording_id_map
def _replace_recording_ids(
workflow_definition: dict,
recording_id_map: dict[str, str],
) -> dict:
"""Replace old recording IDs with new ones throughout the workflow definition.
Uses JSON serialization to do a thorough find-and-replace across all
nested fields (node prompts, data, etc.).
"""
definition_str = json.dumps(workflow_definition)
for old_id, new_id in recording_id_map.items():
definition_str = definition_str.replace(old_id, new_id)
return json.loads(definition_str)
async def _copy_storage_object( async def _copy_storage_object(
source_key: str, dest_key: str, storage_backend: str source_key: str, dest_key: str, storage_backend: str

View file

@ -1,5 +1,6 @@
from typing import TYPE_CHECKING, Awaitable, Callable, Optional, Union from typing import TYPE_CHECKING, Awaitable, Callable, Optional, Union
from api.services.pipecat.recording_playback import queue_recording_audio
from api.services.workflow.disposition_mapper import ( from api.services.workflow.disposition_mapper import (
apply_disposition_mapping, apply_disposition_mapping,
get_organization_id_from_workflow_run, get_organization_id_from_workflow_run,
@ -114,6 +115,9 @@ class PipecatEngine:
# Audio configuration (set via set_audio_config from _run_pipeline) # Audio configuration (set via set_audio_config from _run_pipeline)
self._audio_config = None self._audio_config = None
# Recording audio fetcher (set via set_fetch_recording_audio from _run_pipeline)
self._fetch_recording_audio = None
# True when the workflow has active recordings; enables recording # True when the workflow has active recordings; enables recording
# response mode instructions on all nodes for in-context learning. # response mode instructions on all nodes for in-context learning.
self._has_recordings: bool = has_recordings self._has_recordings: bool = has_recordings
@ -191,6 +195,8 @@ class PipecatEngine:
name: str, name: str,
transition_to_node: str, transition_to_node: str,
transition_speech: Optional[str] = None, transition_speech: Optional[str] = None,
transition_speech_type: Optional[str] = None,
transition_speech_recording_id: Optional[str] = None,
): ):
async def transition_func(function_call_params: FunctionCallParams) -> None: async def transition_func(function_call_params: FunctionCallParams) -> None:
"""Inner function that handles the node change tool calls""" """Inner function that handles the node change tool calls"""
@ -204,8 +210,33 @@ class PipecatEngine:
# Perform variable extraction before transitioning to new node # Perform variable extraction before transitioning to new node
await self._perform_variable_extraction_if_needed(self._current_node) await self._perform_variable_extraction_if_needed(self._current_node)
# Queue transition speech before switching nodes # Queue transition speech/audio before switching nodes
if transition_speech: speech_type = transition_speech_type or "text"
if (
speech_type == "audio"
and transition_speech_recording_id
and self._fetch_recording_audio
):
logger.info(
f"Playing transition audio: {transition_speech_recording_id}"
)
self._queued_speech_mute_state = "waiting"
audio_data = await self._fetch_recording_audio(
transition_speech_recording_id
)
if audio_data:
await queue_recording_audio(
audio_data,
sample_rate=self._audio_config.pipeline_sample_rate
if self._audio_config
else 16000,
queue_frame=self.task.queue_frame,
)
else:
logger.warning(
f"Failed to fetch transition audio {transition_speech_recording_id}"
)
elif transition_speech:
logger.info(f"Playing transition speech: {transition_speech}") logger.info(f"Playing transition speech: {transition_speech}")
self._queued_speech_mute_state = "waiting" self._queued_speech_mute_state = "waiting"
await self.task.queue_frame( await self.task.queue_frame(
@ -259,6 +290,8 @@ class PipecatEngine:
name: str, name: str,
transition_to_node: str, transition_to_node: str,
transition_speech: Optional[str] = None, transition_speech: Optional[str] = None,
transition_speech_type: Optional[str] = None,
transition_speech_recording_id: Optional[str] = None,
): ):
logger.debug( logger.debug(
f"Registering function {name} to transition to node {transition_to_node} with LLM" f"Registering function {name} to transition to node {transition_to_node} with LLM"
@ -266,7 +299,11 @@ class PipecatEngine:
# Create transition function # Create transition function
transition_func = await self._create_transition_func( transition_func = await self._create_transition_func(
name, transition_to_node, transition_speech name,
transition_to_node,
transition_speech,
transition_speech_type,
transition_speech_recording_id,
) )
# Register function with LLM # Register function with LLM
@ -442,6 +479,8 @@ class PipecatEngine:
outgoing_edge.get_function_name(), outgoing_edge.get_function_name(),
outgoing_edge.target, outgoing_edge.target,
outgoing_edge.transition_speech, outgoing_edge.transition_speech,
outgoing_edge.data.transition_speech_type,
outgoing_edge.data.transition_speech_recording_id,
) )
# Register custom tool handlers for this node # Register custom tool handlers for this node
@ -533,11 +572,27 @@ class PipecatEngine:
# Setup LLM Context with Prompts and Functions # Setup LLM Context with Prompts and Functions
await self._setup_llm_context(node) await self._setup_llm_context(node)
def get_start_greeting(self) -> Optional[tuple[str, Optional[str]]]:
    """Describe the greeting configured on the workflow's start node.

    Returns:
        ``("audio", recording_id)`` when the start node asks for an audio
        greeting and has a recording ID; ``("text", rendered_text)`` when a
        text greeting is present (also the fallback for an audio greeting
        without a recording ID); ``None`` when there is no start node or no
        greeting at all.
    """
    node = self.workflow.nodes.get(self.workflow.start_node_id)
    if not node:
        return None

    # A missing greeting type is treated as a plain text greeting.
    wants_audio = (node.greeting_type or "text") == "audio"
    if wants_audio and node.greeting_recording_id:
        return ("audio", node.greeting_recording_id)

    if not node.greeting:
        return None
    return ("text", self._format_prompt(node.greeting))
async def _handle_end_node(self, node: Node) -> None: async def _handle_end_node(self, node: Node) -> None:
@ -698,6 +753,10 @@ class PipecatEngine:
"""Set the audio configuration for the pipeline.""" """Set the audio configuration for the pipeline."""
self._audio_config = audio_config self._audio_config = audio_config
def set_fetch_recording_audio(self, fetch_fn) -> None:
    """Set the recording audio fetcher callback.

    The engine awaits ``fetch_fn(recording_id)`` when a transition is
    configured with pre-recorded audio, and treats a falsy result as a
    failed fetch. While no fetcher is set, audio transitions are not played.
    """
    self._fetch_recording_audio = fetch_fn
def set_mute_pipeline(self, mute: bool) -> None: def set_mute_pipeline(self, mute: bool) -> None:
"""Set the pipeline mute state. """Set the pipeline mute state.

View file

@ -16,6 +16,7 @@ from loguru import logger
from api.db import db_client from api.db import db_client
from api.enums import ToolCategory, WorkflowRunMode from api.enums import ToolCategory, WorkflowRunMode
from api.services.pipecat.recording_playback import queue_recording_audio
from api.services.telephony.call_transfer_manager import get_call_transfer_manager from api.services.telephony.call_transfer_manager import get_call_transfer_manager
from api.services.telephony.factory import get_telephony_provider from api.services.telephony.factory import get_telephony_provider
from api.services.telephony.transfer_event_protocol import TransferContext from api.services.telephony.transfer_event_protocol import TransferContext
@ -77,6 +78,42 @@ class CustomToolManager:
self._engine = engine self._engine = engine
self._organization_id: Optional[int] = None self._organization_id: Optional[int] = None
async def _play_config_message(
self, config: dict, *, append_to_context: bool = False
) -> bool:
"""Play a message from tool config — text or pre-recorded audio.
Returns True if a message was queued, False otherwise.
"""
message_type = config.get("messageType", "none")
if message_type == "audio":
recording_id = config.get("audioRecordingId", "")
if recording_id and self._engine._fetch_recording_audio:
audio_data = await self._engine._fetch_recording_audio(recording_id)
if audio_data:
await queue_recording_audio(
audio_data,
sample_rate=self._engine._audio_config.pipeline_sample_rate
if self._engine._audio_config
else 16000,
queue_frame=self._engine.task.queue_frame,
)
return True
else:
logger.warning(f"Failed to fetch recording {recording_id}")
return False
if message_type == "custom":
custom_message = config.get("customMessage", "")
if custom_message:
await self._engine.task.queue_frame(
TTSSpeakFrame(custom_message, append_to_context=append_to_context)
)
return True
return False
async def get_organization_id(self) -> Optional[int]: async def get_organization_id(self) -> Optional[int]:
"""Get and cache the organization ID from workflow run.""" """Get and cache the organization ID from workflow run."""
if self._organization_id is None: if self._organization_id is None:
@ -250,9 +287,29 @@ class CustomToolManager:
try: try:
# Queue custom message before executing the API call # Queue custom message before executing the API call
# Queue custom message (text or audio) before executing the API call
config = tool.definition.get("config", {}) if tool.definition else {} config = tool.definition.get("config", {}) if tool.definition else {}
custom_msg_type = config.get("customMessageType", "text")
custom_message = config.get("customMessage", "") custom_message = config.get("customMessage", "")
if custom_message: if custom_msg_type == "audio":
recording_id = config.get("customMessageRecordingId", "")
if recording_id and self._engine._fetch_recording_audio:
logger.info(
f"Playing audio message before HTTP tool: {recording_id}"
)
self._engine._queued_speech_mute_state = "waiting"
audio_data = await self._engine._fetch_recording_audio(
recording_id
)
if audio_data:
await queue_recording_audio(
audio_data,
sample_rate=self._engine._audio_config.pipeline_sample_rate
if self._engine._audio_config
else 16000,
queue_frame=self._engine.task.queue_frame,
)
elif custom_message:
logger.info( logger.info(
f"Playing custom message before HTTP tool: {custom_message}" f"Playing custom message before HTTP tool: {custom_message}"
) )
@ -299,8 +356,6 @@ class CustomToolManager:
try: try:
# Get the end call configuration # Get the end call configuration
config = tool.definition.get("config", {}) config = tool.definition.get("config", {})
message_type = config.get("messageType", "none")
custom_message = config.get("customMessage", "")
# Handle end call reason if enabled # Handle end call reason if enabled
end_call_reason_enabled = config.get("endCallReason", False) end_call_reason_enabled = config.get("endCallReason", False)
@ -322,10 +377,8 @@ class CustomToolManager:
properties=properties, properties=properties,
) )
if message_type == "custom" and custom_message: played = await self._play_config_message(config)
# Queue the custom message to be spoken if played:
logger.info(f"Playing custom goodbye message: {custom_message}")
await self._engine.task.queue_frame(TTSSpeakFrame(custom_message))
# End the call after the message (not immediately) # End the call after the message (not immediately)
await self._engine.end_call_with_reason( await self._engine.end_call_with_reason(
EndTaskReason.END_CALL_TOOL_REASON.value, EndTaskReason.END_CALL_TOOL_REASON.value,
@ -370,8 +423,6 @@ class CustomToolManager:
# Get the transfer call configuration # Get the transfer call configuration
config = tool.definition.get("config", {}) config = tool.definition.get("config", {})
destination = config.get("destination", "") destination = config.get("destination", "")
message_type = config.get("messageType", "none")
custom_message = config.get("customMessage", "")
timeout_seconds = config.get( timeout_seconds = config.get(
"timeout", 30 "timeout", 30
) # Default 30 seconds if not configured ) # Default 30 seconds if not configured
@ -443,10 +494,9 @@ class CustomToolManager:
) )
return return
if message_type == "custom" and custom_message: played = await self._play_config_message(config)
logger.info(f"Playing pre-transfer message: {custom_message}") if played:
self._engine._queued_speech_mute_state = "waiting" self._engine._queued_speech_mute_state = "waiting"
await self._engine.task.queue_frame(TTSSpeakFrame(custom_message))
# Get organization ID for provider configuration # Get organization ID for provider configuration
organization_id = await self.get_organization_id() organization_id = await self.get_organization_id()

View file

@ -77,6 +77,8 @@ class Node:
self.extraction_variables = data.extraction_variables self.extraction_variables = data.extraction_variables
self.add_global_prompt = data.add_global_prompt self.add_global_prompt = data.add_global_prompt
self.greeting = data.greeting self.greeting = data.greeting
self.greeting_type = data.greeting_type
self.greeting_recording_id = data.greeting_recording_id
self.detect_voicemail = data.detect_voicemail self.detect_voicemail = data.detect_voicemail
self.delayed_start = data.delayed_start self.delayed_start = data.delayed_start
self.delayed_start_duration = data.delayed_start_duration self.delayed_start_duration = data.delayed_start_duration

View file

@ -0,0 +1,587 @@
"""Tests for text and audio playback in greetings, transitions, and tool messages.
Verifies that:
- Text mode produces TTSSpeakFrame
- Audio mode produces TTSStartedFrame -> TTSAudioRawFrame -> TTSStoppedFrame
- Covers: start node greetings, edge transition speech, tool config messages
"""
import asyncio
from typing import Any, Dict, List
from unittest.mock import AsyncMock, Mock, patch
import pytest
from api.services.workflow.dto import (
EdgeDataDTO,
NodeDataDTO,
NodeType,
Position,
ReactFlowDTO,
RFEdgeDTO,
RFNodeDTO,
)
from api.services.workflow.pipecat_engine import PipecatEngine
from api.services.workflow.pipecat_engine_custom_tools import CustomToolManager
from api.services.workflow.workflow import WorkflowGraph
from pipecat.frames.frames import (
Frame,
LLMContextFrame,
TTSAudioRawFrame,
TTSSpeakFrame,
TTSStartedFrame,
TTSStoppedFrame,
)
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.llm_context import LLMContext
from pipecat.processors.aggregators.llm_response_universal import (
LLMAssistantAggregatorParams,
LLMContextAggregatorPair,
)
from pipecat.tests import MockLLMService, MockTTSService
from pipecat.tests.mock_transport import MockTransport
from pipecat.transports.base_transport import TransportParams
# ─── Constants ──────────────────────────────────────────────────
# System prompts assigned to the start and end nodes of the test workflows.
START_PROMPT = "Start Call System Prompt"
END_PROMPT = "End Call System Prompt"
# Text spoken via TTS for the start-node greeting and the start->end transition.
TEXT_GREETING = "Hello, welcome to our service!"
TEXT_TRANSITION = "Thank you for calling, goodbye!"
# Recording IDs referenced by the audio greeting and the audio transition.
AUDIO_GREETING_ID = "rec-greeting-001"
AUDIO_TRANSITION_ID = "rec-transition-001"
FAKE_PCM_AUDIO = b"\x00\x01" * 1000  # Fake 16-bit mono PCM data
# ─── Fixtures ───────────────────────────────────────────────────
@pytest.fixture
def text_workflow() -> WorkflowGraph:
    """Two-node (start -> end) workflow using a text greeting and text transition speech."""
    start_node = RFNodeDTO(
        id="start",
        type=NodeType.startNode,
        position=Position(x=0, y=0),
        data=NodeDataDTO(
            name="Start Call",
            prompt=START_PROMPT,
            is_start=True,
            allow_interrupt=False,
            add_global_prompt=False,
            greeting=TEXT_GREETING,
            greeting_type="text",
            extraction_enabled=False,
        ),
    )
    end_node = RFNodeDTO(
        id="end",
        type=NodeType.endNode,
        position=Position(x=0, y=200),
        data=NodeDataDTO(
            name="End Call",
            prompt=END_PROMPT,
            is_end=True,
            allow_interrupt=False,
            add_global_prompt=False,
            extraction_enabled=False,
        ),
    )
    # The single edge carries the spoken (TTS) transition message.
    start_to_end = RFEdgeDTO(
        id="start-end",
        source="start",
        target="end",
        data=EdgeDataDTO(
            label="End Call",
            condition="When the user says end the call",
            transition_speech=TEXT_TRANSITION,
            transition_speech_type="text",
        ),
    )
    flow = ReactFlowDTO(nodes=[start_node, end_node], edges=[start_to_end])
    return WorkflowGraph(flow)
@pytest.fixture
def audio_workflow() -> WorkflowGraph:
    """Two-node (start -> end) workflow using a recorded greeting and recorded transition audio."""
    start_node = RFNodeDTO(
        id="start",
        type=NodeType.startNode,
        position=Position(x=0, y=0),
        data=NodeDataDTO(
            name="Start Call",
            prompt=START_PROMPT,
            is_start=True,
            allow_interrupt=False,
            add_global_prompt=False,
            greeting_type="audio",
            greeting_recording_id=AUDIO_GREETING_ID,
            extraction_enabled=False,
        ),
    )
    end_node = RFNodeDTO(
        id="end",
        type=NodeType.endNode,
        position=Position(x=0, y=200),
        data=NodeDataDTO(
            name="End Call",
            prompt=END_PROMPT,
            is_end=True,
            allow_interrupt=False,
            add_global_prompt=False,
            extraction_enabled=False,
        ),
    )
    # The single edge references a recording instead of transition text.
    start_to_end = RFEdgeDTO(
        id="start-end",
        source="start",
        target="end",
        data=EdgeDataDTO(
            label="End Call",
            condition="When the user says end the call",
            transition_speech_type="audio",
            transition_speech_recording_id=AUDIO_TRANSITION_ID,
        ),
    )
    flow = ReactFlowDTO(nodes=[start_node, end_node], edges=[start_to_end])
    return WorkflowGraph(flow)
# ─── Pipeline Helper ────────────────────────────────────────────
async def run_pipeline_and_capture_frames(
    workflow: WorkflowGraph,
    functions: List[Dict[str, Any]],
    fetch_recording_audio=None,
    num_text_steps: int = 1,
) -> tuple[MockLLMService, LLMContext, list[Frame]]:
    """Run a pipeline with mock tool calls and capture frames queued via task.queue_frame.

    Args:
        workflow: Workflow graph the engine is driven through.
        functions: Tool calls the mock LLM emits in its first step.
        fetch_recording_audio: Optional recording fetcher installed on the
            engine; when omitted the engine keeps its default (no fetcher).
        num_text_steps: Number of text-only mock LLM steps generated after
            the tool-call step.

    Returns:
        Tuple of (llm, context, list of captured frames).
    """
    # First mock step issues the requested tool calls; later steps are text.
    first_step_chunks = MockLLMService.create_multiple_function_call_chunks(functions)
    mock_steps = MockLLMService.create_multi_step_responses(
        first_step_chunks, num_text_steps=num_text_steps, step_prefix="Response"
    )
    llm = MockLLMService(mock_steps=mock_steps, chunk_delay=0.001)
    tts = MockTTSService(mock_audio_duration_ms=40, frame_delay=0)
    mock_transport = MockTransport(
        params=TransportParams(
            audio_in_enabled=True,
            audio_out_enabled=True,
            audio_in_sample_rate=16000,
            audio_out_sample_rate=16000,
        ),
    )
    context = LLMContext()
    assistant_params = LLMAssistantAggregatorParams(expect_stripped_words=True)
    context_aggregator = LLMContextAggregatorPair(
        context, assistant_params=assistant_params
    )
    engine = PipecatEngine(
        llm=llm,
        context=context,
        workflow=workflow,
        call_context_vars={"customer_name": "Test User"},
        workflow_run_id=1,
    )
    if fetch_recording_audio:
        engine.set_fetch_recording_audio(fetch_recording_audio)
    pipeline = Pipeline(
        [llm, tts, mock_transport.output(), context_aggregator.assistant()]
    )
    task = PipelineTask(pipeline, params=PipelineParams(), enable_rtvi=False)
    engine.set_task(task)

    # Spy on task.queue_frame to capture all frames queued by the engine
    queued_frames: list[Frame] = []
    original_queue_frame = task.queue_frame

    async def capturing_queue_frame(frame):
        # Record the frame, then forward it so the pipeline still processes it.
        queued_frames.append(frame)
        await original_queue_frame(frame)

    task.queue_frame = capturing_queue_frame

    # Replace the engine's organization lookup and disposition mapping with
    # canned async results for the duration of the run.
    with (
        patch(
            "api.services.workflow.pipecat_engine.get_organization_id_from_workflow_run",
            new_callable=AsyncMock,
            return_value=1,
        ),
        patch(
            "api.services.workflow.pipecat_engine.apply_disposition_mapping",
            new_callable=AsyncMock,
            return_value="completed",
        ),
    ):
        runner = PipelineRunner()

        async def run():
            await runner.run(task)

        async def initialize():
            # Short delay before initializing; presumably lets the runner
            # start the task first — TODO confirm.
            await asyncio.sleep(0.01)
            await engine.initialize()
            await engine.set_node(engine.workflow.start_node_id)
            # Queue the context frame that kicks off the first LLM generation.
            await engine.llm.queue_frame(LLMContextFrame(engine.context))

        await asyncio.gather(run(), initialize())

    return llm, context, queued_frames
# ─── Tests: Start Greeting ──────────────────────────────────────
class TestStartGreeting:
    """Unit tests for PipecatEngine.get_start_greeting()."""

    @staticmethod
    def _bare_engine(call_context_vars: dict, **start_node_extras) -> PipecatEngine:
        """Build an engine over a minimal start -> end graph.

        ``start_node_extras`` are forwarded to the start node's NodeDataDTO
        only when supplied, so omitted greeting fields keep the DTO defaults.
        """
        start_node = RFNodeDTO(
            id="start",
            type=NodeType.startNode,
            position=Position(x=0, y=0),
            data=NodeDataDTO(
                name="Start",
                prompt="Prompt",
                is_start=True,
                add_global_prompt=False,
                extraction_enabled=False,
                **start_node_extras,
            ),
        )
        end_node = RFNodeDTO(
            id="end",
            type=NodeType.endNode,
            position=Position(x=0, y=200),
            data=NodeDataDTO(
                name="End",
                prompt="End",
                is_end=True,
                add_global_prompt=False,
                extraction_enabled=False,
            ),
        )
        edge = RFEdgeDTO(
            id="e",
            source="start",
            target="end",
            data=EdgeDataDTO(label="End", condition="End"),
        )
        graph = WorkflowGraph(ReactFlowDTO(nodes=[start_node, end_node], edges=[edge]))
        return PipecatEngine(
            workflow=graph,
            call_context_vars=call_context_vars,
            workflow_run_id=1,
        )

    def test_text_greeting_returns_text_tuple(self, text_workflow: WorkflowGraph):
        """A text greeting is reported as ('text', rendered_text)."""
        engine = PipecatEngine(
            workflow=text_workflow,
            call_context_vars={},
            workflow_run_id=1,
        )
        assert engine.get_start_greeting() == ("text", TEXT_GREETING)

    def test_audio_greeting_returns_audio_tuple(self, audio_workflow: WorkflowGraph):
        """An audio greeting is reported as ('audio', recording_id)."""
        engine = PipecatEngine(
            workflow=audio_workflow,
            call_context_vars={},
            workflow_run_id=1,
        )
        assert engine.get_start_greeting() == ("audio", AUDIO_GREETING_ID)

    def test_no_greeting_returns_none(self):
        """A start node without any greeting yields None."""
        engine = self._bare_engine({})
        assert engine.get_start_greeting() is None

    def test_text_greeting_renders_template_variables(self):
        """{{variable}} placeholders in a text greeting are rendered."""
        engine = self._bare_engine(
            {"customer_name": "Alice"},
            greeting="Hello {{customer_name}}!",
            greeting_type="text",
        )
        assert engine.get_start_greeting() == ("text", "Hello Alice!")
# ─── Tests: Transition Speech (Pipeline) ────────────────────────
class TestTransitionSpeech:
    """Pipeline tests for edge transition speech (text and audio)."""

    @staticmethod
    def _end_call_request() -> List[Dict[str, Any]]:
        """Return a fresh mock tool call that takes the start -> end edge."""
        return [
            {
                "name": "end_call",
                "arguments": {},
                "tool_call_id": "call_transition",
            },
        ]

    @staticmethod
    def _frames_of(frames: list, frame_type: type) -> list:
        """Filter captured frames down to instances of ``frame_type``."""
        return [frame for frame in frames if isinstance(frame, frame_type)]

    @pytest.mark.asyncio
    async def test_text_transition_queues_tts_speak_frame(
        self, text_workflow: WorkflowGraph
    ):
        """Text transition speech queues exactly one TTSSpeakFrame with the message."""
        llm, _context, captured = await run_pipeline_and_capture_frames(
            workflow=text_workflow,
            functions=self._end_call_request(),
            num_text_steps=2,
        )

        # Pipeline completes: 1st gen on StartNode, 2nd gen on EndNode
        assert llm.get_current_step() == 2

        spoken = self._frames_of(captured, TTSSpeakFrame)
        matching = [frame for frame in spoken if frame.text == TEXT_TRANSITION]
        assert len(matching) == 1, (
            f"Expected one TTSSpeakFrame with text '{TEXT_TRANSITION}', "
            f"got: {[f.text for f in spoken]}"
        )

        # A text transition must not produce raw audio frames.
        assert len(self._frames_of(captured, TTSAudioRawFrame)) == 0

    @pytest.mark.asyncio
    async def test_audio_transition_queues_audio_frames(
        self, audio_workflow: WorkflowGraph
    ):
        """Audio transition speech queues TTSStarted + TTSAudioRaw + TTSStopped."""
        fetcher = AsyncMock(return_value=FAKE_PCM_AUDIO)
        llm, _context, captured = await run_pipeline_and_capture_frames(
            workflow=audio_workflow,
            functions=self._end_call_request(),
            fetch_recording_audio=fetcher,
            num_text_steps=2,
        )

        # Pipeline completes
        assert llm.get_current_step() == 2

        # The fetcher is asked for the edge's recording exactly once.
        fetcher.assert_called_once_with(AUDIO_TRANSITION_ID)

        started_frames = self._frames_of(captured, TTSStartedFrame)
        raw_frames = self._frames_of(captured, TTSAudioRawFrame)
        stopped_frames = self._frames_of(captured, TTSStoppedFrame)
        assert len(started_frames) >= 1, (
            f"Expected TTSStartedFrame. "
            f"Frame types: {[type(f).__name__ for f in captured]}"
        )
        assert len(raw_frames) >= 1, "Expected TTSAudioRawFrame"
        assert len(stopped_frames) >= 1, "Expected TTSStoppedFrame"

        # The raw frame carries the fetched PCM untouched.
        first_raw = raw_frames[0]
        assert first_raw.audio == FAKE_PCM_AUDIO
        assert first_raw.sample_rate == 16000
        assert first_raw.num_channels == 1

        # All three frames share one context id.
        expected_ctx = started_frames[0].context_id
        assert expected_ctx is not None
        assert first_raw.context_id == expected_ctx
        assert stopped_frames[0].context_id == expected_ctx

        # An audio transition must not fall back to TTS text.
        assert len(self._frames_of(captured, TTSSpeakFrame)) == 0
# ─── Tests: Tool Config Messages ────────────────────────────────
class TestPlayConfigMessage:
    """Unit tests for CustomToolManager._play_config_message."""

    @pytest.fixture
    def mock_engine(self):
        """Provide a Mock engine whose task.queue_frame records frames in ``_queued_frames``."""
        fake = Mock()
        fake._workflow_run_id = 1
        fake._call_context_vars = {}
        fake._fetch_recording_audio = None
        fake._audio_config = None
        fake.task = Mock()
        fake.llm = Mock()
        fake._queued_frames = []

        async def record_frame(frame):
            fake._queued_frames.append(frame)

        fake.task.queue_frame = record_frame
        return fake

    @pytest.mark.asyncio
    async def test_custom_text_queues_tts_speak_frame(self, mock_engine):
        """messageType='custom' queues TTSSpeakFrame with the message text."""
        manager = CustomToolManager(mock_engine)

        played = await manager._play_config_message(
            {"messageType": "custom", "customMessage": "Ending your call now."}
        )

        assert played is True
        queued = mock_engine._queued_frames
        assert len(queued) == 1
        (only_frame,) = queued
        assert isinstance(only_frame, TTSSpeakFrame)
        assert only_frame.text == "Ending your call now."

    @pytest.mark.asyncio
    async def test_audio_queues_started_raw_stopped_frames(self, mock_engine):
        """messageType='audio' queues TTSStarted + TTSAudioRaw + TTSStopped."""
        fetcher = AsyncMock(return_value=FAKE_PCM_AUDIO)
        mock_engine._fetch_recording_audio = fetcher
        manager = CustomToolManager(mock_engine)

        played = await manager._play_config_message(
            {"messageType": "audio", "audioRecordingId": "rec-end-001"}
        )

        assert played is True
        fetcher.assert_called_once_with("rec-end-001")

        queued = mock_engine._queued_frames
        assert len(queued) == 3
        started, raw, stopped = queued
        assert isinstance(started, TTSStartedFrame)
        assert isinstance(raw, TTSAudioRawFrame)
        assert isinstance(stopped, TTSStoppedFrame)

        # The raw frame carries the fetched PCM at the 16 kHz fallback rate.
        assert raw.audio == FAKE_PCM_AUDIO
        assert raw.sample_rate == 16000
        assert raw.num_channels == 1

        # Context IDs should match across all three frames
        assert started.context_id is not None
        assert raw.context_id == started.context_id
        assert stopped.context_id == started.context_id

    @pytest.mark.asyncio
    async def test_none_message_type_returns_false(self, mock_engine):
        """messageType='none' returns False without queuing frames."""
        manager = CustomToolManager(mock_engine)

        played = await manager._play_config_message({"messageType": "none"})

        assert played is False
        assert len(mock_engine._queued_frames) == 0

    @pytest.mark.asyncio
    async def test_audio_without_fetch_callback_returns_false(self, mock_engine):
        """Audio without fetch_recording_audio callback returns False."""
        mock_engine._fetch_recording_audio = None
        manager = CustomToolManager(mock_engine)

        played = await manager._play_config_message(
            {"messageType": "audio", "audioRecordingId": "rec-123"}
        )

        assert played is False
        assert len(mock_engine._queued_frames) == 0

    @pytest.mark.asyncio
    async def test_audio_with_failed_fetch_returns_false(self, mock_engine):
        """Audio with fetch returning None returns False."""
        fetcher = AsyncMock(return_value=None)
        mock_engine._fetch_recording_audio = fetcher
        manager = CustomToolManager(mock_engine)

        played = await manager._play_config_message(
            {"messageType": "audio", "audioRecordingId": "rec-123"}
        )

        assert played is False
        fetcher.assert_called_once_with("rec-123")
        assert len(mock_engine._queued_frames) == 0

    @pytest.mark.asyncio
    async def test_custom_empty_message_returns_false(self, mock_engine):
        """messageType='custom' with empty message returns False."""
        manager = CustomToolManager(mock_engine)

        played = await manager._play_config_message(
            {"messageType": "custom", "customMessage": ""}
        )

        assert played is False
        assert len(mock_engine._queued_frames) == 0

View file

@ -3,25 +3,19 @@
import { useRef, useState } from 'react'; import { useRef, useState } from 'react';
import { toast } from 'sonner'; import { toast } from 'sonner';
import { getPresignedUploadUrlApiV1S3PresignedUploadUrlPost } from '@/client/sdk.gen';
import { Button } from '@/components/ui/button'; import { Button } from '@/components/ui/button';
import { Label } from '@/components/ui/label'; import { Label } from '@/components/ui/label';
import logger from '@/lib/logger'; import logger from '@/lib/logger';
interface CsvUploadSelectorProps { interface CsvUploadSelectorProps {
accessToken: string;
onFileUploaded: (fileKey: string, fileName: string) => void; onFileUploaded: (fileKey: string, fileName: string) => void;
selectedFileName?: string; selectedFileName?: string;
} }
interface PresignedUploadUrlResponse {
upload_url: string;
file_key: string;
expires_in: number;
}
const MAX_FILE_SIZE = 10 * 1024 * 1024; // 10MB const MAX_FILE_SIZE = 10 * 1024 * 1024; // 10MB
export default function CsvUploadSelector({ accessToken, onFileUploaded, selectedFileName }: CsvUploadSelectorProps) { export default function CsvUploadSelector({ onFileUploaded, selectedFileName }: CsvUploadSelectorProps) {
const [uploading, setUploading] = useState(false); const [uploading, setUploading] = useState(false);
const [uploadProgress, setUploadProgress] = useState(0); const [uploadProgress, setUploadProgress] = useState(0);
const fileInputRef = useRef<HTMLInputElement>(null); const fileInputRef = useRef<HTMLInputElement>(null);
@ -48,25 +42,18 @@ export default function CsvUploadSelector({ accessToken, onFileUploaded, selecte
try { try {
// Step 1: Request presigned upload URL // Step 1: Request presigned upload URL
logger.info('Requesting presigned upload URL for:', file.name); logger.info('Requesting presigned upload URL for:', file.name);
const presignedResponse = await fetch('/api/v1/s3/presigned-upload-url', { const { data: presignedData, error } = await getPresignedUploadUrlApiV1S3PresignedUploadUrlPost({
method: 'POST', body: {
headers: {
'Authorization': `Bearer ${accessToken}`,
'Content-Type': 'application/json',
},
body: JSON.stringify({
file_name: file.name, file_name: file.name,
file_size: file.size, file_size: file.size,
content_type: 'text/csv', content_type: 'text/csv',
}), },
}); });
if (!presignedResponse.ok) { if (error || !presignedData) {
const error = await presignedResponse.json(); throw new Error('Failed to get upload URL');
throw new Error(error.detail || 'Failed to get upload URL');
} }
const presignedData: PresignedUploadUrlResponse = await presignedResponse.json();
logger.info('Received presigned URL, uploading file...'); logger.info('Received presigned URL, uploading file...');
// Step 2: Upload file directly to S3/MinIO // Step 2: Upload file directly to S3/MinIO

View file

@ -415,7 +415,6 @@ export default function NewCampaignPage() {
/> />
) : ( ) : (
<CsvUploadSelector <CsvUploadSelector
accessToken={userAccessToken}
onFileUploaded={handleFileUploaded} onFileUploaded={handleFileUploaded}
selectedFileName={selectedFileName} selectedFileName={selectedFileName}
/> />

View file

@ -0,0 +1,382 @@
"use client";
import { AudioLines, Check, Pause, Pencil, Play, RefreshCw, Search, Trash2, X } from "lucide-react";
import { useCallback, useEffect, useMemo, useRef, useState } from "react";
import { toast } from "sonner";
import {
deleteRecordingApiV1WorkflowRecordingsRecordingIdDelete,
getWorkflowsSummaryApiV1WorkflowSummaryGet,
listRecordingsApiV1WorkflowRecordingsGet,
updateRecordingApiV1WorkflowRecordingsIdPatch,
} from "@/client/sdk.gen";
import type { RecordingResponseSchema, WorkflowSummaryResponse } from "@/client/types.gen";
import { Badge } from "@/components/ui/badge";
import { Button } from "@/components/ui/button";
import { Input } from "@/components/ui/input";
import {
Select,
SelectContent,
SelectItem,
SelectTrigger,
SelectValue,
} from "@/components/ui/select";
import { Skeleton } from "@/components/ui/skeleton";
import { useAudioPlayback } from "@/hooks/useAudioPlayback";
import logger from "@/lib/logger";
// Sentinel value for the "All agents" option in the workflow filter
// (an explicit non-empty value is used instead of "" for the Select item).
const ALL_VALUE = "__all__";

/**
 * Lists the organization's audio recordings with an agent (workflow) filter,
 * client-side text search, inline renaming of the recording ID, playback and
 * deletion.
 *
 * Rows are identified by the database primary key (`rec.id`) rather than
 * `rec.recording_id`: a recording ID is only unique within one workflow, so
 * the "All agents" view may contain several rows sharing the same
 * `recording_id`.
 */
export default function RecordingsList() {
  const [recordings, setRecordings] = useState<RecordingResponseSchema[]>([]);
  const [workflows, setWorkflows] = useState<WorkflowSummaryResponse[]>([]);
  const [isLoading, setIsLoading] = useState(true);
  const [searchQuery, setSearchQuery] = useState("");
  const [error, setError] = useState<string | null>(null);

  // Filters
  const [selectedWorkflow, setSelectedWorkflow] = useState<string>(ALL_VALUE);

  // Inline edit state: primary key of the row being edited plus the draft value.
  const [editingId, setEditingId] = useState<RecordingResponseSchema["id"] | null>(null);
  const [editValue, setEditValue] = useState("");

  const { playingId, toggle: togglePlayback, stop: stopPlayback } = useAudioPlayback();
  const hasFetchedWorkflows = useRef(false);
  // Monotonic counter used to ignore responses from superseded fetches.
  const latestRequest = useRef(0);

  // workflow id -> workflow name, used to label each recording row.
  const workflowMap = useMemo(() => {
    const map = new Map<number, string>();
    for (const w of workflows) {
      map.set(w.id, w.name);
    }
    return map;
  }, [workflows]);

  const fetchWorkflows = useCallback(async () => {
    try {
      const response = await getWorkflowsSummaryApiV1WorkflowSummaryGet();
      if (response.data) {
        setWorkflows(response.data);
      }
    } catch (err) {
      logger.error("Error fetching workflows:", err);
    }
  }, []);

  const fetchRecordings = useCallback(async () => {
    // Tag this request; only the most recent one may update state, so a slow
    // response for a previous filter cannot overwrite newer results.
    const requestId = ++latestRequest.current;
    try {
      setIsLoading(true);
      setError(null);
      const response = await listRecordingsApiV1WorkflowRecordingsGet({
        query: {
          workflow_id: selectedWorkflow !== ALL_VALUE ? Number(selectedWorkflow) : undefined,
        },
      });
      if (requestId !== latestRequest.current) return;
      if (response.error || !response.data) {
        throw new Error("Failed to fetch recordings");
      }
      setRecordings(response.data.recordings);
    } catch (err) {
      logger.error("Error fetching recordings:", err);
      if (requestId === latestRequest.current) {
        setError(err instanceof Error ? err.message : "Failed to fetch recordings");
      }
    } finally {
      if (requestId === latestRequest.current) {
        setIsLoading(false);
      }
    }
  }, [selectedWorkflow]);

  // Workflows are loaded once; the ref guards against repeated effect runs.
  useEffect(() => {
    if (!hasFetchedWorkflows.current) {
      hasFetchedWorkflows.current = true;
      fetchWorkflows();
    }
  }, [fetchWorkflows]);

  // Re-fetch whenever the workflow filter changes (fetchRecordings identity changes).
  useEffect(() => {
    fetchRecordings();
  }, [fetchRecordings]);

  const handleDelete = async (recordingId: string) => {
    if (!confirm("Are you sure you want to delete this recording?")) return;

    try {
      const response = await deleteRecordingApiV1WorkflowRecordingsRecordingIdDelete({
        path: { recording_id: recordingId },
      });
      if (response.error) {
        throw new Error("Failed to delete recording");
      }
      // Do not keep playing audio for a recording that no longer exists.
      if (playingId === recordingId) {
        stopPlayback();
      }
      toast.success("Recording deleted");
      fetchRecordings();
    } catch (err) {
      toast.error(err instanceof Error ? err.message : "Failed to delete recording");
      logger.error("Error deleting recording:", err);
    }
  };

  const handlePlay = async (rec: RecordingResponseSchema) => {
    try {
      // NOTE(review): playback is still tracked by recording_id, which may be
      // shared by rows from different workflows — confirm whether
      // useAudioPlayback can be keyed by the primary key instead.
      await togglePlayback(rec.recording_id, rec.storage_key, rec.storage_backend);
    } catch {
      toast.error("Failed to play recording");
    }
  };

  const startEditing = (rec: RecordingResponseSchema) => {
    setEditingId(rec.id);
    setEditValue(rec.recording_id);
  };

  const cancelEditing = () => {
    setEditingId(null);
    setEditValue("");
  };

  const saveRecordingId = async (rec: RecordingResponseSchema) => {
    const newId = editValue.trim();
    if (!newId) {
      toast.error("Recording ID cannot be empty");
      return;
    }
    // Nothing changed: just leave edit mode without calling the API.
    if (newId === rec.recording_id) {
      cancelEditing();
      return;
    }

    try {
      const response = await updateRecordingApiV1WorkflowRecordingsIdPatch({
        path: { id: rec.id },
        body: { recording_id: newId },
      });
      if (response.error) {
        const errData = response.error as { detail?: string };
        throw new Error(errData?.detail || "Failed to update recording ID");
      }
      toast.success(`Recording ID updated to "${newId}"`);
      cancelEditing();
      fetchRecordings();
    } catch (err) {
      toast.error(err instanceof Error ? err.message : "Failed to update recording ID");
    }
  };

  const formatDate = (dateString: string): string => {
    const date = new Date(dateString);
    return date.toLocaleDateString() + " " + date.toLocaleTimeString();
  };

  // Case-insensitive match against filename, transcript and recording ID.
  const filteredRecordings = recordings.filter((rec) => {
    if (!searchQuery) return true;
    const q = searchQuery.toLowerCase();
    const filename = (rec.metadata?.original_filename as string) || "";
    return (
      filename.toLowerCase().includes(q) ||
      // Guard against a missing transcript so one row cannot break the search.
      (rec.transcript ?? "").toLowerCase().includes(q) ||
      rec.recording_id.toLowerCase().includes(q)
    );
  });

  // Skeleton placeholder while loading with nothing to show yet.
  if (isLoading && recordings.length === 0) {
    return (
      <div className="space-y-4">
        {[1, 2, 3].map((i) => (
          <div key={i} className="flex items-center justify-between p-4 border rounded-lg">
            <div className="space-y-2 flex-1">
              <Skeleton className="h-4 w-48" />
              <Skeleton className="h-3 w-64" />
            </div>
            <Skeleton className="h-8 w-24" />
          </div>
        ))}
      </div>
    );
  }

  return (
    <div className="space-y-4">
      {/* Filter */}
      <div className="max-w-xs">
        <label className="text-xs text-muted-foreground mb-1 block">Voice Agent</label>
        <Select value={selectedWorkflow} onValueChange={setSelectedWorkflow}>
          <SelectTrigger className="h-9 text-sm">
            <SelectValue placeholder="All agents" />
          </SelectTrigger>
          <SelectContent>
            <SelectItem value={ALL_VALUE}>All agents</SelectItem>
            {workflows.map((w) => (
              <SelectItem key={w.id} value={String(w.id)}>
                {w.name}
              </SelectItem>
            ))}
          </SelectContent>
        </Select>
      </div>

      {/* Search and Refresh */}
      <div className="flex items-center gap-4">
        <div className="relative flex-1">
          <Search className="absolute left-3 top-1/2 transform -translate-y-1/2 h-4 w-4 text-muted-foreground" />
          <Input
            placeholder="Search by filename, transcript, or ID..."
            value={searchQuery}
            onChange={(e) => setSearchQuery(e.target.value)}
            className="pl-10"
          />
        </div>
        <Button
          variant="outline"
          size="icon"
          aria-label="Refresh recordings"
          onClick={() => { stopPlayback(); fetchRecordings(); }}
          disabled={isLoading}
        >
          <RefreshCw className={`h-4 w-4 ${isLoading ? "animate-spin" : ""}`} />
        </Button>
      </div>

      {/* The error is shown inline so the filter and refresh controls above
          stay available for retrying. */}
      {error ? (
        <div
          role="alert"
          className="p-4 bg-destructive/10 border border-destructive/20 rounded-lg text-destructive"
        >
          {error}
        </div>
      ) : (
        <>
          {/* Results count */}
          <div className="text-sm text-muted-foreground">
            {filteredRecordings.length} recording{filteredRecordings.length !== 1 ? "s" : ""}
            {searchQuery && ` matching "${searchQuery}"`}
          </div>

          {/* Recordings List */}
          {filteredRecordings.length === 0 ? (
            <div className="text-center py-12">
              <AudioLines className="w-12 h-12 text-muted-foreground mx-auto mb-4" />
              <p className="text-muted-foreground">
                {searchQuery
                  ? "No recordings match your search"
                  : "No recordings found for the selected filters"}
              </p>
            </div>
          ) : (
            <div className="space-y-3">
              {filteredRecordings.map((rec) => {
                const filename = (rec.metadata?.original_filename as string) || "";
                const workflowName = workflowMap.get(rec.workflow_id);
                const isEditing = editingId === rec.id;

                return (
                  <div
                    key={rec.id}
                    className="flex items-center justify-between p-4 border rounded-lg hover:bg-muted/50 transition-colors"
                  >
                    <div className="flex items-center gap-4 flex-1 min-w-0">
                      <div className="w-10 h-10 rounded-lg bg-primary/10 flex items-center justify-center shrink-0">
                        <AudioLines className="w-5 h-5 text-primary" />
                      </div>
                      <div className="flex-1 min-w-0">
                        {/* Recording ID (editable) */}
                        <div className="flex items-center gap-2 mb-1">
                          {isEditing ? (
                            <div className="flex items-center gap-1">
                              <Input
                                value={editValue}
                                onChange={(e) => setEditValue(e.target.value)}
                                onKeyDown={(e) => {
                                  if (e.key === "Enter") saveRecordingId(rec);
                                  if (e.key === "Escape") cancelEditing();
                                }}
                                className="h-7 text-sm font-mono w-48"
                                maxLength={64}
                                aria-label="Recording ID"
                                autoFocus
                              />
                              <Button
                                variant="ghost"
                                size="sm"
                                className="h-7 w-7 p-0"
                                aria-label="Save recording ID"
                                onClick={() => saveRecordingId(rec)}
                              >
                                <Check className="w-3.5 h-3.5" />
                              </Button>
                              <Button
                                variant="ghost"
                                size="sm"
                                className="h-7 w-7 p-0"
                                aria-label="Cancel editing"
                                onClick={cancelEditing}
                              >
                                <X className="w-3.5 h-3.5" />
                              </Button>
                            </div>
                          ) : (
                            <div className="flex items-center gap-1.5 group">
                              <code className="text-sm font-mono bg-muted px-1.5 py-0.5 rounded truncate max-w-[250px]">
                                {rec.recording_id}
                              </code>
                              {/* Hidden until hover; also revealed on keyboard focus. */}
                              <Button
                                variant="ghost"
                                size="sm"
                                className="h-6 w-6 p-0 opacity-0 group-hover:opacity-100 focus-visible:opacity-100 transition-opacity"
                                aria-label="Edit recording ID"
                                onClick={() => startEditing(rec)}
                              >
                                <Pencil className="w-3 h-3" />
                              </Button>
                            </div>
                          )}
                          {workflowName && (
                            <Badge variant="outline" className="text-xs shrink-0">
                              {workflowName}
                            </Badge>
                          )}
                        </div>

                        {/* Filename */}
                        {filename && (
                          <p className="text-xs text-muted-foreground mb-0.5 truncate max-w-[300px]">
                            {filename}
                          </p>
                        )}

                        {/* Transcript */}
                        <p className="text-sm text-muted-foreground line-clamp-1 mb-1">
                          {rec.transcript}
                        </p>

                        <div className="flex items-center gap-3 text-xs text-muted-foreground flex-wrap">
                          <span>{rec.tts_provider}</span>
                          <span>{rec.tts_model}</span>
                          <span className="truncate max-w-[150px]">{rec.tts_voice_id}</span>
                          <span>{formatDate(rec.created_at)}</span>
                        </div>
                      </div>
                    </div>

                    <div className="flex items-center gap-1 shrink-0 ml-2">
                      <Button
                        variant="ghost"
                        size="sm"
                        aria-label={playingId === rec.recording_id ? "Pause recording" : "Play recording"}
                        onClick={() => handlePlay(rec)}
                      >
                        {playingId === rec.recording_id ? (
                          <Pause className="w-4 h-4" />
                        ) : (
                          <Play className="w-4 h-4" />
                        )}
                      </Button>
                      <Button
                        variant="ghost"
                        size="sm"
                        aria-label="Delete recording"
                        onClick={() => handleDelete(rec.recording_id)}
                        className="text-destructive hover:text-destructive/90"
                      >
                        <Trash2 className="w-4 h-4" />
                      </Button>
                    </div>
                  </div>
                );
              })}
            </div>
          )}
        </>
      )}
    </div>
  );
}

View file

@ -0,0 +1,53 @@
"use client";
import { useEffect } from "react";
import { Card, CardContent, CardDescription, CardHeader, CardTitle } from "@/components/ui/card";
import { Skeleton } from "@/components/ui/skeleton";
import { useAuth } from "@/lib/auth";
import RecordingsList from "./RecordingsList";
/**
 * Recordings page: gates on authentication and renders the organization's
 * recordings list inside a card.
 *
 * While the auth state is loading, or when there is no user (a redirect to
 * login is triggered in that case), a skeleton placeholder is shown instead
 * of the list.
 */
export default function RecordingsPage() {
  const { user, redirectToLogin, loading } = useAuth();

  // Send unauthenticated visitors to login once the auth state has resolved.
  useEffect(() => {
    if (!loading && !user) {
      redirectToLogin();
    }
  }, [loading, user, redirectToLogin]);

  if (loading || !user) {
    return (
      <div className="container mx-auto px-4 py-8">
        <div className="space-y-4">
          <Skeleton className="h-12 w-64" />
          <Skeleton className="h-64 w-full" />
        </div>
      </div>
    );
  }

  return (
    <div className="container mx-auto px-4 py-8">
      <div className="mb-8">
        <h1 className="text-3xl font-bold mb-2">Recordings</h1>
        {/* Copy matches the controls RecordingsList actually offers:
            an agent filter plus text search. */}
        <p className="text-muted-foreground">
          View all audio recordings across your voice agents. Filter by agent, or search by filename, transcript, or ID.
        </p>
      </div>

      <Card>
        <CardHeader>
          <CardTitle>All Recordings</CardTitle>
          <CardDescription>
            Audio recordings scoped to your organization
          </CardDescription>
        </CardHeader>
        <CardContent>
          <RecordingsList />
        </CardContent>
      </Card>
    </div>
  );
}

View file

@ -2,6 +2,8 @@
import { AlertCircle } from "lucide-react"; import { AlertCircle } from "lucide-react";
import type { RecordingResponseSchema } from "@/client/types.gen";
import { RecordingSelect } from "@/components/flow/TextOrAudioInput";
import { Card, CardContent, CardDescription, CardHeader, CardTitle } from "@/components/ui/card"; import { Card, CardContent, CardDescription, CardHeader, CardTitle } from "@/components/ui/card";
import { Input } from "@/components/ui/input"; import { Input } from "@/components/ui/input";
import { Label } from "@/components/ui/label"; import { Label } from "@/components/ui/label";
@ -20,6 +22,9 @@ export interface EndCallToolConfigProps {
onMessageTypeChange: (messageType: EndCallMessageType) => void; onMessageTypeChange: (messageType: EndCallMessageType) => void;
customMessage: string; customMessage: string;
onCustomMessageChange: (message: string) => void; onCustomMessageChange: (message: string) => void;
audioRecordingId: string;
onAudioRecordingIdChange: (id: string) => void;
recordings?: RecordingResponseSchema[];
endCallReason: boolean; endCallReason: boolean;
onEndCallReasonChange: (enabled: boolean) => void; onEndCallReasonChange: (enabled: boolean) => void;
endCallReasonDescription: string; endCallReasonDescription: string;
@ -35,6 +40,9 @@ export function EndCallToolConfig({
onMessageTypeChange, onMessageTypeChange,
customMessage, customMessage,
onCustomMessageChange, onCustomMessageChange,
audioRecordingId,
onAudioRecordingIdChange,
recordings = [],
endCallReason, endCallReason,
onEndCallReasonChange, onEndCallReasonChange,
endCallReasonDescription, endCallReasonDescription,
@ -148,6 +156,24 @@ export function EndCallToolConfig({
/> />
</div> </div>
)} )}
<div className="flex items-start space-x-3 p-3 border rounded-lg hover:bg-muted/50">
<RadioGroupItem value="audio" id="audio" className="mt-1" />
<label htmlFor="audio" className="flex-1 space-y-2 cursor-pointer">
<span className="font-medium">Pre-recorded Audio</span>
<p className="text-xs text-muted-foreground">
Play a pre-recorded audio file before disconnecting
</p>
</label>
</div>
{messageType === "audio" && (
<div className="pl-8">
<RecordingSelect
value={audioRecordingId}
onChange={onAudioRecordingIdChange}
recordings={recordings}
/>
</div>
)}
</RadioGroup> </RadioGroup>
</div> </div>
</CardContent> </CardContent>

View file

@ -2,6 +2,8 @@
import { AlertCircle } from "lucide-react"; import { AlertCircle } from "lucide-react";
import type { RecordingResponseSchema } from "@/client/types.gen";
import { TextOrAudioInput } from "@/components/flow/TextOrAudioInput";
import { import {
CredentialSelector, CredentialSelector,
type HttpMethod, type HttpMethod,
@ -37,6 +39,11 @@ export interface HttpApiToolConfigProps {
onTimeoutMsChange: (timeout: number) => void; onTimeoutMsChange: (timeout: number) => void;
customMessage: string; customMessage: string;
onCustomMessageChange: (message: string) => void; onCustomMessageChange: (message: string) => void;
customMessageType: 'text' | 'audio';
onCustomMessageTypeChange: (type: 'text' | 'audio') => void;
customMessageRecordingId: string;
onCustomMessageRecordingIdChange: (id: string) => void;
recordings?: RecordingResponseSchema[];
} }
export function HttpApiToolConfig({ export function HttpApiToolConfig({
@ -58,6 +65,11 @@ export function HttpApiToolConfig({
onTimeoutMsChange, onTimeoutMsChange,
customMessage, customMessage,
onCustomMessageChange, onCustomMessageChange,
customMessageType,
onCustomMessageTypeChange,
customMessageRecordingId,
onCustomMessageRecordingIdChange,
recordings = [],
}: HttpApiToolConfigProps) { }: HttpApiToolConfigProps) {
return ( return (
<Card> <Card>
@ -136,18 +148,28 @@ export function HttpApiToolConfig({
<div className="grid gap-2 pt-4 border-t"> <div className="grid gap-2 pt-4 border-t">
<Label>Custom Message</Label> <Label>Custom Message</Label>
<Label className="text-xs text-muted-foreground"> <Label className="text-xs text-muted-foreground">
Optional message the AI will speak before executing this tool (e.g., &quot;Let me look that up for you&quot;) Optional message the AI will speak or play before executing this tool.
</Label> </Label>
<div className="flex items-start gap-2 rounded-md bg-amber-50 p-2 text-xs text-amber-700 border border-amber-200"> <TextOrAudioInput
<AlertCircle className="h-3.5 w-3.5 mt-0.5 shrink-0" /> type={customMessageType}
<span>This text is spoken as-is. For multilingual workflows, choose your phrasing carefully.</span> onTypeChange={onCustomMessageTypeChange}
</div> recordingId={customMessageRecordingId}
<Textarea onRecordingIdChange={onCustomMessageRecordingIdChange}
value={customMessage} recordings={recordings}
onChange={(e) => onCustomMessageChange(e.target.value)} >
placeholder="e.g., Let me check that for you, one moment please." <>
rows={2} <div className="flex items-start gap-2 rounded-md bg-amber-50 p-2 text-xs text-amber-700 border border-amber-200">
/> <AlertCircle className="h-3.5 w-3.5 mt-0.5 shrink-0" />
<span>This text is spoken as-is. For multilingual workflows, choose your phrasing carefully.</span>
</div>
<Textarea
value={customMessage}
onChange={(e) => onCustomMessageChange(e.target.value)}
placeholder="e.g., Let me check that for you, one moment please."
rows={2}
/>
</>
</TextOrAudioInput>
</div> </div>
</TabsContent> </TabsContent>

View file

@ -3,6 +3,8 @@
import { AlertCircle } from "lucide-react"; import { AlertCircle } from "lucide-react";
import {useState } from "react"; import {useState } from "react";
import type { RecordingResponseSchema } from "@/client/types.gen";
import { RecordingSelect } from "@/components/flow/TextOrAudioInput";
import { Card, CardContent, CardDescription, CardHeader, CardTitle } from "@/components/ui/card"; import { Card, CardContent, CardDescription, CardHeader, CardTitle } from "@/components/ui/card";
import { Input } from "@/components/ui/input"; import { Input } from "@/components/ui/input";
import { Label } from "@/components/ui/label"; import { Label } from "@/components/ui/label";
@ -22,6 +24,9 @@ export interface TransferCallToolConfigProps {
onMessageTypeChange: (messageType: EndCallMessageType) => void; onMessageTypeChange: (messageType: EndCallMessageType) => void;
customMessage: string; customMessage: string;
onCustomMessageChange: (message: string) => void; onCustomMessageChange: (message: string) => void;
audioRecordingId: string;
onAudioRecordingIdChange: (id: string) => void;
recordings?: RecordingResponseSchema[];
timeout?: number; // Make optional to match API type timeout?: number; // Make optional to match API type
onTimeoutChange: (timeout: number) => void; onTimeoutChange: (timeout: number) => void;
} }
@ -37,6 +42,9 @@ export function TransferCallToolConfig({
onMessageTypeChange, onMessageTypeChange,
customMessage, customMessage,
onCustomMessageChange, onCustomMessageChange,
audioRecordingId,
onAudioRecordingIdChange,
recordings = [],
timeout, timeout,
onTimeoutChange, onTimeoutChange,
}: TransferCallToolConfigProps) { }: TransferCallToolConfigProps) {
@ -181,6 +189,24 @@ export function TransferCallToolConfig({
/> />
</div> </div>
)} )}
<div className="flex items-start space-x-3 p-3 border rounded-lg hover:bg-muted/50">
<RadioGroupItem value="audio" id="audio" className="mt-1" />
<label htmlFor="audio" className="flex-1 space-y-2 cursor-pointer">
<span className="font-medium">Pre-recorded Audio</span>
<p className="text-xs text-muted-foreground">
Play a pre-recorded audio file before transferring
</p>
</label>
</div>
{messageType === "audio" && (
<div className="pl-8">
<RecordingSelect
value={audioRecordingId}
onChange={onAudioRecordingIdChange}
recordings={recordings}
/>
</div>
)}
</RadioGroup> </RadioGroup>
</div> </div>

View file

@ -75,6 +75,7 @@ export default function ToolDetailPage() {
const [endCallMessageType, setEndCallMessageType] = useState<EndCallMessageType>("none"); const [endCallMessageType, setEndCallMessageType] = useState<EndCallMessageType>("none");
const [endCallReason, setEndCallReason] = useState(false); const [endCallReason, setEndCallReason] = useState(false);
const [endCallReasonDescription, setEndCallReasonDescription] = useState(""); const [endCallReasonDescription, setEndCallReasonDescription] = useState("");
const [audioRecordingId, setAudioRecordingId] = useState("");
const handleEndCallReasonChange = (enabled: boolean) => { const handleEndCallReasonChange = (enabled: boolean) => {
setEndCallReason(enabled); setEndCallReason(enabled);
@ -87,6 +88,11 @@ export default function ToolDetailPage() {
const [transferDestination, setTransferDestination] = useState(""); const [transferDestination, setTransferDestination] = useState("");
const [transferMessageType, setTransferMessageType] = useState<EndCallMessageType>("none"); const [transferMessageType, setTransferMessageType] = useState<EndCallMessageType>("none");
const [transferTimeout, setTransferTimeout] = useState(30); const [transferTimeout, setTransferTimeout] = useState(30);
const [transferAudioRecordingId, setTransferAudioRecordingId] = useState("");
// HTTP API form state - custom message type
const [customMessageType, setCustomMessageType] = useState<'text' | 'audio'>('text');
const [customMessageRecordingId, setCustomMessageRecordingId] = useState("");
// Redirect if not authenticated // Redirect if not authenticated
useEffect(() => { useEffect(() => {
@ -132,11 +138,14 @@ export default function ToolDetailPage() {
if (config) { if (config) {
setEndCallMessageType(config.messageType || "none"); setEndCallMessageType(config.messageType || "none");
setCustomMessage(config.customMessage || ""); setCustomMessage(config.customMessage || "");
// eslint-disable-next-line @typescript-eslint/no-explicit-any
setAudioRecordingId((config as any).audioRecordingId || "");
setEndCallReason(config.endCallReason ?? false); setEndCallReason(config.endCallReason ?? false);
setEndCallReasonDescription(config.endCallReasonDescription || ""); setEndCallReasonDescription(config.endCallReasonDescription || "");
} else { } else {
setEndCallMessageType("none"); setEndCallMessageType("none");
setCustomMessage(""); setCustomMessage("");
setAudioRecordingId("");
setEndCallReason(false); setEndCallReason(false);
setEndCallReasonDescription(""); setEndCallReasonDescription("");
} }
@ -147,11 +156,14 @@ export default function ToolDetailPage() {
setTransferDestination(config.destination || ""); setTransferDestination(config.destination || "");
setTransferMessageType(config.messageType || "none"); setTransferMessageType(config.messageType || "none");
setCustomMessage(config.customMessage || ""); setCustomMessage(config.customMessage || "");
// eslint-disable-next-line @typescript-eslint/no-explicit-any
setTransferAudioRecordingId((config as any).audioRecordingId || "");
setTransferTimeout(config.timeout ?? 30); setTransferTimeout(config.timeout ?? 30);
} else { } else {
setTransferDestination(""); setTransferDestination("");
setTransferMessageType("none"); setTransferMessageType("none");
setCustomMessage(""); setCustomMessage("");
setTransferAudioRecordingId("");
setTransferTimeout(30); setTransferTimeout(30);
} }
} else { } else {
@ -163,6 +175,10 @@ export default function ToolDetailPage() {
setCredentialUuid(config.credential_uuid || ""); setCredentialUuid(config.credential_uuid || "");
setTimeoutMs(config.timeout_ms || 5000); setTimeoutMs(config.timeout_ms || 5000);
setCustomMessage(config.customMessage || ""); setCustomMessage(config.customMessage || "");
// eslint-disable-next-line @typescript-eslint/no-explicit-any
setCustomMessageType((config as any).customMessageType || "text");
// eslint-disable-next-line @typescript-eslint/no-explicit-any
setCustomMessageRecordingId((config as any).customMessageRecordingId || "");
// Convert headers object to array // Convert headers object to array
if (config.headers) { if (config.headers) {
@ -259,6 +275,7 @@ export default function ToolDetailPage() {
config: { config: {
messageType: endCallMessageType, messageType: endCallMessageType,
customMessage: endCallMessageType === "custom" ? customMessage : undefined, customMessage: endCallMessageType === "custom" ? customMessage : undefined,
audioRecordingId: endCallMessageType === "audio" ? audioRecordingId || undefined : undefined,
endCallReason, endCallReason,
endCallReasonDescription: endCallReason ? endCallReasonDescription || undefined : undefined, endCallReasonDescription: endCallReason ? endCallReasonDescription || undefined : undefined,
}, },
@ -276,6 +293,7 @@ export default function ToolDetailPage() {
destination: transferDestination, destination: transferDestination,
messageType: transferMessageType, messageType: transferMessageType,
customMessage: transferMessageType === "custom" ? customMessage : undefined, customMessage: transferMessageType === "custom" ? customMessage : undefined,
audioRecordingId: transferMessageType === "audio" ? transferAudioRecordingId || undefined : undefined,
timeout: transferTimeout, timeout: transferTimeout,
}, },
}, },
@ -306,7 +324,9 @@ export default function ToolDetailPage() {
parameters: parameters:
validParameters.length > 0 ? validParameters : undefined, validParameters.length > 0 ? validParameters : undefined,
timeout_ms: timeoutMs, timeout_ms: timeoutMs,
customMessage: customMessage || undefined, customMessage: customMessageType === 'text' ? (customMessage || undefined) : undefined,
customMessageType,
customMessageRecordingId: customMessageType === 'audio' ? (customMessageRecordingId || undefined) : undefined,
}, },
}, },
}; };
@ -490,6 +510,8 @@ const data = await response.json();`;
onMessageTypeChange={setEndCallMessageType} onMessageTypeChange={setEndCallMessageType}
customMessage={customMessage} customMessage={customMessage}
onCustomMessageChange={setCustomMessage} onCustomMessageChange={setCustomMessage}
audioRecordingId={audioRecordingId}
onAudioRecordingIdChange={setAudioRecordingId}
endCallReason={endCallReason} endCallReason={endCallReason}
onEndCallReasonChange={handleEndCallReasonChange} onEndCallReasonChange={handleEndCallReasonChange}
endCallReasonDescription={endCallReasonDescription} endCallReasonDescription={endCallReasonDescription}
@ -507,6 +529,8 @@ const data = await response.json();`;
onMessageTypeChange={setTransferMessageType} onMessageTypeChange={setTransferMessageType}
customMessage={customMessage} customMessage={customMessage}
onCustomMessageChange={setCustomMessage} onCustomMessageChange={setCustomMessage}
audioRecordingId={transferAudioRecordingId}
onAudioRecordingIdChange={setTransferAudioRecordingId}
timeout={transferTimeout} timeout={transferTimeout}
onTimeoutChange={setTransferTimeout} onTimeoutChange={setTransferTimeout}
/> />
@ -530,6 +554,10 @@ const data = await response.json();`;
onTimeoutMsChange={setTimeoutMs} onTimeoutMsChange={setTimeoutMs}
customMessage={customMessage} customMessage={customMessage}
onCustomMessageChange={setCustomMessage} onCustomMessageChange={setCustomMessage}
customMessageType={customMessageType}
onCustomMessageTypeChange={setCustomMessageType}
customMessageRecordingId={customMessageRecordingId}
onCustomMessageRecordingIdChange={setCustomMessageRecordingId}
/> />
)} )}

View file

@ -14,7 +14,7 @@ import type {
export type ToolCategory = "http_api" | "end_call" | "transfer_call" | "calculator" | "native" | "integration"; export type ToolCategory = "http_api" | "end_call" | "transfer_call" | "calculator" | "native" | "integration";
export type EndCallMessageType = "none" | "custom"; export type EndCallMessageType = "none" | "custom" | "audio";
export interface ToolCategoryConfig { export interface ToolCategoryConfig {
value: ToolCategory; value: ToolCategory;

View file

@ -19,6 +19,7 @@ import { Separator } from "@/components/ui/separator";
import { Switch } from "@/components/ui/switch"; import { Switch } from "@/components/ui/switch";
import { Textarea } from "@/components/ui/textarea"; import { Textarea } from "@/components/ui/textarea";
import { SETTINGS_DOCUMENTATION_URLS } from "@/constants/documentation"; import { SETTINGS_DOCUMENTATION_URLS } from "@/constants/documentation";
import { UnsavedChangesProvider, useUnsavedChanges, useUnsavedChangesContext } from "@/context/UnsavedChangesContext";
import { useAudioPlayback } from "@/hooks/useAudioPlayback"; import { useAudioPlayback } from "@/hooks/useAudioPlayback";
import { useAuth } from "@/lib/auth"; import { useAuth } from "@/lib/auth";
import logger from "@/lib/logger"; import logger from "@/lib/logger";
@ -113,6 +114,21 @@ function GeneralSection({
const ambientFileInputRef = useRef<HTMLInputElement>(null); const ambientFileInputRef = useRef<HTMLInputElement>(null);
const { playingId, toggle: togglePlayback } = useAudioPlayback(); const { playingId, toggle: togglePlayback } = useAudioPlayback();
const isDirty = useMemo(() => {
const initAmbient = workflowConfigurations.ambient_noise_configuration || DEFAULT_AMBIENT_NOISE_CONFIG;
return (
name !== workflowName ||
JSON.stringify(ambientNoiseConfig) !== JSON.stringify(initAmbient) ||
maxCallDuration !== (workflowConfigurations.max_call_duration || 600) ||
maxUserIdleTimeout !== (workflowConfigurations.max_user_idle_timeout || 10) ||
smartTurnStopSecs !== (workflowConfigurations.smart_turn_stop_secs || 2) ||
turnStopStrategy !== (workflowConfigurations.turn_stop_strategy || "transcription") ||
contextCompactionEnabled !== (workflowConfigurations.context_compaction_enabled ?? false)
);
}, [name, workflowName, ambientNoiseConfig, maxCallDuration, maxUserIdleTimeout, smartTurnStopSecs, turnStopStrategy, contextCompactionEnabled, workflowConfigurations]);
useUnsavedChanges("general", isDirty);
const handleAmbientFileUpload = async (file: File) => { const handleAmbientFileUpload = async (file: File) => {
if (file.size > MAX_AMBIENT_NOISE_FILE_SIZE) { if (file.size > MAX_AMBIENT_NOISE_FILE_SIZE) {
setAudioUploadError(`File too large (${(file.size / (1024 * 1024)).toFixed(1)}MB). Maximum is 10MB.`); setAudioUploadError(`File too large (${(file.size / (1024 * 1024)).toFixed(1)}MB). Maximum is 10MB.`);
@ -463,8 +479,9 @@ function GeneralSection({
</div> </div>
</div> </div>
</CardContent> </CardContent>
<CardFooter className="justify-end border-t pt-6"> <CardFooter className="justify-end gap-3 border-t pt-6">
<Button onClick={handleSave} disabled={isSaving}> {isDirty && <span className="text-xs text-muted-foreground">Unsaved changes</span>}
<Button onClick={handleSave} disabled={isSaving || !isDirty}>
{isSaving ? "Saving..." : "Save General Settings"} {isSaving ? "Saving..." : "Save General Settings"}
</Button> </Button>
</CardFooter> </CardFooter>
@ -488,6 +505,13 @@ function TemplateVariablesSection({
const [newValue, setNewValue] = useState(""); const [newValue, setNewValue] = useState("");
const [isSaving, setIsSaving] = useState(false); const [isSaving, setIsSaving] = useState(false);
const isDirty = useMemo(() => {
const pendingVars = newKey && newValue ? { ...contextVars, [newKey]: newValue } : contextVars;
return JSON.stringify(pendingVars) !== JSON.stringify(templateContextVariables);
}, [contextVars, newKey, newValue, templateContextVariables]);
useUnsavedChanges("variables", isDirty);
const handleAdd = () => { const handleAdd = () => {
if (newKey && newValue) { if (newKey && newValue) {
setContextVars((prev) => ({ ...prev, [newKey]: newValue })); setContextVars((prev) => ({ ...prev, [newKey]: newValue }));
@ -578,8 +602,9 @@ function TemplateVariablesSection({
</Button> </Button>
</div> </div>
</CardContent> </CardContent>
<CardFooter className="justify-end border-t pt-6"> <CardFooter className="justify-end gap-3 border-t pt-6">
<Button onClick={handleSave} disabled={isSaving}> {isDirty && <span className="text-xs text-muted-foreground">Unsaved changes</span>}
<Button onClick={handleSave} disabled={isSaving || !isDirty}>
{isSaving ? "Saving..." : "Save Variables"} {isSaving ? "Saving..." : "Save Variables"}
</Button> </Button>
</CardFooter> </CardFooter>
@ -601,6 +626,10 @@ function DictionarySection({
const [dictionaryValue, setDictionaryValue] = useState(dictionary); const [dictionaryValue, setDictionaryValue] = useState(dictionary);
const [isSaving, setIsSaving] = useState(false); const [isSaving, setIsSaving] = useState(false);
const isDirty = dictionaryValue !== dictionary;
useUnsavedChanges("dictionary", isDirty);
const handleSave = async () => { const handleSave = async () => {
setIsSaving(true); setIsSaving(true);
try { try {
@ -633,8 +662,9 @@ function DictionarySection({
className="resize-none" className="resize-none"
/> />
</CardContent> </CardContent>
<CardFooter className="justify-end border-t pt-6"> <CardFooter className="justify-end gap-3 border-t pt-6">
<Button onClick={handleSave} disabled={isSaving}> {isDirty && <span className="text-xs text-muted-foreground">Unsaved changes</span>}
<Button onClick={handleSave} disabled={isSaving || !isDirty}>
{isSaving ? "Saving..." : "Save Dictionary"} {isSaving ? "Saving..." : "Save Dictionary"}
</Button> </Button>
</CardFooter> </CardFooter>
@ -669,6 +699,24 @@ function VoicemailSection({
const [longSpeechTimeout, setLongSpeechTimeout] = useState(getConfig().long_speech_timeout); const [longSpeechTimeout, setLongSpeechTimeout] = useState(getConfig().long_speech_timeout);
const [isSaving, setIsSaving] = useState(false); const [isSaving, setIsSaving] = useState(false);
const isDirty = useMemo(() => {
const init = {
...DEFAULT_VOICEMAIL_DETECTION_CONFIGURATION,
...workflowConfigurations.voicemail_detection,
};
return (
enabled !== init.enabled ||
useWorkflowLlm !== init.use_workflow_llm ||
provider !== (init.provider || "openai") ||
model !== (init.model || "gpt-4.1") ||
apiKey !== (init.api_key || "") ||
systemPrompt !== (init.system_prompt || DEFAULT_VOICEMAIL_SYSTEM_PROMPT) ||
longSpeechTimeout !== init.long_speech_timeout
);
}, [enabled, useWorkflowLlm, provider, model, apiKey, systemPrompt, longSpeechTimeout, workflowConfigurations]);
useUnsavedChanges("voicemail", isDirty);
const handleSave = async () => { const handleSave = async () => {
setIsSaving(true); setIsSaving(true);
try { try {
@ -772,8 +820,9 @@ function VoicemailSection({
</> </>
)} )}
</CardContent> </CardContent>
<CardFooter className="justify-end border-t pt-6"> <CardFooter className="justify-end gap-3 border-t pt-6">
<Button onClick={handleSave} disabled={isSaving}> {isDirty && <span className="text-xs text-muted-foreground">Unsaved changes</span>}
<Button onClick={handleSave} disabled={isSaving || !isDirty}>
{isSaving ? "Saving..." : "Save Voicemail Settings"} {isSaving ? "Saving..." : "Save Voicemail Settings"}
</Button> </Button>
</CardFooter> </CardFooter>
@ -848,8 +897,23 @@ function WorkflowSettingsContent({
}: { }: {
workflow: WorkflowResponse; workflow: WorkflowResponse;
user: { id: string; email?: string }; user: { id: string; email?: string };
}) {
return (
<UnsavedChangesProvider>
<WorkflowSettingsInner workflow={workflow} user={user} />
</UnsavedChangesProvider>
);
}
function WorkflowSettingsInner({
workflow,
user,
}: {
workflow: WorkflowResponse;
user: { id: string; email?: string };
}) { }) {
const router = useRouter(); const router = useRouter();
const { dirtySections, confirmNavigate } = useUnsavedChangesContext();
const [isRecordingsDialogOpen, setIsRecordingsDialogOpen] = useState(false); const [isRecordingsDialogOpen, setIsRecordingsDialogOpen] = useState(false);
const [isEmbedDialogOpen, setIsEmbedDialogOpen] = useState(false); const [isEmbedDialogOpen, setIsEmbedDialogOpen] = useState(false);
@ -921,7 +985,7 @@ function WorkflowSettingsContent({
<Button <Button
variant="ghost" variant="ghost"
size="icon" size="icon"
onClick={() => router.push(`/workflow/${workflowId}`)} onClick={() => confirmNavigate(() => router.push(`/workflow/${workflowId}`))}
> >
<ArrowLeft className="h-4 w-4" /> <ArrowLeft className="h-4 w-4" />
</Button> </Button>
@ -1047,13 +1111,16 @@ function WorkflowSettingsContent({
<a <a
key={item.id} key={item.id}
href={`#${item.id}`} href={`#${item.id}`}
className={`block rounded-md px-2 py-1 text-sm transition-colors hover:text-foreground ${ className={`flex items-center gap-1.5 rounded-md px-2 py-1 text-sm transition-colors hover:text-foreground ${
activeSection === item.id activeSection === item.id
? "font-medium text-foreground" ? "font-medium text-foreground"
: "text-muted-foreground" : "text-muted-foreground"
}`} }`}
> >
{item.label} {item.label}
{dirtySections.has(item.id) && (
<span className="h-1.5 w-1.5 rounded-full bg-orange-500" />
)}
</a> </a>
))} ))}
</div> </div>

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -2230,6 +2230,20 @@ export type RecordingResponseSchema = {
is_active: boolean; is_active: boolean;
}; };
/**
* RecordingUpdateRequestSchema
*
* Request schema for updating a recording's ID.
*/
export type RecordingUpdateRequestSchema = {
/**
* Recording Id
*
* New descriptive recording ID
*/
recording_id: string;
};
/** /**
* RecordingUploadResponseSchema * RecordingUploadResponseSchema
* *
@ -8885,13 +8899,13 @@ export type ListRecordingsApiV1WorkflowRecordingsGetData = {
'X-API-Key'?: string | null; 'X-API-Key'?: string | null;
}; };
path?: never; path?: never;
query: { query?: {
/** /**
* Workflow Id * Workflow Id
* *
* Workflow ID * Filter by workflow ID
*/ */
workflow_id: number; workflow_id?: number | null;
/** /**
* Tts Provider * Tts Provider
* *
@ -9017,6 +9031,50 @@ export type DeleteRecordingApiV1WorkflowRecordingsRecordingIdDeleteResponses = {
200: unknown; 200: unknown;
}; };
export type UpdateRecordingApiV1WorkflowRecordingsIdPatchData = {
body: RecordingUpdateRequestSchema;
headers?: {
/**
* Authorization
*/
authorization?: string | null;
/**
* X-Api-Key
*/
'X-API-Key'?: string | null;
};
path: {
/**
* Id
*/
id: number;
};
query?: never;
url: '/api/v1/workflow-recordings/{id}';
};
export type UpdateRecordingApiV1WorkflowRecordingsIdPatchErrors = {
/**
* Not found
*/
404: unknown;
/**
* Validation Error
*/
422: HttpValidationError;
};
export type UpdateRecordingApiV1WorkflowRecordingsIdPatchError = UpdateRecordingApiV1WorkflowRecordingsIdPatchErrors[keyof UpdateRecordingApiV1WorkflowRecordingsIdPatchErrors];
export type UpdateRecordingApiV1WorkflowRecordingsIdPatchResponses = {
/**
* Successful Response
*/
200: RecordingResponseSchema;
};
export type UpdateRecordingApiV1WorkflowRecordingsIdPatchResponse = UpdateRecordingApiV1WorkflowRecordingsIdPatchResponses[keyof UpdateRecordingApiV1WorkflowRecordingsIdPatchResponses];
export type TranscribeAudioApiV1WorkflowRecordingsTranscribePostData = { export type TranscribeAudioApiV1WorkflowRecordingsTranscribePostData = {
body: BodyTranscribeAudioApiV1WorkflowRecordingsTranscribePost; body: BodyTranscribeAudioApiV1WorkflowRecordingsTranscribePost;
headers?: { headers?: {

View file

@ -0,0 +1,97 @@
import { useId } from "react";

import type { RecordingResponseSchema } from "@/client/types.gen";
import { Label } from "@/components/ui/label";
import { RadioGroup, RadioGroupItem } from "@/components/ui/radio-group";
import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from "@/components/ui/select";
/** Props for the text/audio toggle rendered by `TextOrAudioInput`. */
interface TextOrAudioInputProps {
  /** Currently selected mode; drives which radio is checked and what is rendered below it. */
  type: 'text' | 'audio';
  /** Called with the newly selected mode when the radio selection changes. */
  onTypeChange: (type: 'text' | 'audio') => void;
  /** `recording_id` of the selected recording, passed to the dropdown as its value. */
  recordingId: string;
  /** Called with the chosen `recording_id` when the dropdown selection changes. */
  onRecordingIdChange: (id: string) => void;
  /** Recordings offered in the dropdown; the component defaults this to an empty list. */
  recordings?: RecordingResponseSchema[];
  /** Rendered when type === 'text' */
  children: React.ReactNode;
}
/**
 * Radio toggle between a caller-supplied text input and a recording dropdown.
 *
 * When `type` is `'text'` the `children` are rendered as-is; otherwise a
 * `RecordingSelect` bound to `recordingId` / `onRecordingIdChange` is shown.
 */
export function TextOrAudioInput({
  type,
  onTypeChange,
  recordingId,
  onRecordingIdChange,
  recordings = [],
  children,
}: TextOrAudioInputProps) {
  // Derive element ids per instance. Fixed ids would collide when more than
  // one TextOrAudioInput is mounted at once, making a <Label htmlFor> target
  // the radio of a different instance.
  const idPrefix = useId();
  const textId = `${idPrefix}-text`;
  const audioId = `${idPrefix}-audio`;

  return (
    <>
      <RadioGroup
        value={type}
        onValueChange={(value) => onTypeChange(value as 'text' | 'audio')}
        className="flex items-center gap-4"
      >
        <div className="flex items-center gap-2">
          <RadioGroupItem value="text" id={textId} />
          <Label htmlFor={textId} className="font-normal cursor-pointer">Text</Label>
        </div>
        <div className="flex items-center gap-2">
          <RadioGroupItem value="audio" id={audioId} />
          <Label htmlFor={audioId} className="font-normal cursor-pointer">Audio</Label>
        </div>
      </RadioGroup>
      {type === 'text' ? (
        children
      ) : (
        <RecordingSelect
          value={recordingId}
          onChange={onRecordingIdChange}
          recordings={recordings}
        />
      )}
    </>
  );
}
/** Props for the standalone recording dropdown. */
interface RecordingSelectProps {
  /** `recording_id` of the currently selected recording. */
  value: string;
  /** Called with the `recording_id` of the option the user picks. */
  onChange: (id: string) => void;
  /** Recordings to list; an empty array renders a disabled "No recordings available" entry. */
  recordings: RecordingResponseSchema[];
}
/**
 * Labelled dropdown for picking one pre-recorded audio file.
 *
 * Exported on its own so that callers which only need the dropdown (for
 * example tool configs that render their own none/custom/audio radio) can
 * use it without the text/audio toggle.
 */
export function RecordingSelect({ value, onChange, recordings }: RecordingSelectProps) {
  const hasRecordings = recordings.length !== 0;

  // One entry per recording, or a single disabled placeholder when the list is empty.
  const options = hasRecordings ? (
    recordings.map((recording) => {
      // Prefer the uploaded file name; fall back to the recording id.
      const displayName =
        (recording.metadata?.original_filename as string) || recording.recording_id;

      return (
        <SelectItem key={recording.recording_id} value={recording.recording_id}>
          <span className="truncate">{displayName}</span>
          {recording.transcript && (
            <span className="text-xs text-muted-foreground ml-2 truncate">
              {recording.transcript}
            </span>
          )}
        </SelectItem>
      );
    })
  ) : (
    <SelectItem value="__empty__" disabled>
      No recordings available
    </SelectItem>
  );

  return (
    <div className="space-y-2">
      <Label className="text-xs text-muted-foreground">
        Select a pre-recorded audio file to play.
      </Label>
      <Select value={value} onValueChange={onChange}>
        <SelectTrigger className="w-full">
          <SelectValue placeholder="Select a recording" />
        </SelectTrigger>
        <SelectContent>{options}</SelectContent>
      </Select>
    </div>
  );
}

View file

@ -4,6 +4,7 @@ import { useCallback, useEffect, useState } from 'react';
import { useWorkflow, useWorkflowOptional } from "@/app/workflow/[workflowId]/contexts/WorkflowContext"; import { useWorkflow, useWorkflowOptional } from "@/app/workflow/[workflowId]/contexts/WorkflowContext";
import { useWorkflowStore } from "@/app/workflow/[workflowId]/stores/workflowStore"; import { useWorkflowStore } from "@/app/workflow/[workflowId]/stores/workflowStore";
import { TextOrAudioInput } from "@/components/flow/TextOrAudioInput";
import { Button } from "@/components/ui/button"; import { Button } from "@/components/ui/button";
import { Dialog, DialogContent, DialogFooter, DialogHeader, DialogTitle } from "@/components/ui/dialog"; import { Dialog, DialogContent, DialogFooter, DialogHeader, DialogTitle } from "@/components/ui/dialog";
import { Input } from "@/components/ui/input"; import { Input } from "@/components/ui/input";
@ -24,9 +25,12 @@ interface EdgeDetailsDialogProps {
const EdgeDetailsDialog = ({ open, onOpenChange, data, onSave }: EdgeDetailsDialogProps) => { const EdgeDetailsDialog = ({ open, onOpenChange, data, onSave }: EdgeDetailsDialogProps) => {
const readOnly = useWorkflowOptional()?.readOnly ?? false; const readOnly = useWorkflowOptional()?.readOnly ?? false;
const { recordings } = useWorkflow();
const [condition, setCondition] = useState(data?.condition ?? ''); const [condition, setCondition] = useState(data?.condition ?? '');
const [label, setLabel] = useState(data?.label ?? ''); const [label, setLabel] = useState(data?.label ?? '');
const [transitionSpeech, setTransitionSpeech] = useState(data?.transition_speech ?? ''); const [transitionSpeech, setTransitionSpeech] = useState(data?.transition_speech ?? '');
const [transitionSpeechType, setTransitionSpeechType] = useState<'text' | 'audio'>(data?.transition_speech_type ?? 'text');
const [transitionSpeechRecordingId, setTransitionSpeechRecordingId] = useState(data?.transition_speech_recording_id ?? '');
// Update form state when data changes (e.g., from undo/redo) // Update form state when data changes (e.g., from undo/redo)
useEffect(() => { useEffect(() => {
@ -34,13 +38,21 @@ const EdgeDetailsDialog = ({ open, onOpenChange, data, onSave }: EdgeDetailsDial
setCondition(data?.condition ?? ''); setCondition(data?.condition ?? '');
setLabel(data?.label ?? ''); setLabel(data?.label ?? '');
setTransitionSpeech(data?.transition_speech ?? ''); setTransitionSpeech(data?.transition_speech ?? '');
setTransitionSpeechType(data?.transition_speech_type ?? 'text');
setTransitionSpeechRecordingId(data?.transition_speech_recording_id ?? '');
} }
}, [data, open]); }, [data, open]);
const handleSave = useCallback(() => { const handleSave = useCallback(() => {
onSave({ condition: condition, label: label, transition_speech: transitionSpeech || undefined }); onSave({
condition,
label,
transition_speech: transitionSpeechType === 'text' ? (transitionSpeech || undefined) : undefined,
transition_speech_type: transitionSpeechType,
transition_speech_recording_id: transitionSpeechType === 'audio' ? (transitionSpeechRecordingId || undefined) : undefined,
});
onOpenChange(false); onOpenChange(false);
}, [condition, label, transitionSpeech, onSave, onOpenChange]); }, [condition, label, transitionSpeech, transitionSpeechType, transitionSpeechRecordingId, onSave, onOpenChange]);
// Handle Cmd+S / Ctrl+S keyboard shortcut to save // Handle Cmd+S / Ctrl+S keyboard shortcut to save
useEffect(() => { useEffect(() => {
@ -99,18 +111,28 @@ const EdgeDetailsDialog = ({ open, onOpenChange, data, onSave }: EdgeDetailsDial
<div className="grid gap-2"> <div className="grid gap-2">
<Label>Transition Speech</Label> <Label>Transition Speech</Label>
<Label className="text-xs text-muted-foreground"> <Label className="text-xs text-muted-foreground">
Optional text the assistant will speak right before transitioning to the node. Optional text or audio the assistant will play right before transitioning to the node.
This text will not be attached in Conversation Context. Use this as simple filler to reduce latency. This will not be attached in Conversation Context. Use this as simple filler to reduce latency.
</Label> </Label>
<div className="flex items-start gap-2 rounded-md bg-amber-50 p-2 text-xs text-amber-700 border border-amber-200"> <TextOrAudioInput
<AlertCircle className="h-3.5 w-3.5 mt-0.5 shrink-0" /> type={transitionSpeechType}
<span>This text is spoken as-is. For multilingual workflows, choose your phrasing carefully.</span> onTypeChange={setTransitionSpeechType}
</div> recordingId={transitionSpeechRecordingId}
<Textarea onRecordingIdChange={setTransitionSpeechRecordingId}
value={transitionSpeech} recordings={recordings ?? []}
placeholder="e.g. Let me transfer you to our billing department..." >
onChange={(e) => setTransitionSpeech(e.target.value)} <>
/> <div className="flex items-start gap-2 rounded-md bg-amber-50 p-2 text-xs text-amber-700 border border-amber-200">
<AlertCircle className="h-3.5 w-3.5 mt-0.5 shrink-0" />
<span>This text is spoken as-is. For multilingual workflows, choose your phrasing carefully.</span>
</div>
<Textarea
value={transitionSpeech}
placeholder="e.g. Let me transfer you to our billing department..."
onChange={(e) => setTransitionSpeech(e.target.value)}
/>
</>
</TextOrAudioInput>
</div> </div>
</div> </div>
<DialogFooter> <DialogFooter>

View file

@ -8,6 +8,7 @@ import type { RecordingResponseSchema } from "@/client/types.gen";
import { DocumentBadges } from "@/components/flow/DocumentBadges"; import { DocumentBadges } from "@/components/flow/DocumentBadges";
import { DocumentSelector } from "@/components/flow/DocumentSelector"; import { DocumentSelector } from "@/components/flow/DocumentSelector";
import { MentionTextarea } from "@/components/flow/MentionTextarea"; import { MentionTextarea } from "@/components/flow/MentionTextarea";
import { TextOrAudioInput } from "@/components/flow/TextOrAudioInput";
import { ToolBadges } from "@/components/flow/ToolBadges"; import { ToolBadges } from "@/components/flow/ToolBadges";
import { ToolSelector } from "@/components/flow/ToolSelector"; import { ToolSelector } from "@/components/flow/ToolSelector";
import { ExtractionVariable, FlowNodeData } from "@/components/flow/types"; import { ExtractionVariable, FlowNodeData } from "@/components/flow/types";
@ -26,8 +27,12 @@ import { useNodeHandlers } from "./common/useNodeHandlers";
interface StartCallEditFormProps { interface StartCallEditFormProps {
nodeData: FlowNodeData; nodeData: FlowNodeData;
greetingType: 'text' | 'audio';
setGreetingType: (value: 'text' | 'audio') => void;
greeting: string; greeting: string;
setGreeting: (value: string) => void; setGreeting: (value: string) => void;
greetingRecordingId: string;
setGreetingRecordingId: (value: string) => void;
prompt: string; prompt: string;
setPrompt: (value: string) => void; setPrompt: (value: string) => void;
name: string; name: string;
@ -73,7 +78,9 @@ export const StartCall = memo(({ data, selected, id }: StartCallNodeProps) => {
const { saveWorkflow, tools, documents, recordings } = useWorkflow(); const { saveWorkflow, tools, documents, recordings } = useWorkflow();
// Form state // Form state
const [greetingType, setGreetingType] = useState<'text' | 'audio'>(data.greeting_type ?? "text");
const [greeting, setGreeting] = useState(data.greeting ?? ""); const [greeting, setGreeting] = useState(data.greeting ?? "");
const [greetingRecordingId, setGreetingRecordingId] = useState(data.greeting_recording_id ?? "");
const [prompt, setPrompt] = useState(data.prompt ?? ""); const [prompt, setPrompt] = useState(data.prompt ?? "");
const [name, setName] = useState(data.name); const [name, setName] = useState(data.name);
const [allowInterrupt, setAllowInterrupt] = useState(data.allow_interrupt ?? true); const [allowInterrupt, setAllowInterrupt] = useState(data.allow_interrupt ?? true);
@ -109,7 +116,9 @@ export const StartCall = memo(({ data, selected, id }: StartCallNodeProps) => {
handleSaveNodeData({ handleSaveNodeData({
...data, ...data,
greeting: greeting || undefined, greeting_type: greetingType,
greeting: greetingType === 'text' ? (greeting || undefined) : undefined,
greeting_recording_id: greetingType === 'audio' ? (greetingRecordingId || undefined) : undefined,
prompt, prompt,
name, name,
allow_interrupt: allowInterrupt, allow_interrupt: allowInterrupt,
@ -132,7 +141,9 @@ export const StartCall = memo(({ data, selected, id }: StartCallNodeProps) => {
// Reset form state when dialog opens // Reset form state when dialog opens
const handleOpenChange = (newOpen: boolean) => { const handleOpenChange = (newOpen: boolean) => {
if (newOpen) { if (newOpen) {
setGreetingType(data.greeting_type ?? "text");
setGreeting(data.greeting ?? ""); setGreeting(data.greeting ?? "");
setGreetingRecordingId(data.greeting_recording_id ?? "");
setPrompt(data.prompt ?? ""); setPrompt(data.prompt ?? "");
setName(data.name); setName(data.name);
setAllowInterrupt(data.allow_interrupt ?? true); setAllowInterrupt(data.allow_interrupt ?? true);
@ -154,7 +165,9 @@ export const StartCall = memo(({ data, selected, id }: StartCallNodeProps) => {
// Update form state when data changes (e.g., from undo/redo) // Update form state when data changes (e.g., from undo/redo)
useEffect(() => { useEffect(() => {
if (open) { if (open) {
setGreetingType(data.greeting_type ?? "text");
setGreeting(data.greeting ?? ""); setGreeting(data.greeting ?? "");
setGreetingRecordingId(data.greeting_recording_id ?? "");
setPrompt(data.prompt ?? ""); setPrompt(data.prompt ?? "");
setName(data.name); setName(data.name);
setAllowInterrupt(data.allow_interrupt ?? true); setAllowInterrupt(data.allow_interrupt ?? true);
@ -247,8 +260,12 @@ export const StartCall = memo(({ data, selected, id }: StartCallNodeProps) => {
{open && ( {open && (
<StartCallEditForm <StartCallEditForm
nodeData={data} nodeData={data}
greetingType={greetingType}
setGreetingType={setGreetingType}
greeting={greeting} greeting={greeting}
setGreeting={setGreeting} setGreeting={setGreeting}
greetingRecordingId={greetingRecordingId}
setGreetingRecordingId={setGreetingRecordingId}
prompt={prompt} prompt={prompt}
setPrompt={setPrompt} setPrompt={setPrompt}
name={name} name={name}
@ -288,8 +305,12 @@ export const StartCall = memo(({ data, selected, id }: StartCallNodeProps) => {
}); });
const StartCallEditForm = ({ const StartCallEditForm = ({
greetingType,
setGreetingType,
greeting, greeting,
setGreeting, setGreeting,
greetingRecordingId,
setGreetingRecordingId,
prompt, prompt,
setPrompt, setPrompt,
name, name,
@ -362,15 +383,22 @@ const StartCallEditForm = ({
<Label>Greeting</Label> <Label>Greeting</Label>
<Label className="text-xs text-muted-foreground"> <Label className="text-xs text-muted-foreground">
Optional greeting message played via TTS when the call starts. If set, this will be spoken directly instead of generating a response from the LLM. Supports template variables like {"{{variable_name}}"}. Optional greeting played when the call starts. Choose between a text message (spoken via TTS) or a pre-recorded audio file.
</Label> </Label>
<MentionTextarea <TextOrAudioInput
value={greeting} type={greetingType}
onChange={setGreeting} onTypeChange={setGreetingType}
className="min-h-[60px] max-h-[200px] resize-none overflow-y-auto" recordingId={greetingRecordingId}
placeholder="e.g. Hello {{first_name}}, this is Sarah calling from Acme Corp." onRecordingIdChange={setGreetingRecordingId}
recordings={recordings} recordings={recordings}
/> >
<Textarea
value={greeting}
onChange={(e) => setGreeting(e.target.value)}
className="min-h-[60px] max-h-[200px] resize-none overflow-y-auto"
placeholder="e.g. Hello {{first_name}}, this is Sarah calling from Acme Corp."
/>
</TextOrAudioInput>
<Label>Prompt</Label> <Label>Prompt</Label>
<Label className="text-xs text-muted-foreground"> <Label className="text-xs text-muted-foreground">

View file

@ -24,6 +24,8 @@ export type FlowNodeData = {
extraction_variables?: ExtractionVariable[]; extraction_variables?: ExtractionVariable[];
add_global_prompt?: boolean; add_global_prompt?: boolean;
greeting?: string; greeting?: string;
greeting_type?: 'text' | 'audio';
greeting_recording_id?: string;
wait_for_user_greeting?: boolean; wait_for_user_greeting?: boolean;
detect_voicemail?: boolean; detect_voicemail?: boolean;
delayed_start?: boolean; delayed_start?: boolean;
@ -79,6 +81,8 @@ export type FlowEdgeData = {
condition: string; condition: string;
label: string; label: string;
transition_speech?: string; transition_speech?: string;
transition_speech_type?: 'text' | 'audio';
transition_speech_recording_id?: string;
invalid?: boolean; invalid?: boolean;
validationMessage?: string | null; validationMessage?: string | null;
} }

View file

@ -2,6 +2,7 @@
import type { Team } from "@stackframe/stack"; import type { Team } from "@stackframe/stack";
import { import {
AudioLines,
Brain, Brain,
ChevronLeft, ChevronLeft,
ChevronRight, ChevronRight,
@ -135,6 +136,11 @@ export function AppSidebar() {
url: "/files", url: "/files",
icon: Database, icon: Database,
}, },
{
title: "Recordings",
url: "/recordings",
icon: AudioLines,
},
// { // {
// title: "Integrations", // title: "Integrations",
// url: "/integrations", // url: "/integrations",

View file

@ -0,0 +1,269 @@
"use client";
import { createContext, useCallback, useContext, useEffect, useLayoutEffect, useRef, useState } from "react";
import {
AlertDialog,
AlertDialogAction,
AlertDialogCancel,
AlertDialogContent,
AlertDialogDescription,
AlertDialogFooter,
AlertDialogHeader,
AlertDialogTitle,
} from "@/components/ui/alert-dialog";
// ---------------------------------------------------------------------------
// Context
// ---------------------------------------------------------------------------
/** Shape of the value exposed by `UnsavedChangesProvider` to its descendants. */
interface UnsavedChangesContextValue {
  /** Record a section's dirty state: the id is added to the dirty set when `isDirty` is true, removed otherwise. */
  register: (id: string, isDirty: boolean) => void;
  /** Remove a section id from the dirty set (used when the section unmounts). */
  unregister: (id: string) => void;
  /** True when at least one section id is in the dirty set. */
  hasDirtyChanges: boolean;
  /** Ids of the sections currently registered as dirty. */
  dirtySections: Set<string>;
  /** Wrap programmatic navigation (e.g. router.push) to guard against unsaved changes. */
  confirmNavigate: (navigate: () => void) => void;
}
// Defaults to null so the hooks below can detect a missing provider and throw.
const UnsavedChangesContext = createContext<UnsavedChangesContextValue | null>(null);
// ---------------------------------------------------------------------------
// Provider
// ---------------------------------------------------------------------------
/**
 * Wraps a page to guard against accidental navigation when sections have
 * unsaved changes. Intercepts:
 *
 * - In-app link clicks (document-level click capture on `<a>` tags)
 * - Browser back / forward (`popstate` with history-state tracking)
 *
 * Programmatic navigation is guarded by passing it through `confirmNavigate`
 * from the context. When any section is dirty, the navigation is parked and a
 * confirmation dialog is shown; it only runs if the user chooses to discard.
 *
 * Sections register via the `useUnsavedChanges` hook.
 */
export function UnsavedChangesProvider({ children }: { children: React.ReactNode }) {
  const [dirtySections, setDirtySections] = useState<Set<string>>(new Set());
  const [showDialog, setShowDialog] = useState(false);
  // Navigation callback waiting for the user's answer in the dialog.
  const pendingNavigate = useRef<(() => void) | null>(null);

  const hasDirtyChanges = dirtySections.size > 0;
  // Mirror the latest dirty flag in a ref so the long-lived DOM listeners
  // below always read the current value without being re-subscribed.
  const hasDirtyRef = useRef(hasDirtyChanges);
  hasDirtyRef.current = hasDirtyChanges;

  // -- Section registration ------------------------------------------------

  const register = useCallback((id: string, isDirty: boolean) => {
    setDirtySections((prev) => {
      // Return the same Set when membership already matches so React can bail
      // out of the update instead of re-rendering the provider and every
      // context consumer (mirrors the early return in `unregister`).
      if (prev.has(id) === isDirty) return prev;
      const next = new Set(prev);
      if (isDirty) next.add(id);
      else next.delete(id);
      return next;
    });
  }, []);

  const unregister = useCallback((id: string) => {
    setDirtySections((prev) => {
      if (!prev.has(id)) return prev;
      const next = new Set(prev);
      next.delete(id);
      return next;
    });
  }, []);

  // -- Helper: prompt or proceed -------------------------------------------

  const askOrProceed = useCallback((proceed: () => void) => {
    if (!hasDirtyRef.current) {
      proceed();
      return;
    }
    pendingNavigate.current = proceed;
    // Open the dialog on the next tick rather than inside the current event.
    setTimeout(() => setShowDialog(true), 0);
  }, []);

  // -- 1. Intercept <a> clicks in capture phase -----------------------------
  //
  // Next.js <Link> renders <a> tags. By listening in the capture phase we
  // intercept the click before React / Next.js processes it. If the user
  // confirms, we navigate programmatically via window.location.

  useEffect(() => {
    const handleClick = (e: MouseEvent) => {
      if (!hasDirtyRef.current) return;

      const target = e.target as HTMLElement;
      const link = target.closest("a[href]") as HTMLAnchorElement | null;
      if (!link) return;

      const href = link.getAttribute("href");
      if (!href) return;

      // Skip links with an explicit scheme (http:, https:, mailto:, tel:, ...)
      // and protocol-relative links; only app-relative hrefs are guarded.
      if (/^[a-z][a-z0-9+.-]*:/i.test(href) || href.startsWith("//")) return;
      // Skip hash-only links (in-page anchors)
      if (href.startsWith("#")) return;
      // Skip links that open in a new tab/window
      if (link.target && link.target !== "_self") return;
      // Skip download links
      if (link.hasAttribute("download")) return;
      // Skip if modifier keys are held (Ctrl+click, Cmd+click, etc.)
      if (e.metaKey || e.ctrlKey || e.shiftKey || e.altKey) return;
      // Skip non-left clicks
      if (e.button !== 0) return;

      // Block the navigation and ask the user
      e.preventDefault();
      e.stopPropagation();
      e.stopImmediatePropagation();

      askOrProceed(() => {
        // Navigate after user confirms
        window.location.href = href;
      });
    };

    // Capture phase so we fire before React / Next.js handlers
    document.addEventListener("click", handleClick, true);
    return () => document.removeEventListener("click", handleClick, true);
  }, [askOrProceed]);

  // -- 2. Browser back / forward (`popstate`) ------------------------------
  //
  // When the browser fires popstate the URL has already changed. We reverse
  // the navigation with history.go(-delta) to "undo" it, then show the
  // dialog. If confirmed, we replay it with history.go(delta).

  useLayoutEffect(() => {
    // Track our own history stack index so we can correctly reverse
    // back/forward regardless of how many entries deep we are.
    let stackIndex = (history.state?.__unsaved_guard_index as number) ?? 0;

    const originalPushState = history.pushState.bind(history);
    const originalReplaceState = history.replaceState.bind(history);

    // Augment pushState to track stack depth
    history.pushState = function (state, unused, url) {
      stackIndex++;
      const augmented = { ...state, __unsaved_guard_index: stackIndex };
      return originalPushState(augmented, unused, url);
    };

    // replaceState keeps the current depth; just carry the index along
    history.replaceState = function (state, unused, url) {
      const augmented = { ...state, __unsaved_guard_index: stackIndex };
      return originalReplaceState(augmented, unused, url);
    };

    // Write initial index if not present
    if (history.state?.__unsaved_guard_index == null) {
      originalReplaceState(
        { ...history.state, __unsaved_guard_index: stackIndex },
        "",
        location.href,
      );
    }

    const handlePopState = (e: PopStateEvent) => {
      if (!hasDirtyRef.current) {
        // Not dirty — accept navigation, update our tracked index
        stackIndex = (e.state?.__unsaved_guard_index as number) ?? stackIndex;
        return;
      }

      const nextIndex = (e.state?.__unsaved_guard_index as number) ?? 0;
      const delta = nextIndex - stackIndex;
      // delta === 0 is the popstate produced by our own history.go() calls
      // below (undo or confirmed replay); nothing left to do for those.
      if (delta === 0) return;

      // Undo the navigation the browser already did
      history.go(-delta);

      askOrProceed(() => {
        // User confirmed — replay the navigation
        stackIndex = nextIndex;
        history.go(delta);
      });
    };

    window.addEventListener("popstate", handlePopState);

    return () => {
      history.pushState = originalPushState;
      history.replaceState = originalReplaceState;
      window.removeEventListener("popstate", handlePopState);
    };
  }, [askOrProceed]);

  // -- Dialog handlers -----------------------------------------------------

  const handleConfirm = useCallback(() => {
    setShowDialog(false);
    // Clear the slot before running so the callback cannot be invoked twice.
    const nav = pendingNavigate.current;
    pendingNavigate.current = null;
    nav?.();
  }, []);

  const handleCancel = useCallback(() => {
    setShowDialog(false);
    pendingNavigate.current = null;
  }, []);

  // -- Render --------------------------------------------------------------

  return (
    <UnsavedChangesContext.Provider
      value={{ register, unregister, hasDirtyChanges, dirtySections, confirmNavigate: askOrProceed }}
    >
      {children}
      <AlertDialog open={showDialog} onOpenChange={(open) => { if (!open) handleCancel(); }}>
        <AlertDialogContent>
          <AlertDialogHeader>
            <AlertDialogTitle>Unsaved changes</AlertDialogTitle>
            <AlertDialogDescription>
              You have unsaved changes that will be lost. Are you sure you want to leave?
            </AlertDialogDescription>
          </AlertDialogHeader>
          <AlertDialogFooter>
            <AlertDialogCancel onClick={handleCancel}>Stay on page</AlertDialogCancel>
            <AlertDialogAction onClick={handleConfirm}>Discard changes</AlertDialogAction>
          </AlertDialogFooter>
        </AlertDialogContent>
      </AlertDialog>
    </UnsavedChangesContext.Provider>
  );
}
// ---------------------------------------------------------------------------
// Hooks
// ---------------------------------------------------------------------------
/**
 * Report a section's dirty flag to the nearest UnsavedChangesProvider and
 * remove the section again when the component unmounts (or its id changes).
 *
 * Throws if no provider is present above the caller.
 *
 * @example
 * useUnsavedChanges("general", isDirty);
 */
export function useUnsavedChanges(sectionId: string, isDirty: boolean) {
  const context = useContext(UnsavedChangesContext);
  if (context === null) {
    throw new Error("useUnsavedChanges must be used within UnsavedChangesProvider");
  }

  const register = context.register;
  const unregister = context.unregister;

  // Push the current flag whenever the id or the flag changes.
  useEffect(() => {
    register(sectionId, isDirty);
  }, [sectionId, isDirty, register]);

  // Cleanup-only effect: drop the section when it goes away.
  useEffect(
    () => () => {
      unregister(sectionId);
    },
    [sectionId, unregister],
  );
}
/**
 * Return the full unsaved-changes context value (e.g. to read dirtySections
 * or call confirmNavigate). Throws when called outside a provider.
 */
export function useUnsavedChangesContext() {
  const value = useContext(UnsavedChangesContext);
  if (value === null) {
    throw new Error("useUnsavedChangesContext must be used within UnsavedChangesProvider");
  }
  return value;
}