feat: add recording audio option in tool and node transitions (#232)

* feat: allow uploading recording as part of node transition
* feat: allow recordings in tool transitions
* chore: fix tests
2026-06-28 08:49:42 +02:00 · 2026-04-10 17:53:42 +05:30 · 2026-04-10 17:53:42 +05:30 · 7c245051d2
commit 7c245051d2
parent 3f19a16e7f
54 changed files with 3575 additions and 640 deletions
--- a/api/routes/telephony.py
+++ b/api/routes/telephony.py
@ -178,6 +178,11 @@ async def initiate_call(
    workflow_run_id = request.workflow_run_id

    if not workflow_run_id:
+        # Fetch workflow to merge template context variables (e.g. caller_number,
+        # called_number set in workflow settings for testing pre-call data fetch)
+        workflow = await db_client.get_workflow_by_id(request.workflow_id)
+        template_vars = (workflow.template_context_variables or {}) if workflow else {}
+
        numeric_suffix = int(str(uuid.uuid4()).replace("-", "")[:8], 16) % 100000000
        workflow_run_name = f"WR-TEL-OUT-{numeric_suffix:08d}"
        workflow_run = await db_client.create_workflow_run(
@ -187,6 +192,7 @@ async def initiate_call(
            user_id=user.id,
            call_type=CallType.OUTBOUND,
            initial_context={
+                **template_vars,
                "phone_number": phone_number,
                "called_number": phone_number,
                "provider": provider.PROVIDER_NAME,
--- a/api/routes/tool.py
+++ b/api/routes/tool.py
@ -45,17 +45,29 @@ class HttpApiConfig(BaseModel):
    timeout_ms: Optional[int] = Field(
        default=5000, description="Request timeout in milliseconds"
    )
+    customMessage: Optional[str] = Field(
+        default=None, description="Custom message to play after tool execution"
+    )
+    customMessageType: Optional[Literal["text", "audio"]] = Field(
+        default=None, description="Type of custom message: text or audio"
+    )
+    customMessageRecordingId: Optional[str] = Field(
+        default=None, description="Recording ID for audio custom message"
+    )


 class EndCallConfig(BaseModel):
    """Configuration for End Call tools."""

-    messageType: Literal["none", "custom"] = Field(
+    messageType: Literal["none", "custom", "audio"] = Field(
        default="none", description="Type of goodbye message"
    )
    customMessage: Optional[str] = Field(
        default=None, description="Custom message to play before ending the call"
    )
+    audioRecordingId: Optional[str] = Field(
+        default=None, description="Recording ID for audio goodbye message"
+    )
    endCallReason: bool = Field(
        default=False,
        description="When enabled, LLM must provide a reason for ending the call. "
@ -74,12 +86,15 @@ class TransferCallConfig(BaseModel):
    destination: str = Field(
        description="Phone number or SIP endpoint to transfer the call to (E.164 format e.g., +1234567890, or SIP endpoint e.g., PJSIP/1234)"
    )
-    messageType: Literal["none", "custom"] = Field(
+    messageType: Literal["none", "custom", "audio"] = Field(
        default="none", description="Type of message to play before transfer"
    )
    customMessage: Optional[str] = Field(
        default=None, description="Custom message to play before transferring the call"
    )
+    audioRecordingId: Optional[str] = Field(
+        default=None, description="Recording ID for audio message before transfer"
+    )
    timeout: int = Field(
        default=30,
        ge=5,
--- a/api/routes/workflow_recording.py
+++ b/api/routes/workflow_recording.py
@ -16,6 +16,7 @@ from api.schemas.workflow_recording import (
    BatchRecordingUploadResponseSchema,
    RecordingListResponseSchema,
    RecordingResponseSchema,
+    RecordingUpdateRequestSchema,
    RecordingUploadResponseSchema,
 )
 from api.services.auth.depends import get_user
@ -25,11 +26,11 @@ from api.services.storage import storage_fs
 router = APIRouter(prefix="/workflow-recordings", tags=["workflow-recordings"])


-async def _generate_unique_recording_id() -> str:
-    """Generate a globally unique short recording ID."""
+async def _generate_unique_recording_id(organization_id: int) -> str:
+    """Generate a unique short recording ID within an organization."""
    for _ in range(10):
        rid = generate_short_id(8)
-        exists = await db_client.check_recording_id_exists(rid)
+        exists = await db_client.check_recording_id_exists(rid, organization_id)
        if not exists:
            return rid
    raise HTTPException(
@ -69,11 +70,13 @@ async def get_upload_urls(
    try:
        items = []
        for fd in request.files:
-            recording_id = await _generate_unique_recording_id()
+            recording_id = await _generate_unique_recording_id(
+                user.selected_organization_id
+            )

            storage_key = (
                f"recordings/{user.selected_organization_id}"
-                f"/{request.workflow_id}/{recording_id}"
+                f"/{recording_id}"
                f"/{fd.filename}"
            )

@ -100,7 +103,7 @@ async def get_upload_urls(

        logger.info(
            f"Generated {len(items)} recording upload URL(s), "
-            f"workflow {request.workflow_id}, org {user.selected_organization_id}"
+            f"org {user.selected_organization_id}"
        )

        return BatchRecordingUploadResponseSchema(items=items)
@ -131,22 +134,20 @@ async def create_recordings(
        for rec_req in request.recordings:
            recording = await db_client.create_recording(
                recording_id=rec_req.recording_id,
-                workflow_id=rec_req.workflow_id,
                organization_id=user.selected_organization_id,
-                tts_provider=rec_req.tts_provider,
-                tts_model=rec_req.tts_model,
-                tts_voice_id=rec_req.tts_voice_id,
                transcript=rec_req.transcript,
                storage_key=rec_req.storage_key,
                storage_backend=backend.value,
                created_by=user.id,
+                tts_provider=rec_req.tts_provider,
+                tts_model=rec_req.tts_model,
+                tts_voice_id=rec_req.tts_voice_id,
                metadata=rec_req.metadata,
            )
            results.append(_build_response(recording))

        logger.info(
-            f"Created {len(results)} recording(s) for "
-            f"workflow {request.recordings[0].workflow_id}"
+            f"Created {len(results)} recording(s) for org {user.selected_organization_id}"
        )

        return BatchRecordingCreateResponseSchema(recordings=results)
@ -163,10 +164,12 @@ async def create_recordings(
@router.get(
    "/",
    response_model=RecordingListResponseSchema,
-    summary="List recordings for a workflow",
+    summary="List recordings",
 )
 async def list_recordings(
-    workflow_id: Annotated[int, Query(description="Workflow ID")],
+    workflow_id: Annotated[
+        Optional[int], Query(description="Filter by workflow ID")
+    ] = None,
    tts_provider: Annotated[
        Optional[str], Query(description="Filter by TTS provider")
    ] = None,
@ -178,11 +181,11 @@ async def list_recordings(
    ] = None,
    user=Depends(get_user),
 ):
-    """List recordings for a workflow, optionally filtered by TTS configuration."""
+    """List recordings for the organization, optionally filtered."""
    try:
-        recordings = await db_client.get_recordings_for_workflow(
-            workflow_id=workflow_id,
+        recordings = await db_client.get_recordings(
            organization_id=user.selected_organization_id,
+            workflow_id=workflow_id,
            tts_provider=tts_provider,
            tts_model=tts_model,
            tts_voice_id=tts_voice_id,
@ -233,6 +236,74 @@ async def delete_recording(
        ) from exc


+@router.patch(
+    "/{id}",
+    response_model=RecordingResponseSchema,
+    summary="Update a recording's Recording ID",
+)
+async def update_recording(
+    id: int,
+    request: RecordingUpdateRequestSchema,
+    user=Depends(get_user),
+):
+    """Update the recording_id (descriptive name) of a recording."""
+    try:
+        new_id = request.recording_id.strip()
+        if not new_id:
+            raise HTTPException(status_code=400, detail="Recording ID cannot be empty")
+
+        existing = await db_client.get_recording_by_id(
+            id, user.selected_organization_id
+        )
+        if not existing:
+            raise HTTPException(status_code=404, detail="Recording not found")
+
+        if new_id == existing.recording_id:
+            return _build_response(existing)
+
+        exists = await db_client.check_recording_id_exists(
+            new_id, user.selected_organization_id
+        )
+        if exists:
+            raise HTTPException(
+                status_code=409,
+                detail=f"Recording ID '{new_id}' is already in use",
+            )
+
+        old_id = existing.recording_id
+
+        recording = await db_client.update_recording_id(
+            id=id,
+            new_recording_id=new_id,
+            organization_id=user.selected_organization_id,
+        )
+
+        if not recording:
+            raise HTTPException(status_code=404, detail="Recording not found")
+
+        # Replace old recording ID in all non-legacy workflow definitions
+        updated = await db_client.replace_recording_id_in_workflows(
+            old_id=old_id,
+            new_id=new_id,
+            organization_id=user.selected_organization_id,
+        )
+        if updated:
+            logger.info(
+                f"Updated {updated} workflow definition(s) with new recording ID "
+                f"'{old_id}' -> '{new_id}'"
+            )
+
+        return _build_response(recording)
+
+    except HTTPException:
+        raise
+    except Exception as exc:
+        logger.error(f"Error updating recording: {exc}")
+        raise HTTPException(
+            status_code=500, detail="Failed to update recording"
+        ) from exc
+
+
@router.post(
    "/transcribe",
    summary="Transcribe an audio file",