feat: add hybrid text + recording functionality in agents (#191)

* feat: add recording feature in agents * chore: pin pipecat version * feat: show usage in UI * chore: update pipecat
2026-07-01 08:59:46 +02:00 · 2026-03-16 15:04:08 +05:30 · 2026-03-16 15:04:08 +05:30 · 494c60d774
commit 494c60d774
parent f075bcb623
43 changed files with 2865 additions and 397 deletions
--- a/api/db/campaign_client.py
+++ b/api/db/campaign_client.py
@ -526,7 +526,7 @@ class CampaignClient(BaseDBClient):
                        QueuedRunModel.state == "queued",
                        QueuedRunModel.scheduled_for.is_(None),
                    )
-                    .order_by(QueuedRunModel.created_at)
+                    .order_by(func.random())
                    .limit(remaining_slots)
                    .with_for_update(skip_locked=True)
                )
--- a/api/db/db_client.py
+++ b/api/db/db_client.py
@ -13,6 +13,7 @@ from api.db.tool_client import ToolClient
 from api.db.user_client import UserClient
 from api.db.webhook_credential_client import WebhookCredentialClient
 from api.db.workflow_client import WorkflowClient
+from api.db.workflow_recording_client import WorkflowRecordingClient
 from api.db.workflow_run_client import WorkflowRunClient
 from api.db.workflow_template_client import WorkflowTemplateClient

@ -35,6 +36,7 @@ class DBClient(
    WebhookCredentialClient,
    ToolClient,
    KnowledgeBaseClient,
+    WorkflowRecordingClient,
 ):
    """
    Unified database client that combines all specialized database operations.
--- a/api/db/models.py
+++ b/api/db/models.py
@ -996,6 +996,77 @@ class KnowledgeBaseDocumentModel(Base):
    )


+class WorkflowRecordingModel(Base):
+    """Model for storing audio recordings scoped to a workflow and TTS configuration.
+
+    Recordings are used in hybrid prompts where parts of the output are pre-recorded
+    audio rather than dynamically generated TTS.
+    """
+
+    __tablename__ = "workflow_recordings"
+
+    id = Column(Integer, primary_key=True, index=True)
+
+    # Short globally unique ID (e.g. "xbhfha3k") used in prompts
+    recording_id = Column(String(16), unique=True, nullable=False, index=True)
+
+    # Scoping
+    workflow_id = Column(
+        Integer, ForeignKey("workflows.id", ondelete="CASCADE"), nullable=False
+    )
+    organization_id = Column(
+        Integer, ForeignKey("organizations.id", ondelete="CASCADE"), nullable=False
+    )
+
+    # TTS configuration scope
+    tts_provider = Column(String, nullable=False)
+    tts_model = Column(String, nullable=False)
+    tts_voice_id = Column(String, nullable=False)
+
+    # Content
+    transcript = Column(Text, nullable=False)
+
+    # Storage
+    storage_key = Column(String, nullable=False)
+    storage_backend = Column(
+        Enum("s3", "minio", name="recording_storage_backend"),
+        nullable=False,
+        default="s3",
+        server_default=text("'s3'::recording_storage_backend"),
+    )
+
+    # Extra metadata (file_size_bytes, duration_seconds, original_filename, mime_type, etc.)
+    recording_metadata = Column(
+        JSON, nullable=False, default=dict, server_default=text("'{}'::json")
+    )
+
+    # Audit
+    created_by = Column(Integer, ForeignKey("users.id"), nullable=False)
+    created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(UTC))
+
+    # Soft delete
+    is_active = Column(Boolean, default=True, nullable=False)
+
+    # Relationships
+    workflow = relationship("WorkflowModel")
+    organization = relationship("OrganizationModel")
+    created_by_user = relationship("UserModel")
+
+    # Indexes
+    __table_args__ = (
+        Index("ix_workflow_recordings_workflow_id", "workflow_id"),
+        Index("ix_workflow_recordings_org_id", "organization_id"),
+        # recording_id is indexed by its column definition (unique=True, index=True).
+        Index(
+            "ix_workflow_recordings_tts_scope",
+            "workflow_id",
+            "tts_provider",
+            "tts_model",
+            "tts_voice_id",
+        ),
+    )
+
+
 class KnowledgeBaseChunkModel(Base):
    """Model for storing document chunks with vector embeddings.

--- a/api/db/workflow_recording_client.py
+++ b/api/db/workflow_recording_client.py
@ -0,0 +1,218 @@
+"""Database client for managing workflow recordings."""
+
+import secrets
+import string
+from typing import List, Optional
+
+from loguru import logger
+from sqlalchemy import func, select
+
+from api.db.base_client import BaseDBClient
+from api.db.models import WorkflowRecordingModel
+
+
+def generate_short_id(length: int = 8) -> str:
+    """Return ``length`` random characters drawn from lowercase letters and digits."""
+    charset = string.ascii_lowercase + string.digits
+    return "".join([secrets.choice(charset) for _ in range(length)])
+
+
+class WorkflowRecordingClient(BaseDBClient):
+    """Client for managing workflow audio recordings."""
+
+    async def create_recording(
+        self,
+        recording_id: str,
+        workflow_id: int,
+        organization_id: int,
+        tts_provider: str,
+        tts_model: str,
+        tts_voice_id: str,
+        transcript: str,
+        storage_key: str,
+        storage_backend: str,
+        created_by: int,
+        metadata: Optional[dict] = None,
+    ) -> WorkflowRecordingModel:
+        """Create a new workflow recording record.
+
+        Args:
+            recording_id: Short unique recording identifier
+            workflow_id: ID of the workflow
+            organization_id: ID of the organization
+            tts_provider: TTS provider name
+            tts_model: TTS model name
+            tts_voice_id: TTS voice identifier
+            transcript: User-provided transcript
+            storage_key: S3/MinIO storage key
+            storage_backend: Storage backend (s3 or minio)
+            created_by: ID of the user
+            metadata: Optional extra metadata, persisted to recording_metadata
+
+        Returns:
+            The created WorkflowRecordingModel
+        """
+        async with self.async_session() as session:
+            recording = WorkflowRecordingModel(
+                recording_id=recording_id,
+                workflow_id=workflow_id,
+                organization_id=organization_id,
+                tts_provider=tts_provider,
+                tts_model=tts_model,
+                tts_voice_id=tts_voice_id,
+                transcript=transcript,
+                storage_key=storage_key,
+                storage_backend=storage_backend,
+                created_by=created_by,
+                recording_metadata=metadata or {},  # mapped column name
+            )
+
+            session.add(recording)
+            await session.commit()
+            await session.refresh(recording)
+
+            logger.info(
+                f"Created recording {recording_id} for workflow {workflow_id}, "
+                f"org {organization_id}"
+            )
+            return recording
+
+    async def get_recordings_for_workflow(
+        self,
+        workflow_id: int,
+        organization_id: int,
+        tts_provider: Optional[str] = None,
+        tts_model: Optional[str] = None,
+        tts_voice_id: Optional[str] = None,
+    ) -> List[WorkflowRecordingModel]:
+        """Get active recordings for a workflow, optionally filtered by TTS config.
+
+        Args:
+            workflow_id: ID of the workflow
+            organization_id: ID of the organization
+            tts_provider: Optional TTS provider filter
+            tts_model: Optional TTS model filter
+            tts_voice_id: Optional TTS voice ID filter
+
+        Returns:
+            List of WorkflowRecordingModel instances, newest first
+        """
+        async with self.async_session() as session:
+            query = select(WorkflowRecordingModel).where(
+                WorkflowRecordingModel.workflow_id == workflow_id,
+                WorkflowRecordingModel.organization_id == organization_id,
+                WorkflowRecordingModel.is_active.is_(True),
+            )
+
+            if tts_provider:
+                query = query.where(WorkflowRecordingModel.tts_provider == tts_provider)
+            if tts_model:
+                query = query.where(WorkflowRecordingModel.tts_model == tts_model)
+            if tts_voice_id:
+                query = query.where(WorkflowRecordingModel.tts_voice_id == tts_voice_id)
+
+            query = query.order_by(WorkflowRecordingModel.created_at.desc())
+
+            result = await session.execute(query)
+            return list(result.scalars().all())
+
+    async def get_recording_by_recording_id(
+        self,
+        recording_id: str,
+        organization_id: int,
+    ) -> Optional[WorkflowRecordingModel]:
+        """Get an active recording by its short ID.
+
+        Args:
+            recording_id: The short unique recording ID
+            organization_id: ID of the organization
+
+        Returns:
+            WorkflowRecordingModel if found, None otherwise
+        """
+        async with self.async_session() as session:
+            query = select(WorkflowRecordingModel).where(
+                WorkflowRecordingModel.recording_id == recording_id,
+                WorkflowRecordingModel.organization_id == organization_id,
+                WorkflowRecordingModel.is_active.is_(True),
+            )
+
+            result = await session.execute(query)
+            return result.scalar_one_or_none()
+
+    async def has_active_recordings(
+        self,
+        workflow_id: int,
+        organization_id: int,
+    ) -> bool:
+        """Check if a workflow has any active recordings.
+
+        Args:
+            workflow_id: ID of the workflow
+            organization_id: ID of the organization
+
+        Returns:
+            True if at least one active recording exists, False otherwise
+        """
+        async with self.async_session() as session:
+            query = (
+                select(func.count())
+                .select_from(WorkflowRecordingModel)
+                .where(
+                    WorkflowRecordingModel.workflow_id == workflow_id,
+                    WorkflowRecordingModel.organization_id == organization_id,
+                    WorkflowRecordingModel.is_active.is_(True),
+                )
+            )
+            result = await session.execute(query)
+            return result.scalar_one() > 0
+
+    async def check_recording_id_exists(self, recording_id: str) -> bool:
+        """Check if a recording ID already exists globally (active or soft-deleted).
+
+        Args:
+            recording_id: The short recording ID to check
+
+        Returns:
+            True if exists, False otherwise
+        """
+        async with self.async_session() as session:
+            query = select(WorkflowRecordingModel.id).where(
+                WorkflowRecordingModel.recording_id == recording_id,
+            )
+            result = await session.execute(query)
+            return result.scalar_one_or_none() is not None
+
+    async def delete_recording(
+        self,
+        recording_id: str,
+        organization_id: int,
+    ) -> bool:
+        """Soft delete a recording by clearing its is_active flag.
+
+        Args:
+            recording_id: The short recording ID
+            organization_id: ID of the organization
+
+        Returns:
+            True if deleted, False if not found
+        """
+        async with self.async_session() as session:
+            query = select(WorkflowRecordingModel).where(
+                WorkflowRecordingModel.recording_id == recording_id,
+                WorkflowRecordingModel.organization_id == organization_id,
+            )
+
+            result = await session.execute(query)
+            recording = result.scalar_one_or_none()
+
+            if not recording:
+                return False
+
+            recording.is_active = False
+            await session.commit()
+
+            logger.info(
+                f"Deleted recording {recording_id} for organization {organization_id}"
+            )
+            return True