feat: add hybrid text + recording functionality in agents (#191)

* feat: add recording feature in agents

* chore: pin pipecat version

* feat: show usage in UI

* chore: update pipecat
This commit is contained in:
Abhishek 2026-03-16 15:04:08 +05:30 committed by GitHub
parent f075bcb623
commit 494c60d774
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
43 changed files with 2865 additions and 397 deletions

View file

@ -526,7 +526,7 @@ class CampaignClient(BaseDBClient):
QueuedRunModel.state == "queued",
QueuedRunModel.scheduled_for.is_(None),
)
.order_by(QueuedRunModel.created_at)
.order_by(func.random())
.limit(remaining_slots)
.with_for_update(skip_locked=True)
)

View file

@ -13,6 +13,7 @@ from api.db.tool_client import ToolClient
from api.db.user_client import UserClient
from api.db.webhook_credential_client import WebhookCredentialClient
from api.db.workflow_client import WorkflowClient
from api.db.workflow_recording_client import WorkflowRecordingClient
from api.db.workflow_run_client import WorkflowRunClient
from api.db.workflow_template_client import WorkflowTemplateClient
@ -35,6 +36,7 @@ class DBClient(
WebhookCredentialClient,
ToolClient,
KnowledgeBaseClient,
WorkflowRecordingClient,
):
"""
Unified database client that combines all specialized database operations.

View file

@ -996,6 +996,77 @@ class KnowledgeBaseDocumentModel(Base):
)
class WorkflowRecordingModel(Base):
    """Model for storing audio recordings scoped to a workflow and TTS configuration.

    Recordings are used in hybrid prompts where parts of the output are
    pre-recorded audio rather than dynamically generated TTS.

    Rows are soft-deleted through ``is_active`` rather than removed; the
    ``recording_id`` uniqueness therefore also covers inactive rows.
    """

    __tablename__ = "workflow_recordings"

    id = Column(Integer, primary_key=True, index=True)

    # Short globally unique ID (e.g. "xbhfha3k") used in prompts.
    # ``unique=True`` together with ``index=True`` already emits a single
    # unique index on this column, so no extra entry is declared in
    # ``__table_args__`` (see the note there).
    recording_id = Column(String(16), unique=True, nullable=False, index=True)

    # Scoping: rows are removed by the database when the owning workflow or
    # organization is deleted (ON DELETE CASCADE).
    workflow_id = Column(
        Integer, ForeignKey("workflows.id", ondelete="CASCADE"), nullable=False
    )
    organization_id = Column(
        Integer, ForeignKey("organizations.id", ondelete="CASCADE"), nullable=False
    )

    # TTS configuration scope
    tts_provider = Column(String, nullable=False)
    tts_model = Column(String, nullable=False)
    tts_voice_id = Column(String, nullable=False)

    # Content
    transcript = Column(Text, nullable=False)

    # Storage
    storage_key = Column(String, nullable=False)
    storage_backend = Column(
        Enum("s3", "minio", name="recording_storage_backend"),
        nullable=False,
        default="s3",
        server_default=text("'s3'::recording_storage_backend"),
    )

    # Extra metadata (file_size_bytes, duration_seconds, original_filename,
    # mime_type, etc.). Named ``recording_metadata`` because ``metadata`` is
    # reserved on SQLAlchemy declarative classes.
    recording_metadata = Column(
        JSON, nullable=False, default=dict, server_default=text("'{}'::json")
    )

    # Audit. ``created_at`` is filled client-side with a timezone-aware UTC
    # timestamp; there is no server-side default.
    created_by = Column(Integer, ForeignKey("users.id"), nullable=False)
    created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(UTC))

    # Soft delete
    is_active = Column(Boolean, default=True, nullable=False)

    # Relationships
    workflow = relationship("WorkflowModel")
    organization = relationship("OrganizationModel")
    created_by_user = relationship("UserModel")

    # Indexes.
    # NOTE(review): an explicit Index("ix_workflow_recordings_recording_id",
    # "recording_id") used to be listed here. It duplicated the unique index
    # generated by the ``recording_id`` column flags and, under SQLAlchemy's
    # default naming convention, carried the same name, so it was dropped.
    # Confirm the migration for this table creates only one index of that name.
    __table_args__ = (
        Index("ix_workflow_recordings_workflow_id", "workflow_id"),
        Index("ix_workflow_recordings_org_id", "organization_id"),
        Index(
            "ix_workflow_recordings_tts_scope",
            "workflow_id",
            "tts_provider",
            "tts_model",
            "tts_voice_id",
        ),
    )
class KnowledgeBaseChunkModel(Base):
"""Model for storing document chunks with vector embeddings.

View file

@ -0,0 +1,218 @@
"""Database client for managing workflow recordings."""
import secrets
import string
from typing import List, Optional
from loguru import logger
from sqlalchemy import func, select
from api.db.base_client import BaseDBClient
from api.db.models import WorkflowRecordingModel
def generate_short_id(length: int = 8) -> str:
    """Return a random short ID made of lowercase letters and digits.

    Characters are drawn with the ``secrets`` module, one independent pick
    per position.

    Args:
        length: Number of characters to generate. Values of zero or below
            yield an empty string.

    Returns:
        A string of ``length`` characters from ``[a-z0-9]``.
    """
    charset = string.ascii_lowercase + string.digits
    picks = [secrets.choice(charset) for _ in range(length)]
    return "".join(picks)
class WorkflowRecordingClient(BaseDBClient):
    """Client for managing workflow audio recordings.

    Each method opens its own session via ``self.async_session()``.
    Lookups intended for runtime use (``get_recordings_for_workflow``,
    ``get_recording_by_recording_id``, ``has_active_recordings``) only see
    rows with ``is_active`` set; ``check_recording_id_exists`` and
    ``delete_recording`` deliberately look at every row.
    """

    async def create_recording(
        self,
        recording_id: str,
        workflow_id: int,
        organization_id: int,
        tts_provider: str,
        tts_model: str,
        tts_voice_id: str,
        transcript: str,
        storage_key: str,
        storage_backend: str,
        created_by: int,
        metadata: Optional[dict] = None,
    ) -> WorkflowRecordingModel:
        """Create a new workflow recording record.

        Args:
            recording_id: Short unique recording identifier
            workflow_id: ID of the workflow
            organization_id: ID of the organization
            tts_provider: TTS provider name
            tts_model: TTS model name
            tts_voice_id: TTS voice identifier
            transcript: User-provided transcript
            storage_key: S3/MinIO storage key
            storage_backend: Storage backend (s3 or minio)
            created_by: ID of the user
            metadata: Optional extra metadata; stored in the model's
                ``recording_metadata`` column. ``None`` or an empty dict is
                stored as ``{}``.

        Returns:
            The created WorkflowRecordingModel, refreshed after commit.
        """
        async with self.async_session() as session:
            recording = WorkflowRecordingModel(
                recording_id=recording_id,
                workflow_id=workflow_id,
                organization_id=organization_id,
                tts_provider=tts_provider,
                tts_model=tts_model,
                tts_voice_id=tts_voice_id,
                transcript=transcript,
                storage_key=storage_key,
                storage_backend=storage_backend,
                created_by=created_by,
                # The mapped column is ``recording_metadata``. Passing
                # ``metadata=`` would only shadow the declarative class's
                # reserved ``metadata`` attribute on the instance and the
                # caller's data would never reach the database.
                recording_metadata=metadata or {},
            )
            session.add(recording)
            await session.commit()
            # Reload so server-generated values (e.g. the primary key) are set.
            await session.refresh(recording)

        logger.info(
            f"Created recording {recording_id} for workflow {workflow_id}, "
            f"org {organization_id}"
        )
        return recording

    async def get_recordings_for_workflow(
        self,
        workflow_id: int,
        organization_id: int,
        tts_provider: Optional[str] = None,
        tts_model: Optional[str] = None,
        tts_voice_id: Optional[str] = None,
    ) -> List[WorkflowRecordingModel]:
        """Get active recordings for a workflow, optionally filtered by TTS config.

        Args:
            workflow_id: ID of the workflow
            organization_id: ID of the organization
            tts_provider: Optional TTS provider filter
            tts_model: Optional TTS model filter
            tts_voice_id: Optional TTS voice ID filter

        Returns:
            List of WorkflowRecordingModel instances, newest first.
        """
        async with self.async_session() as session:
            query = select(WorkflowRecordingModel).where(
                WorkflowRecordingModel.workflow_id == workflow_id,
                WorkflowRecordingModel.organization_id == organization_id,
                WorkflowRecordingModel.is_active.is_(True),
            )

            # Each TTS filter is applied only when a non-empty value is given.
            if tts_provider:
                query = query.where(
                    WorkflowRecordingModel.tts_provider == tts_provider
                )
            if tts_model:
                query = query.where(WorkflowRecordingModel.tts_model == tts_model)
            if tts_voice_id:
                query = query.where(
                    WorkflowRecordingModel.tts_voice_id == tts_voice_id
                )

            query = query.order_by(WorkflowRecordingModel.created_at.desc())
            result = await session.execute(query)
            return list(result.scalars().all())

    async def get_recording_by_recording_id(
        self,
        recording_id: str,
        organization_id: int,
    ) -> Optional[WorkflowRecordingModel]:
        """Get an active recording by its short ID.

        Args:
            recording_id: The short unique recording ID
            organization_id: ID of the organization

        Returns:
            WorkflowRecordingModel if an active row matches both the short ID
            and the organization, None otherwise.
        """
        async with self.async_session() as session:
            query = select(WorkflowRecordingModel).where(
                WorkflowRecordingModel.recording_id == recording_id,
                WorkflowRecordingModel.organization_id == organization_id,
                WorkflowRecordingModel.is_active.is_(True),
            )
            result = await session.execute(query)
            return result.scalar_one_or_none()

    async def has_active_recordings(
        self,
        workflow_id: int,
        organization_id: int,
    ) -> bool:
        """Check if a workflow has any active recordings.

        Args:
            workflow_id: ID of the workflow
            organization_id: ID of the organization

        Returns:
            True if at least one active recording exists, False otherwise
        """
        async with self.async_session() as session:
            # Only existence matters, so fetch at most one id instead of
            # counting every matching row.
            query = (
                select(WorkflowRecordingModel.id)
                .where(
                    WorkflowRecordingModel.workflow_id == workflow_id,
                    WorkflowRecordingModel.organization_id == organization_id,
                    WorkflowRecordingModel.is_active.is_(True),
                )
                .limit(1)
            )
            result = await session.execute(query)
            return result.first() is not None

    async def check_recording_id_exists(self, recording_id: str) -> bool:
        """Check if a recording ID already exists globally.

        The lookup is not restricted by organization or by ``is_active``:
        soft-deleted rows still occupy their short ID.

        Args:
            recording_id: The short recording ID to check

        Returns:
            True if exists, False otherwise
        """
        async with self.async_session() as session:
            query = select(WorkflowRecordingModel.id).where(
                WorkflowRecordingModel.recording_id == recording_id,
            )
            result = await session.execute(query)
            return result.scalar_one_or_none() is not None

    async def delete_recording(
        self,
        recording_id: str,
        organization_id: int,
    ) -> bool:
        """Soft delete a recording.

        The row is kept and only ``is_active`` is cleared. The lookup does
        not filter on ``is_active``, so calling this again for an already
        deleted recording still returns True.

        Args:
            recording_id: The short recording ID
            organization_id: ID of the organization

        Returns:
            True if a matching row was found and marked inactive, False if
            no row matches the ID within the organization.
        """
        async with self.async_session() as session:
            query = select(WorkflowRecordingModel).where(
                WorkflowRecordingModel.recording_id == recording_id,
                WorkflowRecordingModel.organization_id == organization_id,
            )
            result = await session.execute(query)
            recording = result.scalar_one_or_none()

            if recording is None:
                return False

            recording.is_active = False
            await session.commit()

        logger.info(
            f"Deleted recording {recording_id} for organization {organization_id}"
        )
        return True