feat: add full document mode in knowledge base

This commit is contained in:
Abhishek Kumar 2026-04-09 13:49:20 +05:30
parent c085398933
commit 87c8c5e2c8
26 changed files with 1144 additions and 351 deletions

View file

@ -0,0 +1,42 @@
"""add retrieval mode in document
Revision ID: e7254d2c6c18
Revises: d688d0da1123
Create Date: 2026-04-09 13:00:13.020713
"""
from typing import Sequence, Union
import sqlalchemy as sa
from alembic import op
# revision identifiers, used by Alembic.
revision: str = "e7254d2c6c18"
down_revision: Union[str, None] = "d688d0da1123"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
op.add_column(
"knowledge_base_documents",
sa.Column(
"retrieval_mode",
sa.String(length=20),
server_default="chunked",
nullable=False,
),
)
op.add_column(
"knowledge_base_documents", sa.Column("full_text", sa.Text(), nullable=True)
)
# ### end Alembic commands ###
def downgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
op.drop_column("knowledge_base_documents", "full_text")
op.drop_column("knowledge_base_documents", "retrieval_mode")
# ### end Alembic commands ###

View file

@ -27,6 +27,7 @@ class KnowledgeBaseClient(BaseDBClient):
custom_metadata: Optional[dict] = None,
docling_metadata: Optional[dict] = None,
document_uuid: Optional[str] = None,
retrieval_mode: str = "chunked",
) -> KnowledgeBaseDocumentModel:
"""Create a new knowledge base document record.
@ -58,6 +59,7 @@ class KnowledgeBaseClient(BaseDBClient):
docling_metadata=docling_metadata or {},
processing_status="pending",
total_chunks=0,
retrieval_mode=retrieval_mode,
)
# Use provided UUID or let the model generate one
@ -425,6 +427,55 @@ class KnowledgeBaseClient(BaseDBClient):
# Convert asyncpg records to dictionaries
return [dict(row) for row in rows]
async def update_document_full_text(
self,
document_id: int,
full_text: str,
) -> None:
"""Store full document text for full_document retrieval mode.
Args:
document_id: ID of the document
full_text: The full extracted text content
"""
async with self.async_session() as session:
query = select(KnowledgeBaseDocumentModel).where(
KnowledgeBaseDocumentModel.id == document_id
)
result = await session.execute(query)
document = result.scalar_one_or_none()
if document:
document.full_text = full_text
await session.commit()
logger.info(
f"Stored full text for document {document_id} ({len(full_text)} chars)"
)
async def get_full_text_documents(
self,
organization_id: int,
document_uuids: List[str],
) -> List[KnowledgeBaseDocumentModel]:
"""Get full_document mode documents by their UUIDs.
Args:
organization_id: Organization ID for scoping
document_uuids: List of document UUIDs to fetch
Returns:
List of documents with retrieval_mode='full_document' and full_text set
"""
async with self.async_session() as session:
query = select(KnowledgeBaseDocumentModel).where(
KnowledgeBaseDocumentModel.organization_id == organization_id,
KnowledgeBaseDocumentModel.document_uuid.in_(document_uuids),
KnowledgeBaseDocumentModel.retrieval_mode == "full_document",
KnowledgeBaseDocumentModel.is_active == True,
KnowledgeBaseDocumentModel.processing_status == "completed",
)
result = await session.execute(query)
return list(result.scalars().all())
async def delete_document(
self,
document_uuid: str,

View file

@ -940,6 +940,14 @@ class KnowledgeBaseDocumentModel(Base):
file_hash = Column(String(64), nullable=True) # SHA-256 hash for deduplication
mime_type = Column(String(100), nullable=True)
# Retrieval mode: "chunked" (vector search) or "full_document" (return full text)
retrieval_mode = Column(
String(20), nullable=False, default="chunked", server_default="chunked"
)
full_text = Column(
Text, nullable=True
) # Stored when retrieval_mode is "full_document"
# Processing metadata
source_url = Column(String, nullable=True) # If document was fetched from URL
total_chunks = Column(Integer, nullable=False, default=0)

View file

@ -124,6 +124,7 @@ async def process_document(
mime_type="application/octet-stream", # Will be detected by background task
custom_metadata={"s3_key": request.s3_key},
document_uuid=request.document_uuid, # Use UUID from upload
retrieval_mode=request.retrieval_mode,
)
# Enqueue background task for processing
@ -133,6 +134,7 @@ async def process_document(
request.s3_key,
user.selected_organization_id,
128, # max_tokens (default)
request.retrieval_mode,
)
logger.info(
@ -150,6 +152,7 @@ async def process_document(
processing_status="pending",
processing_error=None,
total_chunks=0,
retrieval_mode=request.retrieval_mode,
custom_metadata={"s3_key": request.s3_key},
docling_metadata={},
source_url=None,
@ -209,6 +212,7 @@ async def list_documents(
processing_status=doc.processing_status,
processing_error=doc.processing_error,
total_chunks=doc.total_chunks,
retrieval_mode=doc.retrieval_mode,
custom_metadata=doc.custom_metadata,
docling_metadata=doc.docling_metadata,
source_url=doc.source_url,
@ -267,6 +271,7 @@ async def get_document(
processing_status=document.processing_status,
processing_error=document.processing_error,
total_chunks=document.total_chunks,
retrieval_mode=document.retrieval_mode,
custom_metadata=document.custom_metadata,
docling_metadata=document.docling_metadata,
source_url=document.source_url,

View file

@ -1,4 +1,5 @@
import json
import re
import uuid
from datetime import datetime
from typing import List, Literal, Optional
@ -6,13 +7,13 @@ from typing import List, Literal, Optional
from fastapi import APIRouter, Depends, HTTPException, Query
from httpx import HTTPStatusError
from loguru import logger
from pydantic import BaseModel, ValidationError
from pydantic import BaseModel, Field, ValidationError
from api.constants import DEPLOYMENT_MODE
from api.db import db_client
from api.db.models import UserModel
from api.db.workflow_template_client import WorkflowTemplateClient
from api.enums import CallType
from api.enums import CallType, StorageBackend
from api.schemas.workflow import WorkflowRunResponseSchema
from api.services.auth.depends import get_user
from api.services.configuration.check_validity import UserConfigurationValidator
@ -22,6 +23,7 @@ from api.services.configuration.masking import (
)
from api.services.configuration.resolve import resolve_effective_config
from api.services.mps_service_key_client import mps_service_key_client
from api.services.storage import storage_fs
from api.services.workflow.dto import ReactFlowDTO
from api.services.workflow.duplicate import duplicate_workflow
from api.services.workflow.errors import ItemKind, WorkflowError
@ -1030,3 +1032,60 @@ async def duplicate_workflow_template(
"call_disposition_codes": workflow.call_disposition_codes,
"workflow_configurations": workflow.workflow_configurations,
}
# ---------------------------------------------------------------------------
# Ambient Noise Upload
# ---------------------------------------------------------------------------
class AmbientNoiseUploadRequest(BaseModel):
workflow_id: int
filename: str
mime_type: str = "audio/wav"
file_size: int = Field(..., gt=0, le=10_485_760, description="Max 10MB")
class AmbientNoiseUploadResponse(BaseModel):
upload_url: str
storage_key: str
storage_backend: str
@router.post(
"/ambient-noise/upload-url",
response_model=AmbientNoiseUploadResponse,
summary="Get a presigned URL to upload a custom ambient noise audio file",
)
async def get_ambient_noise_upload_url(
request: AmbientNoiseUploadRequest,
user=Depends(get_user),
):
"""Generate a presigned PUT URL for uploading a custom ambient noise file."""
# Verify user owns this workflow
workflow = await db_client.get_workflow(
request.workflow_id, organization_id=user.selected_organization_id
)
if not workflow:
raise HTTPException(status_code=404, detail="Workflow not found")
sanitized = re.sub(r"[^a-zA-Z0-9._-]", "_", request.filename)
storage_key = (
f"ambient-noise/{user.selected_organization_id}"
f"/{request.workflow_id}/{uuid.uuid4()}_{sanitized}"
)
upload_url = await storage_fs.aget_presigned_put_url(
file_path=storage_key,
expiration=1800,
content_type=request.mime_type,
max_size=request.file_size,
)
if not upload_url:
raise HTTPException(status_code=500, detail="Failed to generate upload URL")
return AmbientNoiseUploadResponse(
upload_url=upload_url,
storage_key=storage_key,
storage_backend=StorageBackend.get_current_backend().value,
)

View file

@ -29,6 +29,10 @@ class ProcessDocumentRequestSchema(BaseModel):
document_uuid: str = Field(..., description="Document UUID to process")
s3_key: str = Field(..., description="S3 key of the uploaded file")
retrieval_mode: str = Field(
default="chunked",
description="Retrieval mode: 'chunked' for vector search or 'full_document' for full text retrieval",
)
class DocumentResponseSchema(BaseModel):
@ -43,6 +47,7 @@ class DocumentResponseSchema(BaseModel):
processing_status: str # pending, processing, completed, failed
processing_error: Optional[str] = None
total_chunks: int
retrieval_mode: str = "chunked"
custom_metadata: Dict[str, Any]
docling_metadata: Dict[str, Any]
source_url: Optional[str] = None

View file

@ -0,0 +1,220 @@
"""Shared utilities for downloading, converting, and caching audio files.
Provides helpers used by both the recording audio cache and the ambient
noise cache to avoid duplicating download / ffmpeg / disk-cache logic.
"""
import asyncio
import os
import shutil
import tempfile
from typing import Literal, Optional
from loguru import logger
from api.constants import APP_ROOT_DIR
# ---------------------------------------------------------------------------
# Filesystem cache directory (shared by all audio caches)
# ---------------------------------------------------------------------------
CACHE_DIR = os.path.join(os.path.dirname(APP_ROOT_DIR), "dograh_pcm_cache")
os.makedirs(CACHE_DIR, exist_ok=True)
# ---------------------------------------------------------------------------
# Download helper
# ---------------------------------------------------------------------------
async def download_storage_file(
storage_key: str,
storage_backend: str,
get_storage_fn,
) -> Optional[str]:
"""Download a file from object storage to a local temp file.
Returns the temp file path on success, or None on failure.
The caller is responsible for cleaning up the temp file.
"""
ext = ext_from_key(storage_key)
fd, tmp_path = tempfile.mkstemp(suffix=ext, prefix="dograh_dl_")
os.close(fd)
try:
storage = get_storage_fn(storage_backend)
success = await storage.adownload_file(storage_key, tmp_path)
if not success:
logger.error(f"Failed to download {storage_key}")
_safe_unlink(tmp_path)
return None
return tmp_path
except Exception:
logger.exception(f"Error downloading {storage_key}")
_safe_unlink(tmp_path)
return None
# ---------------------------------------------------------------------------
# Audio conversion via ffmpeg
# ---------------------------------------------------------------------------
async def convert_audio_file(
file_path: str,
target_sample_rate: int,
output_format: Literal["pcm", "wav"] = "pcm",
) -> Optional[bytes]:
"""Convert an audio file via ffmpeg.
Args:
file_path: Path to the source audio file.
target_sample_rate: Desired output sample rate.
output_format: ``"pcm"`` for raw s16le bytes, ``"wav"`` for a
complete WAV file (16-bit mono).
Returns:
Converted audio bytes, or None on failure.
"""
ffmpeg = shutil.which("ffmpeg")
if not ffmpeg:
logger.error("ffmpeg not found on PATH - cannot convert audio")
return None
if output_format == "pcm":
fmt_args = ["-f", "s16le", "-acodec", "pcm_s16le"]
else:
fmt_args = ["-f", "wav", "-acodec", "pcm_s16le"]
cmd = [
ffmpeg,
"-i",
file_path,
*fmt_args,
"-ac",
"1",
"-ar",
str(target_sample_rate),
"-loglevel",
"error",
"pipe:1",
]
try:
proc = await asyncio.create_subprocess_exec(
*cmd,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE,
)
stdout, stderr = await proc.communicate()
if proc.returncode != 0:
logger.error(f"ffmpeg failed (rc={proc.returncode}): {stderr.decode()}")
return None
if not stdout:
logger.error("ffmpeg produced no output")
return None
return stdout
except Exception:
logger.exception("ffmpeg subprocess error")
return None
# ---------------------------------------------------------------------------
# File I/O helpers
# ---------------------------------------------------------------------------
def read_cached_file(path: str) -> bytes:
with open(path, "rb") as f:
return f.read()
def write_cache_file(path: str, data: bytes) -> None:
"""Atomically write *data* to *path* (write-to-tmp then rename)."""
fd, tmp = tempfile.mkstemp(dir=CACHE_DIR, suffix=".tmp")
os.close(fd)
with open(tmp, "wb") as f:
f.write(data)
os.replace(tmp, path)
def ext_from_key(storage_key: str) -> str:
"""Extract file extension from a storage key, defaulting to .wav."""
_, ext = os.path.splitext(storage_key)
return ext if ext else ".wav"
def _safe_unlink(path: str) -> None:
try:
if os.path.exists(path):
os.unlink(path)
except OSError:
pass
# ---------------------------------------------------------------------------
# Ambient noise file cache
# ---------------------------------------------------------------------------
def _ambient_noise_cache_path(storage_key: str, sample_rate: int) -> str:
"""Return the on-disk path for a cached ambient noise WAV file."""
# Use a stable hash of the storage key so different uploads get different cache entries
import hashlib
key_hash = hashlib.sha256(storage_key.encode()).hexdigest()[:16]
return os.path.join(CACHE_DIR, f"ambient_{key_hash}_{sample_rate}.wav")
async def get_cached_ambient_noise_path(
storage_key: str,
storage_backend: str,
target_sample_rate: int,
) -> Optional[str]:
"""Return a local WAV file path for a custom ambient noise file.
Downloads from object storage and converts to mono WAV at
*target_sample_rate* on the first call; subsequent calls return the
cached path immediately.
Args:
storage_key: Object storage key for the uploaded audio file.
storage_backend: Storage backend identifier (e.g. ``"minio"``, ``"s3"``).
target_sample_rate: Target sample rate for the output WAV.
Returns:
Absolute path to the cached WAV file, or None on failure.
"""
from api.services.storage import get_storage_for_backend
cached = _ambient_noise_cache_path(storage_key, target_sample_rate)
if os.path.exists(cached):
logger.debug(f"Ambient noise served from cache: {cached}")
return cached
logger.info(f"Downloading custom ambient noise: {storage_key}")
def _get_storage(backend: str):
return get_storage_for_backend(backend)
tmp_path = await download_storage_file(storage_key, storage_backend, _get_storage)
if not tmp_path:
return None
try:
wav_data = await convert_audio_file(
tmp_path, target_sample_rate, output_format="wav"
)
if wav_data is None:
return None
write_cache_file(cached, wav_data)
logger.info(f"Cached custom ambient noise: {cached} ({len(wav_data)} bytes)")
return cached
except Exception:
logger.exception("Error caching ambient noise file")
return None
finally:
_safe_unlink(tmp_path)

View file

@ -6,29 +6,30 @@ leading/trailing silence, and caches the processed bytes on disk so
subsequent plays (even from other workers) are instantaneous.
"""
import asyncio
import os
import shutil
import tempfile
from typing import Awaitable, Callable, Optional
import numpy as np
from loguru import logger
from api.constants import APP_ROOT_DIR
from pipecat.audio.utils import SPEAKING_THRESHOLD
# ---------------------------------------------------------------------------
# Filesystem cache directory
# ---------------------------------------------------------------------------
from .audio_file_cache import (
CACHE_DIR,
convert_audio_file,
download_storage_file,
read_cached_file,
write_cache_file,
)
_CACHE_DIR = os.path.join(os.path.dirname(APP_ROOT_DIR), "dograh_pcm_cache")
os.makedirs(_CACHE_DIR, exist_ok=True)
# ---------------------------------------------------------------------------
# Cache path helper
# ---------------------------------------------------------------------------
def _cache_path(recording_id: str, sample_rate: int) -> str:
"""Return the on-disk path for a cached PCM file."""
return os.path.join(_CACHE_DIR, f"{recording_id}_{sample_rate}.pcm")
return os.path.join(CACHE_DIR, f"{recording_id}_{sample_rate}.pcm")
# ---------------------------------------------------------------------------
@ -72,7 +73,7 @@ def create_recording_audio_fetcher(
# 1. Serve from filesystem cache
if os.path.exists(cached):
logger.debug(f"Recording {recording_id} served from disk cache")
return _read_file(cached)
return read_cached_file(cached)
# 2. DB lookup
recording = await db_client.get_recording_by_recording_id(
@ -172,109 +173,33 @@ async def _download_and_convert(
Returns the processed PCM bytes, or None on failure.
"""
ext = _ext_from_key(recording.storage_key)
fd, tmp_path = tempfile.mkstemp(
suffix=ext, prefix=f"dograh_dl_{recording.recording_id}_"
tmp_path = await download_storage_file(
recording.storage_key, recording.storage_backend, get_storage_fn
)
os.close(fd)
try:
storage = get_storage_fn(recording.storage_backend)
success = await storage.adownload_file(recording.storage_key, tmp_path)
if not success:
logger.error(f"Failed to download recording {recording.recording_id}")
return None
if not tmp_path:
return None
pcm_data = await _audio_file_to_pcm(tmp_path, sample_rate)
try:
pcm_data = await convert_audio_file(tmp_path, sample_rate, output_format="pcm")
if pcm_data is None:
return None
pcm_data = _trim_silence(pcm_data, sample_rate)
# Write to disk cache atomically (write to tmp then rename)
# Write to disk cache
cached = _cache_path(recording.recording_id, sample_rate)
fd, tmp_cache = tempfile.mkstemp(dir=_CACHE_DIR, suffix=".pcm.tmp")
os.close(fd)
_write_file(tmp_cache, pcm_data)
os.replace(tmp_cache, cached)
write_cache_file(cached, pcm_data)
return pcm_data
except Exception:
logger.exception(f"Error fetching recording {recording.recording_id}")
return None
finally:
if os.path.exists(tmp_path):
try:
try:
if os.path.exists(tmp_path):
os.unlink(tmp_path)
except OSError:
pass
# ---------------------------------------------------------------------------
# File I/O helpers (run via asyncio.to_thread)
# ---------------------------------------------------------------------------
def _read_file(path: str) -> bytes:
with open(path, "rb") as f:
return f.read()
def _write_file(path: str, data: bytes) -> None:
with open(path, "wb") as f:
f.write(data)
# ---------------------------------------------------------------------------
# Audio conversion
# ---------------------------------------------------------------------------
async def _audio_file_to_pcm(
file_path: str, target_sample_rate: int
) -> Optional[bytes]:
"""Convert an audio file to raw 16-bit mono PCM bytes via ffmpeg."""
ffmpeg = shutil.which("ffmpeg")
if not ffmpeg:
logger.error("ffmpeg not found on PATH — cannot decode recording")
return None
cmd = [
ffmpeg,
"-i",
file_path,
"-f",
"s16le", # raw 16-bit signed little-endian PCM
"-acodec",
"pcm_s16le",
"-ac",
"1", # mono
"-ar",
str(target_sample_rate),
"-loglevel",
"error",
"pipe:1", # output to stdout
]
try:
proc = await asyncio.create_subprocess_exec(
*cmd,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE,
)
stdout, stderr = await proc.communicate()
if proc.returncode != 0:
logger.error(f"ffmpeg failed (rc={proc.returncode}): {stderr.decode()}")
return None
if not stdout:
logger.error("ffmpeg produced no output")
return None
return stdout
except Exception:
logger.exception("ffmpeg subprocess error")
return None
except OSError:
pass
# ---------------------------------------------------------------------------
@ -327,14 +252,3 @@ def _trim_silence(pcm_data: bytes, sample_rate: int) -> bytes:
)
return trimmed.tobytes()
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _ext_from_key(storage_key: str) -> str:
"""Extract file extension from a storage key, defaulting to .wav."""
_, ext = os.path.splitext(storage_key)
return ext if ext else ".wav"

View file

@ -510,7 +510,7 @@ async def run_pipeline_smallwebrtc(
# Create audio configuration for WebRTC
audio_config = create_audio_config(WorkflowRunMode.SMALLWEBRTC.value)
transport = create_webrtc_transport(
transport = await create_webrtc_transport(
webrtc_connection,
workflow_run_id,
audio_config,

View file

@ -1,11 +1,13 @@
import os
from fastapi import WebSocket
from loguru import logger
from api.constants import APP_ROOT_DIR
from api.db import db_client
from api.enums import OrganizationConfigurationKey
from api.services.pipecat.audio_config import AudioConfig
from api.services.pipecat.audio_file_cache import get_cached_ambient_noise_path
from api.services.telephony.providers.ari_call_strategies import (
ARIBridgeSwapStrategy,
ARIHangupStrategy,
@ -37,6 +39,49 @@ librnnoise_path = os.path.normpath(
)
async def _build_audio_out_mixer(
audio_out_sample_rate: int,
ambient_noise_config: dict | None,
):
"""Build the audio output mixer based on the ambient noise configuration.
Returns a ``SoundfileMixer`` when ambient noise is enabled, or a
``SilenceAudioMixer`` otherwise. Supports custom user-uploaded audio
files via the ``storage_key`` / ``storage_backend`` fields in the config.
"""
if not ambient_noise_config or not ambient_noise_config.get("enabled", False):
return SilenceAudioMixer()
volume = ambient_noise_config.get("volume", 0.3)
# Check for a custom uploaded ambient noise file
storage_key = ambient_noise_config.get("storage_key")
storage_backend = ambient_noise_config.get("storage_backend")
if storage_key and storage_backend:
cached_path = await get_cached_ambient_noise_path(
storage_key, storage_backend, audio_out_sample_rate
)
if cached_path:
return SoundfileMixer(
sound_files={"custom": cached_path},
default_sound="custom",
volume=volume,
)
logger.warning("Custom ambient noise file unavailable, falling back to default")
# Default built-in office ambience
return SoundfileMixer(
sound_files={
"office": APP_ROOT_DIR
/ "assets"
/ f"office-ambience-{audio_out_sample_rate}-mono.wav"
},
default_sound="office",
volume=volume,
)
async def create_twilio_transport(
websocket_client: WebSocket,
stream_sid: str,
@ -79,6 +124,10 @@ async def create_twilio_transport(
hangup_strategy=hangup_strategy,
)
mixer = await _build_audio_out_mixer(
audio_config.transport_out_sample_rate, ambient_noise_config
)
return FastAPIWebsocketTransport(
websocket=websocket_client,
params=FastAPIWebsocketParams(
@ -86,19 +135,7 @@ async def create_twilio_transport(
audio_out_enabled=True,
audio_in_sample_rate=audio_config.transport_in_sample_rate,
audio_out_sample_rate=audio_config.transport_out_sample_rate,
audio_out_mixer=(
SoundfileMixer(
sound_files={
"office": APP_ROOT_DIR
/ "assets"
/ f"office-ambience-{audio_config.transport_out_sample_rate}-mono.wav"
},
default_sound="office",
volume=ambient_noise_config.get("volume", 0.3),
)
if ambient_noise_config and ambient_noise_config.get("enabled", False)
else SilenceAudioMixer()
),
audio_out_mixer=mixer,
serializer=serializer,
),
)
@ -144,6 +181,10 @@ async def create_cloudonix_transport(
hangup_strategy=hangup_strategy,
)
mixer = await _build_audio_out_mixer(
audio_config.transport_out_sample_rate, ambient_noise_config
)
return FastAPIWebsocketTransport(
websocket=websocket_client,
params=FastAPIWebsocketParams(
@ -151,19 +192,7 @@ async def create_cloudonix_transport(
audio_out_enabled=True,
audio_in_sample_rate=audio_config.transport_in_sample_rate,
audio_out_sample_rate=audio_config.transport_out_sample_rate,
audio_out_mixer=(
SoundfileMixer(
sound_files={
"office": APP_ROOT_DIR
/ "assets"
/ f"office-ambience-{audio_config.transport_out_sample_rate}-mono.wav"
},
default_sound="office",
volume=ambient_noise_config.get("volume", 0.3),
)
if ambient_noise_config and ambient_noise_config.get("enabled", False)
else SilenceAudioMixer()
),
audio_out_mixer=mixer,
serializer=serializer,
audio_out_10ms_chunks=2,
),
@ -209,6 +238,10 @@ async def create_telnyx_transport(
inbound_encoding="PCMU",
)
mixer = await _build_audio_out_mixer(
audio_config.transport_out_sample_rate, ambient_noise_config
)
return FastAPIWebsocketTransport(
websocket=websocket_client,
params=FastAPIWebsocketParams(
@ -216,19 +249,7 @@ async def create_telnyx_transport(
audio_out_enabled=True,
audio_in_sample_rate=audio_config.transport_in_sample_rate,
audio_out_sample_rate=audio_config.transport_out_sample_rate,
audio_out_mixer=(
SoundfileMixer(
sound_files={
"office": APP_ROOT_DIR
/ "assets"
/ f"office-ambience-{audio_config.transport_out_sample_rate}-mono.wav"
},
default_sound="office",
volume=ambient_noise_config.get("volume", 0.3),
)
if ambient_noise_config and ambient_noise_config.get("enabled", False)
else SilenceAudioMixer()
),
audio_out_mixer=mixer,
serializer=serializer,
),
)
@ -278,6 +299,10 @@ async def create_ari_transport(
),
)
mixer = await _build_audio_out_mixer(
audio_config.transport_out_sample_rate, ambient_noise_config
)
return FastAPIWebsocketTransport(
websocket=websocket_client,
params=FastAPIWebsocketParams(
@ -285,19 +310,7 @@ async def create_ari_transport(
audio_out_enabled=True,
audio_in_sample_rate=audio_config.transport_in_sample_rate,
audio_out_sample_rate=audio_config.transport_out_sample_rate,
audio_out_mixer=(
SoundfileMixer(
sound_files={
"office": APP_ROOT_DIR
/ "assets"
/ f"office-ambience-{audio_config.transport_out_sample_rate}-mono.wav"
},
default_sound="office",
volume=ambient_noise_config.get("volume", 0.3),
)
if ambient_noise_config and ambient_noise_config.get("enabled", False)
else SilenceAudioMixer()
),
audio_out_mixer=mixer,
serializer=serializer,
),
)
@ -340,6 +353,10 @@ async def create_vonage_transport(
),
)
mixer = await _build_audio_out_mixer(
audio_config.transport_out_sample_rate, ambient_noise_config
)
# Important: Vonage uses binary WebSocket mode, not text
return FastAPIWebsocketTransport(
websocket=websocket_client,
@ -348,19 +365,7 @@ async def create_vonage_transport(
audio_out_enabled=True,
audio_in_sample_rate=audio_config.transport_in_sample_rate,
audio_out_sample_rate=audio_config.transport_out_sample_rate,
audio_out_mixer=(
SoundfileMixer(
sound_files={
"office": APP_ROOT_DIR
/ "assets"
/ f"office-ambience-{audio_config.transport_out_sample_rate}-mono.wav"
},
default_sound="office",
volume=ambient_noise_config.get("volume", 0.3),
)
if ambient_noise_config and ambient_noise_config.get("enabled", False)
else SilenceAudioMixer()
),
audio_out_mixer=mixer,
serializer=serializer,
),
)
@ -428,6 +433,10 @@ async def create_vobiz_transport(
f"transport_rate=8000Hz, pipeline_rate={audio_config.pipeline_sample_rate}Hz"
)
mixer = await _build_audio_out_mixer(
audio_config.transport_out_sample_rate, ambient_noise_config
)
# Create WebSocket transport (same structure as Twilio/Vonage)
transport = FastAPIWebsocketTransport(
websocket=websocket_client,
@ -436,19 +445,7 @@ async def create_vobiz_transport(
audio_out_enabled=True,
audio_in_sample_rate=audio_config.transport_in_sample_rate,
audio_out_sample_rate=audio_config.transport_out_sample_rate,
audio_out_mixer=(
SoundfileMixer(
sound_files={
"office": APP_ROOT_DIR
/ "assets"
/ f"office-ambience-{audio_config.transport_out_sample_rate}-mono.wav"
},
default_sound="office",
volume=ambient_noise_config.get("volume", 0.3),
)
if ambient_noise_config and ambient_noise_config.get("enabled", False)
else SilenceAudioMixer()
),
audio_out_mixer=mixer,
serializer=serializer,
),
)
@ -459,7 +456,7 @@ async def create_vobiz_transport(
return transport
def create_webrtc_transport(
async def create_webrtc_transport(
webrtc_connection: SmallWebRTCConnection,
workflow_run_id: int,
audio_config: AudioConfig,
@ -468,6 +465,10 @@ def create_webrtc_transport(
):
"""Create a transport for WebRTC connections"""
mixer = await _build_audio_out_mixer(
audio_config.transport_out_sample_rate, ambient_noise_config
)
return SmallWebRTCTransport(
webrtc_connection=webrtc_connection,
params=TransportParams(
@ -475,19 +476,7 @@ def create_webrtc_transport(
audio_out_enabled=True,
audio_in_sample_rate=audio_config.transport_in_sample_rate,
audio_out_sample_rate=audio_config.transport_out_sample_rate,
audio_out_mixer=(
SoundfileMixer(
sound_files={
"office": APP_ROOT_DIR
/ "assets"
/ f"office-ambience-{audio_config.transport_out_sample_rate}-mono.wav"
},
default_sound="office",
volume=ambient_noise_config.get("volume", 0.3),
)
if ambient_noise_config and ambient_noise_config.get("enabled", False)
else SilenceAudioMixer()
),
audio_out_mixer=mixer,
),
)

View file

@ -301,12 +301,6 @@ class PipecatEngine:
"Organization ID not available for knowledge base retrieval"
)
if not self._embeddings_api_key:
raise ValueError(
"Embeddings API key not configured. Please set your API key in "
"Model Configurations > Embedding."
)
result = await retrieve_from_knowledge_base(
query=query,
organization_id=organization_id,

View file

@ -204,37 +204,66 @@ async def _perform_retrieval(
"""Internal function to perform the actual retrieval operation.
Separated from tracing logic for cleaner code organization.
Uses OpenAI embeddings by default for high-quality retrieval.
Handles both chunked (vector search) and full_document (full text) modes.
"""
try:
# Create a new embedding service instance
# Uses OpenAI text-embedding-3-small by default, or user-provided config
embedding_service = OpenAIEmbeddingService(
db_client=db_client,
max_tokens=128, # This is only used for chunking, not for retrieval
api_key=embeddings_api_key,
model_id=embeddings_model or "text-embedding-3-small",
base_url=embeddings_base_url,
)
# Perform vector similarity search
results = await embedding_service.search_similar_chunks(
query=query,
organization_id=organization_id,
limit=limit,
document_uuids=document_uuids,
)
# Format results for LLM consumption
chunks = []
for result in results:
chunk_info = {
"text": result.get("contextualized_text") or result.get("chunk_text"),
"filename": result.get("filename"),
"similarity": round(result.get("similarity", 0), 4),
"chunk_index": result.get("chunk_index"),
}
chunks.append(chunk_info)
# Check for full_document mode documents and return their full text
if document_uuids:
full_text_docs = await db_client.get_full_text_documents(
organization_id=organization_id,
document_uuids=document_uuids,
)
for doc in full_text_docs:
if doc.full_text:
chunks.append(
{
"text": doc.full_text,
"filename": doc.filename,
"similarity": 1.0,
"chunk_index": 0,
}
)
# Filter out full_document UUIDs so vector search only hits chunked docs
full_doc_uuids = {doc.document_uuid for doc in full_text_docs}
chunked_uuids = [u for u in document_uuids if u not in full_doc_uuids]
else:
chunked_uuids = document_uuids
# Perform vector similarity search on chunked documents
if chunked_uuids is None or len(chunked_uuids) > 0:
if not embeddings_api_key:
raise ValueError(
"Embeddings API key not configured. Please set your API key in "
"Model Configurations > Embedding."
)
embedding_service = OpenAIEmbeddingService(
db_client=db_client,
max_tokens=128,
api_key=embeddings_api_key,
model_id=embeddings_model or "text-embedding-3-small",
base_url=embeddings_base_url,
)
results = await embedding_service.search_similar_chunks(
query=query,
organization_id=organization_id,
limit=limit,
document_uuids=chunked_uuids if chunked_uuids else None,
)
for result in results:
chunk_info = {
"text": result.get("contextualized_text")
or result.get("chunk_text"),
"filename": result.get("filename"),
"similarity": round(result.get("similarity", 0), 4),
"chunk_index": result.get("chunk_index"),
}
chunks.append(chunk_info)
logger.info(
f"Knowledge base retrieval: query='{query}', "

View file

@ -25,6 +25,7 @@ async def process_knowledge_base_document(
s3_key: str,
organization_id: int,
max_tokens: int = 128,
retrieval_mode: str = "chunked",
):
"""Process a knowledge base document: download, chunk, embed, and store.
@ -34,6 +35,7 @@ async def process_knowledge_base_document(
s3_key: S3 key where the file is stored
organization_id: Organization ID
max_tokens: Maximum number of tokens per chunk (default: 128)
retrieval_mode: "chunked" for vector search or "full_document" for full text
"""
logger.info(
f"Starting knowledge base document processing for document_id={document_id}, "
@ -128,6 +130,47 @@ async def process_knowledge_base_document(
mime_type=mime_type,
)
# Full document mode: extract text and store it, skip chunking/embedding
if retrieval_mode == "full_document":
logger.info(f"Document {document_id}: full_document mode, extracting text")
plain_text_extensions = {".txt", ".json"}
if file_extension.lower() in plain_text_extensions:
with open(temp_file_path, "r", encoding="utf-8") as f:
full_text = f.read()
if file_extension.lower() == ".json":
try:
parsed = json.loads(full_text)
full_text = json.dumps(parsed, indent=2, ensure_ascii=False)
except json.JSONDecodeError:
pass
docling_metadata = {"document_type": "PlainText"}
else:
converter = DocumentConverter()
conversion_result = converter.convert(temp_file_path)
doc = conversion_result.document
full_text = doc.export_to_text()
docling_metadata = {
"num_pages": len(doc.pages) if hasattr(doc, "pages") else None,
"document_type": type(doc).__name__,
}
# Store full text on the document record
await db_client.update_document_full_text(document_id, full_text)
await db_client.update_document_status(
document_id,
"completed",
total_chunks=0,
docling_metadata=docling_metadata,
)
logger.info(
f"Successfully processed full_document {document_id}. "
f"Text length: {len(full_text)} chars"
)
return
# Initialize the OpenAI embedding service
logger.info(
f"Initializing OpenAI embedding service with max_tokens={max_tokens}"

View file

@ -6,7 +6,7 @@ description: "Upload documents that your voice agent can reference during live c
The Knowledge Base lets you upload documents that your voice agents can reference during conversations. Instead of encoding all information into prompts, you can provide source documents and let the agent retrieve relevant content on the fly.
<Warning>
You must configure an embedding provider and API key in **AI Models Configuration → Embedding** before using the Knowledge Base. Document processing and retrieval depend on embeddings, so this feature will not work without a valid embedding configuration.
An embedding API key (configured in **AI Models Configuration → Embedding**) is required for **Chunked Search** mode. **Full Document** mode does not require embeddings.
</Warning>
![Embedding Configuration](../images/embedding-configurations.png)
@ -14,9 +14,10 @@ You must configure an embedding provider and API key in **AI Models Configuratio
## How It Works
1. You **upload** a document (PDF, DOCX, TXT, or JSON) to the Knowledge Base
2. Dograh **processes** and chunks the document for efficient retrieval
3. You **attach** the document to one or more workflow nodes
4. During a call, the agent **searches** the document for relevant information based on the caller's questions and uses it to generate accurate responses
2. You **choose a retrieval mode** — Full Document or Chunked Search
3. Dograh **processes** the document based on the selected mode
4. You **attach** the document to one or more workflow nodes
5. During a call, the agent **retrieves** information from the document and uses it to generate accurate responses
## Supported File Types
@ -33,7 +34,27 @@ Maximum file size: **5 MB**
1. Go to **Knowledge Base Files** in the dashboard
2. Click **Upload New** or drag and drop a file
3. Wait for processing to complete — the document will be chunked and indexed automatically
3. Choose a **retrieval mode** (see below)
4. Click **Upload & Process** — the document will be processed according to the selected mode
### Retrieval Modes
When uploading a document, you choose how the agent retrieves information from it:
<CardGroup cols={2}>
<Card title="Full Document" icon="file-lines">
The entire document text is provided to the agent whenever it queries the knowledge base. No chunking or embedding is performed.
**Best for:** menus, price lists, FAQs, short reference sheets, and any document where the agent needs access to all the information at once.
<Tip>Full Document mode does not require an embedding API key.</Tip>
</Card>
<Card title="Chunked Search" icon="magnifying-glass">
The document is split into smaller chunks and indexed with vector embeddings. When the agent queries the knowledge base, only the most relevant chunks are returned.
**Best for:** large documents like policies, manuals, or contracts where only a portion is relevant to any given question.
</Card>
</CardGroup>
## Attaching Documents to Nodes
@ -47,6 +68,8 @@ The agent will only search documents attached to the current node, so attach onl
## Best Practices
- **Use Full Document for small reference files** — menus, price lists, and FAQs work best when the agent sees the entire document
- **Use Chunked Search for large documents** — policies, manuals, and contracts are too large to fit in context, so chunked retrieval finds the relevant sections
- **Keep documents focused** — a single topic per document produces better retrieval results than a large multi-topic file
- **Use clear, structured content** — headings, lists, and short paragraphs help the chunking process
- **Attach selectively** — only attach documents relevant to a specific node rather than attaching everything everywhere

View file

@ -201,10 +201,15 @@ export default function DocumentList({ refreshTrigger }: DocumentListProps) {
<div className="flex items-center gap-2 mb-1">
<span className="font-medium truncate">{doc.filename}</span>
{getStatusBadge(doc.processing_status)}
{doc.retrieval_mode === 'full_document' ? (
<Badge variant="outline" className="text-xs">Full Document</Badge>
) : (
<Badge variant="outline" className="text-xs">Chunked</Badge>
)}
</div>
<div className="flex items-center gap-4 text-sm text-muted-foreground">
<span>{formatFileSize(doc.file_size_bytes)}</span>
{doc.processing_status === 'completed' && (
{doc.processing_status === 'completed' && doc.retrieval_mode !== 'full_document' && (
<span>{doc.total_chunks} chunks</span>
)}
<span>{formatDate(doc.created_at)}</span>

View file

@ -1,6 +1,6 @@
'use client';
import { Upload } from 'lucide-react';
import { FileText, Upload, X } from 'lucide-react';
import { useRef, useState } from 'react';
import { toast } from 'sonner';
@ -10,7 +10,9 @@ import {
} from '@/client/sdk.gen';
import type { DocumentUploadResponseSchema } from '@/client/types.gen';
import { Button } from '@/components/ui/button';
import { Label } from '@/components/ui/label';
import { Progress } from '@/components/ui/progress';
import { RadioGroup, RadioGroupItem } from '@/components/ui/radio-group';
import logger from '@/lib/logger';
interface DocumentUploadProps {
@ -21,20 +23,20 @@ const MAX_FILE_SIZE = 5 * 1024 * 1024; // 5MB
const ACCEPTED_FILE_TYPES = ['.pdf', '.docx', '.doc', '.txt', '.json'];
export default function DocumentUpload({ onUploadSuccess }: DocumentUploadProps) {
const [selectedFile, setSelectedFile] = useState<File | null>(null);
const [retrievalMode, setRetrievalMode] = useState<string>('full_document');
const [uploading, setUploading] = useState(false);
const [uploadProgress, setUploadProgress] = useState(0);
const [dragActive, setDragActive] = useState(false);
const fileInputRef = useRef<HTMLInputElement>(null);
const validateFile = (file: File): boolean => {
// Validate file type
const fileExtension = '.' + file.name.split('.').pop()?.toLowerCase();
if (!ACCEPTED_FILE_TYPES.includes(fileExtension)) {
toast.error(`Please select a supported file type: ${ACCEPTED_FILE_TYPES.join(', ')}`);
return false;
}
// Validate file size
if (file.size > MAX_FILE_SIZE) {
toast.error('File size must be less than 5MB');
return false;
@ -43,27 +45,38 @@ export default function DocumentUpload({ onUploadSuccess }: DocumentUploadProps)
return true;
};
const uploadFile = async (file: File) => {
const handleFileSelected = (file: File) => {
if (!validateFile(file)) {
// Reset file input so the same file can be re-selected
if (fileInputRef.current) {
fileInputRef.current.value = '';
}
return;
}
setSelectedFile(file);
};
const clearSelectedFile = () => {
setSelectedFile(null);
setRetrievalMode('full_document');
if (fileInputRef.current) {
fileInputRef.current.value = '';
}
};
const uploadFile = async () => {
if (!selectedFile) return;
setUploading(true);
setUploadProgress(0);
try {
// Step 1: Request presigned upload URL
logger.info('Requesting presigned upload URL for:', file.name);
logger.info('Requesting presigned upload URL for:', selectedFile.name);
const uploadUrlResponse = await getUploadUrlApiV1KnowledgeBaseUploadUrlPost({
body: {
filename: file.name,
mime_type: file.type || 'application/octet-stream',
filename: selectedFile.name,
mime_type: selectedFile.type || 'application/octet-stream',
custom_metadata: {
original_filename: file.name,
original_filename: selectedFile.name,
uploaded_at: new Date().toISOString(),
},
},
@ -74,16 +87,13 @@ export default function DocumentUpload({ onUploadSuccess }: DocumentUploadProps)
}
const uploadData: DocumentUploadResponseSchema = uploadUrlResponse.data;
logger.info('Received presigned URL, uploading file...');
setUploadProgress(25);
// Step 2: Upload file directly to S3/MinIO using PUT
const uploadResponse = await fetch(uploadData.upload_url, {
method: 'PUT',
body: file,
body: selectedFile,
headers: {
'Content-Type': file.type || 'application/octet-stream',
'Content-Type': selectedFile.type || 'application/octet-stream',
},
});
@ -92,13 +102,12 @@ export default function DocumentUpload({ onUploadSuccess }: DocumentUploadProps)
}
setUploadProgress(75);
logger.info('File uploaded successfully, triggering processing...');
// Step 3: Trigger document processing
const processResponse = await processDocumentApiV1KnowledgeBaseProcessDocumentPost({
body: {
document_uuid: uploadData.document_uuid,
s3_key: uploadData.s3_key,
retrieval_mode: retrievalMode,
},
});
@ -107,9 +116,8 @@ export default function DocumentUpload({ onUploadSuccess }: DocumentUploadProps)
}
setUploadProgress(100);
logger.info('Document processing triggered successfully');
toast.success(`File uploaded: ${file.name}. Processing started.`);
toast.success(`File uploaded: ${selectedFile.name}. Processing started.`);
clearSelectedFile();
onUploadSuccess();
} catch (error) {
logger.error('Error uploading document:', error);
@ -117,17 +125,13 @@ export default function DocumentUpload({ onUploadSuccess }: DocumentUploadProps)
} finally {
setUploading(false);
setUploadProgress(0);
// Reset file input
if (fileInputRef.current) {
fileInputRef.current.value = '';
}
}
};
const handleFileSelect = async (event: React.ChangeEvent<HTMLInputElement>) => {
const handleFileSelect = (event: React.ChangeEvent<HTMLInputElement>) => {
const file = event.target.files?.[0];
if (file) {
await uploadFile(file);
handleFileSelected(file);
}
};
@ -141,14 +145,14 @@ export default function DocumentUpload({ onUploadSuccess }: DocumentUploadProps)
}
};
const handleDrop = async (e: React.DragEvent) => {
const handleDrop = (e: React.DragEvent) => {
e.preventDefault();
e.stopPropagation();
setDragActive(false);
const file = e.dataTransfer.files?.[0];
if (file) {
await uploadFile(file);
handleFileSelected(file);
}
};
@ -156,6 +160,69 @@ export default function DocumentUpload({ onUploadSuccess }: DocumentUploadProps)
fileInputRef.current?.click();
};
// Step 2: File selected — show retrieval mode choice
if (selectedFile && !uploading) {
return (
<div className="space-y-4">
{/* Selected file info */}
<div className="flex items-center gap-3 p-3 border rounded-lg bg-muted/30">
<FileText className="w-8 h-8 text-primary flex-shrink-0" />
<div className="flex-1 min-w-0">
<p className="font-medium truncate">{selectedFile.name}</p>
<p className="text-xs text-muted-foreground">
{(selectedFile.size / 1024).toFixed(1)} KB
</p>
</div>
<Button variant="ghost" size="icon" onClick={clearSelectedFile}>
<X className="w-4 h-4" />
</Button>
</div>
{/* Retrieval mode selection */}
<div className="space-y-3">
<Label className="text-sm font-medium">How should the agent use this document?</Label>
<RadioGroup value={retrievalMode} onValueChange={setRetrievalMode}>
<label
htmlFor="full_document"
className={`flex items-start gap-3 p-3 border rounded-lg cursor-pointer transition-colors ${
retrievalMode === 'full_document' ? 'border-primary bg-primary/5' : 'hover:bg-muted/50'
}`}
>
<RadioGroupItem value="full_document" id="full_document" className="mt-0.5" />
<div>
<p className="font-medium text-sm">Full Document</p>
<p className="text-xs text-muted-foreground">
The entire document is provided to the agent on each retrieval.
Best for menus, price lists, FAQs, and other small reference documents.
</p>
</div>
</label>
<label
htmlFor="chunked"
className={`flex items-start gap-3 p-3 border rounded-lg cursor-pointer transition-colors ${
retrievalMode === 'chunked' ? 'border-primary bg-primary/5' : 'hover:bg-muted/50'
}`}
>
<RadioGroupItem value="chunked" id="chunked" className="mt-0.5" />
<div>
<p className="font-medium text-sm">Chunked Search</p>
<p className="text-xs text-muted-foreground">
The document is split into chunks and the most relevant ones are retrieved.
Better for large documents like manuals or policies.
</p>
</div>
</label>
</RadioGroup>
</div>
{/* Upload button */}
<Button onClick={uploadFile} className="w-full">
Upload & Process
</Button>
</div>
);
}
return (
<div className="space-y-4">
<input
@ -204,16 +271,17 @@ export default function DocumentUpload({ onUploadSuccess }: DocumentUploadProps)
)}
{/* Manual Upload Button */}
<div className="flex justify-center">
<Button
type="button"
variant="outline"
onClick={handleButtonClick}
disabled={uploading}
>
{uploading ? 'Uploading...' : 'Choose File'}
</Button>
</div>
{!uploading && (
<div className="flex justify-center">
<Button
type="button"
variant="outline"
onClick={handleButtonClick}
>
Choose File
</Button>
</div>
)}
</div>
);
}

View file

@ -1,5 +1,6 @@
"use client";
import { ExternalLink } from "lucide-react";
import { useEffect, useState } from "react";
import { Card, CardContent, CardDescription, CardHeader, CardTitle } from "@/components/ui/card";
@ -42,7 +43,10 @@ export default function FilesPage() {
<div className="mb-8">
<h1 className="text-3xl font-bold mb-2">Knowledge Base Files</h1>
<p className="text-muted-foreground">
Upload and manage documents for your voice agents to reference.
Upload and manage documents for your voice agents to reference.{" "}
<a href="https://docs.dograh.com/voice-agent/knowledge-base" target="_blank" rel="noopener noreferrer" className="inline-flex items-center gap-0.5 underline">
Learn more <ExternalLink className="h-3 w-3" />
</a>
</p>
</div>

View file

@ -4,7 +4,6 @@ import { useCallback, useEffect, useRef, useState } from "react";
import {
createRecordingsApiV1WorkflowRecordingsPost,
deleteRecordingApiV1WorkflowRecordingsRecordingIdDelete,
getSignedUrlApiV1S3SignedUrlGet,
getUploadUrlsApiV1WorkflowRecordingsUploadUrlPost,
listRecordingsApiV1WorkflowRecordingsGet,
transcribeAudioApiV1WorkflowRecordingsTranscribePost,
@ -30,6 +29,7 @@ import {
import { Textarea } from "@/components/ui/textarea";
import { LANGUAGE_DISPLAY_NAMES } from "@/constants/languages";
import { useUserConfig } from "@/context/UserConfigContext";
import { useAudioPlayback } from "@/hooks/useAudioPlayback";
interface RecordingsDialogProps {
open: boolean;
@ -74,8 +74,7 @@ export const RecordingsDialog = ({
const [recordingStep, setRecordingStep] = useState<RecordingStep>("idle");
const [recordingFilename, setRecordingFilename] = useState("");
const [recordingDuration, setRecordingDuration] = useState(0);
const [playingId, setPlayingId] = useState<string | null>(null);
const audioRef = useRef<HTMLAudioElement | null>(null);
const { playingId, toggle: togglePlayback, stop: stopPlayback } = useAudioPlayback();
const mediaRecorderRef = useRef<MediaRecorder | null>(null);
const audioChunksRef = useRef<Blob[]>([]);
const recordingTimerRef = useRef<ReturnType<typeof setInterval> | null>(null);
@ -128,13 +127,6 @@ export const RecordingsDialog = ({
setRecordingDuration(0);
}, []);
const stopPlayback = useCallback(() => {
if (audioRef.current) {
audioRef.current.pause();
audioRef.current = null;
}
setPlayingId(null);
}, []);
useEffect(() => {
if (open) {
@ -363,27 +355,8 @@ export const RecordingsDialog = ({
};
const handlePlay = async (rec: RecordingResponseSchema) => {
if (playingId === rec.recording_id) {
stopPlayback();
return;
}
stopPlayback();
try {
const result = await getSignedUrlApiV1S3SignedUrlGet({
query: {
key: rec.storage_key,
storage_backend: rec.storage_backend,
},
});
if (!result.data?.url) {
setError("Failed to get audio URL");
return;
}
const audio = new Audio(result.data.url);
audio.onended = () => setPlayingId(null);
audioRef.current = audio;
setPlayingId(rec.recording_id);
await audio.play();
await togglePlayback(rec.recording_id, rec.storage_key, rec.storage_backend);
} catch {
setError("Failed to play recording");
}

View file

@ -1,22 +1,27 @@
'use client';
import { Check, Copy, ExternalLink, FileText, Video } from 'lucide-react';
import { Check, Copy, ExternalLink, FileText, LoaderCircle, Phone, Video } from 'lucide-react';
import Link from 'next/link';
import { useParams } from 'next/navigation';
import { useParams, useRouter } from 'next/navigation';
import { useEffect, useRef, useState } from 'react';
import BrowserCall from '@/app/workflow/[workflowId]/run/[runId]/BrowserCall';
import { RealtimeFeedback, WorkflowRunLogs } from '@/app/workflow/[workflowId]/run/[runId]/components/RealtimeFeedback';
import WorkflowLayout from '@/app/workflow/WorkflowLayout';
import { getWorkflowRunApiV1WorkflowWorkflowIdRunsRunIdGet } from '@/client/sdk.gen';
import {
createWorkflowRunApiV1WorkflowWorkflowIdRunsPost,
getWorkflowRunApiV1WorkflowWorkflowIdRunsRunIdGet,
} from '@/client/sdk.gen';
import { MediaPreviewButton, MediaPreviewDialog } from '@/components/MediaPreviewDialog';
import { OnboardingTooltip } from '@/components/onboarding/OnboardingTooltip';
import { Button } from '@/components/ui/button';
import { Card, CardContent, CardFooter, CardHeader, CardTitle } from '@/components/ui/card';
import { Skeleton } from '@/components/ui/skeleton';
import { WORKFLOW_RUN_MODES } from '@/constants/workflowRunModes';
import { useOnboarding } from '@/context/OnboardingContext';
import { useAuth } from '@/lib/auth';
import { downloadFile } from '@/lib/files';
import { getRandomId } from '@/lib/utils';
interface WorkflowRunResponse {
is_completed: boolean;
@ -72,7 +77,9 @@ function ContextDisplay({ title, context }: { title: string; context: Record<str
export default function WorkflowRunPage() {
const params = useParams();
const router = useRouter();
const [isLoading, setIsLoading] = useState(true);
const [startingCall, setStartingCall] = useState(false);
const auth = useAuth();
const [workflowRun, setWorkflowRun] = useState<WorkflowRunResponse | null>(null);
const { hasSeenTooltip, markTooltipSeen } = useOnboarding();
@ -120,6 +127,24 @@ export default function WorkflowRunPage() {
fetchWorkflowRun();
}, [params.workflowId, params.runId, auth]);
const handleTestAgain = async () => {
if (startingCall) return;
setStartingCall(true);
try {
const workflowId = Number(params.workflowId);
const workflowRunName = `WR-${getRandomId()}`;
const response = await createWorkflowRunApiV1WorkflowWorkflowIdRunsPost({
path: { workflow_id: workflowId },
body: { mode: WORKFLOW_RUN_MODES.SMALL_WEBRTC, name: workflowRunName },
});
if (response.data?.id) {
router.push(`/workflow/${workflowId}/run/${response.data.id}`);
}
} finally {
setStartingCall(false);
}
};
let returnValue = null;
if (isLoading) {
@ -160,22 +185,37 @@ export default function WorkflowRunPage() {
</svg>
</div>
</div>
<Link href={`/workflow/${params.workflowId}`}>
<div className="flex items-center gap-2">
<Button
ref={customizeButtonRef}
onClick={handleTestAgain}
disabled={startingCall}
variant="outline"
className="gap-2"
onClick={() => {
if (!hasSeenTooltip('customize_workflow')) {
markTooltipSeen('customize_workflow');
}
}}
>
<svg className="h-4 w-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth="2" d="M11 5H6a2 2 0 00-2 2v11a2 2 0 002 2h11a2 2 0 002-2v-5m-1.414-9.414a2 2 0 112.828 2.828L11.828 15H9v-2.828l8.586-8.586z" />
</svg>
Customize Agent
{startingCall ? (
<LoaderCircle className="h-4 w-4 animate-spin" />
) : (
<Phone className="h-4 w-4" />
)}
{startingCall ? 'Starting...' : 'Test Again'}
</Button>
</Link>
<Link href={`/workflow/${params.workflowId}`}>
<Button
ref={customizeButtonRef}
className="gap-2"
onClick={() => {
if (!hasSeenTooltip('customize_workflow')) {
markTooltipSeen('customize_workflow');
}
}}
>
<svg className="h-4 w-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth="2" d="M11 5H6a2 2 0 00-2 2v11a2 2 0 002 2h11a2 2 0 002-2v-5m-1.414-9.414a2 2 0 112.828 2.828L11.828 15H9v-2.828l8.586-8.586z" />
</svg>
Customize Agent
</Button>
</Link>
</div>
</CardHeader>
<CardContent>
<p className="text-muted-foreground mb-8">Your voice agent run has been completed successfully. You can preview or download the transcript and recording.</p>

View file

@ -1,10 +1,10 @@
"use client";
import { ArrowLeft, BookA, Brain, ExternalLink, Mic, PhoneOff, Rocket, Settings, Trash2Icon, Variable } from "lucide-react";
import { ArrowLeft, BookA, Brain, ExternalLink, Loader2, Mic, Pause, PhoneOff, Play, Rocket, Settings, Trash2Icon, Upload, Variable, X } from "lucide-react";
import { useParams, useRouter } from "next/navigation";
import { useEffect, useMemo, useState } from "react";
import { useEffect, useMemo, useRef, useState } from "react";
import { getWorkflowApiV1WorkflowFetchWorkflowIdGet } from "@/client/sdk.gen";
import { getAmbientNoiseUploadUrlApiV1WorkflowAmbientNoiseUploadUrlPost, getWorkflowApiV1WorkflowFetchWorkflowIdGet } from "@/client/sdk.gen";
import type { WorkflowResponse } from "@/client/types.gen";
import { FlowEdge, FlowNode } from "@/components/flow/types";
import { LLMConfigSelector } from "@/components/LLMConfigSelector";
@ -19,6 +19,7 @@ import { Separator } from "@/components/ui/separator";
import { Switch } from "@/components/ui/switch";
import { Textarea } from "@/components/ui/textarea";
import { SETTINGS_DOCUMENTATION_URLS } from "@/constants/documentation";
import { useAudioPlayback } from "@/hooks/useAudioPlayback";
import { useAuth } from "@/lib/auth";
import logger from "@/lib/logger";
import {
@ -80,13 +81,17 @@ const NAV_ITEMS = [
// Section: General
// ---------------------------------------------------------------------------
const MAX_AMBIENT_NOISE_FILE_SIZE = 10 * 1024 * 1024; // 10MB
function GeneralSection({
workflowConfigurations,
workflowName,
workflowId,
onSave,
}: {
workflowConfigurations: WorkflowConfigurations;
workflowName: string;
workflowId: number;
onSave: (configurations: WorkflowConfigurations, workflowName: string) => Promise<void>;
}) {
const [name, setName] = useState(workflowName);
@ -103,6 +108,68 @@ function GeneralSection({
workflowConfigurations.context_compaction_enabled ?? false,
);
const [isSaving, setIsSaving] = useState(false);
const [isUploadingAudio, setIsUploadingAudio] = useState(false);
const [audioUploadError, setAudioUploadError] = useState<string | null>(null);
const ambientFileInputRef = useRef<HTMLInputElement>(null);
const { playingId, toggle: togglePlayback } = useAudioPlayback();
const handleAmbientFileUpload = async (file: File) => {
if (file.size > MAX_AMBIENT_NOISE_FILE_SIZE) {
setAudioUploadError(`File too large (${(file.size / (1024 * 1024)).toFixed(1)}MB). Maximum is 10MB.`);
return;
}
setIsUploadingAudio(true);
setAudioUploadError(null);
try {
// 1. Get presigned upload URL
const res = await getAmbientNoiseUploadUrlApiV1WorkflowAmbientNoiseUploadUrlPost({
body: {
workflow_id: Number(workflowId),
filename: file.name,
mime_type: file.type || "audio/wav",
file_size: file.size,
},
});
if (res.error || !res.data?.upload_url) {
throw new Error("Failed to get upload URL");
}
const data = res.data;
// 2. Upload file to storage
const uploadRes = await fetch(data.upload_url, {
method: "PUT",
body: file,
headers: { "Content-Type": file.type || "audio/wav" },
});
if (!uploadRes.ok) {
throw new Error("File upload failed");
}
// 3. Update config with storage reference
setAmbientNoiseConfig((prev) => ({
...prev,
storage_key: data.storage_key,
storage_backend: data.storage_backend,
original_filename: file.name,
}));
} catch (err) {
setAudioUploadError(err instanceof Error ? err.message : "Upload failed");
} finally {
setIsUploadingAudio(false);
if (ambientFileInputRef.current) ambientFileInputRef.current.value = "";
}
};
const handleRemoveCustomAudio = () => {
setAmbientNoiseConfig((prev) => ({
enabled: prev.enabled,
volume: prev.volume,
}));
};
const handleSave = async () => {
setIsSaving(true);
@ -156,7 +223,7 @@ function GeneralSection({
<div>
<h3 className="text-sm font-medium">Ambient Noise</h3>
<p className="text-xs text-muted-foreground mt-0.5">
Add background office ambient noise to make the conversation sound more natural.
Add background ambient noise to make the conversation sound more natural.
</p>
</div>
<div className="flex items-center justify-between">
@ -170,20 +237,108 @@ function GeneralSection({
/>
</div>
{ambientNoiseConfig.enabled && (
<div className="space-y-2">
<Label htmlFor="ambient-volume" className="text-xs">Volume</Label>
<Input
id="ambient-volume"
type="number"
step="0.1"
min="0"
max="1"
value={ambientNoiseConfig.volume}
onChange={(e) => {
const value = parseFloat(e.target.value);
if (!isNaN(value)) setAmbientNoiseConfig((prev) => ({ ...prev, volume: value }));
}}
/>
<div className="space-y-4">
<div className="space-y-2">
<Label htmlFor="ambient-volume" className="text-xs">Volume</Label>
<Input
id="ambient-volume"
type="number"
step="0.1"
min="0"
max="1"
value={ambientNoiseConfig.volume}
onChange={(e) => {
const value = parseFloat(e.target.value);
if (!isNaN(value)) setAmbientNoiseConfig((prev) => ({ ...prev, volume: value }));
}}
/>
</div>
{/* Custom Audio File */}
<div className="space-y-2">
<Label className="text-xs">Custom Audio File</Label>
<p className="text-xs text-muted-foreground">
Upload your own audio file or use the default office ambience.
</p>
{ambientNoiseConfig.storage_key ? (
<div className="flex items-center gap-2 rounded-md border p-2 bg-muted/10">
<code className="text-xs bg-muted px-1.5 py-0.5 rounded font-mono truncate flex-1">
{ambientNoiseConfig.original_filename || "Custom audio"}
</code>
<Button
type="button"
size="sm"
variant="ghost"
className="h-6 w-6 p-0 shrink-0"
onClick={async () => {
try {
await togglePlayback(
"ambient-noise",
ambientNoiseConfig.storage_key!,
ambientNoiseConfig.storage_backend,
);
} catch {
setAudioUploadError("Failed to play audio");
}
}}
>
{playingId === "ambient-noise" ? (
<Pause className="w-3.5 h-3.5" />
) : (
<Play className="w-3.5 h-3.5" />
)}
</Button>
<Button
type="button"
size="sm"
variant="ghost"
className="h-6 w-6 p-0 shrink-0"
onClick={handleRemoveCustomAudio}
>
<X className="w-3.5 h-3.5" />
</Button>
</div>
) : (
<div>
<input
ref={ambientFileInputRef}
type="file"
accept="audio/*"
onChange={(e) => {
const file = e.target.files?.[0];
if (file) handleAmbientFileUpload(file);
}}
className="hidden"
/>
<Button
type="button"
variant="outline"
size="sm"
className="text-sm font-normal"
onClick={() => ambientFileInputRef.current?.click()}
disabled={isUploadingAudio}
>
{isUploadingAudio ? (
<Loader2 className="w-4 h-4 mr-2 animate-spin" />
) : (
<Upload className="w-4 h-4 mr-2" />
)}
{isUploadingAudio ? "Uploading..." : "Upload audio file (max 10MB)"}
</Button>
</div>
)}
{audioUploadError && (
<p className="text-xs text-destructive">{audioUploadError}</p>
)}
{!ambientNoiseConfig.storage_key && (
<p className="text-xs text-muted-foreground italic">
Using default office ambience
</p>
)}
</div>
</div>
)}
</div>
@ -786,6 +941,7 @@ function WorkflowSettingsContent({
<GeneralSection
workflowConfigurations={workflowConfigurations}
workflowName={workflowName || workflow.name}
workflowId={workflowId}
onSave={saveWorkflowConfigurations}
/>

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -168,6 +168,48 @@ export type AccessTokenResponse = {
connection_id: string;
};
/**
* AmbientNoiseUploadRequest
*/
export type AmbientNoiseUploadRequest = {
/**
* Workflow Id
*/
workflow_id: number;
/**
* Filename
*/
filename: string;
/**
* Mime Type
*/
mime_type?: string;
/**
* File Size
*
* Max 10MB
*/
file_size: number;
};
/**
* AmbientNoiseUploadResponse
*/
export type AmbientNoiseUploadResponse = {
/**
* Upload Url
*/
upload_url: string;
/**
* Storage Key
*/
storage_key: string;
/**
* Storage Backend
*/
storage_backend: string;
};
/**
* AuthResponse
*/
@ -1295,6 +1337,10 @@ export type DocumentResponseSchema = {
* Total Chunks
*/
total_chunks: number;
/**
* Retrieval Mode
*/
retrieval_mode?: string;
/**
* Custom Metadata
*/
@ -2036,6 +2082,12 @@ export type ProcessDocumentRequestSchema = {
* S3 key of the uploaded file
*/
s3_key: string;
/**
* Retrieval Mode
*
* Retrieval mode: 'chunked' for vector search or 'full_document' for full text retrieval
*/
retrieval_mode?: string;
};
/**
@ -5175,6 +5227,45 @@ export type DuplicateWorkflowTemplateApiV1WorkflowTemplatesDuplicatePostResponse
export type DuplicateWorkflowTemplateApiV1WorkflowTemplatesDuplicatePostResponse = DuplicateWorkflowTemplateApiV1WorkflowTemplatesDuplicatePostResponses[keyof DuplicateWorkflowTemplateApiV1WorkflowTemplatesDuplicatePostResponses];
export type GetAmbientNoiseUploadUrlApiV1WorkflowAmbientNoiseUploadUrlPostData = {
body: AmbientNoiseUploadRequest;
headers?: {
/**
* Authorization
*/
authorization?: string | null;
/**
* X-Api-Key
*/
'X-API-Key'?: string | null;
};
path?: never;
query?: never;
url: '/api/v1/workflow/ambient-noise/upload-url';
};
export type GetAmbientNoiseUploadUrlApiV1WorkflowAmbientNoiseUploadUrlPostErrors = {
/**
* Not found
*/
404: unknown;
/**
* Validation Error
*/
422: HttpValidationError;
};
export type GetAmbientNoiseUploadUrlApiV1WorkflowAmbientNoiseUploadUrlPostError = GetAmbientNoiseUploadUrlApiV1WorkflowAmbientNoiseUploadUrlPostErrors[keyof GetAmbientNoiseUploadUrlApiV1WorkflowAmbientNoiseUploadUrlPostErrors];
export type GetAmbientNoiseUploadUrlApiV1WorkflowAmbientNoiseUploadUrlPostResponses = {
/**
* Successful Response
*/
200: AmbientNoiseUploadResponse;
};
export type GetAmbientNoiseUploadUrlApiV1WorkflowAmbientNoiseUploadUrlPostResponse = GetAmbientNoiseUploadUrlApiV1WorkflowAmbientNoiseUploadUrlPostResponses[keyof GetAmbientNoiseUploadUrlApiV1WorkflowAmbientNoiseUploadUrlPostResponses];
export type GetDefaultConfigurationsApiV1UserConfigurationsDefaultsGetData = {
body?: never;
path?: never;

View file

@ -123,7 +123,7 @@ export const DocumentSelector = ({
{doc.filename}
</div>
<div className="text-xs text-muted-foreground">
{formatFileSize(doc.file_size_bytes)} {doc.total_chunks} chunks
{formatFileSize(doc.file_size_bytes)} {doc.retrieval_mode === 'full_document' ? 'Full Document' : `${doc.total_chunks} chunks`}
</div>
</div>
</label>

View file

@ -0,0 +1,57 @@
import { useCallback, useRef, useState } from "react";
import { getSignedUrlApiV1S3SignedUrlGet } from "@/client/sdk.gen";
/**
* Hook for playing audio files stored in S3/MinIO via signed URLs.
*
* Returns the currently-playing ID (or null), a toggle function, and a stop function.
*/
export function useAudioPlayback() {
const [playingId, setPlayingId] = useState<string | null>(null);
const audioRef = useRef<HTMLAudioElement | null>(null);
const stop = useCallback(() => {
if (audioRef.current) {
audioRef.current.pause();
audioRef.current = null;
}
setPlayingId(null);
}, []);
const toggle = useCallback(
async (id: string, storageKey: string, storageBackend?: string) => {
// If already playing this id, stop it
if (audioRef.current && playingId === id) {
stop();
return;
}
// Stop any previous playback
stop();
const result = await getSignedUrlApiV1S3SignedUrlGet({
query: {
key: storageKey,
storage_backend: storageBackend,
},
});
if (!result.data?.url) {
throw new Error("Failed to get audio URL");
}
const audio = new Audio(result.data.url);
audio.onended = () => {
audioRef.current = null;
setPlayingId(null);
};
audioRef.current = audio;
setPlayingId(id);
await audio.play();
},
[playingId, stop],
);
return { playingId, toggle, stop } as const;
}

View file

@ -8,6 +8,9 @@ export interface VADConfiguration {
export interface AmbientNoiseConfiguration {
enabled: boolean;
volume: number;
storage_key?: string;
storage_backend?: string;
original_filename?: string;
}
export type TurnStopStrategy = 'transcription' | 'turn_analyzer';