chore: refactor file upload mechanism to avoid NFS dependency (#496)

* chore: refactor file upload mechanism to avoid NFS dependency

* add regression test for deregistration of calls

* fix: fix minio upload issue

* fix: make transcript upload async
This commit is contained in:
Abhishek 2026-07-03 20:01:52 +05:30 committed by GitHub
parent 79a4a3c9f1
commit a54ab519b8
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
23 changed files with 370 additions and 401 deletions

View file

@ -1,23 +1,51 @@
from abc import ABC, abstractmethod
from typing import Any, BinaryIO, Dict, Optional
from typing import Any, Dict, Optional, Protocol
class AsyncReadable(Protocol):
    """Structural type for any object whose ``read()`` can be awaited for bytes.

    Examples are aiofiles handles and in-memory wrappers: an object only has
    to provide a matching ``read`` coroutine, it does not need to inherit
    from this class.
    """

    async def read(self) -> bytes:
        """Return the readable content as bytes."""
        ...
class _AsyncBytesReader:
"""Async file-like wrapper over in-memory bytes for acreate_file()."""
def __init__(self, data: bytes):
self._data = data
async def read(self) -> bytes:
return self._data
class BaseFileSystem(ABC):
"""Abstract base class for filesystem operations."""
@abstractmethod
async def acreate_file(self, file_path: str, content: BinaryIO) -> bool:
async def acreate_file(self, file_path: str, content: AsyncReadable) -> bool:
"""Create a new file with the given content.
Args:
file_path: Path where the file should be created
content: File content as a binary stream
content: File content readable via ``await content.read()``
Returns:
bool: True if file was created successfully, False otherwise
"""
pass
async def acreate_file_from_bytes(self, file_path: str, data: bytes) -> bool:
"""Create a file directly from in-memory bytes (no local file needed).
Args:
file_path: Path where the file should be created
data: File content as bytes
Returns:
bool: True if file was created successfully, False otherwise
"""
return await self.acreate_file(file_path, _AsyncBytesReader(data))
@abstractmethod
async def aupload_file(self, local_path: str, destination_path: str) -> bool:
"""Upload a file from local path to destination.

View file

@ -1,11 +1,11 @@
import asyncio
import os
from datetime import datetime
from typing import BinaryIO, Optional
from typing import Optional
import aiofiles
from .base import BaseFileSystem
from .base import AsyncReadable, BaseFileSystem
class LocalFileSystem(BaseFileSystem):
@ -24,7 +24,7 @@ class LocalFileSystem(BaseFileSystem):
"""Get the full path by joining with base path."""
return os.path.join(self.base_path, file_path)
async def acreate_file(self, file_path: str, content: BinaryIO) -> bool:
async def acreate_file(self, file_path: str, content: AsyncReadable) -> bool:
try:
full_path = self._get_full_path(file_path)
os.makedirs(os.path.dirname(full_path), exist_ok=True)

View file

@ -1,12 +1,13 @@
import asyncio
import io
import json
from typing import Any, BinaryIO, Dict, Optional
from typing import Any, Dict, Optional
from loguru import logger
from minio import Minio
from minio.error import S3Error
from .base import BaseFileSystem
from .base import AsyncReadable, BaseFileSystem
class MinioFileSystem(BaseFileSystem):
@ -89,15 +90,16 @@ class MinioFileSystem(BaseFileSystem):
logger.debug(f"Bucket setup note: {e}")
pass
async def acreate_file(self, file_path: str, content: BinaryIO) -> bool:
async def acreate_file(self, file_path: str, content: AsyncReadable) -> bool:
try:
data = await content.read()
def _put():
# The MinIO SDK requires a stream with .read(), not raw bytes.
self.client.put_object(
self.bucket_name,
file_path,
data=bytes(data),
data=io.BytesIO(data),
length=len(data),
)

View file

@ -1,6 +1,6 @@
from typing import Any, BinaryIO, Dict, NoReturn, Optional
from typing import Any, Dict, NoReturn, Optional
from .base import BaseFileSystem
from .base import AsyncReadable, BaseFileSystem
class NullFileSystem(BaseFileSystem):
@ -16,7 +16,7 @@ class NullFileSystem(BaseFileSystem):
"Set ENVIRONMENT to a non-test value or inject a real filesystem fixture."
)
async def acreate_file(self, file_path: str, content: BinaryIO) -> bool:
async def acreate_file(self, file_path: str, content: AsyncReadable) -> bool:
self._fail("acreate_file")
async def aupload_file(self, local_path: str, destination_path: str) -> bool:

View file

@ -1,10 +1,10 @@
from typing import Any, BinaryIO, Dict, Optional
from typing import Any, Dict, Optional
import aioboto3
from botocore.config import Config
from botocore.exceptions import ClientError
from .base import BaseFileSystem
from .base import AsyncReadable, BaseFileSystem
class S3FileSystem(BaseFileSystem):
@ -57,7 +57,7 @@ class S3FileSystem(BaseFileSystem):
kwargs["config"] = self._config
return kwargs
async def acreate_file(self, file_path: str, content: BinaryIO) -> bool:
async def acreate_file(self, file_path: str, content: AsyncReadable) -> bool:
try:
async with self.session.client("s3", **self._client_kwargs()) as s3_client:
await s3_client.put_object(

View file

@ -16,6 +16,7 @@ from api.services.pipecat.pipeline_metrics_aggregator import PipelineMetricsAggr
from api.services.pipecat.tracing_config import get_trace_url
from api.services.posthog_client import capture_event
from api.services.workflow.pipecat_engine import PipecatEngine
from api.services.workflow_run_artifacts import upload_workflow_run_artifacts
from api.tasks.arq import enqueue_job
from api.tasks.function_names import FunctionNames
from pipecat.frames.frames import (
@ -361,50 +362,49 @@ def register_event_handlers(
except Exception as e:
logger.error(f"Error saving workflow run logs: {e}", exc_info=True)
# Write buffers to temp files and enqueue combined processing task
audio_temp_path = None
user_audio_temp_path = None
bot_audio_temp_path = None
transcript_temp_path = None
# Upload artifacts straight from the in-memory buffers so nothing has
# to cross a process/host boundary via temp files. Must complete
# before the completion job is enqueued so QA and webhooks see the
# artifacts in storage.
try:
mixed_audio_wav = None
user_audio_wav = None
bot_audio_wav = None
if not in_memory_audio_buffers.mixed.is_empty:
audio_temp_path = (
await in_memory_audio_buffers.mixed.write_to_temp_file()
)
mixed_audio_wav = await in_memory_audio_buffers.mixed.to_wav_bytes()
else:
logger.debug("Audio buffer is empty, skipping upload")
if not in_memory_audio_buffers.user.is_empty:
user_audio_temp_path = (
await in_memory_audio_buffers.user.write_to_temp_file()
)
user_audio_wav = await in_memory_audio_buffers.user.to_wav_bytes()
else:
logger.debug("User audio buffer is empty, skipping upload")
if not in_memory_audio_buffers.bot.is_empty:
bot_audio_temp_path = (
await in_memory_audio_buffers.bot.write_to_temp_file()
)
bot_audio_wav = await in_memory_audio_buffers.bot.to_wav_bytes()
else:
logger.debug("Bot audio buffer is empty, skipping upload")
transcript_temp_path = in_memory_logs_buffer.write_transcript_to_temp_file()
if not transcript_temp_path:
transcript_text = in_memory_logs_buffer.generate_transcript_text()
if not transcript_text:
logger.debug("No transcript events in logs buffer, skipping upload")
await upload_workflow_run_artifacts(
workflow_run_id,
mixed_audio_wav=mixed_audio_wav,
user_audio_wav=user_audio_wav,
bot_audio_wav=bot_audio_wav,
transcript_text=transcript_text,
)
except Exception as e:
logger.error(f"Error preparing buffers for S3 upload: {e}", exc_info=True)
logger.error(f"Error uploading call artifacts: {e}", exc_info=True)
# Combined task: uploads artifacts, runs integrations (including QA),
# then calculates cost (so QA token usage is captured in usage_info)
# Combined task: runs integrations (including QA), then calculates
# cost (so QA token usage is captured in usage_info)
await enqueue_job(
FunctionNames.PROCESS_WORKFLOW_COMPLETION,
workflow_run_id,
audio_temp_path,
transcript_temp_path,
user_audio_temp_path,
bot_audio_temp_path,
)
# Return the buffer so it can be passed to other handlers

View file

@ -1,5 +1,5 @@
import asyncio
import tempfile
import io
import wave
from datetime import UTC, datetime
from typing import List, Optional
@ -15,7 +15,7 @@ from pipecat.utils.enums import RealtimeFeedbackType
class InMemoryAudioBuffer:
"""Buffer audio data in memory during a call, then write to temp file on disconnect."""
"""Buffer audio data in memory during a call, then encode to WAV bytes on disconnect."""
def __init__(self, workflow_run_id: int, sample_rate: int, num_channels: int = 1):
self._workflow_run_id = workflow_run_id
@ -41,28 +41,30 @@ class InMemoryAudioBuffer:
f"Appended {len(pcm_data)} bytes to audio buffer. Total size: {self._total_size}"
)
async def write_to_temp_file(self) -> str:
"""Write audio data to a temporary WAV file and return the path."""
async def to_wav_bytes(self) -> bytes:
"""Encode the buffered PCM data as an in-memory WAV file."""
async with self._lock:
temp_file = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
logger.debug(
f"Writing audio buffer to temp file {temp_file.name} for workflow {self._workflow_run_id}"
)
chunks = list(self._chunks)
# Write WAV header and PCM data
with wave.open(temp_file.name, "wb") as wf:
def _encode() -> bytes:
wav_io = io.BytesIO()
with wave.open(wav_io, "wb") as wf:
wf.setnchannels(self._num_channels)
wf.setsampwidth(2) # 16-bit audio
wf.setframerate(self._sample_rate)
# Concatenate all chunks
for chunk in self._chunks:
for chunk in chunks:
wf.writeframes(chunk)
return wav_io.getvalue()
logger.info(
f"Successfully wrote {self._total_size} bytes of audio to {temp_file.name}"
)
return temp_file.name
# Encoding is mostly memcpy but can touch ~100MB; keep it off the event loop
data = await asyncio.to_thread(_encode)
logger.info(
f"Encoded {self._total_size} bytes of audio to {len(data)} WAV bytes "
f"for workflow {self._workflow_run_id}"
)
return data
@property
def is_empty(self) -> bool:
@ -172,27 +174,6 @@ class InMemoryLogsBuffer:
"""
return _generate_transcript_text(self._sorted_events())
def write_transcript_to_temp_file(self) -> Optional[str]:
    """Write the transcript to a temporary ``.txt`` file and return its path.

    The file is created with ``delete=False``, so it stays on disk after this
    method returns; removing it is left to whoever consumes the path.

    Returns:
        The temp file path, or ``None`` when the generated transcript text
        is empty.
    """
    content = self.generate_transcript_text()
    if not content:
        return None

    # The context manager closes the handle even if the write raises. UTF-8
    # is explicit so non-ASCII transcript text does not depend on the host
    # locale's default encoding.
    with tempfile.NamedTemporaryFile(
        mode="w", suffix=".txt", delete=False, encoding="utf-8"
    ) as temp_file:
        logger.debug(
            f"Writing transcript to temp file {temp_file.name} for workflow {self._workflow_run_id}"
        )
        temp_file.write(content)

    logger.info(
        f"Successfully wrote {len(content)} chars of transcript to {temp_file.name}"
    )
    return temp_file.name
@property
def is_empty(self) -> bool:
"""Check if the buffer is empty."""

View file

@ -0,0 +1,126 @@
"""Upload end-of-call artifacts (recordings, transcript) to object storage.
Called from the pipeline process itself, straight from the in-memory call
buffers, so no local file ever has to cross a process/host boundary (no
shared /tmp between web and ARQ workers). Uploads happen before the
workflow-completion job is enqueued so QA and webhooks see the artifacts
in storage.
"""
from loguru import logger
from api.db import db_client
from api.services.storage import get_current_storage_backend, storage_fs
def _recording_metadata(storage_key: str, storage_backend: str, track: str) -> dict:
return {
"storage_key": storage_key,
"storage_backend": storage_backend,
"format": "wav",
"track": track,
}
async def _upload_bytes(
    workflow_run_id: int,
    data: bytes,
    storage_key: str,
    label: str,
) -> bool:
    """Store ``data`` under ``storage_key`` and report whether it succeeded.

    Never raises for ordinary errors: a falsy result from the storage backend
    and any exception are both logged and turned into ``False``.

    Args:
        workflow_run_id: Run the artifact belongs to (used in log messages).
        data: Raw bytes to store.
        storage_key: Destination key/path in the storage backend.
        label: Human-readable artifact name used in log messages.

    Returns:
        True if the backend reported success, False otherwise.
    """
    try:
        logger.debug(f"{label} size: {len(data)} bytes")
        stored = await storage_fs.acreate_file_from_bytes(storage_key, data)
        if not stored:
            logger.error(
                f"Storage backend rejected {label} upload for workflow "
                f"{workflow_run_id}: {storage_key}"
            )
            return False
        logger.info(f"Successfully uploaded {label}: {storage_key}")
        return True
    except Exception as exc:
        logger.error(f"Error uploading {label} for workflow {workflow_run_id}: {exc}")
        return False
async def _persist_run_update(workflow_run_id: int, label: str, /, **fields) -> bool:
    """Best-effort write of ``fields`` onto the workflow run record.

    A failure is logged and reported as ``False`` instead of being raised, so
    one failed metadata write cannot stop the remaining artifacts from being
    uploaded.
    """
    try:
        await db_client.update_workflow_run(run_id=workflow_run_id, **fields)
    except Exception as e:
        logger.error(
            f"Error persisting {label} metadata for workflow {workflow_run_id}: {e}"
        )
        return False
    return True


async def upload_workflow_run_artifacts(
    workflow_run_id: int,
    *,
    mixed_audio_wav: bytes | None = None,
    user_audio_wav: bytes | None = None,
    bot_audio_wav: bytes | None = None,
    transcript_text: str | None = None,
) -> None:
    """Upload call artifacts to object storage and persist their metadata.

    Each artifact is handled independently: a failed upload or a failed
    metadata write is logged and the remaining artifacts are still attempted.

    Args:
        workflow_run_id: Run the artifacts belong to; also used to build the
            storage keys.
        mixed_audio_wav: WAV bytes of the mixed track, stored at
            ``recordings/<run_id>.wav``. Skipped when empty or None.
        user_audio_wav: WAV bytes of the user track, stored at
            ``recordings/<run_id>/user.wav``. Skipped when empty or None.
        bot_audio_wav: WAV bytes of the bot track, stored at
            ``recordings/<run_id>/bot.wav``. Skipped when empty or None.
        transcript_text: Transcript text, stored UTF-8 encoded at
            ``transcripts/<run_id>.txt``. Skipped when empty or None.
    """
    storage_backend = get_current_storage_backend()
    recordings_metadata: dict[str, dict] = {}

    # (track, payload, storage key), in the order the uploads are attempted.
    audio_tracks = (
        ("mixed", mixed_audio_wav, f"recordings/{workflow_run_id}.wav"),
        ("user", user_audio_wav, f"recordings/{workflow_run_id}/user.wav"),
        ("bot", bot_audio_wav, f"recordings/{workflow_run_id}/bot.wav"),
    )
    for track, wav_bytes, storage_key in audio_tracks:
        if not wav_bytes:
            continue
        logger.info(
            f"Uploading {track} audio to {storage_backend.name} - workflow_run_id: {workflow_run_id}"
        )
        if not await _upload_bytes(
            workflow_run_id, wav_bytes, storage_key, f"{track} audio"
        ):
            continue
        recordings_metadata[track] = _recording_metadata(
            storage_key, storage_backend.value, track
        )
        if track == "mixed":
            # Only the mixed track is written to the run's recording_url.
            await _persist_run_update(
                workflow_run_id,
                "mixed recording",
                recording_url=storage_key,
                storage_backend=storage_backend.value,
            )

    if recordings_metadata:
        await _persist_run_update(
            workflow_run_id,
            "recordings",
            storage_backend=storage_backend.value,
            extra={"recordings": recordings_metadata},
        )

    if transcript_text:
        transcript_url = f"transcripts/{workflow_run_id}.txt"
        logger.info(
            f"Uploading transcript to {storage_backend.name} - workflow_run_id: {workflow_run_id}"
        )
        if await _upload_bytes(
            workflow_run_id,
            transcript_text.encode("utf-8"),
            transcript_url,
            "transcript",
        ):
            await _persist_run_update(
                workflow_run_id,
                "transcript",
                transcript_url=transcript_url,
                storage_backend=storage_backend.value,
            )