mirror of
https://github.com/dograh-hq/dograh.git
synced 2026-07-04 10:52:17 +02:00
chore: refactor file upload mechanism to avoid NFS dependency (#496)
* chore: refactor file upload mechanism to avoid NFS dependency * add regression test for deregistration of calls * fix: fix minio upload issue * fix: make transcript upload async
This commit is contained in:
parent
79a4a3c9f1
commit
a54ab519b8
23 changed files with 370 additions and 401 deletions
126
api/services/workflow_run_artifacts.py
Normal file
126
api/services/workflow_run_artifacts.py
Normal file
|
|
@ -0,0 +1,126 @@
|
|||
"""Upload end-of-call artifacts (recordings, transcript) to object storage.
|
||||
|
||||
Called from the pipeline process itself, straight from the in-memory call
|
||||
buffers, so no local file ever has to cross a process/host boundary (no
|
||||
shared /tmp between web and ARQ workers). Uploads happen before the
|
||||
workflow-completion job is enqueued so QA and webhooks see the artifacts
|
||||
in storage.
|
||||
"""
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from api.db import db_client
|
||||
from api.services.storage import get_current_storage_backend, storage_fs
|
||||
|
||||
|
||||
def _recording_metadata(storage_key: str, storage_backend: str, track: str) -> dict:
|
||||
return {
|
||||
"storage_key": storage_key,
|
||||
"storage_backend": storage_backend,
|
||||
"format": "wav",
|
||||
"track": track,
|
||||
}
|
||||
|
||||
|
||||
async def _upload_bytes(
    workflow_run_id: int,
    data: bytes,
    storage_key: str,
    label: str,
) -> bool:
    """Upload one in-memory artifact to object storage on a best-effort basis.

    Args:
        workflow_run_id: Run the artifact belongs to; used only in log messages.
        data: Raw payload to store.
        storage_key: Destination key handed to ``storage_fs``.
        label: Human-readable artifact name used in log messages.

    Returns:
        True when ``storage_fs.acreate_file_from_bytes`` returns a truthy
        result; False when it returns a falsy result or when any
        ``Exception`` is raised during the attempt.
    """
    try:
        logger.debug(f"{label} size: {len(data)} bytes")
        if await storage_fs.acreate_file_from_bytes(storage_key, data):
            logger.info(f"Successfully uploaded {label}: {storage_key}")
            return True
        logger.error(
            f"Storage backend rejected {label} upload for workflow "
            f"{workflow_run_id}: {storage_key}"
        )
        return False
    except Exception as e:
        # Deliberately swallow the error so one failed artifact does not stop
        # the caller, but log via ``exception`` so the traceback is kept
        # (the message text itself is unchanged).
        logger.exception(
            f"Error uploading {label} for workflow {workflow_run_id}: {e}"
        )
        return False
|
||||
|
||||
|
||||
async def upload_workflow_run_artifacts(
    workflow_run_id: int,
    *,
    mixed_audio_wav: bytes | None = None,
    user_audio_wav: bytes | None = None,
    bot_audio_wav: bytes | None = None,
    transcript_text: str | None = None,
) -> None:
    """Store a run's recordings and transcript, then record where they went.

    Artifacts are handled one at a time in the order mixed, user, bot,
    transcript. An artifact that is empty/None is skipped, and one whose
    upload reports failure is skipped without stopping the others.
    Exceptions raised by ``db_client.update_workflow_run`` are not caught
    here.

    Args:
        workflow_run_id: Run whose artifacts are being stored.
        mixed_audio_wav: WAV bytes of the mixed track, if any.
        user_audio_wav: WAV bytes of the user track, if any.
        bot_audio_wav: WAV bytes of the bot track, if any.
        transcript_text: Transcript text, stored UTF-8 encoded, if any.
    """
    backend = get_current_storage_backend()

    # (track, payload, destination key), listed in upload order.
    audio_tracks = (
        ("mixed", mixed_audio_wav, f"recordings/{workflow_run_id}.wav"),
        ("user", user_audio_wav, f"recordings/{workflow_run_id}/user.wav"),
        ("bot", bot_audio_wav, f"recordings/{workflow_run_id}/bot.wav"),
    )

    uploaded_tracks: dict[str, dict] = {}
    for track, payload, key in audio_tracks:
        if not payload:
            continue
        logger.info(
            f"Uploading {track} audio to {backend.name} - workflow_run_id: {workflow_run_id}"
        )
        stored = await _upload_bytes(workflow_run_id, payload, key, f"{track} audio")
        if not stored:
            continue
        uploaded_tracks[track] = _recording_metadata(key, backend.value, track)
        if track == "mixed":
            # Only the mixed track is also written to the run's
            # recording_url column, immediately after its upload.
            await db_client.update_workflow_run(
                run_id=workflow_run_id,
                recording_url=key,
                storage_backend=backend.value,
            )

    if uploaded_tracks:
        await db_client.update_workflow_run(
            run_id=workflow_run_id,
            storage_backend=backend.value,
            extra={"recordings": uploaded_tracks},
        )

    if not transcript_text:
        return

    transcript_key = f"transcripts/{workflow_run_id}.txt"
    logger.info(
        f"Uploading transcript to {backend.name} - workflow_run_id: {workflow_run_id}"
    )
    transcript_stored = await _upload_bytes(
        workflow_run_id,
        transcript_text.encode("utf-8"),
        transcript_key,
        "transcript",
    )
    if transcript_stored:
        await db_client.update_workflow_run(
            run_id=workflow_run_id,
            transcript_url=transcript_key,
            storage_backend=backend.value,
        )
|
||||
Loading…
Add table
Add a link
Reference in a new issue