dograh/api/services/workflow_run_artifacts.py

"""Upload end-of-call artifacts (recordings, transcript) to object storage.

Called from the pipeline process itself, straight from the in-memory call
buffers, so no local file ever has to cross a process/host boundary (no
shared /tmp between web and ARQ workers). Uploads happen before the
workflow-completion job is enqueued so QA and webhooks see the artifacts
in storage.
"""

from loguru import logger

from api.db import db_client
from api.services.storage import get_current_storage_backend, storage_fs


def _recording_metadata(storage_key: str, storage_backend: str, track: str) -> dict:
    return {
        "storage_key": storage_key,
        "storage_backend": storage_backend,
        "format": "wav",
        "track": track,
    }


async def _upload_bytes(
    workflow_run_id: int,
    data: bytes,
    storage_key: str,
    label: str,
) -> bool:
    """Write ``data`` to ``storage_key`` via ``storage_fs`` without raising.

    Args:
        workflow_run_id: Run the artifact belongs to; used in log messages only.
        data: Raw bytes to store.
        storage_key: Destination key passed to the storage backend.
        label: Human-readable artifact name used in log messages.

    Returns:
        ``True`` when ``storage_fs.acreate_file_from_bytes`` returns a truthy
        value; ``False`` when it returns a falsy value or when any exception
        is raised (the exception is logged, not propagated).
    """
    try:
        logger.debug(f"{label} size: {len(data)} bytes")
        accepted = await storage_fs.acreate_file_from_bytes(storage_key, data)
        if not accepted:
            # The backend answered but reported the write as unsuccessful.
            logger.error(
                f"Storage backend rejected {label} upload for workflow "
                f"{workflow_run_id}: {storage_key}"
            )
            return False
        logger.info(f"Successfully uploaded {label}: {storage_key}")
        return True
    except Exception as e:
        # Best effort: callers rely on the boolean result, so log and report failure.
        logger.error(f"Error uploading {label} for workflow {workflow_run_id}: {e}")
        return False


async def upload_workflow_run_artifacts(
    workflow_run_id: int,
    *,
    mixed_audio_wav: bytes | None = None,
    user_audio_wav: bytes | None = None,
    bot_audio_wav: bytes | None = None,
    transcript_text: str | None = None,
) -> None:
    """Upload call artifacts to object storage and persist their metadata.

    Artifacts that are ``None`` or empty are skipped. The rest are handled in
    this order: mixed audio, user audio, bot audio, transcript. An upload that
    fails is logged by ``_upload_bytes`` and the remaining artifacts are still
    attempted. Exceptions raised by ``get_current_storage_backend`` or by
    ``db_client.update_workflow_run`` are not caught here.

    Storage keys:
        * mixed audio: ``recordings/{workflow_run_id}.wav``
        * user audio:  ``recordings/{workflow_run_id}/user.wav``
        * bot audio:   ``recordings/{workflow_run_id}/bot.wav``
        * transcript:  ``transcripts/{workflow_run_id}.txt`` (UTF-8 encoded)

    Args:
        workflow_run_id: Run whose artifacts are being stored.
        mixed_audio_wav: WAV bytes of the mixed recording, if any.
        user_audio_wav: WAV bytes of the user track, if any.
        bot_audio_wav: WAV bytes of the bot track, if any.
        transcript_text: Transcript text, if any.
    """
    storage_backend = get_current_storage_backend()

    # (track name, payload, storage key) in upload order. One table replaces
    # three copy-pasted branches so the tracks cannot drift apart.
    audio_tracks = (
        ("mixed", mixed_audio_wav, f"recordings/{workflow_run_id}.wav"),
        ("user", user_audio_wav, f"recordings/{workflow_run_id}/user.wav"),
        ("bot", bot_audio_wav, f"recordings/{workflow_run_id}/bot.wav"),
    )

    recordings_metadata: dict[str, dict] = {}

    for track, audio, storage_key in audio_tracks:
        if not audio:
            continue

        label = f"{track} audio"
        logger.info(
            f"Uploading {label} to {storage_backend.name} - workflow_run_id: {workflow_run_id}"
        )
        if not await _upload_bytes(workflow_run_id, audio, storage_key, label):
            continue

        recordings_metadata[track] = _recording_metadata(
            storage_key, storage_backend.value, track
        )

        if track == "mixed":
            # Only the mixed recording is written to the run's recording_url,
            # and it is written right away, before the other tracks upload.
            await db_client.update_workflow_run(
                run_id=workflow_run_id,
                recording_url=storage_key,
                storage_backend=storage_backend.value,
            )

    if recordings_metadata:
        # Per-track metadata for every track that uploaded successfully.
        await db_client.update_workflow_run(
            run_id=workflow_run_id,
            storage_backend=storage_backend.value,
            extra={"recordings": recordings_metadata},
        )

    if transcript_text:
        transcript_url = f"transcripts/{workflow_run_id}.txt"
        logger.info(
            f"Uploading transcript to {storage_backend.name} - workflow_run_id: {workflow_run_id}"
        )
        if await _upload_bytes(
            workflow_run_id,
            transcript_text.encode("utf-8"),
            transcript_url,
            "transcript",
        ):
            await db_client.update_workflow_run(
                run_id=workflow_run_id,
                transcript_url=transcript_url,
                storage_backend=storage_backend.value,
            )
chore: refactor file upload mechanism to avoid NFS dependency (#496) * chore: refactor file upload mechanism to avoid NFS dependency * add regression test for deregistration of calls * fix: fix minio upload issue * fix: make transcript upload async 2026-07-03 20:01:52 +05:30			`"""Upload end-of-call artifacts (recordings, transcript) to object storage.`

			`Called from the pipeline process itself, straight from the in-memory call`
			`buffers, so no local file ever has to cross a process/host boundary (no`
			`shared /tmp between web and ARQ workers). Uploads happen before the`
			`workflow-completion job is enqueued so QA and webhooks see the artifacts`
			`in storage.`
			`"""`

			`from loguru import logger`

			`from api.db import db_client`
			`from api.services.storage import get_current_storage_backend, storage_fs`


			`def _recording_metadata(storage_key: str, storage_backend: str, track: str) -> dict:`
			`return {`
			`"storage_key": storage_key,`
			`"storage_backend": storage_backend,`
			`"format": "wav",`
			`"track": track,`
			`}`


			`async def _upload_bytes(`
			`workflow_run_id: int,`
			`data: bytes,`
			`storage_key: str,`
			`label: str,`
			`) -> bool:`
			`try:`
			`logger.debug(f"{label} size: {len(data)} bytes")`
			`if await storage_fs.acreate_file_from_bytes(storage_key, data):`
			`logger.info(f"Successfully uploaded {label}: {storage_key}")`
			`return True`
			`logger.error(`
			`f"Storage backend rejected {label} upload for workflow "`
			`f"{workflow_run_id}: {storage_key}"`
			`)`
			`return False`
			`except Exception as e:`
			`logger.error(f"Error uploading {label} for workflow {workflow_run_id}: {e}")`
			`return False`


			`async def upload_workflow_run_artifacts(`
			`workflow_run_id: int,`
			`*,`
			`mixed_audio_wav: bytes \| None = None,`
			`user_audio_wav: bytes \| None = None,`
			`bot_audio_wav: bytes \| None = None,`
			`transcript_text: str \| None = None,`
			`) -> None:`
			`"""Upload call artifacts to object storage and persist their metadata.`

			`Each artifact is uploaded independently; a failure is logged and the`
			`remaining artifacts are still attempted.`
			`"""`
			`storage_backend = get_current_storage_backend()`

			`recordings_metadata: dict[str, dict] = {}`

			`if mixed_audio_wav:`
			`recording_url = f"recordings/{workflow_run_id}.wav"`
			`logger.info(`
			`f"Uploading mixed audio to {storage_backend.name} - workflow_run_id: {workflow_run_id}"`
			`)`
			`if await _upload_bytes(`
			`workflow_run_id, mixed_audio_wav, recording_url, "mixed audio"`
			`):`
			`recordings_metadata["mixed"] = _recording_metadata(`
			`recording_url, storage_backend.value, "mixed"`
			`)`
			`await db_client.update_workflow_run(`
			`run_id=workflow_run_id,`
			`recording_url=recording_url,`
			`storage_backend=storage_backend.value,`
			`)`

			`if user_audio_wav:`
			`user_recording_url = f"recordings/{workflow_run_id}/user.wav"`
			`logger.info(`
			`f"Uploading user audio to {storage_backend.name} - workflow_run_id: {workflow_run_id}"`
			`)`
			`if await _upload_bytes(`
			`workflow_run_id, user_audio_wav, user_recording_url, "user audio"`
			`):`
			`recordings_metadata["user"] = _recording_metadata(`
			`user_recording_url, storage_backend.value, "user"`
			`)`

			`if bot_audio_wav:`
			`bot_recording_url = f"recordings/{workflow_run_id}/bot.wav"`
			`logger.info(`
			`f"Uploading bot audio to {storage_backend.name} - workflow_run_id: {workflow_run_id}"`
			`)`
			`if await _upload_bytes(`
			`workflow_run_id, bot_audio_wav, bot_recording_url, "bot audio"`
			`):`
			`recordings_metadata["bot"] = _recording_metadata(`
			`bot_recording_url, storage_backend.value, "bot"`
			`)`

			`if recordings_metadata:`
			`await db_client.update_workflow_run(`
			`run_id=workflow_run_id,`
			`storage_backend=storage_backend.value,`
			`extra={"recordings": recordings_metadata},`
			`)`

			`if transcript_text:`
			`transcript_url = f"transcripts/{workflow_run_id}.txt"`
			`logger.info(`
			`f"Uploading transcript to {storage_backend.name} - workflow_run_id: {workflow_run_id}"`
			`)`
			`if await _upload_bytes(`
			`workflow_run_id,`
			`transcript_text.encode("utf-8"),`
			`transcript_url,`
			`"transcript",`
			`):`
			`await db_client.update_workflow_run(`
			`run_id=workflow_run_id,`
			`transcript_url=transcript_url,`
			`storage_backend=storage_backend.value,`
			`)`