dograh/api/db/workflow_run_client.py

import uuid
from typing import Any, Dict, List, Optional, Tuple

from sqlalchemy import func
from sqlalchemy.future import select
from sqlalchemy.orm import joinedload, selectinload

from api.db.base_client import BaseDBClient
from api.db.filters import apply_workflow_run_filters, get_workflow_run_order_clause
from api.db.models import (
    OrganizationModel,
    UserModel,
    WorkflowDefinitionModel,
    WorkflowModel,
    WorkflowRunModel,
)
from api.enums import CallType, StorageBackend
from api.schemas.workflow import WorkflowRunResponseSchema
from api.services.workflow.run_usage_response import format_public_cost_info
from api.utils.recording_artifacts import get_recording_storage_key


class WorkflowRunClient(BaseDBClient):
    async def create_workflow_run(
        self,
        name: str,
        workflow_id: int,
        mode: str,
        user_id: int,
        call_type: CallType = CallType.OUTBOUND,
        initial_context: dict = None,
        gathered_context: dict = None,
        logs: dict = None,
        campaign_id: int = None,
        queued_run_id: int = None,
        use_draft: bool = False,
        organization_id: int | None = None,
    ) -> WorkflowRunModel:
        async with self.async_session() as session:
            workflow_query = (
                select(WorkflowModel)
                .options(joinedload(WorkflowModel.user))
                .where(
                    WorkflowModel.id == workflow_id, WorkflowModel.user_id == user_id
                )
            )
            if organization_id is not None:
                workflow_query = workflow_query.where(
                    WorkflowModel.organization_id == organization_id
                )

            workflow = await session.execute(workflow_query)
            workflow = workflow.scalars().first()
            if not workflow:
                raise ValueError(f"Workflow with ID {workflow_id} not found")

            # Resolve which definition to bind to this run
            target_def = None

            if use_draft:
                # For test calls: prefer draft if it exists, fall back to published
                draft_result = await session.execute(
                    select(WorkflowDefinitionModel).where(
                        WorkflowDefinitionModel.workflow_id == workflow.id,
                        WorkflowDefinitionModel.status == "draft",
                    )
                )
                target_def = draft_result.scalars().first()

            if target_def is None:
                # Use the published version via released_definition_id (preferred)
                # or fall back to is_current for backward compatibility
                if workflow.released_definition_id:
                    target_def = await session.get(
                        WorkflowDefinitionModel, workflow.released_definition_id
                    )
                else:
                    pub_result = await session.execute(
                        select(WorkflowDefinitionModel).where(
                            WorkflowDefinitionModel.workflow_id == workflow.id,
                            WorkflowDefinitionModel.is_current == True,
                        )
                    )
                    target_def = pub_result.scalars().first()

            # Get the current storage backend based on ENABLE_AWS_S3 flag
            current_backend = StorageBackend.get_current_backend()

            # Use initial_context from the version if available, else from workflow
            default_context = (
                target_def.template_context_variables
                if target_def and target_def.template_context_variables
                else workflow.template_context_variables
            )

            merged_initial_context = {
                **(default_context or {}),
                **(initial_context or {}),
            }

            new_run = WorkflowRunModel(
                name=name,
                workflow=workflow,
                mode=mode,
                definition_id=target_def.id if target_def else None,
                initial_context=merged_initial_context,
                gathered_context=gathered_context or {},
                logs=logs or {},
                campaign_id=campaign_id,
                queued_run_id=queued_run_id,
                storage_backend=current_backend.value,
                call_type=call_type.value,
            )
            session.add(new_run)
            try:
                await session.commit()
            except Exception as e:
                await session.rollback()
                raise e
            await session.refresh(new_run)
        return new_run

    async def get_all_workflow_runs(self) -> list[WorkflowRunModel]:
        async with self.async_session() as session:
            result = await session.execute(select(WorkflowRunModel))
            return result.scalars().all()

    async def get_workflow_runs_for_superadmin(
        self,
        limit: int = 50,
        offset: int = 0,
        filters: Optional[List[Dict[str, Any]]] = None,
        sort_by: Optional[str] = None,
        sort_order: str = "desc",
    ) -> tuple[list[dict], int]:
        """
        Get paginated workflow runs for superadmin with organization information.
        Returns tuple of (workflow_runs, total_count).

        Args:
            sort_by: Field to sort by ('duration', 'created_at', etc.)
            sort_order: 'asc' or 'desc'
        """
        async with self.async_session() as session:
            # Build base query with joins
            base_query = (
                select(WorkflowRunModel)
                .join(WorkflowModel, WorkflowRunModel.workflow_id == WorkflowModel.id)
                .join(UserModel, WorkflowModel.user_id == UserModel.id)
                .outerjoin(
                    OrganizationModel,
                    UserModel.selected_organization_id == OrganizationModel.id,
                )
            )

            # Apply filters
            base_query = apply_workflow_run_filters(base_query, filters)

            # Count total with filters
            count_query = base_query.with_only_columns(func.count(WorkflowRunModel.id))
            count_result = await session.execute(count_query)
            total_count = count_result.scalar()

            # Get paginated results with filters and sorting
            order_clause = get_workflow_run_order_clause(sort_by, sort_order)
            result = await session.execute(
                base_query.options(
                    joinedload(WorkflowRunModel.workflow).joinedload(
                        WorkflowModel.user
                    ),
                    joinedload(WorkflowRunModel.workflow)
                    .joinedload(WorkflowModel.user)
                    .joinedload(UserModel.selected_organization),
                )
                .order_by(order_clause)
                .limit(limit)
                .offset(offset)
            )
            workflow_runs = result.scalars().all()

            # Format the response
            formatted_runs = []
            for run in workflow_runs:
                organization = (
                    run.workflow.user.selected_organization
                    if run.workflow.user
                    else None
                )
                formatted_runs.append(
                    {
                        "id": run.id,
                        "name": run.name,
                        "workflow_id": run.workflow_id,
                        "workflow_name": run.workflow.name if run.workflow else None,
                        "user_id": run.workflow.user_id if run.workflow else None,
                        "organization_id": organization.id if organization else None,
                        "organization_name": (
                            organization.provider_id if organization else None
                        ),
                        "mode": run.mode,
                        "is_completed": run.is_completed,
                        "recording_url": run.recording_url,
                        "transcript_url": run.transcript_url,
                        "user_recording_url": get_recording_storage_key(
                            run.extra, "user"
                        ),
                        "bot_recording_url": get_recording_storage_key(
                            run.extra, "bot"
                        ),
                        "usage_info": run.usage_info,
                        "cost_info": run.cost_info,
                        "initial_context": run.initial_context,
                        "gathered_context": run.gathered_context,
                        "created_at": run.created_at,
                    }
                )

            return formatted_runs, total_count

    async def get_workflow_run(
        self, run_id: int, user_id: int = None, organization_id: int = None
    ) -> WorkflowRunModel | None:
        async with self.async_session() as session:
            query = (
                select(WorkflowRunModel)
                .options(selectinload(WorkflowRunModel.definition))
                .join(WorkflowRunModel.workflow)
            )

            if organization_id:
                # Filter by organization_id when provided
                query = query.where(
                    WorkflowRunModel.id == run_id,
                    WorkflowModel.organization_id == organization_id,
                )
            elif user_id:
                # Fallback to user_id for backwards compatibility
                query = query.where(
                    WorkflowRunModel.id == run_id,
                    WorkflowModel.user_id == user_id,
                )
            else:
                query = query.where(WorkflowRunModel.id == run_id)

            result = await session.execute(query)
            return result.scalars().first()

    async def get_workflow_run_by_id(self, run_id: int) -> WorkflowRunModel | None:
        """Get workflow run by ID without user filtering - for background tasks"""
        async with self.async_session() as session:
            result = await session.execute(
                select(WorkflowRunModel)
                .options(
                    joinedload(WorkflowRunModel.workflow).joinedload(WorkflowModel.user)
                )
                .where(WorkflowRunModel.id == run_id)
            )
            return result.scalars().first()

    async def get_organization_id_by_workflow_run_id(
        self, run_id: int | None
    ) -> int | None:
        """Resolve organization_id from a workflow run via workflow.user."""
        if not run_id:
            return None
        async with self.async_session() as session:
            result = await session.execute(
                select(WorkflowModel.organization_id)
                .join(
                    WorkflowRunModel, WorkflowRunModel.workflow_id == WorkflowModel.id
                )
                .where(WorkflowRunModel.id == run_id)
            )
            return result.scalar_one_or_none()

    async def get_workflow_runs_by_workflow_id(
        self,
        workflow_id: int,
        user_id: int = None,
        organization_id: int = None,
        limit: int = 50,
        offset: int = 0,
        filters: Optional[List[Dict[str, Any]]] = None,
        sort_by: Optional[str] = None,
        sort_order: Optional[str] = "desc",
    ) -> tuple[list[WorkflowRunResponseSchema], int]:
        async with self.async_session() as session:
            # Build base query
            base_query = (
                select(WorkflowRunModel)
                .join(WorkflowModel, WorkflowRunModel.workflow_id == WorkflowModel.id)
                .where(WorkflowRunModel.workflow_id == workflow_id)
            )

            if organization_id:
                # Filter by organization_id when provided
                base_query = base_query.where(
                    WorkflowModel.organization_id == organization_id
                )
            elif user_id:
                # Fallback to user_id for backwards compatibility
                base_query = base_query.where(WorkflowModel.user_id == user_id)

            # Apply filters
            base_query = apply_workflow_run_filters(base_query, filters)

            # Count total with filters
            count_query = base_query.with_only_columns(func.count(WorkflowRunModel.id))
            count_result = await session.execute(count_query)
            total_count = count_result.scalar()

            # Get paginated results with filters and sorting
            order_clause = get_workflow_run_order_clause(sort_by, sort_order)
            result = await session.execute(
                base_query.order_by(order_clause).limit(limit).offset(offset)
            )
            runs = [
                WorkflowRunResponseSchema.model_validate(
                    {
                        "id": run.id,
                        "workflow_id": run.workflow_id,
                        "name": run.name,
                        "mode": run.mode,
                        "created_at": run.created_at,
                        "is_completed": run.is_completed,
                        "recording_url": run.recording_url,
                        "transcript_url": run.transcript_url,
                        "user_recording_url": get_recording_storage_key(
                            run.extra, "user"
                        ),
                        "bot_recording_url": get_recording_storage_key(
                            run.extra, "bot"
                        ),
                        "cost_info": format_public_cost_info(
                            run.cost_info, run.usage_info
                        ),
                        "definition_id": run.definition_id,
                        "initial_context": run.initial_context,
                        "gathered_context": run.gathered_context,
                        "call_type": run.call_type,
                    }
                )
                for run in result.scalars().all()
            ]
            return runs, total_count

    async def update_workflow_run(
        self,
        run_id: int,
        is_completed: bool = False,
        recording_url: str | None = None,
        transcript_url: str | None = None,
        storage_backend: str | None = None,
        usage_info: dict | None = None,
        cost_info: dict | None = None,
        initial_context: dict | None = None,
        gathered_context: dict | None = None,
        logs: dict | None = None,
        state: str | None = None,
        annotations: dict | None = None,
        extra: dict | None = None,
    ) -> WorkflowRunModel:
        async with self.async_session() as session:
            # Use SELECT FOR UPDATE to lock the row during the update
            result = await session.execute(
                select(WorkflowRunModel)
                .where(WorkflowRunModel.id == run_id)
                .with_for_update()
            )
            run = result.scalars().first()
            if not run:
                raise ValueError(f"Workflow run with ID {run_id} not found")
            if recording_url:
                run.recording_url = recording_url
            if transcript_url:
                run.transcript_url = transcript_url
            if storage_backend:
                run.storage_backend = storage_backend
            if usage_info:
                run.usage_info = usage_info
            if cost_info:
                run.cost_info = cost_info
            if initial_context:
                # Merge initial context patches so independent call-start/runtime
                # writers do not erase keys stored earlier in the run lifecycle.
                run.initial_context = {
                    **(run.initial_context or {}),
                    **initial_context,
                }
            if gathered_context:
                # Lets merge the incoming gathered context keys with the existing ones
                run.gathered_context = {
                    **run.gathered_context,
                    **gathered_context,
                }
            if logs:
                # Lets merge the incoming logs key with existing ones
                run.logs = {**run.logs, **logs}
            if annotations:
                run.annotations = {**run.annotations, **annotations}
            if extra:
                run.extra = {**run.extra, **extra}
            if is_completed:
                run.is_completed = is_completed
            if state:
                run.state = state
            try:
                await session.commit()
            except Exception as e:
                await session.rollback()
                raise e
            await session.refresh(run)
        return run

    async def get_workflow_run_with_context(
        self, workflow_run_id: int
    ) -> Tuple[Optional[WorkflowRunModel], Optional[int]]:
        """
        Get workflow run with all related data and return organization_id.

        Returns:
            Tuple of (workflow_run, organization_id) or (None, None) if not found
        """
        async with self.async_session() as session:
            result = await session.execute(
                select(WorkflowRunModel)
                .options(
                    selectinload(WorkflowRunModel.definition),
                    selectinload(WorkflowRunModel.workflow).options(
                        selectinload(WorkflowModel.user),
                        selectinload(WorkflowModel.current_definition),
                    ),
                )
                .where(WorkflowRunModel.id == workflow_run_id)
            )
            workflow_run = result.scalars().first()

            if not workflow_run:
                return None, None

            if not workflow_run.workflow:
                return workflow_run, None

            organization_id = workflow_run.workflow.organization_id
            return workflow_run, organization_id

    async def ensure_public_access_token(self, workflow_run_id: int) -> Optional[str]:
        """Generate a public access token if not exists, return existing if present (idempotent).

        Args:
            workflow_run_id: The ID of the workflow run

        Returns:
            The public access token string, or None if workflow run not found
        """
        async with self.async_session() as session:
            result = await session.execute(
                select(WorkflowRunModel).where(WorkflowRunModel.id == workflow_run_id)
            )
            run = result.scalars().first()
            if not run:
                return None

            # Return existing token if present
            if run.public_access_token:
                return run.public_access_token

            # Generate and persist new token
            token = str(uuid.uuid4())
            run.public_access_token = token

            try:
                await session.commit()
            except Exception as e:
                await session.rollback()
                raise e
            await session.refresh(run)

            return run.public_access_token

    async def get_workflow_run_by_public_token(
        self, token: str
    ) -> Optional[WorkflowRunModel]:
        """Lookup workflow run by public access token.

        Args:
            token: The public access token

        Returns:
            The WorkflowRunModel if found, None otherwise
        """
        async with self.async_session() as session:
            result = await session.execute(
                select(WorkflowRunModel).where(
                    WorkflowRunModel.public_access_token == token
                )
            )
            return result.scalars().first()

    async def get_workflow_run_by_call_id(
        self, call_id: str
    ) -> Optional[WorkflowRunModel]:
        """Find workflow run by call_id stored in gathered_context.

        Args:
            call_id: The telephony call ID to search for

        Returns:
            The WorkflowRunModel if found, None otherwise
        """
        async with self.async_session() as session:
            # Use JSON text extraction to find matching call_id
            # This leverages the idx_workflow_runs_call_id index
            result = await session.execute(
                select(WorkflowRunModel)
                .options(
                    joinedload(WorkflowRunModel.workflow).joinedload(WorkflowModel.user)
                )
                .where(
                    WorkflowRunModel.gathered_context.op("->>")("call_id") == call_id
                )
                .order_by(WorkflowRunModel.created_at.desc())
                .limit(1)
            )
            return result.scalars().first()