2026-01-23 18:53:59 +05:30
|
|
|
import uuid
|
2025-09-09 14:37:32 +05:30
|
|
|
from typing import Any, Dict, List, Optional, Tuple
|
|
|
|
|
|
|
|
|
|
from sqlalchemy import func
|
|
|
|
|
from sqlalchemy.future import select
|
|
|
|
|
from sqlalchemy.orm import joinedload, selectinload
|
|
|
|
|
|
|
|
|
|
from api.db.base_client import BaseDBClient
|
2026-01-30 17:08:15 +05:30
|
|
|
from api.db.filters import apply_workflow_run_filters, get_workflow_run_order_clause
|
2025-09-09 14:37:32 +05:30
|
|
|
from api.db.models import (
|
|
|
|
|
OrganizationModel,
|
|
|
|
|
UserModel,
|
|
|
|
|
WorkflowDefinitionModel,
|
|
|
|
|
WorkflowModel,
|
|
|
|
|
WorkflowRunModel,
|
|
|
|
|
)
|
2026-01-12 10:10:30 +05:30
|
|
|
from api.enums import CallType, StorageBackend
|
2025-09-09 14:37:32 +05:30
|
|
|
from api.schemas.workflow import WorkflowRunResponseSchema
|
2026-06-12 14:55:30 +05:30
|
|
|
from api.services.workflow.run_usage_response import format_public_cost_info
|
2026-06-16 15:19:49 +05:30
|
|
|
from api.utils.recording_artifacts import get_recording_storage_key
|
2025-09-09 14:37:32 +05:30
|
|
|
|
|
|
|
|
|
|
|
|
|
class WorkflowRunClient(BaseDBClient):
|
|
|
|
|
async def create_workflow_run(
    self,
    name: str,
    workflow_id: int,
    mode: str,
    user_id: int,
    call_type: CallType = CallType.OUTBOUND,
    initial_context: dict = None,
    gathered_context: dict = None,
    logs: dict = None,
    campaign_id: int = None,
    queued_run_id: int = None,
    use_draft: bool = False,
    organization_id: int | None = None,
) -> WorkflowRunModel:
    """Create and persist a new run for a workflow owned by ``user_id``.

    The workflow is looked up by ``workflow_id`` and ``user_id`` (and by
    ``organization_id`` too when one is supplied). The run is bound to a
    workflow definition chosen as follows:

    1. when ``use_draft`` is set, the first definition with status
       ``"draft"`` for the workflow, if any;
    2. otherwise (or when no draft exists) the definition referenced by
       ``workflow.released_definition_id``;
    3. if the workflow has no released definition id, the first
       definition flagged ``is_current``.

    The stored initial context is the definition's (or, failing that, the
    workflow's) template context variables overlaid with the caller's
    ``initial_context``; caller-supplied keys win.

    Args:
        name: Name stored on the run.
        workflow_id: ID of the workflow to run.
        mode: Mode stored on the run.
        user_id: Owner the workflow must belong to.
        call_type: Call type; its ``.value`` is stored on the run.
        initial_context: Per-run overrides for the default context.
        gathered_context: Initial gathered context (``{}`` when omitted).
        logs: Initial logs (``{}`` when omitted).
        campaign_id: Optional campaign reference stored on the run.
        queued_run_id: Optional queued-run reference stored on the run.
        use_draft: Prefer a draft definition when one exists.
        organization_id: Optional extra scoping of the workflow lookup.

    Returns:
        The refreshed, committed ``WorkflowRunModel``.

    Raises:
        ValueError: If no workflow matches the lookup criteria.
    """
    async with self.async_session() as session:
        # Collect every lookup predicate first, then issue a single WHERE.
        criteria = [
            WorkflowModel.id == workflow_id,
            WorkflowModel.user_id == user_id,
        ]
        if organization_id is not None:
            criteria.append(WorkflowModel.organization_id == organization_id)

        workflow_rows = await session.execute(
            select(WorkflowModel)
            .options(joinedload(WorkflowModel.user))
            .where(*criteria)
        )
        workflow = workflow_rows.scalars().first()
        if not workflow:
            raise ValueError(f"Workflow with ID {workflow_id} not found")

        # Pick the definition this run will be pinned to.
        definition = None
        if use_draft:
            # Test calls: a draft wins when present, published is the fallback.
            draft_rows = await session.execute(
                select(WorkflowDefinitionModel).where(
                    WorkflowDefinitionModel.workflow_id == workflow.id,
                    WorkflowDefinitionModel.status == "draft",
                )
            )
            definition = draft_rows.scalars().first()

        if definition is None:
            released_id = workflow.released_definition_id
            if released_id:
                # Preferred path: the explicitly released definition.
                definition = await session.get(WorkflowDefinitionModel, released_id)
            else:
                # Backward compatibility: fall back to the is_current flag.
                current_rows = await session.execute(
                    select(WorkflowDefinitionModel).where(
                        WorkflowDefinitionModel.workflow_id == workflow.id,
                        WorkflowDefinitionModel.is_current == True,  # noqa: E712
                    )
                )
                definition = current_rows.scalars().first()

        # Storage backend currently in effect, as reported by StorageBackend.
        backend = StorageBackend.get_current_backend()

        # Defaults come from the chosen definition when it has any,
        # otherwise from the workflow itself.
        version_defaults = definition.template_context_variables if definition else None
        defaults = version_defaults or workflow.template_context_variables

        # Caller-provided keys override the defaults.
        run_context = dict(defaults or {})
        run_context.update(initial_context or {})

        created = WorkflowRunModel(
            name=name,
            workflow=workflow,
            mode=mode,
            definition_id=definition.id if definition else None,
            initial_context=run_context,
            gathered_context=gathered_context or {},
            logs=logs or {},
            campaign_id=campaign_id,
            queued_run_id=queued_run_id,
            storage_backend=backend.value,
            call_type=call_type.value,
        )
        session.add(created)
        try:
            await session.commit()
        except Exception:
            await session.rollback()
            raise
        await session.refresh(created)
        return created
|
|
|
|
|
|
|
|
|
|
async def get_all_workflow_runs(self) -> list[WorkflowRunModel]:
    """Return every workflow run, with no filtering, ordering or paging."""
    everything = select(WorkflowRunModel)
    async with self.async_session() as session:
        rows = await session.execute(everything)
        return rows.scalars().all()
|
|
|
|
|
|
|
|
|
|
async def get_workflow_runs_for_superadmin(
    self,
    limit: int = 50,
    offset: int = 0,
    filters: Optional[List[Dict[str, Any]]] = None,
    sort_by: Optional[str] = None,
    sort_order: str = "desc",
) -> tuple[list[dict], int]:
    """List workflow runs across all users, one page at a time.

    Each run is returned as a plain dict that also carries the workflow
    name, the owning user id and the owner's selected organization.

    Args:
        limit: Maximum number of rows in the page.
        offset: Number of rows to skip.
        filters: Filter descriptors handed to ``apply_workflow_run_filters``.
        sort_by: Field to sort by ('duration', 'created_at', etc.).
        sort_order: 'asc' or 'desc'.

    Returns:
        ``(rows, total_count)`` where ``total_count`` is the number of runs
        matching the filters before pagination.
    """

    def _to_row(run) -> dict:
        # The organization is reached through the workflow owner's
        # selected organization; runs whose workflow has no user get None.
        workflow = run.workflow
        owner = workflow.user
        organization = owner.selected_organization if owner else None
        return {
            "id": run.id,
            "name": run.name,
            "workflow_id": run.workflow_id,
            "workflow_name": workflow.name if workflow else None,
            "user_id": workflow.user_id if workflow else None,
            "organization_id": organization.id if organization else None,
            "organization_name": (
                organization.provider_id if organization else None
            ),
            "mode": run.mode,
            "is_completed": run.is_completed,
            "recording_url": run.recording_url,
            "transcript_url": run.transcript_url,
            "user_recording_url": get_recording_storage_key(run.extra, "user"),
            "bot_recording_url": get_recording_storage_key(run.extra, "bot"),
            "usage_info": run.usage_info,
            "cost_info": run.cost_info,
            "initial_context": run.initial_context,
            "gathered_context": run.gathered_context,
            "created_at": run.created_at,
        }

    async with self.async_session() as session:
        # Joined selectable shared by the count and the page query.
        scoped = (
            select(WorkflowRunModel)
            .join(WorkflowModel, WorkflowRunModel.workflow_id == WorkflowModel.id)
            .join(UserModel, WorkflowModel.user_id == UserModel.id)
            .outerjoin(
                OrganizationModel,
                UserModel.selected_organization_id == OrganizationModel.id,
            )
        )
        scoped = apply_workflow_run_filters(scoped, filters)

        # Total number of matching runs, ignoring limit/offset.
        counted = await session.execute(
            scoped.with_only_columns(func.count(WorkflowRunModel.id))
        )
        total_count = counted.scalar()

        # One page of runs with workflow -> user -> organization eager-loaded.
        ordering = get_workflow_run_order_clause(sort_by, sort_order)
        load_owner = joinedload(WorkflowRunModel.workflow).joinedload(
            WorkflowModel.user
        )
        load_owner_org = (
            joinedload(WorkflowRunModel.workflow)
            .joinedload(WorkflowModel.user)
            .joinedload(UserModel.selected_organization)
        )
        page = await session.execute(
            scoped.options(load_owner, load_owner_org)
            .order_by(ordering)
            .limit(limit)
            .offset(offset)
        )

        formatted_runs = [_to_row(run) for run in page.scalars().all()]
        return formatted_runs, total_count
|
|
|
|
|
|
|
|
|
|
async def get_workflow_run(
    self, run_id: int, user_id: int = None, organization_id: int = None
) -> WorkflowRunModel | None:
    """Fetch one run by id, optionally scoped to an organization or user.

    Scoping precedence: a truthy ``organization_id`` restricts the lookup
    to workflows of that organization; otherwise a truthy ``user_id``
    restricts it to workflows owned by that user (kept for backwards
    compatibility); with neither, the run is matched by id alone.

    The run's ``definition`` relationship is eager-loaded.

    Returns:
        The matching run, or ``None`` when nothing matches.
    """
    predicates = [WorkflowRunModel.id == run_id]
    if organization_id:
        predicates.append(WorkflowModel.organization_id == organization_id)
    elif user_id:
        predicates.append(WorkflowModel.user_id == user_id)

    stmt = (
        select(WorkflowRunModel)
        .options(selectinload(WorkflowRunModel.definition))
        .join(WorkflowRunModel.workflow)
        .where(*predicates)
    )
    async with self.async_session() as session:
        rows = await session.execute(stmt)
        return rows.scalars().first()
|
|
|
|
|
|
|
|
|
|
async def get_workflow_run_by_id(self, run_id: int) -> WorkflowRunModel | None:
    """Fetch a run by id with no ownership check (meant for background tasks).

    The run's workflow and that workflow's user are eager-loaded.

    Returns:
        The run, or ``None`` when no run has this id.
    """
    with_owner = joinedload(WorkflowRunModel.workflow).joinedload(WorkflowModel.user)
    stmt = (
        select(WorkflowRunModel)
        .options(with_owner)
        .where(WorkflowRunModel.id == run_id)
    )
    async with self.async_session() as session:
        rows = await session.execute(stmt)
        return rows.scalars().first()
|
|
|
|
|
|
2026-05-04 21:35:37 +05:30
|
|
|
async def get_organization_id_by_workflow_run_id(
    self, run_id: int | None
) -> int | None:
    """Return the ``organization_id`` of the workflow a run belongs to.

    The value is read from the workflow row joined to the run; the user
    table is not consulted.

    Args:
        run_id: Workflow run id; a falsy value short-circuits to ``None``
            without querying.

    Returns:
        The workflow's organization id, or ``None`` when ``run_id`` is
        falsy, the run does not exist, or the workflow has no organization.
    """
    if not run_id:
        return None

    stmt = (
        select(WorkflowModel.organization_id)
        .join(WorkflowRunModel, WorkflowRunModel.workflow_id == WorkflowModel.id)
        .where(WorkflowRunModel.id == run_id)
    )
    async with self.async_session() as session:
        rows = await session.execute(stmt)
        return rows.scalar_one_or_none()
|
|
|
|
|
|
2025-09-09 14:37:32 +05:30
|
|
|
async def get_workflow_runs_by_workflow_id(
    self,
    workflow_id: int,
    user_id: int = None,
    organization_id: int = None,
    limit: int = 50,
    offset: int = 0,
    filters: Optional[List[Dict[str, Any]]] = None,
    sort_by: Optional[str] = None,
    sort_order: Optional[str] = "desc",
) -> tuple[list[WorkflowRunResponseSchema], int]:
    """List the runs of one workflow as response schemas, one page at a time.

    Scoping precedence: a truthy ``organization_id`` restricts the query
    to workflows of that organization; otherwise a truthy ``user_id``
    restricts it to workflows owned by that user (backwards compatibility).

    Args:
        workflow_id: Workflow whose runs are listed.
        user_id: Optional owner scoping (used only without organization_id).
        organization_id: Optional organization scoping.
        limit: Maximum number of rows in the page.
        offset: Number of rows to skip.
        filters: Filter descriptors handed to ``apply_workflow_run_filters``.
        sort_by: Sort field handed to ``get_workflow_run_order_clause``.
        sort_order: Sort direction handed to the same helper.

    Returns:
        ``(runs, total_count)`` where ``total_count`` counts all matching
        runs before pagination.
    """
    async with self.async_session() as session:
        scoped = (
            select(WorkflowRunModel)
            .join(WorkflowModel, WorkflowRunModel.workflow_id == WorkflowModel.id)
            .where(WorkflowRunModel.workflow_id == workflow_id)
        )
        if organization_id:
            scoped = scoped.where(WorkflowModel.organization_id == organization_id)
        elif user_id:
            scoped = scoped.where(WorkflowModel.user_id == user_id)

        scoped = apply_workflow_run_filters(scoped, filters)

        # Total number of matching runs, ignoring limit/offset.
        counted = await session.execute(
            scoped.with_only_columns(func.count(WorkflowRunModel.id))
        )
        total_count = counted.scalar()

        ordering = get_workflow_run_order_clause(sort_by, sort_order)
        page = await session.execute(
            scoped.order_by(ordering).limit(limit).offset(offset)
        )

        runs = []
        for run in page.scalars().all():
            payload = {
                "id": run.id,
                "workflow_id": run.workflow_id,
                "name": run.name,
                "mode": run.mode,
                "created_at": run.created_at,
                "is_completed": run.is_completed,
                "recording_url": run.recording_url,
                "transcript_url": run.transcript_url,
                "user_recording_url": get_recording_storage_key(run.extra, "user"),
                "bot_recording_url": get_recording_storage_key(run.extra, "bot"),
                # Cost is passed through the public formatter rather than
                # exposed raw.
                "cost_info": format_public_cost_info(run.cost_info, run.usage_info),
                "definition_id": run.definition_id,
                "initial_context": run.initial_context,
                "gathered_context": run.gathered_context,
                "call_type": run.call_type,
            }
            runs.append(WorkflowRunResponseSchema.model_validate(payload))

        return runs, total_count
|
|
|
|
|
|
|
|
|
|
async def update_workflow_run(
    self,
    run_id: int,
    is_completed: bool = False,
    recording_url: str | None = None,
    transcript_url: str | None = None,
    storage_backend: str | None = None,
    usage_info: dict | None = None,
    cost_info: dict | None = None,
    initial_context: dict | None = None,
    gathered_context: dict | None = None,
    logs: dict | None = None,
    state: str | None = None,
    annotations: dict | None = None,
    extra: dict | None = None,
) -> WorkflowRunModel:
    """Apply a partial update to a workflow run under a row lock.

    Only truthy arguments are applied, so this method cannot clear a field
    or flip ``is_completed`` back to ``False``.

    * ``recording_url``, ``transcript_url``, ``storage_backend``,
      ``usage_info``, ``cost_info``, ``state`` and ``is_completed``
      replace the stored value.
    * ``initial_context``, ``gathered_context``, ``logs``, ``annotations``
      and ``extra`` are shallow-merged into the stored dict; incoming keys
      win. A stored value of ``None`` is treated as an empty dict, so the
      first patch on a run whose column is still NULL does not fail.

    Args:
        run_id: ID of the run to update.

    Returns:
        The refreshed ``WorkflowRunModel`` after commit.

    Raises:
        ValueError: If no run with ``run_id`` exists.
    """
    # Fields whose truthy value overwrites what is stored.
    overwrites = {
        "recording_url": recording_url,
        "transcript_url": transcript_url,
        "storage_backend": storage_backend,
        "usage_info": usage_info,
        "cost_info": cost_info,
        "is_completed": is_completed,
        "state": state,
    }
    # Dict fields patched key-by-key so independent writers do not erase
    # keys stored earlier in the run lifecycle.
    patches = {
        "initial_context": initial_context,
        "gathered_context": gathered_context,
        "logs": logs,
        "annotations": annotations,
        "extra": extra,
    }

    async with self.async_session() as session:
        # SELECT ... FOR UPDATE locks the row for the read-modify-write below.
        result = await session.execute(
            select(WorkflowRunModel)
            .where(WorkflowRunModel.id == run_id)
            .with_for_update()
        )
        run = result.scalars().first()
        if not run:
            raise ValueError(f"Workflow run with ID {run_id} not found")

        for attr, value in overwrites.items():
            if value:
                setattr(run, attr, value)

        for attr, patch in patches.items():
            if patch:
                # `or {}` guards against a NULL column; unpacking None
                # would raise TypeError.
                setattr(run, attr, {**(getattr(run, attr) or {}), **patch})

        try:
            await session.commit()
        except Exception:
            await session.rollback()
            raise
        await session.refresh(run)
        return run
|
|
|
|
|
|
|
|
|
|
async def get_workflow_run_with_context(
    self, workflow_run_id: int
) -> Tuple[Optional[WorkflowRunModel], Optional[int]]:
    """Load a run with its related rows and report its organization id.

    Eager-loads the run's definition, its workflow, and the workflow's
    user and current definition. The organization id is taken from the
    workflow row.

    Returns:
        ``(workflow_run, organization_id)``; ``(None, None)`` when the run
        does not exist, and ``(workflow_run, None)`` when the run has no
        workflow.
    """
    eager = (
        selectinload(WorkflowRunModel.definition),
        selectinload(WorkflowRunModel.workflow).options(
            selectinload(WorkflowModel.user),
            selectinload(WorkflowModel.current_definition),
        ),
    )
    stmt = (
        select(WorkflowRunModel)
        .options(*eager)
        .where(WorkflowRunModel.id == workflow_run_id)
    )
    async with self.async_session() as session:
        rows = await session.execute(stmt)
        workflow_run = rows.scalars().first()

        if not workflow_run:
            return None, None

        workflow = workflow_run.workflow
        organization_id = workflow.organization_id if workflow else None
        return workflow_run, organization_id
|
2026-01-23 18:53:59 +05:30
|
|
|
|
|
|
|
|
async def ensure_public_access_token(self, workflow_run_id: int) -> Optional[str]:
    """Return the run's public access token, creating one if it has none.

    The row is read with ``SELECT ... FOR UPDATE`` so that two concurrent
    callers cannot both observe "no token" and each write a different one.
    Without the lock the later commit would overwrite the earlier token
    and the first caller would be holding a token that no longer matches
    the row. With it, the second caller waits and then sees the token the
    first one stored.

    Args:
        workflow_run_id: The ID of the workflow run.

    Returns:
        The public access token string, or ``None`` if the workflow run
        is not found.
    """
    async with self.async_session() as session:
        result = await session.execute(
            select(WorkflowRunModel)
            .where(WorkflowRunModel.id == workflow_run_id)
            .with_for_update()
        )
        run = result.scalars().first()
        if not run:
            return None

        # Idempotent path: reuse the token that is already stored.
        if run.public_access_token:
            return run.public_access_token

        # First request for this run: generate and persist a new token.
        run.public_access_token = str(uuid.uuid4())

        try:
            await session.commit()
        except Exception:
            await session.rollback()
            raise
        await session.refresh(run)

        return run.public_access_token
|
|
|
|
|
|
|
|
|
|
async def get_workflow_run_by_public_token(
    self, token: str
) -> Optional[WorkflowRunModel]:
    """Look up a workflow run by its public access token.

    An empty or ``None`` token never matches. Without this guard a
    ``None`` token would be compiled to ``public_access_token IS NULL``
    and return some run that has no token at all, which must not happen
    for a lookup keyed on a caller-supplied secret.

    Args:
        token: The public access token.

    Returns:
        The ``WorkflowRunModel`` if found, ``None`` otherwise.
    """
    if not token:
        return None

    async with self.async_session() as session:
        result = await session.execute(
            select(WorkflowRunModel).where(
                WorkflowRunModel.public_access_token == token
            )
        )
        return result.scalars().first()
|
2026-01-29 19:06:52 +05:30
|
|
|
|
|
|
|
|
async def get_workflow_run_by_call_id(
    self, call_id: str
) -> Optional[WorkflowRunModel]:
    """Find the most recent workflow run whose gathered_context has this call_id.

    An empty or ``None`` ``call_id`` never matches. Without this guard a
    ``None`` value would be compiled to ``... ->> 'call_id' IS NULL`` and
    return the newest run that has no call id recorded.

    The run's workflow and that workflow's user are eager-loaded.

    Args:
        call_id: The telephony call ID to search for.

    Returns:
        The ``WorkflowRunModel`` if found, ``None`` otherwise.
    """
    if not call_id:
        return None

    async with self.async_session() as session:
        # JSON text extraction (->>) on gathered_context.
        # NOTE(review): the original comment says this leverages the
        # idx_workflow_runs_call_id index -- confirm against the migrations.
        result = await session.execute(
            select(WorkflowRunModel)
            .options(
                joinedload(WorkflowRunModel.workflow).joinedload(WorkflowModel.user)
            )
            .where(
                WorkflowRunModel.gathered_context.op("->>")("call_id") == call_id
            )
            # Newest first, so a reused call id resolves to the latest run.
            .order_by(WorkflowRunModel.created_at.desc())
            .limit(1)
        )
        return result.scalars().first()
|