mirror of
https://github.com/dograh-hq/dograh.git
synced 2026-06-19 08:28:10 +02:00
fix: fix circuit breaker failure recording
fix: fix circuit breaker failure recording
chore: provide advanced configuration option in UI for campaigns
This commit is contained in:
parent
628132f29b
commit
3ea235a666
17 changed files with 448 additions and 58 deletions
|
|
@ -6,7 +6,10 @@ from zoneinfo import ZoneInfo
|
|||
from fastapi import APIRouter, Depends, HTTPException, Query
|
||||
from pydantic import BaseModel, Field, field_validator, model_validator
|
||||
|
||||
from api.constants import DEFAULT_CAMPAIGN_RETRY_CONFIG, DEFAULT_ORG_CONCURRENCY_LIMIT
|
||||
from api.constants import (
|
||||
DEFAULT_CAMPAIGN_RETRY_CONFIG,
|
||||
DEFAULT_ORG_CONCURRENCY_LIMIT,
|
||||
)
|
||||
from api.db import db_client
|
||||
from api.db.models import UserModel
|
||||
from api.enums import OrganizationConfigurationKey
|
||||
|
|
@ -126,6 +129,20 @@ class ScheduleConfigResponse(BaseModel):
|
|||
slots: List[TimeSlotResponse]
|
||||
|
||||
|
||||
# Request payload for a campaign's circuit-breaker settings.
# Accepted as the optional `circuit_breaker` field on the create/update
# campaign requests; the update handler stores `model_dump()` of it under
# the "circuit_breaker" key of the campaign's orchestrator metadata.
class CircuitBreakerConfigRequest(BaseModel):
|
||||
# Defaults to True when the client sends a circuit_breaker object without it.
enabled: bool = True
|
||||
# Constrained to the inclusive range 0.0-1.0, default 0.5.
# NOTE(review): presumably a failure ratio over the window - confirm
# against the circuit breaker implementation.
failure_threshold: float = Field(default=0.5, ge=0.0, le=1.0)
|
||||
# Constrained to the inclusive range 30-600, default 120.
window_seconds: int = Field(default=120, ge=30, le=600)
|
||||
# Constrained to the inclusive range 1-100, default 5.
# NOTE(review): presumably the minimum sample size before the breaker
# evaluates - confirm.
min_calls_in_window: int = Field(default=5, ge=1, le=100)
|
||||
|
||||
|
||||
# Response shape for a campaign's circuit-breaker settings.
# Built in _build_campaign_response by unpacking the dict stored under
# orchestrator_metadata["circuit_breaker"]. Every field is required and has
# no default, so the stored dict must carry all four keys.
# Unlike the request model, no range constraints are declared here.
class CircuitBreakerConfigResponse(BaseModel):
|
||||
enabled: bool
|
||||
failure_threshold: float
|
||||
window_seconds: int
|
||||
min_calls_in_window: int
|
||||
|
||||
|
||||
class CreateCampaignRequest(BaseModel):
|
||||
name: str = Field(..., min_length=1, max_length=255)
|
||||
workflow_id: int
|
||||
|
|
@ -134,6 +151,7 @@ class CreateCampaignRequest(BaseModel):
|
|||
retry_config: Optional[RetryConfigRequest] = None
|
||||
max_concurrency: Optional[int] = Field(default=None, ge=1, le=100)
|
||||
schedule_config: Optional[ScheduleConfigRequest] = None
|
||||
circuit_breaker: Optional[CircuitBreakerConfigRequest] = None
|
||||
|
||||
|
||||
class UpdateCampaignRequest(BaseModel):
|
||||
|
|
@ -141,6 +159,7 @@ class UpdateCampaignRequest(BaseModel):
|
|||
retry_config: Optional[RetryConfigRequest] = None
|
||||
max_concurrency: Optional[int] = Field(default=None, ge=1, le=100)
|
||||
schedule_config: Optional[ScheduleConfigRequest] = None
|
||||
circuit_breaker: Optional[CircuitBreakerConfigRequest] = None
|
||||
|
||||
|
||||
class CampaignResponse(BaseModel):
|
||||
|
|
@ -160,6 +179,7 @@ class CampaignResponse(BaseModel):
|
|||
retry_config: RetryConfigResponse
|
||||
max_concurrency: Optional[int] = None
|
||||
schedule_config: Optional[ScheduleConfigResponse] = None
|
||||
circuit_breaker: Optional[CircuitBreakerConfigResponse] = None
|
||||
|
||||
|
||||
class CampaignsResponse(BaseModel):
|
||||
|
|
@ -209,9 +229,10 @@ def _build_campaign_response(campaign, workflow_name: str) -> CampaignResponse:
|
|||
else DEFAULT_CAMPAIGN_RETRY_CONFIG
|
||||
)
|
||||
|
||||
# Get max_concurrency and schedule_config from orchestrator_metadata
|
||||
# Get max_concurrency, schedule_config, circuit_breaker from orchestrator_metadata
|
||||
max_concurrency = None
|
||||
schedule_config = None
|
||||
circuit_breaker_config = None
|
||||
if campaign.orchestrator_metadata:
|
||||
max_concurrency = campaign.orchestrator_metadata.get("max_concurrency")
|
||||
sc = campaign.orchestrator_metadata.get("schedule_config")
|
||||
|
|
@ -221,6 +242,9 @@ def _build_campaign_response(campaign, workflow_name: str) -> CampaignResponse:
|
|||
timezone=sc.get("timezone", "UTC"),
|
||||
slots=[TimeSlotResponse(**slot) for slot in sc.get("slots", [])],
|
||||
)
|
||||
cb = campaign.orchestrator_metadata.get("circuit_breaker")
|
||||
if cb:
|
||||
circuit_breaker_config = CircuitBreakerConfigResponse(**cb)
|
||||
|
||||
return CampaignResponse(
|
||||
id=campaign.id,
|
||||
|
|
@ -239,6 +263,7 @@ def _build_campaign_response(campaign, workflow_name: str) -> CampaignResponse:
|
|||
retry_config=RetryConfigResponse(**retry_config),
|
||||
max_concurrency=max_concurrency,
|
||||
schedule_config=schedule_config,
|
||||
circuit_breaker=circuit_breaker_config,
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -276,6 +301,11 @@ async def create_campaign(
|
|||
if request.schedule_config:
|
||||
schedule_config = request.schedule_config.model_dump()
|
||||
|
||||
# Build circuit_breaker dict if provided
|
||||
circuit_breaker_config = None
|
||||
if request.circuit_breaker:
|
||||
circuit_breaker_config = request.circuit_breaker.model_dump()
|
||||
|
||||
campaign = await db_client.create_campaign(
|
||||
name=request.name,
|
||||
workflow_id=request.workflow_id,
|
||||
|
|
@ -286,6 +316,7 @@ async def create_campaign(
|
|||
retry_config=retry_config,
|
||||
max_concurrency=request.max_concurrency,
|
||||
schedule_config=schedule_config,
|
||||
circuit_breaker=circuit_breaker_config,
|
||||
)
|
||||
|
||||
return _build_campaign_response(campaign, workflow_name)
|
||||
|
|
@ -436,6 +467,10 @@ async def update_campaign(
|
|||
metadata["schedule_config"] = request.schedule_config.model_dump()
|
||||
metadata_changed = True
|
||||
|
||||
if request.circuit_breaker is not None:
|
||||
metadata["circuit_breaker"] = request.circuit_breaker.model_dump()
|
||||
metadata_changed = True
|
||||
|
||||
if metadata_changed:
|
||||
update_kwargs["orchestrator_metadata"] = metadata
|
||||
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
from typing import Union
|
||||
from typing import List, Optional, Union
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from pydantic import BaseModel
|
||||
|
|
@ -257,14 +257,41 @@ class RetryConfigResponse(BaseModel):
|
|||
retry_on_voicemail: bool
|
||||
|
||||
|
||||
class CampaignLimitsResponse(BaseModel):
|
||||
# One schedule slot in a campaign's schedule configuration.
# Built by unpacking each entry of the stored schedule_config "slots" list.
class TimeSlotResponse(BaseModel):
|
||||
# NOTE(review): numbering convention (0=Monday vs 0=Sunday) is not
# visible here - confirm.
day_of_week: int
|
||||
# NOTE(review): start/end are plain strings; the expected time format is
# not visible here - confirm.
start_time: str
|
||||
end_time: str
|
||||
|
||||
|
||||
# Schedule settings returned as part of the last-campaign settings.
# When built from stored metadata in get_campaign_defaults, missing keys
# fall back to enabled=False, timezone="UTC" and an empty slots list.
class ScheduleConfigResponse(BaseModel):
|
||||
enabled: bool
|
||||
timezone: str
|
||||
slots: List[TimeSlotResponse]
|
||||
|
||||
|
||||
# Circuit-breaker settings returned as part of the last-campaign settings.
# Built by unpacking the dict stored under
# orchestrator_metadata["circuit_breaker"] of the latest campaign; all four
# fields are required, so that dict must carry every key.
class CircuitBreakerConfigResponse(BaseModel):
|
||||
enabled: bool
|
||||
failure_threshold: float
|
||||
window_seconds: int
|
||||
min_calls_in_window: int
|
||||
|
||||
|
||||
# Settings copied from the organization's most recent campaign, used to
# pre-populate a new campaign form.
# Every field is optional: each stays None when the latest campaign has no
# corresponding retry config or orchestrator-metadata entry.
class LastCampaignSettingsResponse(BaseModel):
|
||||
retry_config: Optional[RetryConfigResponse] = None
|
||||
max_concurrency: Optional[int] = None
|
||||
schedule_config: Optional[ScheduleConfigResponse] = None
|
||||
circuit_breaker: Optional[CircuitBreakerConfigResponse] = None
|
||||
|
||||
|
||||
# Response model for GET /campaign-defaults.
# Carries the organization's limits and default retry config, plus the
# settings of the latest campaign.
class CampaignDefaultsResponse(BaseModel):
|
||||
concurrent_call_limit: int
|
||||
from_numbers_count: int
|
||||
# Populated from DEFAULT_CAMPAIGN_RETRY_CONFIG by the endpoint.
default_retry_config: RetryConfigResponse
|
||||
# None when there is no previous campaign, or when the lookup raised
# (the endpoint swallows that exception).
last_campaign_settings: Optional[LastCampaignSettingsResponse] = None
|
||||
|
||||
|
||||
@router.get("/campaign-limits", response_model=CampaignLimitsResponse)
|
||||
async def get_campaign_limits(user: UserModel = Depends(get_user)):
|
||||
@router.get("/campaign-defaults", response_model=CampaignDefaultsResponse)
|
||||
async def get_campaign_defaults(user: UserModel = Depends(get_user)):
|
||||
"""Get campaign limits for the user's organization.
|
||||
|
||||
Returns the organization's concurrent call limit and default retry configuration.
|
||||
|
|
@ -299,8 +326,47 @@ async def get_campaign_limits(user: UserModel = Depends(get_user)):
|
|||
except Exception:
|
||||
pass
|
||||
|
||||
return CampaignLimitsResponse(
|
||||
# Get last campaign settings for pre-population
|
||||
last_campaign_settings = None
|
||||
try:
|
||||
last_campaign = await db_client.get_latest_campaign(
|
||||
user.selected_organization_id
|
||||
)
|
||||
if last_campaign:
|
||||
retry = None
|
||||
if last_campaign.retry_config:
|
||||
retry = RetryConfigResponse(**last_campaign.retry_config)
|
||||
|
||||
max_conc = None
|
||||
sched = None
|
||||
cb = None
|
||||
if last_campaign.orchestrator_metadata:
|
||||
max_conc = last_campaign.orchestrator_metadata.get("max_concurrency")
|
||||
sc = last_campaign.orchestrator_metadata.get("schedule_config")
|
||||
if sc:
|
||||
sched = ScheduleConfigResponse(
|
||||
enabled=sc.get("enabled", False),
|
||||
timezone=sc.get("timezone", "UTC"),
|
||||
slots=[
|
||||
TimeSlotResponse(**slot) for slot in sc.get("slots", [])
|
||||
],
|
||||
)
|
||||
cb_data = last_campaign.orchestrator_metadata.get("circuit_breaker")
|
||||
if cb_data:
|
||||
cb = CircuitBreakerConfigResponse(**cb_data)
|
||||
|
||||
last_campaign_settings = LastCampaignSettingsResponse(
|
||||
retry_config=retry,
|
||||
max_concurrency=max_conc,
|
||||
schedule_config=sched,
|
||||
circuit_breaker=cb,
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return CampaignDefaultsResponse(
|
||||
concurrent_call_limit=concurrent_limit,
|
||||
from_numbers_count=from_numbers_count,
|
||||
default_retry_config=RetryConfigResponse(**DEFAULT_CAMPAIGN_RETRY_CONFIG),
|
||||
last_campaign_settings=last_campaign_settings,
|
||||
)
|
||||
|
|
|
|||
|
|
@ -783,7 +783,8 @@ async def _process_status_update(workflow_run_id: int, status: StatusCallbackReq
|
|||
if workflow_run.campaign_id:
|
||||
await campaign_call_dispatcher.release_call_slot(workflow_run_id)
|
||||
await circuit_breaker.record_and_evaluate(
|
||||
workflow_run.campaign_id, is_failure=True
|
||||
workflow_run.campaign_id,
|
||||
is_failure=status.status == "error",
|
||||
)
|
||||
|
||||
# Check if retry is needed for campaign calls (busy/no-answer)
|
||||
|
|
@ -1209,6 +1210,7 @@ async def handle_cloudonix_status_callback(
|
|||
|
||||
return {"status": "success"}
|
||||
|
||||
|
||||
@router.post("/cloudonix/amd-callback/{workflow_run_id}")
|
||||
async def handle_cloudonix_amd_callback(
|
||||
workflow_run_id: int,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue