mirror of
https://github.com/dograh-hq/dograh.git
synced 2026-06-07 07:55:16 +02:00
feat: add retry config during campaign creation
This commit is contained in:
parent
db75d90535
commit
6f41e91f67
14 changed files with 1036 additions and 221 deletions
|
|
@ -92,3 +92,13 @@ COUNTRY_CODES = {
|
|||
"LU": "352", # Luxembourg
|
||||
"IE": "353", # Ireland
|
||||
}
|
||||
|
||||
DEFAULT_ORG_CONCURRENCY_LIMIT = os.getenv("DEFAULT_ORG_CONCURRENCY_LIMIT", 2)
|
||||
DEFAULT_CAMPAIGN_RETRY_CONFIG = {
|
||||
"enabled": True,
|
||||
"max_retries": 1,
|
||||
"retry_delay_seconds": 120,
|
||||
"retry_on_busy": True,
|
||||
"retry_on_no_answer": True,
|
||||
"retry_on_voicemail": False,
|
||||
}
|
||||
|
|
|
|||
|
|
@ -17,9 +17,16 @@ class CampaignClient(BaseDBClient):
|
|||
source_id: str,
|
||||
user_id: int,
|
||||
organization_id: int,
|
||||
retry_config: Optional[dict] = None,
|
||||
max_concurrency: Optional[int] = None,
|
||||
) -> CampaignModel:
|
||||
"""Create a new campaign"""
|
||||
async with self.async_session() as session:
|
||||
# Build orchestrator_metadata with max_concurrency if provided
|
||||
orchestrator_metadata = {}
|
||||
if max_concurrency is not None:
|
||||
orchestrator_metadata["max_concurrency"] = max_concurrency
|
||||
|
||||
campaign = CampaignModel(
|
||||
name=name,
|
||||
workflow_id=workflow_id,
|
||||
|
|
@ -27,6 +34,10 @@ class CampaignClient(BaseDBClient):
|
|||
source_id=source_id,
|
||||
created_by=user_id,
|
||||
organization_id=organization_id,
|
||||
retry_config=retry_config
|
||||
if retry_config
|
||||
else CampaignModel.retry_config.default.arg,
|
||||
orchestrator_metadata=orchestrator_metadata,
|
||||
)
|
||||
session.add(campaign)
|
||||
try:
|
||||
|
|
|
|||
|
|
@ -22,6 +22,8 @@ from sqlalchemy import (
|
|||
)
|
||||
from sqlalchemy.orm import declarative_base, relationship
|
||||
|
||||
from api.constants import DEFAULT_CAMPAIGN_RETRY_CONFIG
|
||||
|
||||
from ..enums import (
|
||||
CallType,
|
||||
IntegrationAction,
|
||||
|
|
@ -537,14 +539,7 @@ class CampaignModel(Base):
|
|||
retry_config = Column(
|
||||
JSON,
|
||||
nullable=False,
|
||||
default={
|
||||
"enabled": True,
|
||||
"max_retries": 2,
|
||||
"retry_delay_seconds": 120,
|
||||
"retry_on_busy": True,
|
||||
"retry_on_no_answer": True,
|
||||
"retry_on_voicemail": True,
|
||||
},
|
||||
default=DEFAULT_CAMPAIGN_RETRY_CONFIG,
|
||||
server_default=text(
|
||||
'\'{"enabled": true, "max_retries": 2, "retry_on_busy": true, "retry_on_no_answer": true, "retry_on_voicemail": true, "retry_delay_seconds": 120}\'::jsonb'
|
||||
),
|
||||
|
|
|
|||
|
|
@ -4,22 +4,61 @@ from typing import List, Optional
|
|||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from api.constants import DEFAULT_CAMPAIGN_RETRY_CONFIG, DEFAULT_ORG_CONCURRENCY_LIMIT
|
||||
from api.db import db_client
|
||||
from api.db.models import UserModel
|
||||
from api.enums import OrganizationConfigurationKey
|
||||
from api.services.auth.depends import get_user
|
||||
from api.services.campaign.runner import campaign_runner_service
|
||||
from api.services.campaign.source_validator import (
|
||||
validate_csv_source,
|
||||
validate_google_sheet_source,
|
||||
)
|
||||
from api.services.quota_service import check_dograh_quota
|
||||
from api.services.storage import storage_fs
|
||||
|
||||
router = APIRouter(prefix="/campaign")
|
||||
|
||||
|
||||
async def _get_org_concurrent_limit(organization_id: int) -> int:
|
||||
"""Get the concurrent call limit for an organization."""
|
||||
try:
|
||||
config = await db_client.get_configuration(
|
||||
organization_id,
|
||||
OrganizationConfigurationKey.CONCURRENT_CALL_LIMIT.value,
|
||||
)
|
||||
if config and config.value:
|
||||
return int(config.value.get("value", DEFAULT_ORG_CONCURRENCY_LIMIT))
|
||||
except Exception:
|
||||
pass
|
||||
return DEFAULT_ORG_CONCURRENCY_LIMIT
|
||||
|
||||
|
||||
class RetryConfigRequest(BaseModel):
|
||||
enabled: bool = True
|
||||
max_retries: int = Field(default=2, ge=0, le=10)
|
||||
retry_delay_seconds: int = Field(default=120, ge=30, le=3600)
|
||||
retry_on_busy: bool = True
|
||||
retry_on_no_answer: bool = True
|
||||
retry_on_voicemail: bool = True
|
||||
|
||||
|
||||
class RetryConfigResponse(BaseModel):
|
||||
enabled: bool
|
||||
max_retries: int
|
||||
retry_delay_seconds: int
|
||||
retry_on_busy: bool
|
||||
retry_on_no_answer: bool
|
||||
retry_on_voicemail: bool
|
||||
|
||||
|
||||
class CreateCampaignRequest(BaseModel):
|
||||
name: str = Field(..., min_length=1, max_length=255)
|
||||
workflow_id: int
|
||||
source_type: str = Field(..., pattern="^(google-sheet|csv)$")
|
||||
source_id: str # Google Sheet URL or CSV file key
|
||||
retry_config: Optional[RetryConfigRequest] = None
|
||||
max_concurrency: Optional[int] = Field(default=None, ge=1, le=100)
|
||||
|
||||
|
||||
class CampaignResponse(BaseModel):
|
||||
|
|
@ -36,6 +75,8 @@ class CampaignResponse(BaseModel):
|
|||
created_at: datetime
|
||||
started_at: Optional[datetime]
|
||||
completed_at: Optional[datetime]
|
||||
retry_config: RetryConfigResponse
|
||||
max_concurrency: Optional[int] = None
|
||||
|
||||
|
||||
class CampaignsResponse(BaseModel):
|
||||
|
|
@ -63,26 +104,23 @@ class CampaignProgressResponse(BaseModel):
|
|||
completed_at: Optional[datetime]
|
||||
|
||||
|
||||
@router.post("/create")
|
||||
async def create_campaign(
|
||||
request: CreateCampaignRequest,
|
||||
user: UserModel = Depends(get_user),
|
||||
) -> CampaignResponse:
|
||||
"""Create a new campaign"""
|
||||
# Verify workflow exists and belongs to organization
|
||||
workflow_name = await db_client.get_workflow_name(request.workflow_id, user.id)
|
||||
if not workflow_name:
|
||||
raise HTTPException(status_code=404, detail="Workflow not found")
|
||||
# Default retry config for campaigns
|
||||
|
||||
campaign = await db_client.create_campaign(
|
||||
name=request.name,
|
||||
workflow_id=request.workflow_id,
|
||||
source_type=request.source_type,
|
||||
source_id=request.source_id,
|
||||
user_id=user.id,
|
||||
organization_id=user.selected_organization_id,
|
||||
|
||||
def _build_campaign_response(campaign, workflow_name: str) -> CampaignResponse:
|
||||
"""Build a CampaignResponse from a campaign model."""
|
||||
# Get retry_config from campaign or use defaults
|
||||
retry_config = (
|
||||
campaign.retry_config
|
||||
if campaign.retry_config
|
||||
else DEFAULT_CAMPAIGN_RETRY_CONFIG
|
||||
)
|
||||
|
||||
# Get max_concurrency from orchestrator_metadata
|
||||
max_concurrency = None
|
||||
if campaign.orchestrator_metadata:
|
||||
max_concurrency = campaign.orchestrator_metadata.get("max_concurrency")
|
||||
|
||||
return CampaignResponse(
|
||||
id=campaign.id,
|
||||
name=campaign.name,
|
||||
|
|
@ -97,9 +135,62 @@ async def create_campaign(
|
|||
created_at=campaign.created_at,
|
||||
started_at=campaign.started_at,
|
||||
completed_at=campaign.completed_at,
|
||||
retry_config=RetryConfigResponse(**retry_config),
|
||||
max_concurrency=max_concurrency,
|
||||
)
|
||||
|
||||
|
||||
@router.post("/create")
|
||||
async def create_campaign(
|
||||
request: CreateCampaignRequest,
|
||||
user: UserModel = Depends(get_user),
|
||||
) -> CampaignResponse:
|
||||
"""Create a new campaign"""
|
||||
# Verify workflow exists and belongs to organization
|
||||
workflow_name = await db_client.get_workflow_name(request.workflow_id, user.id)
|
||||
if not workflow_name:
|
||||
raise HTTPException(status_code=404, detail="Workflow not found")
|
||||
|
||||
# Validate source data (phone_number column and format)
|
||||
if request.source_type == "csv":
|
||||
validation_result = await validate_csv_source(request.source_id)
|
||||
if not validation_result.is_valid:
|
||||
raise HTTPException(status_code=400, detail=validation_result.error.message)
|
||||
elif request.source_type == "google-sheet":
|
||||
validation_result = await validate_google_sheet_source(
|
||||
request.source_id, user.selected_organization_id
|
||||
)
|
||||
if not validation_result.is_valid:
|
||||
raise HTTPException(status_code=400, detail=validation_result.error.message)
|
||||
|
||||
# Validate max_concurrency against org limit if provided
|
||||
if request.max_concurrency is not None:
|
||||
org_limit = await _get_org_concurrent_limit(user.selected_organization_id)
|
||||
if request.max_concurrency > org_limit:
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail=f"max_concurrency ({request.max_concurrency}) cannot exceed organization limit ({org_limit})",
|
||||
)
|
||||
|
||||
# Build retry_config dict if provided
|
||||
retry_config = None
|
||||
if request.retry_config:
|
||||
retry_config = request.retry_config.model_dump()
|
||||
|
||||
campaign = await db_client.create_campaign(
|
||||
name=request.name,
|
||||
workflow_id=request.workflow_id,
|
||||
source_type=request.source_type,
|
||||
source_id=request.source_id,
|
||||
user_id=user.id,
|
||||
organization_id=user.selected_organization_id,
|
||||
retry_config=retry_config,
|
||||
max_concurrency=request.max_concurrency,
|
||||
)
|
||||
|
||||
return _build_campaign_response(campaign, workflow_name)
|
||||
|
||||
|
||||
@router.get("/")
|
||||
async def get_campaigns(
|
||||
user: UserModel = Depends(get_user),
|
||||
|
|
@ -115,21 +206,7 @@ async def get_campaigns(
|
|||
workflow_map = {w.id: w.name for w in workflows}
|
||||
|
||||
campaign_responses = [
|
||||
CampaignResponse(
|
||||
id=c.id,
|
||||
name=c.name,
|
||||
workflow_id=c.workflow_id,
|
||||
workflow_name=workflow_map.get(c.workflow_id, "Unknown"),
|
||||
state=c.state,
|
||||
source_type=c.source_type,
|
||||
source_id=c.source_id,
|
||||
total_rows=c.total_rows,
|
||||
processed_rows=c.processed_rows,
|
||||
failed_rows=c.failed_rows,
|
||||
created_at=c.created_at,
|
||||
started_at=c.started_at,
|
||||
completed_at=c.completed_at,
|
||||
)
|
||||
_build_campaign_response(c, workflow_map.get(c.workflow_id, "Unknown"))
|
||||
for c in campaigns
|
||||
]
|
||||
|
||||
|
|
@ -148,21 +225,7 @@ async def get_campaign(
|
|||
|
||||
workflow_name = await db_client.get_workflow_name(campaign.workflow_id, user.id)
|
||||
|
||||
return CampaignResponse(
|
||||
id=campaign.id,
|
||||
name=campaign.name,
|
||||
workflow_id=campaign.workflow_id,
|
||||
workflow_name=workflow_name or "Unknown",
|
||||
state=campaign.state,
|
||||
source_type=campaign.source_type,
|
||||
source_id=campaign.source_id,
|
||||
total_rows=campaign.total_rows,
|
||||
processed_rows=campaign.processed_rows,
|
||||
failed_rows=campaign.failed_rows,
|
||||
created_at=campaign.created_at,
|
||||
started_at=campaign.started_at,
|
||||
completed_at=campaign.completed_at,
|
||||
)
|
||||
return _build_campaign_response(campaign, workflow_name or "Unknown")
|
||||
|
||||
|
||||
@router.post("/{campaign_id}/start")
|
||||
|
|
@ -203,21 +266,7 @@ async def start_campaign(
|
|||
campaign = await db_client.get_campaign(campaign_id, user.selected_organization_id)
|
||||
workflow_name = await db_client.get_workflow_name(campaign.workflow_id, user.id)
|
||||
|
||||
return CampaignResponse(
|
||||
id=campaign.id,
|
||||
name=campaign.name,
|
||||
workflow_id=campaign.workflow_id,
|
||||
workflow_name=workflow_name or "Unknown",
|
||||
state=campaign.state,
|
||||
source_type=campaign.source_type,
|
||||
source_id=campaign.source_id,
|
||||
total_rows=campaign.total_rows,
|
||||
processed_rows=campaign.processed_rows,
|
||||
failed_rows=campaign.failed_rows,
|
||||
created_at=campaign.created_at,
|
||||
started_at=campaign.started_at,
|
||||
completed_at=campaign.completed_at,
|
||||
)
|
||||
return _build_campaign_response(campaign, workflow_name or "Unknown")
|
||||
|
||||
|
||||
@router.post("/{campaign_id}/pause")
|
||||
|
|
@ -241,21 +290,7 @@ async def pause_campaign(
|
|||
campaign = await db_client.get_campaign(campaign_id, user.selected_organization_id)
|
||||
workflow_name = await db_client.get_workflow_name(campaign.workflow_id, user.id)
|
||||
|
||||
return CampaignResponse(
|
||||
id=campaign.id,
|
||||
name=campaign.name,
|
||||
workflow_id=campaign.workflow_id,
|
||||
workflow_name=workflow_name or "Unknown",
|
||||
state=campaign.state,
|
||||
source_type=campaign.source_type,
|
||||
source_id=campaign.source_id,
|
||||
total_rows=campaign.total_rows,
|
||||
processed_rows=campaign.processed_rows,
|
||||
failed_rows=campaign.failed_rows,
|
||||
created_at=campaign.created_at,
|
||||
started_at=campaign.started_at,
|
||||
completed_at=campaign.completed_at,
|
||||
)
|
||||
return _build_campaign_response(campaign, workflow_name or "Unknown")
|
||||
|
||||
|
||||
@router.get("/{campaign_id}/runs")
|
||||
|
|
@ -316,21 +351,7 @@ async def resume_campaign(
|
|||
campaign = await db_client.get_campaign(campaign_id, user.selected_organization_id)
|
||||
workflow_name = await db_client.get_workflow_name(campaign.workflow_id, user.id)
|
||||
|
||||
return CampaignResponse(
|
||||
id=campaign.id,
|
||||
name=campaign.name,
|
||||
workflow_id=campaign.workflow_id,
|
||||
workflow_name=workflow_name or "Unknown",
|
||||
state=campaign.state,
|
||||
source_type=campaign.source_type,
|
||||
source_id=campaign.source_id,
|
||||
total_rows=campaign.total_rows,
|
||||
processed_rows=campaign.processed_rows,
|
||||
failed_rows=campaign.failed_rows,
|
||||
created_at=campaign.created_at,
|
||||
started_at=campaign.started_at,
|
||||
completed_at=campaign.completed_at,
|
||||
)
|
||||
return _build_campaign_response(campaign, workflow_name or "Unknown")
|
||||
|
||||
|
||||
@router.get("/{campaign_id}/progress")
|
||||
|
|
|
|||
|
|
@ -1,7 +1,9 @@
|
|||
from typing import Union
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from pydantic import BaseModel
|
||||
|
||||
from api.constants import DEFAULT_CAMPAIGN_RETRY_CONFIG, DEFAULT_ORG_CONCURRENCY_LIMIT
|
||||
from api.db import db_client
|
||||
from api.db.models import UserModel
|
||||
from api.enums import OrganizationConfigurationKey
|
||||
|
|
@ -210,3 +212,46 @@ def preserve_masked_fields(request, existing_config, config_value):
|
|||
field_value, existing_config.value.get(field_name, "")
|
||||
):
|
||||
config_value[field_name] = existing_config.value[field_name]
|
||||
|
||||
|
||||
class RetryConfigResponse(BaseModel):
|
||||
enabled: bool
|
||||
max_retries: int
|
||||
retry_delay_seconds: int
|
||||
retry_on_busy: bool
|
||||
retry_on_no_answer: bool
|
||||
retry_on_voicemail: bool
|
||||
|
||||
|
||||
class CampaignLimitsResponse(BaseModel):
|
||||
concurrent_call_limit: int
|
||||
default_retry_config: RetryConfigResponse
|
||||
|
||||
|
||||
@router.get("/campaign-limits", response_model=CampaignLimitsResponse)
|
||||
async def get_campaign_limits(user: UserModel = Depends(get_user)):
|
||||
"""Get campaign limits for the user's organization.
|
||||
|
||||
Returns the organization's concurrent call limit and default retry configuration.
|
||||
"""
|
||||
if not user.selected_organization_id:
|
||||
raise HTTPException(status_code=400, detail="No organization selected")
|
||||
|
||||
# Get concurrent call limit
|
||||
concurrent_limit = DEFAULT_ORG_CONCURRENCY_LIMIT
|
||||
try:
|
||||
config = await db_client.get_configuration(
|
||||
user.selected_organization_id,
|
||||
OrganizationConfigurationKey.CONCURRENT_CALL_LIMIT.value,
|
||||
)
|
||||
if config and config.value:
|
||||
concurrent_limit = int(
|
||||
config.value.get("value", DEFAULT_ORG_CONCURRENCY_LIMIT)
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return CampaignLimitsResponse(
|
||||
concurrent_call_limit=concurrent_limit,
|
||||
default_retry_config=RetryConfigResponse(**DEFAULT_CAMPAIGN_RETRY_CONFIG),
|
||||
)
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@ from typing import Optional
|
|||
|
||||
from loguru import logger
|
||||
|
||||
from api.constants import DEFAULT_ORG_CONCURRENCY_LIMIT
|
||||
from api.db import db_client
|
||||
from api.db.models import QueuedRunModel, WorkflowRunModel
|
||||
from api.enums import OrganizationConfigurationKey, WorkflowRunState
|
||||
|
|
@ -18,7 +19,7 @@ class CampaignCallDispatcher:
|
|||
"""Manages rate-limited and concurrent-limited call dispatching"""
|
||||
|
||||
def __init__(self):
|
||||
self.default_concurrent_limit = 20
|
||||
self.default_concurrent_limit = int(DEFAULT_ORG_CONCURRENCY_LIMIT)
|
||||
|
||||
async def get_telephony_provider(self, organization_id: int) -> TelephonyProvider:
|
||||
"""Get telephony provider instance for specific organization"""
|
||||
|
|
@ -132,7 +133,22 @@ class CampaignCallDispatcher:
|
|||
) -> Optional[WorkflowRunModel]:
|
||||
"""Creates workflow run and initiates call with concurrent limiting"""
|
||||
# Get concurrent limit for organization
|
||||
max_concurrent = await self.get_org_concurrent_limit(campaign.organization_id)
|
||||
org_concurrent_limit = await self.get_org_concurrent_limit(
|
||||
campaign.organization_id
|
||||
)
|
||||
|
||||
# Check for campaign-level max_concurrency in orchestrator_metadata
|
||||
campaign_max_concurrency = None
|
||||
if campaign.orchestrator_metadata:
|
||||
campaign_max_concurrency = campaign.orchestrator_metadata.get(
|
||||
"max_concurrency"
|
||||
)
|
||||
|
||||
# Use the lower of campaign limit and org limit
|
||||
if campaign_max_concurrency is not None:
|
||||
max_concurrent = min(campaign_max_concurrency, org_concurrent_limit)
|
||||
else:
|
||||
max_concurrent = org_concurrent_limit
|
||||
|
||||
# Track wait time for alerting
|
||||
wait_start = time.time()
|
||||
|
|
|
|||
265
api/services/campaign/source_validator.py
Normal file
265
api/services/campaign/source_validator.py
Normal file
|
|
@ -0,0 +1,265 @@
|
|||
"""
|
||||
Source validation for campaign data sources (CSV, Google Sheets).
|
||||
|
||||
Validates that:
|
||||
- phone_number column exists
|
||||
- All phone numbers include country code (start with '+')
|
||||
"""
|
||||
|
||||
import csv
|
||||
from dataclasses import dataclass
|
||||
from io import StringIO
|
||||
from typing import List, Optional
|
||||
|
||||
import httpx
|
||||
from loguru import logger
|
||||
|
||||
from api.services.storage import storage_fs
|
||||
|
||||
|
||||
@dataclass
|
||||
class ValidationError:
|
||||
"""Represents a validation error with details."""
|
||||
|
||||
message: str
|
||||
invalid_rows: Optional[List[int]] = None
|
||||
|
||||
|
||||
@dataclass
|
||||
class ValidationResult:
|
||||
"""Result of source validation."""
|
||||
|
||||
is_valid: bool
|
||||
error: Optional[ValidationError] = None
|
||||
|
||||
|
||||
def _validate_source_data(
|
||||
headers: List[str], rows: List[List[str]]
|
||||
) -> ValidationResult:
|
||||
"""
|
||||
Validate source data for campaign creation.
|
||||
|
||||
Args:
|
||||
headers: List of column headers
|
||||
rows: List of data rows (excluding header)
|
||||
|
||||
Returns:
|
||||
ValidationResult with is_valid=True if valid, or error details if invalid
|
||||
"""
|
||||
# Normalize headers to lowercase for comparison
|
||||
normalized_headers = [h.strip().lower() for h in headers]
|
||||
|
||||
# Check for phone_number column
|
||||
if "phone_number" not in normalized_headers:
|
||||
return ValidationResult(
|
||||
is_valid=False,
|
||||
error=ValidationError(
|
||||
message="Source must contain a 'phone_number' column"
|
||||
),
|
||||
)
|
||||
|
||||
phone_number_idx = normalized_headers.index("phone_number")
|
||||
|
||||
# Validate phone numbers in all data rows
|
||||
invalid_rows = []
|
||||
for row_idx, row in enumerate(rows, start=2): # Start at 2 (1-indexed, skip header)
|
||||
if len(row) <= phone_number_idx:
|
||||
continue # Skip rows that don't have enough columns
|
||||
|
||||
phone_number = row[phone_number_idx].strip()
|
||||
if phone_number and not phone_number.startswith("+"):
|
||||
invalid_rows.append(row_idx)
|
||||
|
||||
if invalid_rows:
|
||||
# Limit the number of rows shown in error message
|
||||
if len(invalid_rows) > 5:
|
||||
rows_str = f"{', '.join(map(str, invalid_rows[:5]))} and {len(invalid_rows) - 5} more"
|
||||
else:
|
||||
rows_str = ", ".join(map(str, invalid_rows))
|
||||
|
||||
return ValidationResult(
|
||||
is_valid=False,
|
||||
error=ValidationError(
|
||||
message=f"Invalid phone numbers in rows: {rows_str}. All phone numbers must include country code (start with '+')",
|
||||
invalid_rows=invalid_rows,
|
||||
),
|
||||
)
|
||||
|
||||
return ValidationResult(is_valid=True)
|
||||
|
||||
|
||||
async def validate_csv_source(file_key: str) -> ValidationResult:
|
||||
"""
|
||||
Validate a CSV source file for campaign creation.
|
||||
|
||||
Args:
|
||||
file_key: S3/MinIO file key for the CSV file
|
||||
|
||||
Returns:
|
||||
ValidationResult with is_valid=True if valid, or error details if invalid
|
||||
"""
|
||||
# Get download URL using internal endpoint
|
||||
signed_url = await storage_fs.aget_signed_url(
|
||||
file_key, expiration=3600, use_internal_endpoint=True
|
||||
)
|
||||
|
||||
if not signed_url:
|
||||
return ValidationResult(
|
||||
is_valid=False,
|
||||
error=ValidationError(message=f"Failed to access CSV file: {file_key}"),
|
||||
)
|
||||
|
||||
# Download CSV file
|
||||
async with httpx.AsyncClient() as client:
|
||||
try:
|
||||
response = await client.get(signed_url)
|
||||
response.raise_for_status()
|
||||
csv_content = response.text
|
||||
except httpx.HTTPError as e:
|
||||
logger.error(f"Failed to download CSV file for validation: {e}")
|
||||
return ValidationResult(
|
||||
is_valid=False,
|
||||
error=ValidationError(
|
||||
message="Failed to download CSV file for validation"
|
||||
),
|
||||
)
|
||||
|
||||
# Parse CSV
|
||||
try:
|
||||
csv_file = StringIO(csv_content)
|
||||
reader = csv.reader(csv_file)
|
||||
rows = list(reader)
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to parse CSV: {e}")
|
||||
return ValidationResult(
|
||||
is_valid=False,
|
||||
error=ValidationError(message=f"Invalid CSV format: {str(e)}"),
|
||||
)
|
||||
|
||||
if not rows or len(rows) < 2:
|
||||
return ValidationResult(
|
||||
is_valid=False,
|
||||
error=ValidationError(
|
||||
message="CSV file must have a header row and at least one data row"
|
||||
),
|
||||
)
|
||||
|
||||
headers = rows[0]
|
||||
data_rows = rows[1:]
|
||||
|
||||
return _validate_source_data(headers, data_rows)
|
||||
|
||||
|
||||
async def validate_google_sheet_source(
|
||||
sheet_url: str, organization_id: int
|
||||
) -> ValidationResult:
|
||||
"""
|
||||
Validate a Google Sheet source for campaign creation.
|
||||
|
||||
Args:
|
||||
sheet_url: Google Sheets URL
|
||||
organization_id: Organization ID to get integration credentials
|
||||
|
||||
Returns:
|
||||
ValidationResult with is_valid=True if valid, or error details if invalid
|
||||
"""
|
||||
import re
|
||||
|
||||
from api.db import db_client
|
||||
from api.services.integrations.nango import NangoService
|
||||
|
||||
# Extract sheet ID from URL
|
||||
pattern = r"/spreadsheets/d/([a-zA-Z0-9-_]+)"
|
||||
match = re.search(pattern, sheet_url)
|
||||
if not match:
|
||||
return ValidationResult(
|
||||
is_valid=False,
|
||||
error=ValidationError(message=f"Invalid Google Sheets URL: {sheet_url}"),
|
||||
)
|
||||
|
||||
sheet_id = match.group(1)
|
||||
|
||||
# Get Google Sheets integration for the organization
|
||||
integrations = await db_client.get_integrations_by_organization_id(organization_id)
|
||||
integration = None
|
||||
for intg in integrations:
|
||||
if intg.provider == "google-sheet" and intg.is_active:
|
||||
integration = intg
|
||||
break
|
||||
|
||||
if not integration:
|
||||
return ValidationResult(
|
||||
is_valid=False,
|
||||
error=ValidationError(
|
||||
message="Google Sheets integration not found or inactive"
|
||||
),
|
||||
)
|
||||
|
||||
# Get OAuth token via Nango
|
||||
try:
|
||||
nango_service = NangoService()
|
||||
token_data = await nango_service.get_access_token(
|
||||
connection_id=integration.integration_id, provider_config_key="google-sheet"
|
||||
)
|
||||
access_token = token_data["credentials"]["access_token"]
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to get Google Sheets access token: {e}")
|
||||
return ValidationResult(
|
||||
is_valid=False,
|
||||
error=ValidationError(message="Failed to authenticate with Google Sheets"),
|
||||
)
|
||||
|
||||
# Fetch sheet data
|
||||
sheets_api_base = "https://sheets.googleapis.com/v4/spreadsheets"
|
||||
|
||||
async with httpx.AsyncClient() as client:
|
||||
try:
|
||||
# Get sheet metadata to find the first sheet name
|
||||
metadata_url = f"{sheets_api_base}/{sheet_id}"
|
||||
headers = {"Authorization": f"Bearer {access_token}"}
|
||||
response = await client.get(metadata_url, headers=headers)
|
||||
response.raise_for_status()
|
||||
metadata = response.json()
|
||||
|
||||
if not metadata.get("sheets"):
|
||||
return ValidationResult(
|
||||
is_valid=False,
|
||||
error=ValidationError(message="No sheets found in the spreadsheet"),
|
||||
)
|
||||
|
||||
sheet_name = metadata["sheets"][0]["properties"]["title"]
|
||||
|
||||
# Fetch all data from sheet
|
||||
data_url = f"{sheets_api_base}/{sheet_id}/values/{sheet_name}!A:Z"
|
||||
response = await client.get(data_url, headers=headers)
|
||||
response.raise_for_status()
|
||||
data = response.json()
|
||||
rows = data.get("values", [])
|
||||
|
||||
except httpx.HTTPStatusError as e:
|
||||
logger.error(f"HTTP error fetching Google Sheet: {e.response.status_code}")
|
||||
return ValidationResult(
|
||||
is_valid=False,
|
||||
error=ValidationError(
|
||||
message=f"Failed to fetch Google Sheet data: {e.response.status_code}"
|
||||
),
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"Error fetching Google Sheet: {e}")
|
||||
return ValidationResult(
|
||||
is_valid=False,
|
||||
error=ValidationError(message="Failed to fetch Google Sheet data"),
|
||||
)
|
||||
|
||||
if not rows or len(rows) < 2:
|
||||
return ValidationResult(
|
||||
is_valid=False,
|
||||
error=ValidationError(
|
||||
message="Google Sheet must have a header row and at least one data row"
|
||||
),
|
||||
)
|
||||
|
||||
headers = rows[0]
|
||||
data_rows = rows[1:]
|
||||
|
||||
return _validate_source_data(headers, data_rows)
|
||||
Loading…
Add table
Add a link
Reference in a new issue