feat: add csv upload functionality for OSS (#29)

feat: add csv upload functionality
chore: remove redundant arq-worker from docker-compose
This commit is contained in:
Abhishek 2025-10-09 17:54:31 +05:30 committed by GitHub
parent 2633ff0a2a
commit 3babb5ced6
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
26 changed files with 941 additions and 234 deletions

View file

@ -9,6 +9,7 @@ from api.db.models import UserModel
from api.enums import OrganizationConfigurationKey
from api.services.auth.depends import get_user
from api.services.campaign.runner import campaign_runner_service
from api.services.storage import storage_fs
# Router for the campaign endpoints; every route registered on it is served under the "/campaign" prefix.
router = APIRouter(prefix="/campaign")
@ -16,7 +17,8 @@ router = APIRouter(prefix="/campaign")
# Request body for creating a campaign.
#   name        - campaign name, 1 to 255 characters.
#   workflow_id - integer id of the workflow the campaign refers to.
#   source_type - must be exactly "google-sheet" or "csv" (enforced by the pattern).
#   source_id   - Google Sheet URL or CSV file key.
# NOTE(review): `source_id` is listed twice in this view (once with the older
# "Sheet URL" comment, once with the updated comment); presumably only the
# later declaration exists in the actual module - confirm against the file.
class CreateCampaignRequest(BaseModel):
name: str = Field(..., min_length=1, max_length=255)
workflow_id: int
source_id: str # Sheet URL
source_type: str = Field(..., pattern="^(google-sheet|csv)$")
source_id: str # Google Sheet URL or CSV file key
class CampaignResponse(BaseModel):
@ -74,7 +76,7 @@ async def create_campaign(
campaign = await db_client.create_campaign(
name=request.name,
workflow_id=request.workflow_id,
source_type="google-sheet",
source_type=request.source_type,
source_id=request.source_id,
user_id=user.id,
organization_id=user.selected_organization_id,
@ -174,14 +176,10 @@ async def start_campaign(
OrganizationConfigurationKey.TWILIO_CONFIGURATION.value,
)
if (
not twilio_config
or not twilio_config.value
or not twilio_config.value.get("value")
):
if not twilio_config or not twilio_config.value:
raise HTTPException(
status_code=401,
detail="Your organisation is not allowed to make phone call. Contact founders@dograh.com for further support.",
detail="You must configure telephony first by going to APP_URL/configure-telephony",
)
# Verify campaign exists and belongs to organization
@ -286,14 +284,10 @@ async def resume_campaign(
OrganizationConfigurationKey.TWILIO_CONFIGURATION.value,
)
if (
not twilio_config
or not twilio_config.value
or not twilio_config.value.get("value")
):
if not twilio_config or not twilio_config.value:
raise HTTPException(
status_code=401,
detail="Your organisation is not allowed to make phone call. Contact founders@dograh.com for further support.",
detail="You must configure telephony first by going to APP_URL/configure-telephony",
)
# Verify campaign exists and belongs to organization
@ -345,3 +339,59 @@ async def get_campaign_progress(
return CampaignProgressResponse(**progress)
except ValueError as e:
raise HTTPException(status_code=400, detail=str(e))
class CampaignSourceDownloadResponse(BaseModel):
    # Response payload of the campaign source-download endpoint.

    # Presigned URL from which the campaign's CSV source file can be fetched.
    download_url: str

    # Lifetime of `download_url` in seconds (the endpoint reports 3600, i.e. 1 hour).
    expires_in: int
@router.get("/{campaign_id}/source-download-url")
async def get_campaign_source_download_url(
    campaign_id: int,
    user: UserModel = Depends(get_user),
) -> CampaignSourceDownloadResponse:
    """Get a presigned download URL for a campaign's CSV source file.

    Only works for the CSV source type. For Google Sheets, use the source_id
    directly. The campaign is looked up within the user's selected
    organization, and the stored file key must carry that organization's
    prefix.

    Args:
        campaign_id: Id of the campaign whose source file is requested.
        user: Authenticated user, injected by the ``get_user`` dependency.

    Returns:
        The presigned URL together with its lifetime in seconds.

    Raises:
        HTTPException: 404 when the campaign is not found for the user's
            organization; 400 when the campaign source is not a CSV; 403 when
            the file key is outside the organization's prefix; 500 when the
            storage backend fails or returns no URL.
    """
    # Single source of truth for the URL lifetime: passed to the storage
    # backend and echoed back to the client.
    expires_in = 3600  # 1 hour

    # Verify campaign exists and belongs to organization
    campaign = await db_client.get_campaign(campaign_id, user.selected_organization_id)
    if not campaign:
        raise HTTPException(status_code=404, detail="Campaign not found")

    # Only generate download URL for CSV files
    if campaign.source_type != "csv":
        raise HTTPException(
            status_code=400,
            detail=f"Download URL only available for CSV sources. This campaign uses {campaign.source_type}",
        )

    # Verify the file key belongs to the user's organization
    # File key format: campaigns/{org_id}/{uuid}_{filename}.csv
    if not campaign.source_id.startswith(f"campaigns/{user.selected_organization_id}/"):
        raise HTTPException(
            status_code=403,
            detail="Access denied: Source file does not belong to your organization",
        )

    # Keep the try body limited to the storage call so that the deliberate
    # HTTPException raised below is not caught and re-wrapped by the generic
    # handler (which previously garbled its detail message).
    try:
        download_url = await storage_fs.aget_signed_url(
            campaign.source_id,
            expiration=expires_in,
        )
    except HTTPException:
        # An HTTP error raised by a lower layer already carries its own
        # status and detail; pass it through untouched.
        raise
    except Exception as e:
        # NOTE(review): the detail echoes the backend exception text to the
        # client, which may expose internal details - consider logging it
        # server-side and returning a generic message instead.
        raise HTTPException(
            status_code=500, detail=f"Failed to generate download URL: {str(e)}"
        ) from e

    if not download_url:
        raise HTTPException(status_code=500, detail="Failed to generate download URL")

    return CampaignSourceDownloadResponse(
        download_url=download_url, expires_in=expires_in
    )

View file

@ -1,8 +1,11 @@
import re
import uuid
from typing import Annotated, Any, Dict, Optional, TypedDict
from botocore.exceptions import ClientError
from fastapi import APIRouter, Depends, HTTPException, Query
from loguru import logger
from pydantic import BaseModel, Field
from api.db import db_client
from api.enums import StorageBackend
@ -20,6 +23,20 @@ class FileMetadataResponse(TypedDict):
metadata: Optional[Dict[str, Any]]
class PresignedUploadUrlRequest(BaseModel):
    # Client-supplied description of the CSV file it wants to upload.

    # Name of the file; the pattern requires it to end in ".csv".
    file_name: str = Field(
        ...,
        description="CSV filename",
        pattern=r".*\.csv$",
    )

    # Size in bytes; must be positive and at most 10 * 1024 * 1024 (= 10_485_760).
    file_size: int = Field(
        ...,
        description="File size in bytes (max 10MB)",
        le=10 * 1024 * 1024,
        gt=0,
    )

    # Content type of the upload; falls back to "text/csv" when omitted.
    content_type: str = Field(
        description="File content type",
        default="text/csv",
    )
class PresignedUploadUrlResponse(BaseModel):
    # Response payload of the presigned-upload-url endpoint.

    # Presigned URL the client issues its PUT request against.
    upload_url: str

    # Storage key of the uploaded object; used as source_id when creating a campaign.
    file_key: str

    # Lifetime of `upload_url` in seconds.
    expires_in: int
# Router for the storage endpoints; routes are served under the "/s3" prefix and tagged "s3".
router = APIRouter(prefix="/s3", tags=["s3"])
@ -217,3 +234,65 @@ async def get_file_metadata(
except Exception as exc:
logger.error(f"Error getting file metadata: {exc}")
raise HTTPException(status_code=500, detail="Failed to get file metadata")
@router.post(
    "/presigned-upload-url",
    response_model=PresignedUploadUrlResponse,
    summary="Generate a presigned URL for direct CSV upload",
)
async def get_presigned_upload_url(
    request: PresignedUploadUrlRequest,
    user=Depends(get_user),
):
    """Generate a presigned PUT URL for direct CSV file upload to S3/MinIO.

    This endpoint enables browser-to-storage uploads without passing through
    the backend.

    Access Control:
    * All authenticated users can upload CSV files scoped to their organization.
    * Files are stored with organization-scoped keys for multi-tenancy.

    Returns:
    * upload_url: Presigned URL (valid for 15 minutes) for PUT request
    * file_key: Unique storage key to use as source_id in campaign creation
    * expires_in: URL expiration time in seconds

    Raises:
    * HTTPException (500): the storage backend failed or returned no URL
    """
    # Single source of truth for the URL lifetime: passed to the storage
    # backend and echoed back to the client.
    expires_in = 900  # 15 minutes

    # Sanitize filename - remove special chars, keep only alphanumeric, dash, underscore, and dot
    sanitized_name = re.sub(r"[^a-zA-Z0-9._-]", "_", request.file_name)

    # Generate unique file key: campaigns/{org_id}/{uuid}_{filename}.csv
    file_key = (
        f"campaigns/{user.selected_organization_id}/{uuid.uuid4()}_{sanitized_name}"
    )

    # Keep the try body limited to the storage call so that the deliberate
    # HTTPException raised below is not caught, logged as an unexpected
    # error, and raised a second time by the generic handler.
    try:
        # Generate presigned PUT URL using current storage backend
        upload_url = await storage_fs.aget_presigned_put_url(
            file_path=file_key,
            expiration=expires_in,
            content_type=request.content_type,
            max_size=request.file_size,
        )
    except HTTPException:
        # An HTTP error raised by a lower layer already carries its own
        # status and detail; pass it through untouched.
        raise
    except Exception as exc:
        # Log the real cause server-side; the client only gets a generic message.
        logger.error(f"Error generating presigned upload URL: {exc}")
        raise HTTPException(
            status_code=500, detail="Failed to generate presigned upload URL"
        ) from exc

    if not upload_url:
        raise HTTPException(
            status_code=500, detail="Failed to generate presigned upload URL"
        )

    logger.info(
        f"Generated presigned upload URL for user {user.id}, org {user.selected_organization_id}, file_key: {file_key}"
    )

    return PresignedUploadUrlResponse(
        upload_url=upload_url,
        file_key=file_key,
        expires_in=expires_in,
    )