feat: added circleback connector

2026-04-28 18:36:23 +02:00 · 2025-12-30 09:00:59 -08:00 · 2025-12-30 09:00:59 -08:00 · c19d300c9d
commit c19d300c9d
parent 23870042f3
27 changed files with 1153 additions and 97 deletions
--- a/surfsense_backend/app/routes/__init__.py
+++ b/surfsense_backend/app/routes/__init__.py
@ -3,17 +3,18 @@ from fastapi import APIRouter
 from .airtable_add_connector_route import (
    router as airtable_add_connector_router,
 )
+from .circleback_webhook_route import router as circleback_webhook_router
 from .documents_routes import router as documents_router
 from .editor_routes import router as editor_router
 from .google_calendar_add_connector_route import (
    router as google_calendar_add_connector_router,
 )
-from .google_gmail_add_connector_route import (
-    router as google_gmail_add_connector_router,
-)
 from .google_drive_add_connector_route import (
    router as google_drive_add_connector_router,
 )
+from .google_gmail_add_connector_route import (
+    router as google_gmail_add_connector_router,
+)
 from .logs_routes import router as logs_router
 from .luma_add_connector_route import router as luma_add_connector_router
 from .new_chat_routes import router as new_chat_router
@ -41,3 +42,4 @@ router.include_router(airtable_add_connector_router)
 router.include_router(luma_add_connector_router)
 router.include_router(new_llm_config_router)  # LLM configs with prompt configuration
 router.include_router(logs_router)
+router.include_router(circleback_webhook_router)  # Circleback meeting webhooks
--- a/surfsense_backend/app/routes/circleback_webhook_route.py
+++ b/surfsense_backend/app/routes/circleback_webhook_route.py
@ -0,0 +1,317 @@
+"""
+Circleback Webhook Route
+
+This module provides a webhook endpoint for receiving meeting data from Circleback.
+It processes the incoming webhook payload and saves it as a document in the specified search space.
+"""
+
+import logging
+from datetime import datetime
+from typing import Any
+
+from fastapi import APIRouter, HTTPException
+from pydantic import BaseModel, Field
+
+logger = logging.getLogger(__name__)
+
+router = APIRouter()
+
+
+# Pydantic models for Circleback webhook payload
+class CirclebackAttendee(BaseModel):
+    """A single participant entry of a Circleback meeting.
+
+    Both fields are optional and default to ``None`` when omitted.
+    """
+
+    name: str | None = Field(default=None)
+    email: str | None = Field(default=None)
+
+
+class CirclebackActionItemAssignee(BaseModel):
+    """The person an action item is assigned to.
+
+    Both fields are optional and default to ``None`` when omitted.
+    """
+
+    name: str | None = Field(default=None)
+    email: str | None = Field(default=None)
+
+
+class CirclebackActionItem(BaseModel):
+    """One action item attached to a Circleback meeting.
+
+    ``id`` and ``title`` are required; the remaining fields fall back to
+    an empty description, no assignee and the ``"PENDING"`` status.
+    """
+
+    id: int
+    title: str
+    description: str = Field(default="")
+    assignee: CirclebackActionItemAssignee | None = Field(default=None)
+    status: str = Field(default="PENDING")
+
+
+class CirclebackTranscriptSegment(BaseModel):
+    """One utterance in the meeting transcript; every field is required."""
+
+    speaker: str = Field()
+    text: str = Field()
+    # Rendered as MM:SS by the Markdown formatter, i.e. treated as seconds.
+    timestamp: float = Field()
+
+
+class CirclebackInsightItem(BaseModel):
+    """A single entry inside one named insight group.
+
+    ``insight`` is required and may be either free text or a key/value
+    mapping; ``speaker`` and ``timestamp`` default to ``None``.
+    """
+
+    insight: str | dict[str, Any]
+    speaker: str | None = Field(default=None)
+    timestamp: float | None = Field(default=None)
+
+
+class CirclebackWebhookPayload(BaseModel):
+    """
+    Request body accepted by the Circleback webhook endpoint.
+
+    Camel-case keys in the incoming JSON (``createdAt``, ``recordingUrl``,
+    ``icalUid``, ``actionItems``) are mapped onto snake_case attributes via
+    aliases; ``populate_by_name`` additionally allows the snake_case names.
+    """
+
+    model_config = dict(populate_by_name=True)
+
+    # --- required fields ---
+    id: int = Field(description="Circleback meeting ID")
+    name: str = Field(description="Meeting name")
+    created_at: str = Field(
+        alias="createdAt", description="Meeting creation date in ISO format"
+    )
+    duration: float = Field(description="Meeting duration in seconds")
+
+    # --- optional fields ---
+    url: str | None = Field(default=None, description="URL of the virtual meeting")
+    recording_url: str | None = Field(
+        default=None,
+        alias="recordingUrl",
+        description="URL of the meeting recording (valid for 24 hours)",
+    )
+    tags: list[str] = Field(description="Meeting tags", default_factory=list)
+    ical_uid: str | None = Field(
+        default=None,
+        alias="icalUid",
+        description="Unique identifier of the calendar event",
+    )
+    attendees: list[CirclebackAttendee] = Field(
+        description="Meeting attendees", default_factory=list
+    )
+    notes: str = Field(default="", description="Meeting notes in Markdown format")
+    action_items: list[CirclebackActionItem] = Field(
+        alias="actionItems",
+        description="Action items from the meeting",
+        default_factory=list,
+    )
+    transcript: list[CirclebackTranscriptSegment] = Field(
+        description="Meeting transcript segments", default_factory=list
+    )
+    insights: dict[str, list[CirclebackInsightItem]] = Field(
+        description="Custom insights from the meeting", default_factory=dict
+    )
+
+
+def format_circleback_meeting_to_markdown(payload: CirclebackWebhookPayload) -> str:
+    """
+    Convert Circleback webhook payload to a well-formatted Markdown document.
+
+    Sections are emitted in a fixed order: meeting details, attendees, notes,
+    action items, insights, transcript. Every section except the meeting
+    details is skipped when its source field is empty.
+
+    Args:
+        payload: The Circleback webhook payload
+
+    Returns:
+        Markdown string representation of the meeting
+    """
+    # Imported locally because the module only imports ``datetime`` itself.
+    from datetime import timezone
+
+    # Title and start of the metadata section
+    lines = [f"# {payload.name}", "", "## Meeting Details", ""]
+
+    # Parse and format date. ``fromisoformat`` on older Python versions does
+    # not accept a trailing "Z", hence the replacement with an explicit offset.
+    try:
+        created_dt = datetime.fromisoformat(payload.created_at.replace("Z", "+00:00"))
+        if created_dt.tzinfo is not None:
+            # Normalise to UTC so the "UTC" label below is actually true for
+            # timestamps that carry a non-zero offset (e.g. "+02:00").
+            created_dt = created_dt.astimezone(timezone.utc)
+        # Naive timestamps are left untouched and presumed to already be UTC.
+        formatted_date = created_dt.strftime("%Y-%m-%d %H:%M:%S UTC")
+    except (ValueError, AttributeError, OverflowError):
+        # Unparseable (or out-of-range) date: fall back to the raw string.
+        formatted_date = payload.created_at
+
+    lines.append(f"- **Date:** {formatted_date}")
+    # Duration arrives in seconds; whole minutes are shown (rounded down).
+    lines.append(f"- **Duration:** {int(payload.duration // 60)} minutes")
+
+    if payload.url:
+        lines.append(f"- **Meeting URL:** {payload.url}")
+
+    if payload.tags:
+        lines.append(f"- **Tags:** {', '.join(payload.tags)}")
+
+    lines.append(
+        f"- **Circleback Link:** [View on Circleback](https://app.circleback.ai/meetings/{payload.id})"
+    )
+    lines.append("")
+
+    # Attendees
+    if payload.attendees:
+        lines.append("## Attendees")
+        lines.append("")
+        for attendee in payload.attendees:
+            name = attendee.name or "Unknown"
+            if attendee.email:
+                lines.append(f"- **{name}** ({attendee.email})")
+            else:
+                lines.append(f"- **{name}**")
+        lines.append("")
+
+    # Notes (if provided) are already Markdown and are inserted verbatim
+    if payload.notes:
+        lines.append("## Meeting Notes")
+        lines.append("")
+        lines.append(payload.notes)
+        lines.append("")
+
+    # Action Items
+    if payload.action_items:
+        lines.append("## Action Items")
+        lines.append("")
+        for item in payload.action_items:
+            # Any status other than "DONE" is rendered as an open checkbox.
+            status_emoji = "✅" if item.status == "DONE" else "⬜"
+            assignee_text = ""
+            if item.assignee and item.assignee.name:
+                assignee_text = f" (Assigned to: {item.assignee.name})"
+
+            lines.append(f"{status_emoji} **{item.title}**{assignee_text}")
+            if item.description:
+                lines.append(f"   {item.description}")
+            lines.append("")
+
+    # Insights: one sub-heading per insight group
+    if payload.insights:
+        lines.append("## Insights")
+        lines.append("")
+        for insight_name, insight_items in payload.insights.items():
+            lines.append(f"### {insight_name}")
+            lines.append("")
+            for insight_item in insight_items:
+                if isinstance(insight_item.insight, dict):
+                    # Structured insight: one bullet per key/value pair.
+                    for key, value in insight_item.insight.items():
+                        lines.append(f"- **{key}:** {value}")
+                else:
+                    speaker_info = (
+                        f" _{insight_item.speaker}_" if insight_item.speaker else ""
+                    )
+                    lines.append(f"- {insight_item.insight}{speaker_info}")
+            lines.append("")
+
+    # Transcript
+    if payload.transcript:
+        lines.append("## Transcript")
+        lines.append("")
+        for segment in payload.transcript:
+            # Format timestamp as MM:SS (minutes are not wrapped at 60)
+            minutes = int(segment.timestamp // 60)
+            seconds = int(segment.timestamp % 60)
+            timestamp_str = f"[{minutes:02d}:{seconds:02d}]"
+            lines.append(f"**{segment.speaker}** {timestamp_str}: {segment.text}")
+            lines.append("")
+
+    return "\n".join(lines)
+
+
+@router.post("/webhooks/circleback/{search_space_id}")
+async def receive_circleback_webhook(
+    search_space_id: int,
+    payload: CirclebackWebhookPayload,
+):
+    """
+    Receive and process a Circleback webhook.
+
+    This endpoint receives meeting data from Circleback, converts it to
+    Markdown and queues a Celery task that saves it as a document in the
+    specified search space. The document itself is created asynchronously by
+    that task, not by this handler.
+
+    Args:
+        search_space_id: The ID of the search space to save the document to
+        payload: The Circleback webhook payload containing meeting data
+
+    Returns:
+        A dict with ``status`` ("accepted"), a human-readable ``message``,
+        the ``meeting_id`` and the ``search_space_id``.
+
+    Raises:
+        HTTPException: 500 if formatting or queueing fails. The response
+            detail is intentionally generic; the full error is only logged.
+
+    Note:
+        This endpoint does not require authentication as it's designed to receive
+        webhooks from Circleback. Signature verification can be added later for security.
+        NOTE(review): until then anyone who can reach this URL can queue
+        documents for any search space ID; the ID is not checked here.
+    """
+    try:
+        logger.info(
+            "Received Circleback webhook for meeting %s in search space %s",
+            payload.id,
+            search_space_id,
+        )
+
+        # Convert to markdown
+        markdown_content = format_circleback_meeting_to_markdown(payload)
+
+        # Trigger async document processing (imported lazily inside the handler)
+        from app.tasks.celery_tasks.document_tasks import (
+            process_circleback_meeting_task,
+        )
+
+        # Prepare meeting metadata for the task
+        meeting_metadata = {
+            "circleback_meeting_id": payload.id,
+            "meeting_name": payload.name,
+            "meeting_date": payload.created_at,
+            "duration_seconds": payload.duration,
+            "meeting_url": payload.url,
+            "tags": payload.tags,
+            "attendees_count": len(payload.attendees),
+            "action_items_count": len(payload.action_items),
+            "has_transcript": len(payload.transcript) > 0,
+        }
+
+        # Queue the processing task
+        process_circleback_meeting_task.delay(
+            meeting_id=payload.id,
+            meeting_name=payload.name,
+            markdown_content=markdown_content,
+            metadata=meeting_metadata,
+            search_space_id=search_space_id,
+        )
+
+        logger.info(
+            "Queued Circleback meeting %s for processing in search space %s",
+            payload.id,
+            search_space_id,
+        )
+
+        return {
+            "status": "accepted",
+            "message": f"Meeting '{payload.name}' queued for processing",
+            "meeting_id": payload.id,
+            "search_space_id": search_space_id,
+        }
+
+    except Exception as e:
+        # Full details (including traceback) go to the server log only.
+        logger.error("Error processing Circleback webhook: %s", e, exc_info=True)
+        # The caller is unauthenticated, so do not echo internal exception
+        # text (which may contain hostnames, paths, broker errors, ...).
+        raise HTTPException(
+            status_code=500,
+            detail="Failed to process Circleback webhook",
+        ) from e
+
+
+@router.get("/webhooks/circleback/{search_space_id}/info")
+async def get_circleback_webhook_info(
+    search_space_id: int,
+):
+    """
+    Get information about the Circleback webhook endpoint.
+
+    This endpoint provides information about how to configure the Circleback
+    webhook integration. It only builds and returns static text; it does not
+    check that the search space exists.
+
+    Args:
+        search_space_id: The ID of the search space
+
+    Returns:
+        Webhook configuration information: the full ``webhook_url`` to paste
+        into Circleback, plus the HTTP method, content type and setup hints.
+    """
+    from app.config import config
+
+    # Construct the webhook URL. Fall back to the local default when the
+    # setting is missing *or* empty/None, and drop any trailing slash so the
+    # joined URL never contains "None" or a double slash.
+    base_url = getattr(config, "API_BASE_URL", None) or "http://localhost:8000"
+    base_url = str(base_url).rstrip("/")
+    webhook_url = f"{base_url}/api/v1/webhooks/circleback/{search_space_id}"
+
+    return {
+        "webhook_url": webhook_url,
+        "search_space_id": search_space_id,
+        "method": "POST",
+        "content_type": "application/json",
+        "description": "Use this URL in your Circleback automation to send meeting data to SurfSense",
+        "note": "Configure this URL in Circleback Settings → Automations → Create automation → Send webhook request",
+    }
--- a/surfsense_backend/app/routes/google_drive_add_connector_route.py
+++ b/surfsense_backend/app/routes/google_drive_add_connector_route.py
@ -28,10 +28,8 @@ from app.config import config
 from app.connectors.google_drive import (
    GoogleDriveClient,
    get_start_page_token,
-    get_valid_credentials,
    list_folder_contents,
 )
-from app.connectors.google_drive.folder_manager import list_folders
 from app.db import (
    SearchSourceConnector,
    SearchSourceConnectorType,
@ -111,7 +109,9 @@ async def connect_drive(space_id: int, user: User = Depends(current_active_user)
            state=state_encoded,
        )

-        logger.info(f"Initiating Google Drive OAuth for user {user.id}, space {space_id}")
+        logger.info(
+            f"Initiating Google Drive OAuth for user {user.id}, space {space_id}"
+        )
        return {"auth_url": auth_url}

    except Exception as e:
@ -146,7 +146,9 @@ async def drive_callback(
        user_id = UUID(data["user_id"])
        space_id = data["space_id"]

-        logger.info(f"Processing Google Drive callback for user {user_id}, space {space_id}")
+        logger.info(
+            f"Processing Google Drive callback for user {user_id}, space {space_id}"
+        )

        # Exchange authorization code for tokens
        flow = get_google_flow()
@ -200,7 +202,9 @@ async def drive_callback(

                flag_modified(db_connector, "config")
                await session.commit()
-                logger.info(f"Set initial start page token for connector {db_connector.id}")
+                logger.info(
+                    f"Set initial start page token for connector {db_connector.id}"
+                )
        except Exception as e:
            logger.warning(f"Failed to get initial start page token: {e!s}")

@ -246,7 +250,7 @@ async def list_google_drive_folders(
 ):
    """
    List folders AND files in user's Google Drive with hierarchical support.
-    
+
    This is called at index time from the manage connector page to display
    the complete file system (folders and files). Only folders are selectable.

@ -299,7 +303,7 @@ async def list_google_drive_folders(
            f"✅ Listed {len(items)} total items ({folder_count} folders, {file_count} files) for connector {connector_id}"
            + (f" in folder {parent_id}" if parent_id else " in ROOT")
        )
-        
+
        # Log first few items for debugging
        if items:
            logger.info(f"First 3 items: {[item.get('name') for item in items[:3]]}")
--- a/surfsense_backend/app/routes/search_source_connectors_routes.py
+++ b/surfsense_backend/app/routes/search_source_connectors_routes.py
@ -45,7 +45,6 @@ from app.tasks.connector_indexers import (
    index_github_repos,
    index_google_calendar_events,
    index_google_gmail_messages,
-    index_google_drive_files,
    index_jira_issues,
    index_linear_issues,
    index_luma_events,
@ -1572,7 +1571,9 @@ async def run_google_drive_indexing(
        errors = []

        # Index each folder
-        for folder_id, folder_name in zip(folder_id_list, folder_name_list):
+        for folder_id, folder_name in zip(
+            folder_id_list, folder_name_list, strict=False
+        ):
            try:
                indexed_count, error_message = await index_google_drive_files(
                    session,
@ -1589,7 +1590,7 @@ async def run_google_drive_indexing(
                else:
                    total_indexed += indexed_count
            except Exception as e:
-                errors.append(f"{folder_name}: {str(e)}")
+                errors.append(f"{folder_name}: {e!s}")
                logger.error(
                    f"Error indexing folder {folder_name} ({folder_id}): {e}",
                    exc_info=True,