feat: added circleback connector

This commit is contained in:
DESKTOP-RTLN3BA\$punk 2025-12-30 09:00:59 -08:00
parent 23870042f3
commit c19d300c9d
27 changed files with 1153 additions and 97 deletions

View file

@ -3,17 +3,18 @@ from fastapi import APIRouter
from .airtable_add_connector_route import (
router as airtable_add_connector_router,
)
from .circleback_webhook_route import router as circleback_webhook_router
from .documents_routes import router as documents_router
from .editor_routes import router as editor_router
from .google_calendar_add_connector_route import (
router as google_calendar_add_connector_router,
)
from .google_gmail_add_connector_route import (
router as google_gmail_add_connector_router,
)
from .google_drive_add_connector_route import (
router as google_drive_add_connector_router,
)
from .google_gmail_add_connector_route import (
router as google_gmail_add_connector_router,
)
from .logs_routes import router as logs_router
from .luma_add_connector_route import router as luma_add_connector_router
from .new_chat_routes import router as new_chat_router
@ -41,3 +42,4 @@ router.include_router(airtable_add_connector_router)
router.include_router(luma_add_connector_router)
router.include_router(new_llm_config_router) # LLM configs with prompt configuration
router.include_router(logs_router)
router.include_router(circleback_webhook_router) # Circleback meeting webhooks

View file

@ -0,0 +1,317 @@
"""
Circleback Webhook Route
This module provides a webhook endpoint for receiving meeting data from Circleback.
It processes the incoming webhook payload and saves it as a document in the specified search space.
"""
import logging
from datetime import datetime
from typing import Any
from fastapi import APIRouter, HTTPException
from pydantic import BaseModel, Field
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
# Router on which the Circleback webhook endpoints in this module are registered.
router = APIRouter()
# Pydantic models for Circleback webhook payload
class CirclebackAttendee(BaseModel):
    """Attendee model for Circleback meeting.

    Both fields are optional and default to ``None``.
    """

    # Display name; the Markdown formatter in this module substitutes
    # "Unknown" when it is missing.
    name: str | None = None
    # Email address; rendered in parentheses after the name when present.
    email: str | None = None
class CirclebackActionItemAssignee(BaseModel):
    """Assignee model for action items.

    Both fields are optional and default to ``None``.
    """

    # Assignee name; the Markdown formatter in this module only mentions the
    # assignee when this is set.
    name: str | None = None
    # Assignee email; accepted on input but not read by the formatter.
    email: str | None = None
class CirclebackActionItem(BaseModel):
    """Action item model for Circleback meeting.

    Only ``id`` and ``title`` are required; the other fields have defaults.
    """

    id: int
    title: str
    # Free-text detail; an empty string means "no description".
    description: str = ""
    # Person the item is assigned to, or None when unassigned.
    assignee: CirclebackActionItemAssignee | None = None
    # Status string; the Markdown formatter in this module compares it against
    # "DONE". The full set of values Circleback sends is not visible here.
    status: str = "PENDING"
class CirclebackTranscriptSegment(BaseModel):
    """Transcript segment model for Circleback meeting.

    All three fields are required.
    """

    # Label of whoever spoke this segment.
    speaker: str
    # What was said.
    text: str
    # Offset of the segment; the Markdown formatter in this module treats it
    # as seconds when rendering ``[MM:SS]``.
    timestamp: float
class CirclebackInsightItem(BaseModel):
    """Individual insight item.

    Only ``insight`` is required.
    """

    # Either plain text or a mapping of named values; the Markdown formatter
    # in this module renders a mapping as one bullet per key.
    insight: str | dict[str, Any]
    # Speaker the insight is attributed to, if any.
    speaker: str | None = None
    # Optional offset within the meeting; not read by the formatter.
    timestamp: float | None = None
class CirclebackWebhookPayload(BaseModel):
    """
    Request body posted by Circleback once a meeting has been processed.

    The camelCase keys ``createdAt``, ``recordingUrl``, ``icalUid`` and
    ``actionItems`` are mapped onto snake_case attributes through aliases;
    ``populate_by_name`` additionally allows the snake_case names on input.
    Only ``id``, ``name``, ``created_at`` and ``duration`` are required.
    """

    model_config = {"populate_by_name": True}

    # --- required fields -------------------------------------------------
    id: int = Field(description="Circleback meeting ID")
    name: str = Field(description="Meeting name")
    created_at: str = Field(
        alias="createdAt",
        description="Meeting creation date in ISO format",
    )
    duration: float = Field(description="Meeting duration in seconds")

    # --- optional scalars ------------------------------------------------
    url: str | None = Field(default=None, description="URL of the virtual meeting")
    recording_url: str | None = Field(
        default=None,
        alias="recordingUrl",
        description="URL of the meeting recording (valid for 24 hours)",
    )
    tags: list[str] = Field(default_factory=list, description="Meeting tags")
    ical_uid: str | None = Field(
        default=None,
        alias="icalUid",
        description="Unique identifier of the calendar event",
    )

    # --- meeting content; each collection defaults to empty ---------------
    attendees: list[CirclebackAttendee] = Field(
        default_factory=list,
        description="Meeting attendees",
    )
    notes: str = Field(default="", description="Meeting notes in Markdown format")
    action_items: list[CirclebackActionItem] = Field(
        default_factory=list,
        alias="actionItems",
        description="Action items from the meeting",
    )
    transcript: list[CirclebackTranscriptSegment] = Field(
        default_factory=list,
        description="Meeting transcript segments",
    )
    insights: dict[str, list[CirclebackInsightItem]] = Field(
        default_factory=dict,
        description="Custom insights from the meeting",
    )
def format_circleback_meeting_to_markdown(payload: CirclebackWebhookPayload) -> str:
    """
    Convert Circleback webhook payload to a well-formatted Markdown document.

    Sections are emitted in a fixed order: title, meeting details, attendees,
    notes, action items, insights, transcript. Every section except the title
    and the details is omitted when its source field is empty.

    Args:
        payload: The Circleback webhook payload

    Returns:
        Markdown string representation of the meeting
    """
    # Imported locally because the module only imports ``datetime`` itself.
    from datetime import timezone

    lines = [f"# {payload.name}", "", "## Meeting Details", ""]

    # Parse and format the creation date. Offset-aware values are converted to
    # UTC so the "UTC" label is truthful; naive values are assumed to already
    # be UTC. Anything unparseable is shown verbatim.
    try:
        created_dt = datetime.fromisoformat(payload.created_at.replace("Z", "+00:00"))
        if created_dt.tzinfo is not None:
            created_dt = created_dt.astimezone(timezone.utc)
        formatted_date = created_dt.strftime("%Y-%m-%d %H:%M:%S UTC")
    except (ValueError, AttributeError, OverflowError):
        formatted_date = payload.created_at

    lines.append(f"- **Date:** {formatted_date}")
    # Duration arrives in seconds; report whole minutes.
    lines.append(f"- **Duration:** {int(payload.duration // 60)} minutes")
    if payload.url:
        lines.append(f"- **Meeting URL:** {payload.url}")
    if payload.tags:
        lines.append(f"- **Tags:** {', '.join(payload.tags)}")
    lines.append(
        f"- **Circleback Link:** [View on Circleback](https://app.circleback.ai/meetings/{payload.id})"
    )
    lines.append("")

    # Attendees
    if payload.attendees:
        lines.append("## Attendees")
        lines.append("")
        for attendee in payload.attendees:
            name = attendee.name or "Unknown"
            if attendee.email:
                lines.append(f"- **{name}** ({attendee.email})")
            else:
                lines.append(f"- **{name}**")
        lines.append("")

    # Notes (if provided) are already Markdown and are inserted as-is.
    if payload.notes:
        lines.append("## Meeting Notes")
        lines.append("")
        lines.append(payload.notes)
        lines.append("")

    # Action items are rendered as a Markdown task list so that completed and
    # open items are distinguishable.
    if payload.action_items:
        lines.append("## Action Items")
        lines.append("")
        for item in payload.action_items:
            checkbox = "[x]" if item.status == "DONE" else "[ ]"
            assignee_text = ""
            if item.assignee and item.assignee.name:
                assignee_text = f" (Assigned to: {item.assignee.name})"
            lines.append(f"- {checkbox} **{item.title}**{assignee_text}")
            if item.description:
                # Two-space indent keeps the description inside the list item.
                lines.append(f"  {item.description}")
        lines.append("")

    # Insights: one sub-section per insight name.
    if payload.insights:
        lines.append("## Insights")
        lines.append("")
        for insight_name, insight_items in payload.insights.items():
            lines.append(f"### {insight_name}")
            lines.append("")
            for insight_item in insight_items:
                if isinstance(insight_item.insight, dict):
                    # Structured insight: one bullet per key/value pair.
                    for key, value in insight_item.insight.items():
                        lines.append(f"- **{key}:** {value}")
                else:
                    speaker_info = (
                        f" _{insight_item.speaker}_" if insight_item.speaker else ""
                    )
                    lines.append(f"- {insight_item.insight}{speaker_info}")
            lines.append("")

    # Transcript: one paragraph per segment, separated by blank lines.
    if payload.transcript:
        lines.append("## Transcript")
        lines.append("")
        for segment in payload.transcript:
            # Format timestamp as MM:SS
            minutes = int(segment.timestamp // 60)
            seconds = int(segment.timestamp % 60)
            timestamp_str = f"[{minutes:02d}:{seconds:02d}]"
            lines.append(f"**{segment.speaker}** {timestamp_str}: {segment.text}")
            lines.append("")

    return "\n".join(lines)
@router.post("/webhooks/circleback/{search_space_id}")
async def receive_circleback_webhook(
    search_space_id: int,
    payload: CirclebackWebhookPayload,
):
    """
    Receive and process a Circleback webhook.

    The meeting is rendered to Markdown in-process, then handed to
    ``process_circleback_meeting_task`` via ``.delay()`` together with a small
    metadata dict; the document itself is created by that task, not here.

    Args:
        search_space_id: The ID of the search space to save the document to
        payload: The Circleback webhook payload containing meeting data

    Returns:
        A dict with ``status`` ("accepted"), a human-readable ``message``, the
        ``meeting_id`` and the ``search_space_id``.

    Raises:
        HTTPException: 500 if rendering or queueing fails. The detail is
            deliberately generic; the underlying error is only logged.

    Note:
        This endpoint does not require authentication as it's designed to receive
        webhooks from Circleback. Signature verification can be added later for security.
    """
    try:
        logger.info(
            f"Received Circleback webhook for meeting {payload.id} in search space {search_space_id}"
        )

        # Convert to markdown
        markdown_content = format_circleback_meeting_to_markdown(payload)

        # Imported at call time rather than module import time.
        # NOTE(review): presumably to avoid a circular import -- confirm.
        from app.tasks.celery_tasks.document_tasks import (
            process_circleback_meeting_task,
        )

        # Prepare meeting metadata for the task
        meeting_metadata = {
            "circleback_meeting_id": payload.id,
            "meeting_name": payload.name,
            "meeting_date": payload.created_at,
            "duration_seconds": payload.duration,
            "meeting_url": payload.url,
            "tags": payload.tags,
            "attendees_count": len(payload.attendees),
            "action_items_count": len(payload.action_items),
            "has_transcript": len(payload.transcript) > 0,
        }

        # Queue the processing task
        process_circleback_meeting_task.delay(
            meeting_id=payload.id,
            meeting_name=payload.name,
            markdown_content=markdown_content,
            metadata=meeting_metadata,
            search_space_id=search_space_id,
        )

        logger.info(
            f"Queued Circleback meeting {payload.id} for processing in search space {search_space_id}"
        )

        return {
            "status": "accepted",
            "message": f"Meeting '{payload.name}' queued for processing",
            "meeting_id": payload.id,
            "search_space_id": search_space_id,
        }
    except Exception as e:
        # The endpoint is unauthenticated, so internal error text must not be
        # echoed to the caller; the full traceback goes to the log instead.
        logger.exception(
            "Error processing Circleback webhook for meeting %s in search space %s",
            payload.id,
            search_space_id,
        )
        raise HTTPException(
            status_code=500,
            detail="Failed to process Circleback webhook",
        ) from e
@router.get("/webhooks/circleback/{search_space_id}/info")
async def get_circleback_webhook_info(
    search_space_id: int,
):
    """
    Get information about the Circleback webhook endpoint.

    Builds the URL that should be pasted into a Circleback automation for the
    given search space and returns it with static usage hints.

    Args:
        search_space_id: The ID of the search space

    Returns:
        A dict with ``webhook_url``, ``search_space_id``, ``method``,
        ``content_type``, ``description`` and ``note``.
    """
    from app.config import config

    # Fall back to the local default when the setting is missing, None or
    # empty, and drop a trailing slash so the joined URL never contains "//api".
    configured_base = getattr(config, "API_BASE_URL", None)
    base_url = (configured_base or "http://localhost:8000").rstrip("/")
    webhook_url = f"{base_url}/api/v1/webhooks/circleback/{search_space_id}"

    return {
        "webhook_url": webhook_url,
        "search_space_id": search_space_id,
        "method": "POST",
        "content_type": "application/json",
        "description": "Use this URL in your Circleback automation to send meeting data to SurfSense",
        "note": "Configure this URL in Circleback Settings → Automations → Create automation → Send webhook request",
    }

View file

@ -28,10 +28,8 @@ from app.config import config
from app.connectors.google_drive import (
GoogleDriveClient,
get_start_page_token,
get_valid_credentials,
list_folder_contents,
)
from app.connectors.google_drive.folder_manager import list_folders
from app.db import (
SearchSourceConnector,
SearchSourceConnectorType,
@ -111,7 +109,9 @@ async def connect_drive(space_id: int, user: User = Depends(current_active_user)
state=state_encoded,
)
logger.info(f"Initiating Google Drive OAuth for user {user.id}, space {space_id}")
logger.info(
f"Initiating Google Drive OAuth for user {user.id}, space {space_id}"
)
return {"auth_url": auth_url}
except Exception as e:
@ -146,7 +146,9 @@ async def drive_callback(
user_id = UUID(data["user_id"])
space_id = data["space_id"]
logger.info(f"Processing Google Drive callback for user {user_id}, space {space_id}")
logger.info(
f"Processing Google Drive callback for user {user_id}, space {space_id}"
)
# Exchange authorization code for tokens
flow = get_google_flow()
@ -200,7 +202,9 @@ async def drive_callback(
flag_modified(db_connector, "config")
await session.commit()
logger.info(f"Set initial start page token for connector {db_connector.id}")
logger.info(
f"Set initial start page token for connector {db_connector.id}"
)
except Exception as e:
logger.warning(f"Failed to get initial start page token: {e!s}")
@ -246,7 +250,7 @@ async def list_google_drive_folders(
):
"""
List folders AND files in user's Google Drive with hierarchical support.
This is called at index time from the manage connector page to display
the complete file system (folders and files). Only folders are selectable.
@ -299,7 +303,7 @@ async def list_google_drive_folders(
f"✅ Listed {len(items)} total items ({folder_count} folders, {file_count} files) for connector {connector_id}"
+ (f" in folder {parent_id}" if parent_id else " in ROOT")
)
# Log first few items for debugging
if items:
logger.info(f"First 3 items: {[item.get('name') for item in items[:3]]}")

View file

@ -45,7 +45,6 @@ from app.tasks.connector_indexers import (
index_github_repos,
index_google_calendar_events,
index_google_gmail_messages,
index_google_drive_files,
index_jira_issues,
index_linear_issues,
index_luma_events,
@ -1572,7 +1571,9 @@ async def run_google_drive_indexing(
errors = []
# Index each folder
for folder_id, folder_name in zip(folder_id_list, folder_name_list):
for folder_id, folder_name in zip(
folder_id_list, folder_name_list, strict=False
):
try:
indexed_count, error_message = await index_google_drive_files(
session,
@ -1589,7 +1590,7 @@ async def run_google_drive_indexing(
else:
total_indexed += indexed_count
except Exception as e:
errors.append(f"{folder_name}: {str(e)}")
errors.append(f"{folder_name}: {e!s}")
logger.error(
f"Error indexing folder {folder_name} ({folder_id}): {e}",
exc_info=True,