Merge pull request #765 from AnishSarkar22/fix/documents

feat: Add document ownership & deletion of documents
This commit is contained in:
Rohan Verma 2026-02-02 14:50:18 -08:00 committed by GitHub
commit d0673cecf6
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
41 changed files with 832 additions and 16 deletions

View file

@@ -9,8 +9,12 @@ import logging
from datetime import datetime
from typing import Any
from fastapi import APIRouter, HTTPException
from fastapi import APIRouter, Depends, HTTPException
from pydantic import BaseModel, Field
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from app.db import SearchSourceConnector, SearchSourceConnectorType, get_async_session
logger = logging.getLogger(__name__)
@@ -212,6 +216,7 @@ def format_circleback_meeting_to_markdown(payload: CirclebackWebhookPayload) ->
async def receive_circleback_webhook(
search_space_id: int,
payload: CirclebackWebhookPayload,
session: AsyncSession = Depends(get_async_session),
):
"""
Receive and process a Circleback webhook.
@@ -223,6 +228,7 @@ async def receive_circleback_webhook(
Args:
search_space_id: The ID of the search space to save the document to
payload: The Circleback webhook payload containing meeting data
session: Database session for looking up the connector
Returns:
Success message with document details
@@ -236,6 +242,26 @@ async def receive_circleback_webhook(
f"Received Circleback webhook for meeting {payload.id} in search space {search_space_id}"
)
# Look up the Circleback connector for this search space
connector_result = await session.execute(
select(SearchSourceConnector.id).where(
SearchSourceConnector.search_space_id == search_space_id,
SearchSourceConnector.connector_type
== SearchSourceConnectorType.CIRCLEBACK_CONNECTOR,
)
)
connector_id = connector_result.scalar_one_or_none()
if connector_id:
logger.info(
f"Found Circleback connector {connector_id} for search space {search_space_id}"
)
else:
logger.warning(
f"No Circleback connector found for search space {search_space_id}. "
"Document will be created without connector_id."
)
# Convert to markdown
markdown_content = format_circleback_meeting_to_markdown(payload)
@@ -264,6 +290,7 @@ async def receive_circleback_webhook(
markdown_content=markdown_content,
metadata=meeting_metadata,
search_space_id=search_space_id,
connector_id=connector_id,
)
logger.info(

View file

@@ -20,6 +20,7 @@ from pydantic import ValidationError
from sqlalchemy.exc import IntegrityError
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.future import select
from sqlalchemy.orm.attributes import flag_modified
from app.config import config
from app.db import (
@@ -330,10 +331,19 @@ async def composio_callback(
)
# Update existing connector with new connected_account_id
# IMPORTANT: Merge new credentials with existing config to preserve
# user settings like selected_folders, selected_files, indexing_options,
# drive_page_token, etc. that would otherwise be wiped on reconnection.
logger.info(
f"Updating existing Composio connector {existing_connector.id} with new connected_account_id {final_connected_account_id}"
)
existing_connector.config = connector_config
existing_config = (
existing_connector.config.copy() if existing_connector.config else {}
)
existing_config.update(connector_config)
existing_connector.config = existing_config
flag_modified(existing_connector, "config")
await session.commit()
await session.refresh(existing_connector)

View file

@@ -76,6 +76,7 @@ async def create_note(
document_metadata={"NOTE": True},
embedding=None, # Will be generated on first reindex
updated_at=datetime.now(UTC),
created_by_id=user.id, # Track who created this note
)
session.add(document)
@@ -93,6 +94,7 @@ async def create_note(
search_space_id=document.search_space_id,
created_at=document.created_at,
updated_at=document.updated_at,
created_by_id=document.created_by_id,
)

View file

@@ -527,9 +527,17 @@ async def delete_search_source_connector(
user: User = Depends(current_active_user),
):
"""
Delete a search source connector.
Delete a search source connector and all its associated documents.
The deletion runs in background via Celery task. User is notified
via the notification system when complete (no polling required).
Requires CONNECTORS_DELETE permission.
"""
from app.tasks.celery_tasks.connector_deletion_task import (
delete_connector_with_documents_task,
)
try:
# Get the connector first
result = await session.execute(
@@ -551,7 +559,12 @@ async def delete_search_source_connector(
"You don't have permission to delete this connector",
)
# Delete any periodic schedule associated with this connector
# Store connector info before we queue the deletion task
connector_name = db_connector.name
connector_type = db_connector.connector_type.value
search_space_id = db_connector.search_space_id
# Delete any periodic schedule associated with this connector (lightweight, sync)
if db_connector.periodic_indexing_enabled:
success = delete_periodic_schedule(connector_id)
if not success:
@@ -559,7 +572,7 @@ async def delete_search_source_connector(
f"Failed to delete periodic schedule for connector {connector_id}"
)
# For Composio connectors, also delete the connected account in Composio
# For Composio connectors, delete the connected account in Composio (lightweight API call, sync)
composio_connector_types = [
SearchSourceConnectorType.COMPOSIO_GOOGLE_DRIVE_CONNECTOR,
SearchSourceConnectorType.COMPOSIO_GMAIL_CONNECTOR,
@@ -591,16 +604,33 @@ async def delete_search_source_connector(
f"Error deleting Composio connected account {composio_connected_account_id}: {composio_error!s}"
)
await session.delete(db_connector)
await session.commit()
return {"message": "Search source connector deleted successfully"}
# Queue background task to delete documents and connector
# This handles potentially large document counts without blocking the API
delete_connector_with_documents_task.delay(
connector_id=connector_id,
user_id=str(user.id),
search_space_id=search_space_id,
connector_name=connector_name,
connector_type=connector_type,
)
logger.info(
f"Queued deletion task for connector {connector_id} ({connector_name})"
)
return {
"message": "Connector deletion started. You will be notified when complete.",
"status": "queued",
"connector_id": connector_id,
"connector_name": connector_name,
}
except HTTPException:
raise
except Exception as e:
await session.rollback()
raise HTTPException(
status_code=500,
detail=f"Failed to delete search source connector: {e!s}",
detail=f"Failed to start connector deletion: {e!s}",
) from e