Mirror of https://github.com/MODSetter/SurfSense.git, synced 2026-04-28 10:26:33 +02:00
Merge pull request #765 from AnishSarkar22/fix/documents
feat: Add document ownership & deletion of documents
Commit d0673cecf6
41 changed files with 832 additions and 16 deletions
@@ -0,0 +1,269 @@
"""Celery task for background connector deletion.
|
||||
|
||||
This task handles the deletion of all documents associated with a connector
|
||||
in the background, then deletes the connector itself. User is notified via
|
||||
the notification system when complete (no polling required).
|
||||
|
||||
Features:
|
||||
- Batch deletion to handle large document counts
|
||||
- Automatic retry on failure
|
||||
- Progress tracking via notifications
|
||||
- Handles both success and failure notifications
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
from uuid import UUID
|
||||
|
||||
from sqlalchemy import delete, func, select
|
||||
from sqlalchemy.ext.asyncio import async_sessionmaker, create_async_engine
|
||||
from sqlalchemy.pool import NullPool
|
||||
|
||||
from app.celery_app import celery_app
|
||||
from app.config import config
|
||||
from app.db import Document, Notification, SearchSourceConnector
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Batch size for document deletion
|
||||
DELETION_BATCH_SIZE = 500
|
||||
|
||||
|
||||
def _get_celery_session_maker():
|
||||
"""Create async session maker for Celery tasks."""
|
||||
engine = create_async_engine(
|
||||
config.DATABASE_URL,
|
||||
poolclass=NullPool,
|
||||
echo=False,
|
||||
)
|
||||
return async_sessionmaker(engine, expire_on_commit=False), engine
|
||||
|
||||
|
||||
@celery_app.task(
|
||||
bind=True,
|
||||
name="delete_connector_with_documents",
|
||||
max_retries=3,
|
||||
default_retry_delay=60,
|
||||
autoretry_for=(Exception,),
|
||||
retry_backoff=True,
|
||||
)
|
||||
def delete_connector_with_documents_task(
|
||||
self,
|
||||
connector_id: int,
|
||||
user_id: str,
|
||||
search_space_id: int,
|
||||
connector_name: str,
|
||||
connector_type: str,
|
||||
):
|
||||
"""
|
||||
Background task to delete a connector and all its associated documents.
|
||||
|
||||
Creates a notification when complete (success or failure).
|
||||
No polling required - user sees notification in UI.
|
||||
|
||||
Args:
|
||||
connector_id: ID of the connector to delete
|
||||
user_id: ID of the user who initiated the deletion
|
||||
search_space_id: ID of the search space
|
||||
connector_name: Name of the connector (for notification message)
|
||||
connector_type: Type of the connector (for logging)
|
||||
"""
|
||||
loop = asyncio.new_event_loop()
|
||||
asyncio.set_event_loop(loop)
|
||||
|
||||
try:
|
||||
return loop.run_until_complete(
|
||||
_delete_connector_async(
|
||||
connector_id=connector_id,
|
||||
user_id=user_id,
|
||||
search_space_id=search_space_id,
|
||||
connector_name=connector_name,
|
||||
connector_type=connector_type,
|
||||
)
|
||||
)
|
||||
finally:
|
||||
loop.close()
|
||||
|
||||
|
||||
async def _delete_connector_async(
|
||||
connector_id: int,
|
||||
user_id: str,
|
||||
search_space_id: int,
|
||||
connector_name: str,
|
||||
connector_type: str,
|
||||
) -> dict:
|
||||
"""
|
||||
Async implementation of connector deletion.
|
||||
|
||||
Steps:
|
||||
1. Count total documents to delete
|
||||
2. Delete documents in batches (chunks cascade automatically)
|
||||
3. Delete the connector record
|
||||
4. Create success notification
|
||||
|
||||
On failure, creates failure notification and re-raises exception.
|
||||
"""
|
||||
session_maker, engine = _get_celery_session_maker()
|
||||
total_deleted = 0
|
||||
|
||||
try:
|
||||
async with session_maker() as session:
|
||||
# Step 1: Count total documents for this connector
|
||||
count_result = await session.execute(
|
||||
select(func.count(Document.id)).where(
|
||||
Document.connector_id == connector_id
|
||||
)
|
||||
)
|
||||
total_docs = count_result.scalar() or 0
|
||||
|
||||
logger.info(
|
||||
f"Starting deletion of connector {connector_id} ({connector_name}). "
|
||||
f"Documents to delete: {total_docs}"
|
||||
)
|
||||
|
||||
# Step 2: Delete documents in batches
|
||||
while True:
|
||||
# Get batch of document IDs
|
||||
result = await session.execute(
|
||||
select(Document.id)
|
||||
.where(Document.connector_id == connector_id)
|
||||
.limit(DELETION_BATCH_SIZE)
|
||||
)
|
||||
doc_ids = [row[0] for row in result.fetchall()]
|
||||
|
||||
if not doc_ids:
|
||||
break
|
||||
|
||||
# Delete this batch (chunks are deleted via CASCADE)
|
||||
await session.execute(delete(Document).where(Document.id.in_(doc_ids)))
|
||||
await session.commit()
|
||||
|
||||
total_deleted += len(doc_ids)
|
||||
logger.info(
|
||||
f"Deleted batch of {len(doc_ids)} documents. "
|
||||
f"Progress: {total_deleted}/{total_docs}"
|
||||
)
|
||||
|
||||
# Step 3: Delete the connector record
|
||||
result = await session.execute(
|
||||
select(SearchSourceConnector).where(
|
||||
SearchSourceConnector.id == connector_id
|
||||
)
|
||||
)
|
||||
connector = result.scalar_one_or_none()
|
||||
|
||||
if connector:
|
||||
await session.delete(connector)
|
||||
logger.info(f"Deleted connector record: {connector_id}")
|
||||
else:
|
||||
logger.warning(
|
||||
f"Connector {connector_id} not found - may have been already deleted"
|
||||
)
|
||||
|
||||
# Step 4: Create success notification
|
||||
doc_text = "document" if total_deleted == 1 else "documents"
|
||||
notification = Notification(
|
||||
user_id=UUID(user_id),
|
||||
search_space_id=search_space_id,
|
||||
type="connector_deletion",
|
||||
title=f"{connector_name} Removed",
|
||||
message=f"Connector and {total_deleted} {doc_text} have been removed from your knowledge base.",
|
||||
notification_metadata={
|
||||
"connector_id": connector_id,
|
||||
"connector_name": connector_name,
|
||||
"connector_type": connector_type,
|
||||
"documents_deleted": total_deleted,
|
||||
"status": "completed",
|
||||
},
|
||||
)
|
||||
session.add(notification)
|
||||
await session.commit()
|
||||
|
||||
logger.info(
|
||||
f"Connector {connector_id} ({connector_name}) deleted successfully. "
|
||||
f"Total documents deleted: {total_deleted}"
|
||||
)
|
||||
|
||||
return {
|
||||
"status": "success",
|
||||
"connector_id": connector_id,
|
||||
"connector_name": connector_name,
|
||||
"documents_deleted": total_deleted,
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
f"Failed to delete connector {connector_id} ({connector_name}): {e!s}",
|
||||
exc_info=True,
|
||||
)
|
||||
|
||||
# Create failure notification
|
||||
try:
|
||||
async with session_maker() as session:
|
||||
notification = Notification(
|
||||
user_id=UUID(user_id),
|
||||
search_space_id=search_space_id,
|
||||
type="connector_deletion",
|
||||
title=f"Failed to Remove {connector_name}",
|
||||
message="Something went wrong while removing this connector. Please try again.",
|
||||
notification_metadata={
|
||||
"connector_id": connector_id,
|
||||
"connector_name": connector_name,
|
||||
"connector_type": connector_type,
|
||||
"documents_deleted": total_deleted,
|
||||
"status": "failed",
|
||||
"error": str(e),
|
||||
},
|
||||
)
|
||||
session.add(notification)
|
||||
await session.commit()
|
||||
except Exception as notify_error:
|
||||
logger.error(
|
||||
f"Failed to create failure notification: {notify_error!s}",
|
||||
exc_info=True,
|
||||
)
|
||||
|
||||
# Re-raise to trigger Celery retry
|
||||
raise
|
||||
|
||||
finally:
|
||||
await engine.dispose()
|
||||
|
||||
|
||||
async def delete_documents_by_connector_id(
|
||||
session,
|
||||
connector_id: int,
|
||||
batch_size: int = DELETION_BATCH_SIZE,
|
||||
) -> int:
|
||||
"""
|
||||
Delete all documents associated with a connector in batches.
|
||||
|
||||
This is a utility function that can be used independently of the Celery task
|
||||
for synchronous deletion scenarios (e.g., small document counts).
|
||||
|
||||
Args:
|
||||
session: AsyncSession instance
|
||||
connector_id: ID of the connector
|
||||
batch_size: Number of documents to delete per batch
|
||||
|
||||
Returns:
|
||||
Total number of documents deleted
|
||||
"""
|
||||
total_deleted = 0
|
||||
|
||||
while True:
|
||||
result = await session.execute(
|
||||
select(Document.id)
|
||||
.where(Document.connector_id == connector_id)
|
||||
.limit(batch_size)
|
||||
)
|
||||
doc_ids = [row[0] for row in result.fetchall()]
|
||||
|
||||
if not doc_ids:
|
||||
break
|
||||
|
||||
await session.execute(delete(Document).where(Document.id.in_(doc_ids)))
|
||||
await session.commit()
|
||||
total_deleted += len(doc_ids)
|
||||
|
||||
return total_deleted
|
||||
|
|
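For context, a minimal sketch of how the two deletion paths above might be invoked. The wrapper functions, the `connector`/`user` objects, and the module path are assumptions for illustration, not part of this diff:

# Sketch only: the module path and caller names below are assumptions.
from app.tasks.connector_deletion import (  # hypothetical module path
    delete_connector_with_documents_task,
    delete_documents_by_connector_id,
)


def schedule_connector_deletion(connector, user) -> None:
    # Background path: .delay() enqueues the task on the Celery broker and
    # returns immediately; completion is reported via a Notification row.
    delete_connector_with_documents_task.delay(
        connector_id=connector.id,
        user_id=str(user.id),
        search_space_id=connector.search_space_id,
        connector_name=connector.name,
        connector_type=str(connector.connector_type),
    )


async def delete_documents_inline(session, connector_id: int) -> int:
    # Synchronous path for small document counts: run the batch-deletion
    # utility inside an existing AsyncSession.
    return await delete_documents_by_connector_id(session, connector_id)
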
@@ -545,6 +545,7 @@ def process_circleback_meeting_task(
    markdown_content: str,
    metadata: dict,
    search_space_id: int,
    connector_id: int | None = None,
):
    """
    Celery task to process Circleback meeting webhook data.

@@ -555,6 +556,7 @@ def process_circleback_meeting_task(
        markdown_content: Meeting content formatted as markdown
        metadata: Meeting metadata dictionary
        search_space_id: ID of the search space
        connector_id: ID of the Circleback connector (for deletion support)
    """
    import asyncio

@@ -569,6 +571,7 @@ def process_circleback_meeting_task(
                markdown_content,
                metadata,
                search_space_id,
                connector_id,
            )
        )
    finally:

@@ -581,6 +584,7 @@ async def _process_circleback_meeting(
    markdown_content: str,
    metadata: dict,
    search_space_id: int,
    connector_id: int | None = None,
):
    """Process Circleback meeting with new session."""
    from app.tasks.document_processors.circleback_processor import (

@@ -637,6 +641,7 @@ async def _process_circleback_meeting(
        markdown_content=markdown_content,
        metadata=metadata,
        search_space_id=search_space_id,
        connector_id=connector_id,
    )

    if result:

@@ -417,6 +417,8 @@ async def index_airtable_records(
            embedding=summary_embedding,
            chunks=chunks,
            updated_at=get_current_timestamp(),
            created_by_id=user_id,
            connector_id=connector_id,
        )

        session.add(document)

@@ -396,6 +396,8 @@ async def index_bookstack_pages(
            embedding=summary_embedding,
            chunks=chunks,
            updated_at=get_current_timestamp(),
            created_by_id=user_id,
            connector_id=connector_id,
        )

        session.add(document)

@@ -395,6 +395,8 @@ async def index_clickup_tasks(
            embedding=summary_embedding,
            chunks=chunks,
            updated_at=get_current_timestamp(),
            created_by_id=user_id,
            connector_id=connector_id,
        )

        session.add(document)

@@ -402,6 +402,8 @@ async def index_confluence_pages(
            embedding=summary_embedding,
            chunks=chunks,
            updated_at=get_current_timestamp(),
            created_by_id=user_id,
            connector_id=connector_id,
        )

        session.add(document)

@@ -527,6 +527,8 @@ async def index_discord_messages(
            content_hash=content_hash,
            unique_identifier_hash=unique_identifier_hash,
            updated_at=get_current_timestamp(),
            created_by_id=user_id,
            connector_id=connector_id,
        )

        session.add(document)

@@ -292,6 +292,8 @@ async def index_elasticsearch_documents(
            document_metadata=metadata,
            search_space_id=search_space_id,
            updated_at=get_current_timestamp(),
            created_by_id=user_id,
            connector_id=connector_id,
        )

        # Create chunks and attach to document (persist via relationship)

@@ -220,6 +220,7 @@ async def index_github_repos(
            user_id=user_id,
            task_logger=task_logger,
            log_entry=log_entry,
            connector_id=connector_id,
        )

        documents_processed += docs_created

@@ -292,6 +293,7 @@ async def _process_repository_digest(
    user_id: str,
    task_logger: TaskLoggingService,
    log_entry,
    connector_id: int,
) -> int:
    """
    Process a repository digest and create documents.

@@ -426,6 +428,8 @@ async def _process_repository_digest(
            search_space_id=search_space_id,
            chunks=chunks_data,
            updated_at=get_current_timestamp(),
            created_by_id=user_id,
            connector_id=connector_id,
        )

        session.add(document)

@@ -499,6 +499,8 @@ async def index_google_calendar_events(
            embedding=summary_embedding,
            chunks=chunks,
            updated_at=get_current_timestamp(),
            created_by_id=user_id,
            connector_id=connector_id,
        )

        session.add(document)

@@ -767,6 +767,7 @@ async def _process_single_file(
        session=session,
        task_logger=task_logger,
        log_entry=log_entry,
        connector_id=connector_id,
    )

    if error:

@@ -413,7 +413,6 @@ async def index_google_gmail_messages(
                "subject": subject,
                "sender": sender,
                "date": date_str,
                "connector_id": connector_id,
            },
            content=summary_content,
            content_hash=content_hash,

@@ -421,6 +420,8 @@ async def index_google_gmail_messages(
            embedding=summary_embedding,
            chunks=chunks,
            updated_at=get_current_timestamp(),
            created_by_id=user_id,
            connector_id=connector_id,
        )
        session.add(document)
        documents_indexed += 1

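The first Gmail hunk above removes `connector_id` from the document's JSON metadata; the second adds it as a first-class `Document` column, which is what lets the deletion task find a connector's documents with a plain equality filter instead of a JSON-path lookup. A rough sketch of the difference (the JSON-path form assumes JSONB-style metadata and is illustrative only):

from sqlalchemy import select

from app.db import Document


def documents_for_connector(connector_id: int):
    # Before this change, a lookup would have needed a JSON-path filter on
    # document_metadata, e.g. (assuming JSONB; illustrative only):
    #   Document.document_metadata["connector_id"].as_integer() == connector_id
    # With connector_id as a real column, the batched deletion can use an
    # ordinary, indexable equality filter:
    return select(Document.id).where(Document.connector_id == connector_id)
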
@@ -380,6 +380,8 @@ async def index_jira_issues(
            embedding=summary_embedding,
            chunks=chunks,
            updated_at=get_current_timestamp(),
            created_by_id=user_id,
            connector_id=connector_id,
        )

        session.add(document)

@@ -413,6 +413,8 @@ async def index_linear_issues(
            embedding=summary_embedding,
            chunks=chunks,
            updated_at=get_current_timestamp(),
            created_by_id=user_id,
            connector_id=connector_id,
        )

        session.add(document)

@@ -476,6 +476,8 @@ async def index_luma_events(
            embedding=summary_embedding,
            chunks=chunks,
            updated_at=get_current_timestamp(),
            created_by_id=user_id,
            connector_id=connector_id,
        )

        session.add(document)

@@ -398,6 +398,7 @@ async def index_notion_pages(
            }
            existing_document.chunks = chunks
            existing_document.updated_at = get_current_timestamp()
            existing_document.connector_id = connector_id

            documents_indexed += 1
            logger.info(f"Successfully updated Notion page: {page_title}")

@@ -470,6 +471,8 @@ async def index_notion_pages(
            embedding=summary_embedding,
            chunks=chunks,
            updated_at=get_current_timestamp(),
            created_by_id=user_id,
            connector_id=connector_id,
        )

        session.add(document)

@@ -500,6 +500,8 @@ async def index_obsidian_vault(
            embedding=embedding,
            chunks=chunks,
            updated_at=get_current_timestamp(),
            created_by_id=user_id,
            connector_id=connector_id,
        )

        session.add(new_document)

@@ -389,6 +389,8 @@ async def index_slack_messages(
            content_hash=content_hash,
            unique_identifier_hash=unique_identifier_hash,
            updated_at=get_current_timestamp(),
            created_by_id=user_id,
            connector_id=connector_id,
        )

        session.add(document)

@@ -430,6 +430,8 @@ async def index_teams_messages(
            content_hash=content_hash,
            unique_identifier_hash=unique_identifier_hash,
            updated_at=get_current_timestamp(),
            created_by_id=user_id,
            connector_id=connector_id,
        )

        session.add(document)

@@ -371,6 +371,8 @@ async def index_crawled_urls(
            embedding=summary_embedding,
            chunks=chunks,
            updated_at=get_current_timestamp(),
            created_by_id=user_id,
            connector_id=connector_id,
        )

        session.add(document)

@@ -8,10 +8,17 @@ and stores it as searchable documents in the database.
import logging
from typing import Any

from sqlalchemy import select
from sqlalchemy.exc import SQLAlchemyError
from sqlalchemy.ext.asyncio import AsyncSession

from app.db import Document, DocumentType
from app.db import (
    Document,
    DocumentType,
    SearchSourceConnector,
    SearchSourceConnectorType,
    SearchSpace,
)
from app.services.llm_service import get_document_summary_llm
from app.utils.document_converters import (
    create_document_chunks,

@@ -35,6 +42,7 @@ async def add_circleback_meeting_document(
    markdown_content: str,
    metadata: dict[str, Any],
    search_space_id: int,
    connector_id: int | None = None,
) -> Document | None:
    """
    Process and store a Circleback meeting document.

@@ -46,6 +54,7 @@ async def add_circleback_meeting_document(
        markdown_content: Meeting content formatted as markdown
        metadata: Meeting metadata dictionary
        search_space_id: ID of the search space
        connector_id: ID of the Circleback connector (for deletion support)

    Returns:
        Document object if successful, None if failed or duplicate

@@ -125,6 +134,30 @@ async def add_circleback_meeting_document(
            **metadata,
        }

        # Fetch the user who set up the Circleback connector (preferred)
        # or fall back to the search space owner if no connector is found
        created_by_user_id = None

        # Try to find the Circleback connector for this search space
        connector_result = await session.execute(
            select(SearchSourceConnector.user_id).where(
                SearchSourceConnector.search_space_id == search_space_id,
                SearchSourceConnector.connector_type
                == SearchSourceConnectorType.CIRCLEBACK_CONNECTOR,
            )
        )
        connector_user = connector_result.scalar_one_or_none()

        if connector_user:
            # Use the user who set up the Circleback connector
            created_by_user_id = connector_user
        else:
            # Fallback: use the search space owner if no connector is found
            search_space_result = await session.execute(
                select(SearchSpace.user_id).where(SearchSpace.id == search_space_id)
            )
            created_by_user_id = search_space_result.scalar_one_or_none()

        # Update or create document
        if existing_document:
            # Update existing document

@@ -138,6 +171,9 @@ async def add_circleback_meeting_document(
            existing_document.blocknote_document = blocknote_json
            existing_document.content_needs_reindexing = False
            existing_document.updated_at = get_current_timestamp()
            # Ensure connector_id is set (backfill for documents created before this field)
            if connector_id is not None:
                existing_document.connector_id = connector_id

            await session.commit()
            await session.refresh(existing_document)

@@ -160,6 +196,8 @@ async def add_circleback_meeting_document(
            blocknote_document=blocknote_json,
            content_needs_reindexing=False,
            updated_at=get_current_timestamp(),
            created_by_id=created_by_user_id,
            connector_id=connector_id,
        )

        session.add(document)

@@ -185,6 +185,7 @@ async def add_extension_received_document(
            unique_identifier_hash=unique_identifier_hash,
            blocknote_document=blocknote_json,
            updated_at=get_current_timestamp(),
            created_by_id=user_id,
        )

        session.add(document)

@@ -526,6 +526,8 @@ async def add_received_file_document_using_unstructured(
            blocknote_document=blocknote_json,
            content_needs_reindexing=False,
            updated_at=get_current_timestamp(),
            created_by_id=user_id,
            connector_id=connector.get("connector_id") if connector else None,
        )

        session.add(document)

@@ -665,6 +667,8 @@ async def add_received_file_document_using_llamacloud(
            blocknote_document=blocknote_json,
            content_needs_reindexing=False,
            updated_at=get_current_timestamp(),
            created_by_id=user_id,
            connector_id=connector.get("connector_id") if connector else None,
        )

        session.add(document)

@@ -829,6 +833,8 @@ async def add_received_file_document_using_docling(
            blocknote_document=blocknote_json,
            content_needs_reindexing=False,
            updated_at=get_current_timestamp(),
            created_by_id=user_id,
            connector_id=connector.get("connector_id") if connector else None,
        )

        session.add(document)

@@ -849,7 +855,7 @@ async def add_received_file_document_using_docling(
async def _update_document_from_connector(
    document: Document | None, connector: dict | None, session: AsyncSession
) -> None:
    """Helper to update document type and metadata from connector info."""
    """Helper to update document type, metadata, and connector_id from connector info."""
    if document and connector:
        if "type" in connector:
            document.document_type = connector["type"]

@@ -861,6 +867,9 @@ async def _update_document_from_connector(
        # Expand existing metadata with connector metadata
        merged = {**document.document_metadata, **connector["metadata"]}
        document.document_metadata = merged
        # Set connector_id if provided for de-indexing support
        if "connector_id" in connector:
            document.connector_id = connector["connector_id"]
    await session.commit()

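From the call sites in this diff, the `connector` argument threaded through the `add_received_file_document_using_*` functions is a plain dict carrying up to three keys: `type`, `metadata`, and now `connector_id`. A sketch of what a caller might pass (the enum member and metadata values are illustrative assumptions):

from app.db import DocumentType

# Illustrative shape of the connector info dict consumed by
# _update_document_from_connector and the file-document creators.
connector = {
    "type": DocumentType.FILE,        # assumed enum member; sets document_type
    "metadata": {"source": "drive"},  # merged into document.document_metadata
    "connector_id": 42,               # stamped on the row for batched de-indexing
}
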
@@ -295,6 +295,8 @@ async def add_received_markdown_file_document(
            unique_identifier_hash=primary_hash,
            blocknote_document=blocknote_json,
            updated_at=get_current_timestamp(),
            created_by_id=user_id,
            connector_id=connector.get("connector_id") if connector else None,
        )

        session.add(document)

@@ -357,6 +357,7 @@ async def add_youtube_video_document(
            unique_identifier_hash=unique_identifier_hash,
            blocknote_document=blocknote_json,
            updated_at=get_current_timestamp(),
            created_by_id=user_id,
        )

        session.add(document)