fix(connectors): properly commit Google Drive document type changes

- Return file metadata from content_extractor for indexer to use
- Update document type and metadata in indexer after processing
- Explicitly commit changes to database
- Ensures documents are properly marked as GOOGLE_DRIVE_CONNECTOR type
This commit is contained in:
CREDO23 2025-12-28 17:15:29 +02:00
parent 9f1fd20944
commit b2b891e4d7
2 changed files with 44 additions and 37 deletions

View file

@ -388,7 +388,7 @@ async def _process_single_file(
# Download and process using Surfsense's existing infrastructure
# This handles: markdown, audio, PDFs, Office docs, images, etc.
# It also handles: deduplication, chunking, summarization, embedding
document, error = await download_and_process_file(
document, error, file_metadata = await download_and_process_file(
client=drive_client,
file=file,
search_space_id=search_space_id,
@ -407,7 +407,28 @@ async def _process_single_file(
)
return 0, 1
if document:
if document and file_metadata:
# Update document type to GOOGLE_DRIVE_CONNECTOR and add metadata
original_type = document.document_type
document.document_type = DocumentType.GOOGLE_DRIVE_CONNECTOR
# Add Google Drive specific metadata
if not document.metadata:
document.metadata = {}
document.metadata.update({
**file_metadata,
"original_document_type": original_type,
"source_connector": "google_drive",
})
# Commit the document type and metadata changes
await session.commit()
logger.info(
f"Updated document {document.id} to GOOGLE_DRIVE_CONNECTOR type with metadata"
)
# Successfully indexed
await task_logger.log_task_progress(
log_entry,
@ -416,6 +437,7 @@ async def _process_single_file(
"status": "indexed",
"document_id": document.id,
"file_name": file_name,
"document_type": DocumentType.GOOGLE_DRIVE_CONNECTOR,
},
)
return 1, 0