feat: enhance document upload handling by managing duplicates and updating statuses for existing documents

This commit is contained in:
Anish Sarkar 2026-02-06 18:12:46 +05:30
parent 72205ce11b
commit e3faf4cc5e
2 changed files with 23 additions and 6 deletions

View file

@@ -178,9 +178,26 @@ async def create_documents_file_upload(
session, unique_identifier_hash
)
if existing:
# Clean up temp file for duplicates
os.unlink(temp_path)
skipped_duplicates += 1
if DocumentStatus.is_state(existing.status, DocumentStatus.READY):
# True duplicate — content already indexed, skip
os.unlink(temp_path)
skipped_duplicates += 1
continue
# Existing document is stuck (failed/pending/processing)
# Reset it to pending and re-dispatch for processing
existing.status = DocumentStatus.pending()
existing.content = "Processing..."
existing.document_metadata = {
**(existing.document_metadata or {}),
"file_size": file_size,
"upload_time": datetime.now().isoformat(),
}
existing.updated_at = get_current_timestamp()
created_documents.append(existing)
files_to_process.append(
(existing, temp_path, file.filename or "unknown")
)
continue
# Create pending document (visible immediately in UI via ElectricSQL)

View file

@@ -413,7 +413,7 @@ def process_file_upload_task(
if not os.path.exists(file_path):
logger.error(
f"[process_file_upload] File does not exist: {file_path}. "
"The temp file may have been cleaned up before the task ran."
"File may have been removed before syncing could start."
)
return
@@ -654,7 +654,7 @@ def process_file_upload_with_document_task(
if not os.path.exists(temp_path):
logger.error(
f"[process_file_upload_with_document] File does not exist: {temp_path}. "
"The temp file may have been cleaned up before the task ran."
"File may have been removed before syncing could start."
)
# Mark document as failed since file is missing
loop = asyncio.new_event_loop()
@@ -663,7 +663,7 @@ def process_file_upload_with_document_task(
loop.run_until_complete(
_mark_document_failed(
document_id,
"File not found - temp file may have been cleaned up",
"File not found. Please re-upload the file.",
)
)
finally: