mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-05-31 19:45:15 +02:00
feat: enhance document upload handling by managing duplicates and updating statuses for existing documents
This commit is contained in:
parent
72205ce11b
commit
e3faf4cc5e
2 changed files with 23 additions and 6 deletions
|
|
@@ -178,9 +178,26 @@ async def create_documents_file_upload(
|
|||
session, unique_identifier_hash
|
||||
)
|
||||
if existing:
|
||||
# Clean up temp file for duplicates
|
||||
os.unlink(temp_path)
|
||||
skipped_duplicates += 1
|
||||
if DocumentStatus.is_state(existing.status, DocumentStatus.READY):
|
||||
# True duplicate — content already indexed, skip
|
||||
os.unlink(temp_path)
|
||||
skipped_duplicates += 1
|
||||
continue
|
||||
|
||||
# Existing document is stuck (failed/pending/processing)
|
||||
# Reset it to pending and re-dispatch for processing
|
||||
existing.status = DocumentStatus.pending()
|
||||
existing.content = "Processing..."
|
||||
existing.document_metadata = {
|
||||
**(existing.document_metadata or {}),
|
||||
"file_size": file_size,
|
||||
"upload_time": datetime.now().isoformat(),
|
||||
}
|
||||
existing.updated_at = get_current_timestamp()
|
||||
created_documents.append(existing)
|
||||
files_to_process.append(
|
||||
(existing, temp_path, file.filename or "unknown")
|
||||
)
|
||||
continue
|
||||
|
||||
# Create pending document (visible immediately in UI via ElectricSQL)
|
||||
|
|
|
|||
|
|
@@ -413,7 +413,7 @@ def process_file_upload_task(
|
|||
if not os.path.exists(file_path):
|
||||
logger.error(
|
||||
f"[process_file_upload] File does not exist: {file_path}. "
|
||||
"The temp file may have been cleaned up before the task ran."
|
||||
"File may have been removed before syncing could start."
|
||||
)
|
||||
return
|
||||
|
||||
|
|
@@ -654,7 +654,7 @@ def process_file_upload_with_document_task(
|
|||
if not os.path.exists(temp_path):
|
||||
logger.error(
|
||||
f"[process_file_upload_with_document] File does not exist: {temp_path}. "
|
||||
"The temp file may have been cleaned up before the task ran."
|
||||
"File may have been removed before syncing could start."
|
||||
)
|
||||
# Mark document as failed since file is missing
|
||||
loop = asyncio.new_event_loop()
|
||||
|
|
@@ -663,7 +663,7 @@ def process_file_upload_with_document_task(
|
|||
loop.run_until_complete(
|
||||
_mark_document_failed(
|
||||
document_id,
|
||||
"File not found - temp file may have been cleaned up",
|
||||
"File not found. Please re-upload the file.",
|
||||
)
|
||||
)
|
||||
finally:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue