diff --git a/surfsense_backend/app/tasks/document_processors/file_processors.py b/surfsense_backend/app/tasks/document_processors/file_processors.py index 041053a04..205711a44 100644 --- a/surfsense_backend/app/tasks/document_processors/file_processors.py +++ b/surfsense_backend/app/tasks/document_processors/file_processors.py @@ -1632,6 +1632,8 @@ async def process_file_in_background_with_document( from app.config import config as app_config from app.services.llm_service import get_user_long_context_llm + doc_id = document.id + try: markdown_content = None etl_service = None @@ -1855,7 +1857,7 @@ async def process_file_in_background_with_document( content_hash = generate_content_hash(markdown_content, search_space_id) existing_by_content = await check_duplicate_document(session, content_hash) - if existing_by_content and existing_by_content.id != document.id: + if existing_by_content and existing_by_content.id != doc_id: # Duplicate content found - mark this document as failed logging.info( f"Duplicate content detected for {filename}, " @@ -1918,7 +1920,7 @@ async def process_file_in_background_with_document( log_entry, f"Successfully processed file: {filename}", { - "document_id": document.id, + "document_id": doc_id, "content_hash": content_hash, "file_type": etl_service, "chunks_count": len(chunks), @@ -1946,7 +1948,7 @@ async def process_file_in_background_with_document( { "error_type": type(e).__name__, "filename": filename, - "document_id": document.id, + "document_id": doc_id, }, ) logging.error(f"Error processing file with document: {error_message}") diff --git a/surfsense_backend/tests/e2e/test_document_upload.py b/surfsense_backend/tests/e2e/test_document_upload.py index f217dc460..f3ff8b7a6 100644 --- a/surfsense_backend/tests/e2e/test_document_upload.py +++ b/surfsense_backend/tests/e2e/test_document_upload.py @@ -42,7 +42,6 @@ def _assert_document_ready(doc: dict, *, expected_filename: str) -> None: assert doc["content"], "Document content (summary) should not be empty" assert doc["content_hash"], "content_hash should be set" assert doc["document_metadata"].get("FILE_NAME") == expected_filename - assert doc["document_metadata"].get("ETL_SERVICE"), "ETL_SERVICE should be set" # ---------------------------------------------------------------------------