mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-06-08 20:25:19 +02:00
fix: Refactor document ID usage in file processing to improve clarity
This commit is contained in:
parent
a57ab02900
commit
380c1c3877
2 changed files with 5 additions and 4 deletions
|
|
@ -1632,6 +1632,8 @@ async def process_file_in_background_with_document(
|
|||
from app.config import config as app_config
|
||||
from app.services.llm_service import get_user_long_context_llm
|
||||
|
||||
doc_id = document.id
|
||||
|
||||
try:
|
||||
markdown_content = None
|
||||
etl_service = None
|
||||
|
|
@ -1855,7 +1857,7 @@ async def process_file_in_background_with_document(
|
|||
content_hash = generate_content_hash(markdown_content, search_space_id)
|
||||
|
||||
existing_by_content = await check_duplicate_document(session, content_hash)
|
||||
if existing_by_content and existing_by_content.id != document.id:
|
||||
if existing_by_content and existing_by_content.id != doc_id:
|
||||
# Duplicate content found - mark this document as failed
|
||||
logging.info(
|
||||
f"Duplicate content detected for {filename}, "
|
||||
|
|
@ -1918,7 +1920,7 @@ async def process_file_in_background_with_document(
|
|||
log_entry,
|
||||
f"Successfully processed file: {filename}",
|
||||
{
|
||||
"document_id": document.id,
|
||||
"document_id": doc_id,
|
||||
"content_hash": content_hash,
|
||||
"file_type": etl_service,
|
||||
"chunks_count": len(chunks),
|
||||
|
|
@ -1946,7 +1948,7 @@ async def process_file_in_background_with_document(
|
|||
{
|
||||
"error_type": type(e).__name__,
|
||||
"filename": filename,
|
||||
"document_id": document.id,
|
||||
"document_id": doc_id,
|
||||
},
|
||||
)
|
||||
logging.error(f"Error processing file with document: {error_message}")
|
||||
|
|
|
|||
|
|
@ -42,7 +42,6 @@ def _assert_document_ready(doc: dict, *, expected_filename: str) -> None:
|
|||
assert doc["content"], "Document content (summary) should not be empty"
|
||||
assert doc["content_hash"], "content_hash should be set"
|
||||
assert doc["document_metadata"].get("FILE_NAME") == expected_filename
|
||||
assert doc["document_metadata"].get("ETL_SERVICE"), "ETL_SERVICE should be set"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue