mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-06-06 20:15:17 +02:00
feat: assign folder_id to documents before indexing to ensure correct folder visibility during processing
This commit is contained in:
parent
bd21c2842d
commit
44e39792da
1 changed file with 23 additions and 14 deletions
|
|
@@ -719,6 +719,21 @@ async def index_local_folder(
|
||||||
}
|
}
|
||||||
documents = await pipeline.prepare_for_indexing(connector_docs)
|
documents = await pipeline.prepare_for_indexing(connector_docs)
|
||||||
|
|
||||||
|
# Assign folder_id immediately so docs appear in the correct
|
||||||
|
# folder while still pending/processing (visible via Zero sync).
|
||||||
|
for document in documents:
|
||||||
|
cd = doc_map.get(document.unique_identifier_hash)
|
||||||
|
if cd is None:
|
||||||
|
continue
|
||||||
|
rel_path = (cd.metadata or {}).get("file_path", "")
|
||||||
|
parent_dir = str(Path(rel_path).parent) if rel_path else ""
|
||||||
|
if parent_dir == ".":
|
||||||
|
parent_dir = ""
|
||||||
|
document.folder_id = folder_mapping.get(
|
||||||
|
parent_dir, folder_mapping.get("")
|
||||||
|
)
|
||||||
|
await session.commit()
|
||||||
|
|
||||||
llm = await get_user_long_context_llm(session, user_id, search_space_id)
|
llm = await get_user_long_context_llm(session, user_id, search_space_id)
|
||||||
|
|
||||||
for document in documents:
|
for document in documents:
|
||||||
|
|
@@ -732,17 +747,9 @@ async def index_local_folder(
|
||||||
if DocumentStatus.is_state(result.status, DocumentStatus.READY):
|
if DocumentStatus.is_state(result.status, DocumentStatus.READY):
|
||||||
indexed_count += 1
|
indexed_count += 1
|
||||||
|
|
||||||
# Assign folder_id and mtime post-pipeline
|
|
||||||
rel_path = (connector_doc.metadata or {}).get("file_path", "")
|
|
||||||
parent_dir = str(Path(rel_path).parent) if rel_path else ""
|
|
||||||
if parent_dir == ".":
|
|
||||||
parent_dir = ""
|
|
||||||
fid = folder_mapping.get(parent_dir, folder_mapping.get(""))
|
|
||||||
|
|
||||||
unique_id = connector_doc.unique_id
|
unique_id = connector_doc.unique_id
|
||||||
mtime_info = file_meta_map.get(unique_id, {})
|
mtime_info = file_meta_map.get(unique_id, {})
|
||||||
|
|
||||||
result.folder_id = fid
|
|
||||||
doc_meta = dict(result.document_metadata or {})
|
doc_meta = dict(result.document_metadata or {})
|
||||||
doc_meta["mtime"] = mtime_info.get("mtime")
|
doc_meta["mtime"] = mtime_info.get("mtime")
|
||||||
result.document_metadata = doc_meta
|
result.document_metadata = doc_meta
|
||||||
|
|
@@ -894,16 +901,18 @@ async def _index_single_file(
|
||||||
return 0, 1, None
|
return 0, 1, None
|
||||||
|
|
||||||
db_doc = documents[0]
|
db_doc = documents[0]
|
||||||
await pipeline.index(db_doc, connector_doc, llm)
|
|
||||||
|
|
||||||
# Post-pipeline: assign folder_id and mtime
|
# Assign folder_id before indexing so the doc appears in the
|
||||||
await session.refresh(db_doc)
|
# correct folder while still pending/processing.
|
||||||
folder_id = None
|
|
||||||
if root_folder_id:
|
if root_folder_id:
|
||||||
folder_id = await _resolve_folder_for_file(
|
db_doc.folder_id = await _resolve_folder_for_file(
|
||||||
session, rel_path, root_folder_id, search_space_id, user_id
|
session, rel_path, root_folder_id, search_space_id, user_id
|
||||||
)
|
)
|
||||||
db_doc.folder_id = folder_id
|
await session.commit()
|
||||||
|
|
||||||
|
await pipeline.index(db_doc, connector_doc, llm)
|
||||||
|
|
||||||
|
await session.refresh(db_doc)
|
||||||
doc_meta = dict(db_doc.document_metadata or {})
|
doc_meta = dict(db_doc.document_metadata or {})
|
||||||
doc_meta["mtime"] = mtime
|
doc_meta["mtime"] = mtime
|
||||||
db_doc.document_metadata = doc_meta
|
db_doc.document_metadata = doc_meta
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue