mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-04-26 09:16:22 +02:00
feat(connectors): add connector parameter to file processor for source tracking
- Add optional 'connector' parameter with 'type' and 'metadata' fields; - Create helper function _update_document_from_connector; - Use the document_metadata column (not metadata) for the JSON field; - Merge connector metadata into the existing metadata using dict unpacking ({**existing, **new}); - Google Drive documents are now marked as GOOGLE_DRIVE_CONNECTOR; - Backward compatible: no changes to existing logic; - Simple and clean implementation
This commit is contained in:
parent
8da58be9e0
commit
a5935bc677
3 changed files with 60 additions and 71 deletions
|
|
@ -447,6 +447,24 @@ async def add_received_file_document_using_docling(
|
|||
) from e
|
||||
|
||||
|
||||
async def _update_document_from_connector(
    document: Document | None, connector: dict | None, session: AsyncSession
) -> None:
    """Apply connector-supplied type and metadata to a document, then commit.

    Args:
        document: The document to update. When it is ``None`` (or otherwise
            falsy) the call is a no-op and nothing is committed.
        connector: Optional connector info. The keys read here are ``"type"``
            (assigned to ``document.document_type``) and ``"metadata"``
            (merged into ``document.document_metadata``). When it is ``None``
            or empty the call is a no-op and nothing is committed.
        session: Session whose ``commit()`` is awaited after the update.
    """
    # Guard clause: without both a document and connector info there is
    # nothing to update and nothing to commit.
    if not document or not connector:
        return

    if "type" in connector:
        document.document_type = connector["type"]

    if "metadata" in connector:
        # Either side may be None/empty; treat that as "no entries" so the
        # merge never tries to unpack None.
        existing = document.document_metadata or {}
        incoming = connector["metadata"] or {}
        # Connector values win on key collisions. A new dict is always built,
        # so the document never shares a mutable object with the caller's
        # connector payload.
        document.document_metadata = {**existing, **incoming}

    await session.commit()
|
||||
|
||||
|
||||
async def process_file_in_background(
|
||||
file_path: str,
|
||||
filename: str,
|
||||
|
|
@ -455,6 +473,7 @@ async def process_file_in_background(
|
|||
session: AsyncSession,
|
||||
task_logger: TaskLoggingService,
|
||||
log_entry: Log,
|
||||
connector: dict | None = None, # Optional: {"type": "GOOGLE_DRIVE_CONNECTOR", "metadata": {...}}
|
||||
):
|
||||
try:
|
||||
# Check if the file is a markdown or text file
|
||||
|
|
@ -492,6 +511,9 @@ async def process_file_in_background(
|
|||
session, filename, markdown_content, search_space_id, user_id
|
||||
)
|
||||
|
||||
# Update from connector if provided
|
||||
await _update_document_from_connector(result, connector, session)
|
||||
|
||||
if result:
|
||||
await task_logger.log_task_success(
|
||||
log_entry,
|
||||
|
|
@ -608,6 +630,9 @@ async def process_file_in_background(
|
|||
session, filename, transcribed_text, search_space_id, user_id
|
||||
)
|
||||
|
||||
# Update from connector if provided
|
||||
await _update_document_from_connector(result, connector, session)
|
||||
|
||||
if result:
|
||||
await task_logger.log_task_success(
|
||||
log_entry,
|
||||
|
|
@ -753,6 +778,9 @@ async def process_file_in_background(
|
|||
session, filename, docs, search_space_id, user_id
|
||||
)
|
||||
|
||||
# Update from connector if provided
|
||||
await _update_document_from_connector(result, connector, session)
|
||||
|
||||
if result:
|
||||
# Update page usage after successful processing
|
||||
# allow_exceed=True because document was already created after passing initial check
|
||||
|
|
@ -897,6 +925,9 @@ async def process_file_in_background(
|
|||
user_id, final_page_count, allow_exceed=True
|
||||
)
|
||||
|
||||
# Update from connector if provided
|
||||
await _update_document_from_connector(last_created_doc, connector, session)
|
||||
|
||||
await task_logger.log_task_success(
|
||||
log_entry,
|
||||
f"Successfully processed file with LlamaCloud: {filename}",
|
||||
|
|
@ -1021,6 +1052,9 @@ async def process_file_in_background(
|
|||
user_id, final_page_count, allow_exceed=True
|
||||
)
|
||||
|
||||
# Update from connector if provided
|
||||
await _update_document_from_connector(doc_result, connector, session)
|
||||
|
||||
await task_logger.log_task_success(
|
||||
log_entry,
|
||||
f"Successfully processed file with Docling: {filename}",
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue