mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-04-27 17:56:25 +02:00
Revert "Merge pull request #686 from AnishSarkar22/feat/replace-logs"
This reverts commit5963a1125e, reversing changes made to0d2a2f8ea1.
This commit is contained in:
parent
5963a1125e
commit
3418c0e026
84 changed files with 4613 additions and 8266 deletions
|
|
@ -17,9 +17,8 @@ from sqlalchemy.exc import SQLAlchemyError
|
|||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.config import config as app_config
|
||||
from app.db import Document, DocumentType, Log, Notification
|
||||
from app.db import Document, DocumentType, Log
|
||||
from app.services.llm_service import get_user_long_context_llm
|
||||
from app.services.notification_service import NotificationService
|
||||
from app.services.task_logging_service import TaskLoggingService
|
||||
from app.utils.document_converters import (
|
||||
convert_document_to_markdown,
|
||||
|
|
@ -595,23 +594,10 @@ async def process_file_in_background(
|
|||
log_entry: Log,
|
||||
connector: dict
|
||||
| None = None, # Optional: {"type": "GOOGLE_DRIVE_FILE", "metadata": {...}}
|
||||
notification: Notification
|
||||
| None = None, # Optional notification for progress updates
|
||||
) -> Document | None:
|
||||
):
|
||||
try:
|
||||
# Check if the file is a markdown or text file
|
||||
if filename.lower().endswith((".md", ".markdown", ".txt")):
|
||||
# Update notification: parsing stage
|
||||
if notification:
|
||||
await (
|
||||
NotificationService.document_processing.notify_processing_progress(
|
||||
session,
|
||||
notification,
|
||||
stage="parsing",
|
||||
stage_message="Reading file",
|
||||
)
|
||||
)
|
||||
|
||||
await task_logger.log_task_progress(
|
||||
log_entry,
|
||||
f"Processing markdown/text file: {filename}",
|
||||
|
|
@ -631,14 +617,6 @@ async def process_file_in_background(
|
|||
print("Error deleting temp file", e)
|
||||
pass
|
||||
|
||||
# Update notification: chunking stage
|
||||
if notification:
|
||||
await (
|
||||
NotificationService.document_processing.notify_processing_progress(
|
||||
session, notification, stage="chunking"
|
||||
)
|
||||
)
|
||||
|
||||
await task_logger.log_task_progress(
|
||||
log_entry,
|
||||
f"Creating document from markdown content: {filename}",
|
||||
|
|
@ -666,30 +644,17 @@ async def process_file_in_background(
|
|||
"file_type": "markdown",
|
||||
},
|
||||
)
|
||||
return result
|
||||
else:
|
||||
await task_logger.log_task_success(
|
||||
log_entry,
|
||||
f"Markdown file already exists (duplicate): {filename}",
|
||||
{"duplicate_detected": True, "file_type": "markdown"},
|
||||
)
|
||||
return None
|
||||
|
||||
# Check if the file is an audio file
|
||||
elif filename.lower().endswith(
|
||||
(".mp3", ".mp4", ".mpeg", ".mpga", ".m4a", ".wav", ".webm")
|
||||
):
|
||||
# Update notification: parsing stage (transcription)
|
||||
if notification:
|
||||
await (
|
||||
NotificationService.document_processing.notify_processing_progress(
|
||||
session,
|
||||
notification,
|
||||
stage="parsing",
|
||||
stage_message="Transcribing audio",
|
||||
)
|
||||
)
|
||||
|
||||
await task_logger.log_task_progress(
|
||||
log_entry,
|
||||
f"Processing audio file for transcription: {filename}",
|
||||
|
|
@ -773,14 +738,6 @@ async def process_file_in_background(
|
|||
},
|
||||
)
|
||||
|
||||
# Update notification: chunking stage
|
||||
if notification:
|
||||
await (
|
||||
NotificationService.document_processing.notify_processing_progress(
|
||||
session, notification, stage="chunking"
|
||||
)
|
||||
)
|
||||
|
||||
# Clean up the temp file
|
||||
try:
|
||||
os.unlink(file_path)
|
||||
|
|
@ -808,14 +765,12 @@ async def process_file_in_background(
|
|||
"stt_service": stt_service_type,
|
||||
},
|
||||
)
|
||||
return result
|
||||
else:
|
||||
await task_logger.log_task_success(
|
||||
log_entry,
|
||||
f"Audio file transcript already exists (duplicate): {filename}",
|
||||
{"duplicate_detected": True, "file_type": "audio"},
|
||||
)
|
||||
return None
|
||||
|
||||
else:
|
||||
# Import page limit service
|
||||
|
|
@ -880,15 +835,6 @@ async def process_file_in_background(
|
|||
) from e
|
||||
|
||||
if app_config.ETL_SERVICE == "UNSTRUCTURED":
|
||||
# Update notification: parsing stage
|
||||
if notification:
|
||||
await NotificationService.document_processing.notify_processing_progress(
|
||||
session,
|
||||
notification,
|
||||
stage="parsing",
|
||||
stage_message="Extracting content",
|
||||
)
|
||||
|
||||
await task_logger.log_task_progress(
|
||||
log_entry,
|
||||
f"Processing file with Unstructured ETL: {filename}",
|
||||
|
|
@ -914,12 +860,6 @@ async def process_file_in_background(
|
|||
|
||||
docs = await loader.aload()
|
||||
|
||||
# Update notification: chunking stage
|
||||
if notification:
|
||||
await NotificationService.document_processing.notify_processing_progress(
|
||||
session, notification, stage="chunking", chunks_count=len(docs)
|
||||
)
|
||||
|
||||
await task_logger.log_task_progress(
|
||||
log_entry,
|
||||
f"Unstructured ETL completed, creating document: {filename}",
|
||||
|
|
@ -979,7 +919,6 @@ async def process_file_in_background(
|
|||
"pages_processed": final_page_count,
|
||||
},
|
||||
)
|
||||
return result
|
||||
else:
|
||||
await task_logger.log_task_success(
|
||||
log_entry,
|
||||
|
|
@ -990,18 +929,8 @@ async def process_file_in_background(
|
|||
"etl_service": "UNSTRUCTURED",
|
||||
},
|
||||
)
|
||||
return None
|
||||
|
||||
elif app_config.ETL_SERVICE == "LLAMACLOUD":
|
||||
# Update notification: parsing stage
|
||||
if notification:
|
||||
await NotificationService.document_processing.notify_processing_progress(
|
||||
session,
|
||||
notification,
|
||||
stage="parsing",
|
||||
stage_message="Extracting content",
|
||||
)
|
||||
|
||||
await task_logger.log_task_progress(
|
||||
log_entry,
|
||||
f"Processing file with LlamaCloud ETL: {filename}",
|
||||
|
|
@ -1035,15 +964,6 @@ async def process_file_in_background(
|
|||
split_by_page=False
|
||||
)
|
||||
|
||||
# Update notification: chunking stage
|
||||
if notification:
|
||||
await NotificationService.document_processing.notify_processing_progress(
|
||||
session,
|
||||
notification,
|
||||
stage="chunking",
|
||||
chunks_count=len(markdown_documents),
|
||||
)
|
||||
|
||||
await task_logger.log_task_progress(
|
||||
log_entry,
|
||||
f"LlamaCloud parsing completed, creating documents: {filename}",
|
||||
|
|
@ -1136,7 +1056,6 @@ async def process_file_in_background(
|
|||
"documents_count": len(markdown_documents),
|
||||
},
|
||||
)
|
||||
return last_created_doc
|
||||
else:
|
||||
# All documents were duplicates (markdown_documents was not empty, but all returned None)
|
||||
await task_logger.log_task_success(
|
||||
|
|
@ -1149,18 +1068,8 @@ async def process_file_in_background(
|
|||
"documents_count": len(markdown_documents),
|
||||
},
|
||||
)
|
||||
return None
|
||||
|
||||
elif app_config.ETL_SERVICE == "DOCLING":
|
||||
# Update notification: parsing stage
|
||||
if notification:
|
||||
await NotificationService.document_processing.notify_processing_progress(
|
||||
session,
|
||||
notification,
|
||||
stage="parsing",
|
||||
stage_message="Extracting content",
|
||||
)
|
||||
|
||||
await task_logger.log_task_progress(
|
||||
log_entry,
|
||||
f"Processing file with Docling ETL: {filename}",
|
||||
|
|
@ -1243,12 +1152,6 @@ async def process_file_in_background(
|
|||
},
|
||||
)
|
||||
|
||||
# Update notification: chunking stage
|
||||
if notification:
|
||||
await NotificationService.document_processing.notify_processing_progress(
|
||||
session, notification, stage="chunking"
|
||||
)
|
||||
|
||||
# Process the document using our Docling background task
|
||||
doc_result = await add_received_file_document_using_docling(
|
||||
session,
|
||||
|
|
@ -1281,7 +1184,6 @@ async def process_file_in_background(
|
|||
"pages_processed": final_page_count,
|
||||
},
|
||||
)
|
||||
return doc_result
|
||||
else:
|
||||
await task_logger.log_task_success(
|
||||
log_entry,
|
||||
|
|
@ -1292,7 +1194,6 @@ async def process_file_in_background(
|
|||
"etl_service": "DOCLING",
|
||||
},
|
||||
)
|
||||
return None
|
||||
except Exception as e:
|
||||
await session.rollback()
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue