mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-05-17 18:35:19 +02:00
Merge commit '97ed791ca0' into dev
This commit is contained in:
commit
351b38801b
1 changed file with 29 additions and 2 deletions
|
|
@@ -4,6 +4,8 @@ File document processors for different ETL services (Unstructured, LlamaCloud, D
|
|||
|
||||
import contextlib
|
||||
import logging
|
||||
import warnings
|
||||
from logging import ERROR, getLogger
|
||||
|
||||
from fastapi import HTTPException
|
||||
from langchain_core.documents import Document as LangChainDocument
|
||||
|
|
@@ -891,8 +893,33 @@ async def process_file_in_background(
|
|||
# Create Docling service
|
||||
docling_service = create_docling_service()
|
||||
|
||||
# Process the document
|
||||
result = await docling_service.process_document(file_path, filename)
|
||||
# Suppress pdfminer warnings that can cause processing to hang
|
||||
# These warnings are harmless but can spam logs and potentially halt processing
|
||||
# Suppress both Python warnings and logging warnings from pdfminer
|
||||
pdfminer_logger = getLogger("pdfminer")
|
||||
original_level = pdfminer_logger.level
|
||||
|
||||
with warnings.catch_warnings():
|
||||
warnings.filterwarnings(
|
||||
"ignore", category=UserWarning, module="pdfminer"
|
||||
)
|
||||
warnings.filterwarnings(
|
||||
"ignore",
|
||||
message=".*Cannot set gray non-stroke color.*",
|
||||
)
|
||||
warnings.filterwarnings(
|
||||
"ignore", message=".*invalid float value.*"
|
||||
)
|
||||
|
||||
# Temporarily suppress pdfminer logging warnings
|
||||
pdfminer_logger.setLevel(ERROR)
|
||||
|
||||
try:
|
||||
# Process the document
|
||||
result = await docling_service.process_document(file_path, filename)
|
||||
finally:
|
||||
# Restore original logging level
|
||||
pdfminer_logger.setLevel(original_level)
|
||||
|
||||
# Clean up the temp file
|
||||
import os
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue