fix: suppress pdfminer warnings to prevent upload halting

- Added warning suppression for pdfminer warnings during Docling PDF processing
- Suppresses 'Cannot set gray non-stroke color' warnings that cause uploads to halt
- Temporarily sets pdfminer logger to ERROR level during document processing
- Fixes issue where files ~34MB would fail due to pdfminer warning spam

Resolves issue where PDF uploads would halt with repeated pdfminer warnings
This commit is contained in:
Chirag 2025-11-01 17:42:23 +05:30
parent 57fd82f17f
commit 094bdfad45
2 changed files with 40 additions and 2 deletions

View file

@ -886,13 +886,40 @@ async def process_file_in_background(
)
# Use Docling service for document processing
import warnings
from app.services.docling_service import create_docling_service
# Create Docling service
docling_service = create_docling_service()
# Process the document
result = await docling_service.process_document(file_path, filename)
# Suppress pdfminer warnings that can cause processing to hang
# These warnings are harmless but can spam logs and potentially halt processing
# Suppress both Python warnings and logging warnings from pdfminer
pdfminer_logger = logging.getLogger("pdfminer")
original_level = pdfminer_logger.level
with warnings.catch_warnings():
warnings.filterwarnings(
"ignore", category=UserWarning, module="pdfminer"
)
warnings.filterwarnings(
"ignore",
message=".*Cannot set gray non-stroke color.*",
)
warnings.filterwarnings(
"ignore", message=".*invalid float value.*"
)
# Temporarily suppress pdfminer logging warnings
pdfminer_logger.setLevel(logging.ERROR)
try:
# Process the document
result = await docling_service.process_document(file_path, filename)
finally:
# Restore original logging level
pdfminer_logger.setLevel(original_level)
# Clean up the temp file
import os