mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-05-12 01:02:39 +02:00
fix: suppress pdfminer warnings to prevent upload halting
- Added warning suppression for pdfminer warnings during Docling PDF processing
- Suppresses 'Cannot set gray non-stroke color' warnings that cause uploads to halt
- Temporarily sets pdfminer logger to ERROR level during document processing
- Fixes issue where files ~34MB would fail due to pdfminer warning spam

Resolves issue where PDF uploads would halt with repeated pdfminer warnings
This commit is contained in:
parent
57fd82f17f
commit
094bdfad45
2 changed files with 40 additions and 2 deletions
|
|
@@ -886,13 +886,40 @@ async def process_file_in_background(
|
||||||
)
|
)
|
||||||
|
|
||||||
# Use Docling service for document processing
|
# Use Docling service for document processing
|
||||||
|
import warnings
|
||||||
|
|
||||||
from app.services.docling_service import create_docling_service
|
from app.services.docling_service import create_docling_service
|
||||||
|
|
||||||
# Create Docling service
|
# Create Docling service
|
||||||
docling_service = create_docling_service()
|
docling_service = create_docling_service()
|
||||||
|
|
||||||
# Process the document
|
# Suppress pdfminer warnings that can cause processing to hang
|
||||||
result = await docling_service.process_document(file_path, filename)
|
# These warnings are harmless but can spam logs and potentially halt processing
|
||||||
|
# Suppress both Python warnings and logging warnings from pdfminer
|
||||||
|
pdfminer_logger = logging.getLogger("pdfminer")
|
||||||
|
original_level = pdfminer_logger.level
|
||||||
|
|
||||||
|
with warnings.catch_warnings():
|
||||||
|
warnings.filterwarnings(
|
||||||
|
"ignore", category=UserWarning, module="pdfminer"
|
||||||
|
)
|
||||||
|
warnings.filterwarnings(
|
||||||
|
"ignore",
|
||||||
|
message=".*Cannot set gray non-stroke color.*",
|
||||||
|
)
|
||||||
|
warnings.filterwarnings(
|
||||||
|
"ignore", message=".*invalid float value.*"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Temporarily suppress pdfminer logging warnings
|
||||||
|
pdfminer_logger.setLevel(logging.ERROR)
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Process the document
|
||||||
|
result = await docling_service.process_document(file_path, filename)
|
||||||
|
finally:
|
||||||
|
# Restore original logging level
|
||||||
|
pdfminer_logger.setLevel(original_level)
|
||||||
|
|
||||||
# Clean up the temp file
|
# Clean up the temp file
|
||||||
import os
|
import os
|
||||||
|
|
|
||||||
|
|
@@ -1,3 +1,14 @@
|
||||||
|
[build-system]
|
||||||
|
requires = ["setuptools>=61.0", "wheel"]
|
||||||
|
build-backend = "setuptools.build_meta"
|
||||||
|
|
||||||
|
[tool.setuptools]
|
||||||
|
packages = ["app"]
|
||||||
|
include-package-data = false
|
||||||
|
|
||||||
|
[tool.setuptools.package-data]
|
||||||
|
app = ["**/*"]
|
||||||
|
|
||||||
[project]
|
[project]
|
||||||
name = "surf-new-backend"
|
name = "surf-new-backend"
|
||||||
version = "0.0.8"
|
version = "0.0.8"
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue