diff --git a/surfsense_backend/app/tasks/document_processors/file_processors.py b/surfsense_backend/app/tasks/document_processors/file_processors.py index 1858a5b93..e762607c1 100644 --- a/surfsense_backend/app/tasks/document_processors/file_processors.py +++ b/surfsense_backend/app/tasks/document_processors/file_processors.py @@ -886,13 +886,40 @@ async def process_file_in_background( ) # Use Docling service for document processing + import warnings + from app.services.docling_service import create_docling_service # Create Docling service docling_service = create_docling_service() - # Process the document - result = await docling_service.process_document(file_path, filename) + # Suppress pdfminer warnings that can cause processing to hang + # These warnings are harmless but can spam logs and potentially halt processing + # Suppress both Python warnings and logging warnings from pdfminer + pdfminer_logger = logging.getLogger("pdfminer") + original_level = pdfminer_logger.level + + with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", category=UserWarning, module="pdfminer" + ) + warnings.filterwarnings( + "ignore", + message=".*Cannot set gray non-stroke color.*", + ) + warnings.filterwarnings( + "ignore", message=".*invalid float value.*" + ) + + # Temporarily suppress pdfminer logging warnings + pdfminer_logger.setLevel(logging.ERROR) + + try: + # Process the document + result = await docling_service.process_document(file_path, filename) + finally: + # Restore original logging level + pdfminer_logger.setLevel(original_level) # Clean up the temp file import os diff --git a/surfsense_backend/pyproject.toml b/surfsense_backend/pyproject.toml index 58511a101..1b3c18660 100644 --- a/surfsense_backend/pyproject.toml +++ b/surfsense_backend/pyproject.toml @@ -1,3 +1,14 @@ +[build-system] +requires = ["setuptools>=61.0", "wheel"] +build-backend = "setuptools.build_meta" + +[tool.setuptools] +packages = ["app"] +include-package-data = false + +[tool.setuptools.package-data] +app = ["**/*"] + [project] name = "surf-new-backend" version = "0.0.8"