mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-07-02 22:01:05 +02:00
feat: Add Docling support as ETL_SERVICE option
- Added DOCLING as third ETL_SERVICE option (alongside UNSTRUCTURED/LLAMACLOUD)
- Implemented add_received_file_document_using_docling function
- Added Docling processing logic in documents_routes.py
- Enhanced chunking with configurable overlap support
- Added comprehensive document processing service
- Supports both CPU and GPU processing with user selection

Addresses #161 - Add Docling Support as an ETL_SERVICE
Follows same pattern as LlamaCloud integration (PR #123)
This commit is contained in:
parent
f852bcb188
commit
aa00822169
14 changed files with 3125 additions and 2090 deletions
|
|
@@ -2,7 +2,7 @@ version: '3.8'
|
|||
|
||||
services:
|
||||
frontend:
|
||||
image: ghcr.io/modsetter/surfsense_ui:latest
|
||||
build: ./surfsense_web
|
||||
ports:
|
||||
- "${FRONTEND_PORT:-3000}:3000"
|
||||
volumes:
|
||||
|
|
@@ -14,7 +14,7 @@ services:
|
|||
- NEXT_PUBLIC_API_URL=${NEXT_PUBLIC_API_URL:-http://backend:8000}
|
||||
|
||||
backend:
|
||||
image: ghcr.io/modsetter/surfsense_backend:latest
|
||||
build: ./surfsense_backend
|
||||
ports:
|
||||
- "${BACKEND_PORT:-8000}:8000"
|
||||
volumes:
|
||||
|
|
@@ -28,3 +28,15 @@ services:
|
|||
- PYTHONPATH=/app
|
||||
- UVICORN_LOOP=asyncio
|
||||
- UNSTRUCTURED_HAS_PATCHED_LOOP=1
|
||||
- NVIDIA_VISIBLE_DEVICES=all
|
||||
- NVIDIA_DRIVER_CAPABILITIES=compute,utility
|
||||
- LANGCHAIN_TRACING_V2=false
|
||||
- LANGSMITH_TRACING=false
|
||||
- TESSDATA_PREFIX=/usr/share/tesseract-ocr/5/tessdata/
|
||||
deploy:
|
||||
resources:
|
||||
reservations:
|
||||
devices:
|
||||
- driver: nvidia
|
||||
count: all
|
||||
capabilities: [gpu]
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue