mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-04-26 17:26:23 +02:00
Some checks are pending
Build and Push Docker Images / tag_release (push) Waiting to run
Build and Push Docker Images / build (./surfsense_backend, ./surfsense_backend/Dockerfile, backend, surfsense-backend, ubuntu-24.04-arm, linux/arm64, arm64) (push) Blocked by required conditions
Build and Push Docker Images / build (./surfsense_backend, ./surfsense_backend/Dockerfile, backend, surfsense-backend, ubuntu-latest, linux/amd64, amd64) (push) Blocked by required conditions
Build and Push Docker Images / build (./surfsense_web, ./surfsense_web/Dockerfile, web, surfsense-web, ubuntu-24.04-arm, linux/arm64, arm64) (push) Blocked by required conditions
Build and Push Docker Images / build (./surfsense_web, ./surfsense_web/Dockerfile, web, surfsense-web, ubuntu-latest, linux/amd64, amd64) (push) Blocked by required conditions
Build and Push Docker Images / create_manifest (backend, surfsense-backend) (push) Blocked by required conditions
Build and Push Docker Images / create_manifest (web, surfsense-web) (push) Blocked by required conditions
- Introduced a `ProcessingMode` enum to differentiate between basic and premium processing modes. - Updated `EtlRequest` to include a `processing_mode` field, defaulting to basic. - Enhanced ETL pipeline services to utilize the selected processing mode for Azure Document Intelligence and LlamaCloud parsing. - Modified various routes and services to handle processing mode, affecting document upload and indexing tasks. - Improved error handling and logging to include processing mode details. - Added tests to validate processing mode functionality and its impact on ETL operations.
48 lines
1.1 KiB
Python
48 lines
1.1 KiB
Python
from enum import StrEnum
|
|
|
|
from pydantic import BaseModel, field_validator
|
|
|
|
|
|
class ProcessingMode(StrEnum):
|
|
BASIC = "basic"
|
|
PREMIUM = "premium"
|
|
|
|
@classmethod
|
|
def coerce(cls, value: str | None) -> "ProcessingMode":
|
|
if value is None:
|
|
return cls.BASIC
|
|
try:
|
|
return cls(value.lower())
|
|
except ValueError:
|
|
return cls.BASIC
|
|
|
|
@property
|
|
def page_multiplier(self) -> int:
|
|
return _PAGE_MULTIPLIERS[self]
|
|
|
|
|
|
# Integer multiplier for each processing mode, returned by
# ``ProcessingMode.page_multiplier``. Every enum member needs an entry here,
# otherwise that property raises ``KeyError``.
# NOTE(review): presumably scales page counts for usage accounting (premium
# weighs 10x basic) - confirm against the callers.
_PAGE_MULTIPLIERS: dict["ProcessingMode", int] = {
    ProcessingMode.BASIC: 1,
    ProcessingMode.PREMIUM: 10,
}
|
|
|
|
|
|
class EtlRequest(BaseModel):
    """Input model describing one file to run through the ETL pipeline."""

    # Location of the file to process (required).
    # NOTE(review): presumably a local filesystem path - confirm with callers.
    file_path: str
    # Name of the file (required); must contain a non-whitespace character.
    filename: str
    # Page estimate for the file; 0 when the caller supplies none.
    estimated_pages: int = 0
    # Requested processing tier; basic unless the caller opts in to premium.
    processing_mode: ProcessingMode = ProcessingMode.BASIC

    @field_validator("filename")
    @classmethod
    def filename_must_not_be_empty(cls, v: str) -> str:
        """Reject filenames that are empty or consist only of whitespace.

        The accepted value is returned unchanged (it is not stripped).

        Raises:
            ValueError: If *v* has no non-whitespace characters.
        """
        if v.strip():
            return v
        raise ValueError("filename must not be empty")
|
|
|
|
|
|
class EtlResult(BaseModel):
    """Output model carrying the result of one ETL run."""

    # Extracted document content (required).
    # NOTE(review): presumably Markdown text, going by the field name.
    markdown_content: str
    # Identifier of the ETL service that produced the content (required).
    etl_service: str
    # Page count reported for the processed document; 0 when not provided.
    actual_pages: int = 0
    # Content type of the processed input (required).
    # NOTE(review): the exact vocabulary (MIME type vs. internal label) is
    # not visible here - confirm.
    content_type: str
|