feat: implement ETL pipeline with file classification and extraction services

This commit is contained in:
Anish Sarkar 2026-04-05 17:25:25 +05:30
parent 9c0af6569d
commit 5d22349dc1
6 changed files with 188 additions and 0 deletions

View file

@ -0,0 +1,21 @@
from pydantic import BaseModel, field_validator
class EtlRequest(BaseModel):
    """Request model describing one file handed to the ETL pipeline.

    Construction fails validation when ``filename`` is empty or consists
    only of whitespace.
    """

    # NOTE(review): presumably the location of the file to process —
    # confirm against the caller; no path validation happens here.
    file_path: str
    # Must contain at least one non-whitespace character (validated below).
    filename: str
    # Defaults to 0 when the caller supplies no estimate.
    estimated_pages: int = 0

    @field_validator("filename")
    @classmethod
    def filename_must_not_be_empty(cls, v: str) -> str:
        """Reject a blank ``filename``.

        Args:
            v: The candidate ``filename`` value.

        Returns:
            ``v`` exactly as received; surrounding whitespace is used only
            for the emptiness check and is not stripped from the result.

        Raises:
            ValueError: If ``v`` is empty or whitespace-only.
        """
        # Happy path first: anything left after stripping means it is valid.
        if v.strip():
            return v
        raise ValueError("filename must not be empty")
class EtlResult(BaseModel):
    """Result model carrying the output of an ETL extraction.

    Every field except ``actual_pages`` is required.
    """

    # NOTE(review): presumably the extracted document rendered as
    # Markdown — confirm against the producing service.
    markdown_content: str
    # NOTE(review): presumably identifies which extraction service
    # produced this result — confirm the set of expected values.
    etl_service: str
    # Defaults to 0 when the producer does not report a page count.
    actual_pages: int = 0
    # NOTE(review): free-form string here; the accepted values are not
    # constrained by this model — verify against the classifier.
    content_type: str