feat: add support for Azure Document Intelligence in ETL pipeline

This commit is contained in:
Anish Sarkar 2026-04-08 00:59:12 +05:30
parent 73a9c5fbd1
commit 1fa8d1220b
10 changed files with 248 additions and 5 deletions

View file

@@ -56,7 +56,7 @@ class EtlPipelineService:
if not etl_service:
raise EtlServiceUnavailableError(
"No ETL_SERVICE configured. "
"Set ETL_SERVICE to UNSTRUCTURED, LLAMACLOUD, or DOCLING in your .env"
"Set ETL_SERVICE to UNSTRUCTURED, LLAMACLOUD, DOCLING, or AZURE_DI in your .env"
)
ext = PurePosixPath(request.filename).suffix.lower()
@@ -80,6 +80,12 @@ class EtlPipelineService:
content = await parse_with_llamacloud(
request.file_path, request.estimated_pages
)
elif etl_service == "AZURE_DI":
from app.etl_pipeline.parsers.azure_doc_intelligence import (
parse_with_azure_doc_intelligence,
)
content = await parse_with_azure_doc_intelligence(request.file_path)
else:
raise EtlServiceUnavailableError(f"Unknown ETL_SERVICE: {etl_service}")