feat: add support for Azure Document Intelligence in ETL pipeline

This commit is contained in:
Anish Sarkar 2026-04-08 00:59:12 +05:30
parent 73a9c5fbd1
commit 1fa8d1220b
10 changed files with 248 additions and 5 deletions

View file

@ -1,6 +1,6 @@
NEXT_PUBLIC_FASTAPI_BACKEND_URL=http://localhost:8000
NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE=LOCAL or GOOGLE
NEXT_PUBLIC_ETL_SERVICE=UNSTRUCTURED or LLAMACLOUD or DOCLING
NEXT_PUBLIC_ETL_SERVICE=UNSTRUCTURED or LLAMACLOUD or DOCLING or AZURE_DI
NEXT_PUBLIC_ZERO_CACHE_URL=http://localhost:4848
# Contact Form Vars (optional)

View file

@ -96,6 +96,11 @@ const FILE_TYPE_CONFIG: Record<string, Record<string, string[]>> = {
"image/tiff": [".tiff", ".tif"],
...audioFileTypes,
},
AZURE_DI: {
...commonTypes,
"image/heic": [".heic"],
...audioFileTypes,
},
default: {
...commonTypes,
"application/msword": [".doc"],

View file

@ -19,7 +19,7 @@ export const AUTH_TYPE = process.env.NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE || "G
// Placeholder: __NEXT_PUBLIC_FASTAPI_BACKEND_URL__
export const BACKEND_URL = process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL || "http://localhost:8000";
// ETL Service: "DOCLING" or "UNSTRUCTURED"
// ETL Service: "DOCLING", "UNSTRUCTURED", "LLAMACLOUD", or "AZURE_DI"
// Placeholder: __NEXT_PUBLIC_ETL_SERVICE__
export const ETL_SERVICE = process.env.NEXT_PUBLIC_ETL_SERVICE || "DOCLING";