mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-04-27 01:36:30 +02:00
Route uploaded images to vision LLM with document-parser fallback
This commit is contained in:
parent
78fa2d926a
commit
7e90a8ed3c
7 changed files with 199 additions and 5 deletions
|
|
@ -7,10 +7,33 @@ Extensions already covered by PLAINTEXT_EXTENSIONS, AUDIO_EXTENSIONS, or
|
|||
DIRECT_CONVERT_EXTENSIONS in file_classifier are NOT repeated here -- these
|
||||
sets are exclusively for the "document" ETL path (Docling / LlamaParse /
|
||||
Unstructured).
|
||||
|
||||
Image extensions intentionally remain in the per-parser sets for fallback
|
||||
compatibility. IMAGE_EXTENSIONS is used only for routing classification.
|
||||
"""
|
||||
|
||||
from pathlib import PurePosixPath
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Image extensions (used by file_classifier for routing to vision LLM)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
IMAGE_EXTENSIONS: frozenset[str] = frozenset(
|
||||
{
|
||||
".png",
|
||||
".jpg",
|
||||
".jpeg",
|
||||
".gif",
|
||||
".bmp",
|
||||
".tiff",
|
||||
".tif",
|
||||
".webp",
|
||||
".svg",
|
||||
".heic",
|
||||
".heif",
|
||||
}
|
||||
)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Per-parser document extension sets (from official documentation)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue