Route uploaded images to vision LLM with document-parser fallback

This commit is contained in:
CREDO23 2026-04-09 14:33:33 +02:00
parent 78fa2d926a
commit 7e90a8ed3c
7 changed files with 199 additions and 5 deletions

View file

@ -549,8 +549,11 @@ def test_unsupported_extensions_classified_correctly(filename):
("doc.docx", "document"),
("slides.pptx", "document"),
("sheet.xlsx", "document"),
("photo.png", "document"),
("photo.jpg", "document"),
("photo.png", "image"),
("photo.jpg", "image"),
("photo.webp", "image"),
("photo.gif", "image"),
("photo.heic", "image"),
("book.epub", "document"),
("letter.odt", "document"),
("readme.md", "plaintext"),
@ -680,3 +683,57 @@ async def test_extract_eml_with_docling_raises_unsupported(tmp_path, mocker):
await EtlPipelineService().extract(
EtlRequest(file_path=str(eml_file), filename="mail.eml")
)
# ---------------------------------------------------------------------------
# Image extraction via vision LLM
# ---------------------------------------------------------------------------
async def test_extract_image_with_vision_llm(tmp_path):
    """An image file is analyzed by the vision LLM when one is provided.

    A stub LLM whose ``ainvoke`` resolves to a canned response is handed to
    ``EtlPipelineService``. The test then checks that the canned text comes
    back as the markdown content, tagged with the ``VISION_LLM`` service and
    the ``image`` content type, and that the LLM was awaited exactly once.
    """
    from unittest.mock import AsyncMock, MagicMock

    img_file = tmp_path / "photo.png"
    # Only the 8-byte PNG signature followed by zero padding.
    img_file.write_bytes(b"\x89PNG\r\n\x1a\n" + b"\x00" * 50)

    fake_response = MagicMock()
    fake_response.content = "# A photo of a sunset over the ocean"

    fake_llm = AsyncMock()
    fake_llm.ainvoke.return_value = fake_response

    service = EtlPipelineService(vision_llm=fake_llm)
    result = await service.extract(
        EtlRequest(file_path=str(img_file), filename="photo.png")
    )

    assert result.markdown_content == "# A photo of a sunset over the ocean"
    assert result.etl_service == "VISION_LLM"
    assert result.content_type == "image"
    # assert_called_once() would also pass if the coroutine returned by
    # ainvoke() were created but never awaited; assert_awaited_once() pins
    # down that the LLM call actually ran to completion exactly once.
    fake_llm.ainvoke.assert_awaited_once()
async def test_extract_image_falls_back_to_document_without_vision_llm(
    tmp_path, mocker
):
    """With no vision LLM supplied, an image is handled by the document parser."""
    # Select the Docling backend and swap its factory for a stub that
    # returns fixed markdown from process_document().
    mocker.patch("app.config.config.ETL_SERVICE", "DOCLING")
    docling_stub = mocker.AsyncMock()
    docling_stub.process_document.return_value = {"content": "# OCR text from image"}
    mocker.patch(
        "app.services.docling_service.create_docling_service",
        return_value=docling_stub,
    )

    scan_path = tmp_path / "scan.png"
    scan_path.write_bytes(b"\x89PNG\r\n\x1a\n" + b"\x00" * 50)

    # No vision_llm argument: the service is built with its defaults.
    pipeline = EtlPipelineService()
    request = EtlRequest(file_path=str(scan_path), filename="scan.png")
    outcome = await pipeline.extract(request)

    assert outcome.markdown_content == "# OCR text from image"
    assert outcome.etl_service == "DOCLING"
    assert outcome.content_type == "document"

View file

@ -154,3 +154,40 @@ def test_get_extensions_for_none_returns_union():
)
assert get_document_extensions_for_service(None) == DOCUMENT_EXTENSIONS
# ---------------------------------------------------------------------------
# IMAGE_EXTENSIONS
# ---------------------------------------------------------------------------
@pytest.mark.parametrize(
    "ext",
    (
        ".png", ".jpg", ".jpeg",
        ".gif", ".bmp",
        ".tiff", ".tif",
        ".webp", ".svg",
        ".heic", ".heif",
    ),
)
def test_image_extensions_contains_expected(ext):
    """Each parametrized suffix must be a member of IMAGE_EXTENSIONS."""
    from app.utils.file_extensions import IMAGE_EXTENSIONS

    known_image_suffixes = IMAGE_EXTENSIONS
    assert ext in known_image_suffixes
def test_image_extensions_are_subset_of_document_extensions():
    """Every entry of IMAGE_EXTENSIONS must also appear in DOCUMENT_EXTENSIONS.

    The failure message lists the extensions that are present in the image
    set but absent from the document set.
    """
    from app.utils.file_extensions import DOCUMENT_EXTENSIONS, IMAGE_EXTENSIONS

    absent = IMAGE_EXTENSIONS - DOCUMENT_EXTENSIONS
    failure_note = (
        f"Image extensions missing from document sets (breaks fallback): {absent}"
    )
    assert not absent, failure_note