Route uploaded images to vision LLM with document-parser fallback

This commit is contained in:
CREDO23 2026-04-09 14:33:33 +02:00
parent 78fa2d926a
commit 7e90a8ed3c
7 changed files with 199 additions and 5 deletions

View file

@ -549,8 +549,11 @@ def test_unsupported_extensions_classified_correctly(filename):
("doc.docx", "document"),
("slides.pptx", "document"),
("sheet.xlsx", "document"),
("photo.png", "document"),
("photo.jpg", "document"),
("photo.png", "image"),
("photo.jpg", "image"),
("photo.webp", "image"),
("photo.gif", "image"),
("photo.heic", "image"),
("book.epub", "document"),
("letter.odt", "document"),
("readme.md", "plaintext"),
@ -680,3 +683,57 @@ async def test_extract_eml_with_docling_raises_unsupported(tmp_path, mocker):
await EtlPipelineService().extract(
EtlRequest(file_path=str(eml_file), filename="mail.eml")
)
# ---------------------------------------------------------------------------
# Image extraction via vision LLM
# ---------------------------------------------------------------------------
async def test_extract_image_with_vision_llm(tmp_path):
    """Check that an image is routed to the vision LLM when one is supplied.

    A stub LLM whose ``ainvoke`` returns a canned response is injected into
    the pipeline. The extraction result must carry that response's content
    verbatim, be tagged with the ``VISION_LLM`` service and the ``image``
    content type, and the stub must have been invoked exactly once.
    """
    from unittest.mock import AsyncMock, MagicMock

    expected_markdown = "# A photo of a sunset over the ocean"

    # Minimal stand-in for an image: the 8-byte PNG signature plus zero padding.
    png_path = tmp_path / "photo.png"
    png_path.write_bytes(b"\x89PNG\r\n\x1a\n" + b"\x00" * 50)

    # The stub's ``ainvoke`` yields an object exposing the markdown as ``content``.
    vision_llm = AsyncMock()
    vision_llm.ainvoke.return_value = MagicMock(content=expected_markdown)

    pipeline = EtlPipelineService(vision_llm=vision_llm)
    request = EtlRequest(file_path=str(png_path), filename="photo.png")
    result = await pipeline.extract(request)

    assert result.markdown_content == expected_markdown
    assert result.etl_service == "VISION_LLM"
    assert result.content_type == "image"
    vision_llm.ainvoke.assert_called_once()
async def test_extract_image_falls_back_to_document_without_vision_llm(
    tmp_path, mocker
):
    """Check that an image goes through the document parser when no vision LLM is set.

    The configured ETL service is patched to ``DOCLING`` and the Docling
    service factory is replaced with a stub returning canned content. A
    pipeline built without a vision LLM must return that content, tagged
    with the ``DOCLING`` service and the ``document`` content type.
    """
    expected_markdown = "# OCR text from image"

    mocker.patch("app.config.config.ETL_SERVICE", "DOCLING")

    # Stub Docling service; the factory is patched to hand it out.
    docling_stub = mocker.AsyncMock()
    docling_stub.process_document.return_value = {"content": expected_markdown}
    mocker.patch(
        "app.services.docling_service.create_docling_service",
        return_value=docling_stub,
    )

    # Minimal stand-in for an image: the 8-byte PNG signature plus zero padding.
    scan_path = tmp_path / "scan.png"
    scan_path.write_bytes(b"\x89PNG\r\n\x1a\n" + b"\x00" * 50)

    # No ``vision_llm`` argument is passed: this is the fallback path under test.
    pipeline = EtlPipelineService()
    request = EtlRequest(file_path=str(scan_path), filename="scan.png")
    result = await pipeline.extract(request)

    assert result.markdown_content == expected_markdown
    assert result.etl_service == "DOCLING"
    assert result.content_type == "document"