mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-04-25 16:56:22 +02:00
Route uploaded images to vision LLM with document-parser fallback
This commit is contained in:
parent
78fa2d926a
commit
7e90a8ed3c
7 changed files with 199 additions and 5 deletions
|
|
@ -549,8 +549,11 @@ def test_unsupported_extensions_classified_correctly(filename):
|
|||
("doc.docx", "document"),
|
||||
("slides.pptx", "document"),
|
||||
("sheet.xlsx", "document"),
|
||||
("photo.png", "document"),
|
||||
("photo.jpg", "document"),
|
||||
("photo.png", "image"),
|
||||
("photo.jpg", "image"),
|
||||
("photo.webp", "image"),
|
||||
("photo.gif", "image"),
|
||||
("photo.heic", "image"),
|
||||
("book.epub", "document"),
|
||||
("letter.odt", "document"),
|
||||
("readme.md", "plaintext"),
|
||||
|
|
@ -680,3 +683,57 @@ async def test_extract_eml_with_docling_raises_unsupported(tmp_path, mocker):
|
|||
await EtlPipelineService().extract(
|
||||
EtlRequest(file_path=str(eml_file), filename="mail.eml")
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Image extraction via vision LLM
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
async def test_extract_image_with_vision_llm(tmp_path):
    """When a vision LLM is supplied, its response becomes the image's markdown."""
    from unittest.mock import AsyncMock, MagicMock

    expected_markdown = "# A photo of a sunset over the ocean"

    # Stub LLM: awaiting ``ainvoke`` yields an object exposing ``.content``.
    vision_llm = AsyncMock()
    vision_llm.ainvoke.return_value = MagicMock(content=expected_markdown)

    # The 8-byte PNG signature followed by zero padding; not a decodable image.
    image_path = tmp_path / "photo.png"
    image_path.write_bytes(b"\x89PNG\r\n\x1a\n" + b"\x00" * 50)

    request = EtlRequest(file_path=str(image_path), filename="photo.png")
    pipeline = EtlPipelineService(vision_llm=vision_llm)
    result = await pipeline.extract(request)

    assert result.markdown_content == expected_markdown
    assert result.etl_service == "VISION_LLM"
    assert result.content_type == "image"
    vision_llm.ainvoke.assert_called_once()
|
||||
|
||||
|
||||
async def test_extract_image_falls_back_to_document_without_vision_llm(
    tmp_path, mocker
):
    """With no vision LLM configured, an image is handled by the document parser."""
    ocr_markdown = "# OCR text from image"

    # Select DOCLING as the ETL backend for the duration of this test.
    mocker.patch("app.config.config.ETL_SERVICE", "DOCLING")

    # Replace the Docling factory so no real parsing is performed.
    docling_stub = mocker.AsyncMock()
    docling_stub.process_document.return_value = {"content": ocr_markdown}
    mocker.patch(
        "app.services.docling_service.create_docling_service",
        return_value=docling_stub,
    )

    # The 8-byte PNG signature followed by zero padding; not a decodable image.
    image_path = tmp_path / "scan.png"
    image_path.write_bytes(b"\x89PNG\r\n\x1a\n" + b"\x00" * 50)

    request = EtlRequest(file_path=str(image_path), filename="scan.png")
    pipeline = EtlPipelineService()  # deliberately constructed without vision_llm
    result = await pipeline.extract(request)

    assert result.markdown_content == ocr_markdown
    assert result.etl_service == "DOCLING"
    assert result.content_type == "document"
|
||||
|
|
|
|||
|
|
@ -154,3 +154,40 @@ def test_get_extensions_for_none_returns_union():
|
|||
)
|
||||
|
||||
assert get_document_extensions_for_service(None) == DOCUMENT_EXTENSIONS
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# IMAGE_EXTENSIONS
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "ext",
    [
        ".png", ".jpg", ".jpeg", ".gif",
        ".bmp", ".tiff", ".tif", ".webp",
        ".svg", ".heic", ".heif",
    ],
)
def test_image_extensions_contains_expected(ext):
    """Each listed image suffix must be a member of IMAGE_EXTENSIONS."""
    from app.utils import file_extensions

    assert ext in file_extensions.IMAGE_EXTENSIONS
|
||||
|
||||
|
||||
def test_image_extensions_are_subset_of_document_extensions():
    """Every image extension must also appear in DOCUMENT_EXTENSIONS.

    The document-parser fallback for images relies on this containment, so
    any extension present only in IMAGE_EXTENSIONS is reported as a failure.
    """
    from app.utils import file_extensions

    image_exts = file_extensions.IMAGE_EXTENSIONS
    document_exts = file_extensions.DOCUMENT_EXTENSIONS

    missing = image_exts - document_exts
    assert not missing, (
        f"Image extensions missing from document sets (breaks fallback): {missing}"
    )
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue