diff --git a/surfsense_backend/app/etl_pipeline/etl_pipeline_service.py b/surfsense_backend/app/etl_pipeline/etl_pipeline_service.py index fbd2e4e73..5f1495cdb 100644 --- a/surfsense_backend/app/etl_pipeline/etl_pipeline_service.py +++ b/surfsense_backend/app/etl_pipeline/etl_pipeline_service.py @@ -15,6 +15,9 @@ from app.etl_pipeline.parsers.plaintext import read_plaintext class EtlPipelineService: """Single pipeline for extracting markdown from files. All callers use this.""" + def __init__(self, *, vision_llm=None): + self._vision_llm = vision_llm + async def extract(self, request: EtlRequest) -> EtlResult: category = classify_file(request.filename) @@ -47,6 +50,28 @@ class EtlPipelineService: content_type="audio", ) + if category == FileCategory.IMAGE: + return await self._extract_image(request) + + return await self._extract_document(request) + + async def _extract_image(self, request: EtlRequest) -> EtlResult: + if self._vision_llm: + from app.etl_pipeline.parsers.vision_llm import parse_with_vision_llm + + content = await parse_with_vision_llm( + request.file_path, request.filename, self._vision_llm + ) + return EtlResult( + markdown_content=content, + etl_service="VISION_LLM", + content_type="image", + ) + + logging.info( + "No vision LLM provided, falling back to document parser for %s", + request.filename, + ) return await self._extract_document(request) async def _extract_document(self, request: EtlRequest) -> EtlResult: diff --git a/surfsense_backend/app/etl_pipeline/file_classifier.py b/surfsense_backend/app/etl_pipeline/file_classifier.py index 4e690bcdc..120369a27 100644 --- a/surfsense_backend/app/etl_pipeline/file_classifier.py +++ b/surfsense_backend/app/etl_pipeline/file_classifier.py @@ -3,6 +3,7 @@ from pathlib import PurePosixPath from app.utils.file_extensions import ( DOCUMENT_EXTENSIONS, + IMAGE_EXTENSIONS, get_document_extensions_for_service, ) @@ -105,6 +106,7 @@ class FileCategory(Enum): PLAINTEXT = "plaintext" AUDIO = "audio" 
DIRECT_CONVERT = "direct_convert" + IMAGE = "image" UNSUPPORTED = "unsupported" DOCUMENT = "document" @@ -117,6 +119,8 @@ def classify_file(filename: str) -> FileCategory: return FileCategory.AUDIO if suffix in DIRECT_CONVERT_EXTENSIONS: return FileCategory.DIRECT_CONVERT + if suffix in IMAGE_EXTENSIONS: + return FileCategory.IMAGE if suffix in DOCUMENT_EXTENSIONS: return FileCategory.DOCUMENT return FileCategory.UNSUPPORTED @@ -126,12 +130,14 @@ def should_skip_for_service(filename: str, etl_service: str | None) -> bool: """Return True if *filename* cannot be processed by *etl_service*. Plaintext, audio, and direct-convert files are parser-agnostic and never - skipped. Document files are checked against the per-parser extension set. + skipped. Image and document files are checked against the per-parser + extension set (images fall back to the document parser when no vision LLM + is available, so the same service constraint applies). """ category = classify_file(filename) if category == FileCategory.UNSUPPORTED: return True - if category == FileCategory.DOCUMENT: + if category in (FileCategory.DOCUMENT, FileCategory.IMAGE): suffix = PurePosixPath(filename).suffix.lower() return suffix not in get_document_extensions_for_service(etl_service) return False diff --git a/surfsense_backend/app/etl_pipeline/parsers/vision_llm.py b/surfsense_backend/app/etl_pipeline/parsers/vision_llm.py new file mode 100644 index 000000000..e75f81c4b --- /dev/null +++ b/surfsense_backend/app/etl_pipeline/parsers/vision_llm.py @@ -0,0 +1,37 @@ +import base64 +import mimetypes + +from langchain_core.messages import HumanMessage + +_PROMPT = ( + "Analyze this image thoroughly and produce a detailed markdown description.\n\n" + "Include:\n" + "- All visible text, transcribed verbatim\n" + "- Description of diagrams, charts, tables, or visual structures\n" + "- Key subjects, objects, or scenes depicted\n\n" + "Output only the markdown content, no preamble." 
+) + + +def _image_to_data_url(file_path: str) -> str: + mime_type, _ = mimetypes.guess_type(file_path) + if not mime_type or not mime_type.startswith("image/"): + mime_type = "image/png" + with open(file_path, "rb") as f: + encoded = base64.b64encode(f.read()).decode("ascii") + return f"data:{mime_type};base64,{encoded}" + + +async def parse_with_vision_llm(file_path: str, filename: str, llm) -> str: + data_url = _image_to_data_url(file_path) + message = HumanMessage( + content=[ + {"type": "text", "text": _PROMPT}, + {"type": "image_url", "image_url": {"url": data_url}}, + ] + ) + response = await llm.ainvoke([message]) + text = response.content if hasattr(response, "content") else str(response) + if not text or not text.strip(): + raise ValueError(f"Vision LLM returned empty content for {filename}") + return text.strip() diff --git a/surfsense_backend/app/tasks/document_processors/file_processors.py b/surfsense_backend/app/tasks/document_processors/file_processors.py index c765dbd87..9992231e0 100644 --- a/surfsense_backend/app/tasks/document_processors/file_processors.py +++ b/surfsense_backend/app/tasks/document_processors/file_processors.py @@ -333,6 +333,7 @@ async def process_file_in_background( async def _extract_file_content( file_path: str, filename: str, + search_space_id: int, session: AsyncSession, user_id: str, task_logger: TaskLoggingService, @@ -360,6 +361,7 @@ async def _extract_file_content( FileCategory.PLAINTEXT: "Reading file", FileCategory.DIRECT_CONVERT: "Converting file", FileCategory.AUDIO: "Transcribing audio", + FileCategory.IMAGE: "Analyzing image", FileCategory.UNSUPPORTED: "Unsupported file type", FileCategory.DOCUMENT: "Extracting content", } @@ -383,7 +385,13 @@ async def _extract_file_content( estimated_pages = _estimate_pages_safe(page_limit_service, file_path) await page_limit_service.check_page_limit(user_id, estimated_pages) - result = await EtlPipelineService().extract( + vision_llm = None + if category == FileCategory.IMAGE:
+ from app.services.llm_service import get_vision_llm + + vision_llm = await get_vision_llm(session, search_space_id) + + result = await EtlPipelineService(vision_llm=vision_llm).extract( EtlRequest( file_path=file_path, filename=filename, @@ -439,6 +447,7 @@ async def process_file_in_background_with_document( markdown_content, etl_service = await _extract_file_content( file_path, filename, + search_space_id, session, user_id, task_logger, diff --git a/surfsense_backend/app/utils/file_extensions.py b/surfsense_backend/app/utils/file_extensions.py index 16ac585b7..e8be1b83a 100644 --- a/surfsense_backend/app/utils/file_extensions.py +++ b/surfsense_backend/app/utils/file_extensions.py @@ -7,10 +7,33 @@ Extensions already covered by PLAINTEXT_EXTENSIONS, AUDIO_EXTENSIONS, or DIRECT_CONVERT_EXTENSIONS in file_classifier are NOT repeated here -- these sets are exclusively for the "document" ETL path (Docling / LlamaParse / Unstructured). + +Image extensions intentionally remain in the per-parser sets for fallback +compatibility. IMAGE_EXTENSIONS is used only for routing classification. 
""" from pathlib import PurePosixPath +# --------------------------------------------------------------------------- +# Image extensions (used by file_classifier for routing to vision LLM) +# --------------------------------------------------------------------------- + +IMAGE_EXTENSIONS: frozenset[str] = frozenset( + { + ".png", + ".jpg", + ".jpeg", + ".gif", + ".bmp", + ".tiff", + ".tif", + ".webp", + ".svg", + ".heic", + ".heif", + } +) + # --------------------------------------------------------------------------- # Per-parser document extension sets (from official documentation) # --------------------------------------------------------------------------- diff --git a/surfsense_backend/tests/unit/etl_pipeline/test_etl_pipeline_service.py b/surfsense_backend/tests/unit/etl_pipeline/test_etl_pipeline_service.py index 9608b011d..4e1d603a3 100644 --- a/surfsense_backend/tests/unit/etl_pipeline/test_etl_pipeline_service.py +++ b/surfsense_backend/tests/unit/etl_pipeline/test_etl_pipeline_service.py @@ -549,8 +549,11 @@ def test_unsupported_extensions_classified_correctly(filename): ("doc.docx", "document"), ("slides.pptx", "document"), ("sheet.xlsx", "document"), - ("photo.png", "document"), - ("photo.jpg", "document"), + ("photo.png", "image"), + ("photo.jpg", "image"), + ("photo.webp", "image"), + ("photo.gif", "image"), + ("photo.heic", "image"), ("book.epub", "document"), ("letter.odt", "document"), ("readme.md", "plaintext"), @@ -680,3 +683,57 @@ async def test_extract_eml_with_docling_raises_unsupported(tmp_path, mocker): await EtlPipelineService().extract( EtlRequest(file_path=str(eml_file), filename="mail.eml") ) + + +# --------------------------------------------------------------------------- +# Image extraction via vision LLM +# --------------------------------------------------------------------------- + + +async def test_extract_image_with_vision_llm(tmp_path): + """An image file is analyzed by the vision LLM when provided.""" + from unittest.mock 
import AsyncMock, MagicMock + + img_file = tmp_path / "photo.png" + img_file.write_bytes(b"\x89PNG\r\n\x1a\n" + b"\x00" * 50) + + fake_response = MagicMock() + fake_response.content = "# A photo of a sunset over the ocean" + fake_llm = AsyncMock() + fake_llm.ainvoke.return_value = fake_response + + service = EtlPipelineService(vision_llm=fake_llm) + result = await service.extract( + EtlRequest(file_path=str(img_file), filename="photo.png") + ) + + assert result.markdown_content == "# A photo of a sunset over the ocean" + assert result.etl_service == "VISION_LLM" + assert result.content_type == "image" + fake_llm.ainvoke.assert_called_once() + + +async def test_extract_image_falls_back_to_document_without_vision_llm( + tmp_path, mocker +): + """Without a vision LLM, image files fall back to the document parser.""" + mocker.patch("app.config.config.ETL_SERVICE", "DOCLING") + + fake_docling = mocker.AsyncMock() + fake_docling.process_document.return_value = {"content": "# OCR text from image"} + mocker.patch( + "app.services.docling_service.create_docling_service", + return_value=fake_docling, + ) + + img_file = tmp_path / "scan.png" + img_file.write_bytes(b"\x89PNG\r\n\x1a\n" + b"\x00" * 50) + + service = EtlPipelineService() + result = await service.extract( + EtlRequest(file_path=str(img_file), filename="scan.png") + ) + + assert result.markdown_content == "# OCR text from image" + assert result.etl_service == "DOCLING" + assert result.content_type == "document" diff --git a/surfsense_backend/tests/unit/utils/test_file_extensions.py b/surfsense_backend/tests/unit/utils/test_file_extensions.py index 43dfef5f0..ccf5eb70f 100644 --- a/surfsense_backend/tests/unit/utils/test_file_extensions.py +++ b/surfsense_backend/tests/unit/utils/test_file_extensions.py @@ -154,3 +154,40 @@ def test_get_extensions_for_none_returns_union(): ) assert get_document_extensions_for_service(None) == DOCUMENT_EXTENSIONS + + +# 
--------------------------------------------------------------------------- +# IMAGE_EXTENSIONS +# --------------------------------------------------------------------------- + + +@pytest.mark.parametrize( + "ext", + [ + ".png", + ".jpg", + ".jpeg", + ".gif", + ".bmp", + ".tiff", + ".tif", + ".webp", + ".svg", + ".heic", + ".heif", + ], +) +def test_image_extensions_contains_expected(ext): + from app.utils.file_extensions import IMAGE_EXTENSIONS + + assert ext in IMAGE_EXTENSIONS + + +def test_image_extensions_are_subset_of_document_extensions(): + """Image extensions used for routing should also be in DOCUMENT_EXTENSIONS for fallback.""" + from app.utils.file_extensions import DOCUMENT_EXTENSIONS, IMAGE_EXTENSIONS + + missing = IMAGE_EXTENSIONS - DOCUMENT_EXTENSIONS + assert not missing, ( + f"Image extensions missing from document sets (breaks fallback): {missing}" + )