diff --git a/surfsense_backend/app/etl_pipeline/parsers/vision_llm.py b/surfsense_backend/app/etl_pipeline/parsers/vision_llm.py
index fb12a1e75..bd39de71d 100644
--- a/surfsense_backend/app/etl_pipeline/parsers/vision_llm.py
+++ b/surfsense_backend/app/etl_pipeline/parsers/vision_llm.py
@@ -1,5 +1,6 @@
 import base64
 import mimetypes
+import os
 
 from langchain_core.messages import HumanMessage
 
@@ -9,8 +10,16 @@ _PROMPT = (
     "Be concise but complete — let the image content guide the level of detail."
 )
 
+_MAX_IMAGE_BYTES = 5 * 1024 * 1024  # 5 MB cap on the on-disk file size (Anthropic Claude's per-image limit, the most restrictive)
+
 
 def _image_to_data_url(file_path: str) -> str:
+    file_size = os.path.getsize(file_path)
+    if file_size > _MAX_IMAGE_BYTES:
+        raise ValueError(
+            f"Image too large for vision LLM ({file_size / (1024 * 1024):.1f} MB, "
+            f"limit {_MAX_IMAGE_BYTES // (1024 * 1024)} MB): {file_path}"
+        )
     mime_type, _ = mimetypes.guess_type(file_path)
     if not mime_type or not mime_type.startswith("image/"):
         mime_type = "image/png"