diff --git a/surfsense_backend/app/connectors/dropbox/file_types.py b/surfsense_backend/app/connectors/dropbox/file_types.py
index 7b72c1857..13209ffd2 100644
--- a/surfsense_backend/app/connectors/dropbox/file_types.py
+++ b/surfsense_backend/app/connectors/dropbox/file_types.py
@@ -1,6 +1,6 @@
 """File type handlers for Dropbox."""
 
-from app.etl_pipeline.file_classifier import FileCategory, classify_file
+from app.etl_pipeline.file_classifier import should_skip_for_service
 
 PAPER_EXTENSION = ".paper"
 
@@ -53,5 +53,7 @@ def should_skip_file(item: dict) -> bool:
         return False
     if not item.get("is_downloadable", True):
         return True
+    from app.config import config as app_config
+
     name = item.get("name", "")
-    return classify_file(name) == FileCategory.UNSUPPORTED
+    return should_skip_for_service(name, app_config.ETL_SERVICE)
diff --git a/surfsense_backend/app/connectors/google_drive/file_types.py b/surfsense_backend/app/connectors/google_drive/file_types.py
index e0b8f001e..73f016ceb 100644
--- a/surfsense_backend/app/connectors/google_drive/file_types.py
+++ b/surfsense_backend/app/connectors/google_drive/file_types.py
@@ -1,6 +1,6 @@
 """File type handlers for Google Drive."""
 
-from app.etl_pipeline.file_classifier import FileCategory, classify_file
+from app.etl_pipeline.file_classifier import should_skip_for_service
 
 GOOGLE_DOC = "application/vnd.google-apps.document"
 GOOGLE_SHEET = "application/vnd.google-apps.spreadsheet"
@@ -49,8 +49,10 @@ def should_skip_file(mime_type: str) -> bool:
 
 
 def should_skip_by_extension(filename: str) -> bool:
-    """Return True if the file extension is not parseable by any ETL pipeline."""
-    return classify_file(filename) == FileCategory.UNSUPPORTED
+    """Return True if the file extension is not parseable by the configured ETL service."""
+    from app.config import config as app_config
+
+    return should_skip_for_service(filename, app_config.ETL_SERVICE)
 
 
 def get_export_mime_type(mime_type: str) -> str | None:
diff --git a/surfsense_backend/app/connectors/onedrive/file_types.py b/surfsense_backend/app/connectors/onedrive/file_types.py
index bcd78b711..f9c147da8 100644
--- a/surfsense_backend/app/connectors/onedrive/file_types.py
+++ b/surfsense_backend/app/connectors/onedrive/file_types.py
@@ -1,6 +1,6 @@
 """File type handlers for Microsoft OneDrive."""
 
-from app.etl_pipeline.file_classifier import FileCategory, classify_file
+from app.etl_pipeline.file_classifier import should_skip_for_service
 
 ONEDRIVE_FOLDER_FACET = "folder"
 ONENOTE_MIME = "application/msonenote"
@@ -51,5 +51,7 @@ def should_skip_file(item: dict) -> bool:
     mime = item.get("file", {}).get("mimeType", "")
     if mime in SKIP_MIME_TYPES:
         return True
+    from app.config import config as app_config
+
     name = item.get("name", "")
-    return classify_file(name) == FileCategory.UNSUPPORTED
+    return should_skip_for_service(name, app_config.ETL_SERVICE)
diff --git a/surfsense_backend/app/etl_pipeline/etl_pipeline_service.py b/surfsense_backend/app/etl_pipeline/etl_pipeline_service.py
index 7c67d2345..a0041c843 100644
--- a/surfsense_backend/app/etl_pipeline/etl_pipeline_service.py
+++ b/surfsense_backend/app/etl_pipeline/etl_pipeline_service.py
@@ -45,6 +45,10 @@ class EtlPipelineService:
         return await self._extract_document(request)
 
     async def _extract_document(self, request: EtlRequest) -> EtlResult:
+        from pathlib import PurePosixPath
+
+        from app.utils.file_extensions import get_document_extensions_for_service
+
         etl_service = app_config.ETL_SERVICE
         if not etl_service:
             raise EtlServiceUnavailableError(
@@ -52,6 +56,13 @@ class EtlPipelineService:
                 "Set ETL_SERVICE to UNSTRUCTURED, LLAMACLOUD, or DOCLING in your .env"
             )
 
+        ext = PurePosixPath(request.filename).suffix.lower()
+        supported = get_document_extensions_for_service(etl_service)
+        if ext not in supported:
+            raise EtlUnsupportedFileError(
+                f"File type {ext} is not supported by {etl_service}"
+            )
+
         if etl_service == "DOCLING":
             from app.etl_pipeline.parsers.docling import parse_with_docling
 
diff --git a/surfsense_backend/app/etl_pipeline/file_classifier.py b/surfsense_backend/app/etl_pipeline/file_classifier.py
index eea9cce22..bc7b4537c 100644
--- a/surfsense_backend/app/etl_pipeline/file_classifier.py
+++ b/surfsense_backend/app/etl_pipeline/file_classifier.py
@@ -1,7 +1,7 @@
 from enum import Enum
 from pathlib import PurePosixPath
 
-from app.utils.file_extensions import DOCUMENT_EXTENSIONS
+from app.utils.file_extensions import DOCUMENT_EXTENSIONS, get_document_extensions_for_service
 
 PLAINTEXT_EXTENSIONS = frozenset(
     {
@@ -29,7 +29,7 @@ AUDIO_EXTENSIONS = frozenset(
     {".mp3", ".mp4", ".mpeg", ".mpga", ".m4a", ".wav", ".webm"}
 )
 
-DIRECT_CONVERT_EXTENSIONS = frozenset({".csv", ".tsv", ".html", ".htm"})
+DIRECT_CONVERT_EXTENSIONS = frozenset({".csv", ".tsv", ".html", ".htm", ".xhtml"})
 
 
 class FileCategory(Enum):
@@ -51,3 +51,18 @@ def classify_file(filename: str) -> FileCategory:
     if suffix in DOCUMENT_EXTENSIONS:
         return FileCategory.DOCUMENT
     return FileCategory.UNSUPPORTED
+
+
+def should_skip_for_service(filename: str, etl_service: str | None) -> bool:
+    """Decide whether *filename* must be skipped when *etl_service* is the parser.
+
+    Unsupported files are always skipped and documents are skipped when the
+    parser does not list their suffix; every other category is never skipped.
+    """
+    category = classify_file(filename)
+    if category != FileCategory.DOCUMENT:
+        # Outside the document path only UNSUPPORTED files are rejected.
+        return category == FileCategory.UNSUPPORTED
+    # Documents depend on the extension set of the configured parser.
+    extension = PurePosixPath(filename).suffix.lower()
+    return extension not in get_document_extensions_for_service(etl_service)
diff --git a/surfsense_backend/app/utils/file_extensions.py b/surfsense_backend/app/utils/file_extensions.py
index b0a4c808c..5eed36872 100644
--- a/surfsense_backend/app/utils/file_extensions.py
+++ b/surfsense_backend/app/utils/file_extensions.py
@@ -1,29 +1,69 @@
-"""Allowlist of document extensions the ETL parsers can handle.
+"""Per-parser document extension sets for the ETL pipeline.
 
-Every consumer (file_classifier, connector-level skip checks) imports from
-here so there is a single source of truth.  Extensions already covered by
-PLAINTEXT_EXTENSIONS, AUDIO_EXTENSIONS, or DIRECT_CONVERT_EXTENSIONS in
-file_classifier are NOT repeated here -- this set is exclusively for the
-"document" ETL path (Docling / LlamaParse / Unstructured).
+Every consumer (file_classifier, connector-level skip checks, ETL pipeline
+validation) imports from here so there is a single source of truth.
+
+Extensions already covered by PLAINTEXT_EXTENSIONS, AUDIO_EXTENSIONS, or
+DIRECT_CONVERT_EXTENSIONS in file_classifier are NOT repeated here -- these
+sets are exclusively for the "document" ETL path (Docling / LlamaParse /
+Unstructured).
 """
 
 from pathlib import PurePosixPath
 
-DOCUMENT_EXTENSIONS: frozenset[str] = frozenset({
-    # PDF
+# ---------------------------------------------------------------------------
+# Per-parser document extension sets (from official documentation)
+# ---------------------------------------------------------------------------
+
+DOCLING_DOCUMENT_EXTENSIONS: frozenset[str] = frozenset({
     ".pdf",
-    # Microsoft Office
-    ".docx", ".doc", ".xlsx", ".xls", ".pptx", ".ppt",
-    # Images (raster: OCR / vision parsing)
-    ".png", ".jpg", ".jpeg", ".bmp", ".tiff", ".tif",
-    # Rich text / e-book
-    ".rtf", ".epub",
-    # OpenDocument
-    ".odt", ".ods", ".odp",
-    # Other (LlamaParse / Unstructured specific)
-    ".hwpx",
+    ".docx", ".xlsx", ".pptx",
+    ".png", ".jpg", ".jpeg", ".tiff", ".tif", ".bmp", ".webp",
 })
 
+LLAMAPARSE_DOCUMENT_EXTENSIONS: frozenset[str] = frozenset({
+    ".pdf",  # PDF
+    ".docx", ".doc", ".xlsx", ".xls", ".pptx", ".ppt",  # Microsoft Office
+    ".docm", ".dot", ".dotm", ".pptm", ".pot", ".potx",  # Office macro / template variants
+    ".xlsm", ".xlsb", ".xlw",  # additional Excel formats
+    ".rtf", ".epub",  # Rich text / e-book
+    ".png", ".jpg", ".jpeg", ".gif", ".bmp", ".tiff", ".tif", ".webp", ".svg",  # Images
+    ".odt", ".ods", ".odp",  # OpenDocument
+    ".hwp", ".hwpx",  # HWP (Korean word processor)
+})
+
+UNSTRUCTURED_DOCUMENT_EXTENSIONS: frozenset[str] = frozenset({
+    ".pdf",  # PDF
+    ".docx", ".doc", ".xlsx", ".xls", ".pptx", ".ppt",  # Microsoft Office
+    ".png", ".jpg", ".jpeg", ".bmp", ".tiff", ".tif", ".heic",  # Images
+    ".rtf", ".epub", ".odt",  # Rich text / e-book / OpenDocument text
+    ".eml", ".msg", ".p7s",  # E-mail messages and signatures
+})
+
+# ---------------------------------------------------------------------------
+# Union (used by classify_file for routing) + service lookup
+# ---------------------------------------------------------------------------
+
+DOCUMENT_EXTENSIONS: frozenset[str] = (
+    DOCLING_DOCUMENT_EXTENSIONS
+    | LLAMAPARSE_DOCUMENT_EXTENSIONS
+    | UNSTRUCTURED_DOCUMENT_EXTENSIONS
+)  # superset: membership here does not imply every parser accepts the file
+
+_SERVICE_MAP: dict[str, frozenset[str]] = {  # keyed by the ETL_SERVICE setting
+    "DOCLING": DOCLING_DOCUMENT_EXTENSIONS,
+    "LLAMACLOUD": LLAMAPARSE_DOCUMENT_EXTENSIONS,
+    "UNSTRUCTURED": UNSTRUCTURED_DOCUMENT_EXTENSIONS,
+}
+
+
+def get_document_extensions_for_service(etl_service: str | None) -> frozenset[str]:
+    """Look up which document extensions *etl_service* is able to parse."""
+    try:
+        return _SERVICE_MAP[etl_service or ""]
+    except KeyError:
+        return DOCUMENT_EXTENSIONS  # ``None`` or an unrecognised service name
+
 
 def is_supported_document_extension(filename: str) -> bool:
     """Return True if the file's extension is in the supported document set."""
diff --git a/surfsense_backend/tests/unit/connector_indexers/test_dropbox_parallel.py b/surfsense_backend/tests/unit/connector_indexers/test_dropbox_parallel.py
index 7a828b9c4..8572fa8ea 100644
--- a/surfsense_backend/tests/unit/connector_indexers/test_dropbox_parallel.py
+++ b/surfsense_backend/tests/unit/connector_indexers/test_dropbox_parallel.py
@@ -261,6 +261,8 @@ def full_scan_mocks(mock_dropbox_client, monkeypatch):
 
     skip_results: dict[str, tuple[bool, str | None]] = {}
 
+    monkeypatch.setattr("app.config.config.ETL_SERVICE", "LLAMACLOUD")
+
     async def _fake_skip(session, file, search_space_id):
         from app.connectors.dropbox.file_types import should_skip_file as _skip
         if _skip(file):
diff --git a/surfsense_backend/tests/unit/connectors/test_dropbox_file_types.py b/surfsense_backend/tests/unit/connectors/test_dropbox_file_types.py
index 5480d8c8a..e092872c5 100644
--- a/surfsense_backend/tests/unit/connectors/test_dropbox_file_types.py
+++ b/surfsense_backend/tests/unit/connectors/test_dropbox_file_types.py
@@ -7,6 +7,11 @@ from app.connectors.dropbox.file_types import should_skip_file
 pytestmark = pytest.mark.unit
 
 
+# ---------------------------------------------------------------------------
+# Structural skips (independent of ETL service)
+# ---------------------------------------------------------------------------
+
+
 def test_folder_item_is_skipped():
     item = {".tag": "folder", "name": "My Folder"}
     assert should_skip_file(item) is True
@@ -22,13 +27,18 @@ def test_non_downloadable_item_is_skipped():
     assert should_skip_file(item) is True
 
 
+# ---------------------------------------------------------------------------
+# Extension-based skips (require ETL service context)
+# ---------------------------------------------------------------------------
+
+
 @pytest.mark.parametrize(
     "filename",
     [
         "archive.zip", "backup.tar", "data.gz", "stuff.rar", "pack.7z",
         "program.exe", "lib.dll", "module.so", "image.dmg", "disk.iso",
         "movie.mov", "clip.avi", "video.mkv", "film.wmv", "stream.flv",
-        "icon.svg", "anim.gif", "photo.webp", "shot.heic", "favicon.ico",
+        "favicon.ico",
         "raw.cr2", "photo.nef", "image.arw", "pic.dng",
         "design.psd", "vector.ai", "mockup.sketch", "proto.fig",
         "font.ttf", "font.otf", "font.woff", "font.woff2",
@@ -36,7 +46,8 @@ def test_non_downloadable_item_is_skipped():
         "local.db", "data.sqlite", "access.mdb",
     ],
 )
-def test_non_parseable_extensions_are_skipped(filename):
+def test_non_parseable_extensions_are_skipped(filename, mocker):
+    mocker.patch("app.config.config.ETL_SERVICE", "DOCLING")
     item = {".tag": "file", "name": filename}
     assert should_skip_file(item) is True, f"{filename} should be skipped"
 
@@ -45,29 +56,61 @@ def test_non_parseable_extensions_are_skipped(filename):
     "filename",
     [
         "report.pdf", "document.docx", "sheet.xlsx", "slides.pptx",
-        "old.doc", "legacy.xls", "deck.ppt",
         "readme.txt", "data.csv", "page.html", "notes.md",
         "config.json", "feed.xml",
     ],
 )
-def test_parseable_documents_are_not_skipped(filename):
-    item = {".tag": "file", "name": filename}
-    assert should_skip_file(item) is False, f"{filename} should NOT be skipped"
+def test_parseable_documents_are_not_skipped(filename, mocker):
+    """Files in plaintext/direct_convert/universal document sets are never skipped."""
+    for service in ("DOCLING", "LLAMACLOUD", "UNSTRUCTURED"):
+        mocker.patch("app.config.config.ETL_SERVICE", service)
+        item = {".tag": "file", "name": filename}
+        assert should_skip_file(item) is False, (
+            f"{filename} should NOT be skipped with {service}"
+        )
 
 
 @pytest.mark.parametrize(
     "filename",
     ["photo.jpg", "image.jpeg", "screenshot.png", "scan.bmp", "page.tiff", "doc.tif"],
 )
-def test_universal_images_are_not_skipped(filename):
-    item = {".tag": "file", "name": filename}
-    assert should_skip_file(item) is False, f"{filename} should NOT be skipped"
+def test_universal_images_are_not_skipped(filename, mocker):
+    """Images supported by all parsers are never skipped."""
+    for service in ("DOCLING", "LLAMACLOUD", "UNSTRUCTURED"):
+        mocker.patch("app.config.config.ETL_SERVICE", service)
+        item = {".tag": "file", "name": filename}
+        assert should_skip_file(item) is False, (
+            f"{filename} should NOT be skipped with {service}"
+        )
 
 
-@pytest.mark.parametrize(
-    "filename",
-    ["icon.svg", "anim.gif", "photo.webp", "live.heic"],
-)
-def test_non_universal_images_are_skipped(filename):
+@pytest.mark.parametrize("filename,service,expected_skip", [
+    ("old.doc", "DOCLING", True),
+    ("old.doc", "LLAMACLOUD", False),
+    ("old.doc", "UNSTRUCTURED", False),
+    ("legacy.xls", "DOCLING", True),
+    ("legacy.xls", "LLAMACLOUD", False),
+    ("legacy.xls", "UNSTRUCTURED", False),
+    ("deck.ppt", "DOCLING", True),
+    ("deck.ppt", "LLAMACLOUD", False),
+    ("deck.ppt", "UNSTRUCTURED", False),
+    ("icon.svg", "DOCLING", True),
+    ("icon.svg", "LLAMACLOUD", False),
+    ("anim.gif", "DOCLING", True),
+    ("anim.gif", "LLAMACLOUD", False),
+    ("photo.webp", "DOCLING", False),
+    ("photo.webp", "LLAMACLOUD", False),
+    ("photo.webp", "UNSTRUCTURED", True),
+    ("live.heic", "DOCLING", True),
+    ("live.heic", "UNSTRUCTURED", False),
+    ("macro.docm", "DOCLING", True),
+    ("macro.docm", "LLAMACLOUD", False),
+    ("mail.eml", "DOCLING", True),
+    ("mail.eml", "UNSTRUCTURED", False),
+])
+def test_parser_specific_extensions(filename, service, expected_skip, mocker):
+    mocker.patch("app.config.config.ETL_SERVICE", service)
     item = {".tag": "file", "name": filename}
-    assert should_skip_file(item) is True, f"{filename} should be skipped"
+    assert should_skip_file(item) is expected_skip, (
+        f"{filename} with {service}: expected skip={expected_skip}"
+    )
diff --git a/surfsense_backend/tests/unit/connectors/test_google_drive_file_types.py b/surfsense_backend/tests/unit/connectors/test_google_drive_file_types.py
index adbad74c2..4ed7eb4db 100644
--- a/surfsense_backend/tests/unit/connectors/test_google_drive_file_types.py
+++ b/surfsense_backend/tests/unit/connectors/test_google_drive_file_types.py
@@ -10,13 +10,38 @@ pytestmark = pytest.mark.unit
 @pytest.mark.parametrize("filename", [
     "malware.exe", "archive.zip", "video.mov", "font.woff2", "model.blend",
 ])
-def test_unsupported_extensions_are_skipped(filename):
-    assert should_skip_by_extension(filename) is True
+def test_unsupported_extensions_are_skipped_regardless_of_service(filename, mocker):
+    """Truly unsupported files are skipped no matter which ETL service is configured."""
+    for service in ("DOCLING", "LLAMACLOUD", "UNSTRUCTURED"):
+        mocker.patch("app.config.config.ETL_SERVICE", service)
+        assert should_skip_by_extension(filename) is True
 
 
 @pytest.mark.parametrize("filename", [
     "report.pdf", "doc.docx", "sheet.xlsx", "slides.pptx",
     "readme.txt", "data.csv", "photo.png", "notes.md",
 ])
-def test_parseable_extensions_are_not_skipped(filename):
-    assert should_skip_by_extension(filename) is False
+def test_universal_extensions_are_not_skipped(filename, mocker):
+    """Files supported by all parsers (or handled by plaintext/direct_convert) are never skipped."""
+    for service in ("DOCLING", "LLAMACLOUD", "UNSTRUCTURED"):
+        mocker.patch("app.config.config.ETL_SERVICE", service)
+        assert should_skip_by_extension(filename) is False, (
+            f"{filename} should NOT be skipped with {service}"
+        )
+
+
+@pytest.mark.parametrize(("filename", "service", "expected_skip"), [
+    ("macro.docm", "DOCLING", True),
+    ("macro.docm", "LLAMACLOUD", False),
+    ("mail.eml", "DOCLING", True),
+    ("mail.eml", "UNSTRUCTURED", False),
+    ("photo.gif", "DOCLING", True),
+    ("photo.gif", "LLAMACLOUD", False),
+    ("photo.heic", "UNSTRUCTURED", False),
+    ("photo.heic", "DOCLING", True),
+])
+def test_parser_specific_extensions(filename, service, expected_skip, mocker):
+    mocker.patch("app.config.config.ETL_SERVICE", service)
+    skipped = should_skip_by_extension(filename)
+    failure = f"{filename} with {service}: expected skip={expected_skip}"
+    assert skipped is expected_skip, failure
diff --git a/surfsense_backend/tests/unit/connectors/test_onedrive_file_types.py b/surfsense_backend/tests/unit/connectors/test_onedrive_file_types.py
index a2491257d..e73f799e2 100644
--- a/surfsense_backend/tests/unit/connectors/test_onedrive_file_types.py
+++ b/surfsense_backend/tests/unit/connectors/test_onedrive_file_types.py
@@ -7,6 +7,11 @@ from app.connectors.onedrive.file_types import should_skip_file
 pytestmark = pytest.mark.unit
 
 
+# ---------------------------------------------------------------------------
+# Structural skips (independent of ETL service)
+# ---------------------------------------------------------------------------
+
+
 def test_folder_is_skipped():
     item = {"folder": {}, "name": "My Folder"}
     assert should_skip_file(item) is True
@@ -27,10 +32,16 @@ def test_onenote_is_skipped():
     assert should_skip_file(item) is True
 
 
+# ---------------------------------------------------------------------------
+# Extension-based skips (require ETL service context)
+# ---------------------------------------------------------------------------
+
+
 @pytest.mark.parametrize("filename", [
     "malware.exe", "archive.zip", "video.mov", "font.woff2", "model.blend",
 ])
-def test_unsupported_extensions_are_skipped(filename):
+def test_unsupported_extensions_are_skipped(filename, mocker):
+    mocker.patch("app.config.config.ETL_SERVICE", "DOCLING")
     item = {"name": filename, "file": {"mimeType": "application/octet-stream"}}
     assert should_skip_file(item) is True, f"{filename} should be skipped"
 
@@ -39,6 +50,26 @@ def test_unsupported_extensions_are_skipped(filename):
     "report.pdf", "doc.docx", "sheet.xlsx", "slides.pptx",
     "readme.txt", "data.csv", "photo.png", "notes.md",
 ])
-def test_parseable_files_are_not_skipped(filename):
+def test_universal_files_are_not_skipped(filename, mocker):
+    for service in ("DOCLING", "LLAMACLOUD", "UNSTRUCTURED"):
+        mocker.patch("app.config.config.ETL_SERVICE", service)
+        item = {"name": filename, "file": {"mimeType": "application/octet-stream"}}
+        assert should_skip_file(item) is False, (
+            f"{filename} should NOT be skipped with {service}"
+        )
+
+
+@pytest.mark.parametrize("filename,service,expected_skip", [
+    ("macro.docm", "DOCLING", True),
+    ("macro.docm", "LLAMACLOUD", False),
+    ("mail.eml", "DOCLING", True),
+    ("mail.eml", "UNSTRUCTURED", False),
+    ("photo.heic", "UNSTRUCTURED", False),
+    ("photo.heic", "DOCLING", True),
+])
+def test_parser_specific_extensions(filename, service, expected_skip, mocker):
+    mocker.patch("app.config.config.ETL_SERVICE", service)
     item = {"name": filename, "file": {"mimeType": "application/octet-stream"}}
-    assert should_skip_file(item) is False, f"{filename} should NOT be skipped"
+    assert should_skip_file(item) is expected_skip, (
+        f"{filename} with {service}: expected skip={expected_skip}"
+    )
diff --git a/surfsense_backend/tests/unit/etl_pipeline/test_etl_pipeline_service.py b/surfsense_backend/tests/unit/etl_pipeline/test_etl_pipeline_service.py
index facf15eab..e90847e3a 100644
--- a/surfsense_backend/tests/unit/etl_pipeline/test_etl_pipeline_service.py
+++ b/surfsense_backend/tests/unit/etl_pipeline/test_etl_pipeline_service.py
@@ -377,3 +377,72 @@ async def test_extract_zip_raises_unsupported_error(tmp_path):
         await EtlPipelineService().extract(
             EtlRequest(file_path=str(zip_file), filename="archive.zip")
         )
+
+
+# ---------------------------------------------------------------------------
+# Slice 14 – should_skip_for_service (per-parser document filtering)
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.parametrize(("filename", "etl_service", "expected_skip"), [
+    ("file.eml", "DOCLING", True),
+    ("file.eml", "UNSTRUCTURED", False),
+    ("file.docm", "LLAMACLOUD", False),
+    ("file.docm", "DOCLING", True),
+    ("file.txt", "DOCLING", False),
+    ("file.csv", "LLAMACLOUD", False),
+    ("file.mp3", "UNSTRUCTURED", False),
+    ("file.exe", "LLAMACLOUD", True),
+    ("file.pdf", "DOCLING", False),
+    ("file.webp", "DOCLING", False),
+    ("file.webp", "UNSTRUCTURED", True),
+    ("file.gif", "LLAMACLOUD", False),
+    ("file.gif", "DOCLING", True),
+    ("file.heic", "UNSTRUCTURED", False),
+    ("file.heic", "DOCLING", True),
+    ("file.svg", "LLAMACLOUD", False),
+    ("file.svg", "DOCLING", True),
+    ("file.p7s", "UNSTRUCTURED", False),
+    ("file.p7s", "LLAMACLOUD", True),
+])
+def test_should_skip_for_service(filename, etl_service, expected_skip):
+    from app.etl_pipeline.file_classifier import should_skip_for_service
+
+    skipped = should_skip_for_service(filename, etl_service)
+    failure = f"{filename} with {etl_service}: expected skip={expected_skip}"
+    assert skipped is expected_skip, failure
+
+
+# ---------------------------------------------------------------------------
+# Slice 14b – ETL pipeline rejects per-parser incompatible documents
+# ---------------------------------------------------------------------------
+
+
+async def test_extract_docm_with_docling_raises_unsupported(tmp_path, mocker):
+    """A ``.docm`` file raises EtlUnsupportedFileError when DOCLING is configured."""
+    from app.etl_pipeline.exceptions import EtlUnsupportedFileError
+
+    mocker.patch("app.config.config.ETL_SERVICE", "DOCLING")
+    macro_path = tmp_path / "macro.docm"
+    macro_path.write_bytes(bytes(10))
+
+    with pytest.raises(EtlUnsupportedFileError, match="not supported by DOCLING"):
+        pipeline = EtlPipelineService()
+        await pipeline.extract(
+            EtlRequest(file_path=str(macro_path), filename=macro_path.name)
+        )
+
+
+async def test_extract_eml_with_docling_raises_unsupported(tmp_path, mocker):
+    """An ``.eml`` file raises EtlUnsupportedFileError when DOCLING is configured."""
+    from app.etl_pipeline.exceptions import EtlUnsupportedFileError
+
+    mocker.patch("app.config.config.ETL_SERVICE", "DOCLING")
+    mail_path = tmp_path / "mail.eml"
+    mail_path.write_bytes(b"From: test@example.com")
+
+    with pytest.raises(EtlUnsupportedFileError, match="not supported by DOCLING"):
+        pipeline = EtlPipelineService()
+        await pipeline.extract(
+            EtlRequest(file_path=str(mail_path), filename=mail_path.name)
+        )
diff --git a/surfsense_backend/tests/unit/utils/test_file_extensions.py b/surfsense_backend/tests/unit/utils/test_file_extensions.py
index a376f44bd..acd8945ce 100644
--- a/surfsense_backend/tests/unit/utils/test_file_extensions.py
+++ b/surfsense_backend/tests/unit/utils/test_file_extensions.py
@@ -21,10 +21,17 @@ def test_exe_is_not_supported_document():
     "report.pdf", "doc.docx", "old.doc",
     "sheet.xlsx", "legacy.xls",
     "slides.pptx", "deck.ppt",
+    "macro.docm", "macro.xlsm", "macro.pptm",
     "photo.png", "photo.jpg", "photo.jpeg", "scan.bmp", "scan.tiff", "scan.tif",
+    "photo.webp", "anim.gif", "iphone.heic",
     "manual.rtf", "book.epub",
     "letter.odt", "data.ods", "presentation.odp",
-    "korean.hwpx",
+    "inbox.eml", "outlook.msg",
+    "korean.hwpx", "korean.hwp",
+    "template.dot", "template.dotm",
+    "template.pot", "template.potx",
+    "binary.xlsb", "workspace.xlw",
+    "vector.svg", "signature.p7s",
 ])
 def test_document_extensions_are_supported(filename):
     from app.utils.file_extensions import is_supported_document_extension
@@ -40,3 +47,70 @@ def test_non_document_extensions_are_not_supported(filename):
     from app.utils.file_extensions import is_supported_document_extension
 
     assert is_supported_document_extension(filename) is False, f"{filename} should NOT be supported"
+
+
+# ---------------------------------------------------------------------------
+# Per-parser extension sets
+# ---------------------------------------------------------------------------
+
+
+def test_union_equals_all_three_sets():
+    """DOCUMENT_EXTENSIONS is exactly the union of the three per-parser sets."""
+    from app.utils import file_extensions as fx
+
+    per_parser_sets = (
+        fx.DOCLING_DOCUMENT_EXTENSIONS,
+        fx.LLAMAPARSE_DOCUMENT_EXTENSIONS,
+        fx.UNSTRUCTURED_DOCUMENT_EXTENSIONS,
+    )
+
+    # Fold the sets together independently of the module's own union expression.
+    combined = frozenset().union(*per_parser_sets)
+
+    assert fx.DOCUMENT_EXTENSIONS == combined
+
+
+def test_get_extensions_for_docling():
+    """DOCLING lists PDF/DOCX/WebP but no e-mail, macro, GIF, or HEIC suffixes."""
+    from app.utils.file_extensions import get_document_extensions_for_service
+
+    docling_exts = get_document_extensions_for_service("DOCLING")
+
+    must_have = {".pdf", ".webp", ".docx"}
+    must_lack = {".eml", ".docm", ".gif", ".heic"}
+
+    assert must_have <= docling_exts, must_have - docling_exts
+    assert must_lack.isdisjoint(docling_exts), must_lack & docling_exts
+
+
+def test_get_extensions_for_llamacloud():
+    """LLAMACLOUD lists macro, GIF, SVG, and HWP suffixes but no e-mail or HEIC."""
+    from app.utils.file_extensions import get_document_extensions_for_service
+
+    llama_exts = get_document_extensions_for_service("LLAMACLOUD")
+    must_have = {".docm", ".gif", ".svg", ".hwp"}
+    must_lack = {".eml", ".heic"}
+
+    assert must_have <= llama_exts, must_have - llama_exts
+    assert must_lack.isdisjoint(llama_exts), must_lack & llama_exts
+
+
+def test_get_extensions_for_unstructured():
+    """UNSTRUCTURED lists e-mail, HEIC, and P7S suffixes but no macro, GIF, or SVG."""
+    from app.utils.file_extensions import get_document_extensions_for_service
+
+    unstructured_exts = get_document_extensions_for_service("UNSTRUCTURED")
+    must_have = {".eml", ".heic", ".p7s"}
+    must_lack = {".docm", ".gif", ".svg"}
+
+    assert must_have <= unstructured_exts, must_have - unstructured_exts
+    assert must_lack.isdisjoint(unstructured_exts), must_lack & unstructured_exts
+
+
+def test_get_extensions_for_none_returns_union():
+    """Passing ``None`` as the service yields the full DOCUMENT_EXTENSIONS union."""
+    from app.utils import file_extensions as fx
+
+    fallback = fx.get_document_extensions_for_service(None)
+
+    assert fallback == fx.DOCUMENT_EXTENSIONS
diff --git a/surfsense_web/components/sources/DocumentUploadTab.tsx b/surfsense_web/components/sources/DocumentUploadTab.tsx
index 6b59f8ef6..c8ce195aa 100644
--- a/surfsense_web/components/sources/DocumentUploadTab.tsx
+++ b/surfsense_web/components/sources/DocumentUploadTab.tsx
@@ -85,7 +85,6 @@ const FILE_TYPE_CONFIG: Record<string, Record<string, string[]>> = {
 		"application/rtf": [".rtf"],
 		"application/xml": [".xml"],
 		"application/epub+zip": [".epub"],
-		"text/html": [".html", ".htm", ".web"],
 		"image/gif": [".gif"],
 		"image/svg+xml": [".svg"],
 		...audioFileTypes,
@@ -472,12 +471,13 @@ export function DocumentUploadTab({
 						</button>
 					))
 				) : (
-					<div
-						className="flex flex-col items-center gap-4 py-12 px-4 cursor-pointer"
-						onClick={() => {
-							if (!isElectron) fileInputRef.current?.click();
-						}}
-					>
+				<button
+					type="button"
+					className="flex flex-col items-center gap-4 py-12 px-4 cursor-pointer w-full bg-transparent border-none"
+					onClick={() => {
+						if (!isElectron) fileInputRef.current?.click();
+					}}
+				>
 						<Upload className="h-10 w-10 text-muted-foreground" />
 						<div className="text-center space-y-1.5">
 							<p className="text-base font-medium">
@@ -485,10 +485,11 @@ export function DocumentUploadTab({
 							</p>
 							<p className="text-sm text-muted-foreground">{t("file_size_limit")}</p>
 						</div>
-						<div className="w-full mt-1" onClick={(e) => e.stopPropagation()}>
-							{renderBrowseButton({ fullWidth: true })}
-						</div>
+					{/* biome-ignore lint/a11y/useSemanticElements: wrapper to stop click propagation to parent button */}
+					<div className="w-full mt-1" onClick={(e) => e.stopPropagation()} onKeyDown={(e) => e.stopPropagation()} role="group">
+						{renderBrowseButton({ fullWidth: true })}
 					</div>
+					</button>
 				)}
 			</div>
 
@@ -683,13 +684,17 @@ export function DocumentUploadTab({
 						</span>
 					</AccordionTrigger>
 					<AccordionContent className="px-3 pb-3">
-						<div className="flex flex-wrap gap-1">
-							{supportedExtensions.map((ext) => (
-								<Badge key={ext} variant="outline" className="text-[10px] px-1.5 py-0">
-									{ext}
-								</Badge>
-							))}
-						</div>
+					<div className="flex flex-wrap gap-1.5">
+						{supportedExtensions.map((ext) => (
+							<Badge
+								key={ext}
+								variant="secondary"
+								className="rounded border-0 bg-neutral-200/80 dark:bg-neutral-700/60 text-muted-foreground text-[10px] px-2 py-0.5 font-normal"
+							>
+								{ext}
+							</Badge>
+						))}
+					</div>
 					</AccordionContent>
 				</AccordionItem>
 			</Accordion>