From 8d810467dd95a2cb7e34fa83324dc07d35801a04 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Tue, 7 Apr 2026 05:57:13 +0530 Subject: [PATCH] refactor: add support for XHTML file conversion to markdown in document processors --- .../app/tasks/document_processors/_direct_converters.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/surfsense_backend/app/tasks/document_processors/_direct_converters.py b/surfsense_backend/app/tasks/document_processors/_direct_converters.py index b1a69ef4f..bbff4838e 100644 --- a/surfsense_backend/app/tasks/document_processors/_direct_converters.py +++ b/surfsense_backend/app/tasks/document_processors/_direct_converters.py @@ -4,8 +4,8 @@ Lossless file-to-markdown converters for text-based formats. These converters handle file types that can be faithfully represented as markdown without any external ETL/OCR service: -- CSV / TSV → markdown table (stdlib ``csv``) -- HTML / HTM → markdown (``markdownify``) +- CSV / TSV → markdown table (stdlib ``csv``) +- HTML / HTM / XHTML → markdown (``markdownify``) """ from __future__ import annotations @@ -73,6 +73,7 @@ _CONVERTER_MAP: dict[str, Callable[..., str]] = { ".tsv": tsv_to_markdown, ".html": html_to_markdown, ".htm": html_to_markdown, + ".xhtml": html_to_markdown, }