mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-05-17 18:35:19 +02:00
refactor: add support for XHTML file conversion to markdown in document processors
This commit is contained in:
parent
0a26a6c5bb
commit
8d810467dd
1 changed file with 3 additions and 2 deletions
|
|
@@ -4,8 +4,8 @@ Lossless file-to-markdown converters for text-based formats.
|
||||||
These converters handle file types that can be faithfully represented as
|
These converters handle file types that can be faithfully represented as
|
||||||
markdown without any external ETL/OCR service:
|
markdown without any external ETL/OCR service:
|
||||||
|
|
||||||
- CSV / TSV → markdown table (stdlib ``csv``)
|
- CSV / TSV → markdown table (stdlib ``csv``)
|
||||||
- HTML / HTM → markdown (``markdownify``)
|
- HTML / HTM / XHTML → markdown (``markdownify``)
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
@@ -73,6 +73,7 @@ _CONVERTER_MAP: dict[str, Callable[..., str]] = {
|
||||||
".tsv": tsv_to_markdown,
|
".tsv": tsv_to_markdown,
|
||||||
".html": html_to_markdown,
|
".html": html_to_markdown,
|
||||||
".htm": html_to_markdown,
|
".htm": html_to_markdown,
|
||||||
|
".xhtml": html_to_markdown,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue