From e16e4e2c5ccda5724bbc98c9064d05b696719008 Mon Sep 17 00:00:00 2001 From: DhruvTilva Date: Thu, 25 Jun 2026 23:52:15 +0530 Subject: [PATCH] fix: guard missing text_as_html in Table element markdown conversion When the Unstructured API returns a Table element without text_as_html in its metadata (e.g. local install or free-tier API), the lambda was raising KeyError: 'text_as_html', crashing the entire document indexing pipeline for any file containing tables. Guard the key access with .get() and fall back to the plain extracted text content (x) so the pipeline continues and the table content is still indexed, just without HTML formatting. --- surfsense_backend/app/utils/document_converters.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/surfsense_backend/app/utils/document_converters.py b/surfsense_backend/app/utils/document_converters.py index fef51d692..bd8740358 100644 --- a/surfsense_backend/app/utils/document_converters.py +++ b/surfsense_backend/app/utils/document_converters.py @@ -221,7 +221,11 @@ async def convert_element_to_markdown(element) -> str: "EmailAddress": lambda x: f"`{x}`", "Image": lambda x: f"![{x}]({x})", "PageBreak": lambda x: "\n---\n", - "Table": lambda x: f"```html\n{element.metadata['text_as_html']}\n```", + "Table": lambda x: ( + f"```html\n{element.metadata['text_as_html']}\n```" + if element.metadata.get("text_as_html") + else x + ), "Header": lambda x: f"## {x}\n\n", "Footer": lambda x: f"*{x}*\n\n", "CodeSnippet": lambda x: f"```\n{x}\n```",