From a3d6fa6196f1871fc2e580f82c335305778362ca Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 20 May 2026 10:03:42 +0200 Subject: [PATCH] perf(document-converters): offload sync embed_text/embed_texts to thread generate_document_summary and create_document_chunks are async helpers called from the chat path and from many connector indexers. Both called the synchronous embed_text/embed_texts directly inside the coroutine, blocking the event loop for the full duration of the embedding call. Run them through asyncio.to_thread so the event loop stays free while embeddings are computed. --- surfsense_backend/app/utils/document_converters.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/surfsense_backend/app/utils/document_converters.py b/surfsense_backend/app/utils/document_converters.py index ed52c1b7b..9bc8103c5 100644 --- a/surfsense_backend/app/utils/document_converters.py +++ b/surfsense_backend/app/utils/document_converters.py @@ -1,3 +1,4 @@ +import asyncio import hashlib import logging import threading @@ -221,7 +222,9 @@ async def generate_document_summary( else: enhanced_summary_content = summary_content - summary_embedding = embed_text(enhanced_summary_content) + summary_embedding = await asyncio.to_thread( + embed_text, enhanced_summary_content + ) return enhanced_summary_content, summary_embedding @@ -237,7 +240,7 @@ async def create_document_chunks(content: str) -> list[Chunk]: List of Chunk objects with embeddings """ chunk_texts = [c.text for c in config.chunker_instance.chunk(content)] - chunk_embeddings = embed_texts(chunk_texts) + chunk_embeddings = await asyncio.to_thread(embed_texts, chunk_texts) return [ Chunk(content=text, embedding=emb) for text, emb in zip(chunk_texts, chunk_embeddings, strict=False)