From 0269900c60de86b18dd6872651bf9ca771bc6835 Mon Sep 17 00:00:00 2001
From: Tim Ren <137012659+xr843@users.noreply.github.com>
Date: Sun, 15 Mar 2026 16:21:19 +0800
Subject: [PATCH] fix: use asyncio.to_thread for embedding calls in search endpoints

Wrap synchronous embedding_model.embed() calls with asyncio.to_thread()
in both vector_search and hybrid_search methods. This prevents blocking
the asyncio event loop during embedding computation, improving server
responsiveness under concurrent load.

Fixes #794

Signed-off-by: Tim Ren <137012659+xr843@users.noreply.github.com>
---
 surfsense_backend/app/retriever/chunks_hybrid_search.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/surfsense_backend/app/retriever/chunks_hybrid_search.py b/surfsense_backend/app/retriever/chunks_hybrid_search.py
index 5ab2964ca..2a0bc1a4a 100644
--- a/surfsense_backend/app/retriever/chunks_hybrid_search.py
+++ b/surfsense_backend/app/retriever/chunks_hybrid_search.py
@@ -1,3 +1,4 @@
+import asyncio
 import time
 from datetime import datetime
 
@@ -49,7 +50,7 @@ class ChucksHybridSearchRetriever:
         # Get embedding for the query
         embedding_model = config.embedding_model_instance
         t_embed = time.perf_counter()
-        query_embedding = embedding_model.embed(query_text)
+        query_embedding = await asyncio.to_thread(embedding_model.embed, query_text)
         perf.debug(
             "[chunk_search] vector_search embedding in %.3fs",
             time.perf_counter() - t_embed,
@@ -195,7 +196,7 @@ class ChucksHybridSearchRetriever:
         if query_embedding is None:
             embedding_model = config.embedding_model_instance
             t_embed = time.perf_counter()
-            query_embedding = embedding_model.embed(query_text)
+            query_embedding = await asyncio.to_thread(embedding_model.embed, query_text)
             perf.debug(
                 "[chunk_search] hybrid_search embedding in %.3fs",
                 time.perf_counter() - t_embed,
@@ -427,4 +428,4 @@ class ChucksHybridSearchRetriever:
             search_space_id,
             document_type,
         )
-        return final_docs
+        return final_docs
\ No newline at end of file