fix: use asyncio.to_thread for embedding calls in search endpoints

Wrap the synchronous embedding_model.embed() calls with asyncio.to_thread() in both the vector_search and hybrid_search methods. This prevents embedding computation from blocking the asyncio event loop, improving server responsiveness under concurrent load.

Fixes #794

Signed-off-by: Tim Ren <137012659+xr843@users.noreply.github.com>
2026-07-24 23:41:10 +02:00 · 2026-03-15 16:21:19 +08:00 · 2026-03-15 16:21:19 +08:00 · 0269900c60
commit 0269900c60
parent ee07ed8168
1 changed file with 4 additions and 3 deletions
--- a/surfsense_backend/app/retriever/chunks_hybrid_search.py
+++ b/surfsense_backend/app/retriever/chunks_hybrid_search.py
@ -1,3 +1,4 @@
+import asyncio
 import time
 from datetime import datetime

@ -49,7 +50,7 @@ class ChucksHybridSearchRetriever:
        # Get embedding for the query
        embedding_model = config.embedding_model_instance
        t_embed = time.perf_counter()
-        query_embedding = embedding_model.embed(query_text)
+        query_embedding = await asyncio.to_thread(embedding_model.embed, query_text)
        perf.debug(
            "[chunk_search] vector_search embedding in %.3fs",
            time.perf_counter() - t_embed,
@ -195,7 +196,7 @@ class ChucksHybridSearchRetriever:
        if query_embedding is None:
            embedding_model = config.embedding_model_instance
            t_embed = time.perf_counter()
-            query_embedding = embedding_model.embed(query_text)
+            query_embedding = await asyncio.to_thread(embedding_model.embed, query_text)
            perf.debug(
                "[chunk_search] hybrid_search embedding in %.3fs",
                time.perf_counter() - t_embed,
@ -427,4 +428,4 @@ class ChucksHybridSearchRetriever:
            search_space_id,
            document_type,
        )
-        return final_docs
+        return final_docs