Merge pull request #886 from xr843/fix/async-embedding-search-dev

fix: use asyncio.to_thread for embedding calls in search endpoints
This commit is contained in:
Rohan Verma 2026-03-15 02:49:25 -07:00 committed by GitHub
commit 0751f18e5a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@@ -1,3 +1,4 @@
+import asyncio
import time
from datetime import datetime
@@ -49,7 +50,7 @@ class ChucksHybridSearchRetriever:
# Get embedding for the query
embedding_model = config.embedding_model_instance
t_embed = time.perf_counter()
-query_embedding = embedding_model.embed(query_text)
+query_embedding = await asyncio.to_thread(embedding_model.embed, query_text)
perf.debug(
"[chunk_search] vector_search embedding in %.3fs",
time.perf_counter() - t_embed,
@@ -195,7 +196,7 @@ class ChucksHybridSearchRetriever:
if query_embedding is None:
embedding_model = config.embedding_model_instance
t_embed = time.perf_counter()
-query_embedding = embedding_model.embed(query_text)
+query_embedding = await asyncio.to_thread(embedding_model.embed, query_text)
perf.debug(
"[chunk_search] hybrid_search embedding in %.3fs",
time.perf_counter() - t_embed,
@@ -427,4 +428,4 @@ class ChucksHybridSearchRetriever:
search_space_id,
document_type,
)
-return final_docs
+return final_docs