From a49a4db6d6543bf78cd97b94298fceca3e9ede1c Mon Sep 17 00:00:00 2001
From: CREDO23 <thierrybakera12@gmail.com>
Date: Mon, 9 Mar 2026 16:33:24 +0200
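Subject: [PATCH] perf: use asyncio.to_thread for embed_text in FastAPI paths

Embedding calls made from async code paths were invoked synchronously:
embed_text() in the new_chat tools (docs search, shared memory, user
memory) and config.embedding_model_instance.embed() in
ConnectorService. Run them through asyncio.to_thread() and await the
result, so the embedding work executes in a worker thread instead of
on the event loop thread.
---
Note for reviewers: the connector_service.py hunk starts using
asyncio.to_thread but this patch adds no `import asyncio` to that file
(the other three files each gain one). Please confirm the module
already imports asyncio; this cannot be verified from the patch alone.
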
 .../app/agents/new_chat/tools/search_surfsense_docs.py     | 3 ++-
 .../app/agents/new_chat/tools/shared_memory.py             | 5 +++--
 surfsense_backend/app/agents/new_chat/tools/user_memory.py | 7 +++----
 surfsense_backend/app/services/connector_service.py        | 4 +++-
 4 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/surfsense_backend/app/agents/new_chat/tools/search_surfsense_docs.py b/surfsense_backend/app/agents/new_chat/tools/search_surfsense_docs.py
index ec86c3ffa..b8b1527c7 100644
--- a/surfsense_backend/app/agents/new_chat/tools/search_surfsense_docs.py
+++ b/surfsense_backend/app/agents/new_chat/tools/search_surfsense_docs.py
@@ -8,6 +8,7 @@ The documentation is indexed at deployment time from MDX files and stored
 in dedicated tables (surfsense_docs_documents, surfsense_docs_chunks).
 """
 
+import asyncio
 import json
 
 from langchain_core.tools import tool
@@ -100,7 +101,7 @@ async def search_surfsense_docs_async(
         Formatted string with relevant documentation content
     """
     # Get embedding for the query
-    query_embedding = embed_text(query)
+    query_embedding = await asyncio.to_thread(embed_text, query)
 
     # Vector similarity search on chunks, joining with documents
     stmt = (
diff --git a/surfsense_backend/app/agents/new_chat/tools/shared_memory.py b/surfsense_backend/app/agents/new_chat/tools/shared_memory.py
index ba69f1ce8..c826d808f 100644
--- a/surfsense_backend/app/agents/new_chat/tools/shared_memory.py
+++ b/surfsense_backend/app/agents/new_chat/tools/shared_memory.py
@@ -1,5 +1,6 @@
 """Shared (team) memory backend for search-space-scoped AI context."""
 
+import asyncio
 import logging
 from typing import Any
 from uuid import UUID
@@ -64,7 +65,7 @@ async def save_shared_memory(
         count = await get_shared_memory_count(db_session, search_space_id)
         if count >= MAX_MEMORIES_PER_SEARCH_SPACE:
             await delete_oldest_shared_memory(db_session, search_space_id)
-        embedding = embed_text(content)
+        embedding = await asyncio.to_thread(embed_text, content)
         row = SharedMemory(
             search_space_id=search_space_id,
             created_by_id=_to_uuid(created_by_id),
@@ -108,7 +109,7 @@ async def recall_shared_memory(
         if category and category in valid_categories:
             stmt = stmt.where(SharedMemory.category == MemoryCategory(category))
         if query:
-            query_embedding = embed_text(query)
+            query_embedding = await asyncio.to_thread(embed_text, query)
             stmt = stmt.order_by(
                 SharedMemory.embedding.op("<=>")(query_embedding)
             ).limit(top_k)
diff --git a/surfsense_backend/app/agents/new_chat/tools/user_memory.py b/surfsense_backend/app/agents/new_chat/tools/user_memory.py
index 8aa516454..81e849856 100644
--- a/surfsense_backend/app/agents/new_chat/tools/user_memory.py
+++ b/surfsense_backend/app/agents/new_chat/tools/user_memory.py
@@ -9,6 +9,7 @@ Features:
 - recall_memory: Retrieve relevant memories using semantic search
 """
 
+import asyncio
 import logging
 from typing import Any
 from uuid import UUID
@@ -177,8 +178,7 @@ def create_save_memory_tool(
                 # Delete oldest memory to make room
                 await delete_oldest_memory(db_session, user_id, search_space_id)
 
-            # Generate embedding for the memory
-            embedding = embed_text(content)
+            embedding = await asyncio.to_thread(embed_text, content)
 
             # Create new memory using ORM
             # The pgvector Vector column type handles embedding conversion automatically
@@ -267,8 +267,7 @@ def create_recall_memory_tool(
             uuid_user_id = _to_uuid(user_id)
 
             if query:
-                # Semantic search using embeddings
-                query_embedding = embed_text(query)
+                query_embedding = await asyncio.to_thread(embed_text, query)
 
                 # Build query with vector similarity
                 stmt = (
diff --git a/surfsense_backend/app/services/connector_service.py b/surfsense_backend/app/services/connector_service.py
index 0aa48eccd..ac51e1833 100644
--- a/surfsense_backend/app/services/connector_service.py
+++ b/surfsense_backend/app/services/connector_service.py
@@ -264,7 +264,9 @@ class ConnectorService:
         # Reuse caller-provided embedding or compute once for both retrievers.
         if query_embedding is None:
             t_embed = time.perf_counter()
-            query_embedding = config.embedding_model_instance.embed(query_text)
+            query_embedding = await asyncio.to_thread(
+                config.embedding_model_instance.embed, query_text
+            )
             perf.info(
                 "[connector_svc] _combined_rrf embedding in %.3fs type=%s",
                 time.perf_counter() - t_embed,