perf: use asyncio.to_thread for embed_text in FastAPI paths

This commit is contained in:
CREDO23 2026-03-09 16:33:24 +02:00
parent c4f2e9a3a5
commit a49a4db6d6
4 changed files with 11 additions and 8 deletions

View file

@@ -8,6 +8,7 @@ The documentation is indexed at deployment time from MDX files and stored
in dedicated tables (surfsense_docs_documents, surfsense_docs_chunks).
"""
+import asyncio
import json
from langchain_core.tools import tool
@@ -100,7 +101,7 @@ async def search_surfsense_docs_async(
Formatted string with relevant documentation content
"""
# Get embedding for the query
-query_embedding = embed_text(query)
+query_embedding = await asyncio.to_thread(embed_text, query)
# Vector similarity search on chunks, joining with documents
stmt = (

View file

@@ -1,5 +1,6 @@
"""Shared (team) memory backend for search-space-scoped AI context."""
+import asyncio
import logging
from typing import Any
from uuid import UUID
@@ -64,7 +65,7 @@ async def save_shared_memory(
count = await get_shared_memory_count(db_session, search_space_id)
if count >= MAX_MEMORIES_PER_SEARCH_SPACE:
await delete_oldest_shared_memory(db_session, search_space_id)
-embedding = embed_text(content)
+embedding = await asyncio.to_thread(embed_text, content)
row = SharedMemory(
search_space_id=search_space_id,
created_by_id=_to_uuid(created_by_id),
@@ -108,7 +109,7 @@ async def recall_shared_memory(
if category and category in valid_categories:
stmt = stmt.where(SharedMemory.category == MemoryCategory(category))
if query:
-query_embedding = embed_text(query)
+query_embedding = await asyncio.to_thread(embed_text, query)
stmt = stmt.order_by(
SharedMemory.embedding.op("<=>")(query_embedding)
).limit(top_k)

View file

@@ -9,6 +9,7 @@ Features:
- recall_memory: Retrieve relevant memories using semantic search
"""
+import asyncio
import logging
from typing import Any
from uuid import UUID
@@ -177,8 +178,7 @@ def create_save_memory_tool(
# Delete oldest memory to make room
await delete_oldest_memory(db_session, user_id, search_space_id)
# Generate embedding for the memory
-embedding = embed_text(content)
+embedding = await asyncio.to_thread(embed_text, content)
# Create new memory using ORM
# The pgvector Vector column type handles embedding conversion automatically
@@ -267,8 +267,7 @@ def create_recall_memory_tool(
uuid_user_id = _to_uuid(user_id)
if query:
# Semantic search using embeddings
-query_embedding = embed_text(query)
+query_embedding = await asyncio.to_thread(embed_text, query)
# Build query with vector similarity
stmt = (