mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-05-06 06:12:40 +02:00
perf: use asyncio.to_thread for embed_text in FastAPI paths
This commit is contained in:
parent
c4f2e9a3a5
commit
a49a4db6d6
4 changed files with 11 additions and 8 deletions
|
|
@@ -8,6 +8,7 @@ The documentation is indexed at deployment time from MDX files and stored
|
||||||
in dedicated tables (surfsense_docs_documents, surfsense_docs_chunks).
|
in dedicated tables (surfsense_docs_documents, surfsense_docs_chunks).
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
import json
|
import json
|
||||||
|
|
||||||
from langchain_core.tools import tool
|
from langchain_core.tools import tool
|
||||||
|
|
@@ -100,7 +101,7 @@ async def search_surfsense_docs_async(
|
||||||
Formatted string with relevant documentation content
|
Formatted string with relevant documentation content
|
||||||
"""
|
"""
|
||||||
# Get embedding for the query
|
# Get embedding for the query
|
||||||
query_embedding = embed_text(query)
|
query_embedding = await asyncio.to_thread(embed_text, query)
|
||||||
|
|
||||||
# Vector similarity search on chunks, joining with documents
|
# Vector similarity search on chunks, joining with documents
|
||||||
stmt = (
|
stmt = (
|
||||||
|
|
|
||||||
|
|
@@ -1,5 +1,6 @@
|
||||||
"""Shared (team) memory backend for search-space-scoped AI context."""
|
"""Shared (team) memory backend for search-space-scoped AI context."""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
import logging
|
import logging
|
||||||
from typing import Any
|
from typing import Any
|
||||||
from uuid import UUID
|
from uuid import UUID
|
||||||
|
|
@@ -64,7 +65,7 @@ async def save_shared_memory(
|
||||||
count = await get_shared_memory_count(db_session, search_space_id)
|
count = await get_shared_memory_count(db_session, search_space_id)
|
||||||
if count >= MAX_MEMORIES_PER_SEARCH_SPACE:
|
if count >= MAX_MEMORIES_PER_SEARCH_SPACE:
|
||||||
await delete_oldest_shared_memory(db_session, search_space_id)
|
await delete_oldest_shared_memory(db_session, search_space_id)
|
||||||
embedding = embed_text(content)
|
embedding = await asyncio.to_thread(embed_text, content)
|
||||||
row = SharedMemory(
|
row = SharedMemory(
|
||||||
search_space_id=search_space_id,
|
search_space_id=search_space_id,
|
||||||
created_by_id=_to_uuid(created_by_id),
|
created_by_id=_to_uuid(created_by_id),
|
||||||
|
|
@@ -108,7 +109,7 @@ async def recall_shared_memory(
|
||||||
if category and category in valid_categories:
|
if category and category in valid_categories:
|
||||||
stmt = stmt.where(SharedMemory.category == MemoryCategory(category))
|
stmt = stmt.where(SharedMemory.category == MemoryCategory(category))
|
||||||
if query:
|
if query:
|
||||||
query_embedding = embed_text(query)
|
query_embedding = await asyncio.to_thread(embed_text, query)
|
||||||
stmt = stmt.order_by(
|
stmt = stmt.order_by(
|
||||||
SharedMemory.embedding.op("<=>")(query_embedding)
|
SharedMemory.embedding.op("<=>")(query_embedding)
|
||||||
).limit(top_k)
|
).limit(top_k)
|
||||||
|
|
|
||||||
|
|
@@ -9,6 +9,7 @@ Features:
|
||||||
- recall_memory: Retrieve relevant memories using semantic search
|
- recall_memory: Retrieve relevant memories using semantic search
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
import logging
|
import logging
|
||||||
from typing import Any
|
from typing import Any
|
||||||
from uuid import UUID
|
from uuid import UUID
|
||||||
|
|
@@ -177,8 +178,7 @@ def create_save_memory_tool(
|
||||||
# Delete oldest memory to make room
|
# Delete oldest memory to make room
|
||||||
await delete_oldest_memory(db_session, user_id, search_space_id)
|
await delete_oldest_memory(db_session, user_id, search_space_id)
|
||||||
|
|
||||||
# Generate embedding for the memory
|
embedding = await asyncio.to_thread(embed_text, content)
|
||||||
embedding = embed_text(content)
|
|
||||||
|
|
||||||
# Create new memory using ORM
|
# Create new memory using ORM
|
||||||
# The pgvector Vector column type handles embedding conversion automatically
|
# The pgvector Vector column type handles embedding conversion automatically
|
||||||
|
|
@@ -267,8 +267,7 @@ def create_recall_memory_tool(
|
||||||
uuid_user_id = _to_uuid(user_id)
|
uuid_user_id = _to_uuid(user_id)
|
||||||
|
|
||||||
if query:
|
if query:
|
||||||
# Semantic search using embeddings
|
query_embedding = await asyncio.to_thread(embed_text, query)
|
||||||
query_embedding = embed_text(query)
|
|
||||||
|
|
||||||
# Build query with vector similarity
|
# Build query with vector similarity
|
||||||
stmt = (
|
stmt = (
|
||||||
|
|
|
||||||
|
|
@@ -264,7 +264,9 @@ class ConnectorService:
|
||||||
# Reuse caller-provided embedding or compute once for both retrievers.
|
# Reuse caller-provided embedding or compute once for both retrievers.
|
||||||
if query_embedding is None:
|
if query_embedding is None:
|
||||||
t_embed = time.perf_counter()
|
t_embed = time.perf_counter()
|
||||||
query_embedding = config.embedding_model_instance.embed(query_text)
|
query_embedding = await asyncio.to_thread(
|
||||||
|
config.embedding_model_instance.embed, query_text
|
||||||
|
)
|
||||||
perf.info(
|
perf.info(
|
||||||
"[connector_svc] _combined_rrf embedding in %.3fs type=%s",
|
"[connector_svc] _combined_rrf embedding in %.3fs type=%s",
|
||||||
time.perf_counter() - t_embed,
|
time.perf_counter() - t_embed,
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue