mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-05-25 19:15:18 +02:00
perf(indexers): offload sync embed_text to thread across background workers
Connector kb_sync_services (gmail, onedrive, google_calendar, jira), streaming indexers (discord, luma, teams), and the file-processor save path all called the synchronous embed_text directly inside async coroutines, blocking the background worker's event loop for the duration of each embedding call. Wrap each call site in asyncio.to_thread so that concurrent indexing tasks no longer serialise behind the embedding step.
This commit is contained in:
parent
a8de98895a
commit
1791241c0c
8 changed files with 34 additions and 11 deletions
|
|
@@ -670,7 +670,9 @@ async def index_discord_messages(
|
|||
|
||||
# Heavy processing (embeddings, chunks)
|
||||
chunks = await create_document_chunks(item["combined_document_string"])
|
||||
doc_embedding = embed_text(item["combined_document_string"])
|
||||
doc_embedding = await asyncio.to_thread(
|
||||
embed_text, item["combined_document_string"]
|
||||
)
|
||||
|
||||
# Update document to READY with actual content
|
||||
document.title = f"{item['guild_name']}#{item['channel_name']}"
|
||||
|
|
|
|||
|
|
@@ -6,6 +6,7 @@ Implements 2-phase document status updates for real-time UI feedback:
|
|||
- Phase 2: Process each event: pending → processing → ready/failed
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import time
|
||||
from collections.abc import Awaitable, Callable
|
||||
from datetime import datetime, timedelta
|
||||
|
|
@@ -465,7 +466,9 @@ async def index_luma_events(
|
|||
summary_content = (
|
||||
f"Luma Event: {item['event_name']}\n\n{item['event_markdown']}"
|
||||
)
|
||||
summary_embedding = embed_text(summary_content)
|
||||
summary_embedding = await asyncio.to_thread(
|
||||
embed_text, summary_content
|
||||
)
|
||||
|
||||
chunks = await create_document_chunks(item["event_markdown"])
|
||||
|
||||
|
|
|
|||
|
|
@@ -9,6 +9,7 @@ Uses 2-phase document status updates for real-time UI feedback:
|
|||
- Phase 2: Process each document: pending → processing → ready/failed
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import time
|
||||
from collections.abc import Awaitable, Callable
|
||||
from datetime import UTC, datetime
|
||||
|
|
@@ -581,7 +582,9 @@ async def index_teams_messages(
|
|||
|
||||
# Heavy processing (embeddings, chunks)
|
||||
chunks = await create_document_chunks(item["combined_document_string"])
|
||||
doc_embedding = embed_text(item["combined_document_string"])
|
||||
doc_embedding = await asyncio.to_thread(
|
||||
embed_text, item["combined_document_string"]
|
||||
)
|
||||
|
||||
# Update document to READY with actual content
|
||||
document.title = f"{item['team_name']} - {item['channel_name']}"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue