mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-05-25 19:15:18 +02:00
perf(indexers): offload sync embed_text to thread across background workers
Connector kb_sync_services (gmail, onedrive, google_calendar, jira), streaming indexers (discord, luma, teams) and the file-processor save path all called embed_text inside async coroutines, blocking the background worker's event loop for the duration of the embed. Wrap each call site in asyncio.to_thread so concurrent indexing tasks stop serialising on the embed.
This commit is contained in:
parent
a8de98895a
commit
1791241c0c
8 changed files with 34 additions and 11 deletions
|
|
@ -2,6 +2,7 @@
|
|||
Unified document save/update logic for file processors.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
|
||||
from sqlalchemy.exc import SQLAlchemyError
|
||||
|
|
@ -43,7 +44,7 @@ async def _generate_summary(
|
|||
"""
|
||||
if not enable_summary:
|
||||
summary = f"File: {file_name}\n\n{markdown_content[:4000]}"
|
||||
return summary, embed_text(summary)
|
||||
return summary, await asyncio.to_thread(embed_text, summary)
|
||||
|
||||
if etl_service == "DOCLING":
|
||||
from app.services.docling_service import create_docling_service
|
||||
|
|
@ -65,7 +66,7 @@ async def _generate_summary(
|
|||
parts.append(f"**{formatted_key}:** {value}")
|
||||
|
||||
enhanced = "\n".join(parts) + "\n\n# DOCUMENT SUMMARY\n\n" + summary_text
|
||||
return enhanced, embed_text(enhanced)
|
||||
return enhanced, await asyncio.to_thread(embed_text, enhanced)
|
||||
|
||||
# Standard summary (Unstructured / LlamaCloud / others)
|
||||
meta = {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue