mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-05-27 19:25:15 +02:00
perf(document-converters): offload sync embed_text/embed_texts to thread
generate_document_summary and create_document_chunks are async helpers called from the chat path and from many connector indexers. Both called the synchronous embed_text/embed_texts directly inside the coroutine, blocking the event loop for the full duration of the embedding call. The calls now run via asyncio.to_thread, so the event loop stays free while the embedding is computed.
This commit is contained in:
parent
52d425f170
commit
a3d6fa6196
1 changed file with 5 additions and 2 deletions
|
|
@@ -1,3 +1,4 @@
|
||||||
|
import asyncio
|
||||||
import hashlib
|
import hashlib
|
||||||
import logging
|
import logging
|
||||||
import threading
|
import threading
|
||||||
|
|
@@ -221,7 +222,9 @@ async def generate_document_summary(
|
||||||
else:
|
else:
|
||||||
enhanced_summary_content = summary_content
|
enhanced_summary_content = summary_content
|
||||||
|
|
||||||
summary_embedding = embed_text(enhanced_summary_content)
|
summary_embedding = await asyncio.to_thread(
|
||||||
|
embed_text, enhanced_summary_content
|
||||||
|
)
|
||||||
|
|
||||||
return enhanced_summary_content, summary_embedding
|
return enhanced_summary_content, summary_embedding
|
||||||
|
|
||||||
|
|
@@ -237,7 +240,7 @@ async def create_document_chunks(content: str) -> list[Chunk]:
|
||||||
List of Chunk objects with embeddings
|
List of Chunk objects with embeddings
|
||||||
"""
|
"""
|
||||||
chunk_texts = [c.text for c in config.chunker_instance.chunk(content)]
|
chunk_texts = [c.text for c in config.chunker_instance.chunk(content)]
|
||||||
chunk_embeddings = embed_texts(chunk_texts)
|
chunk_embeddings = await asyncio.to_thread(embed_texts, chunk_texts)
|
||||||
return [
|
return [
|
||||||
Chunk(content=text, embedding=emb)
|
Chunk(content=text, embedding=emb)
|
||||||
for text, emb in zip(chunk_texts, chunk_embeddings, strict=False)
|
for text, emb in zip(chunk_texts, chunk_embeddings, strict=False)
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue