From c4f2e9a3a5a85daecb66471beb2463a6cec4302c Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Mon, 9 Mar 2026 16:21:14 +0200 Subject: [PATCH] feat: use batch embedding in create_document_chunks --- surfsense_backend/app/utils/document_converters.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/surfsense_backend/app/utils/document_converters.py b/surfsense_backend/app/utils/document_converters.py index 41be0fa16..c96cb698d 100644 --- a/surfsense_backend/app/utils/document_converters.py +++ b/surfsense_backend/app/utils/document_converters.py @@ -223,12 +223,11 @@ async def create_document_chunks(content: str) -> list[Chunk]: Returns: List of Chunk objects with embeddings """ + chunk_texts = [c.text for c in config.chunker_instance.chunk(content)] + chunk_embeddings = embed_texts(chunk_texts) return [ - Chunk( - content=chunk.text, - embedding=embed_text(chunk.text), - ) - for chunk in config.chunker_instance.chunk(content) + Chunk(content=text, embedding=emb) + for text, emb in zip(chunk_texts, chunk_embeddings) ]