mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-05-25 19:15:18 +02:00
feat: use batch embedding in create_document_chunks
This commit is contained in:
parent
929445afd9
commit
c4f2e9a3a5
1 changed file with 4 additions and 5 deletions
|
|
@@ -223,12 +223,11 @@ async def create_document_chunks(content: str) -> list[Chunk]:
|
||||||
Returns:
|
Returns:
|
||||||
List of Chunk objects with embeddings
|
List of Chunk objects with embeddings
|
||||||
"""
|
"""
|
||||||
|
chunk_texts = [c.text for c in config.chunker_instance.chunk(content)]
|
||||||
|
chunk_embeddings = embed_texts(chunk_texts)
|
||||||
return [
|
return [
|
||||||
Chunk(
|
Chunk(content=text, embedding=emb)
|
||||||
content=chunk.text,
|
for text, emb in zip(chunk_texts, chunk_embeddings)
|
||||||
embedding=embed_text(chunk.text),
|
|
||||||
)
|
|
||||||
for chunk in config.chunker_instance.chunk(content)
|
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue