feat: add embed_texts batch embedding utility

This commit is contained in:
CREDO23 2026-03-09 15:53:40 +02:00
parent 79daebbd7d
commit 15aeec1fcb

View file

@ -55,6 +55,20 @@ def embed_text(text: str) -> np.ndarray:
return config.embedding_model_instance.embed(truncate_for_embedding(text))
def embed_texts(texts: list[str]) -> list[np.ndarray]:
    """Embed a batch of texts with a single provider call.

    Every input is first truncated to fit the model's context window, then
    the whole batch is handed to ``embed_batch``, which every chonkie
    provider (OpenAI, Azure, Cohere, SentenceTransformers, etc.) optimizes
    into fewer API calls / GPU passes than sequential ``embed``.
    """
    if texts:
        prepared = [truncate_for_embedding(text) for text in texts]
        return config.embedding_model_instance.embed_batch(prepared)
    # Nothing to embed — avoid a pointless provider round-trip.
    return []
def get_model_context_window(model_name: str) -> int:
"""Get the total context window size for a model (input + output tokens)."""
try: