From 6eabfe2396070e551f3d4d93f01c2a5ba174dd8b Mon Sep 17 00:00:00 2001
From: CREDO23 <thierrybakera12@gmail.com>
Date: Mon, 9 Mar 2026 19:12:43 +0200
Subject: [PATCH] =?UTF-8?q?perf:=20conditional=20batch=20embedding=20?=
 =?UTF-8?q?=E2=80=94=20batch=20for=20API,=20sequential=20for=20local?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 surfsense_backend/app/config/__init__.py           | 1 +
 surfsense_backend/app/utils/document_converters.py | 9 ++++++---
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/surfsense_backend/app/config/__init__.py b/surfsense_backend/app/config/__init__.py
index 68c65a818..a03ef5f8a 100644
--- a/surfsense_backend/app/config/__init__.py
+++ b/surfsense_backend/app/config/__init__.py
@@ -327,6 +327,7 @@ class Config:
         EMBEDDING_MODEL,
         **embedding_kwargs,
     )
+    is_local_embedding_model = "://" not in (EMBEDDING_MODEL or "")
     chunker_instance = RecursiveChunker(
         chunk_size=getattr(embedding_model_instance, "max_seq_length", 512)
     )
diff --git a/surfsense_backend/app/utils/document_converters.py b/surfsense_backend/app/utils/document_converters.py
index c96cb698d..6a59990f5 100644
--- a/surfsense_backend/app/utils/document_converters.py
+++ b/surfsense_backend/app/utils/document_converters.py
@@ -59,13 +59,16 @@ def embed_texts(texts: list[str]) -> list[np.ndarray]:
     """Batch-embed multiple texts in a single call.
 
     Each text is truncated to fit the model's context window before embedding.
-    Uses ``embed_batch`` under the hood, which every chonkie provider
-    (OpenAI, Azure, Cohere, SentenceTransformers, etc.) optimizes
-    into fewer API calls / GPU passes than sequential ``embed``.
+    For API-based models (``://`` in the model string) this uses a single
+    ``embed_batch`` call to collapse many network round-trips into one.
+    For local models (no ``://``, e.g. SentenceTransformers) it instead
+    calls ``embed`` once per text to avoid batch-padding overhead.
     """
     if not texts:
         return []
     truncated = [truncate_for_embedding(t) for t in texts]
+    if config.is_local_embedding_model:
+        return [config.embedding_model_instance.embed(t) for t in truncated]
     return config.embedding_model_instance.embed_batch(truncated)