feat: enhance knowledge base search and document retrieval

- Introduced a mechanism to identify degenerate queries that lack meaningful search signals, improving search accuracy.
- Implemented a fallback method for browsing recent documents when queries are degenerate, ensuring relevant results are returned.
- Added limits on the number of chunks fetched per document to optimize performance and prevent excessive data loading.
- Updated the ConnectorService to allow for reusable query embeddings, enhancing efficiency in search operations.
- Enhanced LLM router service to support context window fallbacks, improving robustness when a model's context window limit is exceeded.
This commit is contained in:
DESKTOP-RTLN3BA\$punk 2026-02-28 19:40:24 -08:00
parent b08e8da40c
commit 40a091f8cc
7 changed files with 476 additions and 100 deletions

View file

@ -3,6 +3,8 @@ from datetime import datetime
from app.utils.perf import get_perf_logger
# Per-document cap on the number of chunks fetched during retrieval;
# keeps one very large file from flooding the result set with hundreds
# of chunks and slowing the query.
_MAX_FETCH_CHUNKS_PER_DOC = 30
class DocumentHybridSearchRetriever:
def __init__(self, db_session):
@ -279,7 +281,8 @@ class DocumentHybridSearchRetriever:
# Collect document IDs for chunk fetching
doc_ids: list[int] = [doc.id for doc, _score in documents_with_scores]
# Fetch ALL chunks for these documents in a single query
# Fetch chunks for these documents, capped per document to avoid
# loading hundreds of chunks for a single large file.
chunks_query = (
select(Chunk)
.options(joinedload(Chunk.document))
@ -287,7 +290,16 @@ class DocumentHybridSearchRetriever:
.order_by(Chunk.document_id, Chunk.id)
)
chunks_result = await self.db_session.execute(chunks_query)
chunks = chunks_result.scalars().all()
raw_chunks = chunks_result.scalars().all()
doc_chunk_counts: dict[int, int] = {}
chunks: list = []
for chunk in raw_chunks:
did = chunk.document_id
count = doc_chunk_counts.get(did, 0)
if count < _MAX_FETCH_CHUNKS_PER_DOC:
chunks.append(chunk)
doc_chunk_counts[did] = count + 1
# Assemble doc-grouped results
doc_map: dict[int, dict] = {