feat: enhance knowledge base search and document retrieval

- Introduced a mechanism to identify degenerate queries that lack meaningful search signals, improving search accuracy.
- Implemented a fallback method for browsing recent documents when queries are degenerate, ensuring relevant results are returned.
- Added limits on the number of chunks fetched per document to optimize performance and prevent excessive data loading.
- Updated the ConnectorService to allow for reusable query embeddings, enhancing efficiency in search operations.
- Enhanced LLM router service to support context window fallbacks, improving robustness when a model's context window limit is exceeded.
This commit is contained in:
DESKTOP-RTLN3BA\$punk 2026-02-28 19:40:24 -08:00
parent b08e8da40c
commit 40a091f8cc
7 changed files with 476 additions and 100 deletions

View file

@ -3,6 +3,8 @@ from datetime import datetime
from app.utils.perf import get_perf_logger
# Per-document cap on the number of chunks fetched during retrieval;
# keeps one very large file from flooding the result set with hundreds
# of chunks and slowing the query.
_MAX_FETCH_CHUNKS_PER_DOC = 30
class DocumentHybridSearchRetriever:
def __init__(self, db_session):
@ -279,7 +281,8 @@ class DocumentHybridSearchRetriever:
# Collect document IDs for chunk fetching
doc_ids: list[int] = [doc.id for doc, _score in documents_with_scores]
# Fetch ALL chunks for these documents in a single query
# Fetch chunks for these documents, capped per document to avoid
# loading hundreds of chunks for a single large file.
chunks_query = (
select(Chunk)
.options(joinedload(Chunk.document))
@ -287,7 +290,16 @@ class DocumentHybridSearchRetriever:
.order_by(Chunk.document_id, Chunk.id)
)
chunks_result = await self.db_session.execute(chunks_query)
chunks = chunks_result.scalars().all()
raw_chunks = chunks_result.scalars().all()
doc_chunk_counts: dict[int, int] = {}
chunks: list = []
for chunk in raw_chunks:
did = chunk.document_id
count = doc_chunk_counts.get(did, 0)
if count < _MAX_FETCH_CHUNKS_PER_DOC:
chunks.append(chunk)
doc_chunk_counts[did] = count + 1
# Assemble doc-grouped results
doc_map: dict[int, dict] = {