feat(retriever): instrument knowledge base search

This commit is contained in:
Anish Sarkar 2026-05-21 23:03:31 +05:30
parent 53691f9c51
commit b9d76f006d
3 changed files with 143 additions and 0 deletions

View file

@ -1,13 +1,51 @@
import asyncio
import contextlib
import functools
import time
from datetime import datetime
from app.observability import metrics as ot_metrics, otel as ot
from app.utils.perf import get_perf_logger
_MAX_FETCH_CHUNKS_PER_DOC = 20
def _instrument_search(mode: str):
def _decorator(func):
@functools.wraps(func)
async def _wrapper(
self, query_text: str, top_k: int, search_space_id: int, *args, **kwargs
):
t0 = time.perf_counter()
with ot.kb_search_span(
search_space_id=search_space_id,
query_chars=len(query_text),
extra={"search.surface": "chunks", "search.mode": mode},
) as sp:
try:
result = await func(
self, query_text, top_k, search_space_id, *args, **kwargs
)
except Exception:
ot_metrics.record_kb_search_duration(
(time.perf_counter() - t0) * 1000,
search_space_id=search_space_id,
surface="chunks",
)
raise
sp.set_attribute("result.count", len(result))
ot_metrics.record_kb_search_duration(
(time.perf_counter() - t0) * 1000,
search_space_id=search_space_id,
surface="chunks",
)
return result
return _wrapper
return _decorator
class ChucksHybridSearchRetriever:
def __init__(self, db_session):
"""
@ -18,6 +56,7 @@ class ChucksHybridSearchRetriever:
"""
self.db_session = db_session
@_instrument_search("vector")
async def vector_search(
self,
query_text: str,
@ -88,6 +127,7 @@ class ChucksHybridSearchRetriever:
return chunks
@_instrument_search("full_text")
async def full_text_search(
self,
query_text: str,
@ -153,6 +193,7 @@ class ChucksHybridSearchRetriever:
return chunks
@_instrument_search("hybrid")
async def hybrid_search(
self,
query_text: str,

View file

@ -1,12 +1,50 @@
import contextlib
import functools
import time
from datetime import datetime
from app.observability import metrics as ot_metrics, otel as ot
from app.utils.perf import get_perf_logger
_MAX_FETCH_CHUNKS_PER_DOC = 20
def _instrument_search(mode: str):
def _decorator(func):
@functools.wraps(func)
async def _wrapper(
self, query_text: str, top_k: int, search_space_id: int, *args, **kwargs
):
t0 = time.perf_counter()
with ot.kb_search_span(
search_space_id=search_space_id,
query_chars=len(query_text),
extra={"search.surface": "documents", "search.mode": mode},
) as sp:
try:
result = await func(
self, query_text, top_k, search_space_id, *args, **kwargs
)
except Exception:
ot_metrics.record_kb_search_duration(
(time.perf_counter() - t0) * 1000,
search_space_id=search_space_id,
surface="documents",
)
raise
sp.set_attribute("result.count", len(result))
ot_metrics.record_kb_search_duration(
(time.perf_counter() - t0) * 1000,
search_space_id=search_space_id,
surface="documents",
)
return result
return _wrapper
return _decorator
class DocumentHybridSearchRetriever:
def __init__(self, db_session):
"""
@ -17,6 +55,7 @@ class DocumentHybridSearchRetriever:
"""
self.db_session = db_session
@_instrument_search("vector")
async def vector_search(
self,
query_text: str,
@ -81,6 +120,7 @@ class DocumentHybridSearchRetriever:
return documents
@_instrument_search("full_text")
async def full_text_search(
self,
query_text: str,
@ -145,6 +185,7 @@ class DocumentHybridSearchRetriever:
return documents
@_instrument_search("hybrid")
async def hybrid_search(
self,
query_text: str,