feat(retriever): instrument knowledge base search

This commit is contained in:
Anish Sarkar 2026-05-21 23:03:31 +05:30
parent 53691f9c51
commit b9d76f006d
3 changed files with 143 additions and 0 deletions

View file

@ -1,13 +1,51 @@
import asyncio import asyncio
import contextlib import contextlib
import functools
import time import time
from datetime import datetime from datetime import datetime
from app.observability import metrics as ot_metrics, otel as ot
from app.utils.perf import get_perf_logger from app.utils.perf import get_perf_logger
_MAX_FETCH_CHUNKS_PER_DOC = 20 _MAX_FETCH_CHUNKS_PER_DOC = 20
def _instrument_search(mode: str):
def _decorator(func):
@functools.wraps(func)
async def _wrapper(
self, query_text: str, top_k: int, search_space_id: int, *args, **kwargs
):
t0 = time.perf_counter()
with ot.kb_search_span(
search_space_id=search_space_id,
query_chars=len(query_text),
extra={"search.surface": "chunks", "search.mode": mode},
) as sp:
try:
result = await func(
self, query_text, top_k, search_space_id, *args, **kwargs
)
except Exception:
ot_metrics.record_kb_search_duration(
(time.perf_counter() - t0) * 1000,
search_space_id=search_space_id,
surface="chunks",
)
raise
sp.set_attribute("result.count", len(result))
ot_metrics.record_kb_search_duration(
(time.perf_counter() - t0) * 1000,
search_space_id=search_space_id,
surface="chunks",
)
return result
return _wrapper
return _decorator
class ChucksHybridSearchRetriever: class ChucksHybridSearchRetriever:
def __init__(self, db_session): def __init__(self, db_session):
""" """
@ -18,6 +56,7 @@ class ChucksHybridSearchRetriever:
""" """
self.db_session = db_session self.db_session = db_session
@_instrument_search("vector")
async def vector_search( async def vector_search(
self, self,
query_text: str, query_text: str,
@ -88,6 +127,7 @@ class ChucksHybridSearchRetriever:
return chunks return chunks
@_instrument_search("full_text")
async def full_text_search( async def full_text_search(
self, self,
query_text: str, query_text: str,
@ -153,6 +193,7 @@ class ChucksHybridSearchRetriever:
return chunks return chunks
@_instrument_search("hybrid")
async def hybrid_search( async def hybrid_search(
self, self,
query_text: str, query_text: str,

View file

@ -1,12 +1,50 @@
import contextlib import contextlib
import functools
import time import time
from datetime import datetime from datetime import datetime
from app.observability import metrics as ot_metrics, otel as ot
from app.utils.perf import get_perf_logger from app.utils.perf import get_perf_logger
_MAX_FETCH_CHUNKS_PER_DOC = 20 _MAX_FETCH_CHUNKS_PER_DOC = 20
def _instrument_search(mode: str):
def _decorator(func):
@functools.wraps(func)
async def _wrapper(
self, query_text: str, top_k: int, search_space_id: int, *args, **kwargs
):
t0 = time.perf_counter()
with ot.kb_search_span(
search_space_id=search_space_id,
query_chars=len(query_text),
extra={"search.surface": "documents", "search.mode": mode},
) as sp:
try:
result = await func(
self, query_text, top_k, search_space_id, *args, **kwargs
)
except Exception:
ot_metrics.record_kb_search_duration(
(time.perf_counter() - t0) * 1000,
search_space_id=search_space_id,
surface="documents",
)
raise
sp.set_attribute("result.count", len(result))
ot_metrics.record_kb_search_duration(
(time.perf_counter() - t0) * 1000,
search_space_id=search_space_id,
surface="documents",
)
return result
return _wrapper
return _decorator
class DocumentHybridSearchRetriever: class DocumentHybridSearchRetriever:
def __init__(self, db_session): def __init__(self, db_session):
""" """
@ -17,6 +55,7 @@ class DocumentHybridSearchRetriever:
""" """
self.db_session = db_session self.db_session = db_session
@_instrument_search("vector")
async def vector_search( async def vector_search(
self, self,
query_text: str, query_text: str,
@ -81,6 +120,7 @@ class DocumentHybridSearchRetriever:
return documents return documents
@_instrument_search("full_text")
async def full_text_search( async def full_text_search(
self, self,
query_text: str, query_text: str,
@ -145,6 +185,7 @@ class DocumentHybridSearchRetriever:
return documents return documents
@_instrument_search("hybrid")
async def hybrid_search( async def hybrid_search(
self, self,
query_text: str, query_text: str,

View file

@ -0,0 +1,61 @@
"""Tests for retriever OTel wrappers."""
from __future__ import annotations
from contextlib import contextmanager
from typing import Any
import pytest
from app.retriever.documents_hybrid_search import _instrument_search
pytestmark = pytest.mark.unit
class _Span:
def __init__(self) -> None:
self.attrs: dict[str, Any] = {}
def set_attribute(self, key: str, value: Any) -> None:
self.attrs[key] = value
@contextmanager
def _fake_span(**kwargs):
span = _Span()
span.attrs.update(kwargs)
yield span
@pytest.mark.asyncio
async def test_retriever_wrapper_records_one_span_and_metric(monkeypatch) -> None:
calls: list[dict[str, Any]] = []
monkeypatch.setattr(
"app.retriever.documents_hybrid_search.ot.kb_search_span",
lambda **kwargs: _fake_span(**kwargs),
)
monkeypatch.setattr(
"app.retriever.documents_hybrid_search.ot_metrics.record_kb_search_duration",
lambda duration_ms, **attrs: calls.append(
{"duration_ms": duration_ms, **attrs}
),
)
class Retriever:
@_instrument_search("hybrid")
async def search(
self,
query_text: str,
top_k: int,
search_space_id: int,
) -> list[str]:
del query_text, top_k, search_space_id
return ["doc-1", "doc-2"]
result = await Retriever().search("hello", 3, 42)
assert result == ["doc-1", "doc-2"]
assert len(calls) == 1
assert calls[0]["search_space_id"] == 42
assert calls[0]["surface"] == "documents"