mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-05-25 19:15:18 +02:00
feat(retriever): instrument knowledge base search
This commit is contained in:
parent
53691f9c51
commit
b9d76f006d
3 changed files with 143 additions and 0 deletions
|
|
@ -1,13 +1,51 @@
|
|||
import asyncio
|
||||
import contextlib
|
||||
import functools
|
||||
import time
|
||||
from datetime import datetime
|
||||
|
||||
from app.observability import metrics as ot_metrics, otel as ot
|
||||
from app.utils.perf import get_perf_logger
|
||||
|
||||
_MAX_FETCH_CHUNKS_PER_DOC = 20
|
||||
|
||||
|
||||
def _instrument_search(mode: str):
|
||||
def _decorator(func):
|
||||
@functools.wraps(func)
|
||||
async def _wrapper(
|
||||
self, query_text: str, top_k: int, search_space_id: int, *args, **kwargs
|
||||
):
|
||||
t0 = time.perf_counter()
|
||||
with ot.kb_search_span(
|
||||
search_space_id=search_space_id,
|
||||
query_chars=len(query_text),
|
||||
extra={"search.surface": "chunks", "search.mode": mode},
|
||||
) as sp:
|
||||
try:
|
||||
result = await func(
|
||||
self, query_text, top_k, search_space_id, *args, **kwargs
|
||||
)
|
||||
except Exception:
|
||||
ot_metrics.record_kb_search_duration(
|
||||
(time.perf_counter() - t0) * 1000,
|
||||
search_space_id=search_space_id,
|
||||
surface="chunks",
|
||||
)
|
||||
raise
|
||||
sp.set_attribute("result.count", len(result))
|
||||
ot_metrics.record_kb_search_duration(
|
||||
(time.perf_counter() - t0) * 1000,
|
||||
search_space_id=search_space_id,
|
||||
surface="chunks",
|
||||
)
|
||||
return result
|
||||
|
||||
return _wrapper
|
||||
|
||||
return _decorator
|
||||
|
||||
|
||||
class ChucksHybridSearchRetriever:
|
||||
def __init__(self, db_session):
|
||||
"""
|
||||
|
|
@ -18,6 +56,7 @@ class ChucksHybridSearchRetriever:
|
|||
"""
|
||||
self.db_session = db_session
|
||||
|
||||
@_instrument_search("vector")
|
||||
async def vector_search(
|
||||
self,
|
||||
query_text: str,
|
||||
|
|
@ -88,6 +127,7 @@ class ChucksHybridSearchRetriever:
|
|||
|
||||
return chunks
|
||||
|
||||
@_instrument_search("full_text")
|
||||
async def full_text_search(
|
||||
self,
|
||||
query_text: str,
|
||||
|
|
@ -153,6 +193,7 @@ class ChucksHybridSearchRetriever:
|
|||
|
||||
return chunks
|
||||
|
||||
@_instrument_search("hybrid")
|
||||
async def hybrid_search(
|
||||
self,
|
||||
query_text: str,
|
||||
|
|
|
|||
|
|
@ -1,12 +1,50 @@
|
|||
import contextlib
|
||||
import functools
|
||||
import time
|
||||
from datetime import datetime
|
||||
|
||||
from app.observability import metrics as ot_metrics, otel as ot
|
||||
from app.utils.perf import get_perf_logger
|
||||
|
||||
_MAX_FETCH_CHUNKS_PER_DOC = 20
|
||||
|
||||
|
||||
def _instrument_search(mode: str):
|
||||
def _decorator(func):
|
||||
@functools.wraps(func)
|
||||
async def _wrapper(
|
||||
self, query_text: str, top_k: int, search_space_id: int, *args, **kwargs
|
||||
):
|
||||
t0 = time.perf_counter()
|
||||
with ot.kb_search_span(
|
||||
search_space_id=search_space_id,
|
||||
query_chars=len(query_text),
|
||||
extra={"search.surface": "documents", "search.mode": mode},
|
||||
) as sp:
|
||||
try:
|
||||
result = await func(
|
||||
self, query_text, top_k, search_space_id, *args, **kwargs
|
||||
)
|
||||
except Exception:
|
||||
ot_metrics.record_kb_search_duration(
|
||||
(time.perf_counter() - t0) * 1000,
|
||||
search_space_id=search_space_id,
|
||||
surface="documents",
|
||||
)
|
||||
raise
|
||||
sp.set_attribute("result.count", len(result))
|
||||
ot_metrics.record_kb_search_duration(
|
||||
(time.perf_counter() - t0) * 1000,
|
||||
search_space_id=search_space_id,
|
||||
surface="documents",
|
||||
)
|
||||
return result
|
||||
|
||||
return _wrapper
|
||||
|
||||
return _decorator
|
||||
|
||||
|
||||
class DocumentHybridSearchRetriever:
|
||||
def __init__(self, db_session):
|
||||
"""
|
||||
|
|
@ -17,6 +55,7 @@ class DocumentHybridSearchRetriever:
|
|||
"""
|
||||
self.db_session = db_session
|
||||
|
||||
@_instrument_search("vector")
|
||||
async def vector_search(
|
||||
self,
|
||||
query_text: str,
|
||||
|
|
@ -81,6 +120,7 @@ class DocumentHybridSearchRetriever:
|
|||
|
||||
return documents
|
||||
|
||||
@_instrument_search("full_text")
|
||||
async def full_text_search(
|
||||
self,
|
||||
query_text: str,
|
||||
|
|
@ -145,6 +185,7 @@ class DocumentHybridSearchRetriever:
|
|||
|
||||
return documents
|
||||
|
||||
@_instrument_search("hybrid")
|
||||
async def hybrid_search(
|
||||
self,
|
||||
query_text: str,
|
||||
|
|
|
|||
|
|
@ -0,0 +1,61 @@
|
|||
"""Tests for retriever OTel wrappers."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from contextlib import contextmanager
|
||||
from typing import Any
|
||||
|
||||
import pytest
|
||||
|
||||
from app.retriever.documents_hybrid_search import _instrument_search
|
||||
|
||||
pytestmark = pytest.mark.unit
|
||||
|
||||
|
||||
class _Span:
|
||||
def __init__(self) -> None:
|
||||
self.attrs: dict[str, Any] = {}
|
||||
|
||||
def set_attribute(self, key: str, value: Any) -> None:
|
||||
self.attrs[key] = value
|
||||
|
||||
|
||||
@contextmanager
|
||||
def _fake_span(**kwargs):
|
||||
span = _Span()
|
||||
span.attrs.update(kwargs)
|
||||
yield span
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_retriever_wrapper_records_one_span_and_metric(monkeypatch) -> None:
|
||||
calls: list[dict[str, Any]] = []
|
||||
|
||||
monkeypatch.setattr(
|
||||
"app.retriever.documents_hybrid_search.ot.kb_search_span",
|
||||
lambda **kwargs: _fake_span(**kwargs),
|
||||
)
|
||||
monkeypatch.setattr(
|
||||
"app.retriever.documents_hybrid_search.ot_metrics.record_kb_search_duration",
|
||||
lambda duration_ms, **attrs: calls.append(
|
||||
{"duration_ms": duration_ms, **attrs}
|
||||
),
|
||||
)
|
||||
|
||||
class Retriever:
|
||||
@_instrument_search("hybrid")
|
||||
async def search(
|
||||
self,
|
||||
query_text: str,
|
||||
top_k: int,
|
||||
search_space_id: int,
|
||||
) -> list[str]:
|
||||
del query_text, top_k, search_space_id
|
||||
return ["doc-1", "doc-2"]
|
||||
|
||||
result = await Retriever().search("hello", 3, 42)
|
||||
|
||||
assert result == ["doc-1", "doc-2"]
|
||||
assert len(calls) == 1
|
||||
assert calls[0]["search_space_id"] == 42
|
||||
assert calls[0]["surface"] == "documents"
|
||||
Loading…
Add table
Add a link
Reference in a new issue