mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-05-27 19:25:15 +02:00
feat(retriever): instrument knowledge base search
This commit is contained in:
parent
53691f9c51
commit
b9d76f006d
3 changed files with 143 additions and 0 deletions
|
|
@ -1,13 +1,51 @@
|
||||||
import asyncio
|
import asyncio
|
||||||
import contextlib
|
import contextlib
|
||||||
|
import functools
|
||||||
import time
|
import time
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
|
from app.observability import metrics as ot_metrics, otel as ot
|
||||||
from app.utils.perf import get_perf_logger
|
from app.utils.perf import get_perf_logger
|
||||||
|
|
||||||
_MAX_FETCH_CHUNKS_PER_DOC = 20
|
_MAX_FETCH_CHUNKS_PER_DOC = 20
|
||||||
|
|
||||||
|
|
||||||
|
def _instrument_search(mode: str):
|
||||||
|
def _decorator(func):
|
||||||
|
@functools.wraps(func)
|
||||||
|
async def _wrapper(
|
||||||
|
self, query_text: str, top_k: int, search_space_id: int, *args, **kwargs
|
||||||
|
):
|
||||||
|
t0 = time.perf_counter()
|
||||||
|
with ot.kb_search_span(
|
||||||
|
search_space_id=search_space_id,
|
||||||
|
query_chars=len(query_text),
|
||||||
|
extra={"search.surface": "chunks", "search.mode": mode},
|
||||||
|
) as sp:
|
||||||
|
try:
|
||||||
|
result = await func(
|
||||||
|
self, query_text, top_k, search_space_id, *args, **kwargs
|
||||||
|
)
|
||||||
|
except Exception:
|
||||||
|
ot_metrics.record_kb_search_duration(
|
||||||
|
(time.perf_counter() - t0) * 1000,
|
||||||
|
search_space_id=search_space_id,
|
||||||
|
surface="chunks",
|
||||||
|
)
|
||||||
|
raise
|
||||||
|
sp.set_attribute("result.count", len(result))
|
||||||
|
ot_metrics.record_kb_search_duration(
|
||||||
|
(time.perf_counter() - t0) * 1000,
|
||||||
|
search_space_id=search_space_id,
|
||||||
|
surface="chunks",
|
||||||
|
)
|
||||||
|
return result
|
||||||
|
|
||||||
|
return _wrapper
|
||||||
|
|
||||||
|
return _decorator
|
||||||
|
|
||||||
|
|
||||||
class ChucksHybridSearchRetriever:
|
class ChucksHybridSearchRetriever:
|
||||||
def __init__(self, db_session):
|
def __init__(self, db_session):
|
||||||
"""
|
"""
|
||||||
|
|
@ -18,6 +56,7 @@ class ChucksHybridSearchRetriever:
|
||||||
"""
|
"""
|
||||||
self.db_session = db_session
|
self.db_session = db_session
|
||||||
|
|
||||||
|
@_instrument_search("vector")
|
||||||
async def vector_search(
|
async def vector_search(
|
||||||
self,
|
self,
|
||||||
query_text: str,
|
query_text: str,
|
||||||
|
|
@ -88,6 +127,7 @@ class ChucksHybridSearchRetriever:
|
||||||
|
|
||||||
return chunks
|
return chunks
|
||||||
|
|
||||||
|
@_instrument_search("full_text")
|
||||||
async def full_text_search(
|
async def full_text_search(
|
||||||
self,
|
self,
|
||||||
query_text: str,
|
query_text: str,
|
||||||
|
|
@ -153,6 +193,7 @@ class ChucksHybridSearchRetriever:
|
||||||
|
|
||||||
return chunks
|
return chunks
|
||||||
|
|
||||||
|
@_instrument_search("hybrid")
|
||||||
async def hybrid_search(
|
async def hybrid_search(
|
||||||
self,
|
self,
|
||||||
query_text: str,
|
query_text: str,
|
||||||
|
|
|
||||||
|
|
@ -1,12 +1,50 @@
|
||||||
import contextlib
|
import contextlib
|
||||||
|
import functools
|
||||||
import time
|
import time
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
|
from app.observability import metrics as ot_metrics, otel as ot
|
||||||
from app.utils.perf import get_perf_logger
|
from app.utils.perf import get_perf_logger
|
||||||
|
|
||||||
_MAX_FETCH_CHUNKS_PER_DOC = 20
|
_MAX_FETCH_CHUNKS_PER_DOC = 20
|
||||||
|
|
||||||
|
|
||||||
|
def _instrument_search(mode: str):
|
||||||
|
def _decorator(func):
|
||||||
|
@functools.wraps(func)
|
||||||
|
async def _wrapper(
|
||||||
|
self, query_text: str, top_k: int, search_space_id: int, *args, **kwargs
|
||||||
|
):
|
||||||
|
t0 = time.perf_counter()
|
||||||
|
with ot.kb_search_span(
|
||||||
|
search_space_id=search_space_id,
|
||||||
|
query_chars=len(query_text),
|
||||||
|
extra={"search.surface": "documents", "search.mode": mode},
|
||||||
|
) as sp:
|
||||||
|
try:
|
||||||
|
result = await func(
|
||||||
|
self, query_text, top_k, search_space_id, *args, **kwargs
|
||||||
|
)
|
||||||
|
except Exception:
|
||||||
|
ot_metrics.record_kb_search_duration(
|
||||||
|
(time.perf_counter() - t0) * 1000,
|
||||||
|
search_space_id=search_space_id,
|
||||||
|
surface="documents",
|
||||||
|
)
|
||||||
|
raise
|
||||||
|
sp.set_attribute("result.count", len(result))
|
||||||
|
ot_metrics.record_kb_search_duration(
|
||||||
|
(time.perf_counter() - t0) * 1000,
|
||||||
|
search_space_id=search_space_id,
|
||||||
|
surface="documents",
|
||||||
|
)
|
||||||
|
return result
|
||||||
|
|
||||||
|
return _wrapper
|
||||||
|
|
||||||
|
return _decorator
|
||||||
|
|
||||||
|
|
||||||
class DocumentHybridSearchRetriever:
|
class DocumentHybridSearchRetriever:
|
||||||
def __init__(self, db_session):
|
def __init__(self, db_session):
|
||||||
"""
|
"""
|
||||||
|
|
@ -17,6 +55,7 @@ class DocumentHybridSearchRetriever:
|
||||||
"""
|
"""
|
||||||
self.db_session = db_session
|
self.db_session = db_session
|
||||||
|
|
||||||
|
@_instrument_search("vector")
|
||||||
async def vector_search(
|
async def vector_search(
|
||||||
self,
|
self,
|
||||||
query_text: str,
|
query_text: str,
|
||||||
|
|
@ -81,6 +120,7 @@ class DocumentHybridSearchRetriever:
|
||||||
|
|
||||||
return documents
|
return documents
|
||||||
|
|
||||||
|
@_instrument_search("full_text")
|
||||||
async def full_text_search(
|
async def full_text_search(
|
||||||
self,
|
self,
|
||||||
query_text: str,
|
query_text: str,
|
||||||
|
|
@ -145,6 +185,7 @@ class DocumentHybridSearchRetriever:
|
||||||
|
|
||||||
return documents
|
return documents
|
||||||
|
|
||||||
|
@_instrument_search("hybrid")
|
||||||
async def hybrid_search(
|
async def hybrid_search(
|
||||||
self,
|
self,
|
||||||
query_text: str,
|
query_text: str,
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,61 @@
|
||||||
|
"""Tests for retriever OTel wrappers."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from contextlib import contextmanager
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from app.retriever.documents_hybrid_search import _instrument_search
|
||||||
|
|
||||||
|
pytestmark = pytest.mark.unit
|
||||||
|
|
||||||
|
|
||||||
|
class _Span:
|
||||||
|
def __init__(self) -> None:
|
||||||
|
self.attrs: dict[str, Any] = {}
|
||||||
|
|
||||||
|
def set_attribute(self, key: str, value: Any) -> None:
|
||||||
|
self.attrs[key] = value
|
||||||
|
|
||||||
|
|
||||||
|
@contextmanager
|
||||||
|
def _fake_span(**kwargs):
|
||||||
|
span = _Span()
|
||||||
|
span.attrs.update(kwargs)
|
||||||
|
yield span
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_retriever_wrapper_records_one_span_and_metric(monkeypatch) -> None:
|
||||||
|
calls: list[dict[str, Any]] = []
|
||||||
|
|
||||||
|
monkeypatch.setattr(
|
||||||
|
"app.retriever.documents_hybrid_search.ot.kb_search_span",
|
||||||
|
lambda **kwargs: _fake_span(**kwargs),
|
||||||
|
)
|
||||||
|
monkeypatch.setattr(
|
||||||
|
"app.retriever.documents_hybrid_search.ot_metrics.record_kb_search_duration",
|
||||||
|
lambda duration_ms, **attrs: calls.append(
|
||||||
|
{"duration_ms": duration_ms, **attrs}
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
class Retriever:
|
||||||
|
@_instrument_search("hybrid")
|
||||||
|
async def search(
|
||||||
|
self,
|
||||||
|
query_text: str,
|
||||||
|
top_k: int,
|
||||||
|
search_space_id: int,
|
||||||
|
) -> list[str]:
|
||||||
|
del query_text, top_k, search_space_id
|
||||||
|
return ["doc-1", "doc-2"]
|
||||||
|
|
||||||
|
result = await Retriever().search("hello", 3, 42)
|
||||||
|
|
||||||
|
assert result == ["doc-1", "doc-2"]
|
||||||
|
assert len(calls) == 1
|
||||||
|
assert calls[0]["search_space_id"] == 42
|
||||||
|
assert calls[0]["surface"] == "documents"
|
||||||
Loading…
Add table
Add a link
Reference in a new issue