mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-04-26 01:06:23 +02:00
feat: enhance knowledge base search with date filtering
This commit is contained in:
parent
006dccbe4b
commit
ad0e77c3d6
7 changed files with 660 additions and 12 deletions
|
|
@ -1,12 +1,16 @@
|
|||
"""Unit tests for knowledge_search middleware helpers.
|
||||
"""Unit tests for knowledge_search middleware helpers."""
|
||||
|
||||
These test pure functions that don't require a database.
|
||||
"""
|
||||
import json
|
||||
|
||||
import pytest
|
||||
from langchain_core.messages import AIMessage, HumanMessage
|
||||
|
||||
from app.agents.new_chat.middleware.knowledge_search import (
|
||||
KnowledgeBaseSearchMiddleware,
|
||||
_build_document_xml,
|
||||
_normalize_optional_date_range,
|
||||
_parse_kb_search_plan_response,
|
||||
_render_recent_conversation,
|
||||
_resolve_search_types,
|
||||
)
|
||||
|
||||
|
|
@ -131,3 +135,234 @@ class TestBuildDocumentXml:
|
|||
line for line in lines if "<![CDATA[" in line and "<chunk" in line
|
||||
]
|
||||
assert len(chunk_lines) == 3
|
||||
|
||||
|
||||
# ── planner parsing / date normalization ───────────────────────────────
|
||||
|
||||
|
||||
class TestPlannerHelpers:
|
||||
def test_parse_kb_search_plan_response_accepts_plain_json(self):
|
||||
plan = _parse_kb_search_plan_response(
|
||||
json.dumps(
|
||||
{
|
||||
"optimized_query": "ocv meeting decisions summary",
|
||||
"start_date": "2026-03-01",
|
||||
"end_date": "2026-03-31",
|
||||
}
|
||||
)
|
||||
)
|
||||
assert plan.optimized_query == "ocv meeting decisions summary"
|
||||
assert plan.start_date == "2026-03-01"
|
||||
assert plan.end_date == "2026-03-31"
|
||||
|
||||
def test_parse_kb_search_plan_response_accepts_fenced_json(self):
|
||||
plan = _parse_kb_search_plan_response(
|
||||
"""```json
|
||||
{"optimized_query":"deel founders guide","start_date":null,"end_date":null}
|
||||
```"""
|
||||
)
|
||||
assert plan.optimized_query == "deel founders guide"
|
||||
assert plan.start_date is None
|
||||
assert plan.end_date is None
|
||||
|
||||
def test_normalize_optional_date_range_returns_none_when_absent(self):
|
||||
start_date, end_date = _normalize_optional_date_range(None, None)
|
||||
assert start_date is None
|
||||
assert end_date is None
|
||||
|
||||
def test_normalize_optional_date_range_resolves_single_bound(self):
|
||||
start_date, end_date = _normalize_optional_date_range("2026-03-01", None)
|
||||
assert start_date is not None
|
||||
assert end_date is not None
|
||||
assert start_date.date().isoformat() == "2026-03-01"
|
||||
assert end_date >= start_date
|
||||
|
||||
|
||||
class FakeLLM:
|
||||
def __init__(self, response_text: str):
|
||||
self.response_text = response_text
|
||||
self.calls: list[dict] = []
|
||||
|
||||
async def ainvoke(self, messages, config=None):
|
||||
self.calls.append({"messages": messages, "config": config})
|
||||
return AIMessage(content=self.response_text)
|
||||
|
||||
|
||||
class FakeBudgetLLM:
|
||||
def __init__(self, *, max_input_tokens: int):
|
||||
self._max_input_tokens_value = max_input_tokens
|
||||
|
||||
def _get_max_input_tokens(self) -> int:
|
||||
return self._max_input_tokens_value
|
||||
|
||||
def _count_tokens(self, messages) -> int:
|
||||
# Deterministic, simple proxy for tests: count characters as tokens.
|
||||
return sum(len(msg.get("content", "")) for msg in messages)
|
||||
|
||||
|
||||
class TestKnowledgeBaseSearchMiddlewarePlanner:
|
||||
def test_render_recent_conversation_prefers_latest_messages_under_budget(self):
|
||||
messages = [
|
||||
HumanMessage(content="old user context " * 40),
|
||||
AIMessage(content="old assistant answer " * 35),
|
||||
HumanMessage(content="recent user context " * 20),
|
||||
AIMessage(content="recent assistant answer " * 18),
|
||||
HumanMessage(content="latest question"),
|
||||
]
|
||||
|
||||
rendered = _render_recent_conversation(
|
||||
messages,
|
||||
llm=FakeBudgetLLM(max_input_tokens=900),
|
||||
user_text="latest question",
|
||||
)
|
||||
|
||||
assert "recent user context" in rendered
|
||||
assert "recent assistant answer" in rendered
|
||||
assert "latest question" not in rendered
|
||||
assert rendered.index("recent user context") < rendered.index(
|
||||
"recent assistant answer"
|
||||
)
|
||||
|
||||
def test_render_recent_conversation_falls_back_to_legacy_without_budgeting(self):
|
||||
messages = [
|
||||
HumanMessage(content="message one"),
|
||||
AIMessage(content="message two"),
|
||||
HumanMessage(content="latest question"),
|
||||
]
|
||||
|
||||
rendered = _render_recent_conversation(
|
||||
messages,
|
||||
llm=None,
|
||||
user_text="latest question",
|
||||
)
|
||||
|
||||
assert "user: message one" in rendered
|
||||
assert "assistant: message two" in rendered
|
||||
assert "latest question" not in rendered
|
||||
|
||||
async def test_middleware_uses_optimized_query_and_dates(self, monkeypatch):
|
||||
captured: dict = {}
|
||||
|
||||
async def fake_search_knowledge_base(**kwargs):
|
||||
captured.update(kwargs)
|
||||
return []
|
||||
|
||||
async def fake_build_scoped_filesystem(**kwargs):
|
||||
return {}
|
||||
|
||||
monkeypatch.setattr(
|
||||
"app.agents.new_chat.middleware.knowledge_search.search_knowledge_base",
|
||||
fake_search_knowledge_base,
|
||||
)
|
||||
monkeypatch.setattr(
|
||||
"app.agents.new_chat.middleware.knowledge_search.build_scoped_filesystem",
|
||||
fake_build_scoped_filesystem,
|
||||
)
|
||||
|
||||
llm = FakeLLM(
|
||||
json.dumps(
|
||||
{
|
||||
"optimized_query": "ocv meeting decisions action items",
|
||||
"start_date": "2026-03-01",
|
||||
"end_date": "2026-03-31",
|
||||
}
|
||||
)
|
||||
)
|
||||
middleware = KnowledgeBaseSearchMiddleware(llm=llm, search_space_id=37)
|
||||
|
||||
result = await middleware.abefore_agent(
|
||||
{
|
||||
"messages": [
|
||||
HumanMessage(content="what happened in our OCV meeting last month?")
|
||||
]
|
||||
},
|
||||
runtime=None,
|
||||
)
|
||||
|
||||
assert result is not None
|
||||
assert captured["query"] == "ocv meeting decisions action items"
|
||||
assert captured["start_date"] is not None
|
||||
assert captured["end_date"] is not None
|
||||
assert captured["start_date"].date().isoformat() == "2026-03-01"
|
||||
assert captured["end_date"].date().isoformat() == "2026-03-31"
|
||||
assert llm.calls[0]["config"] == {"tags": ["surfsense:internal"]}
|
||||
|
||||
async def test_middleware_falls_back_when_planner_returns_invalid_json(
|
||||
self,
|
||||
monkeypatch,
|
||||
):
|
||||
captured: dict = {}
|
||||
|
||||
async def fake_search_knowledge_base(**kwargs):
|
||||
captured.update(kwargs)
|
||||
return []
|
||||
|
||||
async def fake_build_scoped_filesystem(**kwargs):
|
||||
return {}
|
||||
|
||||
monkeypatch.setattr(
|
||||
"app.agents.new_chat.middleware.knowledge_search.search_knowledge_base",
|
||||
fake_search_knowledge_base,
|
||||
)
|
||||
monkeypatch.setattr(
|
||||
"app.agents.new_chat.middleware.knowledge_search.build_scoped_filesystem",
|
||||
fake_build_scoped_filesystem,
|
||||
)
|
||||
|
||||
middleware = KnowledgeBaseSearchMiddleware(
|
||||
llm=FakeLLM("not json"),
|
||||
search_space_id=37,
|
||||
)
|
||||
|
||||
await middleware.abefore_agent(
|
||||
{"messages": [HumanMessage(content="summarize founders guide by deel")]},
|
||||
runtime=None,
|
||||
)
|
||||
|
||||
assert captured["query"] == "summarize founders guide by deel"
|
||||
assert captured["start_date"] is None
|
||||
assert captured["end_date"] is None
|
||||
|
||||
async def test_middleware_passes_none_dates_when_planner_returns_nulls(
|
||||
self,
|
||||
monkeypatch,
|
||||
):
|
||||
captured: dict = {}
|
||||
|
||||
async def fake_search_knowledge_base(**kwargs):
|
||||
captured.update(kwargs)
|
||||
return []
|
||||
|
||||
async def fake_build_scoped_filesystem(**kwargs):
|
||||
return {}
|
||||
|
||||
monkeypatch.setattr(
|
||||
"app.agents.new_chat.middleware.knowledge_search.search_knowledge_base",
|
||||
fake_search_knowledge_base,
|
||||
)
|
||||
monkeypatch.setattr(
|
||||
"app.agents.new_chat.middleware.knowledge_search.build_scoped_filesystem",
|
||||
fake_build_scoped_filesystem,
|
||||
)
|
||||
|
||||
middleware = KnowledgeBaseSearchMiddleware(
|
||||
llm=FakeLLM(
|
||||
json.dumps(
|
||||
{
|
||||
"optimized_query": "deel founders guide summary",
|
||||
"start_date": None,
|
||||
"end_date": None,
|
||||
}
|
||||
)
|
||||
),
|
||||
search_space_id=37,
|
||||
)
|
||||
|
||||
await middleware.abefore_agent(
|
||||
{"messages": [HumanMessage(content="summarize founders guide by deel")]},
|
||||
runtime=None,
|
||||
)
|
||||
|
||||
assert captured["query"] == "deel founders guide summary"
|
||||
assert captured["start_date"] is None
|
||||
assert captured["end_date"] is None
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue