feat: enhance knowledge base search with date filtering

This commit is contained in:
DESKTOP-RTLN3BA\$punk 2026-03-31 20:13:46 -07:00
parent 006dccbe4b
commit ad0e77c3d6
7 changed files with 660 additions and 12 deletions

View file

@ -152,7 +152,9 @@ class _FakeReconciliationStripeClient:
class TestStripeCheckoutSessionCreation:
async def test_get_status_reflects_backend_toggle(self, client, headers, monkeypatch):
async def test_get_status_reflects_backend_toggle(
self, client, headers, monkeypatch
):
monkeypatch.setattr(stripe_routes.config, "STRIPE_PAGE_BUYING_ENABLED", False)
disabled_response = await client.get("/api/v1/stripe/status", headers=headers)
assert disabled_response.status_code == 200, disabled_response.text
@ -237,7 +239,9 @@ class TestStripeCheckoutSessionCreation:
)
assert response.status_code == 503, response.text
assert response.json()["detail"] == "Page purchases are temporarily unavailable."
assert (
response.json()["detail"] == "Page purchases are temporarily unavailable."
)
purchase_count = await _fetchrow("SELECT COUNT(*) AS count FROM page_purchases")
assert purchase_count is not None

View file

@ -3,7 +3,7 @@
from __future__ import annotations
import uuid
from datetime import UTC, datetime
from datetime import UTC, datetime, timedelta
import pytest_asyncio
from sqlalchemy.ext.asyncio import AsyncSession
@ -22,6 +22,7 @@ def _make_document(
content: str,
search_space_id: int,
created_by_id: str,
updated_at: datetime | None = None,
) -> Document:
uid = uuid.uuid4().hex[:12]
return Document(
@ -34,7 +35,7 @@ def _make_document(
search_space_id=search_space_id,
created_by_id=created_by_id,
embedding=DUMMY_EMBEDDING,
updated_at=datetime.now(UTC),
updated_at=updated_at or datetime.now(UTC),
status={"state": "ready"},
)
@ -104,3 +105,54 @@ async def seed_large_doc(
"search_space": db_search_space,
"user": db_user,
}
@pytest_asyncio.fixture
async def seed_date_filtered_docs(
    db_session: AsyncSession, db_user: User, db_search_space: SearchSpace
):
    """Seed two FILE documents with identical content but different ages.

    One document is stamped with the current UTC time and the other with a
    timestamp 730 days earlier. Each document gets a single chunk. Both
    documents and chunks are flushed (not committed) on ``db_session``.

    Returns:
        A dict with the keys ``recent_doc``, ``old_doc``, ``search_space``
        and ``user``.
    """
    owner_id = str(db_user.id)
    space_pk = db_search_space.id
    seeded_at = datetime.now(UTC)

    # Everything except the title and the timestamp is shared by both docs.
    shared_fields = {
        "document_type": DocumentType.FILE,
        "content": "ocv meeting decisions and action items",
        "search_space_id": space_pk,
        "created_by_id": owner_id,
    }
    recent_doc = _make_document(
        title="Recent OCV Notes",
        updated_at=seeded_at,
        **shared_fields,
    )
    old_doc = _make_document(
        title="Old OCV Notes",
        updated_at=seeded_at - timedelta(days=730),
        **shared_fields,
    )

    db_session.add_all([recent_doc, old_doc])
    # Flush before building chunks: the chunks reference the documents' ids.
    await db_session.flush()

    recent_chunk = _make_chunk(
        content="ocv meeting decisions and action items recent",
        document_id=recent_doc.id,
    )
    old_chunk = _make_chunk(
        content="ocv meeting decisions and action items old",
        document_id=old_doc.id,
    )
    db_session.add_all([recent_chunk, old_chunk])
    await db_session.flush()

    return {
        "recent_doc": recent_doc,
        "old_doc": old_doc,
        "search_space": db_search_space,
        "user": db_user,
    }

View file

@ -0,0 +1,62 @@
"""Integration smoke tests for KB search query/date scoping."""
from __future__ import annotations
from contextlib import asynccontextmanager
from datetime import UTC, datetime, timedelta
import numpy as np
import pytest
from app.agents.new_chat.middleware.knowledge_search import search_knowledge_base
from .conftest import DUMMY_EMBEDDING
# Module-level marker: every test in this file is tagged `integration`.
pytestmark = pytest.mark.integration
async def test_search_knowledge_base_applies_date_filters(
    db_session,
    seed_date_filtered_docs,
    monkeypatch,
):
    """A start/end date window must exclude the older of two matching docs.

    The same query is run twice against the seeded search space: once with no
    date arguments and once restricted to the last 30 days. Both documents
    must appear in the first result set; only the recent one in the second.
    """

    @asynccontextmanager
    async def fake_shielded_async_session():
        # Hand the test's own session to the code under test.
        yield db_session

    def fake_embed_texts(texts):
        # One dummy embedding per input text.
        return [np.array(DUMMY_EMBEDDING) for _ in texts]

    monkeypatch.setattr(
        "app.agents.new_chat.middleware.knowledge_search.shielded_async_session",
        fake_shielded_async_session,
    )
    monkeypatch.setattr(
        "app.agents.new_chat.middleware.knowledge_search.embed_texts",
        fake_embed_texts,
    )

    space_pk = seed_date_filtered_docs["search_space"].id
    window_start = datetime.now(UTC) - timedelta(days=30)

    async def run_search(**date_window):
        # Identical query for both runs; only the date arguments vary.
        return await search_knowledge_base(
            query="ocv meeting decisions",
            search_space_id=space_pk,
            available_document_types=["FILE"],
            top_k=10,
            **date_window,
        )

    all_hits = await run_search()
    windowed_hits = await run_search(
        start_date=window_start,
        end_date=datetime.now(UTC),
    )

    all_doc_ids = {hit["document"]["id"] for hit in all_hits}
    windowed_doc_ids = {hit["document"]["id"] for hit in windowed_hits}

    assert seed_date_filtered_docs["recent_doc"].id in all_doc_ids
    assert seed_date_filtered_docs["old_doc"].id in all_doc_ids
    assert seed_date_filtered_docs["recent_doc"].id in windowed_doc_ids
    assert seed_date_filtered_docs["old_doc"].id not in windowed_doc_ids