From 915ad80e19914daa8026e2e252f158a472c70257 Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Thu, 25 Jun 2026 08:23:30 +0200
Subject: [PATCH] retrieval: add unit tests for pure components

---
Notes (commentary only; not part of the commit message or the diff):
  * All three new modules set `pytestmark = pytest.mark.unit`.
  * test_adapter.py exercises `to_retrieved_document` (a hit with two
    chunks, and a hit with no chunks).
  * test_service.py exercises `build_context` (no hits, label registration
    order, and reordering through a stand-in reranker that reverses the
    document list).
  * test_source_label.py exercises `source_label` (six cases covering type
    names, URL hosts, and the nothing-known case).

 .../shared/retrieval/test_adapter.py          | 50 ++++++++++++++
 .../shared/retrieval/test_service.py          | 65 +++++++++++++++++++
 .../shared/retrieval/test_source_label.py     | 35 ++++++++++
 3 files changed, 150 insertions(+)
 create mode 100644 surfsense_backend/tests/unit/agents/multi_agent_chat/shared/retrieval/test_adapter.py
 create mode 100644 surfsense_backend/tests/unit/agents/multi_agent_chat/shared/retrieval/test_service.py
 create mode 100644 surfsense_backend/tests/unit/agents/multi_agent_chat/shared/retrieval/test_source_label.py

diff --git a/surfsense_backend/tests/unit/agents/multi_agent_chat/shared/retrieval/test_adapter.py b/surfsense_backend/tests/unit/agents/multi_agent_chat/shared/retrieval/test_adapter.py
new file mode 100644
index 000000000..c38cc624d
--- /dev/null
+++ b/surfsense_backend/tests/unit/agents/multi_agent_chat/shared/retrieval/test_adapter.py
@@ -0,0 +1,50 @@
+"""Tests for mapping a DocumentHit to a renderable RetrievedDocument."""
+
+from __future__ import annotations
+
+import pytest
+
+from app.agents.chat.multi_agent_chat.shared.retrieval.adapter import (
+    to_retrieved_document,
+)
+from app.agents.chat.multi_agent_chat.shared.retrieval.models import (
+    ChunkHit,
+    DocumentHit,
+)
+
+pytestmark = pytest.mark.unit
+
+
+def test_maps_identity_source_label_and_passages() -> None:
+    hit = DocumentHit(
+        document_id=42,
+        title="Q3 Launch Notes",
+        document_type="SLACK_CONNECTOR",
+        metadata={},
+        score=0.9,
+        chunks=[
+            ChunkHit(chunk_id=880, content="a", position=4, score=0.9),
+            ChunkHit(chunk_id=881, content="b", position=7, score=0.5),
+        ],
+    )
+
+    document = to_retrieved_document(hit)
+
+    assert document.document_id == 42
+    assert document.title == "Q3 Launch Notes"
+    assert document.source_label == "Slack"
+    assert [(p.chunk_id, p.content) for p in document.passages] == [(880, "a"), (881, "b")]
+    assert all(p.document_id == 42 for p in document.passages)
+
+
+def test_document_with_no_chunks_maps_to_no_passages() -> None:
+    hit = DocumentHit(
+        document_id=1,
+        title="Empty",
+        document_type=None,
+        metadata={},
+        score=0.0,
+        chunks=[],
+    )
+
+    assert to_retrieved_document(hit).passages == []
diff --git a/surfsense_backend/tests/unit/agents/multi_agent_chat/shared/retrieval/test_service.py b/surfsense_backend/tests/unit/agents/multi_agent_chat/shared/retrieval/test_service.py
new file mode 100644
index 000000000..bd44f5dc2
--- /dev/null
+++ b/surfsense_backend/tests/unit/agents/multi_agent_chat/shared/retrieval/test_service.py
@@ -0,0 +1,65 @@
+"""Tests for the build_context pipeline (rerank → adapt → render)."""
+
+from __future__ import annotations
+
+from typing import Any
+
+import pytest
+
+from app.agents.chat.multi_agent_chat.shared.citations import CitationRegistry
+from app.agents.chat.multi_agent_chat.shared.retrieval.models import (
+    ChunkHit,
+    DocumentHit,
+)
+from app.agents.chat.multi_agent_chat.shared.retrieval.service import build_context
+
+pytestmark = pytest.mark.unit
+
+
+def _hit(document_id: int, chunk_id: int) -> DocumentHit:
+    return DocumentHit(
+        document_id=document_id,
+        title=f"Doc {document_id}",
+        document_type="FILE",
+        metadata={},
+        score=1.0 / document_id,
+        chunks=[ChunkHit(chunk_id=chunk_id, content=f"text {chunk_id}", position=0, score=1.0)],
+    )
+
+
+def test_no_hits_renders_nothing() -> None:
+    assert build_context("q", [], CitationRegistry()) is None
+
+
+def test_renders_block_and_registers_labels_in_order() -> None:
+    registry = CitationRegistry()
+
+    block = build_context("q", [_hit(1, 880), _hit(2, 12)], registry)
+
+    assert block is not None
+    assert "[1] text 880" in block
+    assert "[2] text 12" in block
+    assert registry.resolve(1).locator == {"document_id": 1, "chunk_id": 880}
+    assert registry.resolve(2).locator == {"document_id": 2, "chunk_id": 12}
+
+
+class _ReverseReranker:
+    """Stand-in reranker that simply reverses document order."""
+
+    def rerank_documents(
+        self, query_text: str, documents: list[dict[str, Any]]
+    ) -> list[dict[str, Any]]:
+        return list(reversed(documents))
+
+
+def test_reranker_reorders_documents_before_labeling() -> None:
+    registry = CitationRegistry()
+
+    block = build_context(
+        "q", [_hit(1, 880), _hit(2, 12)], registry, reranker=_ReverseReranker()
+    )
+
+    assert block is not None
+    # Reversed: doc 2 now renders first and gets [1].
+    assert registry.resolve(1).locator == {"document_id": 2, "chunk_id": 12}
+    assert registry.resolve(2).locator == {"document_id": 1, "chunk_id": 880}
diff --git a/surfsense_backend/tests/unit/agents/multi_agent_chat/shared/retrieval/test_source_label.py b/surfsense_backend/tests/unit/agents/multi_agent_chat/shared/retrieval/test_source_label.py
new file mode 100644
index 000000000..54c74fb0b
--- /dev/null
+++ b/surfsense_backend/tests/unit/agents/multi_agent_chat/shared/retrieval/test_source_label.py
@@ -0,0 +1,35 @@
+"""Tests for building a document's source label."""
+
+from __future__ import annotations
+
+import pytest
+
+from app.agents.chat.multi_agent_chat.shared.retrieval.source_label import source_label
+
+pytestmark = pytest.mark.unit
+
+
+def test_known_type_uses_friendly_name() -> None:
+    assert source_label("SLACK_CONNECTOR", {}) == "Slack"
+
+
+def test_unmapped_type_is_prettified() -> None:
+    assert source_label("GOOGLE_DRIVE_FILE", {}) == "Google Drive"
+
+
+def test_url_host_is_appended_and_www_stripped() -> None:
+    label = source_label("CRAWLED_URL", {"url": "https://www.docs.python.org/3/"})
+
+    assert label == "Web · docs.python.org"
+
+
+def test_host_only_when_type_unknown() -> None:
+    assert source_label(None, {"url": "https://example.com/a"}) == "example.com"
+
+
+def test_returns_none_when_nothing_known() -> None:
+    assert source_label(None, {}) is None
+
+
+def test_non_http_url_is_ignored() -> None:
+    assert source_label("FILE", {"url": "/local/path"}) == "File"