retrieval: add unit tests for pure components

This commit is contained in:
CREDO23 2026-06-25 08:23:30 +02:00
parent 4fe208557a
commit 915ad80e19
3 changed files with 150 additions and 0 deletions

View file

@ -0,0 +1,50 @@
"""Tests for mapping a DocumentHit to a renderable RetrievedDocument."""
from __future__ import annotations
import pytest
from app.agents.chat.multi_agent_chat.shared.retrieval.adapter import (
to_retrieved_document,
)
from app.agents.chat.multi_agent_chat.shared.retrieval.models import (
ChunkHit,
DocumentHit,
)
pytestmark = pytest.mark.unit
def test_maps_identity_source_label_and_passages() -> None:
    """Check id, title, source label and passages of an adapted Slack hit.

    The hit carries two chunks; the adapted document is expected to expose
    them as passages in the same order, each tagged with the parent
    document id.
    """
    chunk_hits = [
        ChunkHit(chunk_id=880, content="a", position=4, score=0.9),
        ChunkHit(chunk_id=881, content="b", position=7, score=0.5),
    ]
    slack_hit = DocumentHit(
        document_id=42,
        title="Q3 Launch Notes",
        document_type="SLACK_CONNECTOR",
        metadata={},
        score=0.9,
        chunks=chunk_hits,
    )

    adapted = to_retrieved_document(slack_hit)

    # Identity and label fields.
    assert adapted.document_id == 42
    assert adapted.title == "Q3 Launch Notes"
    assert adapted.source_label == "Slack"

    # Passages keep chunk order and content.
    passage_pairs = [
        (passage.chunk_id, passage.content) for passage in adapted.passages
    ]
    assert passage_pairs == [(880, "a"), (881, "b")]

    # Every passage points back at the owning document.
    for passage in adapted.passages:
        assert passage.document_id == 42
def test_document_with_no_chunks_maps_to_no_passages() -> None:
    """A hit with an empty chunk list must adapt to an empty passage list."""
    chunkless_hit = DocumentHit(
        document_id=1,
        title="Empty",
        document_type=None,
        metadata={},
        score=0.0,
        chunks=[],
    )

    adapted = to_retrieved_document(chunkless_hit)

    assert adapted.passages == []

View file

@ -0,0 +1,65 @@
"""Tests for the build_context pipeline (rerank → adapt → render)."""
from __future__ import annotations
from typing import Any
import pytest
from app.agents.chat.multi_agent_chat.shared.citations import CitationRegistry
from app.agents.chat.multi_agent_chat.shared.retrieval.models import (
ChunkHit,
DocumentHit,
)
from app.agents.chat.multi_agent_chat.shared.retrieval.service import build_context
pytestmark = pytest.mark.unit
def _hit(document_id: int, chunk_id: int) -> DocumentHit:
    """Build a single-chunk ``FILE`` hit derived from the two ids.

    The title is ``Doc <document_id>``, the document score is
    ``1.0 / document_id`` (so ``document_id`` must be non-zero), and the one
    chunk has content ``text <chunk_id>`` at position 0 with score 1.0.
    """
    doc_title = f"Doc {document_id}"
    doc_score = 1.0 / document_id
    only_chunk = ChunkHit(
        chunk_id=chunk_id,
        content=f"text {chunk_id}",
        position=0,
        score=1.0,
    )
    return DocumentHit(
        document_id=document_id,
        title=doc_title,
        document_type="FILE",
        metadata={},
        score=doc_score,
        chunks=[only_chunk],
    )
def test_no_hits_renders_nothing() -> None:
    """With an empty hit list, ``build_context`` is expected to return ``None``."""
    rendered = build_context("q", [], CitationRegistry())

    assert rendered is None
def test_renders_block_and_registers_labels_in_order() -> None:
    """Two hits render as ``[1]``/``[2]`` and register matching locators.

    Without a reranker the hits are expected to be labelled in the order
    they were passed in.
    """
    citations = CitationRegistry()
    hits = [_hit(1, 880), _hit(2, 12)]

    rendered = build_context("q", hits, citations)

    assert rendered is not None

    # Each chunk's text appears behind its numeric label.
    for expected_line in ("[1] text 880", "[2] text 12"):
        assert expected_line in rendered

    # Each label resolves to the document/chunk pair it was assigned to.
    expected_locators = {
        1: {"document_id": 1, "chunk_id": 880},
        2: {"document_id": 2, "chunk_id": 12},
    }
    for label, locator in expected_locators.items():
        assert citations.resolve(label).locator == locator
class _ReverseReranker:
    """Test double for a reranker: hands the documents back in reverse order."""

    def rerank_documents(
        self, query_text: str, documents: list[dict[str, Any]]
    ) -> list[dict[str, Any]]:
        """Return a new list with ``documents`` reversed.

        ``query_text`` is ignored and the input list is left unmodified.
        """
        flipped = list(documents)  # copy so the caller's list is untouched
        flipped.reverse()
        return flipped
def test_reranker_reorders_documents_before_labeling() -> None:
    """Labels must follow the reranked order, not the input order."""
    citations = CitationRegistry()
    hits_in_input_order = [_hit(1, 880), _hit(2, 12)]

    rendered = build_context(
        "q",
        hits_in_input_order,
        citations,
        reranker=_ReverseReranker(),
    )

    assert rendered is not None

    # The stand-in reranker flips the order, so document 2 is expected to
    # take label [1] and document 1 to take label [2].
    expected_locators = {
        1: {"document_id": 2, "chunk_id": 12},
        2: {"document_id": 1, "chunk_id": 880},
    }
    for label, locator in expected_locators.items():
        assert citations.resolve(label).locator == locator

View file

@ -0,0 +1,35 @@
"""Tests for building a document's source label."""
from __future__ import annotations
import pytest
from app.agents.chat.multi_agent_chat.shared.retrieval.source_label import source_label
pytestmark = pytest.mark.unit
def test_known_type_uses_friendly_name() -> None:
    """``SLACK_CONNECTOR`` with empty metadata is expected to label as ``Slack``."""
    label = source_label("SLACK_CONNECTOR", {})

    assert label == "Slack"
def test_unmapped_type_is_prettified() -> None:
    """``GOOGLE_DRIVE_FILE`` is expected to come out as ``Google Drive``.

    This test treats that type as one without a friendly-name mapping.
    """
    label = source_label("GOOGLE_DRIVE_FILE", {})

    assert label == "Google Drive"
def test_url_host_is_appended_and_www_stripped() -> None:
    """The URL host follows the type name, without its leading ``www.``."""
    metadata = {"url": "https://www.docs.python.org/3/"}

    assert source_label("CRAWLED_URL", metadata) == "Web · docs.python.org"
def test_host_only_when_type_unknown() -> None:
    """With no document type, the label is expected to be just the URL host."""
    metadata = {"url": "https://example.com/a"}
    label = source_label(None, metadata)

    assert label == "example.com"
def test_returns_none_when_nothing_known() -> None:
    """No document type and empty metadata are expected to yield ``None``."""
    label = source_label(None, {})

    assert label is None
def test_non_http_url_is_ignored() -> None:
    """A ``url`` that is a bare filesystem path adds no host to the label."""
    metadata = {"url": "/local/path"}
    label = source_label("FILE", metadata)

    assert label == "File"