mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-06-26 21:39:43 +02:00
retrieval: add unit tests for pure components
This commit is contained in:
parent
4fe208557a
commit
915ad80e19
3 changed files with 150 additions and 0 deletions
|
|
@ -0,0 +1,50 @@
|
|||
"""Tests for mapping a DocumentHit to a renderable RetrievedDocument."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
from app.agents.chat.multi_agent_chat.shared.retrieval.adapter import (
|
||||
to_retrieved_document,
|
||||
)
|
||||
from app.agents.chat.multi_agent_chat.shared.retrieval.models import (
|
||||
ChunkHit,
|
||||
DocumentHit,
|
||||
)
|
||||
|
||||
pytestmark = pytest.mark.unit
|
||||
|
||||
|
||||
def test_maps_identity_source_label_and_passages() -> None:
    """Check that id, title, source label and per-chunk passages survive the mapping.

    The hit is a ``SLACK_CONNECTOR`` document with two chunks; the mapped
    document is expected to carry the label ``Slack`` and one passage per
    chunk, in chunk order, each tagged with the parent document id.
    """
    source_chunks = [
        ChunkHit(chunk_id=880, content="a", position=4, score=0.9),
        ChunkHit(chunk_id=881, content="b", position=7, score=0.5),
    ]
    slack_hit = DocumentHit(
        document_id=42,
        title="Q3 Launch Notes",
        document_type="SLACK_CONNECTOR",
        metadata={},
        score=0.9,
        chunks=source_chunks,
    )

    mapped = to_retrieved_document(slack_hit)

    # Identity fields and the friendly source label.
    assert mapped.document_id == 42
    assert mapped.title == "Q3 Launch Notes"
    assert mapped.source_label == "Slack"

    # Passages mirror the chunks one-to-one and point back at the document.
    id_and_text = [(passage.chunk_id, passage.content) for passage in mapped.passages]
    assert id_and_text == [(880, "a"), (881, "b")]
    assert all(passage.document_id == 42 for passage in mapped.passages)
|
||||
|
||||
|
||||
def test_document_with_no_chunks_maps_to_no_passages() -> None:
    """A hit built with an empty chunk list maps to an empty passage list."""
    chunkless_hit = DocumentHit(
        document_id=1,
        title="Empty",
        document_type=None,
        metadata={},
        score=0.0,
        chunks=[],
    )

    mapped = to_retrieved_document(chunkless_hit)

    assert mapped.passages == []
|
||||
|
|
@ -0,0 +1,65 @@
|
|||
"""Tests for the build_context pipeline (rerank → adapt → render)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
import pytest
|
||||
|
||||
from app.agents.chat.multi_agent_chat.shared.citations import CitationRegistry
|
||||
from app.agents.chat.multi_agent_chat.shared.retrieval.models import (
|
||||
ChunkHit,
|
||||
DocumentHit,
|
||||
)
|
||||
from app.agents.chat.multi_agent_chat.shared.retrieval.service import build_context
|
||||
|
||||
pytestmark = pytest.mark.unit
|
||||
|
||||
|
||||
def _hit(document_id: int, chunk_id: int) -> DocumentHit:
    """Build a ``FILE`` hit holding exactly one chunk.

    The title is ``"Doc <document_id>"`` and the chunk text is
    ``"text <chunk_id>"``, so tests can recognise both in rendered output.
    The document score is ``1.0 / document_id``, so ``document_id`` must be
    non-zero.
    """
    only_chunk = ChunkHit(
        chunk_id=chunk_id,
        content=f"text {chunk_id}",
        position=0,
        score=1.0,
    )
    return DocumentHit(
        document_id=document_id,
        title=f"Doc {document_id}",
        document_type="FILE",
        metadata={},
        score=1.0 / document_id,
        chunks=[only_chunk],
    )
|
||||
|
||||
|
||||
def test_no_hits_renders_nothing() -> None:
    """With an empty hit list, ``build_context`` returns ``None``."""
    rendered = build_context("q", [], CitationRegistry())

    assert rendered is None
|
||||
|
||||
|
||||
def test_renders_block_and_registers_labels_in_order() -> None:
    """Two hits render as ``[1]``/``[2]`` lines and register matching locators.

    Without a reranker, the first hit is expected under label 1 and the
    second under label 2, both in the rendered text and in the registry.
    """
    registry = CitationRegistry()
    hits = [_hit(1, 880), _hit(2, 12)]

    rendered = build_context("q", hits, registry)

    assert rendered is not None
    for labelled_text in ("[1] text 880", "[2] text 12"):
        assert labelled_text in rendered

    expected_locators = {
        1: {"document_id": 1, "chunk_id": 880},
        2: {"document_id": 2, "chunk_id": 12},
    }
    for label, locator in expected_locators.items():
        assert registry.resolve(label).locator == locator
|
||||
|
||||
|
||||
class _ReverseReranker:
    """Test double for a reranker: hands the documents back in reverse order."""

    def rerank_documents(
        self, query_text: str, documents: list[dict[str, Any]]
    ) -> list[dict[str, Any]]:
        """Return a new list with ``documents`` reversed.

        ``query_text`` is accepted but not used. The input list is left
        untouched.
        """
        flipped = list(documents)
        flipped.reverse()
        return flipped
|
||||
|
||||
|
||||
def test_reranker_reorders_documents_before_labeling() -> None:
    """Labels follow the reranked order, not the order the hits were passed in."""
    registry = CitationRegistry()
    hits = [_hit(1, 880), _hit(2, 12)]

    rendered = build_context("q", hits, registry, reranker=_ReverseReranker())

    assert rendered is not None

    # The stand-in reranker flips the list, so doc 2 is labelled [1]
    # and doc 1 is labelled [2].
    swapped_locators = {
        1: {"document_id": 2, "chunk_id": 12},
        2: {"document_id": 1, "chunk_id": 880},
    }
    for label, locator in swapped_locators.items():
        assert registry.resolve(label).locator == locator
|
||||
|
|
@ -0,0 +1,35 @@
|
|||
"""Tests for building a document's source label."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
from app.agents.chat.multi_agent_chat.shared.retrieval.source_label import source_label
|
||||
|
||||
pytestmark = pytest.mark.unit
|
||||
|
||||
|
||||
def test_known_type_uses_friendly_name() -> None:
    """``SLACK_CONNECTOR`` with empty metadata is labelled ``Slack``."""
    label = source_label("SLACK_CONNECTOR", {})

    assert label == "Slack"
|
||||
|
||||
|
||||
def test_unmapped_type_is_prettified() -> None:
    """``GOOGLE_DRIVE_FILE`` with empty metadata is labelled ``Google Drive``."""
    # NOTE(review): the expected label has no "File" suffix — presumably the
    # prettifier drops it; confirm against the source_label implementation.
    label = source_label("GOOGLE_DRIVE_FILE", {})

    assert label == "Google Drive"
|
||||
|
||||
|
||||
def test_url_host_is_appended_and_www_stripped() -> None:
    """A crawled URL's host follows the type label, without its ``www.`` prefix."""
    metadata = {"url": "https://www.docs.python.org/3/"}

    assert source_label("CRAWLED_URL", metadata) == "Web · docs.python.org"
|
||||
|
||||
|
||||
def test_host_only_when_type_unknown() -> None:
    """With no document type, the label is just the URL's host."""
    label = source_label(None, {"url": "https://example.com/a"})

    assert label == "example.com"
|
||||
|
||||
|
||||
def test_returns_none_when_nothing_known() -> None:
    """No document type and empty metadata give no label at all."""
    label = source_label(None, {})

    assert label is None
|
||||
|
||||
|
||||
def test_non_http_url_is_ignored() -> None:
    """A ``url`` value that is a local path adds nothing to the ``File`` label."""
    label = source_label("FILE", {"url": "/local/path"})

    assert label == "File"
|
||||
Loading…
Add table
Add a link
Reference in a new issue