retrieved_context: drop document completeness concept

This commit is contained in:
CREDO23 2026-06-25 08:23:29 +02:00
parent 6bb20df510
commit 26a1431e87
3 changed files with 10 additions and 20 deletions

View file

@ -21,17 +21,11 @@ class RetrievedPassage:
@dataclass(frozen=True)
class RetrievedDocument:
"""A source document and the passages retrieved from it, in order.
``is_complete`` is ``True`` when every chunk of the document is present in
this block, so the model knows whether it is seeing the whole source or
only excerpts.
"""
"""A source document and the passages retrieved from it, in order."""
document_id: int
title: str
source_label: str | None = None
is_complete: bool = False
passages: list[RetrievedPassage] = field(default_factory=list)

View file

@ -18,9 +18,8 @@ from .models import RetrievedDocument, RetrievedPassage
_HEADER = (
"These are excerpts from the user's knowledge base, selected for this query.\n"
"A document is a full source (a file, a Slack thread, a Notion page); a chunk\n"
"is one ordered fragment of it. Each document is tagged (partial) when only\n"
"some of its chunks were retrieved or (complete) when all of them are shown\n"
"here, so you know whether you have the whole source or only parts of it.\n"
"is one ordered fragment of it. You are seeing only the chunks that matched\n"
"this query, not the whole source.\n"
"Cite a chunk with [n]."
)
@ -61,10 +60,9 @@ def _render_document(
def _render_header(document: RetrievedDocument) -> str:
"""``Document: "Title" (source) (partial|complete)``."""
"""``Document: "Title" (source)``."""
source = f" ({document.source_label})" if document.source_label else ""
completeness = "(complete)" if document.is_complete else "(partial)"
return f'Document: "{_clean(document.title)}"{source} {completeness}'
return f'Document: "{_clean(document.title)}"{source}'
def _render_passage(

View file

@ -23,13 +23,11 @@ def _document(
chunk_ids: list[int],
*,
source_label: str | None = None,
is_complete: bool = False,
) -> RetrievedDocument:
return RetrievedDocument(
document_id=document_id,
title=title,
source_label=source_label,
is_complete=is_complete,
passages=[
RetrievedPassage(document_id=document_id, chunk_id=cid, content=f"text {cid}")
for cid in chunk_ids
@ -73,20 +71,20 @@ def test_registers_passages_with_chunk_locators() -> None:
assert entry.display["title"] == "Doc"
def test_header_shows_source_and_completeness() -> None:
def test_header_shows_source_when_present() -> None:
registry = CitationRegistry()
block = render_retrieved_context(
[
_document(1, "Q3", [1], source_label="Slack · #launch", is_complete=False),
_document(2, "Plan", [2], is_complete=True),
_document(1, "Q3", [1], source_label="Slack · #launch"),
_document(2, "Plan", [2]),
],
registry,
)
assert block is not None
assert 'Document: "Q3" (Slack · #launch) (partial)' in block
assert 'Document: "Plan" (complete)' in block
assert 'Document: "Q3" (Slack · #launch)' in block
assert 'Document: "Plan"' in block
def test_wraps_block_and_explains_chunk_vs_document() -> None: