trustgraph/tests/contract/test_translator_completion_flags.py

317 lines
11 KiB
Python
Raw Normal View History

"""
Contract tests for message translator completion flag behavior.
These tests verify that translators correctly compute the is_final flag
based on message fields like end_of_stream and end_of_dialog.
"""
import pytest
from trustgraph.schema import (
GraphRagResponse, DocumentRagResponse, AgentResponse, Error
)
from trustgraph.messaging import TranslatorRegistry
@pytest.mark.contract
class TestRAGTranslatorCompletionFlags:
"""Contract tests for RAG response translator completion flags"""
GraphRAG Query-Time Explainability (#677) Implements full explainability pipeline for GraphRAG queries, enabling traceability from answers back to source documents. Renamed throughout for clarity: - provenance_callback → explain_callback - provenance_id → explain_id - provenance_collection → explain_collection - message_type "provenance" → "explain" - Queue name "provenance" → "explainability" GraphRAG queries now emit explainability events as they execute: 1. Session - query text and timestamp 2. Retrieval - edges retrieved from subgraph 3. Selection - selected edges with LLM reasoning (JSONL with id + reasoning) 4. Answer - reference to synthesized response Events stream via explain_callback during query(), enabling real-time UX. - Answers stored in librarian service (not inline in graph - too large) - Document ID as URN: urn:trustgraph:answer:{session_id} - Graph stores tg:document reference (IRI) to librarian document - Added librarian producer/consumer to graph-rag service - get_labelgraph() now returns (labeled_edges, uri_map) - uri_map maps edge_id(label_s, label_p, label_o) → (uri_s, uri_p, uri_o) - Explainability data stores original URIs, not labels - Enables tracing edges back to reifying statements via tg:reifies - Added serialize_triple() to query service (matches storage format) - get_term_value() now handles TRIPLE type terms - Enables querying by quoted triple in object position: ?stmt tg:reifies <<s p o>> - Displays real-time explainability events during query - Resolves rdfs:label for edge components (s, p, o) - Traces source chain via prov:wasDerivedFrom to root document - Output: "Source: Chunk 1 → Page 2 → Document Title" - Label caching to avoid repeated queries GraphRagResponse: - explain_id: str | None - explain_collection: str | None - message_type: str ("chunk" or "explain") - end_of_session: bool trustgraph-base/trustgraph/provenance/: - namespaces.py - Added TG_DOCUMENT predicate - triples.py - answer_triples() supports document_id reference - uris.py - Added edge_selection_uri() trustgraph-base/trustgraph/schema/services/retrieval.py: - GraphRagResponse with explain_id, explain_collection, end_of_session trustgraph-flow/trustgraph/retrieval/graph_rag/: - graph_rag.py - URI preservation, streaming answer accumulation - rag.py - Librarian integration, real-time explain emission trustgraph-flow/trustgraph/query/triples/cassandra/service.py: - Quoted triple serialization for query matching trustgraph-cli/trustgraph/cli/invoke_graph_rag.py: - Full explainability display with label resolution and source tracing
2026-03-10 10:00:01 +00:00
def test_graph_rag_translator_is_final_with_end_of_session_true(self):
"""
Test that GraphRagResponseTranslator returns is_final=True
GraphRAG Query-Time Explainability (#677) Implements full explainability pipeline for GraphRAG queries, enabling traceability from answers back to source documents. Renamed throughout for clarity: - provenance_callback → explain_callback - provenance_id → explain_id - provenance_collection → explain_collection - message_type "provenance" → "explain" - Queue name "provenance" → "explainability" GraphRAG queries now emit explainability events as they execute: 1. Session - query text and timestamp 2. Retrieval - edges retrieved from subgraph 3. Selection - selected edges with LLM reasoning (JSONL with id + reasoning) 4. Answer - reference to synthesized response Events stream via explain_callback during query(), enabling real-time UX. - Answers stored in librarian service (not inline in graph - too large) - Document ID as URN: urn:trustgraph:answer:{session_id} - Graph stores tg:document reference (IRI) to librarian document - Added librarian producer/consumer to graph-rag service - get_labelgraph() now returns (labeled_edges, uri_map) - uri_map maps edge_id(label_s, label_p, label_o) → (uri_s, uri_p, uri_o) - Explainability data stores original URIs, not labels - Enables tracing edges back to reifying statements via tg:reifies - Added serialize_triple() to query service (matches storage format) - get_term_value() now handles TRIPLE type terms - Enables querying by quoted triple in object position: ?stmt tg:reifies <<s p o>> - Displays real-time explainability events during query - Resolves rdfs:label for edge components (s, p, o) - Traces source chain via prov:wasDerivedFrom to root document - Output: "Source: Chunk 1 → Page 2 → Document Title" - Label caching to avoid repeated queries GraphRagResponse: - explain_id: str | None - explain_collection: str | None - message_type: str ("chunk" or "explain") - end_of_session: bool trustgraph-base/trustgraph/provenance/: - namespaces.py - Added TG_DOCUMENT predicate - triples.py - answer_triples() supports document_id reference - uris.py - Added edge_selection_uri() trustgraph-base/trustgraph/schema/services/retrieval.py: - GraphRagResponse with explain_id, explain_collection, end_of_session trustgraph-flow/trustgraph/retrieval/graph_rag/: - graph_rag.py - URI preservation, streaming answer accumulation - rag.py - Librarian integration, real-time explain emission trustgraph-flow/trustgraph/query/triples/cassandra/service.py: - Quoted triple serialization for query matching trustgraph-cli/trustgraph/cli/invoke_graph_rag.py: - Full explainability display with label resolution and source tracing
2026-03-10 10:00:01 +00:00
when end_of_session=True.
"""
# Arrange
translator = TranslatorRegistry.get_response_translator("graph-rag")
response = GraphRagResponse(
response="A small domesticated mammal.",
GraphRAG Query-Time Explainability (#677) Implements full explainability pipeline for GraphRAG queries, enabling traceability from answers back to source documents. Renamed throughout for clarity: - provenance_callback → explain_callback - provenance_id → explain_id - provenance_collection → explain_collection - message_type "provenance" → "explain" - Queue name "provenance" → "explainability" GraphRAG queries now emit explainability events as they execute: 1. Session - query text and timestamp 2. Retrieval - edges retrieved from subgraph 3. Selection - selected edges with LLM reasoning (JSONL with id + reasoning) 4. Answer - reference to synthesized response Events stream via explain_callback during query(), enabling real-time UX. - Answers stored in librarian service (not inline in graph - too large) - Document ID as URN: urn:trustgraph:answer:{session_id} - Graph stores tg:document reference (IRI) to librarian document - Added librarian producer/consumer to graph-rag service - get_labelgraph() now returns (labeled_edges, uri_map) - uri_map maps edge_id(label_s, label_p, label_o) → (uri_s, uri_p, uri_o) - Explainability data stores original URIs, not labels - Enables tracing edges back to reifying statements via tg:reifies - Added serialize_triple() to query service (matches storage format) - get_term_value() now handles TRIPLE type terms - Enables querying by quoted triple in object position: ?stmt tg:reifies <<s p o>> - Displays real-time explainability events during query - Resolves rdfs:label for edge components (s, p, o) - Traces source chain via prov:wasDerivedFrom to root document - Output: "Source: Chunk 1 → Page 2 → Document Title" - Label caching to avoid repeated queries GraphRagResponse: - explain_id: str | None - explain_collection: str | None - message_type: str ("chunk" or "explain") - end_of_session: bool trustgraph-base/trustgraph/provenance/: - namespaces.py - Added TG_DOCUMENT predicate - triples.py - answer_triples() supports document_id reference - uris.py - Added edge_selection_uri() trustgraph-base/trustgraph/schema/services/retrieval.py: - GraphRagResponse with explain_id, explain_collection, end_of_session trustgraph-flow/trustgraph/retrieval/graph_rag/: - graph_rag.py - URI preservation, streaming answer accumulation - rag.py - Librarian integration, real-time explain emission trustgraph-flow/trustgraph/query/triples/cassandra/service.py: - Quoted triple serialization for query matching trustgraph-cli/trustgraph/cli/invoke_graph_rag.py: - Full explainability display with label resolution and source tracing
2026-03-10 10:00:01 +00:00
message_type="chunk",
end_of_stream=True,
GraphRAG Query-Time Explainability (#677) Implements full explainability pipeline for GraphRAG queries, enabling traceability from answers back to source documents. Renamed throughout for clarity: - provenance_callback → explain_callback - provenance_id → explain_id - provenance_collection → explain_collection - message_type "provenance" → "explain" - Queue name "provenance" → "explainability" GraphRAG queries now emit explainability events as they execute: 1. Session - query text and timestamp 2. Retrieval - edges retrieved from subgraph 3. Selection - selected edges with LLM reasoning (JSONL with id + reasoning) 4. Answer - reference to synthesized response Events stream via explain_callback during query(), enabling real-time UX. - Answers stored in librarian service (not inline in graph - too large) - Document ID as URN: urn:trustgraph:answer:{session_id} - Graph stores tg:document reference (IRI) to librarian document - Added librarian producer/consumer to graph-rag service - get_labelgraph() now returns (labeled_edges, uri_map) - uri_map maps edge_id(label_s, label_p, label_o) → (uri_s, uri_p, uri_o) - Explainability data stores original URIs, not labels - Enables tracing edges back to reifying statements via tg:reifies - Added serialize_triple() to query service (matches storage format) - get_term_value() now handles TRIPLE type terms - Enables querying by quoted triple in object position: ?stmt tg:reifies <<s p o>> - Displays real-time explainability events during query - Resolves rdfs:label for edge components (s, p, o) - Traces source chain via prov:wasDerivedFrom to root document - Output: "Source: Chunk 1 → Page 2 → Document Title" - Label caching to avoid repeated queries GraphRagResponse: - explain_id: str | None - explain_collection: str | None - message_type: str ("chunk" or "explain") - end_of_session: bool trustgraph-base/trustgraph/provenance/: - namespaces.py - Added TG_DOCUMENT predicate - triples.py - answer_triples() supports document_id reference - uris.py - Added edge_selection_uri() trustgraph-base/trustgraph/schema/services/retrieval.py: - GraphRagResponse with explain_id, explain_collection, end_of_session trustgraph-flow/trustgraph/retrieval/graph_rag/: - graph_rag.py - URI preservation, streaming answer accumulation - rag.py - Librarian integration, real-time explain emission trustgraph-flow/trustgraph/query/triples/cassandra/service.py: - Quoted triple serialization for query matching trustgraph-cli/trustgraph/cli/invoke_graph_rag.py: - Full explainability display with label resolution and source tracing
2026-03-10 10:00:01 +00:00
end_of_session=True,
error=None
)
# Act
response_dict, is_final = translator.from_response_with_completion(response)
# Assert
GraphRAG Query-Time Explainability (#677) Implements full explainability pipeline for GraphRAG queries, enabling traceability from answers back to source documents. Renamed throughout for clarity: - provenance_callback → explain_callback - provenance_id → explain_id - provenance_collection → explain_collection - message_type "provenance" → "explain" - Queue name "provenance" → "explainability" GraphRAG queries now emit explainability events as they execute: 1. Session - query text and timestamp 2. Retrieval - edges retrieved from subgraph 3. Selection - selected edges with LLM reasoning (JSONL with id + reasoning) 4. Answer - reference to synthesized response Events stream via explain_callback during query(), enabling real-time UX. - Answers stored in librarian service (not inline in graph - too large) - Document ID as URN: urn:trustgraph:answer:{session_id} - Graph stores tg:document reference (IRI) to librarian document - Added librarian producer/consumer to graph-rag service - get_labelgraph() now returns (labeled_edges, uri_map) - uri_map maps edge_id(label_s, label_p, label_o) → (uri_s, uri_p, uri_o) - Explainability data stores original URIs, not labels - Enables tracing edges back to reifying statements via tg:reifies - Added serialize_triple() to query service (matches storage format) - get_term_value() now handles TRIPLE type terms - Enables querying by quoted triple in object position: ?stmt tg:reifies <<s p o>> - Displays real-time explainability events during query - Resolves rdfs:label for edge components (s, p, o) - Traces source chain via prov:wasDerivedFrom to root document - Output: "Source: Chunk 1 → Page 2 → Document Title" - Label caching to avoid repeated queries GraphRagResponse: - explain_id: str | None - explain_collection: str | None - message_type: str ("chunk" or "explain") - end_of_session: bool trustgraph-base/trustgraph/provenance/: - namespaces.py - Added TG_DOCUMENT predicate - triples.py - answer_triples() supports document_id reference - uris.py - Added edge_selection_uri() trustgraph-base/trustgraph/schema/services/retrieval.py: - GraphRagResponse with explain_id, explain_collection, end_of_session trustgraph-flow/trustgraph/retrieval/graph_rag/: - graph_rag.py - URI preservation, streaming answer accumulation - rag.py - Librarian integration, real-time explain emission trustgraph-flow/trustgraph/query/triples/cassandra/service.py: - Quoted triple serialization for query matching trustgraph-cli/trustgraph/cli/invoke_graph_rag.py: - Full explainability display with label resolution and source tracing
2026-03-10 10:00:01 +00:00
assert is_final is True, "is_final must be True when end_of_session=True"
assert response_dict["response"] == "A small domesticated mammal."
GraphRAG Query-Time Explainability (#677) Implements full explainability pipeline for GraphRAG queries, enabling traceability from answers back to source documents. Renamed throughout for clarity: - provenance_callback → explain_callback - provenance_id → explain_id - provenance_collection → explain_collection - message_type "provenance" → "explain" - Queue name "provenance" → "explainability" GraphRAG queries now emit explainability events as they execute: 1. Session - query text and timestamp 2. Retrieval - edges retrieved from subgraph 3. Selection - selected edges with LLM reasoning (JSONL with id + reasoning) 4. Answer - reference to synthesized response Events stream via explain_callback during query(), enabling real-time UX. - Answers stored in librarian service (not inline in graph - too large) - Document ID as URN: urn:trustgraph:answer:{session_id} - Graph stores tg:document reference (IRI) to librarian document - Added librarian producer/consumer to graph-rag service - get_labelgraph() now returns (labeled_edges, uri_map) - uri_map maps edge_id(label_s, label_p, label_o) → (uri_s, uri_p, uri_o) - Explainability data stores original URIs, not labels - Enables tracing edges back to reifying statements via tg:reifies - Added serialize_triple() to query service (matches storage format) - get_term_value() now handles TRIPLE type terms - Enables querying by quoted triple in object position: ?stmt tg:reifies <<s p o>> - Displays real-time explainability events during query - Resolves rdfs:label for edge components (s, p, o) - Traces source chain via prov:wasDerivedFrom to root document - Output: "Source: Chunk 1 → Page 2 → Document Title" - Label caching to avoid repeated queries GraphRagResponse: - explain_id: str | None - explain_collection: str | None - message_type: str ("chunk" or "explain") - end_of_session: bool trustgraph-base/trustgraph/provenance/: - namespaces.py - Added TG_DOCUMENT predicate - triples.py - answer_triples() supports document_id reference - uris.py - Added edge_selection_uri() trustgraph-base/trustgraph/schema/services/retrieval.py: - GraphRagResponse with explain_id, explain_collection, end_of_session trustgraph-flow/trustgraph/retrieval/graph_rag/: - graph_rag.py - URI preservation, streaming answer accumulation - rag.py - Librarian integration, real-time explain emission trustgraph-flow/trustgraph/query/triples/cassandra/service.py: - Quoted triple serialization for query matching trustgraph-cli/trustgraph/cli/invoke_graph_rag.py: - Full explainability display with label resolution and source tracing
2026-03-10 10:00:01 +00:00
assert response_dict["end_of_session"] is True
assert response_dict["message_type"] == "chunk"
GraphRAG Query-Time Explainability (#677) Implements full explainability pipeline for GraphRAG queries, enabling traceability from answers back to source documents. Renamed throughout for clarity: - provenance_callback → explain_callback - provenance_id → explain_id - provenance_collection → explain_collection - message_type "provenance" → "explain" - Queue name "provenance" → "explainability" GraphRAG queries now emit explainability events as they execute: 1. Session - query text and timestamp 2. Retrieval - edges retrieved from subgraph 3. Selection - selected edges with LLM reasoning (JSONL with id + reasoning) 4. Answer - reference to synthesized response Events stream via explain_callback during query(), enabling real-time UX. - Answers stored in librarian service (not inline in graph - too large) - Document ID as URN: urn:trustgraph:answer:{session_id} - Graph stores tg:document reference (IRI) to librarian document - Added librarian producer/consumer to graph-rag service - get_labelgraph() now returns (labeled_edges, uri_map) - uri_map maps edge_id(label_s, label_p, label_o) → (uri_s, uri_p, uri_o) - Explainability data stores original URIs, not labels - Enables tracing edges back to reifying statements via tg:reifies - Added serialize_triple() to query service (matches storage format) - get_term_value() now handles TRIPLE type terms - Enables querying by quoted triple in object position: ?stmt tg:reifies <<s p o>> - Displays real-time explainability events during query - Resolves rdfs:label for edge components (s, p, o) - Traces source chain via prov:wasDerivedFrom to root document - Output: "Source: Chunk 1 → Page 2 → Document Title" - Label caching to avoid repeated queries GraphRagResponse: - explain_id: str | None - explain_collection: str | None - message_type: str ("chunk" or "explain") - end_of_session: bool trustgraph-base/trustgraph/provenance/: - namespaces.py - Added TG_DOCUMENT predicate - triples.py - answer_triples() supports document_id reference - uris.py - Added edge_selection_uri() trustgraph-base/trustgraph/schema/services/retrieval.py: - GraphRagResponse with explain_id, explain_collection, end_of_session trustgraph-flow/trustgraph/retrieval/graph_rag/: - graph_rag.py - URI preservation, streaming answer accumulation - rag.py - Librarian integration, real-time explain emission trustgraph-flow/trustgraph/query/triples/cassandra/service.py: - Quoted triple serialization for query matching trustgraph-cli/trustgraph/cli/invoke_graph_rag.py: - Full explainability display with label resolution and source tracing
2026-03-10 10:00:01 +00:00
def test_graph_rag_translator_is_final_with_end_of_session_false(self):
"""
Test that GraphRagResponseTranslator returns is_final=False
GraphRAG Query-Time Explainability (#677) Implements full explainability pipeline for GraphRAG queries, enabling traceability from answers back to source documents. Renamed throughout for clarity: - provenance_callback → explain_callback - provenance_id → explain_id - provenance_collection → explain_collection - message_type "provenance" → "explain" - Queue name "provenance" → "explainability" GraphRAG queries now emit explainability events as they execute: 1. Session - query text and timestamp 2. Retrieval - edges retrieved from subgraph 3. Selection - selected edges with LLM reasoning (JSONL with id + reasoning) 4. Answer - reference to synthesized response Events stream via explain_callback during query(), enabling real-time UX. - Answers stored in librarian service (not inline in graph - too large) - Document ID as URN: urn:trustgraph:answer:{session_id} - Graph stores tg:document reference (IRI) to librarian document - Added librarian producer/consumer to graph-rag service - get_labelgraph() now returns (labeled_edges, uri_map) - uri_map maps edge_id(label_s, label_p, label_o) → (uri_s, uri_p, uri_o) - Explainability data stores original URIs, not labels - Enables tracing edges back to reifying statements via tg:reifies - Added serialize_triple() to query service (matches storage format) - get_term_value() now handles TRIPLE type terms - Enables querying by quoted triple in object position: ?stmt tg:reifies <<s p o>> - Displays real-time explainability events during query - Resolves rdfs:label for edge components (s, p, o) - Traces source chain via prov:wasDerivedFrom to root document - Output: "Source: Chunk 1 → Page 2 → Document Title" - Label caching to avoid repeated queries GraphRagResponse: - explain_id: str | None - explain_collection: str | None - message_type: str ("chunk" or "explain") - end_of_session: bool trustgraph-base/trustgraph/provenance/: - namespaces.py - Added TG_DOCUMENT predicate - triples.py - answer_triples() supports document_id reference - uris.py - Added edge_selection_uri() trustgraph-base/trustgraph/schema/services/retrieval.py: - GraphRagResponse with explain_id, explain_collection, end_of_session trustgraph-flow/trustgraph/retrieval/graph_rag/: - graph_rag.py - URI preservation, streaming answer accumulation - rag.py - Librarian integration, real-time explain emission trustgraph-flow/trustgraph/query/triples/cassandra/service.py: - Quoted triple serialization for query matching trustgraph-cli/trustgraph/cli/invoke_graph_rag.py: - Full explainability display with label resolution and source tracing
2026-03-10 10:00:01 +00:00
when end_of_session=False (even if end_of_stream=True).
"""
# Arrange
translator = TranslatorRegistry.get_response_translator("graph-rag")
response = GraphRagResponse(
response="Chunk 1",
GraphRAG Query-Time Explainability (#677) Implements full explainability pipeline for GraphRAG queries, enabling traceability from answers back to source documents. Renamed throughout for clarity: - provenance_callback → explain_callback - provenance_id → explain_id - provenance_collection → explain_collection - message_type "provenance" → "explain" - Queue name "provenance" → "explainability" GraphRAG queries now emit explainability events as they execute: 1. Session - query text and timestamp 2. Retrieval - edges retrieved from subgraph 3. Selection - selected edges with LLM reasoning (JSONL with id + reasoning) 4. Answer - reference to synthesized response Events stream via explain_callback during query(), enabling real-time UX. - Answers stored in librarian service (not inline in graph - too large) - Document ID as URN: urn:trustgraph:answer:{session_id} - Graph stores tg:document reference (IRI) to librarian document - Added librarian producer/consumer to graph-rag service - get_labelgraph() now returns (labeled_edges, uri_map) - uri_map maps edge_id(label_s, label_p, label_o) → (uri_s, uri_p, uri_o) - Explainability data stores original URIs, not labels - Enables tracing edges back to reifying statements via tg:reifies - Added serialize_triple() to query service (matches storage format) - get_term_value() now handles TRIPLE type terms - Enables querying by quoted triple in object position: ?stmt tg:reifies <<s p o>> - Displays real-time explainability events during query - Resolves rdfs:label for edge components (s, p, o) - Traces source chain via prov:wasDerivedFrom to root document - Output: "Source: Chunk 1 → Page 2 → Document Title" - Label caching to avoid repeated queries GraphRagResponse: - explain_id: str | None - explain_collection: str | None - message_type: str ("chunk" or "explain") - end_of_session: bool trustgraph-base/trustgraph/provenance/: - namespaces.py - Added TG_DOCUMENT predicate - triples.py - answer_triples() supports document_id reference - uris.py - Added edge_selection_uri() trustgraph-base/trustgraph/schema/services/retrieval.py: - GraphRagResponse with explain_id, explain_collection, end_of_session trustgraph-flow/trustgraph/retrieval/graph_rag/: - graph_rag.py - URI preservation, streaming answer accumulation - rag.py - Librarian integration, real-time explain emission trustgraph-flow/trustgraph/query/triples/cassandra/service.py: - Quoted triple serialization for query matching trustgraph-cli/trustgraph/cli/invoke_graph_rag.py: - Full explainability display with label resolution and source tracing
2026-03-10 10:00:01 +00:00
message_type="chunk",
end_of_stream=False,
GraphRAG Query-Time Explainability (#677) Implements full explainability pipeline for GraphRAG queries, enabling traceability from answers back to source documents. Renamed throughout for clarity: - provenance_callback → explain_callback - provenance_id → explain_id - provenance_collection → explain_collection - message_type "provenance" → "explain" - Queue name "provenance" → "explainability" GraphRAG queries now emit explainability events as they execute: 1. Session - query text and timestamp 2. Retrieval - edges retrieved from subgraph 3. Selection - selected edges with LLM reasoning (JSONL with id + reasoning) 4. Answer - reference to synthesized response Events stream via explain_callback during query(), enabling real-time UX. - Answers stored in librarian service (not inline in graph - too large) - Document ID as URN: urn:trustgraph:answer:{session_id} - Graph stores tg:document reference (IRI) to librarian document - Added librarian producer/consumer to graph-rag service - get_labelgraph() now returns (labeled_edges, uri_map) - uri_map maps edge_id(label_s, label_p, label_o) → (uri_s, uri_p, uri_o) - Explainability data stores original URIs, not labels - Enables tracing edges back to reifying statements via tg:reifies - Added serialize_triple() to query service (matches storage format) - get_term_value() now handles TRIPLE type terms - Enables querying by quoted triple in object position: ?stmt tg:reifies <<s p o>> - Displays real-time explainability events during query - Resolves rdfs:label for edge components (s, p, o) - Traces source chain via prov:wasDerivedFrom to root document - Output: "Source: Chunk 1 → Page 2 → Document Title" - Label caching to avoid repeated queries GraphRagResponse: - explain_id: str | None - explain_collection: str | None - message_type: str ("chunk" or "explain") - end_of_session: bool trustgraph-base/trustgraph/provenance/: - namespaces.py - Added TG_DOCUMENT predicate - triples.py - answer_triples() supports document_id reference - uris.py - Added edge_selection_uri() trustgraph-base/trustgraph/schema/services/retrieval.py: - GraphRagResponse with explain_id, explain_collection, end_of_session trustgraph-flow/trustgraph/retrieval/graph_rag/: - graph_rag.py - URI preservation, streaming answer accumulation - rag.py - Librarian integration, real-time explain emission trustgraph-flow/trustgraph/query/triples/cassandra/service.py: - Quoted triple serialization for query matching trustgraph-cli/trustgraph/cli/invoke_graph_rag.py: - Full explainability display with label resolution and source tracing
2026-03-10 10:00:01 +00:00
end_of_session=False,
error=None
)
# Act
response_dict, is_final = translator.from_response_with_completion(response)
# Assert
GraphRAG Query-Time Explainability (#677) Implements full explainability pipeline for GraphRAG queries, enabling traceability from answers back to source documents. Renamed throughout for clarity: - provenance_callback → explain_callback - provenance_id → explain_id - provenance_collection → explain_collection - message_type "provenance" → "explain" - Queue name "provenance" → "explainability" GraphRAG queries now emit explainability events as they execute: 1. Session - query text and timestamp 2. Retrieval - edges retrieved from subgraph 3. Selection - selected edges with LLM reasoning (JSONL with id + reasoning) 4. Answer - reference to synthesized response Events stream via explain_callback during query(), enabling real-time UX. - Answers stored in librarian service (not inline in graph - too large) - Document ID as URN: urn:trustgraph:answer:{session_id} - Graph stores tg:document reference (IRI) to librarian document - Added librarian producer/consumer to graph-rag service - get_labelgraph() now returns (labeled_edges, uri_map) - uri_map maps edge_id(label_s, label_p, label_o) → (uri_s, uri_p, uri_o) - Explainability data stores original URIs, not labels - Enables tracing edges back to reifying statements via tg:reifies - Added serialize_triple() to query service (matches storage format) - get_term_value() now handles TRIPLE type terms - Enables querying by quoted triple in object position: ?stmt tg:reifies <<s p o>> - Displays real-time explainability events during query - Resolves rdfs:label for edge components (s, p, o) - Traces source chain via prov:wasDerivedFrom to root document - Output: "Source: Chunk 1 → Page 2 → Document Title" - Label caching to avoid repeated queries GraphRagResponse: - explain_id: str | None - explain_collection: str | None - message_type: str ("chunk" or "explain") - end_of_session: bool trustgraph-base/trustgraph/provenance/: - namespaces.py - Added TG_DOCUMENT predicate - triples.py - answer_triples() supports document_id reference - uris.py - Added edge_selection_uri() trustgraph-base/trustgraph/schema/services/retrieval.py: - GraphRagResponse with explain_id, explain_collection, end_of_session trustgraph-flow/trustgraph/retrieval/graph_rag/: - graph_rag.py - URI preservation, streaming answer accumulation - rag.py - Librarian integration, real-time explain emission trustgraph-flow/trustgraph/query/triples/cassandra/service.py: - Quoted triple serialization for query matching trustgraph-cli/trustgraph/cli/invoke_graph_rag.py: - Full explainability display with label resolution and source tracing
2026-03-10 10:00:01 +00:00
assert is_final is False, "is_final must be False when end_of_session=False"
assert response_dict["response"] == "Chunk 1"
GraphRAG Query-Time Explainability (#677) Implements full explainability pipeline for GraphRAG queries, enabling traceability from answers back to source documents. Renamed throughout for clarity: - provenance_callback → explain_callback - provenance_id → explain_id - provenance_collection → explain_collection - message_type "provenance" → "explain" - Queue name "provenance" → "explainability" GraphRAG queries now emit explainability events as they execute: 1. Session - query text and timestamp 2. Retrieval - edges retrieved from subgraph 3. Selection - selected edges with LLM reasoning (JSONL with id + reasoning) 4. Answer - reference to synthesized response Events stream via explain_callback during query(), enabling real-time UX. - Answers stored in librarian service (not inline in graph - too large) - Document ID as URN: urn:trustgraph:answer:{session_id} - Graph stores tg:document reference (IRI) to librarian document - Added librarian producer/consumer to graph-rag service - get_labelgraph() now returns (labeled_edges, uri_map) - uri_map maps edge_id(label_s, label_p, label_o) → (uri_s, uri_p, uri_o) - Explainability data stores original URIs, not labels - Enables tracing edges back to reifying statements via tg:reifies - Added serialize_triple() to query service (matches storage format) - get_term_value() now handles TRIPLE type terms - Enables querying by quoted triple in object position: ?stmt tg:reifies <<s p o>> - Displays real-time explainability events during query - Resolves rdfs:label for edge components (s, p, o) - Traces source chain via prov:wasDerivedFrom to root document - Output: "Source: Chunk 1 → Page 2 → Document Title" - Label caching to avoid repeated queries GraphRagResponse: - explain_id: str | None - explain_collection: str | None - message_type: str ("chunk" or "explain") - end_of_session: bool trustgraph-base/trustgraph/provenance/: - namespaces.py - Added TG_DOCUMENT predicate - triples.py - answer_triples() supports document_id reference - uris.py - Added edge_selection_uri() trustgraph-base/trustgraph/schema/services/retrieval.py: - GraphRagResponse with explain_id, explain_collection, end_of_session trustgraph-flow/trustgraph/retrieval/graph_rag/: - graph_rag.py - URI preservation, streaming answer accumulation - rag.py - Librarian integration, real-time explain emission trustgraph-flow/trustgraph/query/triples/cassandra/service.py: - Quoted triple serialization for query matching trustgraph-cli/trustgraph/cli/invoke_graph_rag.py: - Full explainability display with label resolution and source tracing
2026-03-10 10:00:01 +00:00
assert response_dict["end_of_session"] is False
def test_graph_rag_translator_provenance_message(self):
"""
Test that GraphRagResponseTranslator handles provenance messages.
"""
# Arrange
translator = TranslatorRegistry.get_response_translator("graph-rag")
response = GraphRagResponse(
response="",
message_type="explain",
explain_id="urn:trustgraph:session:abc123",
end_of_stream=False,
end_of_session=False,
error=None
)
# Act
response_dict, is_final = translator.from_response_with_completion(response)
# Assert
assert is_final is False
assert response_dict["message_type"] == "explain"
assert response_dict["explain_id"] == "urn:trustgraph:session:abc123"
def test_graph_rag_translator_end_of_stream_not_final(self):
"""
Test that end_of_stream=True alone does NOT make is_final=True.
The session continues with provenance messages after LLM stream completes.
"""
# Arrange
translator = TranslatorRegistry.get_response_translator("graph-rag")
response = GraphRagResponse(
response="Final chunk",
message_type="chunk",
end_of_stream=True,
end_of_session=False, # Session continues with provenance
error=None
)
# Act
response_dict, is_final = translator.from_response_with_completion(response)
# Assert
assert is_final is False, "end_of_stream=True should NOT make is_final=True"
assert response_dict["end_of_stream"] is True
assert response_dict["end_of_session"] is False
Add unified explainability support and librarian storage for (#693) Add unified explainability support and librarian storage for all retrieval engines Implements consistent explainability/provenance tracking across GraphRAG, DocumentRAG, and Agent retrieval engines. All large content (answers, thoughts, observations) is now stored in librarian rather than as inline literals in the knowledge graph. Explainability API: - New explainability.py module with entity classes (Question, Exploration, Focus, Synthesis, Analysis, Conclusion) and ExplainabilityClient - Quiescence-based eventual consistency handling for trace fetching - Content fetching from librarian with retry logic CLI updates: - tg-invoke-graph-rag -x/--explainable flag returns explain_id - tg-invoke-document-rag -x/--explainable flag returns explain_id - tg-invoke-agent -x/--explainable flag returns explain_id - tg-list-explain-traces uses new explainability API - tg-show-explain-trace handles all three trace types Agent provenance: - Records session, iterations (think/act/observe), and conclusion - Stores thoughts and observations in librarian with document references - New predicates: tg:thoughtDocument, tg:observationDocument DocumentRAG provenance: - Records question, exploration (chunk retrieval), and synthesis - Stores answers in librarian with document references Schema changes: - AgentResponse: added explain_id, explain_graph fields - RetrievalResponse: added explain_id, explain_graph fields - agent_iteration_triples: supports thought_document_id, observation_document_id Update tests.
2026-03-12 21:40:09 +00:00
def test_document_rag_translator_is_final_with_end_of_session_true(self):
"""
Test that DocumentRagResponseTranslator returns is_final=True
Add unified explainability support and librarian storage for (#693) Add unified explainability support and librarian storage for all retrieval engines Implements consistent explainability/provenance tracking across GraphRAG, DocumentRAG, and Agent retrieval engines. All large content (answers, thoughts, observations) is now stored in librarian rather than as inline literals in the knowledge graph. Explainability API: - New explainability.py module with entity classes (Question, Exploration, Focus, Synthesis, Analysis, Conclusion) and ExplainabilityClient - Quiescence-based eventual consistency handling for trace fetching - Content fetching from librarian with retry logic CLI updates: - tg-invoke-graph-rag -x/--explainable flag returns explain_id - tg-invoke-document-rag -x/--explainable flag returns explain_id - tg-invoke-agent -x/--explainable flag returns explain_id - tg-list-explain-traces uses new explainability API - tg-show-explain-trace handles all three trace types Agent provenance: - Records session, iterations (think/act/observe), and conclusion - Stores thoughts and observations in librarian with document references - New predicates: tg:thoughtDocument, tg:observationDocument DocumentRAG provenance: - Records question, exploration (chunk retrieval), and synthesis - Stores answers in librarian with document references Schema changes: - AgentResponse: added explain_id, explain_graph fields - RetrievalResponse: added explain_id, explain_graph fields - agent_iteration_triples: supports thought_document_id, observation_document_id Update tests.
2026-03-12 21:40:09 +00:00
when end_of_session=True.
"""
# Arrange
translator = TranslatorRegistry.get_response_translator("document-rag")
response = DocumentRagResponse(
response="A document about cats.",
end_of_stream=True,
Add unified explainability support and librarian storage for (#693) Add unified explainability support and librarian storage for all retrieval engines Implements consistent explainability/provenance tracking across GraphRAG, DocumentRAG, and Agent retrieval engines. All large content (answers, thoughts, observations) is now stored in librarian rather than as inline literals in the knowledge graph. Explainability API: - New explainability.py module with entity classes (Question, Exploration, Focus, Synthesis, Analysis, Conclusion) and ExplainabilityClient - Quiescence-based eventual consistency handling for trace fetching - Content fetching from librarian with retry logic CLI updates: - tg-invoke-graph-rag -x/--explainable flag returns explain_id - tg-invoke-document-rag -x/--explainable flag returns explain_id - tg-invoke-agent -x/--explainable flag returns explain_id - tg-list-explain-traces uses new explainability API - tg-show-explain-trace handles all three trace types Agent provenance: - Records session, iterations (think/act/observe), and conclusion - Stores thoughts and observations in librarian with document references - New predicates: tg:thoughtDocument, tg:observationDocument DocumentRAG provenance: - Records question, exploration (chunk retrieval), and synthesis - Stores answers in librarian with document references Schema changes: - AgentResponse: added explain_id, explain_graph fields - RetrievalResponse: added explain_id, explain_graph fields - agent_iteration_triples: supports thought_document_id, observation_document_id Update tests.
2026-03-12 21:40:09 +00:00
end_of_session=True,
error=None
)
# Act
response_dict, is_final = translator.from_response_with_completion(response)
# Assert
Add unified explainability support and librarian storage for (#693) Add unified explainability support and librarian storage for all retrieval engines Implements consistent explainability/provenance tracking across GraphRAG, DocumentRAG, and Agent retrieval engines. All large content (answers, thoughts, observations) is now stored in librarian rather than as inline literals in the knowledge graph. Explainability API: - New explainability.py module with entity classes (Question, Exploration, Focus, Synthesis, Analysis, Conclusion) and ExplainabilityClient - Quiescence-based eventual consistency handling for trace fetching - Content fetching from librarian with retry logic CLI updates: - tg-invoke-graph-rag -x/--explainable flag returns explain_id - tg-invoke-document-rag -x/--explainable flag returns explain_id - tg-invoke-agent -x/--explainable flag returns explain_id - tg-list-explain-traces uses new explainability API - tg-show-explain-trace handles all three trace types Agent provenance: - Records session, iterations (think/act/observe), and conclusion - Stores thoughts and observations in librarian with document references - New predicates: tg:thoughtDocument, tg:observationDocument DocumentRAG provenance: - Records question, exploration (chunk retrieval), and synthesis - Stores answers in librarian with document references Schema changes: - AgentResponse: added explain_id, explain_graph fields - RetrievalResponse: added explain_id, explain_graph fields - agent_iteration_triples: supports thought_document_id, observation_document_id Update tests.
2026-03-12 21:40:09 +00:00
assert is_final is True, "is_final must be True when end_of_session=True"
assert response_dict["response"] == "A document about cats."
Add unified explainability support and librarian storage for (#693) Add unified explainability support and librarian storage for all retrieval engines Implements consistent explainability/provenance tracking across GraphRAG, DocumentRAG, and Agent retrieval engines. All large content (answers, thoughts, observations) is now stored in librarian rather than as inline literals in the knowledge graph. Explainability API: - New explainability.py module with entity classes (Question, Exploration, Focus, Synthesis, Analysis, Conclusion) and ExplainabilityClient - Quiescence-based eventual consistency handling for trace fetching - Content fetching from librarian with retry logic CLI updates: - tg-invoke-graph-rag -x/--explainable flag returns explain_id - tg-invoke-document-rag -x/--explainable flag returns explain_id - tg-invoke-agent -x/--explainable flag returns explain_id - tg-list-explain-traces uses new explainability API - tg-show-explain-trace handles all three trace types Agent provenance: - Records session, iterations (think/act/observe), and conclusion - Stores thoughts and observations in librarian with document references - New predicates: tg:thoughtDocument, tg:observationDocument DocumentRAG provenance: - Records question, exploration (chunk retrieval), and synthesis - Stores answers in librarian with document references Schema changes: - AgentResponse: added explain_id, explain_graph fields - RetrievalResponse: added explain_id, explain_graph fields - agent_iteration_triples: supports thought_document_id, observation_document_id Update tests.
2026-03-12 21:40:09 +00:00
assert response_dict["end_of_session"] is True
def test_document_rag_translator_end_of_stream_not_final(self):
"""
Test that end_of_stream=True alone does NOT make is_final=True.
The session continues with provenance messages after LLM stream completes.
"""
# Arrange
translator = TranslatorRegistry.get_response_translator("document-rag")
response = DocumentRagResponse(
response="Final chunk",
end_of_stream=True,
end_of_session=False, # Session continues with provenance
error=None
)
# Act
response_dict, is_final = translator.from_response_with_completion(response)
# Assert
assert is_final is False, "end_of_stream=True should NOT make is_final=True"
assert response_dict["end_of_stream"] is True
Add unified explainability support and librarian storage for (#693) Add unified explainability support and librarian storage for all retrieval engines Implements consistent explainability/provenance tracking across GraphRAG, DocumentRAG, and Agent retrieval engines. All large content (answers, thoughts, observations) is now stored in librarian rather than as inline literals in the knowledge graph. Explainability API: - New explainability.py module with entity classes (Question, Exploration, Focus, Synthesis, Analysis, Conclusion) and ExplainabilityClient - Quiescence-based eventual consistency handling for trace fetching - Content fetching from librarian with retry logic CLI updates: - tg-invoke-graph-rag -x/--explainable flag returns explain_id - tg-invoke-document-rag -x/--explainable flag returns explain_id - tg-invoke-agent -x/--explainable flag returns explain_id - tg-list-explain-traces uses new explainability API - tg-show-explain-trace handles all three trace types Agent provenance: - Records session, iterations (think/act/observe), and conclusion - Stores thoughts and observations in librarian with document references - New predicates: tg:thoughtDocument, tg:observationDocument DocumentRAG provenance: - Records question, exploration (chunk retrieval), and synthesis - Stores answers in librarian with document references Schema changes: - AgentResponse: added explain_id, explain_graph fields - RetrievalResponse: added explain_id, explain_graph fields - agent_iteration_triples: supports thought_document_id, observation_document_id Update tests.
2026-03-12 21:40:09 +00:00
assert response_dict["end_of_session"] is False
def test_document_rag_translator_is_final_with_end_of_stream_false(self):
"""
Test that DocumentRagResponseTranslator returns is_final=False
when end_of_stream=False.
"""
# Arrange
translator = TranslatorRegistry.get_response_translator("document-rag")
response = DocumentRagResponse(
response="Chunk 1",
end_of_stream=False,
error=None
)
# Act
response_dict, is_final = translator.from_response_with_completion(response)
# Assert
assert is_final is False, "is_final must be False when end_of_stream=False"
assert response_dict["response"] == "Chunk 1"
assert response_dict["end_of_stream"] is False
@pytest.mark.contract
class TestAgentTranslatorCompletionFlags:
"""Contract tests for Agent response translator completion flags"""
def test_agent_translator_is_final_with_end_of_dialog_true(self):
"""
Test that AgentResponseTranslator returns is_final=True
when end_of_dialog=True.
"""
# Arrange
translator = TranslatorRegistry.get_response_translator("agent")
response = AgentResponse(
answer="4",
error=None,
thought=None,
observation=None,
end_of_message=True,
end_of_dialog=True
)
# Act
response_dict, is_final = translator.from_response_with_completion(response)
# Assert
assert is_final is True, "is_final must be True when end_of_dialog=True"
assert response_dict["answer"] == "4"
assert response_dict["end_of_dialog"] is True
def test_agent_translator_is_final_with_end_of_dialog_false(self):
"""
Test that AgentResponseTranslator returns is_final=False
when end_of_dialog=False.
"""
# Arrange
translator = TranslatorRegistry.get_response_translator("agent")
response = AgentResponse(
answer=None,
error=None,
thought="I need to solve this.",
observation=None,
end_of_message=True,
end_of_dialog=False
)
# Act
response_dict, is_final = translator.from_response_with_completion(response)
# Assert
assert is_final is False, "is_final must be False when end_of_dialog=False"
assert response_dict["thought"] == "I need to solve this."
assert response_dict["end_of_dialog"] is False
def test_agent_translator_is_final_fallback_with_answer(self):
"""
Test that AgentResponseTranslator returns is_final=True
when answer is present (fallback for legacy responses).
"""
# Arrange
translator = TranslatorRegistry.get_response_translator("agent")
# Legacy response without end_of_dialog flag
response = AgentResponse(
answer="4",
error=None,
thought=None,
observation=None
)
# Act
response_dict, is_final = translator.from_response_with_completion(response)
# Assert
assert is_final is True, "is_final must be True when answer is present (legacy fallback)"
assert response_dict["answer"] == "4"
def test_agent_translator_intermediate_message_is_not_final(self):
"""
Test that intermediate messages (thought/observation) return is_final=False.
"""
# Arrange
translator = TranslatorRegistry.get_response_translator("agent")
# Test thought message
thought_response = AgentResponse(
answer=None,
error=None,
thought="Processing...",
observation=None,
end_of_message=True,
end_of_dialog=False
)
# Act
thought_dict, thought_is_final = translator.from_response_with_completion(thought_response)
# Assert
assert thought_is_final is False, "Thought message must not be final"
# Test observation message
observation_response = AgentResponse(
answer=None,
error=None,
thought=None,
observation="Result found",
end_of_message=True,
end_of_dialog=False
)
# Act
obs_dict, obs_is_final = translator.from_response_with_completion(observation_response)
# Assert
assert obs_is_final is False, "Observation message must not be final"
def test_agent_translator_streaming_format_with_end_of_dialog(self):
"""
Test that streaming format messages use end_of_dialog for is_final.
"""
# Arrange
translator = TranslatorRegistry.get_response_translator("agent")
# Streaming format with end_of_dialog=True
response = AgentResponse(
chunk_type="answer",
content="",
end_of_message=True,
end_of_dialog=True,
answer=None,
error=None,
thought=None,
observation=None
)
# Act
response_dict, is_final = translator.from_response_with_completion(response)
# Assert
assert is_final is True, "Streaming format must use end_of_dialog for is_final"
assert response_dict["end_of_dialog"] is True