trustgraph/tests/contract/test_translator_completion_flags.py
cybermaggedon d2751553a3
Add agent explainability instrumentation and unify envelope field naming (#795)
Addresses recommendations from the UX developer's agent experience report.
Adds provenance predicates, DAG structure changes, error resilience, and
a published OWL ontology.

Explainability additions:

- Tool candidates: tg:toolCandidate on Analysis events lists the tools
  visible to the LLM for each iteration (names only, descriptions in config)
- Termination reason: tg:terminationReason on Conclusion/Synthesis events
  (final-answer, plan-complete, subagents-complete)
- Step counter: tg:stepNumber on iteration events
- Pattern decision: new tg:PatternDecision entity in the DAG between
  session and first iteration, carrying tg:pattern and tg:taskType
- Latency: tg:llmDurationMs on Analysis events, tg:toolDurationMs on
  Observation events
- Token counts on events: tg:inToken/tg:outToken/tg:llmModel on
  Grounding, Focus, Synthesis, and Analysis events
- Tool/parse errors: tg:toolError on Observation events with tg:Error
  mixin type. Parse failures return as error observations instead of
  crashing the agent, giving it a chance to retry.

Envelope unification:

- Rename chunk_type to message_type across AgentResponse schema,
  translator, SDK types, socket clients, CLI, and all tests.
  Agent and RAG services now both use message_type on the wire.

Ontology:

- specs/ontology/trustgraph.ttl — OWL vocabulary covering all 26 classes,
  7 object properties, and 36+ datatype properties including new predicates.

DAG structure tests:

- tests/unit/test_provenance/test_dag_structure.py verifies the
  wasDerivedFrom chain for GraphRAG, DocumentRAG, and all three agent
  patterns (react, plan, supervisor) including the pattern-decision link.
2026-04-13 16:16:42 +01:00

282 lines
9.7 KiB
Python

"""
Contract tests for message translator completion flag behavior.
These tests verify that translators correctly compute the is_final flag
from the session-level fields end_of_session (RAG responses) and
end_of_dialog (agent responses); end_of_stream alone does not make a
response final.
"""
import pytest
from trustgraph.schema import (
GraphRagResponse, DocumentRagResponse, AgentResponse, Error
)
from trustgraph.messaging import TranslatorRegistry
@pytest.mark.contract
class TestRAGTranslatorCompletionFlags:
    """Contract tests for RAG response translator completion flags.

    Each test fetches the "graph-rag" or "document-rag" response translator
    from TranslatorRegistry, encodes a response with
    encode_with_completion(), and checks the returned
    (response_dict, is_final) pair.
    """

    def test_graph_rag_translator_is_final_with_end_of_session_true(self):
        """
        Test that GraphRagResponseTranslator returns is_final=True
        when end_of_session=True.
        """
        # Arrange
        translator = TranslatorRegistry.get_response_translator("graph-rag")
        response = GraphRagResponse(
            response="A small domesticated mammal.",
            message_type="chunk",
            end_of_stream=True,
            end_of_session=True,
            error=None
        )

        # Act
        response_dict, is_final = translator.encode_with_completion(response)

        # Assert
        assert is_final is True, "is_final must be True when end_of_session=True"
        assert response_dict["response"] == "A small domesticated mammal."
        assert response_dict["end_of_session"] is True
        assert response_dict["message_type"] == "chunk"

    def test_graph_rag_translator_is_final_with_end_of_session_false(self):
        """
        Test that GraphRagResponseTranslator returns is_final=False
        for a mid-stream chunk (end_of_stream=False, end_of_session=False).

        The end_of_stream=True / end_of_session=False combination is covered
        separately by test_graph_rag_translator_end_of_stream_not_final.
        """
        # Arrange
        translator = TranslatorRegistry.get_response_translator("graph-rag")
        response = GraphRagResponse(
            response="Chunk 1",
            message_type="chunk",
            end_of_stream=False,
            end_of_session=False,
            error=None
        )

        # Act
        response_dict, is_final = translator.encode_with_completion(response)

        # Assert
        assert is_final is False, "is_final must be False when end_of_session=False"
        assert response_dict["response"] == "Chunk 1"
        assert response_dict["end_of_session"] is False

    def test_graph_rag_translator_provenance_message(self):
        """
        Test that GraphRagResponseTranslator handles provenance messages.

        An "explain" message carrying an explain_id must keep both fields in
        the encoded dict and must not be reported as final.
        """
        # Arrange
        translator = TranslatorRegistry.get_response_translator("graph-rag")
        response = GraphRagResponse(
            response="",
            message_type="explain",
            explain_id="urn:trustgraph:session:abc123",
            end_of_stream=False,
            end_of_session=False,
            error=None
        )

        # Act
        response_dict, is_final = translator.encode_with_completion(response)

        # Assert
        assert is_final is False
        assert response_dict["message_type"] == "explain"
        assert response_dict["explain_id"] == "urn:trustgraph:session:abc123"

    def test_graph_rag_translator_end_of_stream_not_final(self):
        """
        Test that end_of_stream=True alone does NOT make is_final=True.
        The session continues with provenance messages after LLM stream completes.
        """
        # Arrange
        translator = TranslatorRegistry.get_response_translator("graph-rag")
        response = GraphRagResponse(
            response="Final chunk",
            message_type="chunk",
            end_of_stream=True,
            end_of_session=False,  # Session continues with provenance
            error=None
        )

        # Act
        response_dict, is_final = translator.encode_with_completion(response)

        # Assert
        assert is_final is False, "end_of_stream=True should NOT make is_final=True"
        assert response_dict["end_of_stream"] is True
        assert response_dict["end_of_session"] is False

    def test_document_rag_translator_is_final_with_end_of_session_true(self):
        """
        Test that DocumentRagResponseTranslator returns is_final=True
        when end_of_session=True.
        """
        # Arrange
        translator = TranslatorRegistry.get_response_translator("document-rag")
        response = DocumentRagResponse(
            response="A document about cats.",
            end_of_stream=True,
            end_of_session=True,
            error=None
        )

        # Act
        response_dict, is_final = translator.encode_with_completion(response)

        # Assert
        assert is_final is True, "is_final must be True when end_of_session=True"
        assert response_dict["response"] == "A document about cats."
        assert response_dict["end_of_session"] is True

    def test_document_rag_translator_end_of_stream_not_final(self):
        """
        Test that end_of_stream=True alone does NOT make is_final=True.
        The session continues with provenance messages after LLM stream completes.
        """
        # Arrange
        translator = TranslatorRegistry.get_response_translator("document-rag")
        response = DocumentRagResponse(
            response="Final chunk",
            end_of_stream=True,
            end_of_session=False,  # Session continues with provenance
            error=None
        )

        # Act
        response_dict, is_final = translator.encode_with_completion(response)

        # Assert
        assert is_final is False, "end_of_stream=True should NOT make is_final=True"
        assert response_dict["end_of_stream"] is True
        assert response_dict["end_of_session"] is False

    def test_document_rag_translator_is_final_with_end_of_stream_false(self):
        """
        Test that DocumentRagResponseTranslator returns is_final=False
        for a mid-stream chunk (end_of_stream=False).

        NOTE(review): end_of_session is not passed here, so the result
        depends on the schema default for that field (presumably False;
        confirm against DocumentRagResponse). The other tests in this class
        show that end_of_session, not end_of_stream, is what makes a
        response final.
        """
        # Arrange
        translator = TranslatorRegistry.get_response_translator("document-rag")
        response = DocumentRagResponse(
            response="Chunk 1",
            end_of_stream=False,
            error=None
        )

        # Act
        response_dict, is_final = translator.encode_with_completion(response)

        # Assert
        assert is_final is False, "is_final must be False when end_of_stream=False"
        assert response_dict["response"] == "Chunk 1"
        assert response_dict["end_of_stream"] is False
@pytest.mark.contract
class TestAgentTranslatorCompletionFlags:
    """Contract tests for Agent response translator completion flags.

    Each test fetches the "agent" response translator from
    TranslatorRegistry, encodes an AgentResponse with
    encode_with_completion(), and checks the returned
    (response_dict, is_final) pair.
    """

    def test_agent_translator_is_final_with_end_of_dialog_true(self):
        """
        Test that AgentResponseTranslator returns is_final=True
        when end_of_dialog=True.
        """
        # Arrange
        translator = TranslatorRegistry.get_response_translator("agent")
        response = AgentResponse(
            message_type="answer",
            content="4",
            end_of_message=True,
            end_of_dialog=True,
        )

        # Act
        response_dict, is_final = translator.encode_with_completion(response)

        # Assert
        assert is_final is True, "is_final must be True when end_of_dialog=True"
        assert response_dict["content"] == "4"
        assert response_dict["end_of_dialog"] is True

    def test_agent_translator_is_final_with_end_of_dialog_false(self):
        """
        Test that AgentResponseTranslator returns is_final=False
        when end_of_dialog=False.
        """
        # Arrange
        translator = TranslatorRegistry.get_response_translator("agent")
        response = AgentResponse(
            message_type="thought",
            content="I need to solve this.",
            end_of_message=True,
            end_of_dialog=False,
        )

        # Act
        response_dict, is_final = translator.encode_with_completion(response)

        # Assert
        assert is_final is False, "is_final must be False when end_of_dialog=False"
        assert response_dict["content"] == "I need to solve this."
        assert response_dict["end_of_dialog"] is False

    def test_agent_translator_intermediate_message_is_not_final(self):
        """
        Test that intermediate messages (thought/observation) return is_final=False.

        Both messages have end_of_message=True but end_of_dialog=False, so
        finishing an individual message must not be reported as final.
        """
        # Arrange
        translator = TranslatorRegistry.get_response_translator("agent")
        # (message_type, content, assertion message), checked in this order
        intermediate_cases = [
            ("thought", "Processing...", "Thought message must not be final"),
            (
                "observation",
                "Result found",
                "Observation message must not be final",
            ),
        ]

        for message_type, content, failure_message in intermediate_cases:
            response = AgentResponse(
                message_type=message_type,
                content=content,
                end_of_message=True,
                end_of_dialog=False,
            )

            # Act: only the completion flag matters here, so the encoded
            # dict is discarded.
            _, is_final = translator.encode_with_completion(response)

            # Assert
            assert is_final is False, failure_message

    def test_agent_translator_streaming_format_with_end_of_dialog(self):
        """
        Test that streaming format messages use end_of_dialog for is_final.
        """
        # Arrange
        translator = TranslatorRegistry.get_response_translator("agent")
        # Streaming format with end_of_dialog=True
        response = AgentResponse(
            message_type="answer",
            content="",
            end_of_message=True,
            end_of_dialog=True,
        )

        # Act
        response_dict, is_final = translator.encode_with_completion(response)

        # Assert
        assert is_final is True, "Streaming format must use end_of_dialog for is_final"
        assert response_dict["end_of_dialog"] is True