Split Analysis into Analysis+ToolUse and Observation, add message_id (#747)

Refactor agent provenance so that the decision (thought + tool
selection) and the result (observation) are separate DAG entities:

  Question ← Analysis+ToolUse ← Observation ← ... ← Conclusion

Analysis gains tg:ToolUse as a mixin RDF type and is emitted
before tool execution via an on_action callback in react().
This ensures sub-traces (e.g. GraphRAG) appear after their
parent Analysis in the streaming event order.

Observation becomes a standalone prov:Entity with tg:Observation
type, emitted after tool execution. The linear DAG chain runs
through Observation — subsequent iterations and the Conclusion
derive from it, not from the Analysis.

message_id is populated on streaming AgentResponse for thought
and observation chunks, using the provenance URI of the entity
being built. This lets clients group streamed chunks by entity.

Wire changes:
- provenance/agent.py: Add ToolUse type, new
  agent_observation_triples(), remove observation from iteration
- agent_manager.py: Add on_action callback between reason() and
  tool execution
- orchestrator/pattern_base.py: Split emit, wire message_id,
  chain through observation URIs
- orchestrator/react_pattern.py: Emit Analysis via on_action
  before tool runs
- agent/react/service.py: Same for non-orchestrator path
- api/explainability.py: New Observation class, updated dispatch
  and chain walker
- api/types.py: Add message_id to AgentThought/AgentObservation
- cli: Render Observation separately; add [analysis: tool] labels
This commit is contained in:
cybermaggedon 2026-03-31 17:51:22 +01:00 committed by GitHub
parent 89e13a756a
commit 153ae9ad30
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
28 changed files with 661 additions and 350 deletions

View file

@ -39,7 +39,7 @@ class TestAgentServiceNonStreaming:
mock_agent_manager_class.return_value = mock_agent_instance
# Mock react to call think and observe callbacks
async def mock_react(question, history, think, observe, answer, context, streaming):
async def mock_react(question, history, think, observe, answer, context, streaming, on_action=None):
await think("I need to solve this.", is_final=True)
await observe("The answer is 4.", is_final=True)
return Final(thought="Final answer", final="4")
@ -76,11 +76,22 @@ class TestAgentServiceNonStreaming:
# Execute
await processor.on_request(msg, consumer, flow)
# Verify: should have 3 responses (thought, observation, answer)
assert len(sent_responses) == 3, f"Expected 3 responses, got {len(sent_responses)}"
# Filter out explain events — those are always sent now
content_responses = [
r for r in sent_responses if r.chunk_type != "explain"
]
explain_responses = [
r for r in sent_responses if r.chunk_type == "explain"
]
# Explain events are expected for session, iteration, observation, and final;
# the assertion below only requires that at least one was sent
assert len(explain_responses) >= 1, "Expected at least 1 explain event"
# Should have 3 content responses (thought, observation, answer)
assert len(content_responses) == 3, f"Expected 3 content responses, got {len(content_responses)}"
# Check thought message
thought_response = sent_responses[0]
thought_response = content_responses[0]
assert isinstance(thought_response, AgentResponse)
assert thought_response.chunk_type == "thought"
assert thought_response.content == "I need to solve this."
@ -88,7 +99,7 @@ class TestAgentServiceNonStreaming:
assert thought_response.end_of_dialog is False, "Thought message must have end_of_dialog=False"
# Check observation message
observation_response = sent_responses[1]
observation_response = content_responses[1]
assert isinstance(observation_response, AgentResponse)
assert observation_response.chunk_type == "observation"
assert observation_response.content == "The answer is 4."
@ -120,7 +131,7 @@ class TestAgentServiceNonStreaming:
mock_agent_manager_class.return_value = mock_agent_instance
# Mock react to return Final directly
async def mock_react(question, history, think, observe, answer, context, streaming):
async def mock_react(question, history, think, observe, answer, context, streaming, on_action=None):
return Final(thought="Final answer", final="4")
mock_agent_instance.react = mock_react
@ -155,11 +166,22 @@ class TestAgentServiceNonStreaming:
# Execute
await processor.on_request(msg, consumer, flow)
# Verify: should have 1 response (final answer)
assert len(sent_responses) == 1, f"Expected 1 response, got {len(sent_responses)}"
# Filter out explain events — those are always sent now
content_responses = [
r for r in sent_responses if r.chunk_type != "explain"
]
explain_responses = [
r for r in sent_responses if r.chunk_type == "explain"
]
# Explain events are expected for session and final;
# the assertion below only requires that at least one was sent
assert len(explain_responses) >= 1, "Expected at least 1 explain event"
# Should have 1 content response (final answer)
assert len(content_responses) == 1, f"Expected 1 content response, got {len(content_responses)}"
# Check final answer message
answer_response = sent_responses[0]
answer_response = content_responses[0]
assert isinstance(answer_response, AgentResponse)
assert answer_response.chunk_type == "answer"
assert answer_response.content == "4"

View file

@ -13,6 +13,7 @@ from trustgraph.api.explainability import (
StepResult,
Synthesis,
Analysis,
Observation,
Conclusion,
TG_DECOMPOSITION,
TG_FINDING,
@ -20,6 +21,7 @@ from trustgraph.api.explainability import (
TG_STEP_RESULT,
TG_SYNTHESIS,
TG_ANSWER_TYPE,
TG_OBSERVATION_TYPE,
TG_ANALYSIS,
TG_CONCLUSION,
TG_DOCUMENT,
@ -74,6 +76,11 @@ class TestFromTriplesDispatch:
entity = ExplainEntity.from_triples("urn:a", triples)
assert isinstance(entity, Analysis)
def test_dispatches_observation(self):
triples = _make_triples("urn:o", [PROV_ENTITY, TG_OBSERVATION_TYPE])
entity = ExplainEntity.from_triples("urn:o", triples)
assert isinstance(entity, Observation)
def test_dispatches_conclusion_unchanged(self):
triples = _make_triples("urn:c",
[PROV_ENTITY, TG_CONCLUSION, TG_ANSWER_TYPE])

View file

@ -14,7 +14,7 @@ from trustgraph.provenance import (
from trustgraph.provenance.namespaces import (
RDF_TYPE, RDFS_LABEL,
PROV_ENTITY, PROV_WAS_DERIVED_FROM, PROV_WAS_GENERATED_BY,
PROV_ENTITY, PROV_WAS_DERIVED_FROM,
TG_DECOMPOSITION, TG_FINDING, TG_PLAN_TYPE, TG_STEP_RESULT,
TG_SYNTHESIS, TG_ANSWER_TYPE, TG_DOCUMENT,
TG_SUBAGENT_GOAL, TG_PLAN_STEP,
@ -63,7 +63,7 @@ class TestDecompositionTriples:
"urn:decompose", "urn:session", ["goal-a"],
)
ts = _triple_set(triples)
assert ("urn:decompose", PROV_WAS_GENERATED_BY, "urn:session") in ts
assert ("urn:decompose", PROV_WAS_DERIVED_FROM, "urn:session") in ts
def test_includes_goals(self):
goals = ["What is X?", "What is Y?", "What is Z?"]
@ -141,7 +141,7 @@ class TestPlanTriples:
"urn:plan", "urn:session", ["step-a"],
)
ts = _triple_set(triples)
assert ("urn:plan", PROV_WAS_GENERATED_BY, "urn:session") in ts
assert ("urn:plan", PROV_WAS_DERIVED_FROM, "urn:session") in ts
def test_includes_steps(self):
steps = ["Define X", "Research Y", "Analyse Z"]