Split Analysis into Analysis+ToolUse and Observation, add message_id (#747)

Refactor agent provenance so that the decision (thought + tool selection) and the result (observation) are separate DAG entities:

    Question ← Analysis+ToolUse ← Observation ← ... ← Conclusion

Analysis gains tg:ToolUse as a mixin RDF type and is emitted before tool execution via an on_action callback in react(). This ensures sub-traces (e.g. GraphRAG) appear after their parent Analysis in the streaming event order.

Observation becomes a standalone prov:Entity with tg:Observation type, emitted after tool execution. The linear DAG chain runs through Observation — subsequent iterations and the Conclusion derive from it, not from the Analysis.

message_id is populated on streaming AgentResponse for thought and observation chunks, using the provenance URI of the entity being built. This lets clients group streamed chunks by entity.

Wire changes:
- provenance/agent.py: Add ToolUse type, new agent_observation_triples(), remove observation from iteration
- agent_manager.py: Add on_action callback between reason() and tool execution
- orchestrator/pattern_base.py: Split emit, wire message_id, chain through observation URIs
- orchestrator/react_pattern.py: Emit Analysis via on_action before tool runs
- agent/react/service.py: Same for non-orchestrator path
- api/explainability.py: New Observation class, updated dispatch and chain walker
- api/types.py: Add message_id to AgentThought/AgentObservation
- cli: Render Observation separately, [analysis: tool] labels
2026-07-02 22:41:01 +02:00 · 2026-03-31 17:51:22 +01:00 · 2026-03-31 17:51:22 +01:00 · 153ae9ad30
commit 153ae9ad30
parent 89e13a756a
28 changed files with 661 additions and 350 deletions
--- a/tests/unit/test_agent/test_agent_service_non_streaming.py
+++ b/tests/unit/test_agent/test_agent_service_non_streaming.py
@ -39,7 +39,7 @@ class TestAgentServiceNonStreaming:
        mock_agent_manager_class.return_value = mock_agent_instance

        # Mock react to call think and observe callbacks
-        async def mock_react(question, history, think, observe, answer, context, streaming):
+        async def mock_react(question, history, think, observe, answer, context, streaming, on_action=None):
            await think("I need to solve this.", is_final=True)
            await observe("The answer is 4.", is_final=True)
            return Final(thought="Final answer", final="4")
@ -76,11 +76,22 @@ class TestAgentServiceNonStreaming:
        # Execute
        await processor.on_request(msg, consumer, flow)

-        # Verify: should have 3 responses (thought, observation, answer)
-        assert len(sent_responses) == 3, f"Expected 3 responses, got {len(sent_responses)}"
+        # Filter out explain events — those are always sent now
+        content_responses = [
+            r for r in sent_responses if r.chunk_type != "explain"
+        ]
+        explain_responses = [
+            r for r in sent_responses if r.chunk_type == "explain"
+        ]
+
+        # Should have explain events for session, iteration, observation, and final
+        assert len(explain_responses) >= 1, "Expected at least 1 explain event"
+
+        # Should have 3 content responses (thought, observation, answer)
+        assert len(content_responses) == 3, f"Expected 3 content responses, got {len(content_responses)}"

        # Check thought message
-        thought_response = sent_responses[0]
+        thought_response = content_responses[0]
        assert isinstance(thought_response, AgentResponse)
        assert thought_response.chunk_type == "thought"
        assert thought_response.content == "I need to solve this."
@ -88,7 +99,7 @@ class TestAgentServiceNonStreaming:
        assert thought_response.end_of_dialog is False, "Thought message must have end_of_dialog=False"

        # Check observation message
-        observation_response = sent_responses[1]
+        observation_response = content_responses[1]
        assert isinstance(observation_response, AgentResponse)
        assert observation_response.chunk_type == "observation"
        assert observation_response.content == "The answer is 4."
@ -120,7 +131,7 @@ class TestAgentServiceNonStreaming:
        mock_agent_manager_class.return_value = mock_agent_instance

        # Mock react to return Final directly
-        async def mock_react(question, history, think, observe, answer, context, streaming):
+        async def mock_react(question, history, think, observe, answer, context, streaming, on_action=None):
            return Final(thought="Final answer", final="4")

        mock_agent_instance.react = mock_react
@ -155,11 +166,22 @@ class TestAgentServiceNonStreaming:
        # Execute
        await processor.on_request(msg, consumer, flow)

-        # Verify: should have 1 response (final answer)
-        assert len(sent_responses) == 1, f"Expected 1 response, got {len(sent_responses)}"
+        # Filter out explain events — those are always sent now
+        content_responses = [
+            r for r in sent_responses if r.chunk_type != "explain"
+        ]
+        explain_responses = [
+            r for r in sent_responses if r.chunk_type == "explain"
+        ]
+
+        # Should have explain events for session and final
+        assert len(explain_responses) >= 1, "Expected at least 1 explain event"
+
+        # Should have 1 content response (final answer)
+        assert len(content_responses) == 1, f"Expected 1 content response, got {len(content_responses)}"

        # Check final answer message
-        answer_response = sent_responses[0]
+        answer_response = content_responses[0]
        assert isinstance(answer_response, AgentResponse)
        assert answer_response.chunk_type == "answer"
        assert answer_response.content == "4"
--- a/tests/unit/test_agent/test_explainability_parsing.py
+++ b/tests/unit/test_agent/test_explainability_parsing.py
@ -13,6 +13,7 @@ from trustgraph.api.explainability import (
    StepResult,
    Synthesis,
    Analysis,
+    Observation,
    Conclusion,
    TG_DECOMPOSITION,
    TG_FINDING,
@ -20,6 +21,7 @@ from trustgraph.api.explainability import (
    TG_STEP_RESULT,
    TG_SYNTHESIS,
    TG_ANSWER_TYPE,
+    TG_OBSERVATION_TYPE,
    TG_ANALYSIS,
    TG_CONCLUSION,
    TG_DOCUMENT,
@ -74,6 +76,11 @@ class TestFromTriplesDispatch:
        entity = ExplainEntity.from_triples("urn:a", triples)
        assert isinstance(entity, Analysis)

+    def test_dispatches_observation(self):
+        triples = _make_triples("urn:o", [PROV_ENTITY, TG_OBSERVATION_TYPE])
+        entity = ExplainEntity.from_triples("urn:o", triples)
+        assert isinstance(entity, Observation)
+
    def test_dispatches_conclusion_unchanged(self):
        triples = _make_triples("urn:c",
                                [PROV_ENTITY, TG_CONCLUSION, TG_ANSWER_TYPE])
--- a/tests/unit/test_agent/test_provenance_triples.py
+++ b/tests/unit/test_agent/test_provenance_triples.py
@ -14,7 +14,7 @@ from trustgraph.provenance import (

 from trustgraph.provenance.namespaces import (
    RDF_TYPE, RDFS_LABEL,
-    PROV_ENTITY, PROV_WAS_DERIVED_FROM, PROV_WAS_GENERATED_BY,
+    PROV_ENTITY, PROV_WAS_DERIVED_FROM,
    TG_DECOMPOSITION, TG_FINDING, TG_PLAN_TYPE, TG_STEP_RESULT,
    TG_SYNTHESIS, TG_ANSWER_TYPE, TG_DOCUMENT,
    TG_SUBAGENT_GOAL, TG_PLAN_STEP,
@ -63,7 +63,7 @@ class TestDecompositionTriples:
            "urn:decompose", "urn:session", ["goal-a"],
        )
        ts = _triple_set(triples)
-        assert ("urn:decompose", PROV_WAS_GENERATED_BY, "urn:session") in ts
+        assert ("urn:decompose", PROV_WAS_DERIVED_FROM, "urn:session") in ts

    def test_includes_goals(self):
        goals = ["What is X?", "What is Y?", "What is Z?"]
@ -141,7 +141,7 @@ class TestPlanTriples:
            "urn:plan", "urn:session", ["step-a"],
        )
        ts = _triple_set(triples)
-        assert ("urn:plan", PROV_WAS_GENERATED_BY, "urn:session") in ts
+        assert ("urn:plan", PROV_WAS_DERIVED_FROM, "urn:session") in ts

    def test_includes_steps(self):
        steps = ["Define X", "Research Y", "Analyse Z"]