Split Analysis into Analysis+ToolUse and Observation, add message_id (#747)

Refactor agent provenance so that the decision (thought + tool selection) and the result (observation) are separate DAG entities:

    Question ← Analysis+ToolUse ← Observation ← ... ← Conclusion

Analysis gains tg:ToolUse as a mixin RDF type and is emitted before tool execution via an on_action callback in react(). This ensures sub-traces (e.g. GraphRAG) appear after their parent Analysis in the streaming event order.

Observation becomes a standalone prov:Entity with tg:Observation type, emitted after tool execution. The linear DAG chain runs through Observation — subsequent iterations and the Conclusion derive from it, not from the Analysis.

message_id is populated on streaming AgentResponse for thought and observation chunks, using the provenance URI of the entity being built. This lets clients group streamed chunks by entity.

Wire changes:
- provenance/agent.py: Add ToolUse type, new agent_observation_triples(), remove observation from iteration
- agent_manager.py: Add on_action callback between reason() and tool execution
- orchestrator/pattern_base.py: Split emit, wire message_id, chain through observation URIs
- orchestrator/react_pattern.py: Emit Analysis via on_action before tool runs
- agent/react/service.py: Same for non-orchestrator path
- api/explainability.py: New Observation class, updated dispatch and chain walker
- api/types.py: Add message_id to AgentThought/AgentObservation
- cli: Render Observation separately, [analysis: tool] labels
2026-05-20 21:05:13 +02:00 · 2026-03-31 17:51:22 +01:00 · 2026-03-31 17:51:22 +01:00 · 153ae9ad30
commit 153ae9ad30
parent 89e13a756a
28 changed files with 661 additions and 350 deletions
--- a/trustgraph-base/trustgraph/api/__init__.py
+++ b/trustgraph-base/trustgraph/api/__init__.py
@ -81,6 +81,7 @@ from .explainability import (
    Synthesis,
    Reflection,
    Analysis,
+    Observation,
    Conclusion,
    Decomposition,
    Finding,
@ -164,6 +165,7 @@ __all__ = [
    "Focus",
    "Synthesis",
    "Analysis",
+    "Observation",
    "Conclusion",
    "EdgeSelection",
    "wire_triples_to_tuples",
--- a/trustgraph-base/trustgraph/api/explainability.py
+++ b/trustgraph-base/trustgraph/api/explainability.py
@ -40,6 +40,7 @@ TG_ANSWER_TYPE = TG + "Answer"
 TG_REFLECTION_TYPE = TG + "Reflection"
 TG_THOUGHT_TYPE = TG + "Thought"
 TG_OBSERVATION_TYPE = TG + "Observation"
+TG_TOOL_USE = TG + "ToolUse"
 TG_GRAPH_RAG_QUESTION = TG + "GraphRagQuestion"
 TG_DOC_RAG_QUESTION = TG + "DocRagQuestion"
 TG_AGENT_QUESTION = TG + "AgentQuestion"
@ -58,7 +59,6 @@ TG_PLAN_STEP = TG + "planStep"
 PROV = "http://www.w3.org/ns/prov#"
 PROV_STARTED_AT_TIME = PROV + "startedAtTime"
 PROV_WAS_DERIVED_FROM = PROV + "wasDerivedFrom"
-PROV_WAS_GENERATED_BY = PROV + "wasGeneratedBy"

 RDF_TYPE = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"
 RDFS_LABEL = "http://www.w3.org/2000/01/rdf-schema#label"
@ -102,6 +102,8 @@ class ExplainEntity:
            return StepResult.from_triples(uri, triples)
        elif TG_SYNTHESIS in types:
            return Synthesis.from_triples(uri, triples)
+        elif TG_OBSERVATION_TYPE in types and TG_REFLECTION_TYPE not in types:
+            return Observation.from_triples(uri, triples)
        elif TG_REFLECTION_TYPE in types:
            return Reflection.from_triples(uri, triples)
        elif TG_ANALYSIS in types:
@ -279,18 +281,16 @@ class Reflection(ExplainEntity):

@dataclass
 class Analysis(ExplainEntity):
-    """Analysis entity - one think/act/observe cycle (Agent only)."""
+    """Analysis+ToolUse entity - decision + tool call (Agent only)."""
    action: str = ""
    arguments: str = ""  # JSON string
    thought: str = ""
-    observation: str = ""

    @classmethod
    def from_triples(cls, uri: str, triples: List[Tuple[str, str, Any]]) -> "Analysis":
        action = ""
        arguments = ""
        thought = ""
-        observation = ""

        for s, p, o in triples:
            if p == TG_ACTION:
@ -299,8 +299,6 @@ class Analysis(ExplainEntity):
                arguments = o
            elif p == TG_THOUGHT:
                thought = o
-            elif p == TG_OBSERVATION:
-                observation = o

        return cls(
            uri=uri,
@ -308,7 +306,26 @@ class Analysis(ExplainEntity):
            action=action,
            arguments=arguments,
            thought=thought,
-            observation=observation
+        )
+
+
+@dataclass
+class Observation(ExplainEntity):
+    """Observation entity - standalone tool result (Agent only)."""
+    document: str = ""
+
+    @classmethod
+    def from_triples(cls, uri: str, triples: List[Tuple[str, str, Any]]) -> "Observation":
+        document = ""
+
+        for s, p, o in triples:
+            if p == TG_DOCUMENT:
+                document = o
+
+        return cls(
+            uri=uri,
+            entity_type="observation",
+            document=document,
        )


@ -757,9 +774,9 @@ class ExplainabilityClient:
            return trace
        trace["question"] = question

-        # Find grounding: ?grounding prov:wasGeneratedBy question_uri
+        # Find grounding: ?grounding prov:wasDerivedFrom question_uri
        grounding_triples = self.flow.triples_query(
-            p=PROV_WAS_GENERATED_BY,
+            p=PROV_WAS_DERIVED_FROM,
            o=question_uri,
            g=graph,
            user=user,
@ -894,9 +911,9 @@ class ExplainabilityClient:
            return trace
        trace["question"] = question

-        # Find grounding: ?grounding prov:wasGeneratedBy question_uri
+        # Find grounding: ?grounding prov:wasDerivedFrom question_uri
        grounding_triples = self.flow.triples_query(
-            p=PROV_WAS_GENERATED_BY,
+            p=PROV_WAS_DERIVED_FROM,
            o=question_uri,
            g=graph,
            user=user,
@ -1010,41 +1027,26 @@ class ExplainabilityClient:
        # Follow the provenance chain from the question
        self._follow_provenance_chain(
            session_uri, trace, graph, user, collection,
-            is_first=True, max_depth=50,
+            max_depth=50,
        )

        return trace

    def _follow_provenance_chain(
        self, current_uri, trace, graph, user, collection,
-        is_first=False, max_depth=50,
+        max_depth=50,
    ):
        """Recursively follow the provenance chain, handling branches."""
        if max_depth <= 0:
            return

        # Find entities derived from current_uri
-        if is_first:
-            derived_triples = self.flow.triples_query(
-                p=PROV_WAS_GENERATED_BY,
-                o=current_uri,
-                g=graph, user=user, collection=collection,
-                limit=20
-            )
-            if not derived_triples:
-                derived_triples = self.flow.triples_query(
-                    p=PROV_WAS_DERIVED_FROM,
-                    o=current_uri,
-                    g=graph, user=user, collection=collection,
-                    limit=20
-                )
-        else:
-            derived_triples = self.flow.triples_query(
-                p=PROV_WAS_DERIVED_FROM,
-                o=current_uri,
-                g=graph, user=user, collection=collection,
-                limit=20
-            )
+        derived_triples = self.flow.triples_query(
+            p=PROV_WAS_DERIVED_FROM,
+            o=current_uri,
+            g=graph, user=user, collection=collection,
+            limit=20
+        )

        if not derived_triples:
            return
@ -1062,8 +1064,8 @@ class ExplainabilityClient:
            if entity is None:
                continue

-            if isinstance(entity, (Analysis, Decomposition, Finding,
-                                   Plan, StepResult)):
+            if isinstance(entity, (Analysis, Observation, Decomposition,
+                                   Finding, Plan, StepResult)):
                trace["steps"].append(entity)

                # Continue following from this entity
@ -1072,6 +1074,27 @@ class ExplainabilityClient:
                    max_depth=max_depth - 1,
                )

+            elif isinstance(entity, Question):
+                # Sub-trace: a RAG session linked to this agent step.
+                # Fetch the full sub-trace and embed it.
+                if entity.question_type == "graph-rag":
+                    sub_trace = self.fetch_graphrag_trace(
+                        derived_uri, graph, user, collection,
+                    )
+                elif entity.question_type == "document-rag":
+                    sub_trace = self.fetch_docrag_trace(
+                        derived_uri, graph, user, collection,
+                    )
+                else:
+                    sub_trace = None
+
+                if sub_trace:
+                    trace["steps"].append({
+                        "type": "sub-trace",
+                        "question": entity,
+                        "trace": sub_trace,
+                    })
+
            elif isinstance(entity, (Conclusion, Synthesis)):
                trace["steps"].append(entity)

@ -1114,10 +1137,25 @@ class ExplainabilityClient:
                if isinstance(entity, Question):
                    questions.append(entity)

-        # Sort by timestamp (newest first)
-        questions.sort(key=lambda q: q.timestamp or "", reverse=True)
+        # Filter out sub-traces: sessions that have a wasDerivedFrom link
+        # (they are child sessions linked to a parent agent iteration)
+        top_level = []
+        for q in questions:
+            parent_triples = self.flow.triples_query(
+                s=q.uri,
+                p=PROV_WAS_DERIVED_FROM,
+                g=graph,
+                user=user,
+                collection=collection,
+                limit=1
+            )
+            if not parent_triples:
+                top_level.append(q)

-        return questions
+        # Sort by timestamp (newest first)
+        top_level.sort(key=lambda q: q.timestamp or "", reverse=True)
+
+        return top_level

    def detect_session_type(
        self,
@ -1159,18 +1197,9 @@ class ExplainabilityClient:
            limit=5
        )

-        generated_triples = self.flow.triples_query(
-            p=PROV_WAS_GENERATED_BY,
-            o=session_uri,
-            g=graph,
-            user=user,
-            collection=collection,
-            limit=5
-        )
-
        all_child_uris = [
            extract_term_value(t.get("s", {}))
-            for t in (derived_triples + generated_triples)
+            for t in derived_triples
        ]

        for child_uri in all_child_uris:
--- a/trustgraph-base/trustgraph/api/socket_client.py
+++ b/trustgraph-base/trustgraph/api/socket_client.py
@ -384,12 +384,14 @@ class SocketClient:
        if chunk_type == "thought":
            return AgentThought(
                content=resp.get("content", ""),
-                end_of_message=resp.get("end_of_message", False)
+                end_of_message=resp.get("end_of_message", False),
+                message_id=resp.get("message_id", ""),
            )
        elif chunk_type == "observation":
            return AgentObservation(
                content=resp.get("content", ""),
-                end_of_message=resp.get("end_of_message", False)
+                end_of_message=resp.get("end_of_message", False),
+                message_id=resp.get("message_id", ""),
            )
        elif chunk_type == "answer" or chunk_type == "final-answer":
            return AgentAnswer(
--- a/trustgraph-base/trustgraph/api/types.py
+++ b/trustgraph-base/trustgraph/api/types.py
@ -150,8 +150,10 @@ class AgentThought(StreamingChunk):
        content: Agent's thought text
        end_of_message: True if this completes the current thought
        chunk_type: Always "thought"
+        message_id: Provenance URI of the entity being built
    """
    chunk_type: str = "thought"
+    message_id: str = ""

@dataclasses.dataclass
 class AgentObservation(StreamingChunk):
@ -165,8 +167,10 @@ class AgentObservation(StreamingChunk):
        content: Observation text describing tool results
        end_of_message: True if this completes the current observation
        chunk_type: Always "observation"
+        message_id: Provenance URI of the entity being built
    """
    chunk_type: str = "observation"
+    message_id: str = ""

@dataclasses.dataclass
 class AgentAnswer(StreamingChunk):