Split Analysis into Analysis+ToolUse and Observation, add message_id (#747)

Refactor agent provenance so that the decision (thought + tool
selection) and the result (observation) are separate DAG entities:

  Question ← Analysis+ToolUse ← Observation ← ... ← Conclusion

Analysis gains tg:ToolUse as a mixin RDF type and is emitted
before tool execution via an on_action callback in react().
This ensures sub-traces (e.g. GraphRAG) appear after their
parent Analysis in the streaming event order.

Observation becomes a standalone prov:Entity with tg:Observation
type, emitted after tool execution. The linear DAG chain runs
through Observation — subsequent iterations and the Conclusion
derive from it, not from the Analysis.

message_id is populated on streaming AgentResponse for thought
and observation chunks, using the provenance URI of the entity
being built. This lets clients group streamed chunks by entity.

Wire changes:
- provenance/agent.py: Add ToolUse type, new
  agent_observation_triples(), remove observation from iteration
- agent_manager.py: Add on_action callback between reason() and
  tool execution
- orchestrator/pattern_base.py: Split emit, wire message_id,
  chain through observation URIs
- orchestrator/react_pattern.py: Emit Analysis via on_action
  before tool runs
- agent/react/service.py: Same for non-orchestrator path
- api/explainability.py: New Observation class, updated dispatch
  and chain walker
- api/types.py: Add message_id to AgentThought/AgentObservation
- cli: Render Observation separately, [analysis: tool] labels
This commit is contained in:
cybermaggedon 2026-03-31 17:51:22 +01:00 committed by GitHub
parent 89e13a756a
commit 153ae9ad30
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
28 changed files with 661 additions and 350 deletions

View file

@ -81,6 +81,7 @@ from .explainability import (
Synthesis,
Reflection,
Analysis,
Observation,
Conclusion,
Decomposition,
Finding,
@ -164,6 +165,7 @@ __all__ = [
"Focus",
"Synthesis",
"Analysis",
"Observation",
"Conclusion",
"EdgeSelection",
"wire_triples_to_tuples",

View file

@ -40,6 +40,7 @@ TG_ANSWER_TYPE = TG + "Answer"
TG_REFLECTION_TYPE = TG + "Reflection"
TG_THOUGHT_TYPE = TG + "Thought"
TG_OBSERVATION_TYPE = TG + "Observation"
TG_TOOL_USE = TG + "ToolUse"
TG_GRAPH_RAG_QUESTION = TG + "GraphRagQuestion"
TG_DOC_RAG_QUESTION = TG + "DocRagQuestion"
TG_AGENT_QUESTION = TG + "AgentQuestion"
@ -58,7 +59,6 @@ TG_PLAN_STEP = TG + "planStep"
PROV = "http://www.w3.org/ns/prov#"
PROV_STARTED_AT_TIME = PROV + "startedAtTime"
PROV_WAS_DERIVED_FROM = PROV + "wasDerivedFrom"
PROV_WAS_GENERATED_BY = PROV + "wasGeneratedBy"
RDF_TYPE = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"
RDFS_LABEL = "http://www.w3.org/2000/01/rdf-schema#label"
@ -102,6 +102,8 @@ class ExplainEntity:
return StepResult.from_triples(uri, triples)
elif TG_SYNTHESIS in types:
return Synthesis.from_triples(uri, triples)
elif TG_OBSERVATION_TYPE in types and TG_REFLECTION_TYPE not in types:
return Observation.from_triples(uri, triples)
elif TG_REFLECTION_TYPE in types:
return Reflection.from_triples(uri, triples)
elif TG_ANALYSIS in types:
@ -279,18 +281,16 @@ class Reflection(ExplainEntity):
@dataclass
class Analysis(ExplainEntity):
"""Analysis entity - one think/act/observe cycle (Agent only)."""
"""Analysis+ToolUse entity - decision + tool call (Agent only)."""
action: str = ""
arguments: str = "" # JSON string
thought: str = ""
observation: str = ""
@classmethod
def from_triples(cls, uri: str, triples: List[Tuple[str, str, Any]]) -> "Analysis":
action = ""
arguments = ""
thought = ""
observation = ""
for s, p, o in triples:
if p == TG_ACTION:
@ -299,8 +299,6 @@ class Analysis(ExplainEntity):
arguments = o
elif p == TG_THOUGHT:
thought = o
elif p == TG_OBSERVATION:
observation = o
return cls(
uri=uri,
@ -308,7 +306,26 @@ class Analysis(ExplainEntity):
action=action,
arguments=arguments,
thought=thought,
observation=observation
)
@dataclass
class Observation(ExplainEntity):
    """Standalone tool-result entity in an agent trace (Agent only).

    Attributes:
        document: Object of the entity's TG_DOCUMENT triple, or "" when
            no such triple was supplied.
    """
    document: str = ""

    @classmethod
    def from_triples(
        cls, uri: str, triples: List[Tuple[str, str, Any]]
    ) -> "Observation":
        """Build an Observation from (subject, predicate, object) tuples.

        Args:
            uri: URI of the observation entity.
            triples: Triples describing the entity. Only the predicate
                and object are inspected; the subject is not checked
                against ``uri``.

        Returns:
            An Observation whose ``entity_type`` is "observation".
        """
        # When several TG_DOCUMENT triples are present the last one wins,
        # matching a sequential overwrite while scanning the list.
        matches = [obj for _subj, pred, obj in triples if pred == TG_DOCUMENT]
        return cls(
            uri=uri,
            entity_type="observation",
            document=matches[-1] if matches else "",
        )
@ -757,9 +774,9 @@ class ExplainabilityClient:
return trace
trace["question"] = question
# Find grounding: ?grounding prov:wasGeneratedBy question_uri
# Find grounding: ?grounding prov:wasDerivedFrom question_uri
grounding_triples = self.flow.triples_query(
p=PROV_WAS_GENERATED_BY,
p=PROV_WAS_DERIVED_FROM,
o=question_uri,
g=graph,
user=user,
@ -894,9 +911,9 @@ class ExplainabilityClient:
return trace
trace["question"] = question
# Find grounding: ?grounding prov:wasGeneratedBy question_uri
# Find grounding: ?grounding prov:wasDerivedFrom question_uri
grounding_triples = self.flow.triples_query(
p=PROV_WAS_GENERATED_BY,
p=PROV_WAS_DERIVED_FROM,
o=question_uri,
g=graph,
user=user,
@ -1010,41 +1027,26 @@ class ExplainabilityClient:
# Follow the provenance chain from the question
self._follow_provenance_chain(
session_uri, trace, graph, user, collection,
is_first=True, max_depth=50,
max_depth=50,
)
return trace
def _follow_provenance_chain(
self, current_uri, trace, graph, user, collection,
is_first=False, max_depth=50,
max_depth=50,
):
"""Recursively follow the provenance chain, handling branches."""
if max_depth <= 0:
return
# Find entities derived from current_uri
if is_first:
derived_triples = self.flow.triples_query(
p=PROV_WAS_GENERATED_BY,
o=current_uri,
g=graph, user=user, collection=collection,
limit=20
)
if not derived_triples:
derived_triples = self.flow.triples_query(
p=PROV_WAS_DERIVED_FROM,
o=current_uri,
g=graph, user=user, collection=collection,
limit=20
)
else:
derived_triples = self.flow.triples_query(
p=PROV_WAS_DERIVED_FROM,
o=current_uri,
g=graph, user=user, collection=collection,
limit=20
)
derived_triples = self.flow.triples_query(
p=PROV_WAS_DERIVED_FROM,
o=current_uri,
g=graph, user=user, collection=collection,
limit=20
)
if not derived_triples:
return
@ -1062,8 +1064,8 @@ class ExplainabilityClient:
if entity is None:
continue
if isinstance(entity, (Analysis, Decomposition, Finding,
Plan, StepResult)):
if isinstance(entity, (Analysis, Observation, Decomposition,
Finding, Plan, StepResult)):
trace["steps"].append(entity)
# Continue following from this entity
@ -1072,6 +1074,27 @@ class ExplainabilityClient:
max_depth=max_depth - 1,
)
elif isinstance(entity, Question):
# Sub-trace: a RAG session linked to this agent step.
# Fetch the full sub-trace and embed it.
if entity.question_type == "graph-rag":
sub_trace = self.fetch_graphrag_trace(
derived_uri, graph, user, collection,
)
elif entity.question_type == "document-rag":
sub_trace = self.fetch_docrag_trace(
derived_uri, graph, user, collection,
)
else:
sub_trace = None
if sub_trace:
trace["steps"].append({
"type": "sub-trace",
"question": entity,
"trace": sub_trace,
})
elif isinstance(entity, (Conclusion, Synthesis)):
trace["steps"].append(entity)
@ -1114,10 +1137,25 @@ class ExplainabilityClient:
if isinstance(entity, Question):
questions.append(entity)
# Sort by timestamp (newest first)
questions.sort(key=lambda q: q.timestamp or "", reverse=True)
# Filter out sub-traces: sessions that have a wasDerivedFrom link
# (they are child sessions linked to a parent agent iteration)
top_level = []
for q in questions:
parent_triples = self.flow.triples_query(
s=q.uri,
p=PROV_WAS_DERIVED_FROM,
g=graph,
user=user,
collection=collection,
limit=1
)
if not parent_triples:
top_level.append(q)
return questions
# Sort by timestamp (newest first)
top_level.sort(key=lambda q: q.timestamp or "", reverse=True)
return top_level
def detect_session_type(
self,
@ -1159,18 +1197,9 @@ class ExplainabilityClient:
limit=5
)
generated_triples = self.flow.triples_query(
p=PROV_WAS_GENERATED_BY,
o=session_uri,
g=graph,
user=user,
collection=collection,
limit=5
)
all_child_uris = [
extract_term_value(t.get("s", {}))
for t in (derived_triples + generated_triples)
for t in derived_triples
]
for child_uri in all_child_uris:

View file

@ -384,12 +384,14 @@ class SocketClient:
if chunk_type == "thought":
return AgentThought(
content=resp.get("content", ""),
end_of_message=resp.get("end_of_message", False)
end_of_message=resp.get("end_of_message", False),
message_id=resp.get("message_id", ""),
)
elif chunk_type == "observation":
return AgentObservation(
content=resp.get("content", ""),
end_of_message=resp.get("end_of_message", False)
end_of_message=resp.get("end_of_message", False),
message_id=resp.get("message_id", ""),
)
elif chunk_type == "answer" or chunk_type == "final-answer":
return AgentAnswer(

View file

@ -150,8 +150,10 @@ class AgentThought(StreamingChunk):
content: Agent's thought text
end_of_message: True if this completes the current thought
chunk_type: Always "thought"
message_id: Provenance URI of the entity being built
"""
chunk_type: str = "thought"
message_id: str = ""
@dataclasses.dataclass
class AgentObservation(StreamingChunk):
@ -165,8 +167,10 @@ class AgentObservation(StreamingChunk):
content: Observation text describing tool results
end_of_message: True if this completes the current observation
chunk_type: Always "observation"
message_id: Provenance URI of the entity being built
"""
chunk_type: str = "observation"
message_id: str = ""
@dataclasses.dataclass
class AgentAnswer(StreamingChunk):

View file

@ -5,6 +5,7 @@ from .. schema import GraphRagQuery, GraphRagResponse
class GraphRagClient(RequestResponse):
async def rag(self, query, user="trustgraph", collection="default",
chunk_callback=None, explain_callback=None,
parent_uri="",
timeout=600):
"""
Execute a graph RAG query with optional streaming callbacks.
@ -50,6 +51,7 @@ class GraphRagClient(RequestResponse):
query = query,
user = user,
collection = collection,
parent_uri = parent_uri,
),
timeout=timeout,
recipient=recipient,

View file

@ -96,6 +96,7 @@ from . namespaces import (
TG_ANALYSIS, TG_CONCLUSION,
# Unifying types
TG_ANSWER_TYPE, TG_REFLECTION_TYPE, TG_THOUGHT_TYPE, TG_OBSERVATION_TYPE,
TG_TOOL_USE,
# Question subtypes (to distinguish retrieval mechanism)
TG_GRAPH_RAG_QUESTION, TG_DOC_RAG_QUESTION, TG_AGENT_QUESTION,
# Agent provenance predicates
@ -132,6 +133,7 @@ from . triples import (
from . agent import (
agent_session_triples,
agent_iteration_triples,
agent_observation_triples,
agent_final_triples,
# Orchestrator provenance triple builders
agent_decomposition_triples,
@ -210,6 +212,7 @@ __all__ = [
"TG_ANALYSIS", "TG_CONCLUSION",
# Unifying types
"TG_ANSWER_TYPE", "TG_REFLECTION_TYPE", "TG_THOUGHT_TYPE", "TG_OBSERVATION_TYPE",
"TG_TOOL_USE",
# Question subtypes
"TG_GRAPH_RAG_QUESTION", "TG_DOC_RAG_QUESTION", "TG_AGENT_QUESTION",
# Agent provenance predicates
@ -238,6 +241,7 @@ __all__ = [
# Agent provenance triple builders
"agent_session_triples",
"agent_iteration_triples",
"agent_observation_triples",
"agent_final_triples",
# Orchestrator provenance triple builders
"agent_decomposition_triples",

View file

@ -20,11 +20,12 @@ from .. schema import Triple, Term, IRI, LITERAL
from . namespaces import (
RDF_TYPE, RDFS_LABEL,
PROV_ACTIVITY, PROV_ENTITY, PROV_WAS_DERIVED_FROM,
PROV_WAS_GENERATED_BY, PROV_STARTED_AT_TIME,
TG_QUERY, TG_THOUGHT, TG_ACTION, TG_ARGUMENTS, TG_OBSERVATION,
PROV_ENTITY, PROV_WAS_DERIVED_FROM,
PROV_STARTED_AT_TIME,
TG_QUERY, TG_THOUGHT, TG_ACTION, TG_ARGUMENTS,
TG_QUESTION, TG_ANALYSIS, TG_CONCLUSION, TG_DOCUMENT,
TG_ANSWER_TYPE, TG_REFLECTION_TYPE, TG_THOUGHT_TYPE, TG_OBSERVATION_TYPE,
TG_TOOL_USE,
TG_AGENT_QUESTION,
TG_DECOMPOSITION, TG_FINDING, TG_PLAN_TYPE, TG_STEP_RESULT,
TG_SYNTHESIS, TG_SUBAGENT_GOAL, TG_PLAN_STEP,
@ -70,7 +71,7 @@ def agent_session_triples(
timestamp = datetime.utcnow().isoformat() + "Z"
return [
_triple(session_uri, RDF_TYPE, _iri(PROV_ACTIVITY)),
_triple(session_uri, RDF_TYPE, _iri(PROV_ENTITY)),
_triple(session_uri, RDF_TYPE, _iri(TG_QUESTION)),
_triple(session_uri, RDF_TYPE, _iri(TG_AGENT_QUESTION)),
_triple(session_uri, RDFS_LABEL, _literal("Agent Question")),
@ -87,19 +88,15 @@ def agent_iteration_triples(
arguments: Dict[str, Any] = None,
thought_uri: Optional[str] = None,
thought_document_id: Optional[str] = None,
observation_uri: Optional[str] = None,
observation_document_id: Optional[str] = None,
) -> List[Triple]:
"""
Build triples for one agent iteration (Analysis - think/act/observe cycle).
Build triples for one agent iteration (Analysis+ToolUse).
Creates:
- Entity declaration with tg:Analysis type
- wasGeneratedBy link to question (if first iteration)
- wasDerivedFrom link to previous iteration (if not first)
- Entity declaration with tg:Analysis and tg:ToolUse types
- wasDerivedFrom link to question (if first iteration) or previous
- Action and arguments metadata
- Thought sub-entity (tg:Reflection, tg:Thought) with librarian document
- Observation sub-entity (tg:Reflection, tg:Observation) with librarian document
Args:
iteration_uri: URI of this iteration (from agent_iteration_uri)
@ -109,8 +106,6 @@ def agent_iteration_triples(
arguments: Arguments passed to the tool (will be JSON-encoded)
thought_uri: URI for the thought sub-entity
thought_document_id: Document URI for thought in librarian
observation_uri: URI for the observation sub-entity
observation_document_id: Document URI for observation in librarian
Returns:
List of Triple objects
@ -121,6 +116,7 @@ def agent_iteration_triples(
triples = [
_triple(iteration_uri, RDF_TYPE, _iri(PROV_ENTITY)),
_triple(iteration_uri, RDF_TYPE, _iri(TG_ANALYSIS)),
_triple(iteration_uri, RDF_TYPE, _iri(TG_TOOL_USE)),
_triple(iteration_uri, RDFS_LABEL, _literal(f"Analysis: {action}")),
_triple(iteration_uri, TG_ACTION, _literal(action)),
_triple(iteration_uri, TG_ARGUMENTS, _literal(json.dumps(arguments))),
@ -128,7 +124,7 @@ def agent_iteration_triples(
if question_uri:
triples.append(
_triple(iteration_uri, PROV_WAS_GENERATED_BY, _iri(question_uri))
_triple(iteration_uri, PROV_WAS_DERIVED_FROM, _iri(question_uri))
)
elif previous_uri:
triples.append(
@ -142,26 +138,48 @@ def agent_iteration_triples(
_triple(thought_uri, RDF_TYPE, _iri(TG_REFLECTION_TYPE)),
_triple(thought_uri, RDF_TYPE, _iri(TG_THOUGHT_TYPE)),
_triple(thought_uri, RDFS_LABEL, _literal("Thought")),
_triple(thought_uri, PROV_WAS_GENERATED_BY, _iri(iteration_uri)),
_triple(thought_uri, PROV_WAS_DERIVED_FROM, _iri(iteration_uri)),
])
if thought_document_id:
triples.append(
_triple(thought_uri, TG_DOCUMENT, _iri(thought_document_id))
)
# Observation sub-entity
if observation_uri:
triples.extend([
_triple(iteration_uri, TG_OBSERVATION, _iri(observation_uri)),
_triple(observation_uri, RDF_TYPE, _iri(TG_REFLECTION_TYPE)),
_triple(observation_uri, RDF_TYPE, _iri(TG_OBSERVATION_TYPE)),
_triple(observation_uri, RDFS_LABEL, _literal("Observation")),
_triple(observation_uri, PROV_WAS_GENERATED_BY, _iri(iteration_uri)),
])
if observation_document_id:
triples.append(
_triple(observation_uri, TG_DOCUMENT, _iri(observation_document_id))
)
return triples
def agent_observation_triples(
    observation_uri: str,
    iteration_uri: str,
    document_id: Optional[str] = None,
) -> List[Triple]:
    """
    Build the triples describing a standalone agent observation.

    The observation is declared as a prov:Entity of type tg:Observation,
    labelled "Observation", and linked by prov:wasDerivedFrom to the
    iteration (Analysis+ToolUse) that produced it. A tg:document
    reference is added only when a document ID is given.

    Args:
        observation_uri: URI of the observation entity
        iteration_uri: URI of the iteration this observation derives from
        document_id: Librarian document ID for the observation content;
            omitted from the output when None or empty

    Returns:
        List of Triple objects, all with observation_uri as subject
    """
    def about_observation(predicate, obj):
        # Every statement emitted here shares the same subject.
        return _triple(observation_uri, predicate, obj)

    result = [
        about_observation(RDF_TYPE, _iri(PROV_ENTITY)),
        about_observation(RDF_TYPE, _iri(TG_OBSERVATION_TYPE)),
        about_observation(RDFS_LABEL, _literal("Observation")),
        about_observation(PROV_WAS_DERIVED_FROM, _iri(iteration_uri)),
    ]

    if not document_id:
        return result

    result.append(about_observation(TG_DOCUMENT, _iri(document_id)))
    return result
@ -199,7 +217,7 @@ def agent_final_triples(
if question_uri:
triples.append(
_triple(final_uri, PROV_WAS_GENERATED_BY, _iri(question_uri))
_triple(final_uri, PROV_WAS_DERIVED_FROM, _iri(question_uri))
)
elif previous_uri:
triples.append(
@ -223,7 +241,7 @@ def agent_decomposition_triples(
_triple(uri, RDF_TYPE, _iri(TG_DECOMPOSITION)),
_triple(uri, RDFS_LABEL,
_literal(f"Decomposed into {len(goals)} research threads")),
_triple(uri, PROV_WAS_GENERATED_BY, _iri(session_uri)),
_triple(uri, PROV_WAS_DERIVED_FROM, _iri(session_uri)),
]
for goal in goals:
triples.append(_triple(uri, TG_SUBAGENT_GOAL, _literal(goal)))
@ -261,7 +279,7 @@ def agent_plan_triples(
_triple(uri, RDF_TYPE, _iri(TG_PLAN_TYPE)),
_triple(uri, RDFS_LABEL,
_literal(f"Plan with {len(steps)} steps")),
_triple(uri, PROV_WAS_GENERATED_BY, _iri(session_uri)),
_triple(uri, PROV_WAS_DERIVED_FROM, _iri(session_uri)),
]
for step in steps:
triples.append(_triple(uri, TG_PLAN_STEP, _literal(step)))

View file

@ -105,6 +105,7 @@ TG_ANSWER_TYPE = TG + "Answer" # Final answer (Synthesis, Conclusion, F
TG_REFLECTION_TYPE = TG + "Reflection" # Intermediate commentary (Thought, Observation)
TG_THOUGHT_TYPE = TG + "Thought" # Agent reasoning
TG_OBSERVATION_TYPE = TG + "Observation" # Agent tool result
TG_TOOL_USE = TG + "ToolUse" # Analysis+ToolUse mixin
# Question subtypes (to distinguish retrieval mechanism)
TG_GRAPH_RAG_QUESTION = TG + "GraphRagQuestion"

View file

@ -353,18 +353,21 @@ def question_triples(
question_uri: str,
query: str,
timestamp: Optional[str] = None,
parent_uri: Optional[str] = None,
) -> List[Triple]:
"""
Build triples for a question activity.
Build triples for a question entity.
Creates:
- Activity declaration for the question
- Entity declaration for the question
- Query text and timestamp
- Optional wasDerivedFrom link to parent (for sub-traces)
Args:
question_uri: URI of the question (from question_uri)
query: The user's query text
timestamp: ISO timestamp (defaults to now)
parent_uri: Optional parent URI to link as wasDerivedFrom (for sub-traces)
Returns:
List of Triple objects
@ -372,8 +375,8 @@ def question_triples(
if timestamp is None:
timestamp = datetime.utcnow().isoformat() + "Z"
return [
_triple(question_uri, RDF_TYPE, _iri(PROV_ACTIVITY)),
triples = [
_triple(question_uri, RDF_TYPE, _iri(PROV_ENTITY)),
_triple(question_uri, RDF_TYPE, _iri(TG_QUESTION)),
_triple(question_uri, RDF_TYPE, _iri(TG_GRAPH_RAG_QUESTION)),
_triple(question_uri, RDFS_LABEL, _literal("GraphRAG Question")),
@ -381,6 +384,13 @@ def question_triples(
_triple(question_uri, TG_QUERY, _literal(query)),
]
if parent_uri:
triples.append(
_triple(question_uri, PROV_WAS_DERIVED_FROM, _iri(parent_uri))
)
return triples
def grounding_triples(
grounding_uri: str,
@ -407,7 +417,7 @@ def grounding_triples(
_triple(grounding_uri, RDF_TYPE, _iri(PROV_ENTITY)),
_triple(grounding_uri, RDF_TYPE, _iri(TG_GROUNDING)),
_triple(grounding_uri, RDFS_LABEL, _literal("Grounding")),
_triple(grounding_uri, PROV_WAS_GENERATED_BY, _iri(question_uri)),
_triple(grounding_uri, PROV_WAS_DERIVED_FROM, _iri(question_uri)),
]
for concept in concepts:
@ -575,18 +585,21 @@ def docrag_question_triples(
question_uri: str,
query: str,
timestamp: Optional[str] = None,
parent_uri: Optional[str] = None,
) -> List[Triple]:
"""
Build triples for a document RAG question activity.
Build triples for a document RAG question entity.
Creates:
- Activity declaration with tg:Question type
- Entity declaration with tg:Question type
- Query text and timestamp
- Optional wasDerivedFrom link to parent (for sub-traces)
Args:
question_uri: URI of the question (from docrag_question_uri)
query: The user's query text
timestamp: ISO timestamp (defaults to now)
parent_uri: Optional parent URI to link as wasDerivedFrom (for sub-traces)
Returns:
List of Triple objects
@ -594,8 +607,8 @@ def docrag_question_triples(
if timestamp is None:
timestamp = datetime.utcnow().isoformat() + "Z"
return [
_triple(question_uri, RDF_TYPE, _iri(PROV_ACTIVITY)),
triples = [
_triple(question_uri, RDF_TYPE, _iri(PROV_ENTITY)),
_triple(question_uri, RDF_TYPE, _iri(TG_QUESTION)),
_triple(question_uri, RDF_TYPE, _iri(TG_DOC_RAG_QUESTION)),
_triple(question_uri, RDFS_LABEL, _literal("DocumentRAG Question")),
@ -603,6 +616,13 @@ def docrag_question_triples(
_triple(question_uri, TG_QUERY, _literal(query)),
]
if parent_uri:
triples.append(
_triple(question_uri, PROV_WAS_DERIVED_FROM, _iri(parent_uri))
)
return triples
def docrag_exploration_triples(
exploration_uri: str,

View file

@ -18,6 +18,7 @@ class GraphRagQuery:
edge_score_limit: int = 0
edge_limit: int = 0
streaming: bool = False
parent_uri: str = ""
@dataclass
class GraphRagResponse: