mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-04-26 00:46:22 +02:00
Adding explainability to the ReAct agent (#689)
* Added tech spec
* Add provenance recording to React agent loop
Enables agent sessions to be traced and debugged using the same
explainability infrastructure as GraphRAG. Agent traces record:
- Session start with query and timestamp
- Each iteration's thought, action, arguments, and observation
- Final answer with derivation chain
Changes:
- Add session_id and collection fields to AgentRequest schema
- Add agent predicates (TG_THOUGHT, TG_ACTION, etc.) to namespaces
- Create agent provenance triple generators in provenance/agent.py
- Register explainability producer in agent service
- Emit provenance triples during agent execution
- Update CLI tools to detect and render agent traces alongside GraphRAG
* Updated explainability taxonomy:
GraphRAG: tg:Question → tg:Exploration → tg:Focus → tg:Synthesis
Agent: tg:Question → tg:Analysis(s) → tg:Conclusion
All entities also have their PROV-O type (prov:Activity or prov:Entity).
Updated commit message:
Add provenance recording to React agent loop
Enables agent sessions to be traced and debugged using the same
explainability infrastructure as GraphRAG.
Entity types follow human reasoning patterns:
- tg:Question - the user's query (shared with GraphRAG)
- tg:Analysis - each think/act/observe cycle
- tg:Conclusion - the final answer
Also adds explicit TG types to GraphRAG entities:
- tg:Question, tg:Exploration, tg:Focus, tg:Synthesis
All types retain their PROV-O base types (prov:Activity, prov:Entity).
Changes:
- Add session_id and collection fields to AgentRequest schema
- Add explainability entity types to namespaces.py
- Create agent provenance triple generators
- Register explainability producer in agent service
- Emit provenance triples during agent execution
- Update CLI tools to detect and render both trace types
* Add Document RAG explainability. Summary of the changes made:
Schema Changes:
- trustgraph-base/trustgraph/schema/services/retrieval.py: Added
explain_id and explain_graph fields to DocumentRagResponse
- trustgraph-base/trustgraph/messaging/translators/retrieval.py:
Updated translator to handle explainability fields
Provenance Changes:
- trustgraph-base/trustgraph/provenance/namespaces.py: Added
TG_CHUNK_COUNT and TG_SELECTED_CHUNK predicates
- trustgraph-base/trustgraph/provenance/uris.py: Added
docrag_question_uri, docrag_exploration_uri, docrag_synthesis_uri
generators
- trustgraph-base/trustgraph/provenance/triples.py: Added
docrag_question_triples, docrag_exploration_triples,
docrag_synthesis_triples builders
- trustgraph-base/trustgraph/provenance/__init__.py: Exported all
new Document RAG functions and predicates
Service Changes:
- trustgraph-flow/trustgraph/retrieval/document_rag/document_rag.py:
Added explainability callback support and triple emission at each
phase (Question → Exploration → Synthesis)
- trustgraph-flow/trustgraph/retrieval/document_rag/rag.py:
Registered explainability producer and wired up the callback
Documentation:
- docs/tech-specs/agent-explainability.md: Added Document RAG entity
types and provenance model documentation
Document RAG Provenance Model:
Question (urn:trustgraph:docrag:{uuid})
│
│ tg:query, prov:startedAtTime
│ rdf:type = prov:Activity, tg:Question
│
↓ prov:wasGeneratedBy
│
Exploration (urn:trustgraph:docrag:{uuid}/exploration)
│
│ tg:chunkCount, tg:selectedChunk (multiple)
│ rdf:type = prov:Entity, tg:Exploration
│
↓ prov:wasDerivedFrom
│
Synthesis (urn:trustgraph:docrag:{uuid}/synthesis)
│
│ tg:content = "The answer..."
│ rdf:type = prov:Entity, tg:Synthesis
* Add a system-specific question subtype so that the retrieval mechanism
is immediately obvious from the question's types:
System: GraphRAG
TG Types on Question: tg:Question, tg:GraphRagQuestion
URI Pattern: urn:trustgraph:question:{uuid}
────────────────────────────────────────
System: Document RAG
TG Types on Question: tg:Question, tg:DocRagQuestion
URI Pattern: urn:trustgraph:docrag:{uuid}
────────────────────────────────────────
System: Agent
TG Types on Question: tg:Question, tg:AgentQuestion
URI Pattern: urn:trustgraph:agent:{uuid}
Files modified:
- trustgraph-base/trustgraph/provenance/namespaces.py - Added
TG_GRAPH_RAG_QUESTION, TG_DOC_RAG_QUESTION, TG_AGENT_QUESTION
- trustgraph-base/trustgraph/provenance/triples.py - Added subtype to
question_triples and docrag_question_triples
- trustgraph-base/trustgraph/provenance/agent.py - Added subtype to
agent_session_triples
- trustgraph-base/trustgraph/provenance/__init__.py - Exported new types
- docs/tech-specs/agent-explainability.md - Documented the subtypes
This allows:
- Query all questions: ?q rdf:type tg:Question
- Query only GraphRAG: ?q rdf:type tg:GraphRagQuestion
- Query only Document RAG: ?q rdf:type tg:DocRagQuestion
- Query only Agent: ?q rdf:type tg:AgentQuestion
* Fixed tests
This commit is contained in:
parent
a53ed41da2
commit
312174eb88
17 changed files with 1269 additions and 44 deletions
|
|
@ -1,8 +1,8 @@
|
|||
"""
|
||||
List all GraphRAG sessions (questions) in a collection.
|
||||
List all explainability sessions (GraphRAG and Agent) in a collection.
|
||||
|
||||
Queries for all questions stored in the retrieval graph and displays them
|
||||
with their session IDs and timestamps.
|
||||
with their session IDs, type (GraphRAG or Agent), and timestamps.
|
||||
|
||||
Examples:
|
||||
tg-list-explain-traces -U trustgraph -C default
|
||||
|
|
@ -24,8 +24,14 @@ default_collection = 'default'
|
|||
# Predicates
|
||||
TG = "https://trustgraph.ai/ns/"
|
||||
TG_QUERY = TG + "query"
|
||||
TG_QUESTION = TG + "Question"
|
||||
TG_ANALYSIS = TG + "Analysis"
|
||||
TG_EXPLORATION = TG + "Exploration"
|
||||
PROV = "http://www.w3.org/ns/prov#"
|
||||
PROV_STARTED_AT_TIME = PROV + "startedAtTime"
|
||||
PROV_WAS_DERIVED_FROM = PROV + "wasDerivedFrom"
|
||||
PROV_WAS_GENERATED_BY = PROV + "wasGeneratedBy"
|
||||
RDF_TYPE = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"
|
||||
|
||||
# Retrieval graph
|
||||
RETRIEVAL_GRAPH = "urn:graph:retrieval"
|
||||
|
|
@ -117,8 +123,45 @@ def get_timestamp(socket, flow_id, user, collection, question_id):
|
|||
return ""
|
||||
|
||||
|
||||
def get_session_type(socket, flow_id, user, collection, session_id):
    """
    Classify an explainability session as "Agent" or "GraphRAG".

    Both kinds of session are typed tg:Question, so the session URI is
    inspected first; only when its prefix is not recognised are the
    entities linked back to the session looked up and classified by
    their rdf:type.

    Returns:
        "Agent" or "GraphRAG" ("GraphRAG" when nothing conclusive is found)
    """
    # Cheap test first: a recognised URI prefix needs no graph queries
    known_prefixes = (
        ("urn:trustgraph:agent:", "Agent"),
        ("urn:trustgraph:question:", "GraphRAG"),
    )
    for prefix, label in known_prefixes:
        if session_id.startswith(prefix):
            return label

    # Entities that point back at the session, via wasDerivedFrom first
    # and wasGeneratedBy second
    linked = [
        query_triples(
            socket, flow_id, user, collection,
            p=predicate, o=session_id, g=RETRIEVAL_GRAPH
        )
        for predicate in (PROV_WAS_DERIVED_FROM, PROV_WAS_GENERATED_BY)
    ]

    for child, _, _ in linked[0] + linked[1]:
        type_triples = query_triples(
            socket, flow_id, user, collection,
            s=child, p=RDF_TYPE, g=RETRIEVAL_GRAPH
        )
        for _, _, rdf_type in type_triples:
            # An Analysis child marks an agent session, an Exploration
            # child marks a GraphRAG session
            if rdf_type == TG_ANALYSIS:
                return "Agent"
            if rdf_type == TG_EXPLORATION:
                return "GraphRAG"

    # Nothing conclusive: fall back to GraphRAG
    return "GraphRAG"
|
||||
|
||||
|
||||
def list_sessions(socket, flow_id, user, collection, limit):
|
||||
"""List all GraphRAG sessions by finding questions."""
|
||||
"""List all explainability sessions (GraphRAG and Agent) by finding questions."""
|
||||
# Query for all triples with predicate = tg:query
|
||||
triples = query_triples(
|
||||
socket, flow_id, user, collection,
|
||||
|
|
@ -129,9 +172,12 @@ def list_sessions(socket, flow_id, user, collection, limit):
|
|||
for question_id, _, query_text in triples:
|
||||
# Get timestamp if available
|
||||
timestamp = get_timestamp(socket, flow_id, user, collection, question_id)
|
||||
# Get session type (Agent or GraphRAG)
|
||||
session_type = get_session_type(socket, flow_id, user, collection, question_id)
|
||||
|
||||
sessions.append({
|
||||
"id": question_id,
|
||||
"type": session_type,
|
||||
"question": query_text,
|
||||
"time": timestamp,
|
||||
})
|
||||
|
|
@ -154,18 +200,19 @@ def truncate_text(text, max_len=60):
|
|||
def print_table(sessions):
    """Render the sessions as a simple text table (ID, type, question, time)."""
    if not sessions:
        print("No explainability sessions found.")
        return

    # One row per session; the question is shortened so the table stays narrow
    rows = [
        [
            entry["id"],
            entry.get("type", "Unknown"),
            truncate_text(entry["question"], 45),
            entry.get("time", ""),
        ]
        for entry in sessions
    ]

    column_names = ["Session ID", "Type", "Question", "Time"]
    print(tabulate(rows, headers=column_names, tablefmt="simple"))
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -1,11 +1,15 @@
|
|||
"""
|
||||
Show full explainability trace for a GraphRAG session.
|
||||
Show full explainability trace for a GraphRAG or Agent session.
|
||||
|
||||
Given a question/session URI, displays the complete cascade:
|
||||
Question -> Exploration -> Focus (edge selection) -> Synthesis (answer).
|
||||
Given a question/session URI, displays the complete trace:
|
||||
- GraphRAG: Question -> Exploration -> Focus (edge selection) -> Synthesis (answer)
|
||||
- Agent: Session -> Iteration(s) (thought/action/observation) -> Final Answer
|
||||
|
||||
The tool auto-detects the trace type based on rdf:type.
|
||||
|
||||
Examples:
|
||||
tg-show-explain-trace -U trustgraph -C default "urn:trustgraph:question:abc123"
|
||||
tg-show-explain-trace -U trustgraph -C default "urn:trustgraph:agent:abc123"
|
||||
tg-show-explain-trace --max-answer 1000 "urn:trustgraph:question:abc123"
|
||||
tg-show-explain-trace --show-provenance "urn:trustgraph:question:abc123"
|
||||
"""
|
||||
|
|
@ -31,10 +35,25 @@ TG_REASONING = TG + "reasoning"
|
|||
TG_CONTENT = TG + "content"
|
||||
TG_DOCUMENT = TG + "document"
|
||||
TG_REIFIES = TG + "reifies"
|
||||
# Explainability entity types
|
||||
TG_QUESTION = TG + "Question"
|
||||
TG_EXPLORATION = TG + "Exploration"
|
||||
TG_FOCUS = TG + "Focus"
|
||||
TG_SYNTHESIS = TG + "Synthesis"
|
||||
TG_ANALYSIS = TG + "Analysis"
|
||||
TG_CONCLUSION = TG + "Conclusion"
|
||||
|
||||
# Agent predicates
|
||||
TG_THOUGHT = TG + "thought"
|
||||
TG_ACTION = TG + "action"
|
||||
TG_ARGUMENTS = TG + "arguments"
|
||||
TG_OBSERVATION = TG + "observation"
|
||||
TG_ANSWER = TG + "answer"
|
||||
PROV = "http://www.w3.org/ns/prov#"
|
||||
PROV_STARTED_AT_TIME = PROV + "startedAtTime"
|
||||
PROV_WAS_DERIVED_FROM = PROV + "wasDerivedFrom"
|
||||
PROV_WAS_GENERATED_BY = PROV + "wasGeneratedBy"
|
||||
RDF_TYPE = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"
|
||||
RDFS_LABEL = "http://www.w3.org/2000/01/rdf-schema#label"
|
||||
|
||||
# Graphs
|
||||
|
|
@ -280,6 +299,186 @@ def format_edge(edge, label_cache=None, socket=None, flow_id=None, user=None, co
|
|||
return f"({s_label}, {p_label}, {o_label})"
|
||||
|
||||
|
||||
def detect_trace_type(socket, flow_id, user, collection, entity_id):
    """
    Decide whether an entity is an agent Question or a GraphRAG Question.

    Both carry rdf:type = tg:Question, so two signals are used, in order:

    1. URI pattern (no queries needed):
       - urn:trustgraph:agent:    -> agent
       - urn:trustgraph:question: -> graphrag
    2. The rdf:type of entities linked to it through prov:wasDerivedFrom
       or prov:wasGeneratedBy:
       - tg:Analysis or tg:Conclusion -> agent
       - tg:Exploration               -> graphrag

    Returns:
        "agent" or "graphrag" ("graphrag" when neither signal matches)
    """
    # Step 1: recognised URI prefixes settle the question immediately
    if entity_id.startswith("urn:trustgraph:agent:"):
        return "agent"
    elif entity_id.startswith("urn:trustgraph:question:"):
        return "graphrag"

    # Step 2: gather linked entities. wasGeneratedBy is included because
    # the GraphRAG exploration is attached to its question that way.
    via_derivation = find_by_predicate_object(
        socket, flow_id, user, collection,
        PROV_WAS_DERIVED_FROM, entity_id
    )
    via_generation = find_by_predicate_object(
        socket, flow_id, user, collection,
        PROV_WAS_GENERATED_BY, entity_id
    )

    for linked_id in via_derivation + via_generation:
        type_triples = query_triples(
            socket, flow_id, user, collection,
            s=linked_id, p=RDF_TYPE, g=RETRIEVAL_GRAPH
        )
        for _, _, rdf_type in type_triples:
            if rdf_type == TG_ANALYSIS or rdf_type == TG_CONCLUSION:
                return "agent"
            if rdf_type == TG_EXPLORATION:
                return "graphrag"

    # No conclusive evidence: treat as GraphRAG
    return "graphrag"
|
||||
|
||||
|
||||
def build_agent_trace(socket, flow_id, user, collection, session_id, api=None, max_answer=500):
    """
    Build the full explainability trace for an agent session.

    Starting at the session entity, follows the chain of
    prov:wasDerivedFrom links one step at a time. Each tg:Analysis entity
    becomes an iteration (thought / action / arguments / observation) and
    the walk continues from it; a tg:Conclusion entity supplies the final
    answer and ends the walk. Any other type also ends the walk.

    Args:
        socket, flow_id, user, collection: passed through unchanged to the
            query helpers.
        session_id: URI of the agent session (the Question entity).
        api: unused here; accepted for signature parity with build_trace.
        max_answer: answers longer than this many characters are cut and
            suffixed with "... [truncated]".

    Returns:
        dict with keys "session_id", "type" ("agent"), "question", "time",
        "iterations" (list of dicts) and "final_answer" (dict or None).
    """

    def first_value(props, key):
        # Properties map a predicate to a list of values; tolerate a
        # missing key as well as an empty list.
        values = props.get(key) or [None]
        return values[0]

    # Session metadata
    session_props = get_node_properties(socket, flow_id, user, collection, session_id)

    trace = {
        "session_id": session_id,
        "type": "agent",
        "question": first_value(session_props, TG_QUERY),
        "time": first_value(session_props, PROV_STARTED_AT_TIME),
        "iterations": [],
        "final_answer": None,
    }

    current_uri = session_id
    iteration_num = 1

    # Guard against cyclic wasDerivedFrom data, which would otherwise make
    # this walk loop forever.
    visited = {session_id}

    while True:
        # Find entities derived from the current node
        derived_ids = find_by_predicate_object(
            socket, flow_id, user, collection,
            PROV_WAS_DERIVED_FROM, current_uri
        )

        if not derived_ids:
            break

        # Only the first derived entity is followed
        derived_id = derived_ids[0]
        if derived_id in visited:
            break
        visited.add(derived_id)

        derived_props = get_node_properties(socket, flow_id, user, collection, derived_id)
        types = derived_props.get(RDF_TYPE, [])

        if TG_ANALYSIS in types:
            trace["iterations"].append({
                "id": derived_id,
                "iteration_num": iteration_num,
                "thought": first_value(derived_props, TG_THOUGHT),
                "action": first_value(derived_props, TG_ACTION),
                "arguments": first_value(derived_props, TG_ARGUMENTS),
                "observation": first_value(derived_props, TG_OBSERVATION),
            })
            current_uri = derived_id
            iteration_num += 1

        elif TG_CONCLUSION in types:
            answer = first_value(derived_props, TG_ANSWER)
            if answer and len(answer) > max_answer:
                answer = answer[:max_answer] + "... [truncated]"
            trace["final_answer"] = {
                "id": derived_id,
                "answer": answer,
            }
            break

        else:
            # Unknown type, stop traversal
            break

    return trace
|
||||
|
||||
|
||||
def print_agent_text(trace):
    """
    Print an agent trace in human-readable text format.

    Args:
        trace: dict with "session_id", "question", "time", and optionally
            "iterations" (list of dicts with "iteration_num", "thought",
            "action", "arguments", "observation") and "final_answer"
            (dict with "answer").

    Fields that are missing or None are shown as "N/A". Arguments are
    pretty-printed when they parse as JSON and printed verbatim otherwise.
    Observations longer than 200 characters are truncated.
    """
    # Local import keeps this function usable on its own
    import json

    print(f"=== Agent Session: {trace['session_id']} ===")
    print()

    if trace["question"]:
        print(f"Question: {trace['question']}")
    if trace["time"]:
        print(f"Time: {trace['time']}")
    print()

    # Analysis steps
    print("--- Analysis ---")
    iterations = trace.get("iterations", [])
    if iterations:
        for iteration in iterations:
            print(f"Analysis {iteration['iteration_num']}:")
            # The keys can be present with a None value, so a .get()
            # default alone would print "None"; use `or` instead.
            print(f" Thought: {iteration.get('thought') or 'N/A'}")
            print(f" Action: {iteration.get('action') or 'N/A'}")

            args = iteration.get('arguments')
            if args:
                # Try to pretty-print JSON arguments; fall back to the raw
                # value when it is not valid JSON (or not a string).
                try:
                    args_str = json.dumps(json.loads(args), indent=4)
                except (TypeError, ValueError):
                    print(f" Arguments: {args}")
                else:
                    print(f" Arguments:")
                    for line in args_str.split('\n'):
                        print(f" {line}")
            else:
                print(f" Arguments: N/A")

            obs = iteration.get('observation') or 'N/A'
            if len(obs) > 200:
                obs = obs[:200] + "... [truncated]"
            print(f" Observation: {obs}")
            print()
    else:
        print("No analysis steps recorded")
        print()

    # Conclusion
    print("--- Conclusion ---")
    final = trace.get("final_answer")
    if final and final.get("answer"):
        print("Answer:")
        for line in final["answer"].split("\n"):
            print(f" {line}")
    else:
        print("No conclusion recorded")
|
||||
|
||||
|
||||
def print_agent_json(trace):
    """Write the agent trace to stdout as JSON indented by two spaces."""
    rendered = json.dumps(trace, indent=2)
    print(rendered)
|
||||
|
||||
|
||||
def build_trace(socket, flow_id, user, collection, question_id, api=None, show_provenance=False, max_answer=500):
|
||||
"""Build the full explainability trace for a question."""
|
||||
label_cache = {}
|
||||
|
|
@ -530,21 +729,48 @@ def main():
|
|||
socket = api.socket()
|
||||
|
||||
try:
|
||||
trace = build_trace(
|
||||
# Detect trace type (agent vs graphrag)
|
||||
trace_type = detect_trace_type(
|
||||
socket=socket,
|
||||
flow_id=args.flow_id,
|
||||
user=args.user,
|
||||
collection=args.collection,
|
||||
question_id=args.question_id,
|
||||
api=api,
|
||||
show_provenance=args.show_provenance,
|
||||
max_answer=args.max_answer,
|
||||
entity_id=args.question_id,
|
||||
)
|
||||
|
||||
if args.format == 'json':
|
||||
print_json(trace)
|
||||
if trace_type == "agent":
|
||||
# Build and print agent trace
|
||||
trace = build_agent_trace(
|
||||
socket=socket,
|
||||
flow_id=args.flow_id,
|
||||
user=args.user,
|
||||
collection=args.collection,
|
||||
session_id=args.question_id,
|
||||
api=api,
|
||||
max_answer=args.max_answer,
|
||||
)
|
||||
|
||||
if args.format == 'json':
|
||||
print_agent_json(trace)
|
||||
else:
|
||||
print_agent_text(trace)
|
||||
else:
|
||||
print_text(trace, show_provenance=args.show_provenance)
|
||||
# Build and print GraphRAG trace (existing behavior)
|
||||
trace = build_trace(
|
||||
socket=socket,
|
||||
flow_id=args.flow_id,
|
||||
user=args.user,
|
||||
collection=args.collection,
|
||||
question_id=args.question_id,
|
||||
api=api,
|
||||
show_provenance=args.show_provenance,
|
||||
max_answer=args.max_answer,
|
||||
)
|
||||
|
||||
if args.format == 'json':
|
||||
print_json(trace)
|
||||
else:
|
||||
print_text(trace, show_provenance=args.show_provenance)
|
||||
|
||||
finally:
|
||||
socket.close()
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue