mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-04-30 19:06:21 +02:00
* Added tech spec
* Add provenance recording to React agent loop
Enables agent sessions to be traced and debugged using the same
explainability infrastructure as GraphRAG. Agent traces record:
- Session start with query and timestamp
- Each iteration's thought, action, arguments, and observation
- Final answer with derivation chain
Changes:
- Add session_id and collection fields to AgentRequest schema
- Add agent predicates (TG_THOUGHT, TG_ACTION, etc.) to namespaces
- Create agent provenance triple generators in provenance/agent.py
- Register explainability producer in agent service
- Emit provenance triples during agent execution
- Update CLI tools to detect and render agent traces alongside GraphRAG
* Updated explainability taxonomy:
GraphRAG: tg:Question → tg:Exploration → tg:Focus → tg:Synthesis
Agent: tg:Question → tg:Analysis(s) → tg:Conclusion
All entities also have their PROV-O type (prov:Activity or prov:Entity).
Updated commit message:
Add provenance recording to React agent loop
Enables agent sessions to be traced and debugged using the same
explainability infrastructure as GraphRAG.
Entity types follow human reasoning patterns:
- tg:Question - the user's query (shared with GraphRAG)
- tg:Analysis - each think/act/observe cycle
- tg:Conclusion - the final answer
Also adds explicit TG types to GraphRAG entities:
- tg:Question, tg:Exploration, tg:Focus, tg:Synthesis
All types retain their PROV-O base types (prov:Activity, prov:Entity).
Changes:
- Add session_id and collection fields to AgentRequest schema
- Add explainability entity types to namespaces.py
- Create agent provenance triple generators
- Register explainability producer in agent service
- Emit provenance triples during agent execution
- Update CLI tools to detect and render both trace types
* Add Document RAG explainability. Summary of the changes:
Schema Changes:
- trustgraph-base/trustgraph/schema/services/retrieval.py: Added
explain_id and explain_graph fields to DocumentRagResponse
- trustgraph-base/trustgraph/messaging/translators/retrieval.py:
Updated translator to handle explainability fields
Provenance Changes:
- trustgraph-base/trustgraph/provenance/namespaces.py: Added
TG_CHUNK_COUNT and TG_SELECTED_CHUNK predicates
- trustgraph-base/trustgraph/provenance/uris.py: Added
docrag_question_uri, docrag_exploration_uri, docrag_synthesis_uri
generators
- trustgraph-base/trustgraph/provenance/triples.py: Added
docrag_question_triples, docrag_exploration_triples,
docrag_synthesis_triples builders
- trustgraph-base/trustgraph/provenance/__init__.py: Exported all
new Document RAG functions and predicates
Service Changes:
- trustgraph-flow/trustgraph/retrieval/document_rag/document_rag.py:
Added explainability callback support and triple emission at each
phase (Question → Exploration → Synthesis)
- trustgraph-flow/trustgraph/retrieval/document_rag/rag.py:
Registered explainability producer and wired up the callback
Documentation:
- docs/tech-specs/agent-explainability.md: Added Document RAG entity
types and provenance model documentation
Document RAG Provenance Model:
Question (urn:trustgraph:docrag:{uuid})
│
│ tg:query, prov:startedAtTime
│ rdf:type = prov:Activity, tg:Question
│
↓ prov:wasGeneratedBy
│
Exploration (urn:trustgraph:docrag:{uuid}/exploration)
│
│ tg:chunkCount, tg:selectedChunk (multiple)
│ rdf:type = prov:Entity, tg:Exploration
│
↓ prov:wasDerivedFrom
│
Synthesis (urn:trustgraph:docrag:{uuid}/synthesis)
│
│ tg:content = "The answer..."
│ rdf:type = prov:Entity, tg:Synthesis
* Add a system-specific question subtype so that the retrieval mechanism
is immediately obvious from the question's rdf:type:
System: GraphRAG
TG Types on Question: tg:Question, tg:GraphRagQuestion
URI Pattern: urn:trustgraph:question:{uuid}
────────────────────────────────────────
System: Document RAG
TG Types on Question: tg:Question, tg:DocRagQuestion
URI Pattern: urn:trustgraph:docrag:{uuid}
────────────────────────────────────────
System: Agent
TG Types on Question: tg:Question, tg:AgentQuestion
URI Pattern: urn:trustgraph:agent:{uuid}
Files modified:
- trustgraph-base/trustgraph/provenance/namespaces.py - Added
TG_GRAPH_RAG_QUESTION, TG_DOC_RAG_QUESTION, TG_AGENT_QUESTION
- trustgraph-base/trustgraph/provenance/triples.py - Added subtype to
question_triples and docrag_question_triples
- trustgraph-base/trustgraph/provenance/agent.py - Added subtype to
agent_session_triples
- trustgraph-base/trustgraph/provenance/__init__.py - Exported new types
- docs/tech-specs/agent-explainability.md - Documented the subtypes
This allows:
- Query all questions: ?q rdf:type tg:Question
- Query only GraphRAG: ?q rdf:type tg:GraphRagQuestion
- Query only Document RAG: ?q rdf:type tg:DocRagQuestion
- Query only Agent: ?q rdf:type tg:AgentQuestion
* Fixed tests
784 lines
24 KiB
Python
784 lines
24 KiB
Python
"""
|
|
Show full explainability trace for a GraphRAG or Agent session.
|
|
|
|
Given a question/session URI, displays the complete trace:
|
|
- GraphRAG: Question -> Exploration -> Focus (edge selection) -> Synthesis (answer)
|
|
- Agent: Session -> Iteration(s) (thought/action/observation) -> Final Answer
|
|
|
|
The tool auto-detects the trace type based on rdf:type.
|
|
|
|
Examples:
|
|
tg-show-explain-trace -U trustgraph -C default "urn:trustgraph:question:abc123"
|
|
tg-show-explain-trace -U trustgraph -C default "urn:trustgraph:agent:abc123"
|
|
tg-show-explain-trace --max-answer 1000 "urn:trustgraph:question:abc123"
|
|
tg-show-explain-trace --show-provenance "urn:trustgraph:question:abc123"
|
|
"""
|
|
|
|
import argparse
|
|
import json
|
|
import os
|
|
import sys
|
|
from trustgraph.api import Api
|
|
|
|
# Connection defaults. The URL and token may be overridden through the
# TRUSTGRAPH_URL / TRUSTGRAPH_TOKEN environment variables.
default_url = os.environ.get("TRUSTGRAPH_URL", "http://localhost:8088/")
default_token = os.environ.get("TRUSTGRAPH_TOKEN")
default_user = "trustgraph"
default_collection = "default"

# TrustGraph namespace and the predicates read from GraphRAG trace nodes.
TG = "https://trustgraph.ai/ns/"
TG_QUERY = f"{TG}query"
TG_EDGE_COUNT = f"{TG}edgeCount"
TG_SELECTED_EDGE = f"{TG}selectedEdge"
TG_EDGE = f"{TG}edge"
TG_REASONING = f"{TG}reasoning"
TG_CONTENT = f"{TG}content"
TG_DOCUMENT = f"{TG}document"
TG_REIFIES = f"{TG}reifies"

# rdf:type values that identify each explainability entity.
TG_QUESTION = f"{TG}Question"
TG_EXPLORATION = f"{TG}Exploration"
TG_FOCUS = f"{TG}Focus"
TG_SYNTHESIS = f"{TG}Synthesis"
TG_ANALYSIS = f"{TG}Analysis"
TG_CONCLUSION = f"{TG}Conclusion"

# Predicates read from agent Analysis / Conclusion nodes.
TG_THOUGHT = f"{TG}thought"
TG_ACTION = f"{TG}action"
TG_ARGUMENTS = f"{TG}arguments"
TG_OBSERVATION = f"{TG}observation"
TG_ANSWER = f"{TG}answer"

# PROV-O, RDF and RDFS terms used to walk the trace.
PROV = "http://www.w3.org/ns/prov#"
PROV_STARTED_AT_TIME = f"{PROV}startedAtTime"
PROV_WAS_DERIVED_FROM = f"{PROV}wasDerivedFrom"
PROV_WAS_GENERATED_BY = f"{PROV}wasGeneratedBy"
RDF_TYPE = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"
RDFS_LABEL = "http://www.w3.org/2000/01/rdf-schema#label"

# Named graphs queried by this tool.
RETRIEVAL_GRAPH = "urn:graph:retrieval"
SOURCE_GRAPH = "urn:graph:source"
|
|
|
|
|
|
def query_triples(socket, flow_id, user, collection, s=None, p=None, o=None, g=None, limit=1000):
    """Run a non-streaming triples query over the socket API.

    ``s`` and ``p`` are sent as IRI terms.  A string ``o`` is sent as an IRI
    when it starts with ``http://``, ``https://`` or ``urn:`` and as a literal
    otherwise; a dict ``o`` is passed through unchanged (used for quoted
    triples).  ``g`` is forwarded as given.

    Returns a list of ``(s, p, o)`` tuples decoded with ``extract_value``.
    If the request raises, the error is written to stderr and whatever was
    collected so far is returned.
    """
    request = {
        "user": user,
        "collection": collection,
        "limit": limit,
        "streaming": False,
    }

    # Subject and predicate are always IRIs when supplied.
    for key, iri in (("s", s), ("p", p)):
        if iri is not None:
            request[key] = {"t": "i", "i": iri}

    if isinstance(o, dict):
        # Caller already built the wire-format term.
        request["o"] = o
    elif isinstance(o, str):
        if o.startswith(("http://", "https://", "urn:")):
            request["o"] = {"t": "i", "i": o}
        else:
            request["o"] = {"t": "l", "v": o}

    if g is not None:
        request["g"] = g

    found = []
    try:
        stream = socket._send_request_sync("triples", flow_id, request, streaming_raw=True)
        for message in stream:
            # A message is either a dict wrapping the triples or the triples themselves.
            if isinstance(message, dict):
                batch = message.get("response", message.get("triples", []))
            else:
                batch = message

            if not isinstance(batch, list):
                batch = [batch] if batch else []

            for row in batch:
                decoded = tuple(extract_value(row.get(part, {})) for part in ("s", "p", "o"))
                found.append(decoded)
    except Exception as e:
        print(f"Error querying triples: {e}", file=sys.stderr)

    return found
|
|
|
|
|
|
def extract_value(term):
    """Decode a wire-format term into a plain Python value.

    Recognised shapes (both short and long key spellings are accepted):
      - IRI      ``{"t": "i", "i": ...}``  / ``{"type": "i", "iri": ...}``
      - literal  ``{"t": "l", "v": ...}``  / ``{"type": "l", "value": ...}``
      - quoted triple ``{"t": "t", "tr": {...}}`` / ``{"type": "t", "triple": {...}}``,
        returned as a dict with decoded ``"s"``, ``"p"`` and ``"o"`` entries.

    Untagged dicts fall back to their ``"i"`` or ``"v"`` entry, then to
    ``str(term)``.  Falsy input yields ``""``.  A term that is not a dict at
    all (for example a bare string) is returned as its string form instead of
    raising ``AttributeError``.
    """
    if not term:
        return ""

    if not isinstance(term, dict):
        # Raw, untagged value: there is nothing to unpack.
        return str(term)

    kind = term.get("t") or term.get("type")

    if kind == "i":
        return term.get("i") or term.get("iri", "")

    if kind == "l":
        return term.get("v") or term.get("value", "")

    if kind == "t":
        # Quoted triple; tolerate a missing or null payload.
        inner = term.get("tr") or term.get("triple") or {}
        return {
            "s": extract_value(inner.get("s", {})),
            "p": extract_value(inner.get("p", {})),
            "o": extract_value(inner.get("o", {})),
        }

    # Fallback for dicts that carry a value but no type tag.
    if "i" in term:
        return term["i"]
    if "v" in term:
        return term["v"]

    return str(term)
|
|
|
|
|
|
def get_node_properties(socket, flow_id, user, collection, node_uri, graph=RETRIEVAL_GRAPH):
    """Collect every triple whose subject is ``node_uri`` in ``graph``.

    Returns a dict mapping each predicate to the list of its object values,
    in the order the query returned them.
    """
    grouped = {}
    rows = query_triples(socket, flow_id, user, collection, s=node_uri, g=graph)
    for _, predicate, value in rows:
        grouped.setdefault(predicate, []).append(value)
    return grouped
|
|
|
|
|
|
def find_by_predicate_object(socket, flow_id, user, collection, predicate, obj, graph=RETRIEVAL_GRAPH):
    """Return the subjects of all triples in ``graph`` matching ``predicate`` and ``obj``."""
    matches = query_triples(socket, flow_id, user, collection, p=predicate, o=obj, g=graph)
    subjects = []
    for row in matches:
        # Rows are (subject, predicate, object) tuples; only the subject is wanted.
        subjects.append(row[0])
    return subjects
|
|
|
|
|
|
def get_label(socket, flow_id, user, collection, uri, label_cache):
    """Resolve the rdfs:label of ``uri``, memoising results in ``label_cache``.

    Values that are not strings, or that do not start with ``http://``,
    ``https://`` or ``urn:``, are returned untouched without any lookup.
    When no label triple is found the URI itself is cached and returned.
    """
    looks_like_iri = isinstance(uri, str) and uri.startswith(("http://", "https://", "urn:"))
    if not looks_like_iri:
        return uri

    if uri in label_cache:
        return label_cache[uri]

    rows = query_triples(socket, flow_id, user, collection, s=uri, p=RDFS_LABEL)
    # First label wins; fall back to the URI so the miss is cached too.
    resolved = rows[0][2] if rows else uri
    label_cache[uri] = resolved
    return resolved
|
|
|
|
|
|
def get_document_content(api, user, doc_id, max_content):
    """Fetch a document through ``api.library()`` and return it as display text.

    The payload is decoded as UTF-8 and cut to ``max_content`` characters
    (with a ``... [truncated]`` suffix) when longer.  Payloads that are not
    valid UTF-8 are summarised as ``[Binary: N bytes]``.  Any other failure is
    reported in the returned string rather than raised.
    """
    try:
        raw = api.library().get_document_content(user=user, id=doc_id)

        try:
            decoded = raw.decode("utf-8")
        except UnicodeDecodeError:
            return f"[Binary: {len(raw)} bytes]"

        if len(decoded) <= max_content:
            return decoded
        return decoded[:max_content] + "... [truncated]"
    except Exception as exc:
        return f"[Error fetching content: {exc}]"
|
|
|
|
|
|
def trace_edge_provenance(socket, flow_id, user, collection, edge_s, edge_p, edge_o, label_cache):
    """Trace an edge back to its source document(s) via reification.

    Looks up statements ``?stmt tg:reifies <<edge_s edge_p edge_o>>`` in the
    source graph (at most 10), then follows each statement's
    ``prov:wasDerivedFrom`` chain with ``trace_provenance_chain``.

    Returns a list of non-empty chains, one per reifying statement.

    The lookup goes through ``query_triples`` so that request building,
    response decoding and error reporting (to stderr) match every other query
    in this tool, instead of failing silently.
    """

    def as_term(value):
        # Same IRI test as query_triples/get_label: anything else is a literal.
        if isinstance(value, str) and value.startswith(("http://", "https://", "urn:")):
            return {"t": "i", "i": value}
        return {"t": "l", "v": value}

    quoted_triple = {
        "t": "t",
        "tr": {
            "s": as_term(edge_s),
            "p": {"t": "i", "i": edge_p},
            "o": as_term(edge_o),
        },
    }

    # Query: ?stmt tg:reifies <<edge>>
    statements = query_triples(
        socket, flow_id, user, collection,
        p=TG_REIFIES, o=quoted_triple, g=SOURCE_GRAPH, limit=10,
    )
    stmt_uris = [row[0] for row in statements if row[0]]

    # For each statement, follow its wasDerivedFrom chain.
    provenance_chains = []
    for stmt_uri in stmt_uris:
        chain = trace_provenance_chain(socket, flow_id, user, collection, stmt_uri, label_cache)
        if chain:
            provenance_chains.append(chain)

    return provenance_chains
|
|
|
|
|
|
def trace_provenance_chain(socket, flow_id, user, collection, start_uri, label_cache, max_depth=10):
    """Follow ``prov:wasDerivedFrom`` links in the source graph from ``start_uri``.

    Returns a list of ``{"uri": ..., "label": ...}`` dicts, starting with
    ``start_uri`` and moving towards the root.  Only the first parent of each
    node is followed.  The walk stops when a node has no parent, after
    ``max_depth`` nodes, or as soon as a node repeats, so a cyclic graph
    cannot pad the chain with duplicate entries.
    """
    chain = []
    # A list rather than a set: membership uses ==, so it also tolerates
    # unhashable values (e.g. a decoded quoted triple).
    visited = []
    current = start_uri

    for _ in range(max_depth):
        if not current or current in visited:
            break
        visited.append(current)

        label = get_label(socket, flow_id, user, collection, current, label_cache)
        chain.append({"uri": current, "label": label})

        parents = query_triples(
            socket, flow_id, user, collection,
            s=current, p=PROV_WAS_DERIVED_FROM, g=SOURCE_GRAPH,
        )
        current = parents[0][2] if parents else None

    return chain
|
|
|
|
|
|
def format_provenance_chain(chain):
    """Render a provenance chain as ``label -> label -> ...``.

    Each entry contributes its ``"label"``; entries without one contribute
    their ``"uri"``, or ``"?"`` when that is missing too.  An empty chain
    renders as the empty string.
    """
    if not chain:
        return ""

    parts = []
    for node in chain:
        if "label" in node:
            parts.append(node["label"])
        else:
            parts.append(node.get("uri", "?"))
    return " -> ".join(parts)
|
|
|
|
|
|
def format_edge(edge, label_cache=None, socket=None, flow_id=None, user=None, collection=None):
    """Format a decoded quoted-triple edge as ``(s, p, o)`` for display.

    ``edge`` is expected to be a dict with ``"s"``, ``"p"`` and ``"o"``
    entries (missing ones show as ``?``); anything else is returned as
    ``str(edge)``.

    When both ``label_cache`` and ``socket`` are supplied, each term is
    resolved through ``get_label``.  An empty cache counts as supplied, so
    labels are also resolved on first use.  Otherwise string terms containing
    ``/`` are shortened to their last path segment.  A term that is itself a
    quoted triple (dict) is formatted recursively instead of raising.
    """
    if not isinstance(edge, dict):
        return str(edge)

    # Compare against None: an empty dict is a valid (cold) cache.
    can_resolve = label_cache is not None and socket is not None

    def display(term):
        if isinstance(term, dict):
            return format_edge(term, label_cache, socket, flow_id, user, collection)
        if can_resolve:
            return get_label(socket, flow_id, user, collection, term, label_cache)
        if isinstance(term, str) and "/" in term:
            # Shorten URIs for display.
            return term.split("/")[-1]
        return term

    s_label = display(edge.get("s", "?"))
    p_label = display(edge.get("p", "?"))
    o_label = display(edge.get("o", "?"))

    return f"({s_label}, {p_label}, {o_label})"
|
|
|
|
|
|
def detect_trace_type(socket, flow_id, user, collection, entity_id):
    """
    Decide whether ``entity_id`` roots an agent trace or a GraphRAG trace.

    Fast path, by URI prefix (no query needed):
      - urn:trustgraph:agent:     -> "agent"
      - urn:trustgraph:question:  -> "graphrag"

    Otherwise the nodes linked to the entity through prov:wasDerivedFrom or
    prov:wasGeneratedBy are inspected, and the first recognised rdf:type
    decides:
      - tg:Analysis or tg:Conclusion -> "agent"
      - tg:Exploration               -> "graphrag"

    Returns:
        "agent" or "graphrag" ("graphrag" when nothing matches)
    """
    prefix_kinds = (
        ("urn:trustgraph:agent:", "agent"),
        ("urn:trustgraph:question:", "graphrag"),
    )
    for prefix, kind in prefix_kinds:
        if entity_id.startswith(prefix):
            return kind

    # Children via wasDerivedFrom first, then wasGeneratedBy
    # (the GraphRAG exploration is linked with the latter).
    children = []
    for link in (PROV_WAS_DERIVED_FROM, PROV_WAS_GENERATED_BY):
        children.extend(
            find_by_predicate_object(socket, flow_id, user, collection, link, entity_id)
        )

    for child in children:
        type_rows = query_triples(
            socket, flow_id, user, collection,
            s=child, p=RDF_TYPE, g=RETRIEVAL_GRAPH,
        )
        for _, _, rdf_type in type_rows:
            if rdf_type in (TG_ANALYSIS, TG_CONCLUSION):
                return "agent"
            if rdf_type == TG_EXPLORATION:
                return "graphrag"

    # Nothing recognisable: keep the pre-agent behaviour.
    return "graphrag"
|
|
|
|
|
|
def build_agent_trace(socket, flow_id, user, collection, session_id, api=None, max_answer=500):
    """Build the full explainability trace for an agent session.

    Reads the session's query and start time, then follows
    ``prov:wasDerivedFrom`` links forward from the session: each tg:Analysis
    node becomes an iteration (thought / action / arguments / observation)
    and the first tg:Conclusion node becomes the final answer, truncated to
    ``max_answer`` characters.  Only the first derived node is followed at
    each step.  ``api`` is accepted for signature parity with ``build_trace``
    and is not used.

    Returns a dict with keys ``session_id``, ``type``, ``question``,
    ``time``, ``iterations`` and ``final_answer``.

    The walk stops at a node of unknown type, when nothing further is
    derived, or when a node repeats, so a cyclic provenance graph cannot hang
    the tool.
    """
    trace = {
        "session_id": session_id,
        "type": "agent",
        "question": None,
        "time": None,
        "iterations": [],
        "final_answer": None,
    }

    # Session metadata.
    props = get_node_properties(socket, flow_id, user, collection, session_id)
    trace["question"] = props.get(TG_QUERY, [None])[0]
    trace["time"] = props.get(PROV_STARTED_AT_TIME, [None])[0]

    current_uri = session_id
    # A list (== membership) so unexpected unhashable ids cannot raise.
    visited = [session_id]
    iteration_num = 1

    while True:
        # Find entities derived from the current node.
        derived_ids = find_by_predicate_object(
            socket, flow_id, user, collection,
            PROV_WAS_DERIVED_FROM, current_uri,
        )
        if not derived_ids:
            break

        derived_id = derived_ids[0]
        if derived_id in visited:
            # Cycle in the derivation links: stop rather than loop forever.
            break
        visited.append(derived_id)

        derived_props = get_node_properties(socket, flow_id, user, collection, derived_id)
        types = derived_props.get(RDF_TYPE, [])

        if TG_ANALYSIS in types:
            trace["iterations"].append({
                "id": derived_id,
                "iteration_num": iteration_num,
                "thought": derived_props.get(TG_THOUGHT, [None])[0],
                "action": derived_props.get(TG_ACTION, [None])[0],
                "arguments": derived_props.get(TG_ARGUMENTS, [None])[0],
                "observation": derived_props.get(TG_OBSERVATION, [None])[0],
            })
            current_uri = derived_id
            iteration_num += 1

        elif TG_CONCLUSION in types:
            answer = derived_props.get(TG_ANSWER, [None])[0]
            if answer and len(answer) > max_answer:
                answer = answer[:max_answer] + "... [truncated]"
            trace["final_answer"] = {
                "id": derived_id,
                "answer": answer,
            }
            break

        else:
            # Unknown type, stop traversal.
            break

    return trace
|
|
|
|
|
|
def print_agent_text(trace):
    """Print an agent trace (as built by ``build_agent_trace``) as text.

    Prints the session header, each analysis step and the conclusion to
    stdout.  Step fields that are missing or ``None`` are shown as ``N/A``.
    Arguments are pretty-printed when they parse as JSON and printed verbatim
    otherwise.  Observations longer than 200 characters are truncated.
    """
    print(f"=== Agent Session: {trace['session_id']} ===")
    print()

    if trace["question"]:
        print(f"Question: {trace['question']}")
    if trace["time"]:
        print(f"Time: {trace['time']}")
    print()

    # Analysis steps
    print("--- Analysis ---")
    iterations = trace.get("iterations", [])
    if iterations:
        for iteration in iterations:
            print(f"Analysis {iteration['iteration_num']}:")
            # The keys exist with value None when the triple was absent, so
            # a .get() default alone would never apply.
            print(f" Thought: {iteration.get('thought') or 'N/A'}")
            print(f" Action: {iteration.get('action') or 'N/A'}")

            args = iteration.get('arguments')
            if args:
                try:
                    args_str = json.dumps(json.loads(args), indent=4)
                except (TypeError, ValueError):
                    # Not JSON (or not text): show it as stored.
                    print(f" Arguments: {args}")
                else:
                    print(" Arguments:")
                    for line in args_str.split('\n'):
                        print(f" {line}")
            else:
                print(" Arguments: N/A")

            obs = str(iteration.get('observation') or 'N/A')
            if len(obs) > 200:
                obs = obs[:200] + "... [truncated]"
            print(f" Observation: {obs}")
            print()
    else:
        print("No analysis steps recorded")
        print()

    # Conclusion
    print("--- Conclusion ---")
    final = trace.get("final_answer")
    if final and final.get("answer"):
        print("Answer:")
        for line in final["answer"].split("\n"):
            print(f" {line}")
    else:
        print("No conclusion recorded")
|
|
|
|
|
|
def print_agent_json(trace):
    """Write the agent trace to stdout as JSON indented by two spaces."""
    rendered = json.dumps(trace, indent=2)
    print(rendered)
|
|
|
|
|
|
def build_trace(socket, flow_id, user, collection, question_id, api=None, show_provenance=False, max_answer=500):
    """Build the full GraphRAG explainability trace for a question.

    Walks Question -> Exploration (prov:wasGeneratedBy) -> Focus
    (prov:wasDerivedFrom) -> Synthesis (prov:wasDerivedFrom), taking the
    first match at every hop.  Each stage that is found is recorded under
    ``exploration``, ``focus`` or ``synthesis``; stages that are missing stay
    ``None`` and end the walk.

    With ``show_provenance`` each selected edge that decoded to a dict also
    gets a ``provenance`` list from ``trace_edge_provenance``.  The synthesis
    answer comes from tg:content, or is fetched through ``api`` from the
    referenced tg:document when no inline content exists; inline content is
    truncated to ``max_answer`` characters.

    The returned dict additionally carries ``_label_cache`` for formatting.
    """
    label_cache = {}
    conn = (socket, flow_id, user, collection)

    # Question metadata.
    question_props = get_node_properties(*conn, question_id)
    trace = {
        "question_id": question_id,
        "question": question_props.get(TG_QUERY, [None])[0],
        "time": question_props.get(PROV_STARTED_AT_TIME, [None])[0],
        "exploration": None,
        "focus": None,
        "synthesis": None,
    }

    # Exploration: ?exploration prov:wasGeneratedBy question_id
    focus_ids = []
    exploration_ids = find_by_predicate_object(*conn, PROV_WAS_GENERATED_BY, question_id)
    if exploration_ids:
        exploration_id = exploration_ids[0]
        exploration_props = get_node_properties(*conn, exploration_id)
        trace["exploration"] = {
            "id": exploration_id,
            "edge_count": exploration_props.get(TG_EDGE_COUNT, [None])[0],
        }
        # Focus: ?focus prov:wasDerivedFrom exploration_id
        focus_ids = find_by_predicate_object(*conn, PROV_WAS_DERIVED_FROM, exploration_id)

    synthesis_ids = []
    if focus_ids:
        focus_id = focus_ids[0]
        focus_props = get_node_properties(*conn, focus_id)

        selected_edges = []
        for selection_uri in focus_props.get(TG_SELECTED_EDGE, []):
            selection_props = get_node_properties(*conn, selection_uri)
            edge = selection_props.get(TG_EDGE, [None])[0]
            entry = {
                "edge": edge,
                "reasoning": selection_props.get(TG_REASONING, [None])[0],
            }

            # Only decoded quoted triples can be traced back to a source.
            if show_provenance and isinstance(edge, dict):
                entry["provenance"] = trace_edge_provenance(
                    *conn,
                    edge.get("s", ""), edge.get("p", ""), edge.get("o", ""),
                    label_cache,
                )

            selected_edges.append(entry)

        trace["focus"] = {
            "id": focus_id,
            "selected_edges": selected_edges,
        }
        # Synthesis: ?synthesis prov:wasDerivedFrom focus_id
        synthesis_ids = find_by_predicate_object(*conn, PROV_WAS_DERIVED_FROM, focus_id)

    if synthesis_ids:
        synthesis_id = synthesis_ids[0]
        synthesis_props = get_node_properties(*conn, synthesis_id)

        # Answer is stored inline or as a reference to a library document.
        content = synthesis_props.get(TG_CONTENT, [None])[0]
        doc_id = synthesis_props.get(TG_DOCUMENT, [None])[0]

        if not content and doc_id and api:
            content = get_document_content(api, user, doc_id, max_answer)
        elif content and len(content) > max_answer:
            content = content[:max_answer] + "... [truncated]"

        trace["synthesis"] = {
            "id": synthesis_id,
            "document_id": doc_id,
            "answer": content,
        }

    # Keep the label cache with the trace for the formatters.
    trace["_label_cache"] = label_cache
    return trace
|
|
|
|
|
|
def print_text(trace, show_provenance=False):
    """Print a GraphRAG trace (as built by ``build_trace``) as text.

    Prints the session header followed by the Exploration, Focus and
    Synthesis sections to stdout.  A missing edge count is shown as ``?``.
    Reasoning longer than 100 characters is shortened.  With
    ``show_provenance`` each edge's provenance chains are listed as
    ``Source:`` lines.
    """
    print(f"=== GraphRAG Session: {trace['question_id']} ===")
    print()

    if trace["question"]:
        print(f"Question: {trace['question']}")
    if trace["time"]:
        print(f"Time: {trace['time']}")
    print()

    # Exploration
    print("--- Exploration ---")
    exploration = trace.get("exploration")
    if exploration:
        # The key exists with value None when the count triple was absent,
        # so test for None explicitly (a count of 0 must still be shown).
        edge_count = exploration.get("edge_count")
        if edge_count is None:
            edge_count = "?"
        print(f"Retrieved {edge_count} edges from knowledge graph")
    else:
        print("No exploration data found")
    print()

    # Focus
    print("--- Focus (Edge Selection) ---")
    focus = trace.get("focus")
    if focus:
        edges = focus.get("selected_edges", [])
        print(f"Selected {len(edges)} edges:")
        print()

        for i, edge_info in enumerate(edges, 1):
            edge = edge_info.get("edge")
            reasoning = edge_info.get("reasoning")

            if edge:
                edge_str = format_edge(edge)
                print(f" {i}. {edge_str}")

            if reasoning:
                r_short = reasoning[:100] + "..." if len(reasoning) > 100 else reasoning
                print(f" Reasoning: {r_short}")

            if show_provenance:
                for chain in edge_info.get("provenance", []):
                    chain_str = format_provenance_chain(chain)
                    if chain_str:
                        print(f" Source: {chain_str}")

            print()
    else:
        print("No focus data found")
        print()

    # Synthesis
    print("--- Synthesis ---")
    synthesis = trace.get("synthesis")
    if synthesis:
        answer = synthesis.get("answer")
        if answer:
            print("Answer:")
            # Indent the answer
            for line in answer.split("\n"):
                print(f" {line}")
        else:
            print("No answer content found")
    else:
        print("No synthesis data found")
|
|
|
|
|
|
def print_json(trace):
    """Write the trace to stdout as JSON, leaving out internal ``_``-prefixed keys."""
    public = {}
    for key, value in trace.items():
        # Keys such as "_label_cache" are bookkeeping, not part of the output.
        if key.startswith("_"):
            continue
        public[key] = value
    print(json.dumps(public, indent=2))
|
|
|
|
|
|
def main():
    """Command-line entry point.

    Parses the arguments, opens the API socket, detects whether the given URI
    is an agent or a GraphRAG trace, builds the matching trace and prints it
    as text or JSON.  The socket is always closed; any error is reported on
    stderr and the process exits with status 1.
    """
    parser = argparse.ArgumentParser(
        prog='tg-show-explain-trace',
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )

    # (flags, add_argument options), in the order they appear in --help.
    argument_specs = [
        (('question_id',), {
            'help': 'Question/session URI to show trace for',
        }),
        (('-u', '--api-url'), {
            'default': default_url,
            'help': f'API URL (default: {default_url})',
        }),
        (('-t', '--token'), {
            'default': default_token,
            'help': 'Auth token (default: $TRUSTGRAPH_TOKEN)',
        }),
        (('-U', '--user'), {
            'default': default_user,
            'help': f'User ID (default: {default_user})',
        }),
        (('-C', '--collection'), {
            'default': default_collection,
            'help': f'Collection (default: {default_collection})',
        }),
        (('-f', '--flow-id'), {
            'default': 'default',
            'help': 'Flow ID (default: default)',
        }),
        (('--max-answer',), {
            'type': int,
            'default': 500,
            'help': 'Max chars for answer display (default: 500)',
        }),
        (('--show-provenance',), {
            'action': 'store_true',
            'help': 'Also trace edges back to source documents',
        }),
        (('--format',), {
            'choices': ['text', 'json'],
            'default': 'text',
            'help': 'Output format: text (default), json',
        }),
    ]
    for flags, options in argument_specs:
        parser.add_argument(*flags, **options)

    args = parser.parse_args()
    as_json = args.format == 'json'

    try:
        api = Api(args.api_url, token=args.token)
        socket = api.socket()

        # Arguments shared by every query helper.
        common = {
            "socket": socket,
            "flow_id": args.flow_id,
            "user": args.user,
            "collection": args.collection,
        }

        try:
            # Detect trace type (agent vs graphrag)
            trace_type = detect_trace_type(entity_id=args.question_id, **common)

            if trace_type == "agent":
                trace = build_agent_trace(
                    session_id=args.question_id,
                    api=api,
                    max_answer=args.max_answer,
                    **common,
                )
                if as_json:
                    print_agent_json(trace)
                else:
                    print_agent_text(trace)
            else:
                # GraphRAG trace
                trace = build_trace(
                    question_id=args.question_id,
                    api=api,
                    show_provenance=args.show_provenance,
                    max_answer=args.max_answer,
                    **common,
                )
                if as_json:
                    print_json(trace)
                else:
                    print_text(trace, show_provenance=args.show_provenance)
        finally:
            socket.close()

    except Exception as e:
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)
|
|
|
|
|
|
# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|