Deliver explainability triples inline in retrieval response stream (#763)

Provenance triples are now included directly in explain messages from
GraphRAG, DocumentRAG, and Agent services, eliminating the need for
follow-up knowledge graph queries to retrieve explainability details.

Each explain message in the response stream now carries:
- explain_id: root URI for this provenance step (unchanged)
- explain_graph: named graph where triples are stored (unchanged)
- explain_triples: the actual provenance triples for this step (new)

Changes across the stack:
- Schema: added explain_triples field to GraphRagResponse,
  DocumentRagResponse, and AgentResponse
- Services: all explain message call sites pass triples through
  (graph_rag, document_rag, agent react, agent orchestrator)
- Translators: encode explain_triples via TripleTranslator for
  gateway wire format
- Python SDK: ProvenanceEvent now includes parsed ExplainEntity
  and raw triples; expanded event_type detection
- CLI: invoke_graph_rag, invoke_agent, invoke_document_rag use the
  inline entity when available and fall back to a graph query otherwise
- Tech specs updated

Adds a further explainability test.
This commit is contained in:
cybermaggedon 2026-04-07 12:19:05 +01:00 committed by GitHub
parent 2f8d6a3ffb
commit ddd4bd7790
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
16 changed files with 521 additions and 49 deletions

View file

@ -366,19 +366,13 @@ class SocketClient:
# Handle GraphRAG/DocRAG message format with message_type
if message_type == "explain":
if include_provenance:
return ProvenanceEvent(
explain_id=resp.get("explain_id", ""),
explain_graph=resp.get("explain_graph", "")
)
return self._build_provenance_event(resp)
return None
# Handle Agent message format with chunk_type="explain"
if chunk_type == "explain":
if include_provenance:
return ProvenanceEvent(
explain_id=resp.get("explain_id", ""),
explain_graph=resp.get("explain_graph", "")
)
return self._build_provenance_event(resp)
return None
if chunk_type == "thought":
@ -413,6 +407,42 @@ class SocketClient:
error=None
)
def _build_provenance_event(self, resp: Dict[str, Any]) -> ProvenanceEvent:
    """Build a ProvenanceEvent from an explain response dict.

    Reads ``explain_id``, ``explain_graph`` and ``explain_triples`` from
    *resp*. When triples are present they are flattened into ``(s, p, o)``
    string tuples and handed to ``ExplainEntity.from_triples`` so the
    event carries a parsed entity alongside the raw triples.

    Parsing is best-effort: if the triples cannot be converted (or the
    explainability module cannot be imported) the event is still returned
    with ``entity=None`` and the failure is logged at debug level.

    Args:
        resp: Decoded response message. Each entry of ``explain_triples``
            is read as a dict with ``"s"``, ``"p"`` and ``"o"`` term dicts.

    Returns:
        A ProvenanceEvent carrying the ids, the parsed entity (or None)
        and the raw triple dicts exactly as received.
    """
    import logging

    explain_id = resp.get("explain_id", "")
    explain_graph = resp.get("explain_graph", "")
    # A key that is present but null would otherwise leak None into the
    # event's ``triples`` list field.
    raw_triples = resp.get("explain_triples") or []

    entity = None
    if raw_triples:
        try:
            from .explainability import ExplainEntity

            # Convert wire-format triple dicts to (s, p, o) tuples.
            parsed = []
            for triple in raw_triples:
                subject = (triple.get("s") or {}).get("i", "")
                predicate = (triple.get("p") or {}).get("i", "")
                o_term = triple.get("o") or {}
                # Object terms tagged "t" == "i" keep their value under
                # "i"; every other term is read from "v" (presumably
                # IRI vs. literal -- confirm against the wire schema).
                if o_term.get("t") == "i":
                    obj = o_term.get("i", "")
                else:
                    obj = o_term.get("v", "")
                parsed.append((subject, predicate, obj))
            entity = ExplainEntity.from_triples(explain_id, parsed)
        except Exception:
            # Deliberately broad: a malformed explain payload must not
            # break the response stream, but it should be diagnosable.
            logging.getLogger(__name__).debug(
                "Could not parse inline explain triples for %r",
                explain_id,
                exc_info=True,
            )

    return ProvenanceEvent(
        explain_id=explain_id,
        explain_graph=explain_graph,
        entity=entity,
        triples=raw_triples,
    )
def close(self) -> None:
"""Close the persistent WebSocket connection."""
if self._loop and not self._loop.is_closed():

View file

@ -213,25 +213,47 @@ class ProvenanceEvent:
"""
Provenance event for explainability.
Emitted during GraphRAG queries when explainable mode is enabled.
Emitted during retrieval queries when explainable mode is enabled.
Each event represents a provenance node created during query processing.
Attributes:
explain_id: URI of the provenance node (e.g., urn:trustgraph:question:abc123)
explain_graph: Named graph where provenance triples are stored (e.g., urn:graph:retrieval)
event_type: Type of provenance event (question, exploration, focus, synthesis)
event_type: Type of provenance event (question, exploration, focus, synthesis, etc.)
entity: Parsed ExplainEntity from inline triples (if available)
triples: Raw triples from the response (wire format dicts)
"""
explain_id: str
explain_graph: str = ""
event_type: str = "" # Derived from explain_id
entity: object = None # ExplainEntity (parsed from triples)
triples: list = dataclasses.field(default_factory=list) # Raw wire-format triple dicts
def __post_init__(self):
# Extract event type from explain_id
if "question" in self.explain_id:
self.event_type = "question"
elif "grounding" in self.explain_id:
self.event_type = "grounding"
elif "exploration" in self.explain_id:
self.event_type = "exploration"
elif "focus" in self.explain_id:
self.event_type = "focus"
elif "synthesis" in self.explain_id:
self.event_type = "synthesis"
elif "iteration" in self.explain_id:
self.event_type = "iteration"
elif "observation" in self.explain_id:
self.event_type = "observation"
elif "conclusion" in self.explain_id:
self.event_type = "conclusion"
elif "decomposition" in self.explain_id:
self.event_type = "decomposition"
elif "finding" in self.explain_id:
self.event_type = "finding"
elif "plan" in self.explain_id:
self.event_type = "plan"
elif "step-result" in self.explain_id:
self.event_type = "step-result"
elif "session" in self.explain_id:
self.event_type = "session"