GraphRAG Query-Time Explainability (#677)

Implements full explainability pipeline for GraphRAG queries, enabling traceability from answers back to source documents.

Renamed throughout for clarity:
- provenance_callback → explain_callback
- provenance_id → explain_id
- provenance_collection → explain_collection
- message_type "provenance" → "explain"
- Queue name "provenance" → "explainability"

GraphRAG queries now emit explainability events as they execute:
1. Session - query text and timestamp
2. Retrieval - edges retrieved from subgraph
3. Selection - selected edges with LLM reasoning (JSONL with id + reasoning)
4. Answer - reference to synthesized response

Events stream via explain_callback during query(), enabling real-time UX.

- Answers stored in librarian service (not inline in graph - too large)
- Document ID as URN: urn:trustgraph:answer:{session_id}
- Graph stores tg:document reference (IRI) to librarian document
- Added librarian producer/consumer to graph-rag service

- get_labelgraph() now returns (labeled_edges, uri_map)
- uri_map maps edge_id(label_s, label_p, label_o) → (uri_s, uri_p, uri_o)
- Explainability data stores original URIs, not labels
- Enables tracing edges back to reifying statements via tg:reifies

- Added serialize_triple() to query service (matches storage format)
- get_term_value() now handles TRIPLE type terms
- Enables querying by quoted triple in object position: ?stmt tg:reifies <<s p o>>

- Displays real-time explainability events during query
- Resolves rdfs:label for edge components (s, p, o)
- Traces source chain via prov:wasDerivedFrom to root document
- Output: "Source: Chunk 1 → Page 2 → Document Title"
- Label caching to avoid repeated queries

GraphRagResponse:
- explain_id: str | None
- explain_collection: str | None
- message_type: str ("chunk" or "explain")
- end_of_session: bool

trustgraph-base/trustgraph/provenance/:
- namespaces.py - Added TG_DOCUMENT predicate
- triples.py - answer_triples() supports document_id reference
- uris.py - Added edge_selection_uri()

trustgraph-base/trustgraph/schema/services/retrieval.py:
- GraphRagResponse with explain_id, explain_collection, end_of_session

trustgraph-flow/trustgraph/retrieval/graph_rag/:
- graph_rag.py - URI preservation, streaming answer accumulation
- rag.py - Librarian integration, real-time explain emission

trustgraph-flow/trustgraph/query/triples/cassandra/service.py:
- Quoted triple serialization for query matching

trustgraph-cli/trustgraph/cli/invoke_graph_rag.py:
- Full explainability display with label resolution and source tracing
2026-06-16 02:23:39 +02:00 · 2026-03-10 10:00:01 +00:00 · 2026-03-10 10:00:01 +00:00 · 7a6197d8c3
commit 7a6197d8c3
parent d2d71f859d
24 changed files with 2001 additions and 323 deletions
--- a/trustgraph-cli/trustgraph/cli/invoke_graph_rag.py
+++ b/trustgraph-cli/trustgraph/cli/invoke_graph_rag.py
@ -3,7 +3,11 @@ Uses the GraphRAG service to answer a question
 """

 import argparse
+import json
 import os
+import sys
+import websockets
+import asyncio
 from trustgraph.api import Api

 default_url = os.getenv("TRUSTGRAPH_URL", 'http://localhost:8088/')
@ -15,11 +19,609 @@ default_triple_limit = 30
 default_max_subgraph_size = 150
 default_max_path_length = 2

+# Provenance predicates
+# Full predicate IRIs compared against the "p" component of triples returned
+# by the triples service when displaying explain events.
+TG = "https://trustgraph.ai/ns/"  # Namespace prefix for the TG_* predicates below
+TG_QUERY = TG + "query"  # Session event: shown as "Query"
+TG_EDGE_COUNT = TG + "edgeCount"  # Retrieval event: shown as "Edges retrieved"
+TG_SELECTED_EDGE = TG + "selectedEdge"  # Selection event: object is an edge selection URI
+TG_EDGE = TG + "edge"  # Edge selection entity: object is the quoted edge triple
+TG_REASONING = TG + "reasoning"  # Edge selection entity: shown as "Reason"
+TG_CONTENT = TG + "content"  # Answer event: only the length of the object is shown
+TG_REIFIES = TG + "reifies"  # Queried as: ?stmt tg:reifies <<s p o>>
+PROV = "http://www.w3.org/ns/prov#"  # Namespace prefix for the PROV_* predicates below
+PROV_STARTED_AT_TIME = PROV + "startedAtTime"  # Session event: shown as "Time"
+PROV_WAS_DERIVED_FROM = PROV + "wasDerivedFrom"  # Followed upwards when tracing sources
+RDFS_LABEL = "http://www.w3.org/2000/01/rdf-schema#label"  # Used to resolve display labels
+
+
+def _get_event_type(prov_id):
+    """Classify an explain ID by the event keyword it contains.
+
+    Keywords are checked as substrings in a fixed order (session,
+    retrieval, selection, answer); the first one found is returned.
+    IDs containing none of them are reported as "provenance".
+    """
+    for keyword in ("session", "retrieval", "selection", "answer"):
+        if keyword in prov_id:
+            return keyword
+    return "provenance"
+
+
+def _format_provenance_details(event_type, triples):
+    """Build the indented detail lines shown under an explain event.
+
+    `triples` is an iterable of (s, p, o) tuples describing the event node.
+    Which predicates are reported depends on `event_type`; unrecognised
+    event types produce no lines. Returns a list of strings.
+    """
+    if event_type == "session":
+        # Query text and start time, in the order the triples arrive
+        session_lines = []
+        for _subj, pred, obj in triples:
+            if pred == TG_QUERY:
+                session_lines.append(f"    Query: {obj}")
+            elif pred == PROV_STARTED_AT_TIME:
+                session_lines.append(f"    Time: {obj}")
+        return session_lines
+
+    if event_type == "retrieval":
+        return [
+            f"    Edges retrieved: {obj}"
+            for _subj, pred, obj in triples
+            if pred == TG_EDGE_COUNT
+        ]
+
+    if event_type == "selection":
+        # Only a count is reported here; per-edge details are looked up
+        # separately by the caller.
+        selected = [obj for _subj, pred, obj in triples if pred == TG_SELECTED_EDGE]
+        if not selected:
+            return []
+        return [f"    Selected {len(selected)} edge(s)"]
+
+    if event_type == "answer":
+        # The answer text itself is streamed elsewhere, so report its size only
+        return [
+            f"    Answer length: {len(obj)} chars"
+            for _subj, pred, obj in triples
+            if pred == TG_CONTENT
+        ]
+
+    return []
+
+
+async def _query_triples_once(ws_url, flow_id, prov_id, user, collection, debug=False):
+    """Query triples for a provenance node (single attempt)
+
+    Opens a new websocket connection to `ws_url`, asks the "triples"
+    service of flow `flow_id` for up to 100 triples whose subject is the
+    IRI `prov_id`, and reads messages until one is marked complete or
+    carries an error.
+
+    Returns a list of (s, p, o) tuples. s and p are strings; o is a string
+    for IRIs/values, or a dict with "s", "p", "o" string entries when the
+    object is a quoted triple. Any exception is swallowed (reported only
+    when `debug` is set) and whatever was collected so far is returned.
+    """
+    request = {
+        "id": "triples-request",
+        "service": "triples",
+        "flow": flow_id,
+        "request": {
+            "s": {"t": "i", "i": prov_id},
+            "user": user,
+            "collection": collection,
+            "limit": 100
+        }
+    }
+
+    if debug:
+        print(f"    [debug] querying triples for s={prov_id}", file=sys.stderr)
+
+    triples = []
+    try:
+        async with websockets.connect(ws_url, ping_interval=20, ping_timeout=30) as websocket:
+            await websocket.send(json.dumps(request))
+
+            async for raw_message in websocket:
+                response = json.loads(raw_message)
+
+                if debug:
+                    print(f"    [debug] response: {json.dumps(response)[:200]}", file=sys.stderr)
+
+                # Ignore messages that are not replies to this request
+                if response.get("id") != "triples-request":
+                    continue
+
+                if "error" in response:
+                    if debug:
+                        print(f"    [debug] error: {response['error']}", file=sys.stderr)
+                    break
+
+                if "response" in response:
+                    resp = response["response"]
+                    # Handle triples response
+                    # Response format: {"response": [triples...]}
+                    # Each triple uses compact keys: "i" for iri, "v" for value, "t" for type
+                    triple_list = resp.get("response", [])
+                    for t in triple_list:
+                        # Prefer the IRI form, fall back to the plain value, then ""
+                        s = t.get("s", {}).get("i", t.get("s", {}).get("v", ""))
+                        p = t.get("p", {}).get("i", t.get("p", {}).get("v", ""))
+                        # Handle quoted triples (type "t") and regular values
+                        o_term = t.get("o", {})
+                        if o_term.get("t") == "t":
+                            # Quoted triple - extract s, p, o from nested structure
+                            # Note: the term type of the nested object (IRI vs value)
+                            # is not kept, only its string form
+                            tr = o_term.get("tr", {})
+                            o = {
+                                "s": tr.get("s", {}).get("i", ""),
+                                "p": tr.get("p", {}).get("i", ""),
+                                "o": tr.get("o", {}).get("i", tr.get("o", {}).get("v", "")),
+                            }
+                        else:
+                            o = o_term.get("i", o_term.get("v", ""))
+                        triples.append((s, p, o))
+
+                    # The completion flag is accepted at either nesting level
+                    if resp.get("complete") or response.get("complete"):
+                        break
+    except Exception as e:
+        # Best-effort lookup: failures yield the triples gathered so far
+        if debug:
+            print(f"    [debug] exception: {e}", file=sys.stderr)
+
+    if debug:
+        print(f"    [debug] got {len(triples)} triples", file=sys.stderr)
+
+    return triples
+
+
+async def _query_triples(ws_url, flow_id, prov_id, user, collection, max_retries=5, retry_delay=0.2, debug=False):
+    """Fetch triples for a provenance node, retrying while the result is empty.
+
+    An explain event can arrive before its triples are stored, so an empty
+    result is retried up to `max_retries` attempts in total, sleeping
+    `retry_delay` seconds between attempts. Returns the first non-empty
+    result, or an empty list if every attempt came back empty.
+    """
+    final_attempt = max_retries - 1
+
+    for attempt in range(max_retries):
+        found = await _query_triples_once(ws_url, flow_id, prov_id, user, collection, debug)
+        if found:
+            return found
+
+        # Nothing yet; no point sleeping after the last attempt
+        if attempt >= final_attempt:
+            continue
+
+        if debug:
+            print(f"    [debug] retry {attempt + 1}/{max_retries}...", file=sys.stderr)
+        await asyncio.sleep(retry_delay)
+
+    return []
+
+
+async def _query_edge_provenance(ws_url, flow_id, edge_s, edge_p, edge_o, user, collection, debug=False):
+    """
+    Query for provenance of an edge (s, p, o) in the knowledge graph.
+
+    Finds statements that reify the edge via tg:reifies, then follows
+    prov:wasDerivedFrom to find source documents.
+
+    edge_s and edge_p are sent as IRIs. edge_o is sent as an IRI when it
+    looks like one (see _is_iri) and as a literal value otherwise.
+
+    Each lookup opens its own websocket connection and requests at most 10
+    results. Errors and exceptions are swallowed (reported only when debug
+    is set), so a failed lookup contributes no sources.
+
+    Returns list of source URIs (chunks, pages, documents).
+    """
+    # The edge object arrives as a bare string, so its term type has to be
+    # inferred. Use the same IRI test as the rest of this module: a loose
+    # "starts with http" check would send literals such as "http is a
+    # protocol" as IRIs and would raise on non-string values.
+    if _is_iri(edge_o):
+        object_term = {"t": "i", "i": edge_o}
+    else:
+        object_term = {"t": "l", "v": edge_o}
+
+    # Query for statements that reify this edge: ?stmt tg:reifies <<s p o>>
+    request = {
+        "id": "edge-prov-request",
+        "service": "triples",
+        "flow": flow_id,
+        "request": {
+            "p": {"t": "i", "i": TG_REIFIES},
+            "o": {
+                "t": "t",  # Quoted triple type
+                "tr": {
+                    "s": {"t": "i", "i": edge_s},
+                    "p": {"t": "i", "i": edge_p},
+                    "o": object_term,
+                }
+            },
+            "user": user,
+            "collection": collection,
+            "limit": 10
+        }
+    }
+
+    if debug:
+        print(f"    [debug] querying edge provenance for ({edge_s}, {edge_p}, {edge_o})", file=sys.stderr)
+
+    stmt_uris = []
+    try:
+        async with websockets.connect(ws_url, ping_interval=20, ping_timeout=30) as websocket:
+            await websocket.send(json.dumps(request))
+
+            async for raw_message in websocket:
+                response = json.loads(raw_message)
+
+                # Ignore messages that are not replies to this request
+                if response.get("id") != "edge-prov-request":
+                    continue
+
+                if "error" in response:
+                    if debug:
+                        print(f"    [debug] error: {response['error']}", file=sys.stderr)
+                    break
+
+                if "response" in response:
+                    resp = response["response"]
+                    triple_list = resp.get("response", [])
+                    # The subject of each match is a reifying statement
+                    for t in triple_list:
+                        s = t.get("s", {}).get("i", "")
+                        if s:
+                            stmt_uris.append(s)
+
+                    if resp.get("complete") or response.get("complete"):
+                        break
+    except Exception as e:
+        # Best-effort: carry on with whatever statements were found
+        if debug:
+            print(f"    [debug] exception querying edge provenance: {e}", file=sys.stderr)
+
+    if debug:
+        print(f"    [debug] found {len(stmt_uris)} reifying statements", file=sys.stderr)
+
+    # For each statement, query wasDerivedFrom to find sources
+    sources = []
+    for stmt_uri in stmt_uris:
+        # Query: stmt_uri prov:wasDerivedFrom ?source
+        request = {
+            "id": "derived-from-request",
+            "service": "triples",
+            "flow": flow_id,
+            "request": {
+                "s": {"t": "i", "i": stmt_uri},
+                "p": {"t": "i", "i": PROV_WAS_DERIVED_FROM},
+                "user": user,
+                "collection": collection,
+                "limit": 10
+            }
+        }
+
+        try:
+            async with websockets.connect(ws_url, ping_interval=20, ping_timeout=30) as websocket:
+                await websocket.send(json.dumps(request))
+
+                async for raw_message in websocket:
+                    response = json.loads(raw_message)
+
+                    if response.get("id") != "derived-from-request":
+                        continue
+
+                    if "error" in response:
+                        break
+
+                    if "response" in response:
+                        resp = response["response"]
+                        triple_list = resp.get("response", [])
+                        # Only IRI objects count as sources
+                        for t in triple_list:
+                            o = t.get("o", {}).get("i", "")
+                            if o:
+                                sources.append(o)
+
+                        if resp.get("complete") or response.get("complete"):
+                            break
+        except Exception as e:
+            if debug:
+                print(f"    [debug] exception querying wasDerivedFrom: {e}", file=sys.stderr)
+
+    if debug:
+        print(f"    [debug] found {len(sources)} source(s): {sources}", file=sys.stderr)
+
+    return sources
+
+
+async def _query_derived_from(ws_url, flow_id, uri, user, collection, debug=False):
+    """Query for the prov:wasDerivedFrom parent of a URI. Returns None if no parent.
+
+    Opens a new websocket connection and requests a single
+    (uri, prov:wasDerivedFrom, ?parent) triple. The IRI of the first
+    returned object is the result; None is also returned when that object
+    has no IRI form, on a service error, or when an exception occurs
+    (exceptions are reported only when `debug` is set).
+    """
+    request = {
+        "id": "parent-request",
+        "service": "triples",
+        "flow": flow_id,
+        "request": {
+            "s": {"t": "i", "i": uri},
+            "p": {"t": "i", "i": PROV_WAS_DERIVED_FROM},
+            "user": user,
+            "collection": collection,
+            "limit": 1
+        }
+    }
+
+    try:
+        async with websockets.connect(ws_url, ping_interval=20, ping_timeout=30) as websocket:
+            await websocket.send(json.dumps(request))
+
+            async for raw_message in websocket:
+                response = json.loads(raw_message)
+
+                # Ignore messages that are not replies to this request
+                if response.get("id") != "parent-request":
+                    continue
+
+                if "error" in response:
+                    break
+
+                if "response" in response:
+                    resp = response["response"]
+                    triple_list = resp.get("response", [])
+                    if triple_list:
+                        # First match wins; returning here also closes the connection
+                        return triple_list[0].get("o", {}).get("i", None)
+
+                    if resp.get("complete") or response.get("complete"):
+                        break
+    except Exception as e:
+        # Best-effort: a failed lookup is treated the same as "no parent"
+        if debug:
+            print(f"    [debug] exception querying parent: {e}", file=sys.stderr)
+
+    return None
+
+
+async def _trace_provenance_chain(ws_url, flow_id, source_uri, user, collection, label_cache, debug=False):
+    """
+    Trace the full provenance chain from a source URI up to the root document.
+
+    Starting at source_uri, repeatedly resolves the entity's label and then
+    its prov:wasDerivedFrom parent. The walk stops when there is no parent,
+    when a URI already in the chain is reached again (a cycle), or after
+    10 entries.
+
+    Returns a list of (uri, label) tuples from leaf to root.
+    """
+    chain = []
+    visited = set()  # URIs already in the chain, to stop on any cycle
+    current = source_uri
+    max_depth = 10  # Upper bound on chain length even without a cycle
+
+    for _ in range(max_depth):
+        # Stop on a missing URI, or on one we have already reported: a
+        # cycle such as A -> B -> A would otherwise repeat its entries
+        # until max_depth is exhausted.
+        if not current or current in visited:
+            break
+        visited.add(current)
+
+        # Get label for current entity
+        label = await _query_label(ws_url, flow_id, current, user, collection, label_cache, debug)
+        chain.append((current, label))
+
+        # Get parent
+        parent = await _query_derived_from(ws_url, flow_id, current, user, collection, debug)
+        if not parent:
+            break
+        current = parent
+
+    return chain
+
+
+def _format_provenance_chain(chain):
+    """
+    Render a provenance chain as a single display string.
+
+    `chain` is a list of (uri, label) pairs ordered leaf to root. Only the
+    labels are shown, joined with " → ". An empty chain gives "".
+    """
+    if chain:
+        return " → ".join(entry_label for _uri, entry_label in chain)
+    return ""
+
+
+def _is_iri(value):
+    """Return True when `value` is a string starting with http://, https:// or urn:."""
+    return isinstance(value, str) and value.startswith(("http://", "https://", "urn:"))
+
+
+async def _query_label(ws_url, flow_id, iri, user, collection, label_cache, debug=False):
+    """
+    Query for the rdfs:label of an IRI.
+    Uses label_cache to avoid repeated queries.
+    Returns the label if found, otherwise returns the IRI.
+
+    Values that do not look like IRIs (see _is_iri) are returned unchanged
+    without a query and without touching the cache. label_cache is a dict
+    mapping IRI to resolved label and is updated in place; the IRI itself
+    is cached as the label when nothing is found or the lookup fails.
+    """
+    if not _is_iri(iri):
+        return iri
+
+    # Check cache first
+    if iri in label_cache:
+        return label_cache[iri]
+
+    request = {
+        "id": "label-request",
+        "service": "triples",
+        "flow": flow_id,
+        "request": {
+            "s": {"t": "i", "i": iri},
+            "p": {"t": "i", "i": RDFS_LABEL},
+            "user": user,
+            "collection": collection,
+            "limit": 1
+        }
+    }
+
+    label = iri  # Default to IRI if no label found
+    try:
+        async with websockets.connect(ws_url, ping_interval=20, ping_timeout=30) as websocket:
+            await websocket.send(json.dumps(request))
+
+            async for raw_message in websocket:
+                response = json.loads(raw_message)
+
+                # Ignore messages that are not replies to this request
+                if response.get("id") != "label-request":
+                    continue
+
+                if "error" in response:
+                    break
+
+                if "response" in response:
+                    resp = response["response"]
+                    triple_list = resp.get("response", [])
+                    if triple_list:
+                        # Get the label value
+                        # Prefer the plain value, then an IRI object, then the IRI itself
+                        o = triple_list[0].get("o", {})
+                        label = o.get("v", o.get("i", iri))
+
+                    if resp.get("complete") or response.get("complete"):
+                        break
+    except Exception as e:
+        # Best-effort: on failure the IRI is used (and cached) as its own label
+        if debug:
+            print(f"    [debug] exception querying label for {iri}: {e}", file=sys.stderr)
+
+    # Cache the result
+    label_cache[iri] = label
+    return label
+
+
+async def _resolve_edge_labels(ws_url, flow_id, edge_triple, user, collection, label_cache, debug=False):
+    """
+    Look up display labels for the subject, predicate and object of an edge.
+
+    `edge_triple` is a dict with "s", "p" and "o" entries; a missing entry
+    is shown as "?". Lookups run one after another in s, p, o order and
+    share `label_cache`.
+
+    Returns (s_label, p_label, o_label).
+    """
+    components = [edge_triple.get(key, "?") for key in ("s", "p", "o")]
+
+    resolved = []
+    for component in components:
+        resolved.append(
+            await _query_label(ws_url, flow_id, component, user, collection, label_cache, debug)
+        )
+
+    return tuple(resolved)
+
+
+def _explain_socket_url(url, token=None):
+    """Derive the websocket endpoint URL from the API base URL, with optional token."""
+    from urllib.parse import quote
+
+    # Convert HTTP URL to WebSocket URL
+    if url.startswith("http://"):
+        ws_url = url.replace("http://", "ws://", 1)
+    elif url.startswith("https://"):
+        ws_url = url.replace("https://", "wss://", 1)
+    else:
+        ws_url = f"ws://{url}"
+
+    ws_url = f"{ws_url.rstrip('/')}/api/v1/socket"
+    if token:
+        # Percent-encode the token so characters such as "+", "&", "=" or
+        # "#" cannot corrupt the query string.
+        encoded_token = quote(str(token), safe="")
+        ws_url = f"{ws_url}?token={encoded_token}"
+    return ws_url
+
+
+def _explain_error_text(err):
+    """Return a printable message for an error payload that may or may not be a dict."""
+    if isinstance(err, dict):
+        return err.get('message', 'Unknown error')
+    return str(err)
+
+
+async def _show_edge_selection(
+        ws_url, flow_id, selection_uri, user, collection, explain_collection,
+        label_cache, debug=False
+):
+    """Print the edge, reasoning and source chains for one edge selection entity."""
+    if debug:
+        print(f"    [debug] querying edge selection: {selection_uri}", file=sys.stderr)
+    # Query the edge selection entity (using explain collection from event)
+    edge_triples = await _query_triples(
+        ws_url, flow_id, selection_uri, user, explain_collection, debug=debug
+    )
+    if debug:
+        print(f"    [debug] got {len(edge_triples)} edge triples", file=sys.stderr)
+
+    # Extract edge and reasoning
+    edge_triple = None  # Store the actual triple for provenance lookup
+    reasoning = None
+    for es, ep, eo in edge_triples:
+        if debug:
+            print(f"    [debug] edge triple: ep={ep}, eo={eo}", file=sys.stderr)
+        if ep == TG_EDGE and isinstance(eo, dict):
+            # eo is a quoted triple dict
+            edge_triple = eo
+        elif ep == TG_REASONING:
+            reasoning = eo
+
+    if edge_triple:
+        # Resolve labels for edge components
+        s_label, p_label, o_label = await _resolve_edge_labels(
+            ws_url, flow_id, edge_triple, user, collection,
+            label_cache, debug=debug
+        )
+        print(f"      Edge: ({s_label}, {p_label}, {o_label})", file=sys.stderr)
+
+    if reasoning:
+        r_short = reasoning[:100] + "..." if len(reasoning) > 100 else reasoning
+        print(f"        Reason: {r_short}", file=sys.stderr)
+
+    if not edge_triple:
+        return
+
+    # Trace edge provenance in the user's collection (not explainability)
+    sources = await _query_edge_provenance(
+        ws_url, flow_id,
+        edge_triple.get("s", ""),
+        edge_triple.get("p", ""),
+        edge_triple.get("o", ""),
+        user, collection,  # Use the query collection, not explainability
+        debug=debug
+    )
+    for src in sources:
+        # Trace full chain from source to root document
+        chain = await _trace_provenance_chain(
+            ws_url, flow_id, src, user, collection,
+            label_cache, debug=debug
+        )
+        chain_str = _format_provenance_chain(chain)
+        print(f"        Source: {chain_str}", file=sys.stderr)
+
+
+async def _show_explain_event(ws_url, flow_id, resp, user, collection, label_cache, debug=False):
+    """Print one explain event and, for selection events, each selected edge."""
+    explain_id = resp.get("explain_id", "")
+    explain_collection = resp.get("explain_collection", "explainability")
+    if not explain_id:
+        return
+
+    event_type = _get_event_type(explain_id)
+    print(f"\n  [{event_type}] {explain_id}", file=sys.stderr)
+
+    # Query triples for this explain node (using explain collection from event)
+    triples = await _query_triples(
+        ws_url, flow_id, explain_id, user, explain_collection, debug=debug
+    )
+
+    # Format and display details
+    for line in _format_provenance_details(event_type, triples):
+        print(line, file=sys.stderr)
+
+    if event_type != "selection":
+        return
+
+    # For selection events, query each edge selection for details
+    for s, p, o in triples:
+        if debug:
+            print(f"    [debug] triple: p={p}, o={o}, o_type={type(o).__name__}", file=sys.stderr)
+        if p == TG_SELECTED_EDGE and isinstance(o, str):
+            await _show_edge_selection(
+                ws_url, flow_id, o, user, collection, explain_collection,
+                label_cache, debug=debug
+            )
+
+
+async def _question_explainable(
+        url, flow_id, question, user, collection, entity_limit, triple_limit,
+        max_subgraph_size, max_path_length, token=None, debug=False
+):
+    """Execute graph RAG with explainability - shows provenance events with details
+
+    Sends a streaming "graph-rag" request over a websocket derived from
+    `url` and processes replies until one is marked end_of_session or an
+    error is reported. Answer chunks are written to stdout as they arrive;
+    explain events, their details, selected edges and source chains are
+    written to stderr. `token`, when given, is passed as a URL-encoded
+    query parameter. `debug` enables extra diagnostics on stderr.
+
+    Exceptions from the main websocket connection are not caught here and
+    propagate to the caller.
+    """
+    ws_url = _explain_socket_url(url, token)
+
+    # Cache for label lookups to avoid repeated queries
+    label_cache = {}
+
+    request = {
+        "id": "cli-request",
+        "service": "graph-rag",
+        "flow": flow_id,
+        "request": {
+            "query": question,
+            "user": user,
+            "collection": collection,
+            "entity-limit": entity_limit,
+            "triple-limit": triple_limit,
+            "max-subgraph-size": max_subgraph_size,
+            "max-path-length": max_path_length,
+            "streaming": True
+        }
+    }
+
+    async with websockets.connect(ws_url, ping_interval=20, ping_timeout=300) as websocket:
+        await websocket.send(json.dumps(request))
+
+        async for raw_message in websocket:
+            response = json.loads(raw_message)
+
+            # Ignore messages that are not replies to this request
+            if response.get("id") != "cli-request":
+                continue
+
+            if "error" in response:
+                print(f"\nError: {response['error']}", file=sys.stderr)
+                break
+
+            if "response" not in response:
+                continue
+
+            resp = response["response"]
+
+            # Check for errors in response; the payload is normally a dict
+            # with a "message", but tolerate any other shape rather than
+            # crashing while reporting the error.
+            if resp.get("error"):
+                print(f"\nError: {_explain_error_text(resp['error'])}", file=sys.stderr)
+                break
+
+            message_type = resp.get("message_type", "")
+
+            if debug:
+                print(f"  [debug] message_type={message_type}, keys={list(resp.keys())}", file=sys.stderr)
+
+            if message_type == "explain":
+                # Display explain event with details
+                await _show_explain_event(
+                    ws_url, flow_id, resp, user, collection, label_cache, debug=debug
+                )
+            elif message_type == "chunk" or not message_type:
+                # Display response chunk
+                chunk = resp.get("response", "")
+                if chunk:
+                    print(chunk, end="", flush=True)
+
+            # Check if session is complete
+            if resp.get("end_of_session"):
+                break
+
+    print()  # Final newline
+
+
 def question(
        url, flow_id, question, user, collection, entity_limit, triple_limit,
-        max_subgraph_size, max_path_length, streaming=True, token=None
+        max_subgraph_size, max_path_length, streaming=True, token=None,
+        explainable=False, debug=False
 ):

+    # Explainable mode uses direct websocket to capture provenance events
+    if explainable:
+        asyncio.run(_question_explainable(
+            url=url,
+            flow_id=flow_id,
+            question=question,
+            user=user,
+            collection=collection,
+            entity_limit=entity_limit,
+            triple_limit=triple_limit,
+            max_subgraph_size=max_subgraph_size,
+            max_path_length=max_path_length,
+            token=token,
+            debug=debug
+        ))
+        return
+
    # Create API client
    api = Api(url=url, token=token)

@ -138,6 +740,18 @@ def main():
        help='Disable streaming (use non-streaming mode)'
    )

+    parser.add_argument(
+        '-x', '--explainable',
+        action='store_true',
+        help='Show provenance events for explainability (implies streaming)'
+    )
+
+    parser.add_argument(
+        '--debug',
+        action='store_true',
+        help='Show debug output for troubleshooting'
+    )
+
    args = parser.parse_args()

    try:
@ -154,6 +768,8 @@ def main():
            max_path_length=args.max_path_length,
            streaming=not args.no_streaming,
            token=args.token,
+            explainable=args.explainable,
+            debug=args.debug,
        )

    except Exception as e: