Terminology Rename, and named-graphs for explainability (#682)

Terminology Rename, and named-graphs for explainability data

Changed terminology:
  - session -> question
  - retrieval -> exploration
  - selection -> focus
  - answer -> synthesis

- uris.py: Renamed query_session_uri → question_uri,
  retrieval_uri → exploration_uri, selection_uri → focus_uri,
  answer_uri → synthesis_uri
- triples.py: Renamed corresponding triple generation functions with
  updated labels ("GraphRAG question", "Exploration", "Focus",
  "Synthesis")
- namespaces.py: Added named graph constants GRAPH_DEFAULT,
  GRAPH_SOURCE, GRAPH_RETRIEVAL
- __init__.py: Updated exports
- graph_rag.py: Updated to use new terminology
- invoke_graph_rag.py: Updated CLI to display new stage names
  (Question, Exploration, Focus, Synthesis)

Query-Time Explainability → Named Graph
- triples.py: Added set_graph() helper function to set named graph
  on triples
- graph_rag.py: All explainability triples now use GRAPH_RETRIEVAL
  named graph
- rag.py: Explainability triples stored in user's collection (not
  separate collection) with named graph

Extraction Provenance → Named Graph
- relationships/extract.py: Provenance triples use GRAPH_SOURCE
  named graph
- definitions/extract.py: Provenance triples use GRAPH_SOURCE
  named graph
- chunker.py: Provenance triples use GRAPH_SOURCE named graph
- pdf_decoder.py: Provenance triples use GRAPH_SOURCE named graph

CLI Updates
- show_graph.py: Added -g/--graph option to filter by named graph and
  --show-graph to display graph column

Also:
- Fix knowledge core schemas
This commit is contained in:
cybermaggedon 2026-03-10 14:35:21 +00:00 committed by GitHub
parent 57eda65674
commit e1bc4c04a4
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
17 changed files with 279 additions and 180 deletions

View file

@@ -12,14 +12,16 @@ from ... schema import IRI, LITERAL
# Provenance imports
from trustgraph.provenance import (
query_session_uri,
retrieval_uri as make_retrieval_uri,
selection_uri as make_selection_uri,
answer_uri as make_answer_uri,
query_session_triples,
retrieval_triples,
selection_triples,
answer_triples,
question_uri,
exploration_uri as make_exploration_uri,
focus_uri as make_focus_uri,
synthesis_uri as make_synthesis_uri,
question_triples,
exploration_triples,
focus_triples,
synthesis_triples,
set_graph,
GRAPH_RETRIEVAL,
)
# Module logger
@@ -396,17 +398,20 @@ class GraphRag:
# Generate explainability URIs upfront
session_id = str(uuid.uuid4())
session_uri = query_session_uri(session_id)
ret_uri = make_retrieval_uri(session_id)
sel_uri = make_selection_uri(session_id)
ans_uri = make_answer_uri(session_id)
q_uri = question_uri(session_id)
exp_uri = make_exploration_uri(session_id)
foc_uri = make_focus_uri(session_id)
syn_uri = make_synthesis_uri(session_id)
timestamp = datetime.utcnow().isoformat() + "Z"
# Emit session explainability immediately
# Emit question explainability immediately
if explain_callback:
session_triples = query_session_triples(session_uri, query, timestamp)
await explain_callback(session_triples, session_uri)
q_triples = set_graph(
question_triples(q_uri, query, timestamp),
GRAPH_RETRIEVAL
)
await explain_callback(q_triples, q_uri)
q = Query(
rag = self, user = user, collection = collection,
@@ -418,10 +423,13 @@ class GraphRag:
kg, uri_map = await q.get_labelgraph(query)
# Emit retrieval explain after graph retrieval completes
# Emit exploration explain after graph retrieval completes
if explain_callback:
ret_triples = retrieval_triples(ret_uri, session_uri, len(kg))
await explain_callback(ret_triples, ret_uri)
exp_triples = set_graph(
exploration_triples(exp_uri, q_uri, len(kg)),
GRAPH_RETRIEVAL
)
await explain_callback(exp_triples, exp_uri)
if self.verbose:
logger.debug("Invoking LLM...")
@@ -511,12 +519,15 @@ class GraphRag:
if self.verbose:
logger.debug(f"Filtered to {len(selected_edges)} edges")
# Emit selection explain after edge selection completes
# Emit focus explain after edge selection completes
if explain_callback:
sel_triples = selection_triples(
sel_uri, ret_uri, selected_edges_with_reasoning, session_id
foc_triples = set_graph(
focus_triples(
foc_uri, exp_uri, selected_edges_with_reasoning, session_id
),
GRAPH_RETRIEVAL
)
await explain_callback(sel_triples, sel_uri)
await explain_callback(foc_triples, foc_uri)
# Step 2: Synthesis - LLM generates answer from selected edges only
selected_edge_dicts = [
@@ -554,30 +565,33 @@ class GraphRag:
if self.verbose:
logger.debug("Query processing complete")
# Emit answer explain after synthesis completes
# Emit synthesis explain after synthesis completes
if explain_callback:
answer_doc_id = None
synthesis_doc_id = None
answer_text = resp if resp else ""
# Save answer to librarian if callback provided
if save_answer_callback and answer_text:
# Generate document ID as URN matching query-time provenance format
answer_doc_id = f"urn:trustgraph:answer:{session_id}"
synthesis_doc_id = f"urn:trustgraph:synthesis:{session_id}"
try:
await save_answer_callback(answer_doc_id, answer_text)
await save_answer_callback(synthesis_doc_id, answer_text)
if self.verbose:
logger.debug(f"Saved answer to librarian: {answer_doc_id}")
logger.debug(f"Saved answer to librarian: {synthesis_doc_id}")
except Exception as e:
logger.warning(f"Failed to save answer to librarian: {e}")
answer_doc_id = None # Fall back to inline content
synthesis_doc_id = None # Fall back to inline content
# Generate triples with document reference or inline content
ans_triples = answer_triples(
ans_uri, sel_uri,
answer_text="" if answer_doc_id else answer_text,
document_id=answer_doc_id,
syn_triples = set_graph(
synthesis_triples(
syn_uri, foc_uri,
answer_text="" if synthesis_doc_id else answer_text,
document_id=synthesis_doc_id,
),
GRAPH_RETRIEVAL
)
await explain_callback(ans_triples, ans_uri)
await explain_callback(syn_triples, syn_uri)
if self.verbose:
logger.debug(f"Emitted explain for session {session_id}")

View file

@@ -13,6 +13,7 @@ from ... schema import GraphRagQuery, GraphRagResponse, Error
from ... schema import Triples, Metadata
from ... schema import LibrarianRequest, LibrarianResponse, DocumentMetadata
from ... schema import librarian_request_queue, librarian_response_queue
from ... provenance import GRAPH_RETRIEVAL
from . graph_rag import GraphRag
from ... base import FlowProcessor, ConsumerSpec, ProducerSpec
from ... base import PromptClientSpec, EmbeddingsClientSpec
@@ -38,7 +39,6 @@ class Processor(FlowProcessor):
triple_limit = params.get("triple_limit", 30)
max_subgraph_size = params.get("max_subgraph_size", 150)
max_path_length = params.get("max_path_length", 2)
explainability_collection = params.get("explainability_collection", "explainability")
super(Processor, self).__init__(
**params | {
@@ -48,7 +48,6 @@ class Processor(FlowProcessor):
"triple_limit": triple_limit,
"max_subgraph_size": max_subgraph_size,
"max_path_length": max_path_length,
"explainability_collection": explainability_collection,
}
)
@@ -56,7 +55,6 @@ class Processor(FlowProcessor):
self.default_triple_limit = triple_limit
self.default_max_subgraph_size = max_subgraph_size
self.default_max_path_length = max_path_length
self.explainability_collection = explainability_collection
# CRITICAL SECURITY: NEVER share data between users or collections
# Each user/collection combination MUST have isolated data access
@@ -239,24 +237,25 @@ class Processor(FlowProcessor):
explainability_refs_emitted = []
# Real-time explainability callback - emits triples and IDs as they're generated
# Triples are stored in the user's collection with a named graph (urn:graph:retrieval)
async def send_explainability(triples, explain_id):
# Send triples to explainability queue
# Send triples to explainability queue - stores in same collection with named graph
await flow("explainability").send(Triples(
metadata=Metadata(
id=explain_id,
metadata=[],
user=v.user,
collection=self.explainability_collection,
collection=v.collection, # Store in user's collection, not separate explainability collection
),
triples=triples,
))
# Send explain ID and collection to response queue
# Send explain ID and graph to response queue
await flow("response").send(
GraphRagResponse(
message_type="explain",
explain_id=explain_id,
explain_collection=self.explainability_collection,
explain_graph=GRAPH_RETRIEVAL,
),
properties={"id": id}
)
@@ -424,11 +423,8 @@ class Processor(FlowProcessor):
help=f'Default max path length (default: 2)'
)
parser.add_argument(
'--explainability-collection',
default='explainability',
help=f'Collection for storing explainability triples (default: explainability)'
)
# Note: Explainability triples are now stored in the user's collection
# with the named graph urn:graph:retrieval (no separate collection needed)
def run():