mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-04-26 17:06:22 +02:00
Terminology Rename, and named-graphs for explainability (#682)
Terminology Rename, and named-graphs for explainability data
Changed terminology:
- session -> question
- retrieval -> exploration
- selection -> focus
- answer -> synthesis
- uris.py: Renamed query_session_uri → question_uri,
retrieval_uri → exploration_uri, selection_uri → focus_uri,
answer_uri → synthesis_uri
- triples.py: Renamed corresponding triple generation functions with
updated labels ("GraphRAG question", "Exploration", "Focus",
"Synthesis")
- namespaces.py: Added named graph constants GRAPH_DEFAULT,
GRAPH_SOURCE, GRAPH_RETRIEVAL
- __init__.py: Updated exports
- graph_rag.py: Updated to use new terminology
- invoke_graph_rag.py: Updated CLI to display new stage names
(Question, Exploration, Focus, Synthesis)
Query-Time Explainability → Named Graph
- triples.py: Added set_graph() helper function to set named graph
on triples
- graph_rag.py: All explainability triples now use GRAPH_RETRIEVAL
named graph
- rag.py: Explainability triples stored in user's collection (not
separate collection) with named graph
Extraction Provenance → Named Graph
- relationships/extract.py: Provenance triples use GRAPH_SOURCE
named graph
- definitions/extract.py: Provenance triples use GRAPH_SOURCE
named graph
- chunker.py: Provenance triples use GRAPH_SOURCE named graph
- pdf_decoder.py: Provenance triples use GRAPH_SOURCE named graph
CLI Updates
- show_graph.py: Added -g/--graph option to filter by named graph and
--show-graph to display graph column
Also:
- Fix knowledge core schemas
This commit is contained in:
parent
57eda65674
commit
e1bc4c04a4
17 changed files with 279 additions and 180 deletions
|
|
@ -14,6 +14,7 @@ from ... base import ChunkingService, ConsumerSpec, ProducerSpec
|
|||
from ... provenance import (
|
||||
page_uri, chunk_uri_from_page, chunk_uri_from_doc,
|
||||
derived_entity_triples, document_uri,
|
||||
set_graph, GRAPH_SOURCE,
|
||||
)
|
||||
|
||||
# Component identification for provenance
|
||||
|
|
@ -160,7 +161,7 @@ class Processor(ChunkingService):
|
|||
title=f"Chunk {chunk_index}",
|
||||
)
|
||||
|
||||
# Emit provenance triples
|
||||
# Emit provenance triples (stored in source graph for separation from core knowledge)
|
||||
prov_triples = derived_entity_triples(
|
||||
entity_uri=chunk_uri,
|
||||
parent_uri=parent_uri,
|
||||
|
|
@ -181,7 +182,7 @@ class Processor(ChunkingService):
|
|||
user=v.metadata.user,
|
||||
collection=v.metadata.collection,
|
||||
),
|
||||
triples=prov_triples,
|
||||
triples=set_graph(prov_triples, GRAPH_SOURCE),
|
||||
))
|
||||
|
||||
# Forward chunk ID + content (post-chunker optimization)
|
||||
|
|
|
|||
|
|
@ -24,6 +24,7 @@ from ... base import Consumer, Producer, ConsumerMetrics, ProducerMetrics
|
|||
|
||||
from ... provenance import (
|
||||
document_uri, page_uri, derived_entity_triples,
|
||||
set_graph, GRAPH_SOURCE,
|
||||
)
|
||||
|
||||
# Component identification for provenance
|
||||
|
|
@ -285,7 +286,7 @@ class Processor(FlowProcessor):
|
|||
title=f"Page {page_num}",
|
||||
)
|
||||
|
||||
# Emit provenance triples
|
||||
# Emit provenance triples (stored in source graph for separation from core knowledge)
|
||||
doc_uri = document_uri(source_doc_id)
|
||||
pg_uri = page_uri(source_doc_id, page_num)
|
||||
|
||||
|
|
@ -305,7 +306,7 @@ class Processor(FlowProcessor):
|
|||
user=v.metadata.user,
|
||||
collection=v.metadata.collection,
|
||||
),
|
||||
triples=prov_triples,
|
||||
triples=set_graph(prov_triples, GRAPH_SOURCE),
|
||||
))
|
||||
|
||||
# Forward page document ID to chunker
|
||||
|
|
|
|||
|
|
@ -20,7 +20,7 @@ from .... rdf import TRUSTGRAPH_ENTITIES, DEFINITION, RDF_LABEL, SUBJECT_OF
|
|||
from .... base import FlowProcessor, ConsumerSpec, ProducerSpec
|
||||
from .... base import PromptClientSpec, ParameterSpec
|
||||
|
||||
from .... provenance import statement_uri, triple_provenance_triples
|
||||
from .... provenance import statement_uri, triple_provenance_triples, set_graph, GRAPH_SOURCE
|
||||
from .... flow_version import __version__ as COMPONENT_VERSION
|
||||
|
||||
DEFINITION_VALUE = Term(type=IRI, iri=DEFINITION)
|
||||
|
|
@ -175,6 +175,7 @@ class Processor(FlowProcessor):
|
|||
triples.append(definition_triple)
|
||||
|
||||
# Generate provenance for the definition triple (reification)
|
||||
# Provenance triples go in the source graph for separation from core knowledge
|
||||
stmt_uri = statement_uri()
|
||||
prov_triples = triple_provenance_triples(
|
||||
stmt_uri=stmt_uri,
|
||||
|
|
@ -185,7 +186,7 @@ class Processor(FlowProcessor):
|
|||
llm_model=llm_model,
|
||||
ontology_uri=ontology_uri,
|
||||
)
|
||||
triples.extend(prov_triples)
|
||||
triples.extend(set_graph(prov_triples, GRAPH_SOURCE))
|
||||
|
||||
# Link entity to chunk (not top-level document)
|
||||
triples.append(Triple(
|
||||
|
|
|
|||
|
|
@ -20,7 +20,7 @@ from .... rdf import RDF_LABEL, TRUSTGRAPH_ENTITIES, SUBJECT_OF
|
|||
from .... base import FlowProcessor, ConsumerSpec, ProducerSpec
|
||||
from .... base import PromptClientSpec, ParameterSpec
|
||||
|
||||
from .... provenance import statement_uri, triple_provenance_triples
|
||||
from .... provenance import statement_uri, triple_provenance_triples, set_graph, GRAPH_SOURCE
|
||||
from .... flow_version import __version__ as COMPONENT_VERSION
|
||||
|
||||
RDF_LABEL_VALUE = Term(type=IRI, iri=RDF_LABEL)
|
||||
|
|
@ -162,6 +162,7 @@ class Processor(FlowProcessor):
|
|||
triples.append(relationship_triple)
|
||||
|
||||
# Generate provenance for the relationship triple (reification)
|
||||
# Provenance triples go in the source graph for separation from core knowledge
|
||||
stmt_uri = statement_uri()
|
||||
prov_triples = triple_provenance_triples(
|
||||
stmt_uri=stmt_uri,
|
||||
|
|
@ -172,7 +173,7 @@ class Processor(FlowProcessor):
|
|||
llm_model=llm_model,
|
||||
ontology_uri=ontology_uri,
|
||||
)
|
||||
triples.extend(prov_triples)
|
||||
triples.extend(set_graph(prov_triples, GRAPH_SOURCE))
|
||||
|
||||
# Label for s
|
||||
triples.append(Triple(
|
||||
|
|
|
|||
|
|
@ -12,14 +12,16 @@ from ... schema import IRI, LITERAL
|
|||
|
||||
# Provenance imports
|
||||
from trustgraph.provenance import (
|
||||
query_session_uri,
|
||||
retrieval_uri as make_retrieval_uri,
|
||||
selection_uri as make_selection_uri,
|
||||
answer_uri as make_answer_uri,
|
||||
query_session_triples,
|
||||
retrieval_triples,
|
||||
selection_triples,
|
||||
answer_triples,
|
||||
question_uri,
|
||||
exploration_uri as make_exploration_uri,
|
||||
focus_uri as make_focus_uri,
|
||||
synthesis_uri as make_synthesis_uri,
|
||||
question_triples,
|
||||
exploration_triples,
|
||||
focus_triples,
|
||||
synthesis_triples,
|
||||
set_graph,
|
||||
GRAPH_RETRIEVAL,
|
||||
)
|
||||
|
||||
# Module logger
|
||||
|
|
@ -396,17 +398,20 @@ class GraphRag:
|
|||
|
||||
# Generate explainability URIs upfront
|
||||
session_id = str(uuid.uuid4())
|
||||
session_uri = query_session_uri(session_id)
|
||||
ret_uri = make_retrieval_uri(session_id)
|
||||
sel_uri = make_selection_uri(session_id)
|
||||
ans_uri = make_answer_uri(session_id)
|
||||
q_uri = question_uri(session_id)
|
||||
exp_uri = make_exploration_uri(session_id)
|
||||
foc_uri = make_focus_uri(session_id)
|
||||
syn_uri = make_synthesis_uri(session_id)
|
||||
|
||||
timestamp = datetime.utcnow().isoformat() + "Z"
|
||||
|
||||
# Emit session explainability immediately
|
||||
# Emit question explainability immediately
|
||||
if explain_callback:
|
||||
session_triples = query_session_triples(session_uri, query, timestamp)
|
||||
await explain_callback(session_triples, session_uri)
|
||||
q_triples = set_graph(
|
||||
question_triples(q_uri, query, timestamp),
|
||||
GRAPH_RETRIEVAL
|
||||
)
|
||||
await explain_callback(q_triples, q_uri)
|
||||
|
||||
q = Query(
|
||||
rag = self, user = user, collection = collection,
|
||||
|
|
@ -418,10 +423,13 @@ class GraphRag:
|
|||
|
||||
kg, uri_map = await q.get_labelgraph(query)
|
||||
|
||||
# Emit retrieval explain after graph retrieval completes
|
||||
# Emit exploration explain after graph retrieval completes
|
||||
if explain_callback:
|
||||
ret_triples = retrieval_triples(ret_uri, session_uri, len(kg))
|
||||
await explain_callback(ret_triples, ret_uri)
|
||||
exp_triples = set_graph(
|
||||
exploration_triples(exp_uri, q_uri, len(kg)),
|
||||
GRAPH_RETRIEVAL
|
||||
)
|
||||
await explain_callback(exp_triples, exp_uri)
|
||||
|
||||
if self.verbose:
|
||||
logger.debug("Invoking LLM...")
|
||||
|
|
@ -511,12 +519,15 @@ class GraphRag:
|
|||
if self.verbose:
|
||||
logger.debug(f"Filtered to {len(selected_edges)} edges")
|
||||
|
||||
# Emit selection explain after edge selection completes
|
||||
# Emit focus explain after edge selection completes
|
||||
if explain_callback:
|
||||
sel_triples = selection_triples(
|
||||
sel_uri, ret_uri, selected_edges_with_reasoning, session_id
|
||||
foc_triples = set_graph(
|
||||
focus_triples(
|
||||
foc_uri, exp_uri, selected_edges_with_reasoning, session_id
|
||||
),
|
||||
GRAPH_RETRIEVAL
|
||||
)
|
||||
await explain_callback(sel_triples, sel_uri)
|
||||
await explain_callback(foc_triples, foc_uri)
|
||||
|
||||
# Step 2: Synthesis - LLM generates answer from selected edges only
|
||||
selected_edge_dicts = [
|
||||
|
|
@ -554,30 +565,33 @@ class GraphRag:
|
|||
if self.verbose:
|
||||
logger.debug("Query processing complete")
|
||||
|
||||
# Emit answer explain after synthesis completes
|
||||
# Emit synthesis explain after synthesis completes
|
||||
if explain_callback:
|
||||
answer_doc_id = None
|
||||
synthesis_doc_id = None
|
||||
answer_text = resp if resp else ""
|
||||
|
||||
# Save answer to librarian if callback provided
|
||||
if save_answer_callback and answer_text:
|
||||
# Generate document ID as URN matching query-time provenance format
|
||||
answer_doc_id = f"urn:trustgraph:answer:{session_id}"
|
||||
synthesis_doc_id = f"urn:trustgraph:synthesis:{session_id}"
|
||||
try:
|
||||
await save_answer_callback(answer_doc_id, answer_text)
|
||||
await save_answer_callback(synthesis_doc_id, answer_text)
|
||||
if self.verbose:
|
||||
logger.debug(f"Saved answer to librarian: {answer_doc_id}")
|
||||
logger.debug(f"Saved answer to librarian: {synthesis_doc_id}")
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to save answer to librarian: {e}")
|
||||
answer_doc_id = None # Fall back to inline content
|
||||
synthesis_doc_id = None # Fall back to inline content
|
||||
|
||||
# Generate triples with document reference or inline content
|
||||
ans_triples = answer_triples(
|
||||
ans_uri, sel_uri,
|
||||
answer_text="" if answer_doc_id else answer_text,
|
||||
document_id=answer_doc_id,
|
||||
syn_triples = set_graph(
|
||||
synthesis_triples(
|
||||
syn_uri, foc_uri,
|
||||
answer_text="" if synthesis_doc_id else answer_text,
|
||||
document_id=synthesis_doc_id,
|
||||
),
|
||||
GRAPH_RETRIEVAL
|
||||
)
|
||||
await explain_callback(ans_triples, ans_uri)
|
||||
await explain_callback(syn_triples, syn_uri)
|
||||
|
||||
if self.verbose:
|
||||
logger.debug(f"Emitted explain for session {session_id}")
|
||||
|
|
|
|||
|
|
@ -13,6 +13,7 @@ from ... schema import GraphRagQuery, GraphRagResponse, Error
|
|||
from ... schema import Triples, Metadata
|
||||
from ... schema import LibrarianRequest, LibrarianResponse, DocumentMetadata
|
||||
from ... schema import librarian_request_queue, librarian_response_queue
|
||||
from ... provenance import GRAPH_RETRIEVAL
|
||||
from . graph_rag import GraphRag
|
||||
from ... base import FlowProcessor, ConsumerSpec, ProducerSpec
|
||||
from ... base import PromptClientSpec, EmbeddingsClientSpec
|
||||
|
|
@ -38,7 +39,6 @@ class Processor(FlowProcessor):
|
|||
triple_limit = params.get("triple_limit", 30)
|
||||
max_subgraph_size = params.get("max_subgraph_size", 150)
|
||||
max_path_length = params.get("max_path_length", 2)
|
||||
explainability_collection = params.get("explainability_collection", "explainability")
|
||||
|
||||
super(Processor, self).__init__(
|
||||
**params | {
|
||||
|
|
@ -48,7 +48,6 @@ class Processor(FlowProcessor):
|
|||
"triple_limit": triple_limit,
|
||||
"max_subgraph_size": max_subgraph_size,
|
||||
"max_path_length": max_path_length,
|
||||
"explainability_collection": explainability_collection,
|
||||
}
|
||||
)
|
||||
|
||||
|
|
@ -56,7 +55,6 @@ class Processor(FlowProcessor):
|
|||
self.default_triple_limit = triple_limit
|
||||
self.default_max_subgraph_size = max_subgraph_size
|
||||
self.default_max_path_length = max_path_length
|
||||
self.explainability_collection = explainability_collection
|
||||
|
||||
# CRITICAL SECURITY: NEVER share data between users or collections
|
||||
# Each user/collection combination MUST have isolated data access
|
||||
|
|
@ -239,24 +237,25 @@ class Processor(FlowProcessor):
|
|||
explainability_refs_emitted = []
|
||||
|
||||
# Real-time explainability callback - emits triples and IDs as they're generated
|
||||
# Triples are stored in the user's collection with a named graph (urn:graph:retrieval)
|
||||
async def send_explainability(triples, explain_id):
|
||||
# Send triples to explainability queue
|
||||
# Send triples to explainability queue - stores in same collection with named graph
|
||||
await flow("explainability").send(Triples(
|
||||
metadata=Metadata(
|
||||
id=explain_id,
|
||||
metadata=[],
|
||||
user=v.user,
|
||||
collection=self.explainability_collection,
|
||||
collection=v.collection, # Store in user's collection, not separate explainability collection
|
||||
),
|
||||
triples=triples,
|
||||
))
|
||||
|
||||
# Send explain ID and collection to response queue
|
||||
# Send explain ID and graph to response queue
|
||||
await flow("response").send(
|
||||
GraphRagResponse(
|
||||
message_type="explain",
|
||||
explain_id=explain_id,
|
||||
explain_collection=self.explainability_collection,
|
||||
explain_graph=GRAPH_RETRIEVAL,
|
||||
),
|
||||
properties={"id": id}
|
||||
)
|
||||
|
|
@ -424,11 +423,8 @@ class Processor(FlowProcessor):
|
|||
help=f'Default max path length (default: 2)'
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'--explainability-collection',
|
||||
default='explainability',
|
||||
help=f'Collection for storing explainability triples (default: explainability)'
|
||||
)
|
||||
# Note: Explainability triples are now stored in the user's collection
|
||||
# with the named graph urn:graph:retrieval (no separate collection needed)
|
||||
|
||||
def run():
|
||||
|
||||
|
|
|
|||
|
|
@ -114,7 +114,7 @@ class KnowledgeTableStore:
|
|||
entity_embeddings list<
|
||||
tuple<
|
||||
tuple<text, boolean>,
|
||||
list<list<double>>
|
||||
list<double>
|
||||
>
|
||||
>,
|
||||
PRIMARY KEY ((user, document_id), id)
|
||||
|
|
@ -140,7 +140,7 @@ class KnowledgeTableStore:
|
|||
chunks list<
|
||||
tuple<
|
||||
blob,
|
||||
list<list<double>>
|
||||
list<double>
|
||||
>
|
||||
>,
|
||||
PRIMARY KEY ((user, document_id), id)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue