mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-04-25 08:26:21 +02:00
Terminology Rename, and named-graphs for explainability (#682)
Terminology Rename, and named-graphs for explainability data
Changed terminology:
- session -> question
- retrieval -> exploration
- selection -> focus
- answer -> synthesis
- uris.py: Renamed query_session_uri → question_uri,
retrieval_uri → exploration_uri, selection_uri → focus_uri,
answer_uri → synthesis_uri
- triples.py: Renamed corresponding triple generation functions with
updated labels ("GraphRAG question", "Exploration", "Focus",
"Synthesis")
- namespaces.py: Added named graph constants GRAPH_DEFAULT,
GRAPH_SOURCE, GRAPH_RETRIEVAL
- __init__.py: Updated exports
- graph_rag.py: Updated to use new terminology
- invoke_graph_rag.py: Updated CLI to display new stage names
(Question, Exploration, Focus, Synthesis)
Query-Time Explainability → Named Graph
- triples.py: Added set_graph() helper function to set named graph
on triples
- graph_rag.py: All explainability triples now use GRAPH_RETRIEVAL
named graph
- rag.py: Explainability triples are stored in the user's collection
(not a separate collection), under the GRAPH_RETRIEVAL named graph
Extraction Provenance → Named Graph
- relationships/extract.py: Provenance triples use GRAPH_SOURCE
named graph
- definitions/extract.py: Provenance triples use GRAPH_SOURCE
named graph
- chunker.py: Provenance triples use GRAPH_SOURCE named graph
- pdf_decoder.py: Provenance triples use GRAPH_SOURCE named graph
CLI Updates
- show_graph.py: Added -g/--graph option to filter by named graph and
--show-graph to display graph column
Also:
- Fix knowledge core schemas
This commit is contained in:
parent
57eda65674
commit
e1bc4c04a4
17 changed files with 279 additions and 180 deletions
|
|
@ -104,10 +104,10 @@ class GraphRagResponseTranslator(MessageTranslator):
|
|||
if explain_id:
|
||||
result["explain_id"] = explain_id
|
||||
|
||||
# Include explain_collection for explain messages
|
||||
explain_collection = getattr(obj, "explain_collection", None)
|
||||
if explain_collection:
|
||||
result["explain_collection"] = explain_collection
|
||||
# Include explain_graph for explain messages (named graph filter)
|
||||
explain_graph = getattr(obj, "explain_graph", None)
|
||||
if explain_graph is not None:
|
||||
result["explain_graph"] = explain_graph
|
||||
|
||||
# Include end_of_stream flag (LLM stream complete)
|
||||
result["end_of_stream"] = getattr(obj, "end_of_stream", False)
|
||||
|
|
|
|||
|
|
@ -41,10 +41,10 @@ from . uris import (
|
|||
statement_uri,
|
||||
agent_uri,
|
||||
# Query-time provenance URIs
|
||||
query_session_uri,
|
||||
retrieval_uri,
|
||||
selection_uri,
|
||||
answer_uri,
|
||||
question_uri,
|
||||
exploration_uri,
|
||||
focus_uri,
|
||||
synthesis_uri,
|
||||
)
|
||||
|
||||
# Namespace constants
|
||||
|
|
@ -65,6 +65,8 @@ from . namespaces import (
|
|||
TG_SOURCE_TEXT, TG_SOURCE_CHAR_OFFSET, TG_SOURCE_CHAR_LENGTH,
|
||||
# Query-time provenance predicates
|
||||
TG_QUERY, TG_EDGE_COUNT, TG_SELECTED_EDGE, TG_REASONING, TG_CONTENT,
|
||||
# Named graphs
|
||||
GRAPH_DEFAULT, GRAPH_SOURCE, GRAPH_RETRIEVAL,
|
||||
)
|
||||
|
||||
# Triple builders
|
||||
|
|
@ -73,10 +75,12 @@ from . triples import (
|
|||
derived_entity_triples,
|
||||
triple_provenance_triples,
|
||||
# Query-time provenance triple builders
|
||||
query_session_triples,
|
||||
retrieval_triples,
|
||||
selection_triples,
|
||||
answer_triples,
|
||||
question_triples,
|
||||
exploration_triples,
|
||||
focus_triples,
|
||||
synthesis_triples,
|
||||
# Utility
|
||||
set_graph,
|
||||
)
|
||||
|
||||
# Vocabulary bootstrap
|
||||
|
|
@ -99,10 +103,10 @@ __all__ = [
|
|||
"statement_uri",
|
||||
"agent_uri",
|
||||
# Query-time provenance URIs
|
||||
"query_session_uri",
|
||||
"retrieval_uri",
|
||||
"selection_uri",
|
||||
"answer_uri",
|
||||
"question_uri",
|
||||
"exploration_uri",
|
||||
"focus_uri",
|
||||
"synthesis_uri",
|
||||
# Namespaces
|
||||
"PROV", "PROV_ENTITY", "PROV_ACTIVITY", "PROV_AGENT",
|
||||
"PROV_WAS_DERIVED_FROM", "PROV_WAS_GENERATED_BY",
|
||||
|
|
@ -116,15 +120,19 @@ __all__ = [
|
|||
"TG_SOURCE_TEXT", "TG_SOURCE_CHAR_OFFSET", "TG_SOURCE_CHAR_LENGTH",
|
||||
# Query-time provenance predicates
|
||||
"TG_QUERY", "TG_EDGE_COUNT", "TG_SELECTED_EDGE", "TG_REASONING", "TG_CONTENT",
|
||||
# Named graphs
|
||||
"GRAPH_DEFAULT", "GRAPH_SOURCE", "GRAPH_RETRIEVAL",
|
||||
# Triple builders
|
||||
"document_triples",
|
||||
"derived_entity_triples",
|
||||
"triple_provenance_triples",
|
||||
# Query-time provenance triple builders
|
||||
"query_session_triples",
|
||||
"retrieval_triples",
|
||||
"selection_triples",
|
||||
"answer_triples",
|
||||
"question_triples",
|
||||
"exploration_triples",
|
||||
"focus_triples",
|
||||
"synthesis_triples",
|
||||
# Utility
|
||||
"set_graph",
|
||||
# Vocabulary
|
||||
"get_vocabulary_triples",
|
||||
"PROV_CLASS_LABELS",
|
||||
|
|
|
|||
|
|
@ -67,3 +67,9 @@ TG_EDGE = TG + "edge"
|
|||
TG_REASONING = TG + "reasoning"
|
||||
TG_CONTENT = TG + "content"
|
||||
TG_DOCUMENT = TG + "document" # Reference to document in librarian
|
||||
|
||||
# Named graph URIs for RDF datasets
|
||||
# These separate different types of data while keeping them in the same collection
|
||||
GRAPH_DEFAULT = "" # Core knowledge facts (triples extracted from documents)
|
||||
GRAPH_SOURCE = "urn:graph:source" # Extraction provenance (which document/chunk a triple came from)
|
||||
GRAPH_RETRIEVAL = "urn:graph:retrieval" # Query-time explainability (question, exploration, focus, synthesis)
|
||||
|
|
|
|||
|
|
@ -25,6 +25,26 @@ from . namespaces import (
|
|||
from . uris import activity_uri, agent_uri, edge_selection_uri
|
||||
|
||||
|
||||
def set_graph(triples: List[Triple], graph: str) -> List[Triple]:
    """
    Return copies of the given triples assigned to a named graph.

    Each input triple is rebuilt as a new Triple carrying the same
    subject, predicate and object, with its graph field set to `graph`.
    The input triples themselves are left as they were.

    Args:
        triples: Triple objects to copy
        graph: Named graph URI (e.g., "urn:graph:retrieval")

    Returns:
        New list of Triple objects, in input order, with g set to graph
    """
    tagged: List[Triple] = []
    for source in triples:
        # Rebuild rather than mutate so callers keep their originals.
        tagged.append(
            Triple(s=source.s, p=source.p, o=source.o, g=graph)
        )
    return tagged
|
||||
|
||||
|
||||
def _iri(uri: str) -> Term:
|
||||
"""Create an IRI term."""
|
||||
return Term(type=IRI, iri=uri)
|
||||
|
|
@ -258,21 +278,27 @@ def triple_provenance_triples(
|
|||
|
||||
|
||||
# Query-time provenance triple builders
|
||||
#
|
||||
# Terminology:
|
||||
# Question - What was asked, the anchor for everything
|
||||
# Exploration - Casting wide, what do we know about this space
|
||||
# Focus - Closing down, what's actually relevant here
|
||||
# Synthesis - Weaving the relevant pieces into an answer
|
||||
|
||||
def query_session_triples(
|
||||
session_uri: str,
|
||||
def question_triples(
|
||||
question_uri: str,
|
||||
query: str,
|
||||
timestamp: Optional[str] = None,
|
||||
) -> List[Triple]:
|
||||
"""
|
||||
Build triples for a query session activity.
|
||||
Build triples for a question activity.
|
||||
|
||||
Creates:
|
||||
- Activity declaration for the query session
|
||||
- Activity declaration for the question
|
||||
- Query text and timestamp
|
||||
|
||||
Args:
|
||||
session_uri: URI of the session (from query_session_uri)
|
||||
question_uri: URI of the question (from question_uri)
|
||||
query: The user's query text
|
||||
timestamp: ISO timestamp (defaults to now)
|
||||
|
||||
|
|
@ -283,39 +309,39 @@ def query_session_triples(
|
|||
timestamp = datetime.utcnow().isoformat() + "Z"
|
||||
|
||||
return [
|
||||
_triple(session_uri, RDF_TYPE, _iri(PROV_ACTIVITY)),
|
||||
_triple(session_uri, RDFS_LABEL, _literal("GraphRAG query session")),
|
||||
_triple(session_uri, PROV_STARTED_AT_TIME, _literal(timestamp)),
|
||||
_triple(session_uri, TG_QUERY, _literal(query)),
|
||||
_triple(question_uri, RDF_TYPE, _iri(PROV_ACTIVITY)),
|
||||
_triple(question_uri, RDFS_LABEL, _literal("GraphRAG question")),
|
||||
_triple(question_uri, PROV_STARTED_AT_TIME, _literal(timestamp)),
|
||||
_triple(question_uri, TG_QUERY, _literal(query)),
|
||||
]
|
||||
|
||||
|
||||
def retrieval_triples(
|
||||
retrieval_uri: str,
|
||||
session_uri: str,
|
||||
def exploration_triples(
|
||||
exploration_uri: str,
|
||||
question_uri: str,
|
||||
edge_count: int,
|
||||
) -> List[Triple]:
|
||||
"""
|
||||
Build triples for a retrieval entity (all edges retrieved from subgraph).
|
||||
Build triples for an exploration entity (all edges retrieved from subgraph).
|
||||
|
||||
Creates:
|
||||
- Entity declaration for retrieval
|
||||
- wasGeneratedBy link to session
|
||||
- Entity declaration for exploration
|
||||
- wasGeneratedBy link to question
|
||||
- Edge count metadata
|
||||
|
||||
Args:
|
||||
retrieval_uri: URI of the retrieval entity (from retrieval_uri)
|
||||
session_uri: URI of the parent session
|
||||
exploration_uri: URI of the exploration entity (from exploration_uri)
|
||||
question_uri: URI of the parent question
|
||||
edge_count: Number of edges retrieved
|
||||
|
||||
Returns:
|
||||
List of Triple objects
|
||||
"""
|
||||
return [
|
||||
_triple(retrieval_uri, RDF_TYPE, _iri(PROV_ENTITY)),
|
||||
_triple(retrieval_uri, RDFS_LABEL, _literal("Retrieved edges")),
|
||||
_triple(retrieval_uri, PROV_WAS_GENERATED_BY, _iri(session_uri)),
|
||||
_triple(retrieval_uri, TG_EDGE_COUNT, _literal(edge_count)),
|
||||
_triple(exploration_uri, RDF_TYPE, _iri(PROV_ENTITY)),
|
||||
_triple(exploration_uri, RDFS_LABEL, _literal("Exploration")),
|
||||
_triple(exploration_uri, PROV_WAS_GENERATED_BY, _iri(question_uri)),
|
||||
_triple(exploration_uri, TG_EDGE_COUNT, _literal(edge_count)),
|
||||
]
|
||||
|
||||
|
||||
|
|
@ -327,28 +353,28 @@ def _quoted_triple(s: str, p: str, o: str) -> Term:
|
|||
)
|
||||
|
||||
|
||||
def selection_triples(
|
||||
selection_uri: str,
|
||||
retrieval_uri: str,
|
||||
def focus_triples(
|
||||
focus_uri: str,
|
||||
exploration_uri: str,
|
||||
selected_edges_with_reasoning: List[dict],
|
||||
session_id: str = "",
|
||||
) -> List[Triple]:
|
||||
"""
|
||||
Build triples for a selection entity (selected edges with reasoning).
|
||||
Build triples for a focus entity (selected edges with reasoning).
|
||||
|
||||
Creates:
|
||||
- Entity declaration for selection
|
||||
- wasDerivedFrom link to retrieval
|
||||
- Entity declaration for focus
|
||||
- wasDerivedFrom link to exploration
|
||||
- For each selected edge: an edge selection entity with quoted triple and reasoning
|
||||
|
||||
Structure:
|
||||
<selection> tg:selectedEdge <edge_sel_1> .
|
||||
<focus> tg:selectedEdge <edge_sel_1> .
|
||||
<edge_sel_1> tg:edge << <s> <p> <o> >> .
|
||||
<edge_sel_1> tg:reasoning "reason" .
|
||||
|
||||
Args:
|
||||
selection_uri: URI of the selection entity (from selection_uri)
|
||||
retrieval_uri: URI of the parent retrieval entity
|
||||
focus_uri: URI of the focus entity (from focus_uri)
|
||||
exploration_uri: URI of the parent exploration entity
|
||||
selected_edges_with_reasoning: List of dicts with 'edge' (s,p,o tuple) and 'reasoning'
|
||||
session_id: Session UUID for generating edge selection URIs
|
||||
|
||||
|
|
@ -356,9 +382,9 @@ def selection_triples(
|
|||
List of Triple objects
|
||||
"""
|
||||
triples = [
|
||||
_triple(selection_uri, RDF_TYPE, _iri(PROV_ENTITY)),
|
||||
_triple(selection_uri, RDFS_LABEL, _literal("Selected edges")),
|
||||
_triple(selection_uri, PROV_WAS_DERIVED_FROM, _iri(retrieval_uri)),
|
||||
_triple(focus_uri, RDF_TYPE, _iri(PROV_ENTITY)),
|
||||
_triple(focus_uri, RDFS_LABEL, _literal("Focus")),
|
||||
_triple(focus_uri, PROV_WAS_DERIVED_FROM, _iri(exploration_uri)),
|
||||
]
|
||||
|
||||
# Add each selected edge with its reasoning via intermediate entity
|
||||
|
|
@ -372,9 +398,9 @@ def selection_triples(
|
|||
# Create intermediate entity for this edge selection
|
||||
edge_sel_uri = edge_selection_uri(session_id, idx)
|
||||
|
||||
# Link selection to edge selection entity
|
||||
# Link focus to edge selection entity
|
||||
triples.append(
|
||||
_triple(selection_uri, TG_SELECTED_EDGE, _iri(edge_sel_uri))
|
||||
_triple(focus_uri, TG_SELECTED_EDGE, _iri(edge_sel_uri))
|
||||
)
|
||||
|
||||
# Attach quoted triple to edge selection entity
|
||||
|
|
@ -392,23 +418,23 @@ def selection_triples(
|
|||
return triples
|
||||
|
||||
|
||||
def answer_triples(
|
||||
answer_uri: str,
|
||||
selection_uri: str,
|
||||
def synthesis_triples(
|
||||
synthesis_uri: str,
|
||||
focus_uri: str,
|
||||
answer_text: str = "",
|
||||
document_id: Optional[str] = None,
|
||||
) -> List[Triple]:
|
||||
"""
|
||||
Build triples for an answer entity (final synthesis text).
|
||||
Build triples for a synthesis entity (final answer text).
|
||||
|
||||
Creates:
|
||||
- Entity declaration for answer
|
||||
- wasDerivedFrom link to selection
|
||||
- Entity declaration for synthesis
|
||||
- wasDerivedFrom link to focus
|
||||
- Either document reference (if document_id provided) or inline content
|
||||
|
||||
Args:
|
||||
answer_uri: URI of the answer entity (from answer_uri)
|
||||
selection_uri: URI of the parent selection entity
|
||||
synthesis_uri: URI of the synthesis entity (from synthesis_uri)
|
||||
focus_uri: URI of the parent focus entity
|
||||
answer_text: The synthesized answer text (used if no document_id)
|
||||
document_id: Optional librarian document ID (preferred over inline content)
|
||||
|
||||
|
|
@ -416,16 +442,16 @@ def answer_triples(
|
|||
List of Triple objects
|
||||
"""
|
||||
triples = [
|
||||
_triple(answer_uri, RDF_TYPE, _iri(PROV_ENTITY)),
|
||||
_triple(answer_uri, RDFS_LABEL, _literal("GraphRAG answer")),
|
||||
_triple(answer_uri, PROV_WAS_DERIVED_FROM, _iri(selection_uri)),
|
||||
_triple(synthesis_uri, RDF_TYPE, _iri(PROV_ENTITY)),
|
||||
_triple(synthesis_uri, RDFS_LABEL, _literal("Synthesis")),
|
||||
_triple(synthesis_uri, PROV_WAS_DERIVED_FROM, _iri(focus_uri)),
|
||||
]
|
||||
|
||||
if document_id:
|
||||
# Store reference to document in librarian (as IRI)
|
||||
triples.append(_triple(answer_uri, TG_DOCUMENT, _iri(document_id)))
|
||||
triples.append(_triple(synthesis_uri, TG_DOCUMENT, _iri(document_id)))
|
||||
elif answer_text:
|
||||
# Fallback: store inline content
|
||||
triples.append(_triple(answer_uri, TG_CONTENT, _literal(answer_text)))
|
||||
triples.append(_triple(synthesis_uri, TG_CONTENT, _literal(answer_text)))
|
||||
|
||||
return triples
|
||||
|
|
|
|||
|
|
@ -65,59 +65,65 @@ def agent_uri(component_name: str) -> str:
|
|||
# Query-time provenance URIs
|
||||
# These URIs use the urn:trustgraph: namespace to distinguish query-time
|
||||
# provenance from extraction-time provenance (which uses https://trustgraph.ai/)
|
||||
#
|
||||
# Terminology:
|
||||
# Question - What was asked, the anchor for everything
|
||||
# Exploration - Casting wide, what do we know about this space
|
||||
# Focus - Closing down, what's actually relevant here
|
||||
# Synthesis - Weaving the relevant pieces into an answer
|
||||
|
||||
def query_session_uri(session_id: str = None) -> str:
|
||||
def question_uri(session_id: str = None) -> str:
|
||||
"""
|
||||
Generate URI for a query session activity.
|
||||
Generate URI for a question activity.
|
||||
|
||||
Args:
|
||||
session_id: Optional UUID string. Auto-generates if not provided.
|
||||
|
||||
Returns:
|
||||
URN in format: urn:trustgraph:session:{uuid}
|
||||
URN in format: urn:trustgraph:question:{uuid}
|
||||
"""
|
||||
if session_id is None:
|
||||
session_id = str(uuid.uuid4())
|
||||
return f"urn:trustgraph:session:{session_id}"
|
||||
return f"urn:trustgraph:question:{session_id}"
|
||||
|
||||
|
||||
def retrieval_uri(session_id: str) -> str:
|
||||
def exploration_uri(session_id: str) -> str:
|
||||
"""
|
||||
Generate URI for a retrieval entity (edges retrieved from subgraph).
|
||||
Generate URI for an exploration entity (edges retrieved from subgraph).
|
||||
|
||||
Args:
|
||||
session_id: The session UUID (same as query_session_uri).
|
||||
session_id: The session UUID (same as question_uri).
|
||||
|
||||
Returns:
|
||||
URN in format: urn:trustgraph:prov:retrieval:{uuid}
|
||||
URN in format: urn:trustgraph:prov:exploration:{uuid}
|
||||
"""
|
||||
return f"urn:trustgraph:prov:retrieval:{session_id}"
|
||||
return f"urn:trustgraph:prov:exploration:{session_id}"
|
||||
|
||||
|
||||
def selection_uri(session_id: str) -> str:
|
||||
def focus_uri(session_id: str) -> str:
|
||||
"""
|
||||
Generate URI for a selection entity (selected edges with reasoning).
|
||||
Generate URI for a focus entity (selected edges with reasoning).
|
||||
|
||||
Args:
|
||||
session_id: The session UUID (same as query_session_uri).
|
||||
session_id: The session UUID (same as question_uri).
|
||||
|
||||
Returns:
|
||||
URN in format: urn:trustgraph:prov:selection:{uuid}
|
||||
URN in format: urn:trustgraph:prov:focus:{uuid}
|
||||
"""
|
||||
return f"urn:trustgraph:prov:selection:{session_id}"
|
||||
return f"urn:trustgraph:prov:focus:{session_id}"
|
||||
|
||||
|
||||
def answer_uri(session_id: str) -> str:
|
||||
def synthesis_uri(session_id: str) -> str:
|
||||
"""
|
||||
Generate URI for an answer entity (final synthesis text).
|
||||
Generate URI for a synthesis entity (final answer text).
|
||||
|
||||
Args:
|
||||
session_id: The session UUID (same as query_session_uri).
|
||||
session_id: The session UUID (same as question_uri).
|
||||
|
||||
Returns:
|
||||
URN in format: urn:trustgraph:prov:answer:{uuid}
|
||||
URN in format: urn:trustgraph:prov:synthesis:{uuid}
|
||||
"""
|
||||
return f"urn:trustgraph:prov:answer:{session_id}"
|
||||
return f"urn:trustgraph:prov:synthesis:{session_id}"
|
||||
|
||||
|
||||
def edge_selection_uri(session_id: str, edge_index: int) -> str:
|
||||
|
|
|
|||
|
|
@ -22,8 +22,8 @@ class GraphRagResponse:
|
|||
error: Error | None = None
|
||||
response: str = ""
|
||||
end_of_stream: bool = False # LLM response stream complete
|
||||
explain_id: str | None = None # Single explain URI (announced as created)
|
||||
explain_collection: str | None = None # Collection where explain was stored
|
||||
explain_id: str | None = None # Single explain URI (announced as created)
|
||||
explain_graph: str | None = None # Named graph where explain was stored (e.g., urn:graph:retrieval)
|
||||
message_type: str = "" # "chunk" or "explain"
|
||||
end_of_session: bool = False # Entire session complete
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue