mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-04-25 16:36:21 +02:00
fix: forward explain_triples through RAG clients and agent tool callback - RAG clients and the KnowledgeQueryImpl tool callback were dropping explain_triples from explain events, losing provenance data (including focus edge selections) when graph-rag is invoked via the agent. Tests for provenance and explainability (56 new): - Client-level forwarding of explain_triples - Graph-RAG structural chain (question → grounding → exploration → focus → synthesis) - Graph-RAG integration with mocked subsidiary clients - Document-RAG integration (question → grounding → exploration → synthesis) - Agent-orchestrator all 3 patterns: react, plan-then-execute, supervisor
295 lines
10 KiB
Python
295 lines
10 KiB
Python
"""
Structural test for the graph-rag provenance chain.

Verifies that a complete graph-rag query produces the expected
provenance chain:

    question → grounding → exploration → focus → synthesis

Each step must:
- Have the correct rdf:type
- Link to its predecessor via prov:wasDerivedFrom
- Carry expected domain-specific data
"""
|
|
|
|
import pytest
|
|
|
|
from trustgraph.provenance.triples import (
|
|
question_triples,
|
|
grounding_triples,
|
|
exploration_triples,
|
|
focus_triples,
|
|
synthesis_triples,
|
|
)
|
|
from trustgraph.provenance.uris import (
|
|
question_uri,
|
|
grounding_uri,
|
|
exploration_uri,
|
|
focus_uri,
|
|
synthesis_uri,
|
|
)
|
|
from trustgraph.provenance.namespaces import (
|
|
RDF_TYPE, RDFS_LABEL,
|
|
PROV_ENTITY, PROV_WAS_DERIVED_FROM,
|
|
TG_QUESTION, TG_GROUNDING, TG_EXPLORATION, TG_FOCUS, TG_SYNTHESIS,
|
|
TG_GRAPH_RAG_QUESTION, TG_ANSWER_TYPE,
|
|
TG_QUERY, TG_CONCEPT, TG_ENTITY,
|
|
TG_EDGE_COUNT, TG_SELECTED_EDGE, TG_EDGE, TG_REASONING,
|
|
TG_DOCUMENT,
|
|
PROV_STARTED_AT_TIME,
|
|
)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Helpers
|
|
# ---------------------------------------------------------------------------
|
|
|
|
# Session identifier passed to the *_uri helpers (and to focus_triples) when
# building the provenance chain exercised by these tests.
SESSION_ID = "test-session-1234"
|
|
|
|
|
|
def find_triple(triples, predicate, subject=None):
    """Find first triple matching predicate (and optionally subject).

    Args:
        triples: Iterable of triple objects exposing ``.p.iri`` and
            ``.s.iri`` attributes.
        predicate: Predicate IRI compared against ``t.p.iri``.
        subject: Optional subject IRI compared against ``t.s.iri``;
            ``None`` means any subject is accepted.

    Returns:
        The first matching triple in iteration order, or ``None`` when
        nothing matches.
    """
    for t in triples:
        # The subject is only inspected once the predicate has matched.
        if t.p.iri == predicate and (subject is None or t.s.iri == subject):
            return t
    return None
|
|
|
|
|
|
def find_triples(triples, predicate, subject=None):
    """Find all triples matching predicate (and optionally subject).

    Args:
        triples: Iterable of triple objects exposing ``.p.iri`` and
            ``.s.iri`` attributes.
        predicate: Predicate IRI compared against ``t.p.iri``.
        subject: Optional subject IRI compared against ``t.s.iri``;
            ``None`` means any subject is accepted.

    Returns:
        A new list of the matching triples, in their original order
        (empty when nothing matches).
    """
    return [
        t for t in triples
        if t.p.iri == predicate
        and (subject is None or t.s.iri == subject)
    ]
|
|
|
|
|
|
def has_type(triples, subject, rdf_type):
    """Check if subject has the given rdf:type.

    Args:
        triples: Iterable of triple objects exposing ``.s.iri``,
            ``.p.iri`` and ``.o.iri`` attributes.
        subject: Subject IRI the type assertion must be about.
        rdf_type: Type IRI expected as the object of the assertion.

    Returns:
        ``True`` if at least one triple has this subject, the ``RDF_TYPE``
        predicate and ``rdf_type`` as its object IRI; ``False`` otherwise.
    """
    return any(
        t.s.iri == subject and t.p.iri == RDF_TYPE and t.o.iri == rdf_type
        for t in triples
    )
|
|
|
|
|
|
def derived_from(triples, subject):
    """Get the wasDerivedFrom target URI for a subject.

    Args:
        triples: Iterable of triple objects to search.
        subject: Subject IRI whose derivation link is wanted.

    Returns:
        The object IRI of the first ``PROV_WAS_DERIVED_FROM`` triple for
        ``subject``, or ``None`` when the subject has no such triple.
    """
    t = find_triple(triples, PROV_WAS_DERIVED_FROM, subject)
    return t.o.iri if t else None
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Build the full chain
|
|
# ---------------------------------------------------------------------------
|
|
|
|
@pytest.fixture
def chain():
    """Build all provenance triples for a complete graph-rag query.

    Returns:
        A dict with three keys:

        - ``"uris"``: stage name -> URI produced by the matching ``*_uri``
          helper for ``SESSION_ID``.
        - ``"triples"``: stage name -> the value returned by the matching
          ``*_triples`` builder.
        - ``"all"``: the five per-stage results concatenated with ``+`` in
          chain order (presumably lists of triples -- confirm against the
          builders).
    """
    q_uri = question_uri(SESSION_ID)
    gnd_uri = grounding_uri(SESSION_ID)
    exp_uri = exploration_uri(SESSION_ID)
    foc_uri = focus_uri(SESSION_ID)
    syn_uri = synthesis_uri(SESSION_ID)

    # Each stage is built with its predecessor's URI as the second argument,
    # which is what the derivation-chain tests later assert on.
    q = question_triples(q_uri, "What is quantum computing?", "2026-01-01T00:00:00Z")
    gnd = grounding_triples(gnd_uri, q_uri, ["quantum", "computing"])
    exp = exploration_triples(
        exp_uri, gnd_uri, edge_count=42,
        entities=["urn:entity:1", "urn:entity:2"],
    )
    foc = focus_triples(
        foc_uri, exp_uri,
        selected_edges_with_reasoning=[
            {
                # Edge whose object is an IRI.
                "edge": (
                    "http://example.com/QuantumComputing",
                    "http://schema.org/relatedTo",
                    "http://example.com/Physics",
                ),
                "reasoning": "Directly relevant to the query",
            },
            {
                # Edge whose object is a plain string rather than an IRI.
                "edge": (
                    "http://example.com/QuantumComputing",
                    "http://schema.org/name",
                    "Quantum Computing",
                ),
                "reasoning": "Provides the entity label",
            },
        ],
        session_id=SESSION_ID,
    )
    syn = synthesis_triples(syn_uri, foc_uri, document_id="urn:doc:answer-1")

    return {
        "uris": {
            "question": q_uri,
            "grounding": gnd_uri,
            "exploration": exp_uri,
            "focus": foc_uri,
            "synthesis": syn_uri,
        },
        "triples": {
            "question": q,
            "grounding": gnd,
            "exploration": exp,
            "focus": foc,
            "synthesis": syn,
        },
        "all": q + gnd + exp + foc + syn,
    }
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Chain structure tests
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestGraphRagProvenanceChain:
    """Verify the full question → grounding → exploration → focus → synthesis chain."""

    def test_chain_has_five_stages(self, chain):
        """Each stage should produce at least some triples."""
        for stage in ["question", "grounding", "exploration", "focus", "synthesis"]:
            assert len(chain["triples"][stage]) > 0, f"{stage} produced no triples"

    def test_derivation_chain(self, chain):
        """
        The wasDerivedFrom links must form:
        grounding → question, exploration → grounding,
        focus → exploration, synthesis → focus.
        """
        uris = chain["uris"]
        all_triples = chain["all"]

        # Lookups run over the combined triples of all stages, so each
        # subject's link is found regardless of which stage emitted it.
        assert derived_from(all_triples, uris["grounding"]) == uris["question"]
        assert derived_from(all_triples, uris["exploration"]) == uris["grounding"]
        assert derived_from(all_triples, uris["focus"]) == uris["exploration"]
        assert derived_from(all_triples, uris["synthesis"]) == uris["focus"]

    def test_question_has_no_parent(self, chain):
        """The root question should not derive from anything (no parent_uri)."""
        uris = chain["uris"]
        all_triples = chain["all"]
        assert derived_from(all_triples, uris["question"]) is None

    def test_question_with_parent(self):
        """When a parent_uri is given, question should derive from it."""
        # Built independently of the ``chain`` fixture, with its own session.
        q_uri = question_uri("child-session")
        parent = "urn:trustgraph:agent:iteration:parent"
        q = question_triples(q_uri, "sub-query", "2026-01-01T00:00:00Z",
                             parent_uri=parent)
        assert derived_from(q, q_uri) == parent
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Type annotation tests
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestGraphRagProvenanceTypes:
    """Each stage must have the correct rdf:type annotations."""

    def test_question_types(self, chain):
        """The question is typed as PROV_ENTITY and TG_GRAPH_RAG_QUESTION."""
        uris = chain["uris"]
        triples = chain["triples"]["question"]
        assert has_type(triples, uris["question"], PROV_ENTITY)
        assert has_type(triples, uris["question"], TG_GRAPH_RAG_QUESTION)

    def test_grounding_types(self, chain):
        """The grounding is typed as PROV_ENTITY and TG_GROUNDING."""
        uris = chain["uris"]
        triples = chain["triples"]["grounding"]
        assert has_type(triples, uris["grounding"], PROV_ENTITY)
        assert has_type(triples, uris["grounding"], TG_GROUNDING)

    def test_exploration_types(self, chain):
        """The exploration is typed as PROV_ENTITY and TG_EXPLORATION."""
        uris = chain["uris"]
        triples = chain["triples"]["exploration"]
        assert has_type(triples, uris["exploration"], PROV_ENTITY)
        assert has_type(triples, uris["exploration"], TG_EXPLORATION)

    def test_focus_types(self, chain):
        """The focus is typed as PROV_ENTITY and TG_FOCUS."""
        uris = chain["uris"]
        triples = chain["triples"]["focus"]
        assert has_type(triples, uris["focus"], PROV_ENTITY)
        assert has_type(triples, uris["focus"], TG_FOCUS)

    def test_synthesis_types(self, chain):
        """The synthesis is typed as PROV_ENTITY, TG_SYNTHESIS and TG_ANSWER_TYPE."""
        uris = chain["uris"]
        triples = chain["triples"]["synthesis"]
        assert has_type(triples, uris["synthesis"], PROV_ENTITY)
        assert has_type(triples, uris["synthesis"], TG_SYNTHESIS)
        assert has_type(triples, uris["synthesis"], TG_ANSWER_TYPE)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Domain-specific content tests
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestGraphRagProvenanceContent:
    """Each stage should carry the expected domain data."""

    def test_question_has_query_text(self, chain):
        """The question carries the original query text under TG_QUERY."""
        uris = chain["uris"]
        t = find_triple(chain["triples"]["question"], TG_QUERY, uris["question"])
        assert t is not None
        assert t.o.value == "What is quantum computing?"

    def test_question_has_timestamp(self, chain):
        """The question carries the supplied timestamp under PROV_STARTED_AT_TIME."""
        uris = chain["uris"]
        t = find_triple(chain["triples"]["question"], PROV_STARTED_AT_TIME, uris["question"])
        assert t is not None
        assert t.o.value == "2026-01-01T00:00:00Z"

    def test_grounding_has_concepts(self, chain):
        """The grounding lists exactly the supplied concepts under TG_CONCEPT."""
        uris = chain["uris"]
        concepts = find_triples(chain["triples"]["grounding"], TG_CONCEPT, uris["grounding"])
        # Compared as a set: the order of the concept triples is not checked.
        concept_values = {t.o.value for t in concepts}
        assert concept_values == {"quantum", "computing"}

    def test_exploration_has_edge_count(self, chain):
        """The exploration records the edge count under TG_EDGE_COUNT."""
        uris = chain["uris"]
        t = find_triple(chain["triples"]["exploration"], TG_EDGE_COUNT, uris["exploration"])
        assert t is not None
        # The fixture passes the integer 42; the stored value is compared
        # against its string form.
        assert t.o.value == "42"

    def test_exploration_has_entities(self, chain):
        """The exploration lists exactly the supplied entity IRIs under TG_ENTITY."""
        uris = chain["uris"]
        entities = find_triples(chain["triples"]["exploration"], TG_ENTITY, uris["exploration"])
        entity_iris = {t.o.iri for t in entities}
        assert entity_iris == {"urn:entity:1", "urn:entity:2"}

    def test_focus_has_selected_edges(self, chain):
        """The focus has one TG_SELECTED_EDGE triple per selected edge."""
        uris = chain["uris"]
        edges = find_triples(chain["triples"]["focus"], TG_SELECTED_EDGE, uris["focus"])
        assert len(edges) == 2

    def test_focus_edges_have_quoted_triples(self, chain):
        """Each edge selection entity should have a tg:edge with a quoted triple."""
        focus = chain["triples"]["focus"]
        # No subject filter: the search covers every subject in the focus triples.
        edge_triples = find_triples(focus, TG_EDGE)
        assert len(edge_triples) == 2

        # Each should have a quoted triple as the object
        for t in edge_triples:
            assert t.o.triple is not None, "tg:edge object should be a quoted triple"

    def test_focus_edges_have_reasoning(self, chain):
        """Each edge selection entity should have tg:reasoning."""
        focus = chain["triples"]["focus"]
        reasoning = find_triples(focus, TG_REASONING)
        assert len(reasoning) == 2
        reasoning_texts = {t.o.value for t in reasoning}
        assert "Directly relevant to the query" in reasoning_texts
        assert "Provides the entity label" in reasoning_texts

    def test_synthesis_has_document_ref(self, chain):
        """The synthesis references the answer document under TG_DOCUMENT."""
        uris = chain["uris"]
        t = find_triple(chain["triples"]["synthesis"], TG_DOCUMENT, uris["synthesis"])
        assert t is not None
        assert t.o.iri == "urn:doc:answer-1"

    def test_synthesis_has_labels(self, chain):
        """The synthesis carries the RDFS_LABEL value "Synthesis"."""
        uris = chain["uris"]
        t = find_triple(chain["triples"]["synthesis"], RDFS_LABEL, uris["synthesis"])
        assert t is not None
        assert t.o.value == "Synthesis"
|