""" Structural test for the graph-rag provenance chain. Verifies that a complete graph-rag query produces the expected provenance chain: question → grounding → exploration → focus → synthesis Each step must: - Have the correct rdf:type - Link to its predecessor via prov:wasDerivedFrom - Carry expected domain-specific data """ import pytest from trustgraph.provenance.triples import ( question_triples, grounding_triples, exploration_triples, focus_triples, synthesis_triples, ) from trustgraph.provenance.uris import ( question_uri, grounding_uri, exploration_uri, focus_uri, synthesis_uri, ) from trustgraph.provenance.namespaces import ( RDF_TYPE, RDFS_LABEL, PROV_ENTITY, PROV_WAS_DERIVED_FROM, TG_QUESTION, TG_GROUNDING, TG_EXPLORATION, TG_FOCUS, TG_SYNTHESIS, TG_GRAPH_RAG_QUESTION, TG_ANSWER_TYPE, TG_QUERY, TG_CONCEPT, TG_ENTITY, TG_EDGE_COUNT, TG_SELECTED_EDGE, TG_EDGE, TG_REASONING, TG_DOCUMENT, PROV_STARTED_AT_TIME, ) # --------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- SESSION_ID = "test-session-1234" def find_triple(triples, predicate, subject=None): """Find first triple matching predicate (and optionally subject).""" for t in triples: if t.p.iri == predicate: if subject is None or t.s.iri == subject: return t return None def find_triples(triples, predicate, subject=None): """Find all triples matching predicate (and optionally subject).""" return [ t for t in triples if t.p.iri == predicate and (subject is None or t.s.iri == subject) ] def has_type(triples, subject, rdf_type): """Check if subject has the given rdf:type.""" return any( t.s.iri == subject and t.p.iri == RDF_TYPE and t.o.iri == rdf_type for t in triples ) def derived_from(triples, subject): """Get the wasDerivedFrom target URI for a subject.""" t = find_triple(triples, PROV_WAS_DERIVED_FROM, subject) return t.o.iri if t else None # 
# ---------------------------------------------------------------------------
# Build the full chain
# ---------------------------------------------------------------------------

@pytest.fixture
def chain():
    """Assemble the provenance triples of one complete graph-rag query.

    The returned dict has three keys:
    - "uris": stage name -> URI built from SESSION_ID
    - "triples": stage name -> triples produced for that stage
    - "all": the stage triples concatenated in chain order
    """
    uris = {
        "question": question_uri(SESSION_ID),
        "grounding": grounding_uri(SESSION_ID),
        "exploration": exploration_uri(SESSION_ID),
        "focus": focus_uri(SESSION_ID),
        "synthesis": synthesis_uri(SESSION_ID),
    }

    # Two selected edges: one whose object is an IRI, one whose object is
    # a plain string.
    selected_edges = [
        {
            "edge": (
                "http://example.com/QuantumComputing",
                "http://schema.org/relatedTo",
                "http://example.com/Physics",
            ),
            "reasoning": "Directly relevant to the query",
        },
        {
            "edge": (
                "http://example.com/QuantumComputing",
                "http://schema.org/name",
                "Quantum Computing",
            ),
            "reasoning": "Provides the entity label",
        },
    ]

    # Each stage is built with its predecessor's URI as second argument.
    stage_triples = {
        "question": question_triples(
            uris["question"],
            "What is quantum computing?",
            "2026-01-01T00:00:00Z",
        ),
        "grounding": grounding_triples(
            uris["grounding"],
            uris["question"],
            ["quantum", "computing"],
        ),
        "exploration": exploration_triples(
            uris["exploration"],
            uris["grounding"],
            edge_count=42,
            entities=["urn:entity:1", "urn:entity:2"],
        ),
        "focus": focus_triples(
            uris["focus"],
            uris["exploration"],
            selected_edges_with_reasoning=selected_edges,
            session_id=SESSION_ID,
        ),
        "synthesis": synthesis_triples(
            uris["synthesis"],
            uris["focus"],
            document_id="urn:doc:answer-1",
        ),
    }

    combined = (
        stage_triples["question"]
        + stage_triples["grounding"]
        + stage_triples["exploration"]
        + stage_triples["focus"]
        + stage_triples["synthesis"]
    )

    return {
        "uris": uris,
        "triples": stage_triples,
        "all": combined,
    }


# ---------------------------------------------------------------------------
# Chain structure tests
# ---------------------------------------------------------------------------

class TestGraphRagProvenanceChain:
    """Verify the full question → grounding → exploration → focus → synthesis chain."""

    def test_chain_has_five_stages(self, chain):
        """Every one of the five stages must yield a non-empty set of triples."""
        per_stage = chain["triples"]
        for stage in ("question", "grounding", "exploration", "focus", "synthesis"):
            assert len(per_stage[stage]) > 0, f"{stage} produced no triples"

    def test_derivation_chain(self, chain):
        """
        The wasDerivedFrom links must form:
        grounding → question, exploration → grounding,
        focus → exploration, synthesis → focus.
        """
        uris = chain["uris"]
        everything = chain["all"]

        expected_links = (
            ("grounding", "question"),
            ("exploration", "grounding"),
            ("focus", "exploration"),
            ("synthesis", "focus"),
        )
        for child, parent in expected_links:
            assert derived_from(everything, uris[child]) == uris[parent]

    def test_question_has_no_parent(self, chain):
        """Without a parent_uri, the root question carries no wasDerivedFrom link."""
        root = chain["uris"]["question"]
        assert derived_from(chain["all"], root) is None

    def test_question_with_parent(self):
        """A question built with parent_uri must derive from that parent."""
        parent = "urn:trustgraph:agent:iteration:parent"
        child_uri = question_uri("child-session")
        child_triples = question_triples(
            child_uri,
            "sub-query",
            "2026-01-01T00:00:00Z",
            parent_uri=parent,
        )
        assert derived_from(child_triples, child_uri) == parent


# ---------------------------------------------------------------------------
# Type annotation tests
# ---------------------------------------------------------------------------

class TestGraphRagProvenanceTypes:
    """Each stage must have the correct rdf:type annotations."""

    def test_question_types(self, chain):
        subject = chain["uris"]["question"]
        stage = chain["triples"]["question"]
        for expected in (PROV_ENTITY, TG_GRAPH_RAG_QUESTION):
            assert has_type(stage, subject, expected)

    def test_grounding_types(self, chain):
        subject = chain["uris"]["grounding"]
        stage = chain["triples"]["grounding"]
        for expected in (PROV_ENTITY, TG_GROUNDING):
            assert has_type(stage, subject, expected)

    def test_exploration_types(self, chain):
        subject = chain["uris"]["exploration"]
        stage = chain["triples"]["exploration"]
        for expected in (PROV_ENTITY, TG_EXPLORATION):
            assert has_type(stage, subject, expected)

    def test_focus_types(self, chain):
        subject = chain["uris"]["focus"]
        stage = chain["triples"]["focus"]
        for expected in (PROV_ENTITY, TG_FOCUS):
            assert has_type(stage, subject, expected)

    def test_synthesis_types(self, chain):
        subject = chain["uris"]["synthesis"]
        stage = chain["triples"]["synthesis"]
        for expected in (PROV_ENTITY, TG_SYNTHESIS, TG_ANSWER_TYPE):
            assert has_type(stage, subject, expected)


# ---------------------------------------------------------------------------
# Domain-specific content tests
# ---------------------------------------------------------------------------

class TestGraphRagProvenanceContent:
    """Each stage should carry the expected domain data."""

    def test_question_has_query_text(self, chain):
        subject = chain["uris"]["question"]
        query = find_triple(chain["triples"]["question"], TG_QUERY, subject)
        assert query is not None
        assert query.o.value == "What is quantum computing?"

    def test_question_has_timestamp(self, chain):
        subject = chain["uris"]["question"]
        started = find_triple(
            chain["triples"]["question"], PROV_STARTED_AT_TIME, subject,
        )
        assert started is not None
        assert started.o.value == "2026-01-01T00:00:00Z"

    def test_grounding_has_concepts(self, chain):
        subject = chain["uris"]["grounding"]
        found = find_triples(chain["triples"]["grounding"], TG_CONCEPT, subject)
        assert set(item.o.value for item in found) == {"quantum", "computing"}

    def test_exploration_has_edge_count(self, chain):
        subject = chain["uris"]["exploration"]
        count = find_triple(
            chain["triples"]["exploration"], TG_EDGE_COUNT, subject,
        )
        assert count is not None
        assert count.o.value == "42"

    def test_exploration_has_entities(self, chain):
        subject = chain["uris"]["exploration"]
        found = find_triples(chain["triples"]["exploration"], TG_ENTITY, subject)
        assert set(item.o.iri for item in found) == {"urn:entity:1", "urn:entity:2"}

    def test_focus_has_selected_edges(self, chain):
        subject = chain["uris"]["focus"]
        selections = find_triples(
            chain["triples"]["focus"], TG_SELECTED_EDGE, subject,
        )
        assert len(selections) == 2

    def test_focus_edges_have_quoted_triples(self, chain):
        """Each edge selection entity should have a tg:edge with a quoted triple."""
        quoted = find_triples(chain["triples"]["focus"], TG_EDGE)
        assert len(quoted) == 2
        # The object of every tg:edge triple must itself be a quoted triple.
        for item in quoted:
            assert item.o.triple is not None, "tg:edge object should be a quoted triple"

    def test_focus_edges_have_reasoning(self, chain):
        """Each edge selection entity should have tg:reasoning."""
        found = find_triples(chain["triples"]["focus"], TG_REASONING)
        assert len(found) == 2
        texts = set(item.o.value for item in found)
        for expected in (
            "Directly relevant to the query",
            "Provides the entity label",
        ):
            assert expected in texts

    def test_synthesis_has_document_ref(self, chain):
        subject = chain["uris"]["synthesis"]
        doc = find_triple(chain["triples"]["synthesis"], TG_DOCUMENT, subject)
        assert doc is not None
        assert doc.o.iri == "urn:doc:answer-1"

    def test_synthesis_has_labels(self, chain):
        subject = chain["uris"]["synthesis"]
        label = find_triple(chain["triples"]["synthesis"], RDFS_LABEL, subject)
        assert label is not None
        assert label.o.value == "Synthesis"