mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-04-25 08:26:21 +02:00
Addresses recommendations from the UX developer's agent experience report. Adds provenance predicates, DAG structure changes, error resilience, and a published OWL ontology. Explainability additions: - Tool candidates: tg:toolCandidate on Analysis events lists the tools visible to the LLM for each iteration (names only, descriptions in config) - Termination reason: tg:terminationReason on Conclusion/Synthesis events (final-answer, plan-complete, subagents-complete) - Step counter: tg:stepNumber on iteration events - Pattern decision: new tg:PatternDecision entity in the DAG between session and first iteration, carrying tg:pattern and tg:taskType - Latency: tg:llmDurationMs on Analysis events, tg:toolDurationMs on Observation events - Token counts on events: tg:inToken/tg:outToken/tg:llmModel on Grounding, Focus, Synthesis, and Analysis events - Tool/parse errors: tg:toolError on Observation events with tg:Error mixin type. Parse failures return as error observations instead of crashing the agent, giving it a chance to retry. Envelope unification: - Rename chunk_type to message_type across AgentResponse schema, translator, SDK types, socket clients, CLI, and all tests. Agent and RAG services now both use message_type on the wire. Ontology: - specs/ontology/trustgraph.ttl — OWL vocabulary covering all 26 classes, 7 object properties, and 36+ datatype properties including new predicates. DAG structure tests: - tests/unit/test_provenance/test_dag_structure.py verifies the wasDerivedFrom chain for GraphRAG, DocumentRAG, and all three agent patterns (react, plan, supervisor) including the pattern-decision link.
812 lines
30 KiB
Python
812 lines
30 KiB
Python
"""
|
|
Tests for provenance triple builder functions (extraction-time and query-time).
|
|
"""
|
|
|
|
import pytest
|
|
from unittest.mock import patch
|
|
|
|
from trustgraph.schema import Triple, Term, IRI, LITERAL, TRIPLE
|
|
|
|
from trustgraph.provenance.triples import (
|
|
set_graph,
|
|
document_triples,
|
|
derived_entity_triples,
|
|
subgraph_provenance_triples,
|
|
question_triples,
|
|
grounding_triples,
|
|
exploration_triples,
|
|
focus_triples,
|
|
synthesis_triples,
|
|
docrag_question_triples,
|
|
docrag_exploration_triples,
|
|
docrag_synthesis_triples,
|
|
)
|
|
|
|
from trustgraph.provenance.namespaces import (
|
|
RDF_TYPE, RDFS_LABEL,
|
|
PROV_ENTITY, PROV_ACTIVITY, PROV_AGENT,
|
|
PROV_WAS_DERIVED_FROM, PROV_WAS_GENERATED_BY,
|
|
PROV_USED, PROV_WAS_ASSOCIATED_WITH, PROV_STARTED_AT_TIME,
|
|
DC_TITLE, DC_SOURCE, DC_DATE, DC_CREATOR,
|
|
TG_PAGE_COUNT, TG_MIME_TYPE, TG_PAGE_NUMBER,
|
|
TG_CHUNK_INDEX, TG_CHAR_OFFSET, TG_CHAR_LENGTH,
|
|
TG_CHUNK_SIZE, TG_CHUNK_OVERLAP, TG_COMPONENT_VERSION,
|
|
TG_LLM_MODEL, TG_ONTOLOGY, TG_CONTAINS,
|
|
TG_DOCUMENT_TYPE, TG_PAGE_TYPE, TG_CHUNK_TYPE, TG_SUBGRAPH_TYPE,
|
|
TG_QUERY, TG_CONCEPT, TG_ENTITY,
|
|
TG_EDGE_COUNT, TG_SELECTED_EDGE, TG_EDGE, TG_REASONING,
|
|
TG_DOCUMENT,
|
|
TG_CHUNK_COUNT, TG_SELECTED_CHUNK,
|
|
TG_QUESTION, TG_GROUNDING, TG_EXPLORATION, TG_FOCUS, TG_SYNTHESIS,
|
|
TG_ANSWER_TYPE,
|
|
TG_GRAPH_RAG_QUESTION, TG_DOC_RAG_QUESTION,
|
|
GRAPH_SOURCE, GRAPH_RETRIEVAL,
|
|
)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Helpers
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def find_triple(triples, predicate, subject=None):
|
|
"""Find first triple matching predicate (and optionally subject)."""
|
|
for t in triples:
|
|
if t.p.iri == predicate:
|
|
if subject is None or t.s.iri == subject:
|
|
return t
|
|
return None
|
|
|
|
|
|
def find_triples(triples, predicate, subject=None):
|
|
"""Find all triples matching predicate (and optionally subject)."""
|
|
return [
|
|
t for t in triples
|
|
if t.p.iri == predicate and (subject is None or t.s.iri == subject)
|
|
]
|
|
|
|
|
|
def has_type(triples, subject, rdf_type):
|
|
"""Check if subject has rdf:type rdf_type."""
|
|
for t in triples:
|
|
if (t.s.iri == subject and t.p.iri == RDF_TYPE
|
|
and t.o.type == IRI and t.o.iri == rdf_type):
|
|
return True
|
|
return False
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# set_graph
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestSetGraph:
|
|
|
|
def test_sets_graph_on_all_triples(self):
|
|
triples = [
|
|
Triple(
|
|
s=Term(type=IRI, iri="urn:s1"),
|
|
p=Term(type=IRI, iri="urn:p1"),
|
|
o=Term(type=LITERAL, value="v1"),
|
|
),
|
|
Triple(
|
|
s=Term(type=IRI, iri="urn:s2"),
|
|
p=Term(type=IRI, iri="urn:p2"),
|
|
o=Term(type=LITERAL, value="v2"),
|
|
),
|
|
]
|
|
result = set_graph(triples, GRAPH_RETRIEVAL)
|
|
assert len(result) == 2
|
|
for t in result:
|
|
assert t.g == GRAPH_RETRIEVAL
|
|
|
|
def test_does_not_modify_originals(self):
|
|
original = Triple(
|
|
s=Term(type=IRI, iri="urn:s"),
|
|
p=Term(type=IRI, iri="urn:p"),
|
|
o=Term(type=LITERAL, value="v"),
|
|
)
|
|
result = set_graph([original], "urn:graph:test")
|
|
assert original.g is None
|
|
assert result[0].g == "urn:graph:test"
|
|
|
|
def test_empty_list(self):
|
|
result = set_graph([], GRAPH_SOURCE)
|
|
assert result == []
|
|
|
|
def test_preserves_spo(self):
|
|
original = Triple(
|
|
s=Term(type=IRI, iri="urn:s"),
|
|
p=Term(type=IRI, iri="urn:p"),
|
|
o=Term(type=LITERAL, value="hello"),
|
|
)
|
|
result = set_graph([original], "urn:g")[0]
|
|
assert result.s.iri == "urn:s"
|
|
assert result.p.iri == "urn:p"
|
|
assert result.o.value == "hello"
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# document_triples
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestDocumentTriples:
|
|
|
|
DOC_URI = "https://example.com/doc/abc"
|
|
|
|
def test_minimal_document(self):
|
|
triples = document_triples(self.DOC_URI)
|
|
assert has_type(triples, self.DOC_URI, PROV_ENTITY)
|
|
assert has_type(triples, self.DOC_URI, TG_DOCUMENT_TYPE)
|
|
assert len(triples) == 2
|
|
|
|
def test_with_title(self):
|
|
triples = document_triples(self.DOC_URI, title="My Doc")
|
|
title_t = find_triple(triples, DC_TITLE)
|
|
assert title_t is not None
|
|
assert title_t.o.value == "My Doc"
|
|
# Title also creates an rdfs:label
|
|
label_t = find_triple(triples, RDFS_LABEL)
|
|
assert label_t is not None
|
|
assert label_t.o.value == "My Doc"
|
|
|
|
def test_with_source(self):
|
|
triples = document_triples(self.DOC_URI, source="https://source.com/f.pdf")
|
|
source_t = find_triple(triples, DC_SOURCE)
|
|
assert source_t is not None
|
|
assert source_t.o.type == IRI
|
|
assert source_t.o.iri == "https://source.com/f.pdf"
|
|
|
|
def test_with_date(self):
|
|
triples = document_triples(self.DOC_URI, date="2024-01-15")
|
|
date_t = find_triple(triples, DC_DATE)
|
|
assert date_t is not None
|
|
assert date_t.o.value == "2024-01-15"
|
|
|
|
def test_with_creator(self):
|
|
triples = document_triples(self.DOC_URI, creator="Alice")
|
|
creator_t = find_triple(triples, DC_CREATOR)
|
|
assert creator_t is not None
|
|
assert creator_t.o.value == "Alice"
|
|
|
|
def test_with_page_count(self):
|
|
triples = document_triples(self.DOC_URI, page_count=42)
|
|
pc_t = find_triple(triples, TG_PAGE_COUNT)
|
|
assert pc_t is not None
|
|
assert pc_t.o.value == "42"
|
|
|
|
def test_with_page_count_zero(self):
|
|
triples = document_triples(self.DOC_URI, page_count=0)
|
|
pc_t = find_triple(triples, TG_PAGE_COUNT)
|
|
assert pc_t is not None
|
|
assert pc_t.o.value == "0"
|
|
|
|
def test_with_mime_type(self):
|
|
triples = document_triples(self.DOC_URI, mime_type="application/pdf")
|
|
mt_t = find_triple(triples, TG_MIME_TYPE)
|
|
assert mt_t is not None
|
|
assert mt_t.o.value == "application/pdf"
|
|
|
|
def test_all_metadata(self):
|
|
triples = document_triples(
|
|
self.DOC_URI,
|
|
title="Test",
|
|
source="https://s.com",
|
|
date="2024-01-01",
|
|
creator="Bob",
|
|
page_count=10,
|
|
mime_type="application/pdf",
|
|
)
|
|
# 2 type triples + title + label + source + date + creator + page_count + mime_type
|
|
assert len(triples) == 9
|
|
|
|
def test_subject_is_doc_uri(self):
|
|
triples = document_triples(self.DOC_URI, title="T")
|
|
for t in triples:
|
|
assert t.s.iri == self.DOC_URI
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# derived_entity_triples
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestDerivedEntityTriples:
|
|
|
|
ENTITY_URI = "https://example.com/doc/abc/p1"
|
|
PARENT_URI = "https://example.com/doc/abc"
|
|
|
|
def test_page_entity_has_page_type(self):
|
|
triples = derived_entity_triples(
|
|
self.ENTITY_URI, self.PARENT_URI,
|
|
"pdf-extractor", "1.0",
|
|
page_number=1,
|
|
timestamp="2024-01-01T00:00:00Z",
|
|
)
|
|
assert has_type(triples, self.ENTITY_URI, PROV_ENTITY)
|
|
assert has_type(triples, self.ENTITY_URI, TG_PAGE_TYPE)
|
|
|
|
def test_chunk_entity_has_message_type(self):
|
|
triples = derived_entity_triples(
|
|
self.ENTITY_URI, self.PARENT_URI,
|
|
"chunker", "1.0",
|
|
chunk_index=0,
|
|
timestamp="2024-01-01T00:00:00Z",
|
|
)
|
|
assert has_type(triples, self.ENTITY_URI, TG_CHUNK_TYPE)
|
|
|
|
def test_no_specific_type_without_page_or_chunk(self):
|
|
triples = derived_entity_triples(
|
|
self.ENTITY_URI, self.PARENT_URI,
|
|
"component", "1.0",
|
|
timestamp="2024-01-01T00:00:00Z",
|
|
)
|
|
assert has_type(triples, self.ENTITY_URI, PROV_ENTITY)
|
|
assert not has_type(triples, self.ENTITY_URI, TG_PAGE_TYPE)
|
|
assert not has_type(triples, self.ENTITY_URI, TG_CHUNK_TYPE)
|
|
|
|
def test_was_derived_from_parent(self):
|
|
triples = derived_entity_triples(
|
|
self.ENTITY_URI, self.PARENT_URI,
|
|
"pdf-extractor", "1.0",
|
|
timestamp="2024-01-01T00:00:00Z",
|
|
)
|
|
derived = find_triple(triples, PROV_WAS_DERIVED_FROM, self.ENTITY_URI)
|
|
assert derived is not None
|
|
assert derived.o.iri == self.PARENT_URI
|
|
|
|
def test_activity_created(self):
|
|
triples = derived_entity_triples(
|
|
self.ENTITY_URI, self.PARENT_URI,
|
|
"pdf-extractor", "1.0",
|
|
timestamp="2024-01-01T00:00:00Z",
|
|
)
|
|
# Entity was generated by an activity
|
|
gen = find_triple(triples, PROV_WAS_GENERATED_BY, self.ENTITY_URI)
|
|
assert gen is not None
|
|
act_uri = gen.o.iri
|
|
|
|
# Activity has correct type and metadata
|
|
assert has_type(triples, act_uri, PROV_ACTIVITY)
|
|
|
|
# Activity used the parent
|
|
used = find_triple(triples, PROV_USED, act_uri)
|
|
assert used is not None
|
|
assert used.o.iri == self.PARENT_URI
|
|
|
|
# Activity has component version
|
|
version = find_triple(triples, TG_COMPONENT_VERSION, act_uri)
|
|
assert version is not None
|
|
assert version.o.value == "1.0"
|
|
|
|
def test_agent_created(self):
|
|
triples = derived_entity_triples(
|
|
self.ENTITY_URI, self.PARENT_URI,
|
|
"pdf-extractor", "1.0",
|
|
timestamp="2024-01-01T00:00:00Z",
|
|
)
|
|
# Find the agent URI via wasAssociatedWith
|
|
gen = find_triple(triples, PROV_WAS_GENERATED_BY, self.ENTITY_URI)
|
|
act_uri = gen.o.iri
|
|
assoc = find_triple(triples, PROV_WAS_ASSOCIATED_WITH, act_uri)
|
|
assert assoc is not None
|
|
agt_uri = assoc.o.iri
|
|
|
|
assert has_type(triples, agt_uri, PROV_AGENT)
|
|
label = find_triple(triples, RDFS_LABEL, agt_uri)
|
|
assert label is not None
|
|
assert label.o.value == "pdf-extractor"
|
|
|
|
def test_timestamp_recorded(self):
|
|
triples = derived_entity_triples(
|
|
self.ENTITY_URI, self.PARENT_URI,
|
|
"pdf-extractor", "1.0",
|
|
timestamp="2024-06-15T12:30:00Z",
|
|
)
|
|
ts = find_triple(triples, PROV_STARTED_AT_TIME)
|
|
assert ts is not None
|
|
assert ts.o.value == "2024-06-15T12:30:00Z"
|
|
|
|
def test_default_timestamp_generated(self):
|
|
triples = derived_entity_triples(
|
|
self.ENTITY_URI, self.PARENT_URI,
|
|
"pdf-extractor", "1.0",
|
|
)
|
|
ts = find_triple(triples, PROV_STARTED_AT_TIME)
|
|
assert ts is not None
|
|
assert len(ts.o.value) > 0
|
|
|
|
def test_optional_label(self):
|
|
triples = derived_entity_triples(
|
|
self.ENTITY_URI, self.PARENT_URI,
|
|
"pdf-extractor", "1.0",
|
|
label="Page 1",
|
|
timestamp="2024-01-01T00:00:00Z",
|
|
)
|
|
label = find_triple(triples, RDFS_LABEL, self.ENTITY_URI)
|
|
assert label is not None
|
|
assert label.o.value == "Page 1"
|
|
|
|
def test_page_number_recorded(self):
|
|
triples = derived_entity_triples(
|
|
self.ENTITY_URI, self.PARENT_URI,
|
|
"pdf-extractor", "1.0",
|
|
page_number=3,
|
|
timestamp="2024-01-01T00:00:00Z",
|
|
)
|
|
pn = find_triple(triples, TG_PAGE_NUMBER, self.ENTITY_URI)
|
|
assert pn is not None
|
|
assert pn.o.value == "3"
|
|
|
|
def test_chunk_metadata_recorded(self):
|
|
triples = derived_entity_triples(
|
|
self.ENTITY_URI, self.PARENT_URI,
|
|
"chunker", "2.0",
|
|
chunk_index=5,
|
|
char_offset=1000,
|
|
char_length=500,
|
|
chunk_size=512,
|
|
chunk_overlap=64,
|
|
timestamp="2024-01-01T00:00:00Z",
|
|
)
|
|
ci = find_triple(triples, TG_CHUNK_INDEX, self.ENTITY_URI)
|
|
assert ci is not None and ci.o.value == "5"
|
|
|
|
co = find_triple(triples, TG_CHAR_OFFSET, self.ENTITY_URI)
|
|
assert co is not None and co.o.value == "1000"
|
|
|
|
cl = find_triple(triples, TG_CHAR_LENGTH, self.ENTITY_URI)
|
|
assert cl is not None and cl.o.value == "500"
|
|
|
|
# chunk_size and chunk_overlap are on the activity, not the entity
|
|
cs = find_triple(triples, TG_CHUNK_SIZE)
|
|
assert cs is not None and cs.o.value == "512"
|
|
|
|
ov = find_triple(triples, TG_CHUNK_OVERLAP)
|
|
assert ov is not None and ov.o.value == "64"
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# subgraph_provenance_triples
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestSubgraphProvenanceTriples:
|
|
|
|
SG_URI = "https://trustgraph.ai/subgraph/test-sg"
|
|
CHUNK_URI = "https://example.com/doc/abc/p1/c0"
|
|
|
|
def _make_extracted_triple(self, s="urn:e:Alice", p="urn:r:knows", o="urn:e:Bob"):
|
|
return Triple(
|
|
s=Term(type=IRI, iri=s),
|
|
p=Term(type=IRI, iri=p),
|
|
o=Term(type=IRI, iri=o),
|
|
)
|
|
|
|
def test_contains_quoted_triples(self):
|
|
extracted = [self._make_extracted_triple()]
|
|
triples = subgraph_provenance_triples(
|
|
self.SG_URI, extracted, self.CHUNK_URI,
|
|
"kg-extractor", "1.0",
|
|
timestamp="2024-01-01T00:00:00Z",
|
|
)
|
|
contains = find_triples(triples, TG_CONTAINS, self.SG_URI)
|
|
assert len(contains) == 1
|
|
assert contains[0].o.type == TRIPLE
|
|
assert contains[0].o.triple.s.iri == "urn:e:Alice"
|
|
assert contains[0].o.triple.p.iri == "urn:r:knows"
|
|
assert contains[0].o.triple.o.iri == "urn:e:Bob"
|
|
|
|
def test_multiple_extracted_triples(self):
|
|
extracted = [
|
|
self._make_extracted_triple("urn:e:A", "urn:r:x", "urn:e:B"),
|
|
self._make_extracted_triple("urn:e:C", "urn:r:y", "urn:e:D"),
|
|
self._make_extracted_triple("urn:e:E", "urn:r:z", "urn:e:F"),
|
|
]
|
|
triples = subgraph_provenance_triples(
|
|
self.SG_URI, extracted, self.CHUNK_URI,
|
|
"kg-extractor", "1.0",
|
|
timestamp="2024-01-01T00:00:00Z",
|
|
)
|
|
contains = find_triples(triples, TG_CONTAINS, self.SG_URI)
|
|
assert len(contains) == 3
|
|
|
|
def test_empty_extracted_triples(self):
|
|
triples = subgraph_provenance_triples(
|
|
self.SG_URI, [], self.CHUNK_URI,
|
|
"kg-extractor", "1.0",
|
|
timestamp="2024-01-01T00:00:00Z",
|
|
)
|
|
contains = find_triples(triples, TG_CONTAINS, self.SG_URI)
|
|
assert len(contains) == 0
|
|
# Should still have subgraph provenance metadata
|
|
assert has_type(triples, self.SG_URI, TG_SUBGRAPH_TYPE)
|
|
|
|
def test_subgraph_has_correct_types(self):
|
|
triples = subgraph_provenance_triples(
|
|
self.SG_URI, [], self.CHUNK_URI,
|
|
"kg-extractor", "1.0",
|
|
timestamp="2024-01-01T00:00:00Z",
|
|
)
|
|
assert has_type(triples, self.SG_URI, PROV_ENTITY)
|
|
assert has_type(triples, self.SG_URI, TG_SUBGRAPH_TYPE)
|
|
|
|
def test_derived_from_chunk(self):
|
|
triples = subgraph_provenance_triples(
|
|
self.SG_URI, [], self.CHUNK_URI,
|
|
"kg-extractor", "1.0",
|
|
timestamp="2024-01-01T00:00:00Z",
|
|
)
|
|
derived = find_triple(triples, PROV_WAS_DERIVED_FROM, self.SG_URI)
|
|
assert derived is not None
|
|
assert derived.o.iri == self.CHUNK_URI
|
|
|
|
def test_activity_and_agent(self):
|
|
triples = subgraph_provenance_triples(
|
|
self.SG_URI, [], self.CHUNK_URI,
|
|
"kg-extractor", "1.0",
|
|
timestamp="2024-01-01T00:00:00Z",
|
|
)
|
|
gen = find_triple(triples, PROV_WAS_GENERATED_BY, self.SG_URI)
|
|
assert gen is not None
|
|
act_uri = gen.o.iri
|
|
|
|
assert has_type(triples, act_uri, PROV_ACTIVITY)
|
|
|
|
used = find_triple(triples, PROV_USED, act_uri)
|
|
assert used is not None
|
|
assert used.o.iri == self.CHUNK_URI
|
|
|
|
version = find_triple(triples, TG_COMPONENT_VERSION, act_uri)
|
|
assert version is not None
|
|
assert version.o.value == "1.0"
|
|
|
|
def test_optional_llm_model(self):
|
|
triples = subgraph_provenance_triples(
|
|
self.SG_URI, [], self.CHUNK_URI,
|
|
"kg-extractor", "1.0",
|
|
llm_model="claude-3-opus",
|
|
timestamp="2024-01-01T00:00:00Z",
|
|
)
|
|
llm = find_triple(triples, TG_LLM_MODEL)
|
|
assert llm is not None
|
|
assert llm.o.value == "claude-3-opus"
|
|
|
|
def test_no_llm_model_when_omitted(self):
|
|
triples = subgraph_provenance_triples(
|
|
self.SG_URI, [], self.CHUNK_URI,
|
|
"kg-extractor", "1.0",
|
|
timestamp="2024-01-01T00:00:00Z",
|
|
)
|
|
llm = find_triple(triples, TG_LLM_MODEL)
|
|
assert llm is None
|
|
|
|
def test_optional_ontology(self):
|
|
triples = subgraph_provenance_triples(
|
|
self.SG_URI, [], self.CHUNK_URI,
|
|
"kg-extractor", "1.0",
|
|
ontology_uri="https://example.com/ontology/v1",
|
|
timestamp="2024-01-01T00:00:00Z",
|
|
)
|
|
ont = find_triple(triples, TG_ONTOLOGY)
|
|
assert ont is not None
|
|
assert ont.o.type == IRI
|
|
assert ont.o.iri == "https://example.com/ontology/v1"
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# GraphRAG query-time triples
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestQuestionTriples:
|
|
|
|
Q_URI = "urn:trustgraph:question:test-session"
|
|
|
|
def test_question_types(self):
|
|
triples = question_triples(self.Q_URI, "What is AI?", "2024-01-01T00:00:00Z")
|
|
assert has_type(triples, self.Q_URI, PROV_ENTITY)
|
|
assert has_type(triples, self.Q_URI, TG_QUESTION)
|
|
assert has_type(triples, self.Q_URI, TG_GRAPH_RAG_QUESTION)
|
|
|
|
def test_question_query_text(self):
|
|
triples = question_triples(self.Q_URI, "What is AI?", "2024-01-01T00:00:00Z")
|
|
query = find_triple(triples, TG_QUERY, self.Q_URI)
|
|
assert query is not None
|
|
assert query.o.value == "What is AI?"
|
|
|
|
def test_question_timestamp(self):
|
|
triples = question_triples(self.Q_URI, "Q", "2024-06-15T10:00:00Z")
|
|
ts = find_triple(triples, PROV_STARTED_AT_TIME, self.Q_URI)
|
|
assert ts is not None
|
|
assert ts.o.value == "2024-06-15T10:00:00Z"
|
|
|
|
def test_question_default_timestamp(self):
|
|
triples = question_triples(self.Q_URI, "Q")
|
|
ts = find_triple(triples, PROV_STARTED_AT_TIME, self.Q_URI)
|
|
assert ts is not None
|
|
assert len(ts.o.value) > 0
|
|
|
|
def test_question_label(self):
|
|
triples = question_triples(self.Q_URI, "Q", "2024-01-01T00:00:00Z")
|
|
label = find_triple(triples, RDFS_LABEL, self.Q_URI)
|
|
assert label is not None
|
|
assert label.o.value == "GraphRAG Question"
|
|
|
|
def test_question_triple_count(self):
|
|
triples = question_triples(self.Q_URI, "Q", "2024-01-01T00:00:00Z")
|
|
assert len(triples) == 6
|
|
|
|
|
|
class TestGroundingTriples:
|
|
|
|
GND_URI = "urn:trustgraph:prov:grounding:test-session"
|
|
Q_URI = "urn:trustgraph:question:test-session"
|
|
|
|
def test_grounding_types(self):
|
|
triples = grounding_triples(self.GND_URI, self.Q_URI, ["AI", "ML"])
|
|
assert has_type(triples, self.GND_URI, PROV_ENTITY)
|
|
assert has_type(triples, self.GND_URI, TG_GROUNDING)
|
|
|
|
def test_grounding_derived_from_question(self):
|
|
triples = grounding_triples(self.GND_URI, self.Q_URI, ["AI"])
|
|
derived = find_triple(triples, PROV_WAS_DERIVED_FROM, self.GND_URI)
|
|
assert derived is not None
|
|
assert derived.o.iri == self.Q_URI
|
|
|
|
def test_grounding_concepts(self):
|
|
triples = grounding_triples(self.GND_URI, self.Q_URI, ["AI", "ML", "robots"])
|
|
concepts = find_triples(triples, TG_CONCEPT, self.GND_URI)
|
|
assert len(concepts) == 3
|
|
values = {t.o.value for t in concepts}
|
|
assert values == {"AI", "ML", "robots"}
|
|
|
|
def test_grounding_empty_concepts(self):
|
|
triples = grounding_triples(self.GND_URI, self.Q_URI, [])
|
|
concepts = find_triples(triples, TG_CONCEPT, self.GND_URI)
|
|
assert len(concepts) == 0
|
|
|
|
def test_grounding_label(self):
|
|
triples = grounding_triples(self.GND_URI, self.Q_URI, [])
|
|
label = find_triple(triples, RDFS_LABEL, self.GND_URI)
|
|
assert label is not None
|
|
assert label.o.value == "Grounding"
|
|
|
|
|
|
class TestExplorationTriples:
|
|
|
|
EXP_URI = "urn:trustgraph:prov:exploration:test-session"
|
|
GND_URI = "urn:trustgraph:prov:grounding:test-session"
|
|
|
|
def test_exploration_types(self):
|
|
triples = exploration_triples(self.EXP_URI, self.GND_URI, 15)
|
|
assert has_type(triples, self.EXP_URI, PROV_ENTITY)
|
|
assert has_type(triples, self.EXP_URI, TG_EXPLORATION)
|
|
|
|
def test_exploration_derived_from_grounding(self):
|
|
triples = exploration_triples(self.EXP_URI, self.GND_URI, 15)
|
|
derived = find_triple(triples, PROV_WAS_DERIVED_FROM, self.EXP_URI)
|
|
assert derived is not None
|
|
assert derived.o.iri == self.GND_URI
|
|
|
|
def test_exploration_edge_count(self):
|
|
triples = exploration_triples(self.EXP_URI, self.GND_URI, 15)
|
|
ec = find_triple(triples, TG_EDGE_COUNT, self.EXP_URI)
|
|
assert ec is not None
|
|
assert ec.o.value == "15"
|
|
|
|
def test_exploration_zero_edges(self):
|
|
triples = exploration_triples(self.EXP_URI, self.GND_URI, 0)
|
|
ec = find_triple(triples, TG_EDGE_COUNT, self.EXP_URI)
|
|
assert ec is not None
|
|
assert ec.o.value == "0"
|
|
|
|
def test_exploration_with_entities(self):
|
|
entities = ["urn:e:machine-learning", "urn:e:neural-networks"]
|
|
triples = exploration_triples(self.EXP_URI, self.GND_URI, 10, entities=entities)
|
|
ent_triples = find_triples(triples, TG_ENTITY, self.EXP_URI)
|
|
assert len(ent_triples) == 2
|
|
|
|
def test_exploration_triple_count(self):
|
|
triples = exploration_triples(self.EXP_URI, self.GND_URI, 10)
|
|
assert len(triples) == 5
|
|
|
|
|
|
class TestFocusTriples:
|
|
|
|
FOC_URI = "urn:trustgraph:prov:focus:test-session"
|
|
EXP_URI = "urn:trustgraph:prov:exploration:test-session"
|
|
SESSION_ID = "test-session"
|
|
|
|
def test_focus_types(self):
|
|
triples = focus_triples(self.FOC_URI, self.EXP_URI, [], self.SESSION_ID)
|
|
assert has_type(triples, self.FOC_URI, PROV_ENTITY)
|
|
assert has_type(triples, self.FOC_URI, TG_FOCUS)
|
|
|
|
def test_focus_derived_from_exploration(self):
|
|
triples = focus_triples(self.FOC_URI, self.EXP_URI, [], self.SESSION_ID)
|
|
derived = find_triple(triples, PROV_WAS_DERIVED_FROM, self.FOC_URI)
|
|
assert derived is not None
|
|
assert derived.o.iri == self.EXP_URI
|
|
|
|
def test_focus_no_edges(self):
|
|
triples = focus_triples(self.FOC_URI, self.EXP_URI, [], self.SESSION_ID)
|
|
selected = find_triples(triples, TG_SELECTED_EDGE)
|
|
assert len(selected) == 0
|
|
|
|
def test_focus_with_edges_and_reasoning(self):
|
|
edges = [
|
|
{
|
|
"edge": ("urn:e:Alice", "urn:r:knows", "urn:e:Bob"),
|
|
"reasoning": "Alice is connected to Bob",
|
|
},
|
|
{
|
|
"edge": ("urn:e:Bob", "urn:r:worksAt", "urn:e:Acme"),
|
|
"reasoning": "Bob works at Acme",
|
|
},
|
|
]
|
|
triples = focus_triples(self.FOC_URI, self.EXP_URI, edges, self.SESSION_ID)
|
|
|
|
# Two selectedEdge links
|
|
selected = find_triples(triples, TG_SELECTED_EDGE, self.FOC_URI)
|
|
assert len(selected) == 2
|
|
|
|
# Each edge selection has a quoted triple
|
|
edge_triples = find_triples(triples, TG_EDGE)
|
|
assert len(edge_triples) == 2
|
|
for et in edge_triples:
|
|
assert et.o.type == TRIPLE
|
|
|
|
# Each edge selection has reasoning
|
|
reasoning_triples = find_triples(triples, TG_REASONING)
|
|
assert len(reasoning_triples) == 2
|
|
|
|
def test_focus_edge_without_reasoning(self):
|
|
edges = [
|
|
{"edge": ("urn:e:A", "urn:r:x", "urn:e:B"), "reasoning": ""},
|
|
]
|
|
triples = focus_triples(self.FOC_URI, self.EXP_URI, edges, self.SESSION_ID)
|
|
reasoning = find_triples(triples, TG_REASONING)
|
|
assert len(reasoning) == 0
|
|
|
|
def test_focus_edge_without_edge_data(self):
|
|
edges = [
|
|
{"edge": None, "reasoning": "some reasoning"},
|
|
]
|
|
triples = focus_triples(self.FOC_URI, self.EXP_URI, edges, self.SESSION_ID)
|
|
selected = find_triples(triples, TG_SELECTED_EDGE)
|
|
assert len(selected) == 0
|
|
|
|
def test_focus_quoted_triple_content(self):
|
|
edges = [
|
|
{
|
|
"edge": ("urn:e:Alice", "urn:r:knows", "urn:e:Bob"),
|
|
"reasoning": "test",
|
|
},
|
|
]
|
|
triples = focus_triples(self.FOC_URI, self.EXP_URI, edges, self.SESSION_ID)
|
|
edge_t = find_triple(triples, TG_EDGE)
|
|
qt = edge_t.o.triple
|
|
assert qt.s.iri == "urn:e:Alice"
|
|
assert qt.p.iri == "urn:r:knows"
|
|
assert qt.o.iri == "urn:e:Bob"
|
|
|
|
|
|
class TestSynthesisTriples:
|
|
|
|
SYN_URI = "urn:trustgraph:prov:synthesis:test-session"
|
|
FOC_URI = "urn:trustgraph:prov:focus:test-session"
|
|
|
|
def test_synthesis_types(self):
|
|
triples = synthesis_triples(self.SYN_URI, self.FOC_URI)
|
|
assert has_type(triples, self.SYN_URI, PROV_ENTITY)
|
|
assert has_type(triples, self.SYN_URI, TG_SYNTHESIS)
|
|
assert has_type(triples, self.SYN_URI, TG_ANSWER_TYPE)
|
|
|
|
def test_synthesis_derived_from_focus(self):
|
|
triples = synthesis_triples(self.SYN_URI, self.FOC_URI)
|
|
derived = find_triple(triples, PROV_WAS_DERIVED_FROM, self.SYN_URI)
|
|
assert derived is not None
|
|
assert derived.o.iri == self.FOC_URI
|
|
|
|
def test_synthesis_with_document_reference(self):
|
|
triples = synthesis_triples(
|
|
self.SYN_URI, self.FOC_URI,
|
|
document_id="urn:trustgraph:question:abc/answer",
|
|
)
|
|
doc = find_triple(triples, TG_DOCUMENT, self.SYN_URI)
|
|
assert doc is not None
|
|
assert doc.o.type == IRI
|
|
assert doc.o.iri == "urn:trustgraph:question:abc/answer"
|
|
|
|
def test_synthesis_no_document(self):
|
|
triples = synthesis_triples(self.SYN_URI, self.FOC_URI)
|
|
doc = find_triple(triples, TG_DOCUMENT, self.SYN_URI)
|
|
assert doc is None
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# DocumentRAG query-time triples
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestDocRagQuestionTriples:
|
|
|
|
Q_URI = "urn:trustgraph:docrag:test-session"
|
|
|
|
def test_docrag_question_types(self):
|
|
triples = docrag_question_triples(self.Q_URI, "Find info", "2024-01-01T00:00:00Z")
|
|
assert has_type(triples, self.Q_URI, PROV_ENTITY)
|
|
assert has_type(triples, self.Q_URI, TG_QUESTION)
|
|
assert has_type(triples, self.Q_URI, TG_DOC_RAG_QUESTION)
|
|
|
|
def test_docrag_question_label(self):
|
|
triples = docrag_question_triples(self.Q_URI, "Q", "2024-01-01T00:00:00Z")
|
|
label = find_triple(triples, RDFS_LABEL, self.Q_URI)
|
|
assert label.o.value == "DocumentRAG Question"
|
|
|
|
def test_docrag_question_query_text(self):
|
|
triples = docrag_question_triples(self.Q_URI, "search query", "2024-01-01T00:00:00Z")
|
|
query = find_triple(triples, TG_QUERY, self.Q_URI)
|
|
assert query.o.value == "search query"
|
|
|
|
|
|
class TestDocRagExplorationTriples:
|
|
|
|
EXP_URI = "urn:trustgraph:docrag:test/exploration"
|
|
GND_URI = "urn:trustgraph:docrag:test/grounding"
|
|
|
|
def test_docrag_exploration_types(self):
|
|
triples = docrag_exploration_triples(self.EXP_URI, self.GND_URI, 5)
|
|
assert has_type(triples, self.EXP_URI, PROV_ENTITY)
|
|
assert has_type(triples, self.EXP_URI, TG_EXPLORATION)
|
|
|
|
def test_docrag_exploration_derived_from_grounding(self):
|
|
triples = docrag_exploration_triples(self.EXP_URI, self.GND_URI, 5)
|
|
derived = find_triple(triples, PROV_WAS_DERIVED_FROM, self.EXP_URI)
|
|
assert derived.o.iri == self.GND_URI
|
|
|
|
def test_docrag_exploration_chunk_count(self):
|
|
triples = docrag_exploration_triples(self.EXP_URI, self.GND_URI, 7)
|
|
cc = find_triple(triples, TG_CHUNK_COUNT, self.EXP_URI)
|
|
assert cc.o.value == "7"
|
|
|
|
def test_docrag_exploration_without_chunk_ids(self):
|
|
triples = docrag_exploration_triples(self.EXP_URI, self.GND_URI, 3)
|
|
chunks = find_triples(triples, TG_SELECTED_CHUNK)
|
|
assert len(chunks) == 0
|
|
|
|
def test_docrag_exploration_with_chunk_ids(self):
|
|
chunk_ids = ["urn:chunk:1", "urn:chunk:2", "urn:chunk:3"]
|
|
triples = docrag_exploration_triples(self.EXP_URI, self.GND_URI, 3, chunk_ids)
|
|
chunks = find_triples(triples, TG_SELECTED_CHUNK, self.EXP_URI)
|
|
assert len(chunks) == 3
|
|
chunk_uris = {t.o.iri for t in chunks}
|
|
assert chunk_uris == set(chunk_ids)
|
|
|
|
|
|
class TestDocRagSynthesisTriples:
|
|
|
|
SYN_URI = "urn:trustgraph:docrag:test/synthesis"
|
|
EXP_URI = "urn:trustgraph:docrag:test/exploration"
|
|
|
|
def test_docrag_synthesis_types(self):
|
|
triples = docrag_synthesis_triples(self.SYN_URI, self.EXP_URI)
|
|
assert has_type(triples, self.SYN_URI, PROV_ENTITY)
|
|
assert has_type(triples, self.SYN_URI, TG_SYNTHESIS)
|
|
|
|
def test_docrag_synthesis_derived_from_exploration(self):
|
|
"""DocRAG skips the focus step — synthesis derives from exploration."""
|
|
triples = docrag_synthesis_triples(self.SYN_URI, self.EXP_URI)
|
|
derived = find_triple(triples, PROV_WAS_DERIVED_FROM, self.SYN_URI)
|
|
assert derived.o.iri == self.EXP_URI
|
|
|
|
def test_docrag_synthesis_has_answer_type(self):
|
|
triples = docrag_synthesis_triples(self.SYN_URI, self.EXP_URI)
|
|
assert has_type(triples, self.SYN_URI, TG_ANSWER_TYPE)
|
|
|
|
def test_docrag_synthesis_with_document(self):
|
|
triples = docrag_synthesis_triples(
|
|
self.SYN_URI, self.EXP_URI, document_id="urn:doc:ans"
|
|
)
|
|
doc = find_triple(triples, TG_DOCUMENT, self.SYN_URI)
|
|
assert doc.o.iri == "urn:doc:ans"
|
|
|
|
def test_docrag_synthesis_no_document(self):
|
|
triples = docrag_synthesis_triples(self.SYN_URI, self.EXP_URI)
|
|
doc = find_triple(triples, TG_DOCUMENT, self.SYN_URI)
|
|
assert doc is None
|