trustgraph/tests/unit/test_provenance/test_vocabulary.py
cybermaggedon 29b4300808
Updated test suite for explainability & provenance (#696)
* Provenance tests

* Embeddings tests

* Test librarian

* Test triples stream

* Test concurrency

* Entity centric graph writes

* Agent tool service tests

* Structured data tests

* RDF tests

* Additional LLM tests

* Reliability tests
2026-03-13 14:27:42 +00:00

124 lines
4 KiB
Python

"""
Tests for provenance vocabulary bootstrap.
"""
import pytest
from trustgraph.schema import Triple, Term, IRI, LITERAL
from trustgraph.provenance.vocabulary import (
get_vocabulary_triples,
PROV_CLASS_LABELS,
PROV_PREDICATE_LABELS,
DC_PREDICATE_LABELS,
SCHEMA_LABELS,
SKOS_LABELS,
TG_CLASS_LABELS,
TG_PREDICATE_LABELS,
)
from trustgraph.provenance.namespaces import (
RDFS_LABEL,
PROV_ENTITY, PROV_ACTIVITY, PROV_AGENT,
PROV_WAS_DERIVED_FROM, PROV_WAS_GENERATED_BY,
PROV_USED, PROV_WAS_ASSOCIATED_WITH, PROV_STARTED_AT_TIME,
DC_TITLE, DC_SOURCE, DC_DATE, DC_CREATOR,
TG_DOCUMENT_TYPE, TG_PAGE_TYPE, TG_CHUNK_TYPE, TG_SUBGRAPH_TYPE,
)
class TestVocabularyTriples:
    """Checks on the triples produced by get_vocabulary_triples()."""

    @staticmethod
    def _subject_iris():
        """Collect the subject IRI of every vocabulary triple into a set."""
        return {triple.s.iri for triple in get_vocabulary_triples()}

    def test_returns_list_of_triples(self):
        """The result is a non-empty list containing only Triple objects."""
        triples = get_vocabulary_triples()
        assert isinstance(triples, list)
        assert len(triples) > 0
        for triple in triples:
            assert isinstance(triple, Triple)

    def test_all_triples_are_label_triples(self):
        """Every vocabulary triple should use rdfs:label as predicate."""
        for triple in get_vocabulary_triples():
            predicate = triple.p
            assert predicate.type == IRI
            assert predicate.iri == RDFS_LABEL

    def test_all_subjects_are_iris(self):
        """Each subject is IRI-typed and carries a non-empty IRI string."""
        for triple in get_vocabulary_triples():
            subject = triple.s
            assert subject.type == IRI
            assert len(subject.iri) > 0

    def test_all_objects_are_literals(self):
        """Each object is LITERAL-typed and carries a non-empty value."""
        for triple in get_vocabulary_triples():
            label = triple.o
            assert label.type == LITERAL
            assert len(label.value) > 0

    def test_no_duplicate_subjects(self):
        """No subject IRI appears in more than one vocabulary triple."""
        subject_iris = [triple.s.iri for triple in get_vocabulary_triples()]
        unique_iris = set(subject_iris)
        # A set drops repeats, so equal sizes mean every subject is unique.
        assert len(subject_iris) == len(unique_iris)

    def test_includes_prov_classes(self):
        """The PROV class constants imported here all appear as subjects."""
        subjects = self._subject_iris()
        for iri in (PROV_ENTITY, PROV_ACTIVITY, PROV_AGENT):
            assert iri in subjects

    def test_includes_prov_predicates(self):
        """The PROV predicate constants imported here all appear as subjects."""
        subjects = self._subject_iris()
        expected = (
            PROV_WAS_DERIVED_FROM,
            PROV_WAS_GENERATED_BY,
            PROV_USED,
            PROV_WAS_ASSOCIATED_WITH,
            PROV_STARTED_AT_TIME,
        )
        for iri in expected:
            assert iri in subjects

    def test_includes_dc_predicates(self):
        """The DC predicate constants imported here all appear as subjects."""
        subjects = self._subject_iris()
        for iri in (DC_TITLE, DC_SOURCE, DC_DATE, DC_CREATOR):
            assert iri in subjects

    def test_includes_tg_classes(self):
        """The TG type constants imported here all appear as subjects."""
        subjects = self._subject_iris()
        expected = (
            TG_DOCUMENT_TYPE,
            TG_PAGE_TYPE,
            TG_CHUNK_TYPE,
            TG_SUBGRAPH_TYPE,
        )
        for iri in expected:
            assert iri in subjects

    def test_component_lists_sum_to_total(self):
        """The combined label collections are as long as the full result."""
        everything = get_vocabulary_triples()
        # Concatenate group by group; the overall order matches a single
        # left-to-right chain of the seven collections.
        prov_labels = PROV_CLASS_LABELS + PROV_PREDICATE_LABELS
        external_labels = DC_PREDICATE_LABELS + SCHEMA_LABELS + SKOS_LABELS
        tg_labels = TG_CLASS_LABELS + TG_PREDICATE_LABELS
        combined = prov_labels + external_labels + tg_labels
        assert len(everything) == len(combined)

    def test_idempotent(self):
        """Calling twice should return equivalent triples."""
        first = get_vocabulary_triples()
        second = get_vocabulary_triples()
        assert len(first) == len(second)
        # Lengths are equal at this point, so indexing the second result
        # by position pairs up exactly the same elements as zipping would.
        for position, earlier in enumerate(first):
            later = second[position]
            assert earlier.s.iri == later.s.iri
            assert earlier.o.value == later.o.value
class TestNamespaceConstants:
    """Verify namespace constants are well-formed IRIs."""

    def test_prov_namespace_prefix(self):
        """PROV_ENTITY begins with the PROV namespace IRI."""
        prov_prefix = "http://www.w3.org/ns/prov#"
        assert PROV_ENTITY.startswith(prov_prefix)

    def test_dc_namespace_prefix(self):
        """DC_TITLE begins with the Dublin Core elements 1.1 namespace IRI."""
        dc_prefix = "http://purl.org/dc/elements/1.1/"
        assert DC_TITLE.startswith(dc_prefix)

    def test_tg_namespace_prefix(self):
        """TG_DOCUMENT_TYPE begins with the TrustGraph namespace IRI."""
        tg_prefix = "https://trustgraph.ai/ns/"
        assert TG_DOCUMENT_TYPE.startswith(tg_prefix)

    def test_rdfs_label_iri(self):
        """RDFS_LABEL equals the full rdfs:label IRI."""
        expected_iri = "http://www.w3.org/2000/01/rdf-schema#label"
        assert RDFS_LABEL == expected_iri