test(ontology): harden domain/range validation + add missing tests (#848)

Fixes #826. Addresses all five points the maintainer called out in
the follow-up to #825.

Source change (trustgraph-flow/trustgraph/extract/kg/ontology/extract.py):
- Added `_is_subclass_of(cls, target, ontology_subset, max_depth=100)`
  helper with visited-set cycle detection + a defensive depth cap.
  LLM-generated ontologies may emit cycles (A subclass_of B,
  B subclass_of A); the prior while-loop would infinite-loop on that.
- Replaced the two near-identical subclass walks in `is_valid_triple`
  (one for the domain check, one for the range check) with calls to
  the new helper. Net change: 20 duplicated lines removed, one
  26-line helper added.

Tests (tests/unit/test_extract/test_ontology/test_prompt_and_extraction.py):
- test_is_valid_triple_subclass_is_accepted: the domain expects Recipe
  and the actual type is Cake (a subclass of Recipe), so the triple
  validates.
- test_is_valid_triple_handles_subclass_cycle_without_infinite_loop:
  A subclass_of B, B subclass_of A; call returns False within the
  depth cap rather than hanging.
- test_parse_and_validate_triples_collects_entity_types_from_rdf_type:
  end-to-end path: rdf:type triples build the entity_types dict,
  subsequent domain-check triples validate against it.
- test_is_valid_triple_entity_types_none_default: the None default
  path now has explicit coverage.

156 existing tests in tests/unit/test_extract/test_ontology still pass.
This commit is contained in:
Trevin Chow 2026-04-28 08:33:49 -07:00 committed by Cyber MacGeddon
parent 5e28d3cce0
commit 6302eb8c97
2 changed files with 107 additions and 25 deletions

View file

@ -277,6 +277,60 @@ class TestTripleValidation:
is_invalid = extractor.is_valid_triple(subject, predicate, object_val, sample_ontology_subset, entity_types_invalid)
assert not is_invalid, "Invalid range should be rejected"
def test_is_valid_triple_subclass_is_accepted(self, extractor, sample_ontology_subset):
    """A subject typed as a subclass of the property's domain validates.

    ``Cake`` is registered with ``subclass_of`` set to ``Recipe`` and
    ``has_ingredient`` declares ``Recipe`` as its domain, so a subject
    typed as ``Cake`` is expected to be accepted.
    """
    # Arrange: the subclass definition and the property under test.
    cake_class = {
        "uri": "http://purl.org/ontology/fo/Cake",
        "type": "owl:Class",
        "subclass_of": "Recipe",
    }
    has_ingredient = {"domain": "Recipe", "range": "Ingredient"}
    sample_ontology_subset.classes["Cake"] = cake_class
    sample_ontology_subset.object_properties["has_ingredient"] = has_ingredient

    known_types = {
        "cake:lemon-drizzle": "Cake",
        "ingredient:lemon": "Ingredient",
    }

    # Act
    outcome = extractor.is_valid_triple(
        subject="cake:lemon-drizzle",
        predicate="has_ingredient",
        object_val="ingredient:lemon",
        ontology_subset=sample_ontology_subset,
        entity_types=known_types,
    )

    # Assert: identity check, so a merely truthy return value does not pass.
    assert outcome is True
def test_is_valid_triple_handles_subclass_cycle_without_infinite_loop(self, extractor, sample_ontology_subset):
    """A cyclic ``subclass_of`` chain must yield ``False`` instead of hanging."""
    # Arrange: A -> B -> A. Neither class names "Recipe", the domain of "p",
    # as its parent, so walking the chain only ever revisits A and B.
    for child, parent in (("A", "B"), ("B", "A")):
        sample_ontology_subset.classes[child] = {"subclass_of": parent}
    sample_ontology_subset.object_properties["p"] = {"domain": "Recipe", "range": "Ingredient"}

    known_types = {"entity:x": "A", "ingredient:y": "Ingredient"}

    # Act: the subject is typed with a class that sits on the cycle.
    outcome = extractor.is_valid_triple(
        subject="entity:x",
        predicate="p",
        object_val="ingredient:y",
        ontology_subset=sample_ontology_subset,
        entity_types=known_types,
    )

    # Assert: the call returned at all, and rejected the triple.
    assert outcome is False
def test_is_valid_triple_entity_types_none_default(self, extractor, sample_ontology_subset):
    """Omitting ``entity_types`` must not raise, and the triple is accepted.

    No type information is supplied for either entity, so the call
    exercises the default value of the ``entity_types`` parameter.
    """
    # Arrange
    has_ingredient = {"domain": "Recipe", "range": "Ingredient"}
    sample_ontology_subset.object_properties["has_ingredient"] = has_ingredient

    # Act: entity_types is deliberately left out so the default is used.
    outcome = extractor.is_valid_triple(
        subject="recipe:x",
        predicate="has_ingredient",
        object_val="ingredient:y",
        ontology_subset=sample_ontology_subset,
    )

    # Assert
    assert outcome is True
class TestTripleParsing:
"""Test suite for parsing triples from LLM responses."""
@ -377,6 +431,24 @@ class TestTripleParsing:
assert triple.p.type == IRI, "Predicate should be IRI type"
assert triple.o.type == LITERAL, "Object literal should be LITERAL type"
def test_parse_and_validate_triples_collects_entity_types_from_rdf_type(self, extractor, sample_ontology_subset):
    """``rdf:type`` triples and a property triple from one batch all validate.

    The batch types a recipe and an ingredient via ``rdf:type`` and then
    links them with ``has_ingredient``; the test asserts that all three
    triples come back from ``parse_and_validate_triples``.
    """
    # Arrange: the property whose domain/range the third triple must satisfy.
    sample_ontology_subset.object_properties["has_ingredient"] = {
        "domain": "Recipe",
        "range": "Ingredient",
    }

    # (subject, predicate, object) rows, expanded into the dict shape
    # that parse_and_validate_triples is given.
    rows = (
        ("recipe:cornish-pasty", "rdf:type", "Recipe"),
        ("ingredient:beef", "rdf:type", "Ingredient"),
        ("recipe:cornish-pasty", "has_ingredient", "ingredient:beef"),
    )
    triples_response = [
        {"subject": subj, "predicate": pred, "object": obj}
        for subj, pred, obj in rows
    ]

    # Act
    valid_triples = extractor.parse_and_validate_triples(
        triples_response, sample_ontology_subset
    )

    # Assert: nothing from the batch was dropped.
    assert len(valid_triples) == 3
class TestURIExpansionInExtraction:
"""Test suite for URI expansion during triple extraction."""