Changed schema for Value -> Term, a major breaking change (#622)

* Changed schema for Value -> Term, a major breaking change

* Following the schema change, propagated Value -> Term into all processing

* Updated Cassandra for g, p, s, o index patterns (7 indexes)

* Reviewed and updated all tests

* Neo4j, Memgraph and FalkorDB remain broken; will look at them once things have settled down
This commit is contained in:
cybermaggedon 2026-01-27 13:48:08 +00:00 committed by GitHub
parent e061f2c633
commit cf0daedefa
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
86 changed files with 2458 additions and 1764 deletions

View file

@ -12,7 +12,7 @@ import json
from unittest.mock import AsyncMock, MagicMock, patch
from trustgraph.extract.kg.agent.extract import Processor as AgentKgExtractor
from trustgraph.schema import Chunk, Triple, Triples, Metadata, Value, Error
from trustgraph.schema import Chunk, Triple, Triples, Metadata, Term, Error, IRI, LITERAL
from trustgraph.schema import EntityContext, EntityContexts, AgentRequest, AgentResponse
from trustgraph.rdf import TRUSTGRAPH_ENTITIES, DEFINITION, RDF_LABEL, SUBJECT_OF
from trustgraph.template.prompt_manager import PromptManager
@ -78,9 +78,9 @@ class TestAgentKgExtractionIntegration:
id="doc123",
metadata=[
Triple(
s=Value(value="doc123", is_uri=True),
p=Value(value="http://example.org/type", is_uri=True),
o=Value(value="document", is_uri=False)
s=Term(type=IRI, iri="doc123"),
p=Term(type=IRI, iri="http://example.org/type"),
o=Term(type=LITERAL, value="document")
)
]
)
@ -178,15 +178,15 @@ class TestAgentKgExtractionIntegration:
assert len(sent_triples.triples) > 0
# Check that we have definition triples
definition_triples = [t for t in sent_triples.triples if t.p.value == DEFINITION]
definition_triples = [t for t in sent_triples.triples if t.p.iri == DEFINITION]
assert len(definition_triples) >= 2 # Should have definitions for ML and Neural Networks
# Check that we have label triples
label_triples = [t for t in sent_triples.triples if t.p.value == RDF_LABEL]
label_triples = [t for t in sent_triples.triples if t.p.iri == RDF_LABEL]
assert len(label_triples) >= 2 # Should have labels for entities
# Check subject-of relationships
subject_of_triples = [t for t in sent_triples.triples if t.p.value == SUBJECT_OF]
subject_of_triples = [t for t in sent_triples.triples if t.p.iri == SUBJECT_OF]
assert len(subject_of_triples) >= 2 # Entities should be linked to document
# Verify entity contexts were emitted
@ -198,7 +198,7 @@ class TestAgentKgExtractionIntegration:
assert len(sent_contexts.entities) >= 2 # Should have contexts for both entities
# Verify entity URIs are properly formed
entity_uris = [ec.entity.value for ec in sent_contexts.entities]
entity_uris = [ec.entity.iri for ec in sent_contexts.entities]
assert f"{TRUSTGRAPH_ENTITIES}Machine%20Learning" in entity_uris
assert f"{TRUSTGRAPH_ENTITIES}Neural%20Networks" in entity_uris
@ -401,7 +401,7 @@ class TestAgentKgExtractionIntegration:
sent_triples = triples_publisher.send.call_args[0][0]
# Check that unicode entity was properly processed
entity_labels = [t for t in sent_triples.triples if t.p.value == RDF_LABEL and t.o.value == "機械学習"]
entity_labels = [t for t in sent_triples.triples if t.p.iri == RDF_LABEL and t.o.value == "機械学習"]
assert len(entity_labels) > 0
@pytest.mark.asyncio

View file

@ -16,7 +16,7 @@ from .cassandra_test_helper import cassandra_container
from trustgraph.direct.cassandra_kg import KnowledgeGraph
from trustgraph.storage.triples.cassandra.write import Processor as StorageProcessor
from trustgraph.query.triples.cassandra.service import Processor as QueryProcessor
from trustgraph.schema import Triple, Value, Metadata, Triples, TriplesQueryRequest
from trustgraph.schema import Triple, Term, Metadata, Triples, TriplesQueryRequest, IRI, LITERAL
@pytest.mark.integration
@ -118,19 +118,19 @@ class TestCassandraIntegration:
metadata=Metadata(user="testuser", collection="testcol"),
triples=[
Triple(
s=Value(value="http://example.org/person1", is_uri=True),
p=Value(value="http://example.org/name", is_uri=True),
o=Value(value="Alice Smith", is_uri=False)
s=Term(type=IRI, iri="http://example.org/person1"),
p=Term(type=IRI, iri="http://example.org/name"),
o=Term(type=LITERAL, value="Alice Smith")
),
Triple(
s=Value(value="http://example.org/person1", is_uri=True),
p=Value(value="http://example.org/age", is_uri=True),
o=Value(value="25", is_uri=False)
s=Term(type=IRI, iri="http://example.org/person1"),
p=Term(type=IRI, iri="http://example.org/age"),
o=Term(type=LITERAL, value="25")
),
Triple(
s=Value(value="http://example.org/person1", is_uri=True),
p=Value(value="http://example.org/department", is_uri=True),
o=Value(value="Engineering", is_uri=False)
s=Term(type=IRI, iri="http://example.org/person1"),
p=Term(type=IRI, iri="http://example.org/department"),
o=Term(type=LITERAL, value="Engineering")
)
]
)
@ -181,19 +181,19 @@ class TestCassandraIntegration:
metadata=Metadata(user="testuser", collection="testcol"),
triples=[
Triple(
s=Value(value="http://example.org/alice", is_uri=True),
p=Value(value="http://example.org/knows", is_uri=True),
o=Value(value="http://example.org/bob", is_uri=True)
s=Term(type=IRI, iri="http://example.org/alice"),
p=Term(type=IRI, iri="http://example.org/knows"),
o=Term(type=IRI, iri="http://example.org/bob")
),
Triple(
s=Value(value="http://example.org/alice", is_uri=True),
p=Value(value="http://example.org/age", is_uri=True),
o=Value(value="30", is_uri=False)
s=Term(type=IRI, iri="http://example.org/alice"),
p=Term(type=IRI, iri="http://example.org/age"),
o=Term(type=LITERAL, value="30")
),
Triple(
s=Value(value="http://example.org/bob", is_uri=True),
p=Value(value="http://example.org/knows", is_uri=True),
o=Value(value="http://example.org/charlie", is_uri=True)
s=Term(type=IRI, iri="http://example.org/bob"),
p=Term(type=IRI, iri="http://example.org/knows"),
o=Term(type=IRI, iri="http://example.org/charlie")
)
]
)
@ -208,7 +208,7 @@ class TestCassandraIntegration:
# Test S query (find all relationships for Alice)
s_query = TriplesQueryRequest(
s=Value(value="http://example.org/alice", is_uri=True),
s=Term(type=IRI, iri="http://example.org/alice"),
p=None, # None for wildcard
o=None, # None for wildcard
limit=10,
@ -218,18 +218,18 @@ class TestCassandraIntegration:
s_results = await query_processor.query_triples(s_query)
print(f"Query processor results: {len(s_results)}")
for result in s_results:
print(f" S={result.s.value}, P={result.p.value}, O={result.o.value}")
print(f" S={result.s.iri}, P={result.p.iri}, O={result.o.iri if result.o.type == IRI else result.o.value}")
assert len(s_results) == 2
s_predicates = [t.p.value for t in s_results]
s_predicates = [t.p.iri for t in s_results]
assert "http://example.org/knows" in s_predicates
assert "http://example.org/age" in s_predicates
print("✓ Subject queries via processor working")
# Test P query (find all "knows" relationships)
p_query = TriplesQueryRequest(
s=None, # None for wildcard
p=Value(value="http://example.org/knows", is_uri=True),
p=Term(type=IRI, iri="http://example.org/knows"),
o=None, # None for wildcard
limit=10,
user="testuser",
@ -238,8 +238,8 @@ class TestCassandraIntegration:
p_results = await query_processor.query_triples(p_query)
print(p_results)
assert len(p_results) == 2 # Alice knows Bob, Bob knows Charlie
p_subjects = [t.s.value for t in p_results]
p_subjects = [t.s.iri for t in p_results]
assert "http://example.org/alice" in p_subjects
assert "http://example.org/bob" in p_subjects
print("✓ Predicate queries via processor working")
@ -262,19 +262,19 @@ class TestCassandraIntegration:
metadata=Metadata(user="concurrent_test", collection="people"),
triples=[
Triple(
s=Value(value=f"http://example.org/{person_id}", is_uri=True),
p=Value(value="http://example.org/name", is_uri=True),
o=Value(value=name, is_uri=False)
s=Term(type=IRI, iri=f"http://example.org/{person_id}"),
p=Term(type=IRI, iri="http://example.org/name"),
o=Term(type=LITERAL, value=name)
),
Triple(
s=Value(value=f"http://example.org/{person_id}", is_uri=True),
p=Value(value="http://example.org/age", is_uri=True),
o=Value(value=str(age), is_uri=False)
s=Term(type=IRI, iri=f"http://example.org/{person_id}"),
p=Term(type=IRI, iri="http://example.org/age"),
o=Term(type=LITERAL, value=str(age))
),
Triple(
s=Value(value=f"http://example.org/{person_id}", is_uri=True),
p=Value(value="http://example.org/department", is_uri=True),
o=Value(value=department, is_uri=False)
s=Term(type=IRI, iri=f"http://example.org/{person_id}"),
p=Term(type=IRI, iri="http://example.org/department"),
o=Term(type=LITERAL, value=department)
)
]
)
@ -333,36 +333,36 @@ class TestCassandraIntegration:
triples=[
# People and their types
Triple(
s=Value(value="http://company.org/alice", is_uri=True),
p=Value(value="http://www.w3.org/1999/02/22-rdf-syntax-ns#type", is_uri=True),
o=Value(value="http://company.org/Employee", is_uri=True)
s=Term(type=IRI, iri="http://company.org/alice"),
p=Term(type=IRI, iri="http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
o=Term(type=IRI, iri="http://company.org/Employee")
),
Triple(
s=Value(value="http://company.org/bob", is_uri=True),
p=Value(value="http://www.w3.org/1999/02/22-rdf-syntax-ns#type", is_uri=True),
o=Value(value="http://company.org/Employee", is_uri=True)
s=Term(type=IRI, iri="http://company.org/bob"),
p=Term(type=IRI, iri="http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
o=Term(type=IRI, iri="http://company.org/Employee")
),
# Relationships
Triple(
s=Value(value="http://company.org/alice", is_uri=True),
p=Value(value="http://company.org/reportsTo", is_uri=True),
o=Value(value="http://company.org/bob", is_uri=True)
s=Term(type=IRI, iri="http://company.org/alice"),
p=Term(type=IRI, iri="http://company.org/reportsTo"),
o=Term(type=IRI, iri="http://company.org/bob")
),
Triple(
s=Value(value="http://company.org/alice", is_uri=True),
p=Value(value="http://company.org/worksIn", is_uri=True),
o=Value(value="http://company.org/engineering", is_uri=True)
s=Term(type=IRI, iri="http://company.org/alice"),
p=Term(type=IRI, iri="http://company.org/worksIn"),
o=Term(type=IRI, iri="http://company.org/engineering")
),
# Personal info
Triple(
s=Value(value="http://company.org/alice", is_uri=True),
p=Value(value="http://company.org/fullName", is_uri=True),
o=Value(value="Alice Johnson", is_uri=False)
s=Term(type=IRI, iri="http://company.org/alice"),
p=Term(type=IRI, iri="http://company.org/fullName"),
o=Term(type=LITERAL, value="Alice Johnson")
),
Triple(
s=Value(value="http://company.org/alice", is_uri=True),
p=Value(value="http://company.org/email", is_uri=True),
o=Value(value="alice@company.org", is_uri=False)
s=Term(type=IRI, iri="http://company.org/alice"),
p=Term(type=IRI, iri="http://company.org/email"),
o=Term(type=LITERAL, value="alice@company.org")
),
]
)

View file

@ -51,10 +51,10 @@ class MockWebSocket:
"metadata": {
"id": "test-id",
"metadata": {},
"user": "test-user",
"user": "test-user",
"collection": "test-collection"
},
"triples": [{"s": {"v": "subject", "e": False}, "p": {"v": "predicate", "e": False}, "o": {"v": "object", "e": False}}]
"triples": [{"s": {"t": "l", "v": "subject"}, "p": {"t": "l", "v": "predicate"}, "o": {"t": "l", "v": "object"}}]
}
@ -118,7 +118,7 @@ async def test_import_graceful_shutdown_integration(mock_backend):
"user": "test-user",
"collection": "test-collection"
},
"triples": [{"s": {"v": f"subject-{i}", "e": False}, "p": {"v": "predicate", "e": False}, "o": {"v": f"object-{i}", "e": False}}]
"triples": [{"s": {"t": "l", "v": f"subject-{i}"}, "p": {"t": "l", "v": "predicate"}, "o": {"t": "l", "v": f"object-{i}"}}]
}
messages.append(msg_data)
@ -163,7 +163,7 @@ async def test_export_no_message_loss_integration(mock_backend):
"user": "test-user",
"collection": "test-collection"
},
"triples": [{"s": {"v": f"export-subject-{i}", "e": False}, "p": {"v": "predicate", "e": False}, "o": {"v": f"export-object-{i}", "e": False}}]
"triples": [{"s": {"t": "l", "v": f"export-subject-{i}"}, "p": {"t": "l", "v": "predicate"}, "o": {"t": "l", "v": f"export-object-{i}"}}]
}
# Create Triples object instead of raw dict
from trustgraph.schema import Triples, Metadata
@ -302,7 +302,7 @@ async def test_concurrent_import_export_shutdown():
"user": "test-user",
"collection": "test-collection"
},
"triples": [{"s": {"v": f"concurrent-subject-{i}", "e": False}, "p": {"v": "predicate", "e": False}, "o": {"v": "object", "e": False}}]
"triples": [{"s": {"t": "l", "v": f"concurrent-subject-{i}"}, "p": {"t": "l", "v": "predicate"}, "o": {"t": "l", "v": "object"}}]
}
await import_handler.receive(msg)
@ -359,7 +359,7 @@ async def test_websocket_close_during_message_processing():
"user": "test-user",
"collection": "test-collection"
},
"triples": [{"s": {"v": f"slow-subject-{i}", "e": False}, "p": {"v": "predicate", "e": False}, "o": {"v": "object", "e": False}}]
"triples": [{"s": {"t": "l", "v": f"slow-subject-{i}"}, "p": {"t": "l", "v": "predicate"}, "o": {"t": "l", "v": "object"}}]
}
task = asyncio.create_task(import_handler.receive(msg))
message_tasks.append(task)
@ -423,7 +423,7 @@ async def test_backpressure_during_shutdown():
# Simulate receiving and processing a message
msg_data = {
"metadata": {"id": f"msg-{i}"},
"triples": [{"s": {"v": "subject", "e": False}, "p": {"v": "predicate", "e": False}, "o": {"v": "object", "e": False}}]
"triples": [{"s": {"t": "l", "v": "subject"}, "p": {"t": "l", "v": "predicate"}, "o": {"t": "l", "v": "object"}}]
}
await ws.send_json(msg_data)
# Check if we should stop

View file

@ -15,7 +15,7 @@ from unittest.mock import AsyncMock, MagicMock, patch
from trustgraph.extract.kg.definitions.extract import Processor as DefinitionsProcessor
from trustgraph.extract.kg.relationships.extract import Processor as RelationshipsProcessor
from trustgraph.storage.knowledge.store import Processor as KnowledgeStoreProcessor
from trustgraph.schema import Chunk, Triple, Triples, Metadata, Value, Error
from trustgraph.schema import Chunk, Triple, Triples, Metadata, Term, Error, IRI, LITERAL
from trustgraph.schema import EntityContext, EntityContexts, GraphEmbeddings
from trustgraph.rdf import TRUSTGRAPH_ENTITIES, DEFINITION, RDF_LABEL, SUBJECT_OF
@ -253,24 +253,24 @@ class TestKnowledgeGraphPipelineIntegration:
if s and o:
s_uri = definitions_processor.to_uri(s)
s_value = Value(value=str(s_uri), is_uri=True)
o_value = Value(value=str(o), is_uri=False)
s_term = Term(type=IRI, iri=str(s_uri))
o_term = Term(type=LITERAL, value=str(o))
# Generate triples as the processor would
triples.append(Triple(
s=s_value,
p=Value(value=RDF_LABEL, is_uri=True),
o=Value(value=s, is_uri=False)
s=s_term,
p=Term(type=IRI, iri=RDF_LABEL),
o=Term(type=LITERAL, value=s)
))
triples.append(Triple(
s=s_value,
p=Value(value=DEFINITION, is_uri=True),
o=o_value
s=s_term,
p=Term(type=IRI, iri=DEFINITION),
o=o_term
))
entities.append(EntityContext(
entity=s_value,
entity=s_term,
context=defn["definition"]
))
@ -279,16 +279,16 @@ class TestKnowledgeGraphPipelineIntegration:
assert len(entities) == 3 # 1 entity context per entity
# Verify triple structure
label_triples = [t for t in triples if t.p.value == RDF_LABEL]
definition_triples = [t for t in triples if t.p.value == DEFINITION]
label_triples = [t for t in triples if t.p.iri == RDF_LABEL]
definition_triples = [t for t in triples if t.p.iri == DEFINITION]
assert len(label_triples) == 3
assert len(definition_triples) == 3
# Verify entity contexts
for entity in entities:
assert entity.entity.is_uri is True
assert entity.entity.value.startswith(TRUSTGRAPH_ENTITIES)
assert entity.entity.type == IRI
assert entity.entity.iri.startswith(TRUSTGRAPH_ENTITIES)
assert len(entity.context) > 0
@pytest.mark.asyncio
@ -309,52 +309,52 @@ class TestKnowledgeGraphPipelineIntegration:
s = rel["subject"]
p = rel["predicate"]
o = rel["object"]
if s and p and o:
s_uri = relationships_processor.to_uri(s)
s_value = Value(value=str(s_uri), is_uri=True)
s_term = Term(type=IRI, iri=str(s_uri))
p_uri = relationships_processor.to_uri(p)
p_value = Value(value=str(p_uri), is_uri=True)
p_term = Term(type=IRI, iri=str(p_uri))
if rel["object-entity"]:
o_uri = relationships_processor.to_uri(o)
o_value = Value(value=str(o_uri), is_uri=True)
o_term = Term(type=IRI, iri=str(o_uri))
else:
o_value = Value(value=str(o), is_uri=False)
o_term = Term(type=LITERAL, value=str(o))
# Main relationship triple
triples.append(Triple(s=s_value, p=p_value, o=o_value))
triples.append(Triple(s=s_term, p=p_term, o=o_term))
# Label triples
triples.append(Triple(
s=s_value,
p=Value(value=RDF_LABEL, is_uri=True),
o=Value(value=str(s), is_uri=False)
s=s_term,
p=Term(type=IRI, iri=RDF_LABEL),
o=Term(type=LITERAL, value=str(s))
))
triples.append(Triple(
s=p_value,
p=Value(value=RDF_LABEL, is_uri=True),
o=Value(value=str(p), is_uri=False)
s=p_term,
p=Term(type=IRI, iri=RDF_LABEL),
o=Term(type=LITERAL, value=str(p))
))
if rel["object-entity"]:
triples.append(Triple(
s=o_value,
p=Value(value=RDF_LABEL, is_uri=True),
o=Value(value=str(o), is_uri=False)
s=o_term,
p=Term(type=IRI, iri=RDF_LABEL),
o=Term(type=LITERAL, value=str(o))
))
# Assert
assert len(triples) > 0
# Verify relationship triples exist
relationship_triples = [t for t in triples if t.p.value.endswith("is_subset_of") or t.p.value.endswith("is_used_in")]
relationship_triples = [t for t in triples if t.p.iri.endswith("is_subset_of") or t.p.iri.endswith("is_used_in")]
assert len(relationship_triples) >= 2
# Verify label triples
label_triples = [t for t in triples if t.p.value == RDF_LABEL]
label_triples = [t for t in triples if t.p.iri == RDF_LABEL]
assert len(label_triples) > 0
@pytest.mark.asyncio
@ -374,9 +374,9 @@ class TestKnowledgeGraphPipelineIntegration:
),
triples=[
Triple(
s=Value(value="http://trustgraph.ai/e/machine-learning", is_uri=True),
p=Value(value=DEFINITION, is_uri=True),
o=Value(value="A subset of AI", is_uri=False)
s=Term(type=IRI, iri="http://trustgraph.ai/e/machine-learning"),
p=Term(type=IRI, iri=DEFINITION),
o=Term(type=LITERAL, value="A subset of AI")
)
]
)
@ -602,9 +602,9 @@ class TestKnowledgeGraphPipelineIntegration:
collection="test_collection",
metadata=[
Triple(
s=Value(value="doc:test", is_uri=True),
p=Value(value="dc:title", is_uri=True),
o=Value(value="Test Document", is_uri=False)
s=Term(type=IRI, iri="doc:test"),
p=Term(type=IRI, iri="dc:title"),
o=Term(type=LITERAL, value="Test Document")
)
]
)