Entity-centric graph (#633)

* Tech spec for new entity-centric graph schema

* Graph implementation
This commit is contained in:
cybermaggedon 2026-02-16 13:26:43 +00:00 committed by GitHub
parent f24f1ebd80
commit 00c1ca681b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 1858 additions and 225 deletions

View file

@ -70,25 +70,29 @@ class TestCassandraQueryProcessor:
assert result.type == LITERAL
@pytest.mark.asyncio
@patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
async def test_query_triples_spo_query(self, mock_trustgraph):
@patch('trustgraph.query.triples.cassandra.service.EntityCentricKnowledgeGraph')
async def test_query_triples_spo_query(self, mock_kg_class):
"""Test querying triples with subject, predicate, and object specified"""
from trustgraph.schema import TriplesQueryRequest, Term, IRI, LITERAL
# Setup mock TrustGraph
# Setup mock TrustGraph via factory function
mock_tg_instance = MagicMock()
mock_trustgraph.return_value = mock_tg_instance
mock_kg_class.return_value = mock_tg_instance
# SPO query returns a list of results (with mock graph attribute)
mock_result = MagicMock()
mock_result.g = None
mock_result.g = ''
mock_result.otype = None
mock_result.dtype = None
mock_result.lang = None
mock_result.o = 'test_object'
mock_tg_instance.get_spo.return_value = [mock_result]
processor = Processor(
taskgroup=MagicMock(),
id='test-cassandra-query',
cassandra_host='localhost'
)
# Create query request with all SPO values
query = TriplesQueryRequest(
user='test_user',
@ -98,20 +102,20 @@ class TestCassandraQueryProcessor:
o=Term(type=LITERAL, value='test_object'),
limit=100
)
result = await processor.query_triples(query)
# Verify KnowledgeGraph was created with correct parameters
mock_trustgraph.assert_called_once_with(
mock_kg_class.assert_called_once_with(
hosts=['localhost'],
keyspace='test_user'
)
# Verify get_spo was called with correct parameters
mock_tg_instance.get_spo.assert_called_once_with(
'test_collection', 'test_subject', 'test_predicate', 'test_object', g=None, limit=100
)
# Verify result contains the queried triple
assert len(result) == 1
assert result[0].s.value == 'test_subject'
@ -146,21 +150,25 @@ class TestCassandraQueryProcessor:
assert processor.table is None
@pytest.mark.asyncio
@patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
async def test_query_triples_sp_pattern(self, mock_trustgraph):
@patch('trustgraph.query.triples.cassandra.service.EntityCentricKnowledgeGraph')
async def test_query_triples_sp_pattern(self, mock_kg_class):
"""Test SP query pattern (subject and predicate, no object)"""
from trustgraph.schema import TriplesQueryRequest, Term, IRI, LITERAL
# Setup mock TrustGraph and response
# Setup mock TrustGraph via factory function
mock_tg_instance = MagicMock()
mock_trustgraph.return_value = mock_tg_instance
mock_kg_class.return_value = mock_tg_instance
mock_result = MagicMock()
mock_result.o = 'result_object'
mock_result.g = ''
mock_result.otype = None
mock_result.dtype = None
mock_result.lang = None
mock_tg_instance.get_sp.return_value = [mock_result]
processor = Processor(taskgroup=MagicMock())
query = TriplesQueryRequest(
user='test_user',
collection='test_collection',
@ -169,9 +177,9 @@ class TestCassandraQueryProcessor:
o=None,
limit=50
)
result = await processor.query_triples(query)
mock_tg_instance.get_sp.assert_called_once_with('test_collection', 'test_subject', 'test_predicate', g=None, limit=50)
assert len(result) == 1
assert result[0].s.value == 'test_subject'
@ -179,21 +187,25 @@ class TestCassandraQueryProcessor:
assert result[0].o.value == 'result_object'
@pytest.mark.asyncio
@patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
async def test_query_triples_s_pattern(self, mock_trustgraph):
@patch('trustgraph.query.triples.cassandra.service.EntityCentricKnowledgeGraph')
async def test_query_triples_s_pattern(self, mock_kg_class):
"""Test S query pattern (subject only)"""
from trustgraph.schema import TriplesQueryRequest, Term, IRI, LITERAL
mock_tg_instance = MagicMock()
mock_trustgraph.return_value = mock_tg_instance
mock_kg_class.return_value = mock_tg_instance
mock_result = MagicMock()
mock_result.p = 'result_predicate'
mock_result.o = 'result_object'
mock_result.g = ''
mock_result.otype = None
mock_result.dtype = None
mock_result.lang = None
mock_tg_instance.get_s.return_value = [mock_result]
processor = Processor(taskgroup=MagicMock())
query = TriplesQueryRequest(
user='test_user',
collection='test_collection',
@ -202,9 +214,9 @@ class TestCassandraQueryProcessor:
o=None,
limit=25
)
result = await processor.query_triples(query)
mock_tg_instance.get_s.assert_called_once_with('test_collection', 'test_subject', g=None, limit=25)
assert len(result) == 1
assert result[0].s.value == 'test_subject'
@ -212,21 +224,25 @@ class TestCassandraQueryProcessor:
assert result[0].o.value == 'result_object'
@pytest.mark.asyncio
@patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
async def test_query_triples_p_pattern(self, mock_trustgraph):
@patch('trustgraph.query.triples.cassandra.service.EntityCentricKnowledgeGraph')
async def test_query_triples_p_pattern(self, mock_kg_class):
"""Test P query pattern (predicate only)"""
from trustgraph.schema import TriplesQueryRequest, Term, IRI, LITERAL
mock_tg_instance = MagicMock()
mock_trustgraph.return_value = mock_tg_instance
mock_kg_class.return_value = mock_tg_instance
mock_result = MagicMock()
mock_result.s = 'result_subject'
mock_result.o = 'result_object'
mock_result.g = ''
mock_result.otype = None
mock_result.dtype = None
mock_result.lang = None
mock_tg_instance.get_p.return_value = [mock_result]
processor = Processor(taskgroup=MagicMock())
query = TriplesQueryRequest(
user='test_user',
collection='test_collection',
@ -235,9 +251,9 @@ class TestCassandraQueryProcessor:
o=None,
limit=10
)
result = await processor.query_triples(query)
mock_tg_instance.get_p.assert_called_once_with('test_collection', 'test_predicate', g=None, limit=10)
assert len(result) == 1
assert result[0].s.value == 'result_subject'
@ -245,21 +261,25 @@ class TestCassandraQueryProcessor:
assert result[0].o.value == 'result_object'
@pytest.mark.asyncio
@patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
async def test_query_triples_o_pattern(self, mock_trustgraph):
@patch('trustgraph.query.triples.cassandra.service.EntityCentricKnowledgeGraph')
async def test_query_triples_o_pattern(self, mock_kg_class):
"""Test O query pattern (object only)"""
from trustgraph.schema import TriplesQueryRequest, Term, IRI, LITERAL
mock_tg_instance = MagicMock()
mock_trustgraph.return_value = mock_tg_instance
mock_kg_class.return_value = mock_tg_instance
mock_result = MagicMock()
mock_result.s = 'result_subject'
mock_result.p = 'result_predicate'
mock_result.g = ''
mock_result.otype = None
mock_result.dtype = None
mock_result.lang = None
mock_tg_instance.get_o.return_value = [mock_result]
processor = Processor(taskgroup=MagicMock())
query = TriplesQueryRequest(
user='test_user',
collection='test_collection',
@ -268,9 +288,9 @@ class TestCassandraQueryProcessor:
o=Term(type=LITERAL, value='test_object'),
limit=75
)
result = await processor.query_triples(query)
mock_tg_instance.get_o.assert_called_once_with('test_collection', 'test_object', g=None, limit=75)
assert len(result) == 1
assert result[0].s.value == 'result_subject'
@ -278,22 +298,26 @@ class TestCassandraQueryProcessor:
assert result[0].o.value == 'test_object'
@pytest.mark.asyncio
@patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
async def test_query_triples_get_all_pattern(self, mock_trustgraph):
@patch('trustgraph.query.triples.cassandra.service.EntityCentricKnowledgeGraph')
async def test_query_triples_get_all_pattern(self, mock_kg_class):
"""Test query pattern with no constraints (get all)"""
from trustgraph.schema import TriplesQueryRequest
mock_tg_instance = MagicMock()
mock_trustgraph.return_value = mock_tg_instance
mock_kg_class.return_value = mock_tg_instance
mock_result = MagicMock()
mock_result.s = 'all_subject'
mock_result.p = 'all_predicate'
mock_result.o = 'all_object'
mock_result.g = ''
mock_result.otype = None
mock_result.dtype = None
mock_result.lang = None
mock_tg_instance.get_all.return_value = [mock_result]
processor = Processor(taskgroup=MagicMock())
query = TriplesQueryRequest(
user='test_user',
collection='test_collection',
@ -302,9 +326,9 @@ class TestCassandraQueryProcessor:
o=None,
limit=1000
)
result = await processor.query_triples(query)
mock_tg_instance.get_all.assert_called_once_with('test_collection', limit=1000)
assert len(result) == 1
assert result[0].s.value == 'all_subject'
@ -378,16 +402,20 @@ class TestCassandraQueryProcessor:
mock_launch.assert_called_once_with(default_ident, '\nTriples query service. Input is a (s, p, o, g) quad pattern, some values may be\nnull. Output is a list of quads.\n')
@pytest.mark.asyncio
@patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
async def test_query_triples_with_authentication(self, mock_trustgraph):
@patch('trustgraph.query.triples.cassandra.service.EntityCentricKnowledgeGraph')
async def test_query_triples_with_authentication(self, mock_kg_class):
"""Test querying with username and password authentication"""
from trustgraph.schema import TriplesQueryRequest, Term, IRI, LITERAL
mock_tg_instance = MagicMock()
mock_trustgraph.return_value = mock_tg_instance
mock_kg_class.return_value = mock_tg_instance
# SPO query returns a list of results
mock_result = MagicMock()
mock_result.g = None
mock_result.g = ''
mock_result.otype = None
mock_result.dtype = None
mock_result.lang = None
mock_result.o = 'test_object'
mock_tg_instance.get_spo.return_value = [mock_result]
processor = Processor(
@ -395,7 +423,7 @@ class TestCassandraQueryProcessor:
cassandra_username='authuser',
cassandra_password='authpass'
)
query = TriplesQueryRequest(
user='test_user',
collection='test_collection',
@ -404,11 +432,11 @@ class TestCassandraQueryProcessor:
o=Term(type=LITERAL, value='test_object'),
limit=100
)
await processor.query_triples(query)
# Verify KnowledgeGraph was created with authentication
mock_trustgraph.assert_called_once_with(
mock_kg_class.assert_called_once_with(
hosts=['cassandra'], # Updated default
keyspace='test_user',
username='authuser',
@ -416,16 +444,20 @@ class TestCassandraQueryProcessor:
)
@pytest.mark.asyncio
@patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
async def test_query_triples_table_reuse(self, mock_trustgraph):
@patch('trustgraph.query.triples.cassandra.service.EntityCentricKnowledgeGraph')
async def test_query_triples_table_reuse(self, mock_kg_class):
"""Test that TrustGraph is reused for same table"""
from trustgraph.schema import TriplesQueryRequest, Term, IRI, LITERAL
mock_tg_instance = MagicMock()
mock_trustgraph.return_value = mock_tg_instance
mock_kg_class.return_value = mock_tg_instance
# SPO query returns a list of results
mock_result = MagicMock()
mock_result.g = None
mock_result.g = ''
mock_result.otype = None
mock_result.dtype = None
mock_result.lang = None
mock_result.o = 'test_object'
mock_tg_instance.get_spo.return_value = [mock_result]
processor = Processor(taskgroup=MagicMock())
@ -441,24 +473,35 @@ class TestCassandraQueryProcessor:
# First query should create TrustGraph
await processor.query_triples(query)
assert mock_trustgraph.call_count == 1
assert mock_kg_class.call_count == 1
# Second query with same table should reuse TrustGraph
await processor.query_triples(query)
assert mock_trustgraph.call_count == 1 # Should not increase
assert mock_kg_class.call_count == 1 # Should not increase
@pytest.mark.asyncio
@patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
async def test_query_triples_table_switching(self, mock_trustgraph):
@patch('trustgraph.query.triples.cassandra.service.EntityCentricKnowledgeGraph')
async def test_query_triples_table_switching(self, mock_kg_class):
"""Test table switching creates new TrustGraph"""
from trustgraph.schema import TriplesQueryRequest, Term, IRI, LITERAL
mock_tg_instance1 = MagicMock()
mock_tg_instance2 = MagicMock()
mock_trustgraph.side_effect = [mock_tg_instance1, mock_tg_instance2]
mock_kg_class.side_effect = [mock_tg_instance1, mock_tg_instance2]
# Setup mock results for both instances
mock_result = MagicMock()
mock_result.g = ''
mock_result.otype = None
mock_result.dtype = None
mock_result.lang = None
mock_result.p = 'p'
mock_result.o = 'o'
mock_tg_instance1.get_s.return_value = [mock_result]
mock_tg_instance2.get_s.return_value = [mock_result]
processor = Processor(taskgroup=MagicMock())
# First query
query1 = TriplesQueryRequest(
user='user1',
@ -468,10 +511,10 @@ class TestCassandraQueryProcessor:
o=None,
limit=100
)
await processor.query_triples(query1)
assert processor.table == 'user1'
# Second query with different table
query2 = TriplesQueryRequest(
user='user2',
@ -481,25 +524,25 @@ class TestCassandraQueryProcessor:
o=None,
limit=100
)
await processor.query_triples(query2)
assert processor.table == 'user2'
# Verify TrustGraph was created twice
assert mock_trustgraph.call_count == 2
assert mock_kg_class.call_count == 2
@pytest.mark.asyncio
@patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
async def test_query_triples_exception_handling(self, mock_trustgraph):
@patch('trustgraph.query.triples.cassandra.service.EntityCentricKnowledgeGraph')
async def test_query_triples_exception_handling(self, mock_kg_class):
"""Test exception handling during query execution"""
from trustgraph.schema import TriplesQueryRequest, Term, IRI, LITERAL
mock_tg_instance = MagicMock()
mock_trustgraph.return_value = mock_tg_instance
mock_kg_class.return_value = mock_tg_instance
mock_tg_instance.get_spo.side_effect = Exception("Query failed")
processor = Processor(taskgroup=MagicMock())
query = TriplesQueryRequest(
user='test_user',
collection='test_collection',
@ -508,28 +551,36 @@ class TestCassandraQueryProcessor:
o=Term(type=LITERAL, value='test_object'),
limit=100
)
with pytest.raises(Exception, match="Query failed"):
await processor.query_triples(query)
@pytest.mark.asyncio
@patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
async def test_query_triples_multiple_results(self, mock_trustgraph):
@patch('trustgraph.query.triples.cassandra.service.EntityCentricKnowledgeGraph')
async def test_query_triples_multiple_results(self, mock_kg_class):
"""Test query returning multiple results"""
from trustgraph.schema import TriplesQueryRequest, Term, IRI, LITERAL
mock_tg_instance = MagicMock()
mock_trustgraph.return_value = mock_tg_instance
mock_kg_class.return_value = mock_tg_instance
# Mock multiple results
mock_result1 = MagicMock()
mock_result1.o = 'object1'
mock_result1.g = ''
mock_result1.otype = None
mock_result1.dtype = None
mock_result1.lang = None
mock_result2 = MagicMock()
mock_result2.o = 'object2'
mock_result2.g = ''
mock_result2.otype = None
mock_result2.dtype = None
mock_result2.lang = None
mock_tg_instance.get_sp.return_value = [mock_result1, mock_result2]
processor = Processor(taskgroup=MagicMock())
query = TriplesQueryRequest(
user='test_user',
collection='test_collection',
@ -538,9 +589,9 @@ class TestCassandraQueryProcessor:
o=None,
limit=100
)
result = await processor.query_triples(query)
assert len(result) == 2
assert result[0].o.value == 'object1'
assert result[1].o.value == 'object2'
@ -550,16 +601,20 @@ class TestCassandraQueryPerformanceOptimizations:
"""Test cases for multi-table performance optimizations in query service"""
@pytest.mark.asyncio
@patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
async def test_get_po_query_optimization(self, mock_trustgraph):
@patch('trustgraph.query.triples.cassandra.service.EntityCentricKnowledgeGraph')
async def test_get_po_query_optimization(self, mock_kg_class):
"""Test that get_po queries use optimized table (no ALLOW FILTERING)"""
from trustgraph.schema import TriplesQueryRequest, Term, IRI, LITERAL
mock_tg_instance = MagicMock()
mock_trustgraph.return_value = mock_tg_instance
mock_kg_class.return_value = mock_tg_instance
mock_result = MagicMock()
mock_result.s = 'result_subject'
mock_result.g = ''
mock_result.otype = None
mock_result.dtype = None
mock_result.lang = None
mock_tg_instance.get_po.return_value = [mock_result]
processor = Processor(taskgroup=MagicMock())
@ -587,16 +642,20 @@ class TestCassandraQueryPerformanceOptimizations:
assert result[0].o.value == 'test_object'
@pytest.mark.asyncio
@patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
async def test_get_os_query_optimization(self, mock_trustgraph):
@patch('trustgraph.query.triples.cassandra.service.EntityCentricKnowledgeGraph')
async def test_get_os_query_optimization(self, mock_kg_class):
"""Test that get_os queries use optimized table (no ALLOW FILTERING)"""
from trustgraph.schema import TriplesQueryRequest, Term, IRI, LITERAL
mock_tg_instance = MagicMock()
mock_trustgraph.return_value = mock_tg_instance
mock_kg_class.return_value = mock_tg_instance
mock_result = MagicMock()
mock_result.p = 'result_predicate'
mock_result.g = ''
mock_result.otype = None
mock_result.dtype = None
mock_result.lang = None
mock_tg_instance.get_os.return_value = [mock_result]
processor = Processor(taskgroup=MagicMock())
@ -624,13 +683,13 @@ class TestCassandraQueryPerformanceOptimizations:
assert result[0].o.value == 'test_object'
@pytest.mark.asyncio
@patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
async def test_all_query_patterns_use_correct_tables(self, mock_trustgraph):
@patch('trustgraph.query.triples.cassandra.service.EntityCentricKnowledgeGraph')
async def test_all_query_patterns_use_correct_tables(self, mock_kg_class):
"""Test that all query patterns route to their optimal tables"""
from trustgraph.schema import TriplesQueryRequest, Term, IRI, LITERAL
mock_tg_instance = MagicMock()
mock_trustgraph.return_value = mock_tg_instance
mock_kg_class.return_value = mock_tg_instance
# Mock empty results for all queries
mock_tg_instance.get_all.return_value = []
@ -696,19 +755,23 @@ class TestCassandraQueryPerformanceOptimizations:
# Mode is determined in KnowledgeGraph initialization
@pytest.mark.asyncio
@patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
async def test_performance_critical_po_query_no_filtering(self, mock_trustgraph):
@patch('trustgraph.query.triples.cassandra.service.EntityCentricKnowledgeGraph')
async def test_performance_critical_po_query_no_filtering(self, mock_kg_class):
"""Test the performance-critical PO query that eliminates ALLOW FILTERING"""
from trustgraph.schema import TriplesQueryRequest, Term, IRI, LITERAL
mock_tg_instance = MagicMock()
mock_trustgraph.return_value = mock_tg_instance
mock_kg_class.return_value = mock_tg_instance
# Mock multiple subjects for the same predicate-object pair
mock_results = []
for i in range(5):
mock_result = MagicMock()
mock_result.s = f'subject_{i}'
mock_result.g = ''
mock_result.otype = None
mock_result.dtype = None
mock_result.lang = None
mock_results.append(mock_result)
mock_tg_instance.get_po.return_value = mock_results