mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-04-26 08:56:21 +02:00
Entity-centric graph (#633)
* Tech spec for new entity-centric graph schema * Graph implementation
This commit is contained in:
parent
f24f1ebd80
commit
00c1ca681b
8 changed files with 1858 additions and 225 deletions
|
|
@ -6,7 +6,7 @@ import pytest
|
|||
from unittest.mock import MagicMock, patch, AsyncMock
|
||||
|
||||
from trustgraph.storage.triples.cassandra.write import Processor
|
||||
from trustgraph.schema import Triple, LITERAL
|
||||
from trustgraph.schema import Triple, LITERAL, IRI
|
||||
from trustgraph.direct.cassandra_kg import DEFAULT_GRAPH
|
||||
|
||||
|
||||
|
|
@ -87,29 +87,29 @@ class TestCassandraStorageProcessor:
|
|||
assert processor.cassandra_username == 'new-user' # Only cassandra_* params work
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@patch('trustgraph.storage.triples.cassandra.write.KnowledgeGraph')
|
||||
async def test_table_switching_with_auth(self, mock_trustgraph):
|
||||
@patch('trustgraph.storage.triples.cassandra.write.EntityCentricKnowledgeGraph')
|
||||
async def test_table_switching_with_auth(self, mock_kg_class):
|
||||
"""Test table switching logic when authentication is provided"""
|
||||
taskgroup_mock = MagicMock()
|
||||
mock_tg_instance = MagicMock()
|
||||
mock_trustgraph.return_value = mock_tg_instance
|
||||
|
||||
mock_kg_class.return_value = mock_tg_instance
|
||||
|
||||
processor = Processor(
|
||||
taskgroup=taskgroup_mock,
|
||||
cassandra_username='testuser',
|
||||
cassandra_password='testpass'
|
||||
)
|
||||
|
||||
|
||||
# Create mock message
|
||||
mock_message = MagicMock()
|
||||
mock_message.metadata.user = 'user1'
|
||||
mock_message.metadata.collection = 'collection1'
|
||||
mock_message.triples = []
|
||||
|
||||
|
||||
await processor.store_triples(mock_message)
|
||||
|
||||
|
||||
# Verify KnowledgeGraph was called with auth parameters
|
||||
mock_trustgraph.assert_called_once_with(
|
||||
mock_kg_class.assert_called_once_with(
|
||||
hosts=['cassandra'], # Updated default
|
||||
keyspace='user1',
|
||||
username='testuser',
|
||||
|
|
@ -118,81 +118,89 @@ class TestCassandraStorageProcessor:
|
|||
assert processor.table == 'user1'
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@patch('trustgraph.storage.triples.cassandra.write.KnowledgeGraph')
|
||||
async def test_table_switching_without_auth(self, mock_trustgraph):
|
||||
@patch('trustgraph.storage.triples.cassandra.write.EntityCentricKnowledgeGraph')
|
||||
async def test_table_switching_without_auth(self, mock_kg_class):
|
||||
"""Test table switching logic when no authentication is provided"""
|
||||
taskgroup_mock = MagicMock()
|
||||
mock_tg_instance = MagicMock()
|
||||
mock_trustgraph.return_value = mock_tg_instance
|
||||
|
||||
mock_kg_class.return_value = mock_tg_instance
|
||||
|
||||
processor = Processor(taskgroup=taskgroup_mock)
|
||||
|
||||
|
||||
# Create mock message
|
||||
mock_message = MagicMock()
|
||||
mock_message.metadata.user = 'user2'
|
||||
mock_message.metadata.collection = 'collection2'
|
||||
mock_message.triples = []
|
||||
|
||||
|
||||
await processor.store_triples(mock_message)
|
||||
|
||||
|
||||
# Verify KnowledgeGraph was called without auth parameters
|
||||
mock_trustgraph.assert_called_once_with(
|
||||
mock_kg_class.assert_called_once_with(
|
||||
hosts=['cassandra'], # Updated default
|
||||
keyspace='user2'
|
||||
)
|
||||
assert processor.table == 'user2'
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@patch('trustgraph.storage.triples.cassandra.write.KnowledgeGraph')
|
||||
async def test_table_reuse_when_same(self, mock_trustgraph):
|
||||
@patch('trustgraph.storage.triples.cassandra.write.EntityCentricKnowledgeGraph')
|
||||
async def test_table_reuse_when_same(self, mock_kg_class):
|
||||
"""Test that TrustGraph is not recreated when table hasn't changed"""
|
||||
taskgroup_mock = MagicMock()
|
||||
mock_tg_instance = MagicMock()
|
||||
mock_trustgraph.return_value = mock_tg_instance
|
||||
|
||||
mock_kg_class.return_value = mock_tg_instance
|
||||
|
||||
processor = Processor(taskgroup=taskgroup_mock)
|
||||
|
||||
|
||||
# Create mock message
|
||||
mock_message = MagicMock()
|
||||
mock_message.metadata.user = 'user1'
|
||||
mock_message.metadata.collection = 'collection1'
|
||||
mock_message.triples = []
|
||||
|
||||
|
||||
# First call should create TrustGraph
|
||||
await processor.store_triples(mock_message)
|
||||
assert mock_trustgraph.call_count == 1
|
||||
|
||||
assert mock_kg_class.call_count == 1
|
||||
|
||||
# Second call with same table should reuse TrustGraph
|
||||
await processor.store_triples(mock_message)
|
||||
assert mock_trustgraph.call_count == 1 # Should not increase
|
||||
assert mock_kg_class.call_count == 1 # Should not increase
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@patch('trustgraph.storage.triples.cassandra.write.KnowledgeGraph')
|
||||
async def test_triple_insertion(self, mock_trustgraph):
|
||||
@patch('trustgraph.storage.triples.cassandra.write.EntityCentricKnowledgeGraph')
|
||||
async def test_triple_insertion(self, mock_kg_class):
|
||||
"""Test that triples are properly inserted into Cassandra"""
|
||||
taskgroup_mock = MagicMock()
|
||||
mock_tg_instance = MagicMock()
|
||||
mock_trustgraph.return_value = mock_tg_instance
|
||||
|
||||
mock_kg_class.return_value = mock_tg_instance
|
||||
|
||||
processor = Processor(taskgroup=taskgroup_mock)
|
||||
|
||||
|
||||
# Create mock triples with proper Term structure
|
||||
triple1 = MagicMock()
|
||||
triple1.s.type = LITERAL
|
||||
triple1.s.value = 'subject1'
|
||||
triple1.s.datatype = ''
|
||||
triple1.s.language = ''
|
||||
triple1.p.type = LITERAL
|
||||
triple1.p.value = 'predicate1'
|
||||
triple1.o.type = LITERAL
|
||||
triple1.o.value = 'object1'
|
||||
triple1.o.datatype = ''
|
||||
triple1.o.language = ''
|
||||
triple1.g = None
|
||||
|
||||
triple2 = MagicMock()
|
||||
triple2.s.type = LITERAL
|
||||
triple2.s.value = 'subject2'
|
||||
triple2.s.datatype = ''
|
||||
triple2.s.language = ''
|
||||
triple2.p.type = LITERAL
|
||||
triple2.p.value = 'predicate2'
|
||||
triple2.o.type = LITERAL
|
||||
triple2.o.value = 'object2'
|
||||
triple2.o.datatype = ''
|
||||
triple2.o.language = ''
|
||||
triple2.g = None
|
||||
|
||||
# Create mock message
|
||||
|
|
@ -203,51 +211,57 @@ class TestCassandraStorageProcessor:
|
|||
|
||||
await processor.store_triples(mock_message)
|
||||
|
||||
# Verify both triples were inserted (with g= parameter)
|
||||
# Verify both triples were inserted (with g=, otype=, dtype=, lang= parameters)
|
||||
assert mock_tg_instance.insert.call_count == 2
|
||||
mock_tg_instance.insert.assert_any_call('collection1', 'subject1', 'predicate1', 'object1', g=DEFAULT_GRAPH)
|
||||
mock_tg_instance.insert.assert_any_call('collection1', 'subject2', 'predicate2', 'object2', g=DEFAULT_GRAPH)
|
||||
mock_tg_instance.insert.assert_any_call(
|
||||
'collection1', 'subject1', 'predicate1', 'object1',
|
||||
g=DEFAULT_GRAPH, otype='l', dtype='', lang=''
|
||||
)
|
||||
mock_tg_instance.insert.assert_any_call(
|
||||
'collection1', 'subject2', 'predicate2', 'object2',
|
||||
g=DEFAULT_GRAPH, otype='l', dtype='', lang=''
|
||||
)
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@patch('trustgraph.storage.triples.cassandra.write.KnowledgeGraph')
|
||||
async def test_triple_insertion_with_empty_list(self, mock_trustgraph):
|
||||
@patch('trustgraph.storage.triples.cassandra.write.EntityCentricKnowledgeGraph')
|
||||
async def test_triple_insertion_with_empty_list(self, mock_kg_class):
|
||||
"""Test behavior when message has no triples"""
|
||||
taskgroup_mock = MagicMock()
|
||||
mock_tg_instance = MagicMock()
|
||||
mock_trustgraph.return_value = mock_tg_instance
|
||||
|
||||
mock_kg_class.return_value = mock_tg_instance
|
||||
|
||||
processor = Processor(taskgroup=taskgroup_mock)
|
||||
|
||||
|
||||
# Create mock message with empty triples
|
||||
mock_message = MagicMock()
|
||||
mock_message.metadata.user = 'user1'
|
||||
mock_message.metadata.collection = 'collection1'
|
||||
mock_message.triples = []
|
||||
|
||||
|
||||
await processor.store_triples(mock_message)
|
||||
|
||||
|
||||
# Verify no triples were inserted
|
||||
mock_tg_instance.insert.assert_not_called()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@patch('trustgraph.storage.triples.cassandra.write.KnowledgeGraph')
|
||||
@patch('trustgraph.storage.triples.cassandra.write.EntityCentricKnowledgeGraph')
|
||||
@patch('trustgraph.storage.triples.cassandra.write.time.sleep')
|
||||
async def test_exception_handling_with_retry(self, mock_sleep, mock_trustgraph):
|
||||
async def test_exception_handling_with_retry(self, mock_sleep, mock_kg_class):
|
||||
"""Test exception handling during TrustGraph creation"""
|
||||
taskgroup_mock = MagicMock()
|
||||
mock_trustgraph.side_effect = Exception("Connection failed")
|
||||
|
||||
mock_kg_class.side_effect = Exception("Connection failed")
|
||||
|
||||
processor = Processor(taskgroup=taskgroup_mock)
|
||||
|
||||
|
||||
# Create mock message
|
||||
mock_message = MagicMock()
|
||||
mock_message.metadata.user = 'user1'
|
||||
mock_message.metadata.collection = 'collection1'
|
||||
mock_message.triples = []
|
||||
|
||||
|
||||
with pytest.raises(Exception, match="Connection failed"):
|
||||
await processor.store_triples(mock_message)
|
||||
|
||||
|
||||
# Verify sleep was called before re-raising
|
||||
mock_sleep.assert_called_once_with(1)
|
||||
|
||||
|
|
@ -335,57 +349,61 @@ class TestCassandraStorageProcessor:
|
|||
mock_launch.assert_called_once_with(default_ident, '\nGraph writer. Input is graph edge. Writes edges to Cassandra graph.\n')
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@patch('trustgraph.storage.triples.cassandra.write.KnowledgeGraph')
|
||||
async def test_store_triples_table_switching_between_different_tables(self, mock_trustgraph):
|
||||
@patch('trustgraph.storage.triples.cassandra.write.EntityCentricKnowledgeGraph')
|
||||
async def test_store_triples_table_switching_between_different_tables(self, mock_kg_class):
|
||||
"""Test table switching when different tables are used in sequence"""
|
||||
taskgroup_mock = MagicMock()
|
||||
mock_tg_instance1 = MagicMock()
|
||||
mock_tg_instance2 = MagicMock()
|
||||
mock_trustgraph.side_effect = [mock_tg_instance1, mock_tg_instance2]
|
||||
|
||||
mock_kg_class.side_effect = [mock_tg_instance1, mock_tg_instance2]
|
||||
|
||||
processor = Processor(taskgroup=taskgroup_mock)
|
||||
|
||||
|
||||
# First message with table1
|
||||
mock_message1 = MagicMock()
|
||||
mock_message1.metadata.user = 'user1'
|
||||
mock_message1.metadata.collection = 'collection1'
|
||||
mock_message1.triples = []
|
||||
|
||||
|
||||
await processor.store_triples(mock_message1)
|
||||
assert processor.table == 'user1'
|
||||
assert processor.tg == mock_tg_instance1
|
||||
|
||||
|
||||
# Second message with different table
|
||||
mock_message2 = MagicMock()
|
||||
mock_message2.metadata.user = 'user2'
|
||||
mock_message2.metadata.collection = 'collection2'
|
||||
mock_message2.triples = []
|
||||
|
||||
|
||||
await processor.store_triples(mock_message2)
|
||||
assert processor.table == 'user2'
|
||||
assert processor.tg == mock_tg_instance2
|
||||
|
||||
|
||||
# Verify TrustGraph was created twice for different tables
|
||||
assert mock_trustgraph.call_count == 2
|
||||
assert mock_kg_class.call_count == 2
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@patch('trustgraph.storage.triples.cassandra.write.KnowledgeGraph')
|
||||
async def test_store_triples_with_special_characters_in_values(self, mock_trustgraph):
|
||||
@patch('trustgraph.storage.triples.cassandra.write.EntityCentricKnowledgeGraph')
|
||||
async def test_store_triples_with_special_characters_in_values(self, mock_kg_class):
|
||||
"""Test storing triples with special characters and unicode"""
|
||||
taskgroup_mock = MagicMock()
|
||||
mock_tg_instance = MagicMock()
|
||||
mock_trustgraph.return_value = mock_tg_instance
|
||||
|
||||
mock_kg_class.return_value = mock_tg_instance
|
||||
|
||||
processor = Processor(taskgroup=taskgroup_mock)
|
||||
|
||||
|
||||
# Create triple with special characters and proper Term structure
|
||||
triple = MagicMock()
|
||||
triple.s.type = LITERAL
|
||||
triple.s.value = 'subject with spaces & symbols'
|
||||
triple.s.datatype = ''
|
||||
triple.s.language = ''
|
||||
triple.p.type = LITERAL
|
||||
triple.p.value = 'predicate:with/colons'
|
||||
triple.o.type = LITERAL
|
||||
triple.o.value = 'object with "quotes" and unicode: ñáéíóú'
|
||||
triple.o.datatype = ''
|
||||
triple.o.language = ''
|
||||
triple.g = None
|
||||
|
||||
mock_message = MagicMock()
|
||||
|
|
@ -401,31 +419,34 @@ class TestCassandraStorageProcessor:
|
|||
'subject with spaces & symbols',
|
||||
'predicate:with/colons',
|
||||
'object with "quotes" and unicode: ñáéíóú',
|
||||
g=DEFAULT_GRAPH
|
||||
g=DEFAULT_GRAPH,
|
||||
otype='l',
|
||||
dtype='',
|
||||
lang=''
|
||||
)
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@patch('trustgraph.storage.triples.cassandra.write.KnowledgeGraph')
|
||||
async def test_store_triples_preserves_old_table_on_exception(self, mock_trustgraph):
|
||||
@patch('trustgraph.storage.triples.cassandra.write.EntityCentricKnowledgeGraph')
|
||||
async def test_store_triples_preserves_old_table_on_exception(self, mock_kg_class):
|
||||
"""Test that table remains unchanged when TrustGraph creation fails"""
|
||||
taskgroup_mock = MagicMock()
|
||||
|
||||
|
||||
processor = Processor(taskgroup=taskgroup_mock)
|
||||
|
||||
|
||||
# Set an initial table
|
||||
processor.table = ('old_user', 'old_collection')
|
||||
|
||||
|
||||
# Mock TrustGraph to raise exception
|
||||
mock_trustgraph.side_effect = Exception("Connection failed")
|
||||
|
||||
mock_kg_class.side_effect = Exception("Connection failed")
|
||||
|
||||
mock_message = MagicMock()
|
||||
mock_message.metadata.user = 'new_user'
|
||||
mock_message.metadata.collection = 'new_collection'
|
||||
mock_message.triples = []
|
||||
|
||||
|
||||
with pytest.raises(Exception, match="Connection failed"):
|
||||
await processor.store_triples(mock_message)
|
||||
|
||||
|
||||
# Table should remain unchanged since self.table = table happens after try/except
|
||||
assert processor.table == ('old_user', 'old_collection')
|
||||
# TrustGraph should be set to None though
|
||||
|
|
@ -436,12 +457,12 @@ class TestCassandraPerformanceOptimizations:
|
|||
"""Test cases for multi-table performance optimizations"""
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@patch('trustgraph.storage.triples.cassandra.write.KnowledgeGraph')
|
||||
async def test_legacy_mode_uses_single_table(self, mock_trustgraph):
|
||||
@patch('trustgraph.storage.triples.cassandra.write.EntityCentricKnowledgeGraph')
|
||||
async def test_legacy_mode_uses_single_table(self, mock_kg_class):
|
||||
"""Test that legacy mode still works with single table"""
|
||||
taskgroup_mock = MagicMock()
|
||||
mock_tg_instance = MagicMock()
|
||||
mock_trustgraph.return_value = mock_tg_instance
|
||||
mock_kg_class.return_value = mock_tg_instance
|
||||
|
||||
with patch.dict('os.environ', {'CASSANDRA_USE_LEGACY': 'true'}):
|
||||
processor = Processor(taskgroup=taskgroup_mock)
|
||||
|
|
@ -454,16 +475,15 @@ class TestCassandraPerformanceOptimizations:
|
|||
await processor.store_triples(mock_message)
|
||||
|
||||
# Verify KnowledgeGraph instance uses legacy mode
|
||||
kg_instance = mock_trustgraph.return_value
|
||||
assert kg_instance is not None
|
||||
assert mock_tg_instance is not None
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@patch('trustgraph.storage.triples.cassandra.write.KnowledgeGraph')
|
||||
async def test_optimized_mode_uses_multi_table(self, mock_trustgraph):
|
||||
@patch('trustgraph.storage.triples.cassandra.write.EntityCentricKnowledgeGraph')
|
||||
async def test_optimized_mode_uses_multi_table(self, mock_kg_class):
|
||||
"""Test that optimized mode uses multi-table schema"""
|
||||
taskgroup_mock = MagicMock()
|
||||
mock_tg_instance = MagicMock()
|
||||
mock_trustgraph.return_value = mock_tg_instance
|
||||
mock_kg_class.return_value = mock_tg_instance
|
||||
|
||||
with patch.dict('os.environ', {'CASSANDRA_USE_LEGACY': 'false'}):
|
||||
processor = Processor(taskgroup=taskgroup_mock)
|
||||
|
|
@ -476,16 +496,15 @@ class TestCassandraPerformanceOptimizations:
|
|||
await processor.store_triples(mock_message)
|
||||
|
||||
# Verify KnowledgeGraph instance is in optimized mode
|
||||
kg_instance = mock_trustgraph.return_value
|
||||
assert kg_instance is not None
|
||||
assert mock_tg_instance is not None
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@patch('trustgraph.storage.triples.cassandra.write.KnowledgeGraph')
|
||||
async def test_batch_write_consistency(self, mock_trustgraph):
|
||||
@patch('trustgraph.storage.triples.cassandra.write.EntityCentricKnowledgeGraph')
|
||||
async def test_batch_write_consistency(self, mock_kg_class):
|
||||
"""Test that all tables stay consistent during batch writes"""
|
||||
taskgroup_mock = MagicMock()
|
||||
mock_tg_instance = MagicMock()
|
||||
mock_trustgraph.return_value = mock_tg_instance
|
||||
mock_kg_class.return_value = mock_tg_instance
|
||||
|
||||
processor = Processor(taskgroup=taskgroup_mock)
|
||||
|
||||
|
|
@ -493,10 +512,14 @@ class TestCassandraPerformanceOptimizations:
|
|||
triple = MagicMock()
|
||||
triple.s.type = LITERAL
|
||||
triple.s.value = 'test_subject'
|
||||
triple.s.datatype = ''
|
||||
triple.s.language = ''
|
||||
triple.p.type = LITERAL
|
||||
triple.p.value = 'test_predicate'
|
||||
triple.o.type = LITERAL
|
||||
triple.o.value = 'test_object'
|
||||
triple.o.datatype = ''
|
||||
triple.o.language = ''
|
||||
triple.g = None
|
||||
|
||||
mock_message = MagicMock()
|
||||
|
|
@ -509,7 +532,7 @@ class TestCassandraPerformanceOptimizations:
|
|||
# Verify insert was called for the triple (implementation details tested in KnowledgeGraph)
|
||||
mock_tg_instance.insert.assert_called_once_with(
|
||||
'collection1', 'test_subject', 'test_predicate', 'test_object',
|
||||
g=DEFAULT_GRAPH
|
||||
g=DEFAULT_GRAPH, otype='l', dtype='', lang=''
|
||||
)
|
||||
|
||||
def test_environment_variable_controls_mode(self):
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue