Entity-centric graph (#633)

* Tech spec for new entity-centric graph schema

* Graph implementation
This commit is contained in:
cybermaggedon 2026-02-16 13:26:43 +00:00 committed by GitHub
parent f24f1ebd80
commit 00c1ca681b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 1858 additions and 225 deletions

View file

@ -0,0 +1,599 @@
"""
Unit tests for EntityCentricKnowledgeGraph class
Tests the entity-centric knowledge graph implementation without requiring
an actual Cassandra connection. Uses mocking to verify correct behavior.
"""
import pytest
from unittest.mock import MagicMock, patch, call
import os
class TestEntityCentricKnowledgeGraph:
    """Test cases for EntityCentricKnowledgeGraph.

    The Cassandra ``Cluster`` class is patched for every test, so the graph
    talks to a ``MagicMock`` session instead of a live database.
    """

    @pytest.fixture
    def mock_cluster(self):
        """Patch ``Cluster`` inside ``trustgraph.direct.cassandra_kg``.

        Yields:
            A ``(cluster_cls, cluster, session)`` tuple of mocks, wired so
            that ``cluster_cls()`` returns ``cluster`` and
            ``cluster.connect()`` returns ``session``. The patch stays active
            for the duration of the test.
        """
        with patch('trustgraph.direct.cassandra_kg.Cluster') as mock_cluster_cls:
            mock_cluster = MagicMock()
            mock_session = MagicMock()
            mock_cluster.connect.return_value = mock_session
            mock_cluster_cls.return_value = mock_cluster
            yield mock_cluster_cls, mock_cluster, mock_session

    @pytest.fixture
    def entity_kg(self, mock_cluster):
        """Create an EntityCentricKnowledgeGraph backed by the mocked cluster.

        Returns:
            A ``(kg, mock_session)`` tuple.
        """
        from trustgraph.direct.cassandra_kg import EntityCentricKnowledgeGraph

        _, _, mock_session = mock_cluster
        kg = EntityCentricKnowledgeGraph(hosts=['localhost'], keyspace='test_keyspace')
        return kg, mock_session

    def test_init_creates_entity_centric_schema(self, mock_cluster):
        """Test that initialization creates the keyspace and schema tables.

        Checks the two quad tables (``quads_by_entity`` and
        ``quads_by_collection``) as well as ``collection_metadata``.
        """
        from trustgraph.direct.cassandra_kg import EntityCentricKnowledgeGraph

        _, _, mock_session = mock_cluster
        EntityCentricKnowledgeGraph(hosts=['localhost'], keyspace='test_keyspace')

        # Stringify each recorded execute() call once and search the text.
        executed = [str(c) for c in mock_session.execute.call_args_list]

        # Keyspace creation (matched case-insensitively)
        assert any('create keyspace' in stmt.lower() for stmt in executed)
        # quads_by_entity table
        assert any('quads_by_entity' in stmt for stmt in executed)
        # quads_by_collection table
        assert any('quads_by_collection' in stmt for stmt in executed)
        # collection_metadata table
        assert any('collection_metadata' in stmt for stmt in executed)

    def test_prepare_statements_initialized(self, entity_kg):
        """Test that insert statements for both quad tables are prepared."""
        _, mock_session = entity_kg

        assert mock_session.prepare.called
        prepared = [str(c) for c in mock_session.prepare.call_args_list]

        assert any(
            'INSERT INTO' in stmt and 'quads_by_entity' in stmt
            for stmt in prepared
        )
        assert any(
            'INSERT INTO' in stmt and 'quads_by_collection' in stmt
            for stmt in prepared
        )

    def test_insert_uri_object_creates_4_entity_rows(self, entity_kg):
        """Test inserting a quad whose object is a URI.

        Only verifies that the session executed something after the insert;
        the row count named in the test title is not asserted here.
        """
        kg, mock_session = entity_kg
        # Discard the calls made during construction so only the insert counts.
        mock_session.reset_mock()

        kg.insert(
            collection='test_collection',
            s='http://example.org/Alice',
            p='http://example.org/knows',
            o='http://example.org/Bob',
            g='http://example.org/graph1',
            otype='u'
        )

        mock_session.execute.assert_called()

    def test_insert_literal_object_creates_3_entity_rows(self, entity_kg):
        """Test inserting a quad whose object is a literal.

        Only verifies that the session executed something after the insert;
        the row count named in the test title is not asserted here.
        """
        kg, mock_session = entity_kg
        mock_session.reset_mock()

        kg.insert(
            collection='test_collection',
            s='http://example.org/Alice',
            p='http://www.w3.org/2000/01/rdf-schema#label',
            o='Alice Smith',
            g=None,
            otype='l',
            dtype='xsd:string',
            lang='en'
        )

        mock_session.execute.assert_called()

    def test_insert_default_graph(self, entity_kg):
        """Test inserting a quad with ``g=None`` (the default graph).

        Only verifies that the insert reaches the session; how the graph
        value is stored is not inspected here.
        """
        kg, mock_session = entity_kg
        mock_session.reset_mock()

        kg.insert(
            collection='test_collection',
            s='http://example.org/Alice',
            p='http://example.org/knows',
            o='http://example.org/Bob',
            g=None,
            otype='u'
        )

        mock_session.execute.assert_called()

    def test_insert_auto_detects_otype(self, entity_kg):
        """Test that insert works when ``otype`` is omitted.

        Exercises one URI-looking object and one plain-string object and
        checks that each insert reaches the session.
        """
        kg, mock_session = entity_kg
        mock_session.reset_mock()

        # URI-looking object, no otype supplied
        kg.insert(
            collection='test_collection',
            s='http://example.org/Alice',
            p='http://example.org/knows',
            o='http://example.org/Bob'
        )
        mock_session.execute.assert_called()

        mock_session.reset_mock()

        # Plain-string object, no otype supplied
        kg.insert(
            collection='test_collection',
            s='http://example.org/Alice',
            p='http://example.org/name',
            o='Alice'
        )
        mock_session.execute.assert_called()

    def test_get_s_returns_quads_for_subject(self, entity_kg):
        """Test get_s queries by subject and returns the matching quad."""
        kg, mock_session = entity_kg

        # Row handed back by the mocked session
        mock_result = [
            MagicMock(p='http://example.org/knows', o='http://example.org/Bob',
                      d='', otype='u', dtype='', lang='', s='http://example.org/Alice')
        ]
        mock_session.execute.return_value = mock_result

        results = kg.get_s('test_collection', 'http://example.org/Alice')

        mock_session.execute.assert_called()
        assert len(results) == 1
        assert results[0].s == 'http://example.org/Alice'
        assert results[0].p == 'http://example.org/knows'
        assert results[0].o == 'http://example.org/Bob'

    def test_get_p_returns_quads_for_predicate(self, entity_kg):
        """Test get_p queries by predicate."""
        kg, mock_session = entity_kg

        mock_result = [
            MagicMock(s='http://example.org/Alice', o='http://example.org/Bob',
                      d='', otype='u', dtype='', lang='', p='http://example.org/knows')
        ]
        mock_session.execute.return_value = mock_result

        results = kg.get_p('test_collection', 'http://example.org/knows')

        mock_session.execute.assert_called()
        assert len(results) == 1

    def test_get_o_returns_quads_for_object(self, entity_kg):
        """Test get_o queries by object."""
        kg, mock_session = entity_kg

        mock_result = [
            MagicMock(s='http://example.org/Alice', p='http://example.org/knows',
                      d='', otype='u', dtype='', lang='', o='http://example.org/Bob')
        ]
        mock_session.execute.return_value = mock_result

        results = kg.get_o('test_collection', 'http://example.org/Bob')

        mock_session.execute.assert_called()
        assert len(results) == 1

    def test_get_sp_returns_quads_for_subject_predicate(self, entity_kg):
        """Test get_sp queries by subject and predicate."""
        kg, mock_session = entity_kg

        mock_result = [
            MagicMock(o='http://example.org/Bob', d='', otype='u', dtype='', lang='')
        ]
        mock_session.execute.return_value = mock_result

        results = kg.get_sp('test_collection', 'http://example.org/Alice',
                            'http://example.org/knows')

        mock_session.execute.assert_called()
        assert len(results) == 1

    def test_get_po_returns_quads_for_predicate_object(self, entity_kg):
        """Test get_po queries by predicate and object."""
        kg, mock_session = entity_kg

        mock_result = [
            MagicMock(s='http://example.org/Alice', d='', otype='u', dtype='', lang='',
                      o='http://example.org/Bob')
        ]
        mock_session.execute.return_value = mock_result

        results = kg.get_po('test_collection', 'http://example.org/knows',
                            'http://example.org/Bob')

        mock_session.execute.assert_called()
        assert len(results) == 1

    def test_get_os_returns_quads_for_object_subject(self, entity_kg):
        """Test get_os queries by object and subject."""
        kg, mock_session = entity_kg

        mock_result = [
            MagicMock(p='http://example.org/knows', d='', otype='u', dtype='', lang='',
                      s='http://example.org/Alice', o='http://example.org/Bob')
        ]
        mock_session.execute.return_value = mock_result

        results = kg.get_os('test_collection', 'http://example.org/Bob',
                            'http://example.org/Alice')

        mock_session.execute.assert_called()
        assert len(results) == 1

    def test_get_spo_returns_quads_for_subject_predicate_object(self, entity_kg):
        """Test get_spo queries by subject, predicate, and object."""
        kg, mock_session = entity_kg

        mock_result = [
            MagicMock(d='', otype='u', dtype='', lang='',
                      o='http://example.org/Bob')
        ]
        mock_session.execute.return_value = mock_result

        results = kg.get_spo('test_collection', 'http://example.org/Alice',
                             'http://example.org/knows', 'http://example.org/Bob')

        mock_session.execute.assert_called()
        assert len(results) == 1

    def test_get_g_returns_quads_for_graph(self, entity_kg):
        """Test get_g issues a query for the given graph.

        Only the fact that the session was queried is checked; the returned
        quads are not inspected.
        """
        kg, mock_session = entity_kg

        mock_result = [
            MagicMock(s='http://example.org/Alice', p='http://example.org/knows',
                      o='http://example.org/Bob', otype='u', dtype='', lang='')
        ]
        mock_session.execute.return_value = mock_result

        kg.get_g('test_collection', 'http://example.org/graph1')

        mock_session.execute.assert_called()

    def test_get_all_returns_all_quads_in_collection(self, entity_kg):
        """Test get_all issues a query for the collection.

        Only the fact that the session was queried is checked; the returned
        quads are not inspected.
        """
        kg, mock_session = entity_kg

        mock_result = [
            MagicMock(d='', s='http://example.org/Alice', p='http://example.org/knows',
                      o='http://example.org/Bob', otype='u', dtype='', lang='')
        ]
        mock_session.execute.return_value = mock_result

        kg.get_all('test_collection')

        mock_session.execute.assert_called()

    def test_graph_wildcard_returns_all_graphs(self, entity_kg):
        """Test that g=GRAPH_WILDCARD returns quads from all graphs."""
        from trustgraph.direct.cassandra_kg import GRAPH_WILDCARD

        kg, mock_session = entity_kg

        # Two rows whose graph column (d) differs
        mock_result = [
            MagicMock(p='http://example.org/knows', d='http://example.org/graph1',
                      otype='u', dtype='', lang='', s='http://example.org/Alice',
                      o='http://example.org/Bob'),
            MagicMock(p='http://example.org/knows', d='http://example.org/graph2',
                      otype='u', dtype='', lang='', s='http://example.org/Alice',
                      o='http://example.org/Charlie')
        ]
        mock_session.execute.return_value = mock_result

        results = kg.get_s('test_collection', 'http://example.org/Alice', g=GRAPH_WILDCARD)

        # Should return quads from both graphs
        assert len(results) == 2

    def test_specific_graph_filters_results(self, entity_kg):
        """Test that specifying a graph filters results to that graph.

        The mocked rows expose the graph as ``d``; the returned quad is
        expected to expose it as ``g``.
        """
        kg, mock_session = entity_kg

        mock_result = [
            MagicMock(p='http://example.org/knows', d='http://example.org/graph1',
                      otype='u', dtype='', lang='', s='http://example.org/Alice',
                      o='http://example.org/Bob'),
            MagicMock(p='http://example.org/knows', d='http://example.org/graph2',
                      otype='u', dtype='', lang='', s='http://example.org/Alice',
                      o='http://example.org/Charlie')
        ]
        mock_session.execute.return_value = mock_result

        results = kg.get_s('test_collection', 'http://example.org/Alice',
                           g='http://example.org/graph1')

        # Should only return quads from graph1
        assert len(results) == 1
        assert results[0].g == 'http://example.org/graph1'

    def test_collection_exists_returns_true_when_exists(self, entity_kg):
        """Test collection_exists returns True when the lookup yields a row."""
        kg, mock_session = entity_kg

        mock_session.execute.return_value = [MagicMock(collection='test_collection')]

        assert kg.collection_exists('test_collection') is True

    def test_collection_exists_returns_false_when_not_exists(self, entity_kg):
        """Test collection_exists returns False when the lookup yields no rows."""
        kg, mock_session = entity_kg

        mock_session.execute.return_value = []

        assert kg.collection_exists('nonexistent_collection') is False

    def test_create_collection_inserts_metadata(self, entity_kg):
        """Test create_collection executes a statement on the session."""
        kg, mock_session = entity_kg
        mock_session.reset_mock()

        kg.create_collection('test_collection')

        mock_session.execute.assert_called()

    def test_delete_collection_removes_all_data(self, entity_kg):
        """Test delete_collection executes statements on the session."""
        kg, mock_session = entity_kg

        # Rows returned when the collection's quads are read back
        mock_quads = [
            MagicMock(d='', s='http://example.org/Alice', p='http://example.org/knows',
                      o='http://example.org/Bob', otype='u')
        ]
        mock_session.execute.return_value = mock_quads
        # reset_mock() clears recorded calls but, by default, keeps the
        # configured return_value, so the rows above are still served.
        mock_session.reset_mock()

        kg.delete_collection('test_collection')

        assert mock_session.execute.called

    def test_close_shuts_down_connections(self, entity_kg):
        """Test close shuts down both the session and the cluster."""
        kg, mock_session = entity_kg

        kg.close()

        mock_session.shutdown.assert_called_once()
        kg.cluster.shutdown.assert_called_once()
class TestQuadResult:
    """Test cases for the QuadResult class."""

    def test_quad_result_stores_all_fields(self):
        """Every constructor argument can be read back from the instance."""
        from trustgraph.direct.cassandra_kg import QuadResult

        supplied = {
            's': 'http://example.org/Alice',
            'p': 'http://example.org/knows',
            'o': 'http://example.org/Bob',
            'g': 'http://example.org/graph1',
            'otype': 'u',
            'dtype': '',
            'lang': '',
        }
        quad = QuadResult(**supplied)

        for field_name, expected in supplied.items():
            assert getattr(quad, field_name) == expected

    def test_quad_result_defaults(self):
        """Omitted otype/dtype/lang fall back to 'u', '' and ''."""
        from trustgraph.direct.cassandra_kg import QuadResult

        quad = QuadResult(
            s='http://example.org/s',
            p='http://example.org/p',
            o='literal value',
            g='',
        )

        assert quad.otype == 'u'  # default otype
        assert quad.dtype == ''
        assert quad.lang == ''

    def test_quad_result_with_literal_metadata(self):
        """Literal metadata (otype, dtype, lang) is kept as given."""
        from trustgraph.direct.cassandra_kg import QuadResult

        literal_meta = {'otype': 'l', 'dtype': 'xsd:string', 'lang': 'en'}
        quad = QuadResult(
            s='http://example.org/Alice',
            p='http://www.w3.org/2000/01/rdf-schema#label',
            o='Alice Smith',
            g='',
            **literal_meta,
        )

        assert quad.otype == literal_meta['otype']
        assert quad.dtype == literal_meta['dtype']
        assert quad.lang == literal_meta['lang']
class TestWriteHelperFunctions:
    """Test cases for the term helpers imported from the Cassandra write module."""

    def test_get_term_otype_for_iri(self):
        """An IRI term yields otype 'u'."""
        from trustgraph.storage.triples.cassandra.write import get_term_otype
        from trustgraph.schema import Term, IRI

        iri_term = Term(type=IRI, iri='http://example.org/Alice')
        otype = get_term_otype(iri_term)

        assert otype == 'u'

    def test_get_term_otype_for_literal(self):
        """A LITERAL term yields otype 'l'."""
        from trustgraph.storage.triples.cassandra.write import get_term_otype
        from trustgraph.schema import Term, LITERAL

        literal_term = Term(type=LITERAL, value='Alice Smith')
        otype = get_term_otype(literal_term)

        assert otype == 'l'

    def test_get_term_otype_for_blank(self):
        """A BLANK term yields otype 'u'."""
        from trustgraph.storage.triples.cassandra.write import get_term_otype
        from trustgraph.schema import Term, BLANK

        blank_term = Term(type=BLANK, id='_:b1')
        otype = get_term_otype(blank_term)

        assert otype == 'u'

    def test_get_term_otype_for_triple(self):
        """A TRIPLE term yields otype 't'."""
        from trustgraph.storage.triples.cassandra.write import get_term_otype
        from trustgraph.schema import Term, TRIPLE

        triple_term = Term(type=TRIPLE)
        otype = get_term_otype(triple_term)

        assert otype == 't'

    def test_get_term_otype_for_none(self):
        """Passing None yields otype 'u'."""
        from trustgraph.storage.triples.cassandra.write import get_term_otype

        otype = get_term_otype(None)

        assert otype == 'u'

    def test_get_term_dtype_for_literal(self):
        """The datatype of a LITERAL term is returned as-is."""
        from trustgraph.storage.triples.cassandra.write import get_term_dtype
        from trustgraph.schema import Term, LITERAL

        typed_literal = Term(type=LITERAL, value='42', datatype='xsd:integer')
        dtype = get_term_dtype(typed_literal)

        assert dtype == 'xsd:integer'

    def test_get_term_dtype_for_non_literal(self):
        """A non-LITERAL term has an empty datatype."""
        from trustgraph.storage.triples.cassandra.write import get_term_dtype
        from trustgraph.schema import Term, IRI

        iri_term = Term(type=IRI, iri='http://example.org/Alice')
        dtype = get_term_dtype(iri_term)

        assert dtype == ''

    def test_get_term_dtype_for_none(self):
        """Passing None gives an empty datatype."""
        from trustgraph.storage.triples.cassandra.write import get_term_dtype

        dtype = get_term_dtype(None)

        assert dtype == ''

    def test_get_term_lang_for_literal(self):
        """The language of a LITERAL term is returned as-is."""
        from trustgraph.storage.triples.cassandra.write import get_term_lang
        from trustgraph.schema import Term, LITERAL

        tagged_literal = Term(type=LITERAL, value='Alice Smith', language='en')
        lang = get_term_lang(tagged_literal)

        assert lang == 'en'

    def test_get_term_lang_for_non_literal(self):
        """A non-LITERAL term has an empty language."""
        from trustgraph.storage.triples.cassandra.write import get_term_lang
        from trustgraph.schema import Term, IRI

        iri_term = Term(type=IRI, iri='http://example.org/Alice')
        lang = get_term_lang(iri_term)

        assert lang == ''
class TestServiceHelperFunctions:
    """Test cases for create_term, imported from the Cassandra query service module."""

    def test_create_term_with_uri_otype(self):
        """otype='u' produces an IRI term carrying the given value."""
        from trustgraph.query.triples.cassandra.service import create_term
        from trustgraph.schema import IRI

        uri = 'http://example.org/Alice'
        built = create_term(uri, otype='u')

        assert built.type == IRI
        assert built.iri == uri

    def test_create_term_with_literal_otype(self):
        """otype='l' produces a LITERAL term with value, datatype and language."""
        from trustgraph.query.triples.cassandra.service import create_term
        from trustgraph.schema import LITERAL

        text = 'Alice Smith'
        built = create_term(text, otype='l', dtype='xsd:string', lang='en')

        assert built.type == LITERAL
        assert built.value == text
        assert built.datatype == 'xsd:string'
        assert built.language == 'en'

    def test_create_term_with_triple_otype(self):
        """otype='t' produces an IRI term carrying the given value."""
        from trustgraph.query.triples.cassandra.service import create_term
        from trustgraph.schema import IRI

        statement_uri = 'http://example.org/statement1'
        built = create_term(statement_uri, otype='t')

        assert built.type == IRI
        assert built.iri == statement_uri

    def test_create_term_heuristic_fallback_uri(self):
        """With no otype, a URL-looking value becomes an IRI term."""
        from trustgraph.query.triples.cassandra.service import create_term
        from trustgraph.schema import IRI

        uri = 'http://example.org/Alice'
        built = create_term(uri)

        assert built.type == IRI
        assert built.iri == uri

    def test_create_term_heuristic_fallback_literal(self):
        """With no otype, a plain string becomes a LITERAL term."""
        from trustgraph.query.triples.cassandra.service import create_term
        from trustgraph.schema import LITERAL

        text = 'Alice Smith'
        built = create_term(text)

        assert built.type == LITERAL
        assert built.value == text

View file

@ -70,25 +70,29 @@ class TestCassandraQueryProcessor:
assert result.type == LITERAL
@pytest.mark.asyncio
@patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
async def test_query_triples_spo_query(self, mock_trustgraph):
@patch('trustgraph.query.triples.cassandra.service.EntityCentricKnowledgeGraph')
async def test_query_triples_spo_query(self, mock_kg_class):
"""Test querying triples with subject, predicate, and object specified"""
from trustgraph.schema import TriplesQueryRequest, Term, IRI, LITERAL
# Setup mock TrustGraph
# Setup mock TrustGraph via factory function
mock_tg_instance = MagicMock()
mock_trustgraph.return_value = mock_tg_instance
mock_kg_class.return_value = mock_tg_instance
# SPO query returns a list of results (with mock graph attribute)
mock_result = MagicMock()
mock_result.g = None
mock_result.g = ''
mock_result.otype = None
mock_result.dtype = None
mock_result.lang = None
mock_result.o = 'test_object'
mock_tg_instance.get_spo.return_value = [mock_result]
processor = Processor(
taskgroup=MagicMock(),
id='test-cassandra-query',
cassandra_host='localhost'
)
# Create query request with all SPO values
query = TriplesQueryRequest(
user='test_user',
@ -98,20 +102,20 @@ class TestCassandraQueryProcessor:
o=Term(type=LITERAL, value='test_object'),
limit=100
)
result = await processor.query_triples(query)
# Verify KnowledgeGraph was created with correct parameters
mock_trustgraph.assert_called_once_with(
mock_kg_class.assert_called_once_with(
hosts=['localhost'],
keyspace='test_user'
)
# Verify get_spo was called with correct parameters
mock_tg_instance.get_spo.assert_called_once_with(
'test_collection', 'test_subject', 'test_predicate', 'test_object', g=None, limit=100
)
# Verify result contains the queried triple
assert len(result) == 1
assert result[0].s.value == 'test_subject'
@ -146,21 +150,25 @@ class TestCassandraQueryProcessor:
assert processor.table is None
@pytest.mark.asyncio
@patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
async def test_query_triples_sp_pattern(self, mock_trustgraph):
@patch('trustgraph.query.triples.cassandra.service.EntityCentricKnowledgeGraph')
async def test_query_triples_sp_pattern(self, mock_kg_class):
"""Test SP query pattern (subject and predicate, no object)"""
from trustgraph.schema import TriplesQueryRequest, Term, IRI, LITERAL
# Setup mock TrustGraph and response
# Setup mock TrustGraph via factory function
mock_tg_instance = MagicMock()
mock_trustgraph.return_value = mock_tg_instance
mock_kg_class.return_value = mock_tg_instance
mock_result = MagicMock()
mock_result.o = 'result_object'
mock_result.g = ''
mock_result.otype = None
mock_result.dtype = None
mock_result.lang = None
mock_tg_instance.get_sp.return_value = [mock_result]
processor = Processor(taskgroup=MagicMock())
query = TriplesQueryRequest(
user='test_user',
collection='test_collection',
@ -169,9 +177,9 @@ class TestCassandraQueryProcessor:
o=None,
limit=50
)
result = await processor.query_triples(query)
mock_tg_instance.get_sp.assert_called_once_with('test_collection', 'test_subject', 'test_predicate', g=None, limit=50)
assert len(result) == 1
assert result[0].s.value == 'test_subject'
@ -179,21 +187,25 @@ class TestCassandraQueryProcessor:
assert result[0].o.value == 'result_object'
@pytest.mark.asyncio
@patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
async def test_query_triples_s_pattern(self, mock_trustgraph):
@patch('trustgraph.query.triples.cassandra.service.EntityCentricKnowledgeGraph')
async def test_query_triples_s_pattern(self, mock_kg_class):
"""Test S query pattern (subject only)"""
from trustgraph.schema import TriplesQueryRequest, Term, IRI, LITERAL
mock_tg_instance = MagicMock()
mock_trustgraph.return_value = mock_tg_instance
mock_kg_class.return_value = mock_tg_instance
mock_result = MagicMock()
mock_result.p = 'result_predicate'
mock_result.o = 'result_object'
mock_result.g = ''
mock_result.otype = None
mock_result.dtype = None
mock_result.lang = None
mock_tg_instance.get_s.return_value = [mock_result]
processor = Processor(taskgroup=MagicMock())
query = TriplesQueryRequest(
user='test_user',
collection='test_collection',
@ -202,9 +214,9 @@ class TestCassandraQueryProcessor:
o=None,
limit=25
)
result = await processor.query_triples(query)
mock_tg_instance.get_s.assert_called_once_with('test_collection', 'test_subject', g=None, limit=25)
assert len(result) == 1
assert result[0].s.value == 'test_subject'
@ -212,21 +224,25 @@ class TestCassandraQueryProcessor:
assert result[0].o.value == 'result_object'
@pytest.mark.asyncio
@patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
async def test_query_triples_p_pattern(self, mock_trustgraph):
@patch('trustgraph.query.triples.cassandra.service.EntityCentricKnowledgeGraph')
async def test_query_triples_p_pattern(self, mock_kg_class):
"""Test P query pattern (predicate only)"""
from trustgraph.schema import TriplesQueryRequest, Term, IRI, LITERAL
mock_tg_instance = MagicMock()
mock_trustgraph.return_value = mock_tg_instance
mock_kg_class.return_value = mock_tg_instance
mock_result = MagicMock()
mock_result.s = 'result_subject'
mock_result.o = 'result_object'
mock_result.g = ''
mock_result.otype = None
mock_result.dtype = None
mock_result.lang = None
mock_tg_instance.get_p.return_value = [mock_result]
processor = Processor(taskgroup=MagicMock())
query = TriplesQueryRequest(
user='test_user',
collection='test_collection',
@ -235,9 +251,9 @@ class TestCassandraQueryProcessor:
o=None,
limit=10
)
result = await processor.query_triples(query)
mock_tg_instance.get_p.assert_called_once_with('test_collection', 'test_predicate', g=None, limit=10)
assert len(result) == 1
assert result[0].s.value == 'result_subject'
@ -245,21 +261,25 @@ class TestCassandraQueryProcessor:
assert result[0].o.value == 'result_object'
@pytest.mark.asyncio
@patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
async def test_query_triples_o_pattern(self, mock_trustgraph):
@patch('trustgraph.query.triples.cassandra.service.EntityCentricKnowledgeGraph')
async def test_query_triples_o_pattern(self, mock_kg_class):
"""Test O query pattern (object only)"""
from trustgraph.schema import TriplesQueryRequest, Term, IRI, LITERAL
mock_tg_instance = MagicMock()
mock_trustgraph.return_value = mock_tg_instance
mock_kg_class.return_value = mock_tg_instance
mock_result = MagicMock()
mock_result.s = 'result_subject'
mock_result.p = 'result_predicate'
mock_result.g = ''
mock_result.otype = None
mock_result.dtype = None
mock_result.lang = None
mock_tg_instance.get_o.return_value = [mock_result]
processor = Processor(taskgroup=MagicMock())
query = TriplesQueryRequest(
user='test_user',
collection='test_collection',
@ -268,9 +288,9 @@ class TestCassandraQueryProcessor:
o=Term(type=LITERAL, value='test_object'),
limit=75
)
result = await processor.query_triples(query)
mock_tg_instance.get_o.assert_called_once_with('test_collection', 'test_object', g=None, limit=75)
assert len(result) == 1
assert result[0].s.value == 'result_subject'
@ -278,22 +298,26 @@ class TestCassandraQueryProcessor:
assert result[0].o.value == 'test_object'
@pytest.mark.asyncio
@patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
async def test_query_triples_get_all_pattern(self, mock_trustgraph):
@patch('trustgraph.query.triples.cassandra.service.EntityCentricKnowledgeGraph')
async def test_query_triples_get_all_pattern(self, mock_kg_class):
"""Test query pattern with no constraints (get all)"""
from trustgraph.schema import TriplesQueryRequest
mock_tg_instance = MagicMock()
mock_trustgraph.return_value = mock_tg_instance
mock_kg_class.return_value = mock_tg_instance
mock_result = MagicMock()
mock_result.s = 'all_subject'
mock_result.p = 'all_predicate'
mock_result.o = 'all_object'
mock_result.g = ''
mock_result.otype = None
mock_result.dtype = None
mock_result.lang = None
mock_tg_instance.get_all.return_value = [mock_result]
processor = Processor(taskgroup=MagicMock())
query = TriplesQueryRequest(
user='test_user',
collection='test_collection',
@ -302,9 +326,9 @@ class TestCassandraQueryProcessor:
o=None,
limit=1000
)
result = await processor.query_triples(query)
mock_tg_instance.get_all.assert_called_once_with('test_collection', limit=1000)
assert len(result) == 1
assert result[0].s.value == 'all_subject'
@ -378,16 +402,20 @@ class TestCassandraQueryProcessor:
mock_launch.assert_called_once_with(default_ident, '\nTriples query service. Input is a (s, p, o, g) quad pattern, some values may be\nnull. Output is a list of quads.\n')
@pytest.mark.asyncio
@patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
async def test_query_triples_with_authentication(self, mock_trustgraph):
@patch('trustgraph.query.triples.cassandra.service.EntityCentricKnowledgeGraph')
async def test_query_triples_with_authentication(self, mock_kg_class):
"""Test querying with username and password authentication"""
from trustgraph.schema import TriplesQueryRequest, Term, IRI, LITERAL
mock_tg_instance = MagicMock()
mock_trustgraph.return_value = mock_tg_instance
mock_kg_class.return_value = mock_tg_instance
# SPO query returns a list of results
mock_result = MagicMock()
mock_result.g = None
mock_result.g = ''
mock_result.otype = None
mock_result.dtype = None
mock_result.lang = None
mock_result.o = 'test_object'
mock_tg_instance.get_spo.return_value = [mock_result]
processor = Processor(
@ -395,7 +423,7 @@ class TestCassandraQueryProcessor:
cassandra_username='authuser',
cassandra_password='authpass'
)
query = TriplesQueryRequest(
user='test_user',
collection='test_collection',
@ -404,11 +432,11 @@ class TestCassandraQueryProcessor:
o=Term(type=LITERAL, value='test_object'),
limit=100
)
await processor.query_triples(query)
# Verify KnowledgeGraph was created with authentication
mock_trustgraph.assert_called_once_with(
mock_kg_class.assert_called_once_with(
hosts=['cassandra'], # Updated default
keyspace='test_user',
username='authuser',
@ -416,16 +444,20 @@ class TestCassandraQueryProcessor:
)
@pytest.mark.asyncio
@patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
async def test_query_triples_table_reuse(self, mock_trustgraph):
@patch('trustgraph.query.triples.cassandra.service.EntityCentricKnowledgeGraph')
async def test_query_triples_table_reuse(self, mock_kg_class):
"""Test that TrustGraph is reused for same table"""
from trustgraph.schema import TriplesQueryRequest, Term, IRI, LITERAL
mock_tg_instance = MagicMock()
mock_trustgraph.return_value = mock_tg_instance
mock_kg_class.return_value = mock_tg_instance
# SPO query returns a list of results
mock_result = MagicMock()
mock_result.g = None
mock_result.g = ''
mock_result.otype = None
mock_result.dtype = None
mock_result.lang = None
mock_result.o = 'test_object'
mock_tg_instance.get_spo.return_value = [mock_result]
processor = Processor(taskgroup=MagicMock())
@ -441,24 +473,35 @@ class TestCassandraQueryProcessor:
# First query should create TrustGraph
await processor.query_triples(query)
assert mock_trustgraph.call_count == 1
assert mock_kg_class.call_count == 1
# Second query with same table should reuse TrustGraph
await processor.query_triples(query)
assert mock_trustgraph.call_count == 1 # Should not increase
assert mock_kg_class.call_count == 1 # Should not increase
@pytest.mark.asyncio
@patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
async def test_query_triples_table_switching(self, mock_trustgraph):
@patch('trustgraph.query.triples.cassandra.service.EntityCentricKnowledgeGraph')
async def test_query_triples_table_switching(self, mock_kg_class):
"""Test table switching creates new TrustGraph"""
from trustgraph.schema import TriplesQueryRequest, Term, IRI, LITERAL
mock_tg_instance1 = MagicMock()
mock_tg_instance2 = MagicMock()
mock_trustgraph.side_effect = [mock_tg_instance1, mock_tg_instance2]
mock_kg_class.side_effect = [mock_tg_instance1, mock_tg_instance2]
# Setup mock results for both instances
mock_result = MagicMock()
mock_result.g = ''
mock_result.otype = None
mock_result.dtype = None
mock_result.lang = None
mock_result.p = 'p'
mock_result.o = 'o'
mock_tg_instance1.get_s.return_value = [mock_result]
mock_tg_instance2.get_s.return_value = [mock_result]
processor = Processor(taskgroup=MagicMock())
# First query
query1 = TriplesQueryRequest(
user='user1',
@ -468,10 +511,10 @@ class TestCassandraQueryProcessor:
o=None,
limit=100
)
await processor.query_triples(query1)
assert processor.table == 'user1'
# Second query with different table
query2 = TriplesQueryRequest(
user='user2',
@ -481,25 +524,25 @@ class TestCassandraQueryProcessor:
o=None,
limit=100
)
await processor.query_triples(query2)
assert processor.table == 'user2'
# Verify TrustGraph was created twice
assert mock_trustgraph.call_count == 2
assert mock_kg_class.call_count == 2
@pytest.mark.asyncio
@patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
async def test_query_triples_exception_handling(self, mock_trustgraph):
@patch('trustgraph.query.triples.cassandra.service.EntityCentricKnowledgeGraph')
async def test_query_triples_exception_handling(self, mock_kg_class):
"""Test exception handling during query execution"""
from trustgraph.schema import TriplesQueryRequest, Term, IRI, LITERAL
mock_tg_instance = MagicMock()
mock_trustgraph.return_value = mock_tg_instance
mock_kg_class.return_value = mock_tg_instance
mock_tg_instance.get_spo.side_effect = Exception("Query failed")
processor = Processor(taskgroup=MagicMock())
query = TriplesQueryRequest(
user='test_user',
collection='test_collection',
@ -508,28 +551,36 @@ class TestCassandraQueryProcessor:
o=Term(type=LITERAL, value='test_object'),
limit=100
)
with pytest.raises(Exception, match="Query failed"):
await processor.query_triples(query)
@pytest.mark.asyncio
@patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
async def test_query_triples_multiple_results(self, mock_trustgraph):
@patch('trustgraph.query.triples.cassandra.service.EntityCentricKnowledgeGraph')
async def test_query_triples_multiple_results(self, mock_kg_class):
"""Test query returning multiple results"""
from trustgraph.schema import TriplesQueryRequest, Term, IRI, LITERAL
mock_tg_instance = MagicMock()
mock_trustgraph.return_value = mock_tg_instance
mock_kg_class.return_value = mock_tg_instance
# Mock multiple results
mock_result1 = MagicMock()
mock_result1.o = 'object1'
mock_result1.g = ''
mock_result1.otype = None
mock_result1.dtype = None
mock_result1.lang = None
mock_result2 = MagicMock()
mock_result2.o = 'object2'
mock_result2.g = ''
mock_result2.otype = None
mock_result2.dtype = None
mock_result2.lang = None
mock_tg_instance.get_sp.return_value = [mock_result1, mock_result2]
processor = Processor(taskgroup=MagicMock())
query = TriplesQueryRequest(
user='test_user',
collection='test_collection',
@ -538,9 +589,9 @@ class TestCassandraQueryProcessor:
o=None,
limit=100
)
result = await processor.query_triples(query)
assert len(result) == 2
assert result[0].o.value == 'object1'
assert result[1].o.value == 'object2'
@ -550,16 +601,20 @@ class TestCassandraQueryPerformanceOptimizations:
"""Test cases for multi-table performance optimizations in query service"""
@pytest.mark.asyncio
@patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
async def test_get_po_query_optimization(self, mock_trustgraph):
@patch('trustgraph.query.triples.cassandra.service.EntityCentricKnowledgeGraph')
async def test_get_po_query_optimization(self, mock_kg_class):
"""Test that get_po queries use optimized table (no ALLOW FILTERING)"""
from trustgraph.schema import TriplesQueryRequest, Term, IRI, LITERAL
mock_tg_instance = MagicMock()
mock_trustgraph.return_value = mock_tg_instance
mock_kg_class.return_value = mock_tg_instance
mock_result = MagicMock()
mock_result.s = 'result_subject'
mock_result.g = ''
mock_result.otype = None
mock_result.dtype = None
mock_result.lang = None
mock_tg_instance.get_po.return_value = [mock_result]
processor = Processor(taskgroup=MagicMock())
@ -587,16 +642,20 @@ class TestCassandraQueryPerformanceOptimizations:
assert result[0].o.value == 'test_object'
@pytest.mark.asyncio
@patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
async def test_get_os_query_optimization(self, mock_trustgraph):
@patch('trustgraph.query.triples.cassandra.service.EntityCentricKnowledgeGraph')
async def test_get_os_query_optimization(self, mock_kg_class):
"""Test that get_os queries use optimized table (no ALLOW FILTERING)"""
from trustgraph.schema import TriplesQueryRequest, Term, IRI, LITERAL
mock_tg_instance = MagicMock()
mock_trustgraph.return_value = mock_tg_instance
mock_kg_class.return_value = mock_tg_instance
mock_result = MagicMock()
mock_result.p = 'result_predicate'
mock_result.g = ''
mock_result.otype = None
mock_result.dtype = None
mock_result.lang = None
mock_tg_instance.get_os.return_value = [mock_result]
processor = Processor(taskgroup=MagicMock())
@ -624,13 +683,13 @@ class TestCassandraQueryPerformanceOptimizations:
assert result[0].o.value == 'test_object'
@pytest.mark.asyncio
@patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
async def test_all_query_patterns_use_correct_tables(self, mock_trustgraph):
@patch('trustgraph.query.triples.cassandra.service.EntityCentricKnowledgeGraph')
async def test_all_query_patterns_use_correct_tables(self, mock_kg_class):
"""Test that all query patterns route to their optimal tables"""
from trustgraph.schema import TriplesQueryRequest, Term, IRI, LITERAL
mock_tg_instance = MagicMock()
mock_trustgraph.return_value = mock_tg_instance
mock_kg_class.return_value = mock_tg_instance
# Mock empty results for all queries
mock_tg_instance.get_all.return_value = []
@ -696,19 +755,23 @@ class TestCassandraQueryPerformanceOptimizations:
# Mode is determined in KnowledgeGraph initialization
@pytest.mark.asyncio
@patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
async def test_performance_critical_po_query_no_filtering(self, mock_trustgraph):
@patch('trustgraph.query.triples.cassandra.service.EntityCentricKnowledgeGraph')
async def test_performance_critical_po_query_no_filtering(self, mock_kg_class):
"""Test the performance-critical PO query that eliminates ALLOW FILTERING"""
from trustgraph.schema import TriplesQueryRequest, Term, IRI, LITERAL
mock_tg_instance = MagicMock()
mock_trustgraph.return_value = mock_tg_instance
mock_kg_class.return_value = mock_tg_instance
# Mock multiple subjects for the same predicate-object pair
mock_results = []
for i in range(5):
mock_result = MagicMock()
mock_result.s = f'subject_{i}'
mock_result.g = ''
mock_result.otype = None
mock_result.dtype = None
mock_result.lang = None
mock_results.append(mock_result)
mock_tg_instance.get_po.return_value = mock_results

View file

@ -6,7 +6,7 @@ import pytest
from unittest.mock import MagicMock, patch, AsyncMock
from trustgraph.storage.triples.cassandra.write import Processor
from trustgraph.schema import Triple, LITERAL
from trustgraph.schema import Triple, LITERAL, IRI
from trustgraph.direct.cassandra_kg import DEFAULT_GRAPH
@ -87,29 +87,29 @@ class TestCassandraStorageProcessor:
assert processor.cassandra_username == 'new-user' # Only cassandra_* params work
@pytest.mark.asyncio
@patch('trustgraph.storage.triples.cassandra.write.KnowledgeGraph')
async def test_table_switching_with_auth(self, mock_trustgraph):
@patch('trustgraph.storage.triples.cassandra.write.EntityCentricKnowledgeGraph')
async def test_table_switching_with_auth(self, mock_kg_class):
"""Test table switching logic when authentication is provided"""
taskgroup_mock = MagicMock()
mock_tg_instance = MagicMock()
mock_trustgraph.return_value = mock_tg_instance
mock_kg_class.return_value = mock_tg_instance
processor = Processor(
taskgroup=taskgroup_mock,
cassandra_username='testuser',
cassandra_password='testpass'
)
# Create mock message
mock_message = MagicMock()
mock_message.metadata.user = 'user1'
mock_message.metadata.collection = 'collection1'
mock_message.triples = []
await processor.store_triples(mock_message)
# Verify KnowledgeGraph was called with auth parameters
mock_trustgraph.assert_called_once_with(
mock_kg_class.assert_called_once_with(
hosts=['cassandra'], # Updated default
keyspace='user1',
username='testuser',
@ -118,81 +118,89 @@ class TestCassandraStorageProcessor:
assert processor.table == 'user1'
@pytest.mark.asyncio
@patch('trustgraph.storage.triples.cassandra.write.EntityCentricKnowledgeGraph')
async def test_table_switching_without_auth(self, mock_kg_class):
    """Test table switching logic when no authentication is provided.

    Stores an empty message for 'user2' through a Processor built without
    credentials, then checks that the patched graph class was constructed
    exactly once with only ``hosts`` and ``keyspace`` (no username or
    password keyword arguments) and that the processor recorded the new
    table.
    """
    taskgroup_mock = MagicMock()
    mock_tg_instance = MagicMock()
    mock_kg_class.return_value = mock_tg_instance

    processor = Processor(taskgroup=taskgroup_mock)

    # Create mock message; an empty triple list keeps the test focused on
    # connection set-up rather than on inserts.
    mock_message = MagicMock()
    mock_message.metadata.user = 'user2'
    mock_message.metadata.collection = 'collection2'
    mock_message.triples = []

    await processor.store_triples(mock_message)

    # Verify the graph class was called without auth parameters
    mock_kg_class.assert_called_once_with(
        hosts=['cassandra'],  # Updated default
        keyspace='user2'
    )
    assert processor.table == 'user2'
@pytest.mark.asyncio
@patch('trustgraph.storage.triples.cassandra.write.EntityCentricKnowledgeGraph')
async def test_table_reuse_when_same(self, mock_kg_class):
    """Test that the graph object is not recreated when the table is unchanged.

    Sends the same message twice and checks that the patched graph class
    is constructed only on the first call.
    """
    taskgroup_mock = MagicMock()
    mock_tg_instance = MagicMock()
    mock_kg_class.return_value = mock_tg_instance

    processor = Processor(taskgroup=taskgroup_mock)

    # Create mock message
    mock_message = MagicMock()
    mock_message.metadata.user = 'user1'
    mock_message.metadata.collection = 'collection1'
    mock_message.triples = []

    # First call should construct the graph object
    await processor.store_triples(mock_message)
    assert mock_kg_class.call_count == 1

    # Second call with the same table should reuse the existing object
    await processor.store_triples(mock_message)
    assert mock_kg_class.call_count == 1  # Should not increase
@pytest.mark.asyncio
@patch('trustgraph.storage.triples.cassandra.write.KnowledgeGraph')
async def test_triple_insertion(self, mock_trustgraph):
@patch('trustgraph.storage.triples.cassandra.write.EntityCentricKnowledgeGraph')
async def test_triple_insertion(self, mock_kg_class):
"""Test that triples are properly inserted into Cassandra"""
taskgroup_mock = MagicMock()
mock_tg_instance = MagicMock()
mock_trustgraph.return_value = mock_tg_instance
mock_kg_class.return_value = mock_tg_instance
processor = Processor(taskgroup=taskgroup_mock)
# Create mock triples with proper Term structure
triple1 = MagicMock()
triple1.s.type = LITERAL
triple1.s.value = 'subject1'
triple1.s.datatype = ''
triple1.s.language = ''
triple1.p.type = LITERAL
triple1.p.value = 'predicate1'
triple1.o.type = LITERAL
triple1.o.value = 'object1'
triple1.o.datatype = ''
triple1.o.language = ''
triple1.g = None
triple2 = MagicMock()
triple2.s.type = LITERAL
triple2.s.value = 'subject2'
triple2.s.datatype = ''
triple2.s.language = ''
triple2.p.type = LITERAL
triple2.p.value = 'predicate2'
triple2.o.type = LITERAL
triple2.o.value = 'object2'
triple2.o.datatype = ''
triple2.o.language = ''
triple2.g = None
# Create mock message
@ -203,51 +211,57 @@ class TestCassandraStorageProcessor:
await processor.store_triples(mock_message)
# Verify both triples were inserted (with g= parameter)
# Verify both triples were inserted (with g=, otype=, dtype=, lang= parameters)
assert mock_tg_instance.insert.call_count == 2
mock_tg_instance.insert.assert_any_call('collection1', 'subject1', 'predicate1', 'object1', g=DEFAULT_GRAPH)
mock_tg_instance.insert.assert_any_call('collection1', 'subject2', 'predicate2', 'object2', g=DEFAULT_GRAPH)
mock_tg_instance.insert.assert_any_call(
'collection1', 'subject1', 'predicate1', 'object1',
g=DEFAULT_GRAPH, otype='l', dtype='', lang=''
)
mock_tg_instance.insert.assert_any_call(
'collection1', 'subject2', 'predicate2', 'object2',
g=DEFAULT_GRAPH, otype='l', dtype='', lang=''
)
@pytest.mark.asyncio
@patch('trustgraph.storage.triples.cassandra.write.EntityCentricKnowledgeGraph')
async def test_triple_insertion_with_empty_list(self, mock_kg_class):
    """Test behavior when message has no triples.

    Storing a message whose ``triples`` list is empty must not call
    ``insert`` on the graph instance.
    """
    taskgroup_mock = MagicMock()
    mock_tg_instance = MagicMock()
    mock_kg_class.return_value = mock_tg_instance

    processor = Processor(taskgroup=taskgroup_mock)

    # Create mock message with empty triples
    mock_message = MagicMock()
    mock_message.metadata.user = 'user1'
    mock_message.metadata.collection = 'collection1'
    mock_message.triples = []

    await processor.store_triples(mock_message)

    # Verify no triples were inserted
    mock_tg_instance.insert.assert_not_called()
@pytest.mark.asyncio
@patch('trustgraph.storage.triples.cassandra.write.EntityCentricKnowledgeGraph')
@patch('trustgraph.storage.triples.cassandra.write.time.sleep')
async def test_exception_handling_with_retry(self, mock_sleep, mock_kg_class):
    """Test exception handling during graph object creation.

    The patched graph class raises on construction; the error must
    propagate out of ``store_triples`` and ``time.sleep(1)`` must have
    been called exactly once beforehand.

    Patches are applied bottom-up, so ``mock_sleep`` (the decorator
    closest to the function) is the first injected argument.
    """
    taskgroup_mock = MagicMock()
    mock_kg_class.side_effect = Exception("Connection failed")

    processor = Processor(taskgroup=taskgroup_mock)

    # Create mock message
    mock_message = MagicMock()
    mock_message.metadata.user = 'user1'
    mock_message.metadata.collection = 'collection1'
    mock_message.triples = []

    with pytest.raises(Exception, match="Connection failed"):
        await processor.store_triples(mock_message)

    # Verify sleep was called before re-raising
    mock_sleep.assert_called_once_with(1)
@ -335,57 +349,61 @@ class TestCassandraStorageProcessor:
mock_launch.assert_called_once_with(default_ident, '\nGraph writer. Input is graph edge. Writes edges to Cassandra graph.\n')
@pytest.mark.asyncio
@patch('trustgraph.storage.triples.cassandra.write.EntityCentricKnowledgeGraph')
async def test_store_triples_table_switching_between_different_tables(self, mock_kg_class):
    """Test table switching when different tables are used in sequence.

    Two messages for different users are stored back to back. Each must
    trigger construction of a fresh graph object, and after each call the
    processor must expose the matching ``table`` and ``tg`` values.
    """
    taskgroup_mock = MagicMock()
    mock_tg_instance1 = MagicMock()
    mock_tg_instance2 = MagicMock()
    # A list side_effect hands out one instance per constructor call, in order.
    mock_kg_class.side_effect = [mock_tg_instance1, mock_tg_instance2]

    processor = Processor(taskgroup=taskgroup_mock)

    # First message with table1
    mock_message1 = MagicMock()
    mock_message1.metadata.user = 'user1'
    mock_message1.metadata.collection = 'collection1'
    mock_message1.triples = []

    await processor.store_triples(mock_message1)
    assert processor.table == 'user1'
    assert processor.tg == mock_tg_instance1

    # Second message with different table
    mock_message2 = MagicMock()
    mock_message2.metadata.user = 'user2'
    mock_message2.metadata.collection = 'collection2'
    mock_message2.triples = []

    await processor.store_triples(mock_message2)
    assert processor.table == 'user2'
    assert processor.tg == mock_tg_instance2

    # Verify a graph object was created once per distinct table
    assert mock_kg_class.call_count == 2
@pytest.mark.asyncio
@patch('trustgraph.storage.triples.cassandra.write.KnowledgeGraph')
async def test_store_triples_with_special_characters_in_values(self, mock_trustgraph):
@patch('trustgraph.storage.triples.cassandra.write.EntityCentricKnowledgeGraph')
async def test_store_triples_with_special_characters_in_values(self, mock_kg_class):
"""Test storing triples with special characters and unicode"""
taskgroup_mock = MagicMock()
mock_tg_instance = MagicMock()
mock_trustgraph.return_value = mock_tg_instance
mock_kg_class.return_value = mock_tg_instance
processor = Processor(taskgroup=taskgroup_mock)
# Create triple with special characters and proper Term structure
triple = MagicMock()
triple.s.type = LITERAL
triple.s.value = 'subject with spaces & symbols'
triple.s.datatype = ''
triple.s.language = ''
triple.p.type = LITERAL
triple.p.value = 'predicate:with/colons'
triple.o.type = LITERAL
triple.o.value = 'object with "quotes" and unicode: ñáéíóú'
triple.o.datatype = ''
triple.o.language = ''
triple.g = None
mock_message = MagicMock()
@ -401,31 +419,34 @@ class TestCassandraStorageProcessor:
'subject with spaces & symbols',
'predicate:with/colons',
'object with "quotes" and unicode: ñáéíóú',
g=DEFAULT_GRAPH
g=DEFAULT_GRAPH,
otype='l',
dtype='',
lang=''
)
@pytest.mark.asyncio
@patch('trustgraph.storage.triples.cassandra.write.KnowledgeGraph')
async def test_store_triples_preserves_old_table_on_exception(self, mock_trustgraph):
@patch('trustgraph.storage.triples.cassandra.write.EntityCentricKnowledgeGraph')
async def test_store_triples_preserves_old_table_on_exception(self, mock_kg_class):
"""Test that table remains unchanged when TrustGraph creation fails"""
taskgroup_mock = MagicMock()
processor = Processor(taskgroup=taskgroup_mock)
# Set an initial table
processor.table = ('old_user', 'old_collection')
# Mock TrustGraph to raise exception
mock_trustgraph.side_effect = Exception("Connection failed")
mock_kg_class.side_effect = Exception("Connection failed")
mock_message = MagicMock()
mock_message.metadata.user = 'new_user'
mock_message.metadata.collection = 'new_collection'
mock_message.triples = []
with pytest.raises(Exception, match="Connection failed"):
await processor.store_triples(mock_message)
# Table should remain unchanged since self.table = table happens after try/except
assert processor.table == ('old_user', 'old_collection')
# TrustGraph should be set to None though
@ -436,12 +457,12 @@ class TestCassandraPerformanceOptimizations:
"""Test cases for multi-table performance optimizations"""
@pytest.mark.asyncio
@patch('trustgraph.storage.triples.cassandra.write.KnowledgeGraph')
async def test_legacy_mode_uses_single_table(self, mock_trustgraph):
@patch('trustgraph.storage.triples.cassandra.write.EntityCentricKnowledgeGraph')
async def test_legacy_mode_uses_single_table(self, mock_kg_class):
"""Test that legacy mode still works with single table"""
taskgroup_mock = MagicMock()
mock_tg_instance = MagicMock()
mock_trustgraph.return_value = mock_tg_instance
mock_kg_class.return_value = mock_tg_instance
with patch.dict('os.environ', {'CASSANDRA_USE_LEGACY': 'true'}):
processor = Processor(taskgroup=taskgroup_mock)
@ -454,16 +475,15 @@ class TestCassandraPerformanceOptimizations:
await processor.store_triples(mock_message)
# Verify KnowledgeGraph instance uses legacy mode
kg_instance = mock_trustgraph.return_value
assert kg_instance is not None
assert mock_tg_instance is not None
@pytest.mark.asyncio
@patch('trustgraph.storage.triples.cassandra.write.KnowledgeGraph')
async def test_optimized_mode_uses_multi_table(self, mock_trustgraph):
@patch('trustgraph.storage.triples.cassandra.write.EntityCentricKnowledgeGraph')
async def test_optimized_mode_uses_multi_table(self, mock_kg_class):
"""Test that optimized mode uses multi-table schema"""
taskgroup_mock = MagicMock()
mock_tg_instance = MagicMock()
mock_trustgraph.return_value = mock_tg_instance
mock_kg_class.return_value = mock_tg_instance
with patch.dict('os.environ', {'CASSANDRA_USE_LEGACY': 'false'}):
processor = Processor(taskgroup=taskgroup_mock)
@ -476,16 +496,15 @@ class TestCassandraPerformanceOptimizations:
await processor.store_triples(mock_message)
# Verify KnowledgeGraph instance is in optimized mode
kg_instance = mock_trustgraph.return_value
assert kg_instance is not None
assert mock_tg_instance is not None
@pytest.mark.asyncio
@patch('trustgraph.storage.triples.cassandra.write.KnowledgeGraph')
async def test_batch_write_consistency(self, mock_trustgraph):
@patch('trustgraph.storage.triples.cassandra.write.EntityCentricKnowledgeGraph')
async def test_batch_write_consistency(self, mock_kg_class):
"""Test that all tables stay consistent during batch writes"""
taskgroup_mock = MagicMock()
mock_tg_instance = MagicMock()
mock_trustgraph.return_value = mock_tg_instance
mock_kg_class.return_value = mock_tg_instance
processor = Processor(taskgroup=taskgroup_mock)
@ -493,10 +512,14 @@ class TestCassandraPerformanceOptimizations:
triple = MagicMock()
triple.s.type = LITERAL
triple.s.value = 'test_subject'
triple.s.datatype = ''
triple.s.language = ''
triple.p.type = LITERAL
triple.p.value = 'test_predicate'
triple.o.type = LITERAL
triple.o.value = 'test_object'
triple.o.datatype = ''
triple.o.language = ''
triple.g = None
mock_message = MagicMock()
@ -509,7 +532,7 @@ class TestCassandraPerformanceOptimizations:
# Verify insert was called for the triple (implementation details tested in KnowledgeGraph)
mock_tg_instance.insert.assert_called_once_with(
'collection1', 'test_subject', 'test_predicate', 'test_object',
g=DEFAULT_GRAPH
g=DEFAULT_GRAPH, otype='l', dtype='', lang=''
)
def test_environment_variable_controls_mode(self):