Release 1.4 -> master (#524)

Catch up
This commit is contained in:
cybermaggedon 2025-09-20 16:00:37 +01:00 committed by GitHub
parent a8e437fc7f
commit 6c7af8789d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
216 changed files with 31360 additions and 1611 deletions

View file

@ -0,0 +1,429 @@
"""
Integration tests for Cassandra configuration in processors.
Tests that processors correctly use the configuration helper
and handle environment variables, CLI args, and backward compatibility.
"""
import os
import pytest
from unittest.mock import Mock, patch, MagicMock
from trustgraph.storage.triples.cassandra.write import Processor as TriplesWriter
from trustgraph.storage.objects.cassandra.write import Processor as ObjectsWriter
from trustgraph.query.triples.cassandra.service import Processor as TriplesQuery
from trustgraph.storage.knowledge.store import Processor as KgStore
class TestTriplesWriterConfiguration:
    """Test Cassandra configuration in triples writer processor.

    ``KnowledgeGraph`` is replaced by a mock in every test, and every test
    constructs the processor under a controlled ``os.environ`` so the
    outcome does not depend on CASSANDRA_* variables that happen to be set
    on the machine running the suite.
    """

    @patch('trustgraph.direct.cassandra_kg.KnowledgeGraph')
    def test_environment_variable_configuration(self, mock_trust_graph):
        """Test processor picks up configuration from environment variables."""
        env_vars = {
            'CASSANDRA_HOST': 'env-host1,env-host2',
            'CASSANDRA_USERNAME': 'env-user',
            'CASSANDRA_PASSWORD': 'env-pass'
        }

        with patch.dict(os.environ, env_vars, clear=True):
            processor = TriplesWriter(taskgroup=MagicMock())

        # The comma-separated host string is expected back as a list
        assert processor.cassandra_host == ['env-host1', 'env-host2']
        assert processor.cassandra_username == 'env-user'
        assert processor.cassandra_password == 'env-pass'

    @patch('trustgraph.direct.cassandra_kg.KnowledgeGraph')
    def test_parameter_override_environment(self, mock_trust_graph):
        """Test explicit parameters override environment variables."""
        env_vars = {
            'CASSANDRA_HOST': 'env-host',
            'CASSANDRA_USERNAME': 'env-user',
            'CASSANDRA_PASSWORD': 'env-pass'
        }

        with patch.dict(os.environ, env_vars, clear=True):
            processor = TriplesWriter(
                taskgroup=MagicMock(),
                cassandra_host='param-host1,param-host2',
                cassandra_username='param-user',
                cassandra_password='param-pass'
            )

        assert processor.cassandra_host == ['param-host1', 'param-host2']
        assert processor.cassandra_username == 'param-user'
        assert processor.cassandra_password == 'param-pass'

    @patch('trustgraph.direct.cassandra_kg.KnowledgeGraph')
    def test_no_backward_compatibility_graph_params(self, mock_trust_graph):
        """Test that old graph_* parameter names are no longer supported."""
        # Clear the environment first: the assertions below expect the
        # built-in defaults, and an ambient CASSANDRA_* variable would
        # otherwise be picked up and make this test fail spuriously.
        with patch.dict(os.environ, {}, clear=True):
            processor = TriplesWriter(
                taskgroup=MagicMock(),
                graph_host='compat-host',
                graph_username='compat-user',
                graph_password='compat-pass'
            )

        # Should use defaults since graph_* params are not recognized
        assert processor.cassandra_host == ['cassandra']  # Default
        assert processor.cassandra_username is None
        assert processor.cassandra_password is None

    @patch('trustgraph.direct.cassandra_kg.KnowledgeGraph')
    def test_default_configuration(self, mock_trust_graph):
        """Test default configuration when no params or env vars provided."""
        with patch.dict(os.environ, {}, clear=True):
            processor = TriplesWriter(taskgroup=MagicMock())

        assert processor.cassandra_host == ['cassandra']
        assert processor.cassandra_username is None
        assert processor.cassandra_password is None
class TestObjectsWriterConfiguration:
    """Test Cassandra configuration in objects writer processor."""

    @patch('trustgraph.storage.objects.cassandra.write.Cluster')
    def test_environment_variable_configuration(self, mock_cluster):
        """Test processor picks up configuration from environment variables."""
        mock_cluster.return_value = MagicMock()
        environment = {
            'CASSANDRA_HOST': 'obj-env-host1,obj-env-host2',
            'CASSANDRA_USERNAME': 'obj-env-user',
            'CASSANDRA_PASSWORD': 'obj-env-pass'
        }

        with patch.dict(os.environ, environment, clear=True):
            writer = ObjectsWriter(taskgroup=MagicMock())

        assert writer.cassandra_host == ['obj-env-host1', 'obj-env-host2']
        assert writer.cassandra_username == 'obj-env-user'
        assert writer.cassandra_password == 'obj-env-pass'

    @patch('trustgraph.storage.objects.cassandra.write.Cluster')
    def test_cassandra_connection_with_hosts_list(self, mock_cluster):
        """Test that Cassandra connection uses hosts list correctly."""
        # Mocked cluster whose connect() hands back a mocked session
        cluster_instance = MagicMock()
        cluster_instance.connect.return_value = MagicMock()
        mock_cluster.return_value = cluster_instance

        environment = {
            'CASSANDRA_HOST': 'conn-host1,conn-host2,conn-host3',
            'CASSANDRA_USERNAME': 'conn-user',
            'CASSANDRA_PASSWORD': 'conn-pass'
        }

        with patch.dict(os.environ, environment, clear=True):
            writer = ObjectsWriter(taskgroup=MagicMock())
            writer.connect_cassandra()

        # Exactly one Cluster must have been built, from the parsed hosts
        mock_cluster.assert_called_once()
        cluster_kwargs = mock_cluster.call_args.kwargs
        assert 'contact_points' in cluster_kwargs
        assert cluster_kwargs['contact_points'] == [
            'conn-host1', 'conn-host2', 'conn-host3'
        ]

    @patch('trustgraph.storage.objects.cassandra.write.Cluster')
    @patch('trustgraph.storage.objects.cassandra.write.PlainTextAuthProvider')
    def test_authentication_configuration(self, mock_auth_provider, mock_cluster):
        """Test authentication is configured when credentials are provided."""
        auth_instance = MagicMock()
        mock_auth_provider.return_value = auth_instance
        mock_cluster.return_value = MagicMock()

        environment = {
            'CASSANDRA_HOST': 'auth-host',
            'CASSANDRA_USERNAME': 'auth-user',
            'CASSANDRA_PASSWORD': 'auth-pass'
        }

        with patch.dict(os.environ, environment, clear=True):
            writer = ObjectsWriter(taskgroup=MagicMock())
            writer.connect_cassandra()

        # The auth provider must be created from the resolved credentials
        mock_auth_provider.assert_called_once_with(
            username='auth-user',
            password='auth-pass'
        )

        # ...and that same provider object must be handed to Cluster
        cluster_kwargs = mock_cluster.call_args.kwargs
        assert 'auth_provider' in cluster_kwargs
        assert cluster_kwargs['auth_provider'] == auth_instance
class TestTriplesQueryConfiguration:
    """Test Cassandra configuration in triples query processor."""

    @patch('trustgraph.direct.cassandra_kg.KnowledgeGraph')
    def test_environment_variable_configuration(self, mock_trust_graph):
        """Test processor picks up configuration from environment variables."""
        environment = {
            'CASSANDRA_HOST': 'query-env-host1,query-env-host2',
            'CASSANDRA_USERNAME': 'query-env-user',
            'CASSANDRA_PASSWORD': 'query-env-pass'
        }

        with patch.dict(os.environ, environment, clear=True):
            query_service = TriplesQuery(taskgroup=MagicMock())

        assert query_service.cassandra_host == [
            'query-env-host1', 'query-env-host2'
        ]
        assert query_service.cassandra_username == 'query-env-user'
        assert query_service.cassandra_password == 'query-env-pass'

    @patch('trustgraph.direct.cassandra_kg.KnowledgeGraph')
    def test_only_new_parameters_work(self, mock_trust_graph):
        """Test that only new parameters work."""
        # Legacy graph_* names are passed alongside the cassandra_* ones;
        # the legacy values are expected to be ignored.
        constructor_kwargs = {
            'cassandra_host': 'new-host',
            'graph_host': 'old-host',
            'cassandra_username': 'new-user',
            'graph_username': 'old-user',
        }
        query_service = TriplesQuery(taskgroup=MagicMock(), **constructor_kwargs)

        # Only new parameters should work
        assert query_service.cassandra_host == ['new-host']
        assert query_service.cassandra_username == 'new-user'
class TestKgStoreConfiguration:
    """Test Cassandra configuration in knowledge store processor.

    ``KnowledgeTableStore`` is replaced by a mock in every test; each test
    checks the keyword arguments the processor passes to it. Tests whose
    expectations include a default value run with a cleared ``os.environ``
    so ambient CASSANDRA_* variables cannot change the result.
    """

    @patch('trustgraph.storage.knowledge.store.KnowledgeTableStore')
    def test_environment_variable_configuration(self, mock_table_store):
        """Test kg-store picks up configuration from environment variables."""
        env_vars = {
            'CASSANDRA_HOST': 'kg-env-host1,kg-env-host2,kg-env-host3',
            'CASSANDRA_USERNAME': 'kg-env-user',
            'CASSANDRA_PASSWORD': 'kg-env-pass'
        }
        mock_table_store.return_value = MagicMock()

        with patch.dict(os.environ, env_vars, clear=True):
            KgStore(taskgroup=MagicMock())

        # Verify KnowledgeTableStore was called with resolved config
        mock_table_store.assert_called_once_with(
            cassandra_host=['kg-env-host1', 'kg-env-host2', 'kg-env-host3'],
            cassandra_username='kg-env-user',
            cassandra_password='kg-env-pass',
            keyspace='knowledge'
        )

    @patch('trustgraph.storage.knowledge.store.KnowledgeTableStore')
    def test_explicit_parameters(self, mock_table_store):
        """Test kg-store with explicit parameters."""
        mock_table_store.return_value = MagicMock()

        KgStore(
            taskgroup=MagicMock(),
            cassandra_host='explicit-host',
            cassandra_username='explicit-user',
            cassandra_password='explicit-pass'
        )

        # Verify KnowledgeTableStore was called with explicit config
        mock_table_store.assert_called_once_with(
            cassandra_host=['explicit-host'],
            cassandra_username='explicit-user',
            cassandra_password='explicit-pass',
            keyspace='knowledge'
        )

    @patch('trustgraph.storage.knowledge.store.KnowledgeTableStore')
    def test_no_backward_compatibility_cassandra_user(self, mock_table_store):
        """Test that cassandra_user parameter is no longer supported."""
        mock_table_store.return_value = MagicMock()

        # Clear the environment: the expectation below is that the username
        # resolves to None, which an ambient CASSANDRA_USERNAME would
        # silently override and turn into a spurious failure.
        with patch.dict(os.environ, {}, clear=True):
            KgStore(
                taskgroup=MagicMock(),
                cassandra_host='compat-host',
                cassandra_user='compat-user',  # Old parameter name - should be ignored
                cassandra_password='compat-pass'
            )

        # cassandra_user should be ignored
        mock_table_store.assert_called_once_with(
            cassandra_host=['compat-host'],
            cassandra_username=None,  # Should be None since cassandra_user is ignored
            cassandra_password='compat-pass',
            keyspace='knowledge'
        )

    @patch('trustgraph.storage.knowledge.store.KnowledgeTableStore')
    def test_default_configuration(self, mock_table_store):
        """Test kg-store default configuration."""
        mock_table_store.return_value = MagicMock()

        with patch.dict(os.environ, {}, clear=True):
            KgStore(taskgroup=MagicMock())

        # Should use defaults
        mock_table_store.assert_called_once_with(
            cassandra_host=['cassandra'],
            cassandra_username=None,
            cassandra_password=None,
            keyspace='knowledge'
        )
class TestCommandLineArgumentHandling:
    """Test command-line argument parsing in processors."""

    # Attributes every Cassandra-backed processor is expected to register
    _CASSANDRA_OPTIONS = (
        'cassandra_host',
        'cassandra_username',
        'cassandra_password',
    )

    @staticmethod
    def _parse_without_arguments(processor_cls):
        """Register processor_cls's arguments on a fresh parser and parse []."""
        import argparse

        parser = argparse.ArgumentParser()
        processor_cls.add_args(parser)
        # Parsing an empty command line yields a namespace holding one
        # attribute per registered argument.
        return parser.parse_args([])

    def test_triples_writer_add_args(self):
        """Test that triples writer adds standard Cassandra arguments."""
        namespace = self._parse_without_arguments(TriplesWriter)

        for option in self._CASSANDRA_OPTIONS:
            assert hasattr(namespace, option)

    def test_objects_writer_add_args(self):
        """Test that objects writer adds standard Cassandra arguments."""
        namespace = self._parse_without_arguments(ObjectsWriter)

        for option in self._CASSANDRA_OPTIONS:
            assert hasattr(namespace, option)
        assert hasattr(namespace, 'config_type')  # Objects writer specific arg

    def test_triples_query_add_args(self):
        """Test that triples query adds standard Cassandra arguments."""
        namespace = self._parse_without_arguments(TriplesQuery)

        for option in self._CASSANDRA_OPTIONS:
            assert hasattr(namespace, option)

    def test_kg_store_add_args(self):
        """Test that kg-store now adds Cassandra arguments (previously missing)."""
        namespace = self._parse_without_arguments(KgStore)

        for option in self._CASSANDRA_OPTIONS:
            assert hasattr(namespace, option)

    def test_help_text_with_environment_variables(self):
        """Test that help text shows environment variable values."""
        import argparse

        environment = {
            'CASSANDRA_HOST': 'help-host1,help-host2',
            'CASSANDRA_USERNAME': 'help-user',
            'CASSANDRA_PASSWORD': 'help-pass'
        }

        with patch.dict(os.environ, environment, clear=True):
            parser = argparse.ArgumentParser()
            TriplesWriter.add_args(parser)
            help_text = parser.format_help()

        # argparse may wrap long help lines, so the first host is checked
        # as two fragments rather than as one contiguous string.
        assert 'help-' in help_text
        assert 'host1' in help_text
        assert 'help-host2' in help_text
        assert 'help-user' in help_text

        # The password is shown as a placeholder, never as its value
        assert '<set>' in help_text
        assert 'help-pass' not in help_text

        # Each option names the environment variable it was read from;
        # the longer markers are matched piecewise because of line wrapping.
        assert '[from CASSANDRA_HOST]' in help_text
        assert '[from' in help_text
        assert 'CASSANDRA_USERNAME]' in help_text
        assert 'CASSANDRA_PASSWORD]' in help_text
class TestConfigurationPriorityIntegration:
    """Test complete configuration priority chain in processors."""

    @patch('trustgraph.direct.cassandra_kg.KnowledgeGraph')
    def test_complete_priority_chain(self, mock_trust_graph):
        """Test CLI params > env vars > defaults priority in actual processor."""
        environment = {
            'CASSANDRA_HOST': 'env-host',
            'CASSANDRA_USERNAME': 'env-user',
            'CASSANDRA_PASSWORD': 'env-pass'
        }
        # Host and username are given explicitly; the password is left out
        # on purpose so it has to come from the environment.
        explicit = {
            'cassandra_host': 'cli-host1,cli-host2',
            'cassandra_username': 'cli-user',
        }

        with patch.dict(os.environ, environment, clear=True):
            writer = TriplesWriter(taskgroup=MagicMock(), **explicit)

        assert writer.cassandra_host == ['cli-host1', 'cli-host2']  # From CLI
        assert writer.cassandra_username == 'cli-user'  # From CLI
        assert writer.cassandra_password == 'env-pass'  # From env

    @patch('trustgraph.storage.knowledge.store.KnowledgeTableStore')
    def test_kg_store_priority_chain(self, mock_table_store):
        """Test configuration priority chain in kg-store processor."""
        mock_table_store.return_value = MagicMock()
        environment = {
            'CASSANDRA_HOST': 'env-host1,env-host2',
            'CASSANDRA_USERNAME': 'env-user',
            'CASSANDRA_PASSWORD': 'env-pass'
        }

        with patch.dict(os.environ, environment, clear=True):
            # Only the host is explicit; credentials must fall back to env
            KgStore(taskgroup=MagicMock(), cassandra_host='param-host')

        # Verify correct priority resolution
        expected_kwargs = {
            'cassandra_host': ['param-host'],    # From parameter
            'cassandra_username': 'env-user',    # From environment
            'cassandra_password': 'env-pass',    # From environment
            'keyspace': 'knowledge',
        }
        mock_table_store.assert_called_once_with(**expected_kwargs)

View file

@ -91,37 +91,41 @@ class TestMilvusDocEmbeddingsStorageProcessor:
await processor.store_document_embeddings(message)
# Verify insert was called for each vector
# Verify insert was called for each vector with user/collection parameters
expected_calls = [
([0.1, 0.2, 0.3], "Test document content"),
([0.4, 0.5, 0.6], "Test document content"),
([0.1, 0.2, 0.3], "Test document content", 'test_user', 'test_collection'),
([0.4, 0.5, 0.6], "Test document content", 'test_user', 'test_collection'),
]
assert processor.vecstore.insert.call_count == 2
for i, (expected_vec, expected_doc) in enumerate(expected_calls):
for i, (expected_vec, expected_doc, expected_user, expected_collection) in enumerate(expected_calls):
actual_call = processor.vecstore.insert.call_args_list[i]
assert actual_call[0][0] == expected_vec
assert actual_call[0][1] == expected_doc
assert actual_call[0][2] == expected_user
assert actual_call[0][3] == expected_collection
@pytest.mark.asyncio
async def test_store_document_embeddings_multiple_chunks(self, processor, mock_message):
"""Test storing document embeddings for multiple chunks"""
await processor.store_document_embeddings(mock_message)
# Verify insert was called for each vector of each chunk
# Verify insert was called for each vector of each chunk with user/collection parameters
expected_calls = [
# Chunk 1 vectors
([0.1, 0.2, 0.3], "This is the first document chunk"),
([0.4, 0.5, 0.6], "This is the first document chunk"),
([0.1, 0.2, 0.3], "This is the first document chunk", 'test_user', 'test_collection'),
([0.4, 0.5, 0.6], "This is the first document chunk", 'test_user', 'test_collection'),
# Chunk 2 vectors
([0.7, 0.8, 0.9], "This is the second document chunk"),
([0.7, 0.8, 0.9], "This is the second document chunk", 'test_user', 'test_collection'),
]
assert processor.vecstore.insert.call_count == 3
for i, (expected_vec, expected_doc) in enumerate(expected_calls):
for i, (expected_vec, expected_doc, expected_user, expected_collection) in enumerate(expected_calls):
actual_call = processor.vecstore.insert.call_args_list[i]
assert actual_call[0][0] == expected_vec
assert actual_call[0][1] == expected_doc
assert actual_call[0][2] == expected_user
assert actual_call[0][3] == expected_collection
@pytest.mark.asyncio
async def test_store_document_embeddings_empty_chunk(self, processor):
@ -185,9 +189,9 @@ class TestMilvusDocEmbeddingsStorageProcessor:
await processor.store_document_embeddings(message)
# Verify only valid chunk was inserted
# Verify only valid chunk was inserted with user/collection parameters
processor.vecstore.insert.assert_called_once_with(
[0.1, 0.2, 0.3], "Valid document content"
[0.1, 0.2, 0.3], "Valid document content", 'test_user', 'test_collection'
)
@pytest.mark.asyncio
@ -243,18 +247,20 @@ class TestMilvusDocEmbeddingsStorageProcessor:
await processor.store_document_embeddings(message)
# Verify all vectors were inserted regardless of dimension
# Verify all vectors were inserted regardless of dimension with user/collection parameters
expected_calls = [
([0.1, 0.2], "Document with mixed dimensions"),
([0.3, 0.4, 0.5, 0.6], "Document with mixed dimensions"),
([0.7, 0.8, 0.9], "Document with mixed dimensions"),
([0.1, 0.2], "Document with mixed dimensions", 'test_user', 'test_collection'),
([0.3, 0.4, 0.5, 0.6], "Document with mixed dimensions", 'test_user', 'test_collection'),
([0.7, 0.8, 0.9], "Document with mixed dimensions", 'test_user', 'test_collection'),
]
assert processor.vecstore.insert.call_count == 3
for i, (expected_vec, expected_doc) in enumerate(expected_calls):
for i, (expected_vec, expected_doc, expected_user, expected_collection) in enumerate(expected_calls):
actual_call = processor.vecstore.insert.call_args_list[i]
assert actual_call[0][0] == expected_vec
assert actual_call[0][1] == expected_doc
assert actual_call[0][2] == expected_user
assert actual_call[0][3] == expected_collection
@pytest.mark.asyncio
async def test_store_document_embeddings_unicode_content(self, processor):
@ -272,9 +278,9 @@ class TestMilvusDocEmbeddingsStorageProcessor:
await processor.store_document_embeddings(message)
# Verify Unicode content was properly decoded and inserted
# Verify Unicode content was properly decoded and inserted with user/collection parameters
processor.vecstore.insert.assert_called_once_with(
[0.1, 0.2, 0.3], "Document with Unicode: éñ中文🚀"
[0.1, 0.2, 0.3], "Document with Unicode: éñ中文🚀", 'test_user', 'test_collection'
)
@pytest.mark.asyncio
@ -295,9 +301,9 @@ class TestMilvusDocEmbeddingsStorageProcessor:
await processor.store_document_embeddings(message)
# Verify large content was inserted
# Verify large content was inserted with user/collection parameters
processor.vecstore.insert.assert_called_once_with(
[0.1, 0.2, 0.3], large_content
[0.1, 0.2, 0.3], large_content, 'test_user', 'test_collection'
)
@pytest.mark.asyncio
@ -316,9 +322,103 @@ class TestMilvusDocEmbeddingsStorageProcessor:
await processor.store_document_embeddings(message)
# Verify whitespace content was inserted (not filtered out)
# Verify whitespace content was inserted (not filtered out) with user/collection parameters
processor.vecstore.insert.assert_called_once_with(
[0.1, 0.2, 0.3], " \n\t "
[0.1, 0.2, 0.3], " \n\t ", 'test_user', 'test_collection'
)
@pytest.mark.asyncio
async def test_store_document_embeddings_different_user_collection_combinations(self, processor):
    """Test storing document embeddings with different user/collection combinations"""
    user_collection_pairs = (
        ('user1', 'collection1'),
        ('user2', 'collection2'),
        ('admin', 'production'),
        ('test@domain.com', 'test-collection.v1'),
    )

    for user, collection in user_collection_pairs:
        # Start every combination from an empty call history so that
        # assert_called_once_with only sees this iteration's insert.
        processor.vecstore.reset_mock()

        message = MagicMock()
        message.metadata = MagicMock()
        message.metadata.user = user
        message.metadata.collection = collection
        message.chunks = [
            ChunkEmbeddings(
                chunk=b"Test content",
                vectors=[[0.1, 0.2, 0.3]]
            )
        ]

        await processor.store_document_embeddings(message)

        # Verify insert was called with the correct user/collection
        processor.vecstore.insert.assert_called_once_with(
            [0.1, 0.2, 0.3], "Test content", user, collection
        )
@pytest.mark.asyncio
async def test_store_document_embeddings_user_collection_parameter_isolation(self, processor):
    """Test that different user/collection combinations are properly isolated"""
    # First message: user1 / collection1
    first_message = MagicMock()
    first_message.metadata = MagicMock()
    first_message.metadata.user = 'user1'
    first_message.metadata.collection = 'collection1'
    first_message.chunks = [
        ChunkEmbeddings(
            chunk=b"User1 content",
            vectors=[[0.1, 0.2, 0.3]]
        )
    ]

    # Second message: user2 / collection2
    second_message = MagicMock()
    second_message.metadata = MagicMock()
    second_message.metadata.user = 'user2'
    second_message.metadata.collection = 'collection2'
    second_message.chunks = [
        ChunkEmbeddings(
            chunk=b"User2 content",
            vectors=[[0.4, 0.5, 0.6]]
        )
    ]

    await processor.store_document_embeddings(first_message)
    await processor.store_document_embeddings(second_message)

    # Each insert must carry the user/collection of its own message, in
    # the order the messages were stored.
    expected_rows = [
        ([0.1, 0.2, 0.3], "User1 content", 'user1', 'collection1'),
        ([0.4, 0.5, 0.6], "User2 content", 'user2', 'collection2'),
    ]

    assert processor.vecstore.insert.call_count == 2
    recorded_calls = processor.vecstore.insert.call_args_list
    for recorded, expected_row in zip(recorded_calls, expected_rows):
        positional = recorded[0]
        for position, expected_value in enumerate(expected_row):
            assert positional[position] == expected_value
@pytest.mark.asyncio
async def test_store_document_embeddings_special_character_user_collection(self, processor):
    """Test storing document embeddings with special characters in user/collection names"""
    email_like_user = 'user@domain.com'
    dotted_collection = 'test-collection.v1'

    message = MagicMock()
    message.metadata = MagicMock()
    message.metadata.user = email_like_user
    message.metadata.collection = dotted_collection
    message.chunks = [
        ChunkEmbeddings(
            chunk=b"Special chars test",
            vectors=[[0.1, 0.2, 0.3]]
        )
    ]

    await processor.store_document_embeddings(message)

    # Verify the exact user/collection strings are passed (sanitization happens in DocVectors)
    processor.vecstore.insert.assert_called_once_with(
        [0.1, 0.2, 0.3], "Special chars test", email_like_user, dotted_collection
    )
def test_add_args_method(self):

View file

@ -135,7 +135,7 @@ class TestPineconeDocEmbeddingsStorageProcessor:
await processor.store_document_embeddings(message)
# Verify index name and operations
expected_index_name = "d-test_user-test_collection-3"
expected_index_name = "d-test_user-test_collection"
processor.pinecone.Index.assert_called_with(expected_index_name)
# Verify upsert was called for each vector
@ -203,7 +203,7 @@ class TestPineconeDocEmbeddingsStorageProcessor:
await processor.store_document_embeddings(message)
# Verify index creation was called
expected_index_name = "d-test_user-test_collection-3"
expected_index_name = "d-test_user-test_collection"
processor.pinecone.create_index.assert_called_once()
create_call = processor.pinecone.create_index.call_args
assert create_call[1]['name'] == expected_index_name
@ -299,12 +299,11 @@ class TestPineconeDocEmbeddingsStorageProcessor:
mock_index_3d = MagicMock()
def mock_index_side_effect(name):
if name.endswith("-2"):
return mock_index_2d
elif name.endswith("-4"):
return mock_index_4d
elif name.endswith("-3"):
return mock_index_3d
# All dimensions now use the same index name pattern
# Different dimensions will be handled within the same index
if "test_user" in name and "test_collection" in name:
return mock_index_2d # Just return one mock for all
return MagicMock()
processor.pinecone.Index.side_effect = mock_index_side_effect
processor.pinecone.has_index.return_value = True
@ -312,11 +311,10 @@ class TestPineconeDocEmbeddingsStorageProcessor:
with patch('uuid.uuid4', side_effect=['id1', 'id2', 'id3']):
await processor.store_document_embeddings(message)
# Verify different indexes were used for different dimensions
assert processor.pinecone.Index.call_count == 3
mock_index_2d.upsert.assert_called_once()
mock_index_4d.upsert.assert_called_once()
mock_index_3d.upsert.assert_called_once()
# Verify all vectors are now stored in the same index
# (Pinecone can handle mixed dimensions in the same index)
assert processor.pinecone.Index.call_count == 3 # Called once per vector
# A bare comparison is evaluated and discarded; it must be asserted to check anything.
assert mock_index_2d.upsert.call_count == 3  # All upserts go to same index
@pytest.mark.asyncio
async def test_store_document_embeddings_empty_chunks_list(self, processor):

View file

@ -106,7 +106,7 @@ class TestQdrantDocEmbeddingsStorage(IsolatedAsyncioTestCase):
# Assert
# Verify collection existence was checked
expected_collection = 'd_test_user_test_collection_3'
expected_collection = 'd_test_user_test_collection'
mock_qdrant_instance.collection_exists.assert_called_once_with(expected_collection)
# Verify upsert was called
@ -309,7 +309,7 @@ class TestQdrantDocEmbeddingsStorage(IsolatedAsyncioTestCase):
await processor.store_document_embeddings(mock_message)
# Assert
expected_collection = 'd_new_user_new_collection_5'
expected_collection = 'd_new_user_new_collection'
# Verify collection existence check and creation
mock_qdrant_instance.collection_exists.assert_called_once_with(expected_collection)
@ -408,7 +408,7 @@ class TestQdrantDocEmbeddingsStorage(IsolatedAsyncioTestCase):
await processor.store_document_embeddings(mock_message2)
# Assert
expected_collection = 'd_cache_user_cache_collection_3'
expected_collection = 'd_cache_user_cache_collection'
assert processor.last_collection == expected_collection
# Verify second call skipped existence check (cached)
@ -455,17 +455,16 @@ class TestQdrantDocEmbeddingsStorage(IsolatedAsyncioTestCase):
await processor.store_document_embeddings(mock_message)
# Assert
# Should check existence of both collections
expected_collections = ['d_dim_user_dim_collection_2', 'd_dim_user_dim_collection_3']
actual_calls = [call.args[0] for call in mock_qdrant_instance.collection_exists.call_args_list]
assert actual_calls == expected_collections
# Should upsert to both collections
# Should check existence of the same collection (dimensions no longer create separate collections)
expected_collection = 'd_dim_user_dim_collection'
mock_qdrant_instance.collection_exists.assert_called_once_with(expected_collection)
# Should upsert to the same collection for both vectors
assert mock_qdrant_instance.upsert.call_count == 2
upsert_calls = mock_qdrant_instance.upsert.call_args_list
assert upsert_calls[0][1]['collection_name'] == 'd_dim_user_dim_collection_2'
assert upsert_calls[1][1]['collection_name'] == 'd_dim_user_dim_collection_3'
assert upsert_calls[0][1]['collection_name'] == expected_collection
assert upsert_calls[1][1]['collection_name'] == expected_collection
@patch('trustgraph.storage.doc_embeddings.qdrant.write.QdrantClient')
@patch('trustgraph.base.DocumentEmbeddingsStoreService.__init__')

View file

@ -91,37 +91,41 @@ class TestMilvusGraphEmbeddingsStorageProcessor:
await processor.store_graph_embeddings(message)
# Verify insert was called for each vector
# Verify insert was called for each vector with user/collection parameters
expected_calls = [
([0.1, 0.2, 0.3], 'http://example.com/entity'),
([0.4, 0.5, 0.6], 'http://example.com/entity'),
([0.1, 0.2, 0.3], 'http://example.com/entity', 'test_user', 'test_collection'),
([0.4, 0.5, 0.6], 'http://example.com/entity', 'test_user', 'test_collection'),
]
assert processor.vecstore.insert.call_count == 2
for i, (expected_vec, expected_entity) in enumerate(expected_calls):
for i, (expected_vec, expected_entity, expected_user, expected_collection) in enumerate(expected_calls):
actual_call = processor.vecstore.insert.call_args_list[i]
assert actual_call[0][0] == expected_vec
assert actual_call[0][1] == expected_entity
assert actual_call[0][2] == expected_user
assert actual_call[0][3] == expected_collection
@pytest.mark.asyncio
async def test_store_graph_embeddings_multiple_entities(self, processor, mock_message):
"""Test storing graph embeddings for multiple entities"""
await processor.store_graph_embeddings(mock_message)
# Verify insert was called for each vector of each entity
# Verify insert was called for each vector of each entity with user/collection parameters
expected_calls = [
# Entity 1 vectors
([0.1, 0.2, 0.3], 'http://example.com/entity1'),
([0.4, 0.5, 0.6], 'http://example.com/entity1'),
([0.1, 0.2, 0.3], 'http://example.com/entity1', 'test_user', 'test_collection'),
([0.4, 0.5, 0.6], 'http://example.com/entity1', 'test_user', 'test_collection'),
# Entity 2 vectors
([0.7, 0.8, 0.9], 'literal entity'),
([0.7, 0.8, 0.9], 'literal entity', 'test_user', 'test_collection'),
]
assert processor.vecstore.insert.call_count == 3
for i, (expected_vec, expected_entity) in enumerate(expected_calls):
for i, (expected_vec, expected_entity, expected_user, expected_collection) in enumerate(expected_calls):
actual_call = processor.vecstore.insert.call_args_list[i]
assert actual_call[0][0] == expected_vec
assert actual_call[0][1] == expected_entity
assert actual_call[0][2] == expected_user
assert actual_call[0][3] == expected_collection
@pytest.mark.asyncio
async def test_store_graph_embeddings_empty_entity_value(self, processor):
@ -185,9 +189,9 @@ class TestMilvusGraphEmbeddingsStorageProcessor:
await processor.store_graph_embeddings(message)
# Verify only valid entity was inserted
# Verify only valid entity was inserted with user/collection parameters
processor.vecstore.insert.assert_called_once_with(
[0.1, 0.2, 0.3], 'http://example.com/valid'
[0.1, 0.2, 0.3], 'http://example.com/valid', 'test_user', 'test_collection'
)
@pytest.mark.asyncio

View file

@ -135,7 +135,7 @@ class TestPineconeGraphEmbeddingsStorageProcessor:
await processor.store_graph_embeddings(message)
# Verify index name and operations
expected_index_name = "t-test_user-test_collection-3"
expected_index_name = "t-test_user-test_collection"
processor.pinecone.Index.assert_called_with(expected_index_name)
# Verify upsert was called for each vector
@ -203,7 +203,7 @@ class TestPineconeGraphEmbeddingsStorageProcessor:
await processor.store_graph_embeddings(message)
# Verify index creation was called
expected_index_name = "t-test_user-test_collection-3"
expected_index_name = "t-test_user-test_collection"
processor.pinecone.create_index.assert_called_once()
create_call = processor.pinecone.create_index.call_args
assert create_call[1]['name'] == expected_index_name
@ -256,12 +256,12 @@ class TestPineconeGraphEmbeddingsStorageProcessor:
@pytest.mark.asyncio
async def test_store_graph_embeddings_different_vector_dimensions(self, processor):
"""Test storing graph embeddings with different vector dimensions"""
"""Test storing graph embeddings with different vector dimensions to same index"""
message = MagicMock()
message.metadata = MagicMock()
message.metadata.user = 'test_user'
message.metadata.collection = 'test_collection'
entity = EntityEmbeddings(
entity=Value(value="test_entity", is_uri=False),
vectors=[
@ -271,30 +271,21 @@ class TestPineconeGraphEmbeddingsStorageProcessor:
]
)
message.entities = [entity]
mock_index_2d = MagicMock()
mock_index_4d = MagicMock()
mock_index_3d = MagicMock()
def mock_index_side_effect(name):
if name.endswith("-2"):
return mock_index_2d
elif name.endswith("-4"):
return mock_index_4d
elif name.endswith("-3"):
return mock_index_3d
processor.pinecone.Index.side_effect = mock_index_side_effect
# All vectors now use the same index (no dimension in name)
mock_index = MagicMock()
processor.pinecone.Index.return_value = mock_index
processor.pinecone.has_index.return_value = True
with patch('uuid.uuid4', side_effect=['id1', 'id2', 'id3']):
await processor.store_graph_embeddings(message)
# Verify different indexes were used for different dimensions
assert processor.pinecone.Index.call_count == 3
mock_index_2d.upsert.assert_called_once()
mock_index_4d.upsert.assert_called_once()
mock_index_3d.upsert.assert_called_once()
# Verify same index was used for all dimensions
expected_index_name = 't-test_user-test_collection'
processor.pinecone.Index.assert_called_with(expected_index_name)
# Verify all vectors were upserted to the same index
assert mock_index.upsert.call_count == 3
@pytest.mark.asyncio
async def test_store_graph_embeddings_empty_entities_list(self, processor):

View file

@ -69,7 +69,7 @@ class TestQdrantGraphEmbeddingsStorage(IsolatedAsyncioTestCase):
collection_name = processor.get_collection(dim=512, user='test_user', collection='test_collection')
# Assert
expected_name = 't_test_user_test_collection_512'
expected_name = 't_test_user_test_collection'
assert collection_name == expected_name
assert processor.last_collection == expected_name
@ -118,7 +118,7 @@ class TestQdrantGraphEmbeddingsStorage(IsolatedAsyncioTestCase):
# Assert
# Verify collection existence was checked
expected_collection = 't_test_user_test_collection_3'
expected_collection = 't_test_user_test_collection'
mock_qdrant_instance.collection_exists.assert_called_once_with(expected_collection)
# Verify upsert was called
@ -156,7 +156,7 @@ class TestQdrantGraphEmbeddingsStorage(IsolatedAsyncioTestCase):
collection_name = processor.get_collection(dim=256, user='existing_user', collection='existing_collection')
# Assert
expected_name = 't_existing_user_existing_collection_256'
expected_name = 't_existing_user_existing_collection'
assert collection_name == expected_name
assert processor.last_collection == expected_name
@ -194,7 +194,7 @@ class TestQdrantGraphEmbeddingsStorage(IsolatedAsyncioTestCase):
collection_name2 = processor.get_collection(dim=128, user='cache_user', collection='cache_collection')
# Assert
expected_name = 't_cache_user_cache_collection_128'
expected_name = 't_cache_user_cache_collection'
assert collection_name1 == expected_name
assert collection_name2 == expected_name

View file

@ -0,0 +1,363 @@
"""
Tests for Memgraph user/collection isolation in storage service
"""
import pytest
from unittest.mock import MagicMock, patch
from trustgraph.storage.triples.memgraph.write import Processor
class TestMemgraphUserCollectionIsolation:
    """Test cases for Memgraph storage service with user/collection isolation"""

    @staticmethod
    def _wire_driver(mock_graph_db, nodes_created=1):
        """Attach a mocked driver and session to the patched GraphDatabase.

        Args:
            mock_graph_db: the patched ``GraphDatabase`` object.
            nodes_created: value exposed as ``summary.counters.nodes_created``
                on the result returned by ``driver.execute_query``.

        Returns:
            ``(driver, session)`` where ``session`` is what
            ``with driver.session() as s`` yields.
        """
        driver = MagicMock()
        mock_graph_db.driver.return_value = driver
        session = MagicMock()
        driver.session.return_value.__enter__.return_value = session

        # Stub the execute_query result summary
        summary = MagicMock()
        summary.counters.nodes_created = nodes_created
        summary.result_available_after = 10
        result = MagicMock()
        result.summary = summary
        driver.execute_query.return_value = result

        return driver, session

    @patch('trustgraph.storage.triples.memgraph.write.GraphDatabase')
    def test_storage_creates_indexes_with_user_collection(self, mock_graph_db):
        """Test that storage creates both legacy and user/collection indexes"""
        _, mock_session = self._wire_driver(mock_graph_db)

        Processor(taskgroup=MagicMock())

        # Verify all indexes were attempted (4 legacy + 4 user/collection = 8 total)
        assert mock_session.run.call_count == 8

        # Check the specific index creation calls
        expected_statements = (
            "CREATE INDEX ON :Node",
            "CREATE INDEX ON :Node(uri)",
            "CREATE INDEX ON :Literal",
            "CREATE INDEX ON :Literal(value)",
            "CREATE INDEX ON :Node(user)",
            "CREATE INDEX ON :Node(collection)",
            "CREATE INDEX ON :Literal(user)",
            "CREATE INDEX ON :Literal(collection)",
        )
        for statement in expected_statements:
            mock_session.run.assert_any_call(statement)

    @patch('trustgraph.storage.triples.memgraph.write.GraphDatabase')
    @pytest.mark.asyncio
    async def test_store_triples_with_user_collection(self, mock_graph_db):
        """Test that store_triples includes user/collection in all operations"""
        mock_driver, _ = self._wire_driver(mock_graph_db)
        processor = Processor(taskgroup=MagicMock())

        # Create mock triple with URI object
        triple = MagicMock()
        triple.s.value = "http://example.com/subject"
        triple.p.value = "http://example.com/predicate"
        triple.o.value = "http://example.com/object"
        triple.o.is_uri = True

        # Create mock message with metadata
        mock_message = MagicMock()
        mock_message.triples = [triple]
        mock_message.metadata.user = "test_user"
        mock_message.metadata.collection = "test_collection"

        await processor.store_triples(mock_message)

        # Should have: create_node (subject), create_node (object), relate_node = 3 calls
        assert mock_driver.execute_query.call_count == 3

        # Check that user and collection were included in all calls
        for recorded in mock_driver.execute_query.call_args_list:
            kwargs = recorded.kwargs
            assert 'user' in kwargs
            assert 'collection' in kwargs
            assert kwargs['user'] == "test_user"
            assert kwargs['collection'] == "test_collection"

    @patch('trustgraph.storage.triples.memgraph.write.GraphDatabase')
    @pytest.mark.asyncio
    async def test_store_triples_with_default_user_collection(self, mock_graph_db):
        """Test that defaults are used when user/collection not provided in metadata"""
        mock_driver, _ = self._wire_driver(mock_graph_db)
        processor = Processor(taskgroup=MagicMock())

        # Create mock triple with a literal object
        triple = MagicMock()
        triple.s.value = "http://example.com/subject"
        triple.p.value = "http://example.com/predicate"
        triple.o.value = "literal_value"
        triple.o.is_uri = False

        # Create mock message without user/collection metadata
        mock_message = MagicMock()
        mock_message.triples = [triple]
        mock_message.metadata.user = None
        mock_message.metadata.collection = None

        await processor.store_triples(mock_message)

        # Without this guard the loop below would pass vacuously if
        # store_triples issued no queries at all.
        mock_driver.execute_query.assert_called()

        # Verify defaults were used
        for recorded in mock_driver.execute_query.call_args_list:
            kwargs = recorded.kwargs
            assert kwargs['user'] == "default"
            assert kwargs['collection'] == "default"

    @patch('trustgraph.storage.triples.memgraph.write.GraphDatabase')
    def test_create_node_includes_user_collection(self, mock_graph_db):
        """Test that create_node includes user/collection properties"""
        mock_driver, _ = self._wire_driver(mock_graph_db)
        processor = Processor(taskgroup=MagicMock())

        processor.create_node("http://example.com/node", "test_user", "test_collection")

        mock_driver.execute_query.assert_called_with(
            "MERGE (n:Node {uri: $uri, user: $user, collection: $collection})",
            uri="http://example.com/node",
            user="test_user",
            collection="test_collection",
            database_="memgraph"
        )

    @patch('trustgraph.storage.triples.memgraph.write.GraphDatabase')
    def test_create_literal_includes_user_collection(self, mock_graph_db):
        """Test that create_literal includes user/collection properties"""
        mock_driver, _ = self._wire_driver(mock_graph_db)
        processor = Processor(taskgroup=MagicMock())

        processor.create_literal("test_value", "test_user", "test_collection")

        mock_driver.execute_query.assert_called_with(
            "MERGE (n:Literal {value: $value, user: $user, collection: $collection})",
            value="test_value",
            user="test_user",
            collection="test_collection",
            database_="memgraph"
        )

    @patch('trustgraph.storage.triples.memgraph.write.GraphDatabase')
    def test_relate_node_includes_user_collection(self, mock_graph_db):
        """Test that relate_node includes user/collection properties"""
        mock_driver, _ = self._wire_driver(mock_graph_db, nodes_created=0)
        processor = Processor(taskgroup=MagicMock())

        processor.relate_node(
            "http://example.com/subject",
            "http://example.com/predicate",
            "http://example.com/object",
            "test_user",
            "test_collection"
        )

        mock_driver.execute_query.assert_called_with(
            "MATCH (src:Node {uri: $src, user: $user, collection: $collection}) "
            "MATCH (dest:Node {uri: $dest, user: $user, collection: $collection}) "
            "MERGE (src)-[:Rel {uri: $uri, user: $user, collection: $collection}]->(dest)",
            src="http://example.com/subject",
            dest="http://example.com/object",
            uri="http://example.com/predicate",
            user="test_user",
            collection="test_collection",
            database_="memgraph"
        )

    @patch('trustgraph.storage.triples.memgraph.write.GraphDatabase')
    def test_relate_literal_includes_user_collection(self, mock_graph_db):
        """Test that relate_literal includes user/collection properties"""
        mock_driver, _ = self._wire_driver(mock_graph_db, nodes_created=0)
        processor = Processor(taskgroup=MagicMock())

        processor.relate_literal(
            "http://example.com/subject",
            "http://example.com/predicate",
            "literal_value",
            "test_user",
            "test_collection"
        )

        mock_driver.execute_query.assert_called_with(
            "MATCH (src:Node {uri: $src, user: $user, collection: $collection}) "
            "MATCH (dest:Literal {value: $dest, user: $user, collection: $collection}) "
            "MERGE (src)-[:Rel {uri: $uri, user: $user, collection: $collection}]->(dest)",
            src="http://example.com/subject",
            dest="literal_value",
            uri="http://example.com/predicate",
            user="test_user",
            collection="test_collection",
            database_="memgraph"
        )

    def test_add_args_includes_memgraph_parameters(self):
        """Test that add_args properly configures Memgraph-specific parameters"""
        from argparse import ArgumentParser

        parser = ArgumentParser()

        # Mock the parent class add_args method
        with patch('trustgraph.storage.triples.memgraph.write.TriplesStoreService.add_args') as mock_parent_add_args:
            Processor.add_args(parser)

        # Verify parent add_args was called
        mock_parent_add_args.assert_called_once()

        # Verify our specific arguments were added with Memgraph defaults
        args = parser.parse_args([])
        expected_defaults = {
            'graph_host': 'bolt://memgraph:7687',
            'username': 'memgraph',
            'password': 'password',
            'database': 'memgraph',
        }
        for option, default in expected_defaults.items():
            assert hasattr(args, option)
            assert getattr(args, option) == default
class TestMemgraphUserCollectionRegression:
    """Regression tests to ensure user/collection isolation prevents data leakage"""

    @staticmethod
    def _wire_driver(mock_graph_db):
        """Attach a mocked driver to the patched GraphDatabase and return it.

        ``driver.execute_query`` returns a result whose summary reports one
        created node and ``result_available_after`` of 10.
        """
        driver = MagicMock()
        mock_graph_db.driver.return_value = driver
        driver.session.return_value.__enter__.return_value = MagicMock()

        summary = MagicMock()
        summary.counters.nodes_created = 1
        summary.result_available_after = 10
        result = MagicMock()
        result.summary = summary
        driver.execute_query.return_value = result

        return driver

    @patch('trustgraph.storage.triples.memgraph.write.GraphDatabase')
    @pytest.mark.asyncio
    async def test_regression_no_cross_user_data_access(self, mock_graph_db):
        """Regression test: Ensure users cannot access each other's data"""
        mock_driver = self._wire_driver(mock_graph_db)
        processor = Processor(taskgroup=MagicMock())

        # Store data for user1
        triple = MagicMock()
        triple.s.value = "http://example.com/subject"
        triple.p.value = "http://example.com/predicate"
        triple.o.value = "user1_data"
        triple.o.is_uri = False

        message_user1 = MagicMock()
        message_user1.triples = [triple]
        message_user1.metadata.user = "user1"
        message_user1.metadata.collection = "collection1"

        await processor.store_triples(message_user1)

        # Collect the keyword arguments of every query that carries a user.
        scoped_kwargs = [
            recorded.kwargs
            for recorded in mock_driver.execute_query.call_args_list
            if 'user' in recorded.kwargs
        ]

        # Guard against a vacuous pass: at least one user-scoped storage
        # operation must have been issued for the checks below to mean anything.
        assert scoped_kwargs, "store_triples issued no user-scoped queries"

        # Verify that all storage operations included user1/collection1 parameters
        for kwargs in scoped_kwargs:
            assert kwargs['user'] == "user1"
            assert kwargs['collection'] == "collection1"

    @patch('trustgraph.storage.triples.memgraph.write.GraphDatabase')
    @pytest.mark.asyncio
    async def test_regression_same_uri_different_users(self, mock_graph_db):
        """Regression test: Same URI can exist for different users without conflict"""
        mock_driver = self._wire_driver(mock_graph_db)
        processor = Processor(taskgroup=MagicMock())

        # Same URI for different users should create separate nodes
        same_uri = "http://example.com/same-uri"
        processor.create_node(same_uri, "user1", "collection1")
        processor.create_node(same_uri, "user2", "collection2")

        # Inspect the last two calls, i.e. the two create_node operations above
        last_two = mock_driver.execute_query.call_args_list[-2:]
        first_kwargs = last_two[0].kwargs
        second_kwargs = last_two[1].kwargs

        assert first_kwargs['user'] == "user1"
        assert first_kwargs['collection'] == "collection1"
        assert second_kwargs['user'] == "user2"
        assert second_kwargs['collection'] == "collection2"

        # Both should have the same URI but different user/collection
        assert first_kwargs['uri'] == second_kwargs['uri'] == "http://example.com/same-uri"

View file

@ -0,0 +1,470 @@
"""
Tests for Neo4j user/collection isolation in triples storage and query
"""
import pytest
from unittest.mock import MagicMock, patch, call
from trustgraph.storage.triples.neo4j.write import Processor as StorageProcessor
from trustgraph.query.triples.neo4j.service import Processor as QueryProcessor
from trustgraph.schema import Triples, Triple, Value, Metadata
from trustgraph.schema import TriplesQueryRequest
class TestNeo4jUserCollectionIsolation:
    """Test cases for Neo4j user/collection isolation functionality"""

    @staticmethod
    def _storage_processor(mock_graph_db):
        """Build a StorageProcessor on a mocked driver.

        Returns ``(processor, driver, session)``; ``session`` is the object
        yielded by ``with driver.session()``.
        """
        driver = MagicMock()
        mock_graph_db.driver.return_value = driver
        session = MagicMock()
        driver.session.return_value.__enter__.return_value = session
        return StorageProcessor(taskgroup=MagicMock()), driver, session

    @staticmethod
    def _query_processor(mock_graph_db):
        """Build a QueryProcessor on a mocked driver; returns ``(processor, driver)``."""
        driver = MagicMock()
        mock_graph_db.driver.return_value = driver
        return QueryProcessor(taskgroup=MagicMock()), driver

    @staticmethod
    def _stub_summary(driver):
        """Give the execute_query result a summary with the fields the tests set."""
        summary = MagicMock()
        summary.counters.nodes_created = 1
        summary.result_available_after = 10
        driver.execute_query.return_value.summary = summary

    @patch('trustgraph.storage.triples.neo4j.write.GraphDatabase')
    def test_storage_creates_indexes_with_user_collection(self, mock_graph_db):
        """Test that storage service creates compound indexes for user/collection"""
        _, _, mock_session = self._storage_processor(mock_graph_db)

        # Legacy indexes first, then the user/collection ones
        expected_indexes = (
            "CREATE INDEX Node_uri FOR (n:Node) ON (n.uri)",
            "CREATE INDEX Literal_value FOR (n:Literal) ON (n.value)",
            "CREATE INDEX Rel_uri FOR ()-[r:Rel]-() ON (r.uri)",
            "CREATE INDEX node_user_collection_uri FOR (n:Node) ON (n.user, n.collection, n.uri)",
            "CREATE INDEX literal_user_collection_value FOR (n:Literal) ON (n.user, n.collection, n.value)",
            "CREATE INDEX rel_user FOR ()-[r:Rel]-() ON (r.user)",
            "CREATE INDEX rel_collection FOR ()-[r:Rel]-() ON (r.collection)",
        )

        # Every expected statement must have been run on the session
        for statement in expected_indexes:
            mock_session.run.assert_any_call(statement)

    @patch('trustgraph.storage.triples.neo4j.write.GraphDatabase')
    @pytest.mark.asyncio
    async def test_store_triples_with_user_collection(self, mock_graph_db):
        """Test that triples are stored with user/collection properties"""
        processor, mock_driver, _ = self._storage_processor(mock_graph_db)

        # Message carrying explicit user/collection metadata
        message = Triples(
            metadata=Metadata(
                id="test-id",
                user="test_user",
                collection="test_collection"
            ),
            triples=[
                Triple(
                    s=Value(value="http://example.com/subject", is_uri=True),
                    p=Value(value="http://example.com/predicate", is_uri=True),
                    o=Value(value="literal_value", is_uri=False)
                )
            ]
        )

        self._stub_summary(mock_driver)

        await processor.store_triples(message)

        # Subject node, literal node, and the relationship between them
        subject_merge = call(
            "MERGE (n:Node {uri: $uri, user: $user, collection: $collection})",
            uri="http://example.com/subject",
            user="test_user",
            collection="test_collection",
            database_='neo4j'
        )
        literal_merge = call(
            "MERGE (n:Literal {value: $value, user: $user, collection: $collection})",
            value="literal_value",
            user="test_user",
            collection="test_collection",
            database_='neo4j'
        )
        relation_merge = call(
            "MATCH (src:Node {uri: $src, user: $user, collection: $collection}) "
            "MATCH (dest:Literal {value: $dest, user: $user, collection: $collection}) "
            "MERGE (src)-[:Rel {uri: $uri, user: $user, collection: $collection}]->(dest)",
            src="http://example.com/subject",
            dest="literal_value",
            uri="http://example.com/predicate",
            user="test_user",
            collection="test_collection",
            database_='neo4j'
        )

        for expected in (subject_merge, literal_merge, relation_merge):
            mock_driver.execute_query.assert_any_call(*expected.args, **expected.kwargs)

    @patch('trustgraph.storage.triples.neo4j.write.GraphDatabase')
    @pytest.mark.asyncio
    async def test_store_triples_with_default_user_collection(self, mock_graph_db):
        """Test that default user/collection are used when not provided"""
        processor, mock_driver, _ = self._storage_processor(mock_graph_db)

        # Metadata deliberately omits user and collection
        message = Triples(
            metadata=Metadata(id="test-id"),
            triples=[
                Triple(
                    s=Value(value="http://example.com/subject", is_uri=True),
                    p=Value(value="http://example.com/predicate", is_uri=True),
                    o=Value(value="http://example.com/object", is_uri=True)
                )
            ]
        )

        self._stub_summary(mock_driver)

        await processor.store_triples(message)

        # The subject node must have been merged under the defaults
        mock_driver.execute_query.assert_any_call(
            "MERGE (n:Node {uri: $uri, user: $user, collection: $collection})",
            uri="http://example.com/subject",
            user="default",
            collection="default",
            database_='neo4j'
        )

    @patch('trustgraph.query.triples.neo4j.service.GraphDatabase')
    @pytest.mark.asyncio
    async def test_query_triples_filters_by_user_collection(self, mock_graph_db):
        """Test that query service filters results by user/collection"""
        processor, mock_driver = self._query_processor(mock_graph_db)

        # Subject and predicate fixed, object left open
        query = TriplesQueryRequest(
            user="test_user",
            collection="test_collection",
            s=Value(value="http://example.com/subject", is_uri=True),
            p=Value(value="http://example.com/predicate", is_uri=True),
            o=None
        )

        # Records handed back by the mocked driver
        mock_records = [
            MagicMock(data=lambda: {"dest": "http://example.com/object1"}),
            MagicMock(data=lambda: {"dest": "literal_value"})
        ]
        mock_driver.execute_query.return_value = (mock_records, MagicMock(), MagicMock())

        await processor.query_triples(query)

        expected_literal_query = (
            "MATCH (src:Node {uri: $src, user: $user, collection: $collection})-"
            "[rel:Rel {uri: $rel, user: $user, collection: $collection}]->"
            "(dest:Literal {user: $user, collection: $collection}) "
            "RETURN dest.value as dest"
        )

        # NOTE(review): the node-destination query is spelled out here but is
        # not asserted by this test; only the literal query is checked below.
        expected_node_query = (
            "MATCH (src:Node {uri: $src, user: $user, collection: $collection})-"
            "[rel:Rel {uri: $rel, user: $user, collection: $collection}]->"
            "(dest:Node {user: $user, collection: $collection}) "
            "RETURN dest.uri as dest"
        )

        # At least one executed query must be the literal query with the
        # caller's user/collection bound as parameters
        recorded_calls = mock_driver.execute_query.call_args_list
        assert any(
            expected_literal_query in str(recorded) and
            "user='test_user'" in str(recorded) and
            "collection='test_collection'" in str(recorded)
            for recorded in recorded_calls
        )

    @patch('trustgraph.query.triples.neo4j.service.GraphDatabase')
    @pytest.mark.asyncio
    async def test_query_triples_with_default_user_collection(self, mock_graph_db):
        """Test that query service uses defaults when user/collection not provided"""
        processor, mock_driver = self._query_processor(mock_graph_db)

        # Request without user/collection
        query = TriplesQueryRequest(
            s=None,
            p=None,
            o=None
        )

        # Driver returns no records
        mock_driver.execute_query.return_value = ([], MagicMock(), MagicMock())

        await processor.query_triples(query)

        # Some executed query must carry the default user and collection
        recorded_calls = mock_driver.execute_query.call_args_list
        assert any(
            "user='default'" in str(recorded) and "collection='default'" in str(recorded)
            for recorded in recorded_calls
        )

    @patch('trustgraph.storage.triples.neo4j.write.GraphDatabase')
    @pytest.mark.asyncio
    async def test_data_isolation_between_users(self, mock_graph_db):
        """Test that data from different users is properly isolated"""
        processor, mock_driver, _ = self._storage_processor(mock_graph_db)

        # One message per user, each with its own subject and literal
        message_user1 = Triples(
            metadata=Metadata(user="user1", collection="coll1"),
            triples=[
                Triple(
                    s=Value(value="http://example.com/user1/subject", is_uri=True),
                    p=Value(value="http://example.com/predicate", is_uri=True),
                    o=Value(value="user1_data", is_uri=False)
                )
            ]
        )
        message_user2 = Triples(
            metadata=Metadata(user="user2", collection="coll2"),
            triples=[
                Triple(
                    s=Value(value="http://example.com/user2/subject", is_uri=True),
                    p=Value(value="http://example.com/predicate", is_uri=True),
                    o=Value(value="user2_data", is_uri=False)
                )
            ]
        )

        self._stub_summary(mock_driver)

        # Store data for both users
        await processor.store_triples(message_user1)
        await processor.store_triples(message_user2)

        literal_merge = "MERGE (n:Literal {value: $value, user: $user, collection: $collection})"

        # user1's literal was stored under user1/coll1
        mock_driver.execute_query.assert_any_call(
            literal_merge,
            value="user1_data",
            user="user1",
            collection="coll1",
            database_='neo4j'
        )

        # user2's literal was stored under user2/coll2
        mock_driver.execute_query.assert_any_call(
            literal_merge,
            value="user2_data",
            user="user2",
            collection="coll2",
            database_='neo4j'
        )

    @patch('trustgraph.query.triples.neo4j.service.GraphDatabase')
    @pytest.mark.asyncio
    async def test_wildcard_query_respects_user_collection(self, mock_graph_db):
        """Test that wildcard queries still filter by user/collection"""
        processor, mock_driver = self._query_processor(mock_graph_db)

        # Wildcard query (s, p, o all None) scoped to a user/collection
        query = TriplesQueryRequest(
            user="test_user",
            collection="test_collection",
            s=None,
            p=None,
            o=None
        )

        mock_driver.execute_query.return_value = ([], MagicMock(), MagicMock())

        await processor.query_triples(query)

        wildcard_query = (
            "MATCH (src:Node {user: $user, collection: $collection})-"
            "[rel:Rel {user: $user, collection: $collection}]->"
            "(dest:Literal {user: $user, collection: $collection}) "
            "RETURN src.uri as src, rel.uri as rel, dest.value as dest"
        )

        # The wildcard query must have run with the caller's scope bound
        recorded_calls = mock_driver.execute_query.call_args_list
        assert any(
            wildcard_query in str(recorded) and
            "user='test_user'" in str(recorded) and
            "collection='test_collection'" in str(recorded)
            for recorded in recorded_calls
        )

    def test_add_args_includes_neo4j_parameters(self):
        """Test that add_args includes Neo4j-specific parameters"""
        from argparse import ArgumentParser

        parser = ArgumentParser()

        # Parent add_args is patched out so only this class's options are added
        with patch('trustgraph.storage.triples.neo4j.write.TriplesStoreService.add_args'):
            StorageProcessor.add_args(parser)

        args = parser.parse_args([])

        for option in ('graph_host', 'username', 'password', 'database'):
            assert hasattr(args, option)

        # Check defaults
        assert args.graph_host == 'bolt://neo4j:7687'
        assert args.username == 'neo4j'
        assert args.password == 'password'
        assert args.database == 'neo4j'
class TestNeo4jUserCollectionRegression:
    """Regression tests to ensure user/collection isolation prevents data leaks"""

    @patch('trustgraph.query.triples.neo4j.service.GraphDatabase')
    @pytest.mark.asyncio
    async def test_regression_no_cross_user_data_access(self, mock_graph_db):
        """
        Regression test: Ensure user1 cannot access user2's data

        This test guards against the bug where all users shared the same
        Neo4j graph space, causing data contamination between users.
        """
        mock_driver = MagicMock()
        mock_graph_db.driver.return_value = mock_driver

        processor = QueryProcessor(taskgroup=MagicMock())

        # User1 queries for all triples
        query_user1 = TriplesQueryRequest(
            user="user1",
            collection="collection1",
            s=None, p=None, o=None
        )

        # Mock that the database has data but none matching user1/collection1
        mock_driver.execute_query.return_value = ([], MagicMock(), MagicMock())

        result = await processor.query_triples(query_user1)

        # Verify empty results (user1 cannot see other users' data)
        assert len(result) == 0

        # Render each recorded call once and keep those that ran a MATCH.
        # The loop variable is not named `call`, so the module-level
        # unittest.mock.call import is not shadowed.
        match_queries = [
            rendered
            for rendered in map(str, mock_driver.execute_query.call_args_list)
            if "MATCH" in rendered
        ]

        # Guard against a vacuous pass: if no MATCH query ran, the filter
        # checks below would never execute.
        assert match_queries, "query_triples executed no MATCH queries"

        # Verify the query included user/collection filters
        for query_str in match_queries:
            assert "user: $user" in query_str or "user='user1'" in query_str
            assert "collection: $collection" in query_str or "collection='collection1'" in query_str

    @patch('trustgraph.storage.triples.neo4j.write.GraphDatabase')
    @pytest.mark.asyncio
    async def test_regression_same_uri_different_users(self, mock_graph_db):
        """
        Regression test: Same URI in different user contexts should create separate nodes

        This ensures that http://example.com/entity for user1 is completely separate
        from http://example.com/entity for user2.
        """
        mock_driver = MagicMock()
        mock_graph_db.driver.return_value = mock_driver
        mock_driver.session.return_value.__enter__.return_value = MagicMock()

        processor = StorageProcessor(taskgroup=MagicMock())

        # Same URI for different users
        shared_uri = "http://example.com/shared_entity"

        message_user1 = Triples(
            metadata=Metadata(user="user1", collection="coll1"),
            triples=[
                Triple(
                    s=Value(value=shared_uri, is_uri=True),
                    p=Value(value="http://example.com/p", is_uri=True),
                    o=Value(value="user1_value", is_uri=False)
                )
            ]
        )
        message_user2 = Triples(
            metadata=Metadata(user="user2", collection="coll2"),
            triples=[
                Triple(
                    s=Value(value=shared_uri, is_uri=True),
                    p=Value(value="http://example.com/p", is_uri=True),
                    o=Value(value="user2_value", is_uri=False)
                )
            ]
        )

        # Stub the summary on the execute_query result
        mock_summary = MagicMock()
        mock_summary.counters.nodes_created = 1
        mock_summary.result_available_after = 10
        mock_driver.execute_query.return_value.summary = mock_summary

        await processor.store_triples(message_user1)
        await processor.store_triples(message_user2)

        # Verify two separate nodes were created with same URI but different user/collection
        node_merge = "MERGE (n:Node {uri: $uri, user: $user, collection: $collection})"
        user1_node_call = call(
            node_merge,
            uri=shared_uri,
            user="user1",
            collection="coll1",
            database_='neo4j'
        )
        user2_node_call = call(
            node_merge,
            uri=shared_uri,
            user="user2",
            collection="coll2",
            database_='neo4j'
        )

        mock_driver.execute_query.assert_has_calls([user1_node_call, user2_node_call], any_order=True)

View file

@ -261,7 +261,7 @@ class TestObjectsCassandraStorageLogic:
metadata=[]
),
schema_name="test_schema",
values={"id": "123", "value": "456"},
values=[{"id": "123", "value": "456"}],
confidence=0.9,
source_span="test source"
)
@ -284,8 +284,8 @@ class TestObjectsCassandraStorageLogic:
assert "INSERT INTO test_user.o_test_schema" in insert_cql
assert "collection" in insert_cql
assert values[0] == "test_collection" # collection value
assert values[1] == "123" # id value
assert values[2] == 456 # converted integer value
assert values[1] == "123" # id value (from values[0])
assert values[2] == 456 # converted integer value (from values[0])
def test_secondary_index_creation(self):
"""Test that secondary indexes are created for indexed fields"""
@ -325,4 +325,201 @@ class TestObjectsCassandraStorageLogic:
index_calls = [call[0][0] for call in calls if "CREATE INDEX" in call[0][0]]
assert len(index_calls) == 2
assert any("o_products_category_idx" in call for call in index_calls)
assert any("o_products_price_idx" in call for call in index_calls)
assert any("o_products_price_idx" in call for call in index_calls)
class TestObjectsCassandraStorageBatchLogic:
"""Test batch processing logic in Cassandra storage"""
@pytest.mark.asyncio
async def test_batch_object_processing_logic(self):
    """Test processing of batch ExtractedObjects.

    A three-item batch must ensure the table once and produce one INSERT per
    item, with the collection, id, name and integer-converted value in that
    order.
    """
    processor = MagicMock()
    processor.schemas = {
        "batch_schema": RowSchema(
            name="batch_schema",
            description="Test batch schema",
            fields=[
                Field(name="id", type="string", size=50, primary=True),
                Field(name="name", type="string", size=100),
                Field(name="value", type="integer", size=4)
            ]
        )
    }
    processor.ensure_table = MagicMock()
    # Bind the real implementations onto the mock so on_object runs real logic
    processor.sanitize_name = Processor.sanitize_name.__get__(processor, Processor)
    processor.sanitize_table = Processor.sanitize_table.__get__(processor, Processor)
    processor.convert_value = Processor.convert_value.__get__(processor, Processor)
    processor.session = MagicMock()
    processor.on_object = Processor.on_object.__get__(processor, Processor)

    # Create batch object with multiple values
    batch_obj = ExtractedObject(
        metadata=Metadata(
            id="batch-001",
            user="test_user",
            collection="batch_collection",
            metadata=[]
        ),
        schema_name="batch_schema",
        values=[
            {"id": "001", "name": "First", "value": "100"},
            {"id": "002", "name": "Second", "value": "200"},
            {"id": "003", "name": "Third", "value": "300"}
        ],
        confidence=0.95,
        source_span="batch source"
    )

    # Create mock message
    msg = MagicMock()
    msg.value.return_value = batch_obj

    # Process batch object
    await processor.on_object(msg, None, None)

    # Verify table was ensured once
    processor.ensure_table.assert_called_once_with("test_user", "batch_schema", processor.schemas["batch_schema"])

    # Verify 3 separate insert calls (one per batch item)
    assert processor.session.execute.call_count == 3

    # Expected (id, name, value) per insert, in batch order. Comparing against
    # an explicit table avoids the precedence trap of
    # `assert x == a if c else b`, which parses as `(x == a) if c else b` and
    # asserts a bare non-empty string on the else branch.
    expected_rows = [
        ("001", "First", 100),
        ("002", "Second", 200),
        ("003", "Third", 300),
    ]

    recorded_inserts = processor.session.execute.call_args_list
    for recorded, (row_id, row_name, row_value) in zip(recorded_inserts, expected_rows):
        insert_cql = recorded[0][0]
        values = recorded[0][1]

        assert "INSERT INTO test_user.o_batch_schema" in insert_cql
        assert "collection" in insert_cql

        # Check values for each batch item
        assert values[0] == "batch_collection"  # collection
        assert values[1] == row_id  # id from the batch item
        assert values[2] == row_name  # name
        assert values[3] == row_value  # converted integer value
@pytest.mark.asyncio
async def test_empty_batch_processing_logic(self):
    """An ExtractedObject carrying no values ensures the table but issues no INSERT."""
    schema = RowSchema(
        name="empty_schema",
        fields=[Field(name="id", type="string", size=50, primary=True)]
    )

    # MagicMock stand-in that carries the real Processor methods under test
    proc = MagicMock()
    proc.schemas = {"empty_schema": schema}
    proc.session = MagicMock()
    proc.ensure_table = MagicMock()
    for method_name in ("sanitize_name", "sanitize_table", "convert_value", "on_object"):
        bound = getattr(Processor, method_name).__get__(proc, Processor)
        setattr(proc, method_name, bound)

    # Message whose payload is a batch with zero rows
    metadata = Metadata(
        id="empty-001",
        user="test_user",
        collection="empty_collection",
        metadata=[]
    )
    payload = ExtractedObject(
        metadata=metadata,
        schema_name="empty_schema",
        values=[],
        confidence=1.0,
        source_span="empty source"
    )
    message = MagicMock()
    message.value.return_value = payload

    await proc.on_object(message, None, None)

    # The table is still ensured, but nothing is written
    proc.ensure_table.assert_called_once()
    proc.session.execute.assert_not_called()
@pytest.mark.asyncio
async def test_single_item_batch_processing_logic(self):
    """A one-element values list yields exactly one INSERT (backward compatibility)."""
    schema = RowSchema(
        name="single_schema",
        fields=[
            Field(name="id", type="string", size=50, primary=True),
            Field(name="data", type="string", size=100)
        ]
    )

    # MagicMock stand-in that carries the real Processor methods under test
    proc = MagicMock()
    proc.schemas = {"single_schema": schema}
    proc.session = MagicMock()
    proc.ensure_table = MagicMock()
    for method_name in ("sanitize_name", "sanitize_table", "convert_value", "on_object"):
        bound = getattr(Processor, method_name).__get__(proc, Processor)
        setattr(proc, method_name, bound)

    # Message whose payload is an array holding a single row
    row = {"id": "single-1", "data": "single data"}
    payload = ExtractedObject(
        metadata=Metadata(
            id="single-001",
            user="test_user",
            collection="single_collection",
            metadata=[]
        ),
        schema_name="single_schema",
        values=[row],
        confidence=0.8,
        source_span="single source"
    )
    message = MagicMock()
    message.value.return_value = payload

    await proc.on_object(message, None, None)

    # One ensure_table call and one INSERT for the single row
    proc.ensure_table.assert_called_once()
    proc.session.execute.assert_called_once()

    positional = proc.session.execute.call_args[0]
    insert_cql = positional[0]
    bound_values = positional[1]

    assert "INSERT INTO test_user.o_single_schema" in insert_cql
    assert bound_values[0] == "single_collection"  # collection
    assert bound_values[1] == "single-1"  # id value
    assert bound_values[2] == "single data"  # data value
def test_batch_value_conversion_logic(self):
    """Check convert_value against the conversions exercised by batch items."""
    processor = MagicMock()
    processor.convert_value = Processor.convert_value.__get__(processor, Processor)

    # (input, expected) pairs grouped by target field type
    cases_by_type = {
        "integer": [("123", 123), ("456", 456), ("789", 789)],
        "float": [("12.5", 12.5), ("34.7", 34.7)],
        "boolean": [("true", True), ("false", False), ("1", True), ("0", False)],
        "string": [(123, "123"), (45.6, "45.6")],
    }

    for field_type, pairs in cases_by_type.items():
        for input_val, expected_output in pairs:
            result = processor.convert_value(input_val, field_type)
            assert result == expected_output, f"Failed for {input_val} -> {field_type}: got {result}, expected {expected_output}"

View file

@ -16,28 +16,30 @@ class TestCassandraStorageProcessor:
"""Test processor initialization with default parameters"""
taskgroup_mock = MagicMock()
processor = Processor(taskgroup=taskgroup_mock)
# Patch environment to ensure clean defaults
with patch.dict('os.environ', {}, clear=True):
processor = Processor(taskgroup=taskgroup_mock)
assert processor.graph_host == ['localhost']
assert processor.username is None
assert processor.password is None
assert processor.cassandra_host == ['cassandra'] # Updated default
assert processor.cassandra_username is None
assert processor.cassandra_password is None
assert processor.table is None
def test_processor_initialization_with_custom_params(self):
"""Test processor initialization with custom parameters"""
"""Test processor initialization with custom parameters (new cassandra_* names)"""
taskgroup_mock = MagicMock()
processor = Processor(
taskgroup=taskgroup_mock,
id='custom-storage',
graph_host='cassandra.example.com',
graph_username='testuser',
graph_password='testpass'
cassandra_host='cassandra.example.com',
cassandra_username='testuser',
cassandra_password='testpass'
)
assert processor.graph_host == ['cassandra.example.com']
assert processor.username == 'testuser'
assert processor.password == 'testpass'
assert processor.cassandra_host == ['cassandra.example.com']
assert processor.cassandra_username == 'testuser'
assert processor.cassandra_password == 'testpass'
assert processor.table is None
def test_processor_initialization_with_partial_auth(self):
@ -46,14 +48,45 @@ class TestCassandraStorageProcessor:
processor = Processor(
taskgroup=taskgroup_mock,
graph_username='testuser'
cassandra_username='testuser'
)
assert processor.username == 'testuser'
assert processor.password is None
assert processor.cassandra_username == 'testuser'
assert processor.cassandra_password is None
def test_processor_no_backward_compatibility(self):
    """Test that old graph_* parameters are no longer supported"""
    taskgroup_mock = MagicMock()

    # Clear the environment so CASSANDRA_* variables set on the test runner
    # cannot leak into the defaults asserted below.
    with patch.dict('os.environ', {}, clear=True):
        processor = Processor(
            taskgroup=taskgroup_mock,
            graph_host='old-host',
            graph_username='old-user',
            graph_password='old-pass'
        )

    # Should use defaults since graph_* params are not recognized
    assert processor.cassandra_host == ['cassandra']  # Default
    assert processor.cassandra_username is None
    assert processor.cassandra_password is None
def test_processor_only_new_parameters_work(self):
    """When both naming schemes are passed, only the cassandra_* values take effect."""
    # Legacy graph_* keywords are passed alongside and should be ignored
    legacy_kwargs = {
        'graph_host': 'old-host',
        'graph_username': 'old-user',
    }
    current_kwargs = {
        'cassandra_host': 'new-host',
        'cassandra_username': 'new-user',
    }

    processor = Processor(taskgroup=MagicMock(), **current_kwargs, **legacy_kwargs)

    # Only cassandra_* params work
    assert processor.cassandra_host == ['new-host']
    assert processor.cassandra_username == 'new-user'
@pytest.mark.asyncio
@patch('trustgraph.storage.triples.cassandra.write.TrustGraph')
@patch('trustgraph.storage.triples.cassandra.write.KnowledgeGraph')
async def test_table_switching_with_auth(self, mock_trustgraph):
"""Test table switching logic when authentication is provided"""
taskgroup_mock = MagicMock()
@ -62,8 +95,8 @@ class TestCassandraStorageProcessor:
processor = Processor(
taskgroup=taskgroup_mock,
graph_username='testuser',
graph_password='testpass'
cassandra_username='testuser',
cassandra_password='testpass'
)
# Create mock message
@ -74,18 +107,17 @@ class TestCassandraStorageProcessor:
await processor.store_triples(mock_message)
# Verify TrustGraph was called with auth parameters
# Verify KnowledgeGraph was called with auth parameters
mock_trustgraph.assert_called_once_with(
hosts=['localhost'],
hosts=['cassandra'], # Updated default
keyspace='user1',
table='collection1',
username='testuser',
password='testpass'
)
assert processor.table == ('user1', 'collection1')
assert processor.table == 'user1'
@pytest.mark.asyncio
@patch('trustgraph.storage.triples.cassandra.write.TrustGraph')
@patch('trustgraph.storage.triples.cassandra.write.KnowledgeGraph')
async def test_table_switching_without_auth(self, mock_trustgraph):
"""Test table switching logic when no authentication is provided"""
taskgroup_mock = MagicMock()
@ -102,16 +134,15 @@ class TestCassandraStorageProcessor:
await processor.store_triples(mock_message)
# Verify TrustGraph was called without auth parameters
# Verify KnowledgeGraph was called without auth parameters
mock_trustgraph.assert_called_once_with(
hosts=['localhost'],
keyspace='user2',
table='collection2'
hosts=['cassandra'], # Updated default
keyspace='user2'
)
assert processor.table == ('user2', 'collection2')
assert processor.table == 'user2'
@pytest.mark.asyncio
@patch('trustgraph.storage.triples.cassandra.write.TrustGraph')
@patch('trustgraph.storage.triples.cassandra.write.KnowledgeGraph')
async def test_table_reuse_when_same(self, mock_trustgraph):
"""Test that TrustGraph is not recreated when table hasn't changed"""
taskgroup_mock = MagicMock()
@ -135,7 +166,7 @@ class TestCassandraStorageProcessor:
assert mock_trustgraph.call_count == 1 # Should not increase
@pytest.mark.asyncio
@patch('trustgraph.storage.triples.cassandra.write.TrustGraph')
@patch('trustgraph.storage.triples.cassandra.write.KnowledgeGraph')
async def test_triple_insertion(self, mock_trustgraph):
"""Test that triples are properly inserted into Cassandra"""
taskgroup_mock = MagicMock()
@ -165,11 +196,11 @@ class TestCassandraStorageProcessor:
# Verify both triples were inserted
assert mock_tg_instance.insert.call_count == 2
mock_tg_instance.insert.assert_any_call('subject1', 'predicate1', 'object1')
mock_tg_instance.insert.assert_any_call('subject2', 'predicate2', 'object2')
mock_tg_instance.insert.assert_any_call('collection1', 'subject1', 'predicate1', 'object1')
mock_tg_instance.insert.assert_any_call('collection1', 'subject2', 'predicate2', 'object2')
@pytest.mark.asyncio
@patch('trustgraph.storage.triples.cassandra.write.TrustGraph')
@patch('trustgraph.storage.triples.cassandra.write.KnowledgeGraph')
async def test_triple_insertion_with_empty_list(self, mock_trustgraph):
"""Test behavior when message has no triples"""
taskgroup_mock = MagicMock()
@ -190,7 +221,7 @@ class TestCassandraStorageProcessor:
mock_tg_instance.insert.assert_not_called()
@pytest.mark.asyncio
@patch('trustgraph.storage.triples.cassandra.write.TrustGraph')
@patch('trustgraph.storage.triples.cassandra.write.KnowledgeGraph')
@patch('trustgraph.storage.triples.cassandra.write.time.sleep')
async def test_exception_handling_with_retry(self, mock_sleep, mock_trustgraph):
"""Test exception handling during TrustGraph creation"""
@ -225,16 +256,16 @@ class TestCassandraStorageProcessor:
# Verify parent add_args was called
mock_parent_add_args.assert_called_once_with(parser)
# Verify our specific arguments were added
# Verify our specific arguments were added (now using cassandra_* names)
# Parse empty args to check defaults
args = parser.parse_args([])
assert hasattr(args, 'graph_host')
assert args.graph_host == 'localhost'
assert hasattr(args, 'graph_username')
assert args.graph_username is None
assert hasattr(args, 'graph_password')
assert args.graph_password is None
assert hasattr(args, 'cassandra_host')
assert args.cassandra_host == 'cassandra' # Updated default
assert hasattr(args, 'cassandra_username')
assert args.cassandra_username is None
assert hasattr(args, 'cassandra_password')
assert args.cassandra_password is None
def test_add_args_with_custom_values(self):
"""Test add_args with custom command line values"""
@ -246,31 +277,44 @@ class TestCassandraStorageProcessor:
with patch('trustgraph.storage.triples.cassandra.write.TriplesStoreService.add_args'):
Processor.add_args(parser)
# Test parsing with custom values
# Test parsing with custom values (new cassandra_* arguments)
args = parser.parse_args([
'--graph-host', 'cassandra.example.com',
'--graph-username', 'testuser',
'--graph-password', 'testpass'
'--cassandra-host', 'cassandra.example.com',
'--cassandra-username', 'testuser',
'--cassandra-password', 'testpass'
])
assert args.graph_host == 'cassandra.example.com'
assert args.graph_username == 'testuser'
assert args.graph_password == 'testpass'
assert args.cassandra_host == 'cassandra.example.com'
assert args.cassandra_username == 'testuser'
assert args.cassandra_password == 'testpass'
def test_add_args_short_form(self):
"""Test add_args with short form arguments"""
def test_add_args_with_env_vars(self):
"""Test add_args shows environment variables in help text"""
from argparse import ArgumentParser
from unittest.mock import patch
import os
parser = ArgumentParser()
# Set environment variables
env_vars = {
'CASSANDRA_HOST': 'env-host1,env-host2',
'CASSANDRA_USERNAME': 'env-user',
'CASSANDRA_PASSWORD': 'env-pass'
}
with patch('trustgraph.storage.triples.cassandra.write.TriplesStoreService.add_args'):
Processor.add_args(parser)
# Test parsing with short form
args = parser.parse_args(['-g', 'short.example.com'])
assert args.graph_host == 'short.example.com'
with patch.dict(os.environ, env_vars, clear=True):
Processor.add_args(parser)
# Check that help text includes environment variable info
help_text = parser.format_help()
# Argparse may break lines, so check for components
assert 'env-' in help_text and 'host1' in help_text
assert 'env-host2' in help_text
assert 'env-user' in help_text
assert '<set>' in help_text # Password should be hidden
assert 'env-pass' not in help_text # Password value not shown
@patch('trustgraph.storage.triples.cassandra.write.Processor.launch')
def test_run_function(self, mock_launch):
@ -282,7 +326,7 @@ class TestCassandraStorageProcessor:
mock_launch.assert_called_once_with(default_ident, '\nGraph writer. Input is graph edge. Writes edges to Cassandra graph.\n')
@pytest.mark.asyncio
@patch('trustgraph.storage.triples.cassandra.write.TrustGraph')
@patch('trustgraph.storage.triples.cassandra.write.KnowledgeGraph')
async def test_store_triples_table_switching_between_different_tables(self, mock_trustgraph):
"""Test table switching when different tables are used in sequence"""
taskgroup_mock = MagicMock()
@ -299,7 +343,7 @@ class TestCassandraStorageProcessor:
mock_message1.triples = []
await processor.store_triples(mock_message1)
assert processor.table == ('user1', 'collection1')
assert processor.table == 'user1'
assert processor.tg == mock_tg_instance1
# Second message with different table
@ -309,14 +353,14 @@ class TestCassandraStorageProcessor:
mock_message2.triples = []
await processor.store_triples(mock_message2)
assert processor.table == ('user2', 'collection2')
assert processor.table == 'user2'
assert processor.tg == mock_tg_instance2
# Verify TrustGraph was created twice for different tables
assert mock_trustgraph.call_count == 2
@pytest.mark.asyncio
@patch('trustgraph.storage.triples.cassandra.write.TrustGraph')
@patch('trustgraph.storage.triples.cassandra.write.KnowledgeGraph')
async def test_store_triples_with_special_characters_in_values(self, mock_trustgraph):
"""Test storing triples with special characters and unicode"""
taskgroup_mock = MagicMock()
@ -340,13 +384,14 @@ class TestCassandraStorageProcessor:
# Verify the triple was inserted with special characters preserved
mock_tg_instance.insert.assert_called_once_with(
'test_collection',
'subject with spaces & symbols',
'predicate:with/colons',
'object with "quotes" and unicode: ñáéíóú'
)
@pytest.mark.asyncio
@patch('trustgraph.storage.triples.cassandra.write.TrustGraph')
@patch('trustgraph.storage.triples.cassandra.write.KnowledgeGraph')
async def test_store_triples_preserves_old_table_on_exception(self, mock_trustgraph):
"""Test that table remains unchanged when TrustGraph creation fails"""
taskgroup_mock = MagicMock()
@ -370,4 +415,99 @@ class TestCassandraStorageProcessor:
# Table should remain unchanged since self.table = table happens after try/except
assert processor.table == ('old_user', 'old_collection')
# TrustGraph should be set to None though
assert processor.tg is None
assert processor.tg is None
class TestCassandraPerformanceOptimizations:
    """Test cases for multi-table performance optimizations

    KnowledgeGraph is patched out in the async tests, so they verify how the
    processor drives it (construction and inserts), not which table layout
    the real KnowledgeGraph selects.
    """

    @staticmethod
    def _make_message(triples):
        """Build a mock triples message for user1 / collection1."""
        mock_message = MagicMock()
        mock_message.metadata.user = 'user1'
        mock_message.metadata.collection = 'collection1'
        mock_message.triples = triples
        return mock_message

    @staticmethod
    def _assert_graph_created_for_user1(mock_trustgraph, mock_tg_instance, processor):
        """Check the processor built one KnowledgeGraph for keyspace user1 and kept it."""
        mock_trustgraph.assert_called_once()
        assert mock_trustgraph.call_args[1]['keyspace'] == 'user1'
        assert processor.tg is mock_tg_instance
        # The message carried no triples, so nothing may be inserted
        mock_tg_instance.insert.assert_not_called()

    @pytest.mark.asyncio
    @patch('trustgraph.storage.triples.cassandra.write.KnowledgeGraph')
    async def test_legacy_mode_uses_single_table(self, mock_trustgraph):
        """Test that the processor still works with CASSANDRA_USE_LEGACY=true"""
        taskgroup_mock = MagicMock()
        mock_tg_instance = MagicMock()
        mock_trustgraph.return_value = mock_tg_instance

        with patch.dict('os.environ', {'CASSANDRA_USE_LEGACY': 'true'}):
            processor = Processor(taskgroup=taskgroup_mock)

            await processor.store_triples(self._make_message([]))

            # A MagicMock return_value is never None, so assert on the actual
            # interaction with KnowledgeGraph instead.
            self._assert_graph_created_for_user1(
                mock_trustgraph, mock_tg_instance, processor
            )

    @pytest.mark.asyncio
    @patch('trustgraph.storage.triples.cassandra.write.KnowledgeGraph')
    async def test_optimized_mode_uses_multi_table(self, mock_trustgraph):
        """Test that the processor works with CASSANDRA_USE_LEGACY=false"""
        taskgroup_mock = MagicMock()
        mock_tg_instance = MagicMock()
        mock_trustgraph.return_value = mock_tg_instance

        with patch.dict('os.environ', {'CASSANDRA_USE_LEGACY': 'false'}):
            processor = Processor(taskgroup=taskgroup_mock)

            await processor.store_triples(self._make_message([]))

            # A MagicMock return_value is never None, so assert on the actual
            # interaction with KnowledgeGraph instead.
            self._assert_graph_created_for_user1(
                mock_trustgraph, mock_tg_instance, processor
            )

    @pytest.mark.asyncio
    @patch('trustgraph.storage.triples.cassandra.write.KnowledgeGraph')
    async def test_batch_write_consistency(self, mock_trustgraph):
        """Test that all tables stay consistent during batch writes"""
        taskgroup_mock = MagicMock()
        mock_tg_instance = MagicMock()
        mock_trustgraph.return_value = mock_tg_instance

        processor = Processor(taskgroup=taskgroup_mock)

        # Create test triple
        triple = MagicMock()
        triple.s.value = 'test_subject'
        triple.p.value = 'test_predicate'
        triple.o.value = 'test_object'

        await processor.store_triples(self._make_message([triple]))

        # Verify insert was called for the triple (implementation details tested in KnowledgeGraph)
        mock_tg_instance.insert.assert_called_once_with(
            'collection1', 'test_subject', 'test_predicate', 'test_object'
        )

    def test_environment_variable_controls_mode(self):
        """Test that the processor initialises under every CASSANDRA_USE_LEGACY setting"""
        taskgroup_mock = MagicMock()

        # (environment, clear) pairs: legacy mode, optimized mode, and the
        # default mode (optimized when env var not set)
        scenarios = [
            ({'CASSANDRA_USE_LEGACY': 'true'}, False),
            ({'CASSANDRA_USE_LEGACY': 'false'}, False),
            ({}, True),
        ]

        for env, clear in scenarios:
            with patch.dict('os.environ', env, clear=clear):
                processor = Processor(taskgroup=taskgroup_mock)

            # Mode is determined in KnowledgeGraph initialization; at this
            # point no graph has been selected yet.
            assert processor.table is None

View file

@ -86,15 +86,17 @@ class TestFalkorDBStorageProcessor:
mock_result = MagicMock()
mock_result.nodes_created = 1
mock_result.run_time_ms = 10
processor.io.query.return_value = mock_result
processor.create_node(test_uri)
processor.create_node(test_uri, 'test_user', 'test_collection')
processor.io.query.assert_called_once_with(
"MERGE (n:Node {uri: $uri})",
"MERGE (n:Node {uri: $uri, user: $user, collection: $collection})",
params={
"uri": test_uri,
"user": 'test_user',
"collection": 'test_collection',
},
)
@ -104,15 +106,17 @@ class TestFalkorDBStorageProcessor:
mock_result = MagicMock()
mock_result.nodes_created = 1
mock_result.run_time_ms = 10
processor.io.query.return_value = mock_result
processor.create_literal(test_value)
processor.create_literal(test_value, 'test_user', 'test_collection')
processor.io.query.assert_called_once_with(
"MERGE (n:Literal {value: $value})",
"MERGE (n:Literal {value: $value, user: $user, collection: $collection})",
params={
"value": test_value,
"user": 'test_user',
"collection": 'test_collection',
},
)
@ -121,23 +125,25 @@ class TestFalkorDBStorageProcessor:
src_uri = 'http://example.com/src'
pred_uri = 'http://example.com/pred'
dest_uri = 'http://example.com/dest'
mock_result = MagicMock()
mock_result.nodes_created = 0
mock_result.run_time_ms = 5
processor.io.query.return_value = mock_result
processor.relate_node(src_uri, pred_uri, dest_uri)
processor.relate_node(src_uri, pred_uri, dest_uri, 'test_user', 'test_collection')
processor.io.query.assert_called_once_with(
"MATCH (src:Node {uri: $src}) "
"MATCH (dest:Node {uri: $dest}) "
"MERGE (src)-[:Rel {uri: $uri}]->(dest)",
"MATCH (src:Node {uri: $src, user: $user, collection: $collection}) "
"MATCH (dest:Node {uri: $dest, user: $user, collection: $collection}) "
"MERGE (src)-[:Rel {uri: $uri, user: $user, collection: $collection}]->(dest)",
params={
"src": src_uri,
"dest": dest_uri,
"uri": pred_uri,
"user": 'test_user',
"collection": 'test_collection',
},
)
@ -146,23 +152,25 @@ class TestFalkorDBStorageProcessor:
src_uri = 'http://example.com/src'
pred_uri = 'http://example.com/pred'
literal_value = 'literal destination'
mock_result = MagicMock()
mock_result.nodes_created = 0
mock_result.run_time_ms = 5
processor.io.query.return_value = mock_result
processor.relate_literal(src_uri, pred_uri, literal_value)
processor.relate_literal(src_uri, pred_uri, literal_value, 'test_user', 'test_collection')
processor.io.query.assert_called_once_with(
"MATCH (src:Node {uri: $src}) "
"MATCH (dest:Literal {value: $dest}) "
"MERGE (src)-[:Rel {uri: $uri}]->(dest)",
"MATCH (src:Node {uri: $src, user: $user, collection: $collection}) "
"MATCH (dest:Literal {value: $dest, user: $user, collection: $collection}) "
"MERGE (src)-[:Rel {uri: $uri, user: $user, collection: $collection}]->(dest)",
params={
"src": src_uri,
"dest": literal_value,
"uri": pred_uri,
"user": 'test_user',
"collection": 'test_collection',
},
)
@ -191,14 +199,16 @@ class TestFalkorDBStorageProcessor:
# Verify queries were called in the correct order
expected_calls = [
# Create subject node
(("MERGE (n:Node {uri: $uri})",), {"params": {"uri": "http://example.com/subject"}}),
(("MERGE (n:Node {uri: $uri, user: $user, collection: $collection})",),
{"params": {"uri": "http://example.com/subject", "user": "test_user", "collection": "test_collection"}}),
# Create object node
(("MERGE (n:Node {uri: $uri})",), {"params": {"uri": "http://example.com/object"}}),
(("MERGE (n:Node {uri: $uri, user: $user, collection: $collection})",),
{"params": {"uri": "http://example.com/object", "user": "test_user", "collection": "test_collection"}}),
# Create relationship
(("MATCH (src:Node {uri: $src}) "
"MATCH (dest:Node {uri: $dest}) "
"MERGE (src)-[:Rel {uri: $uri}]->(dest)",),
{"params": {"src": "http://example.com/subject", "dest": "http://example.com/object", "uri": "http://example.com/predicate"}}),
(("MATCH (src:Node {uri: $src, user: $user, collection: $collection}) "
"MATCH (dest:Node {uri: $dest, user: $user, collection: $collection}) "
"MERGE (src)-[:Rel {uri: $uri, user: $user, collection: $collection}]->(dest)",),
{"params": {"src": "http://example.com/subject", "dest": "http://example.com/object", "uri": "http://example.com/predicate", "user": "test_user", "collection": "test_collection"}}),
]
assert processor.io.query.call_count == 3
@ -220,14 +230,16 @@ class TestFalkorDBStorageProcessor:
# Verify queries were called in the correct order
expected_calls = [
# Create subject node
(("MERGE (n:Node {uri: $uri})",), {"params": {"uri": "http://example.com/subject"}}),
(("MERGE (n:Node {uri: $uri, user: $user, collection: $collection})",),
{"params": {"uri": "http://example.com/subject", "user": "test_user", "collection": "test_collection"}}),
# Create literal object
(("MERGE (n:Literal {value: $value})",), {"params": {"value": "literal object"}}),
(("MERGE (n:Literal {value: $value, user: $user, collection: $collection})",),
{"params": {"value": "literal object", "user": "test_user", "collection": "test_collection"}}),
# Create relationship
(("MATCH (src:Node {uri: $src}) "
"MATCH (dest:Literal {value: $dest}) "
"MERGE (src)-[:Rel {uri: $uri}]->(dest)",),
{"params": {"src": "http://example.com/subject", "dest": "literal object", "uri": "http://example.com/predicate"}}),
(("MATCH (src:Node {uri: $src, user: $user, collection: $collection}) "
"MATCH (dest:Literal {value: $dest, user: $user, collection: $collection}) "
"MERGE (src)-[:Rel {uri: $uri, user: $user, collection: $collection}]->(dest)",),
{"params": {"src": "http://example.com/subject", "dest": "literal object", "uri": "http://example.com/predicate", "user": "test_user", "collection": "test_collection"}}),
]
assert processor.io.query.call_count == 3
@ -408,12 +420,14 @@ class TestFalkorDBStorageProcessor:
processor.io.query.return_value = mock_result
processor.create_node(test_uri)
processor.create_node(test_uri, 'test_user', 'test_collection')
processor.io.query.assert_called_once_with(
"MERGE (n:Node {uri: $uri})",
"MERGE (n:Node {uri: $uri, user: $user, collection: $collection})",
params={
"uri": test_uri,
"user": 'test_user',
"collection": 'test_collection',
},
)
@ -426,11 +440,13 @@ class TestFalkorDBStorageProcessor:
processor.io.query.return_value = mock_result
processor.create_literal(test_value)
processor.create_literal(test_value, 'test_user', 'test_collection')
processor.io.query.assert_called_once_with(
"MERGE (n:Literal {value: $value})",
"MERGE (n:Literal {value: $value, user: $user, collection: $collection})",
params={
"value": test_value,
"user": 'test_user',
"collection": 'test_collection',
},
)

View file

@ -99,12 +99,16 @@ class TestMemgraphStorageProcessor:
processor = Processor(taskgroup=taskgroup_mock)
# Verify index creation calls
# Verify index creation calls (now includes user/collection indexes)
expected_calls = [
"CREATE INDEX ON :Node",
"CREATE INDEX ON :Node(uri)",
"CREATE INDEX ON :Literal",
"CREATE INDEX ON :Literal(value)"
"CREATE INDEX ON :Literal(value)",
"CREATE INDEX ON :Node(user)",
"CREATE INDEX ON :Node(collection)",
"CREATE INDEX ON :Literal(user)",
"CREATE INDEX ON :Literal(collection)"
]
assert mock_session.run.call_count == len(expected_calls)
@ -127,8 +131,8 @@ class TestMemgraphStorageProcessor:
# Should not raise an exception
processor = Processor(taskgroup=taskgroup_mock)
# Verify all index creation calls were attempted
assert mock_session.run.call_count == 4
# Verify all index creation calls were attempted (8 total)
assert mock_session.run.call_count == 8
def test_create_node(self, processor):
"""Test node creation"""
@ -141,11 +145,13 @@ class TestMemgraphStorageProcessor:
processor.io.execute_query.return_value = mock_result
processor.create_node(test_uri)
processor.create_node(test_uri, "test_user", "test_collection")
processor.io.execute_query.assert_called_once_with(
"MERGE (n:Node {uri: $uri})",
"MERGE (n:Node {uri: $uri, user: $user, collection: $collection})",
uri=test_uri,
user="test_user",
collection="test_collection",
database_=processor.db
)
@ -160,11 +166,13 @@ class TestMemgraphStorageProcessor:
processor.io.execute_query.return_value = mock_result
processor.create_literal(test_value)
processor.create_literal(test_value, "test_user", "test_collection")
processor.io.execute_query.assert_called_once_with(
"MERGE (n:Literal {value: $value})",
"MERGE (n:Literal {value: $value, user: $user, collection: $collection})",
value=test_value,
user="test_user",
collection="test_collection",
database_=processor.db
)
@ -182,13 +190,14 @@ class TestMemgraphStorageProcessor:
processor.io.execute_query.return_value = mock_result
processor.relate_node(src_uri, pred_uri, dest_uri)
processor.relate_node(src_uri, pred_uri, dest_uri, "test_user", "test_collection")
processor.io.execute_query.assert_called_once_with(
"MATCH (src:Node {uri: $src}) "
"MATCH (dest:Node {uri: $dest}) "
"MERGE (src)-[:Rel {uri: $uri}]->(dest)",
"MATCH (src:Node {uri: $src, user: $user, collection: $collection}) "
"MATCH (dest:Node {uri: $dest, user: $user, collection: $collection}) "
"MERGE (src)-[:Rel {uri: $uri, user: $user, collection: $collection}]->(dest)",
src=src_uri, dest=dest_uri, uri=pred_uri,
user="test_user", collection="test_collection",
database_=processor.db
)
@ -206,13 +215,14 @@ class TestMemgraphStorageProcessor:
processor.io.execute_query.return_value = mock_result
processor.relate_literal(src_uri, pred_uri, literal_value)
processor.relate_literal(src_uri, pred_uri, literal_value, "test_user", "test_collection")
processor.io.execute_query.assert_called_once_with(
"MATCH (src:Node {uri: $src}) "
"MATCH (dest:Literal {value: $dest}) "
"MERGE (src)-[:Rel {uri: $uri}]->(dest)",
"MATCH (src:Node {uri: $src, user: $user, collection: $collection}) "
"MATCH (dest:Literal {value: $dest, user: $user, collection: $collection}) "
"MERGE (src)-[:Rel {uri: $uri, user: $user, collection: $collection}]->(dest)",
src=src_uri, dest=literal_value, uri=pred_uri,
user="test_user", collection="test_collection",
database_=processor.db
)
@ -226,19 +236,22 @@ class TestMemgraphStorageProcessor:
o=Value(value='http://example.com/object', is_uri=True)
)
processor.create_triple(mock_tx, triple)
processor.create_triple(mock_tx, triple, "test_user", "test_collection")
# Verify transaction calls
expected_calls = [
# Create subject node
("MERGE (n:Node {uri: $uri})", {'uri': 'http://example.com/subject'}),
("MERGE (n:Node {uri: $uri, user: $user, collection: $collection})",
{'uri': 'http://example.com/subject', 'user': 'test_user', 'collection': 'test_collection'}),
# Create object node
("MERGE (n:Node {uri: $uri})", {'uri': 'http://example.com/object'}),
("MERGE (n:Node {uri: $uri, user: $user, collection: $collection})",
{'uri': 'http://example.com/object', 'user': 'test_user', 'collection': 'test_collection'}),
# Create relationship
("MATCH (src:Node {uri: $src}) "
"MATCH (dest:Node {uri: $dest}) "
"MERGE (src)-[:Rel {uri: $uri}]->(dest)",
{'src': 'http://example.com/subject', 'dest': 'http://example.com/object', 'uri': 'http://example.com/predicate'})
("MATCH (src:Node {uri: $src, user: $user, collection: $collection}) "
"MATCH (dest:Node {uri: $dest, user: $user, collection: $collection}) "
"MERGE (src)-[:Rel {uri: $uri, user: $user, collection: $collection}]->(dest)",
{'src': 'http://example.com/subject', 'dest': 'http://example.com/object', 'uri': 'http://example.com/predicate',
'user': 'test_user', 'collection': 'test_collection'})
]
assert mock_tx.run.call_count == 3
@ -257,19 +270,22 @@ class TestMemgraphStorageProcessor:
o=Value(value='literal object', is_uri=False)
)
processor.create_triple(mock_tx, triple)
processor.create_triple(mock_tx, triple, "test_user", "test_collection")
# Verify transaction calls
expected_calls = [
# Create subject node
("MERGE (n:Node {uri: $uri})", {'uri': 'http://example.com/subject'}),
("MERGE (n:Node {uri: $uri, user: $user, collection: $collection})",
{'uri': 'http://example.com/subject', 'user': 'test_user', 'collection': 'test_collection'}),
# Create literal object
("MERGE (n:Literal {value: $value})", {'value': 'literal object'}),
("MERGE (n:Literal {value: $value, user: $user, collection: $collection})",
{'value': 'literal object', 'user': 'test_user', 'collection': 'test_collection'}),
# Create relationship
("MATCH (src:Node {uri: $src}) "
"MATCH (dest:Literal {value: $dest}) "
"MERGE (src)-[:Rel {uri: $uri}]->(dest)",
{'src': 'http://example.com/subject', 'dest': 'literal object', 'uri': 'http://example.com/predicate'})
("MATCH (src:Node {uri: $src, user: $user, collection: $collection}) "
"MATCH (dest:Literal {value: $dest, user: $user, collection: $collection}) "
"MERGE (src)-[:Rel {uri: $uri, user: $user, collection: $collection}]->(dest)",
{'src': 'http://example.com/subject', 'dest': 'literal object', 'uri': 'http://example.com/predicate',
'user': 'test_user', 'collection': 'test_collection'})
]
assert mock_tx.run.call_count == 3
@ -281,33 +297,42 @@ class TestMemgraphStorageProcessor:
@pytest.mark.asyncio
async def test_store_triples_single_triple(self, processor, mock_message):
    """Test storing a single triple.

    Runs ``processor.store_triples`` on the ``mock_message`` fixture and
    checks that three ``processor.io.execute_query`` calls are issued, each
    carrying ``user`` and ``collection`` keyword arguments.

    Args:
        processor: Fixture providing the processor under test; its ``io``
            attribute is used as a mock here.
        mock_message: Fixture providing the message to store. It is defined
            outside this block -- presumably a single triple with a literal
            object plus user/collection metadata; confirm against the
            fixture.
    """
    # Mock the execute_query method used by the direct methods
    mock_result = MagicMock()
    mock_summary = MagicMock()
    mock_summary.counters.nodes_created = 1
    mock_summary.result_available_after = 10
    mock_result.summary = mock_summary
    processor.io.execute_query.return_value = mock_result

    # Reset the mock to clear initialization calls, so the count below
    # reflects only what store_triples issues.
    processor.io.execute_query.reset_mock()

    await processor.store_triples(mock_message)

    # Verify execute_query was called for create_node, create_literal, and
    # relate_literal (since mock_message has a literal object)
    assert processor.io.execute_query.call_count == 3

    # Verify user/collection parameters were included in every query.
    # Recorded calls expose their keyword arguments as ``.kwargs``; a
    # hasattr() probe on a call record never fails, so no fallback is needed.
    for recorded_call in processor.io.execute_query.call_args_list:
        call_kwargs = recorded_call.kwargs
        assert 'user' in call_kwargs
        assert 'collection' in call_kwargs
@pytest.mark.asyncio
async def test_store_triples_multiple_triples(self, processor):
"""Test storing multiple triples"""
mock_session = MagicMock()
processor.io.session.return_value.__enter__.return_value = mock_session
# Mock the execute_query method used by the direct methods
mock_result = MagicMock()
mock_summary = MagicMock()
mock_summary.counters.nodes_created = 1
mock_summary.result_available_after = 10
mock_result.summary = mock_summary
processor.io.execute_query.return_value = mock_result
# Reset the mock to clear the initialization call
processor.io.session.reset_mock()
# Reset the mock to clear initialization calls
processor.io.execute_query.reset_mock()
# Create message with multiple triples
message = MagicMock()
@ -329,16 +354,17 @@ class TestMemgraphStorageProcessor:
await processor.store_triples(message)
# Verify session was called twice (once per triple)
assert processor.io.session.call_count == 2
# Verify execute_query was called:
# Triple1: create_node(s) + create_literal(o) + relate_literal = 3 calls
# Triple2: create_node(s) + create_node(o) + relate_node = 3 calls
# Total: 6 calls
assert processor.io.execute_query.call_count == 6
# Verify execute_write was called once per triple
assert mock_session.execute_write.call_count == 2
# Verify each triple was processed
call_args_list = mock_session.execute_write.call_args_list
assert call_args_list[0][0][1] == triple1
assert call_args_list[1][0][1] == triple2
# Verify user/collection parameters were included in all calls
for call in processor.io.execute_query.call_args_list:
call_kwargs = call.kwargs if hasattr(call, 'kwargs') else call[1]
assert call_kwargs['user'] == 'test_user'
assert call_kwargs['collection'] == 'test_collection'
@pytest.mark.asyncio
async def test_store_triples_empty_list(self, processor):

View file

@ -62,14 +62,18 @@ class TestNeo4jStorageProcessor:
processor = Processor(taskgroup=taskgroup_mock)
# Verify index creation queries were executed
# Verify index creation queries were executed (now includes 7 indexes)
expected_calls = [
"CREATE INDEX Node_uri FOR (n:Node) ON (n.uri)",
"CREATE INDEX Literal_value FOR (n:Literal) ON (n.value)",
"CREATE INDEX Rel_uri FOR ()-[r:Rel]-() ON (r.uri)"
"CREATE INDEX Rel_uri FOR ()-[r:Rel]-() ON (r.uri)",
"CREATE INDEX node_user_collection_uri FOR (n:Node) ON (n.user, n.collection, n.uri)",
"CREATE INDEX literal_user_collection_value FOR (n:Literal) ON (n.user, n.collection, n.value)",
"CREATE INDEX rel_user FOR ()-[r:Rel]-() ON (r.user)",
"CREATE INDEX rel_collection FOR ()-[r:Rel]-() ON (r.collection)"
]
assert mock_session.run.call_count == 3
assert mock_session.run.call_count == 7
for expected_query in expected_calls:
mock_session.run.assert_any_call(expected_query)
@ -88,8 +92,8 @@ class TestNeo4jStorageProcessor:
# Should not raise exception - they should be caught and ignored
processor = Processor(taskgroup=taskgroup_mock)
# Should have tried to create all 3 indexes despite exceptions
assert mock_session.run.call_count == 3
# Should have tried to create all 7 indexes despite exceptions
assert mock_session.run.call_count == 7
@patch('trustgraph.storage.triples.neo4j.write.GraphDatabase')
def test_create_node(self, mock_graph_db):
@ -111,11 +115,13 @@ class TestNeo4jStorageProcessor:
processor = Processor(taskgroup=taskgroup_mock)
# Test create_node
processor.create_node("http://example.com/node")
processor.create_node("http://example.com/node", "test_user", "test_collection")
mock_driver.execute_query.assert_called_with(
"MERGE (n:Node {uri: $uri})",
"MERGE (n:Node {uri: $uri, user: $user, collection: $collection})",
uri="http://example.com/node",
user="test_user",
collection="test_collection",
database_="neo4j"
)
@ -139,11 +145,13 @@ class TestNeo4jStorageProcessor:
processor = Processor(taskgroup=taskgroup_mock)
# Test create_literal
processor.create_literal("literal value")
processor.create_literal("literal value", "test_user", "test_collection")
mock_driver.execute_query.assert_called_with(
"MERGE (n:Literal {value: $value})",
"MERGE (n:Literal {value: $value, user: $user, collection: $collection})",
value="literal value",
user="test_user",
collection="test_collection",
database_="neo4j"
)
@ -170,16 +178,20 @@ class TestNeo4jStorageProcessor:
processor.relate_node(
"http://example.com/subject",
"http://example.com/predicate",
"http://example.com/object"
"http://example.com/object",
"test_user",
"test_collection"
)
mock_driver.execute_query.assert_called_with(
"MATCH (src:Node {uri: $src}) "
"MATCH (dest:Node {uri: $dest}) "
"MERGE (src)-[:Rel {uri: $uri}]->(dest)",
"MATCH (src:Node {uri: $src, user: $user, collection: $collection}) "
"MATCH (dest:Node {uri: $dest, user: $user, collection: $collection}) "
"MERGE (src)-[:Rel {uri: $uri, user: $user, collection: $collection}]->(dest)",
src="http://example.com/subject",
dest="http://example.com/object",
uri="http://example.com/predicate",
user="test_user",
collection="test_collection",
database_="neo4j"
)
@ -206,16 +218,20 @@ class TestNeo4jStorageProcessor:
processor.relate_literal(
"http://example.com/subject",
"http://example.com/predicate",
"literal value"
"literal value",
"test_user",
"test_collection"
)
mock_driver.execute_query.assert_called_with(
"MATCH (src:Node {uri: $src}) "
"MATCH (dest:Literal {value: $dest}) "
"MERGE (src)-[:Rel {uri: $uri}]->(dest)",
"MATCH (src:Node {uri: $src, user: $user, collection: $collection}) "
"MATCH (dest:Literal {value: $dest, user: $user, collection: $collection}) "
"MERGE (src)-[:Rel {uri: $uri, user: $user, collection: $collection}]->(dest)",
src="http://example.com/subject",
dest="literal value",
uri="http://example.com/predicate",
user="test_user",
collection="test_collection",
database_="neo4j"
)
@ -246,9 +262,11 @@ class TestNeo4jStorageProcessor:
triple.o.value = "http://example.com/object"
triple.o.is_uri = True
# Create mock message
# Create mock message with metadata
mock_message = MagicMock()
mock_message.triples = [triple]
mock_message.metadata.user = "test_user"
mock_message.metadata.collection = "test_collection"
await processor.store_triples(mock_message)
@ -257,23 +275,25 @@ class TestNeo4jStorageProcessor:
expected_calls = [
# Subject node creation
(
"MERGE (n:Node {uri: $uri})",
{"uri": "http://example.com/subject", "database_": "neo4j"}
"MERGE (n:Node {uri: $uri, user: $user, collection: $collection})",
{"uri": "http://example.com/subject", "user": "test_user", "collection": "test_collection", "database_": "neo4j"}
),
# Object node creation
(
"MERGE (n:Node {uri: $uri})",
{"uri": "http://example.com/object", "database_": "neo4j"}
"MERGE (n:Node {uri: $uri, user: $user, collection: $collection})",
{"uri": "http://example.com/object", "user": "test_user", "collection": "test_collection", "database_": "neo4j"}
),
# Relationship creation
(
"MATCH (src:Node {uri: $src}) "
"MATCH (dest:Node {uri: $dest}) "
"MERGE (src)-[:Rel {uri: $uri}]->(dest)",
"MATCH (src:Node {uri: $src, user: $user, collection: $collection}) "
"MATCH (dest:Node {uri: $dest, user: $user, collection: $collection}) "
"MERGE (src)-[:Rel {uri: $uri, user: $user, collection: $collection}]->(dest)",
{
"src": "http://example.com/subject",
"dest": "http://example.com/object",
"uri": "http://example.com/predicate",
"user": "test_user",
"collection": "test_collection",
"database_": "neo4j"
}
)
@ -310,9 +330,11 @@ class TestNeo4jStorageProcessor:
triple.o.value = "literal value"
triple.o.is_uri = False
# Create mock message
# Create mock message with metadata
mock_message = MagicMock()
mock_message.triples = [triple]
mock_message.metadata.user = "test_user"
mock_message.metadata.collection = "test_collection"
await processor.store_triples(mock_message)
@ -322,23 +344,25 @@ class TestNeo4jStorageProcessor:
expected_calls = [
# Subject node creation
(
"MERGE (n:Node {uri: $uri})",
{"uri": "http://example.com/subject", "database_": "neo4j"}
"MERGE (n:Node {uri: $uri, user: $user, collection: $collection})",
{"uri": "http://example.com/subject", "user": "test_user", "collection": "test_collection", "database_": "neo4j"}
),
# Literal creation
(
"MERGE (n:Literal {value: $value})",
{"value": "literal value", "database_": "neo4j"}
"MERGE (n:Literal {value: $value, user: $user, collection: $collection})",
{"value": "literal value", "user": "test_user", "collection": "test_collection", "database_": "neo4j"}
),
# Relationship creation
(
"MATCH (src:Node {uri: $src}) "
"MATCH (dest:Literal {value: $dest}) "
"MERGE (src)-[:Rel {uri: $uri}]->(dest)",
"MATCH (src:Node {uri: $src, user: $user, collection: $collection}) "
"MATCH (dest:Literal {value: $dest, user: $user, collection: $collection}) "
"MERGE (src)-[:Rel {uri: $uri, user: $user, collection: $collection}]->(dest)",
{
"src": "http://example.com/subject",
"dest": "literal value",
"uri": "http://example.com/predicate",
"user": "test_user",
"collection": "test_collection",
"database_": "neo4j"
}
)
@ -381,9 +405,11 @@ class TestNeo4jStorageProcessor:
triple2.o.value = "literal value"
triple2.o.is_uri = False
# Create mock message
# Create mock message with metadata
mock_message = MagicMock()
mock_message.triples = [triple1, triple2]
mock_message.metadata.user = "test_user"
mock_message.metadata.collection = "test_collection"
await processor.store_triples(mock_message)
@ -405,9 +431,11 @@ class TestNeo4jStorageProcessor:
processor = Processor(taskgroup=taskgroup_mock)
# Create mock message with empty triples
# Create mock message with empty triples and metadata
mock_message = MagicMock()
mock_message.triples = []
mock_message.metadata.user = "test_user"
mock_message.metadata.collection = "test_collection"
await processor.store_triples(mock_message)
@ -521,28 +549,36 @@ class TestNeo4jStorageProcessor:
mock_message = MagicMock()
mock_message.triples = [triple]
mock_message.metadata.user = "test_user"
mock_message.metadata.collection = "test_collection"
await processor.store_triples(mock_message)
# Verify the triple was processed with special characters preserved
mock_driver.execute_query.assert_any_call(
"MERGE (n:Node {uri: $uri})",
"MERGE (n:Node {uri: $uri, user: $user, collection: $collection})",
uri="http://example.com/subject with spaces",
user="test_user",
collection="test_collection",
database_="neo4j"
)
mock_driver.execute_query.assert_any_call(
"MERGE (n:Literal {value: $value})",
"MERGE (n:Literal {value: $value, user: $user, collection: $collection})",
value='literal with "quotes" and unicode: ñáéíóú',
user="test_user",
collection="test_collection",
database_="neo4j"
)
mock_driver.execute_query.assert_any_call(
"MATCH (src:Node {uri: $src}) "
"MATCH (dest:Literal {value: $dest}) "
"MERGE (src)-[:Rel {uri: $uri}]->(dest)",
"MATCH (src:Node {uri: $src, user: $user, collection: $collection}) "
"MATCH (dest:Literal {value: $dest, user: $user, collection: $collection}) "
"MERGE (src)-[:Rel {uri: $uri, user: $user, collection: $collection}]->(dest)",
src="http://example.com/subject with spaces",
dest='literal with "quotes" and unicode: ñáéíóú',
uri="http://example.com/predicate:with/symbols",
user="test_user",
collection="test_collection",
database_="neo4j"
)