mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-04-26 08:56:21 +02:00
parent
a8e437fc7f
commit
6c7af8789d
216 changed files with 31360 additions and 1611 deletions
429
tests/unit/test_storage/test_cassandra_config_integration.py
Normal file
429
tests/unit/test_storage/test_cassandra_config_integration.py
Normal file
|
|
@ -0,0 +1,429 @@
|
|||
"""
|
||||
Integration tests for Cassandra configuration in processors.
|
||||
|
||||
Tests that processors correctly use the configuration helper
|
||||
and handle environment variables, CLI args, and backward compatibility.
|
||||
"""
|
||||
|
||||
import os
|
||||
import pytest
|
||||
from unittest.mock import Mock, patch, MagicMock
|
||||
|
||||
from trustgraph.storage.triples.cassandra.write import Processor as TriplesWriter
|
||||
from trustgraph.storage.objects.cassandra.write import Processor as ObjectsWriter
|
||||
from trustgraph.query.triples.cassandra.service import Processor as TriplesQuery
|
||||
from trustgraph.storage.knowledge.store import Processor as KgStore
|
||||
|
||||
|
||||
class TestTriplesWriterConfiguration:
|
||||
"""Test Cassandra configuration in triples writer processor."""
|
||||
|
||||
@patch('trustgraph.direct.cassandra_kg.KnowledgeGraph')
|
||||
def test_environment_variable_configuration(self, mock_trust_graph):
|
||||
"""Test processor picks up configuration from environment variables."""
|
||||
env_vars = {
|
||||
'CASSANDRA_HOST': 'env-host1,env-host2',
|
||||
'CASSANDRA_USERNAME': 'env-user',
|
||||
'CASSANDRA_PASSWORD': 'env-pass'
|
||||
}
|
||||
|
||||
with patch.dict(os.environ, env_vars, clear=True):
|
||||
processor = TriplesWriter(taskgroup=MagicMock())
|
||||
|
||||
assert processor.cassandra_host == ['env-host1', 'env-host2']
|
||||
assert processor.cassandra_username == 'env-user'
|
||||
assert processor.cassandra_password == 'env-pass'
|
||||
|
||||
@patch('trustgraph.direct.cassandra_kg.KnowledgeGraph')
|
||||
def test_parameter_override_environment(self, mock_trust_graph):
|
||||
"""Test explicit parameters override environment variables."""
|
||||
env_vars = {
|
||||
'CASSANDRA_HOST': 'env-host',
|
||||
'CASSANDRA_USERNAME': 'env-user',
|
||||
'CASSANDRA_PASSWORD': 'env-pass'
|
||||
}
|
||||
|
||||
with patch.dict(os.environ, env_vars, clear=True):
|
||||
processor = TriplesWriter(
|
||||
taskgroup=MagicMock(),
|
||||
cassandra_host='param-host1,param-host2',
|
||||
cassandra_username='param-user',
|
||||
cassandra_password='param-pass'
|
||||
)
|
||||
|
||||
assert processor.cassandra_host == ['param-host1', 'param-host2']
|
||||
assert processor.cassandra_username == 'param-user'
|
||||
assert processor.cassandra_password == 'param-pass'
|
||||
|
||||
@patch('trustgraph.direct.cassandra_kg.KnowledgeGraph')
|
||||
def test_no_backward_compatibility_graph_params(self, mock_trust_graph):
|
||||
"""Test that old graph_* parameter names are no longer supported."""
|
||||
processor = TriplesWriter(
|
||||
taskgroup=MagicMock(),
|
||||
graph_host='compat-host',
|
||||
graph_username='compat-user',
|
||||
graph_password='compat-pass'
|
||||
)
|
||||
|
||||
# Should use defaults since graph_* params are not recognized
|
||||
assert processor.cassandra_host == ['cassandra'] # Default
|
||||
assert processor.cassandra_username is None
|
||||
assert processor.cassandra_password is None
|
||||
|
||||
@patch('trustgraph.direct.cassandra_kg.KnowledgeGraph')
|
||||
def test_default_configuration(self, mock_trust_graph):
|
||||
"""Test default configuration when no params or env vars provided."""
|
||||
with patch.dict(os.environ, {}, clear=True):
|
||||
processor = TriplesWriter(taskgroup=MagicMock())
|
||||
|
||||
assert processor.cassandra_host == ['cassandra']
|
||||
assert processor.cassandra_username is None
|
||||
assert processor.cassandra_password is None
|
||||
|
||||
|
||||
class TestObjectsWriterConfiguration:
|
||||
"""Test Cassandra configuration in objects writer processor."""
|
||||
|
||||
@patch('trustgraph.storage.objects.cassandra.write.Cluster')
|
||||
def test_environment_variable_configuration(self, mock_cluster):
|
||||
"""Test processor picks up configuration from environment variables."""
|
||||
env_vars = {
|
||||
'CASSANDRA_HOST': 'obj-env-host1,obj-env-host2',
|
||||
'CASSANDRA_USERNAME': 'obj-env-user',
|
||||
'CASSANDRA_PASSWORD': 'obj-env-pass'
|
||||
}
|
||||
|
||||
mock_cluster_instance = MagicMock()
|
||||
mock_cluster.return_value = mock_cluster_instance
|
||||
|
||||
with patch.dict(os.environ, env_vars, clear=True):
|
||||
processor = ObjectsWriter(taskgroup=MagicMock())
|
||||
|
||||
assert processor.cassandra_host == ['obj-env-host1', 'obj-env-host2']
|
||||
assert processor.cassandra_username == 'obj-env-user'
|
||||
assert processor.cassandra_password == 'obj-env-pass'
|
||||
|
||||
@patch('trustgraph.storage.objects.cassandra.write.Cluster')
|
||||
def test_cassandra_connection_with_hosts_list(self, mock_cluster):
|
||||
"""Test that Cassandra connection uses hosts list correctly."""
|
||||
env_vars = {
|
||||
'CASSANDRA_HOST': 'conn-host1,conn-host2,conn-host3',
|
||||
'CASSANDRA_USERNAME': 'conn-user',
|
||||
'CASSANDRA_PASSWORD': 'conn-pass'
|
||||
}
|
||||
|
||||
mock_cluster_instance = MagicMock()
|
||||
mock_session = MagicMock()
|
||||
mock_cluster_instance.connect.return_value = mock_session
|
||||
mock_cluster.return_value = mock_cluster_instance
|
||||
|
||||
with patch.dict(os.environ, env_vars, clear=True):
|
||||
processor = ObjectsWriter(taskgroup=MagicMock())
|
||||
processor.connect_cassandra()
|
||||
|
||||
# Verify cluster was called with hosts list
|
||||
mock_cluster.assert_called_once()
|
||||
call_args = mock_cluster.call_args
|
||||
|
||||
# Check that contact_points was passed the hosts list
|
||||
assert 'contact_points' in call_args.kwargs
|
||||
assert call_args.kwargs['contact_points'] == ['conn-host1', 'conn-host2', 'conn-host3']
|
||||
|
||||
@patch('trustgraph.storage.objects.cassandra.write.Cluster')
|
||||
@patch('trustgraph.storage.objects.cassandra.write.PlainTextAuthProvider')
|
||||
def test_authentication_configuration(self, mock_auth_provider, mock_cluster):
|
||||
"""Test authentication is configured when credentials are provided."""
|
||||
env_vars = {
|
||||
'CASSANDRA_HOST': 'auth-host',
|
||||
'CASSANDRA_USERNAME': 'auth-user',
|
||||
'CASSANDRA_PASSWORD': 'auth-pass'
|
||||
}
|
||||
|
||||
mock_auth_instance = MagicMock()
|
||||
mock_auth_provider.return_value = mock_auth_instance
|
||||
mock_cluster_instance = MagicMock()
|
||||
mock_cluster.return_value = mock_cluster_instance
|
||||
|
||||
with patch.dict(os.environ, env_vars, clear=True):
|
||||
processor = ObjectsWriter(taskgroup=MagicMock())
|
||||
processor.connect_cassandra()
|
||||
|
||||
# Verify auth provider was created with correct credentials
|
||||
mock_auth_provider.assert_called_once_with(
|
||||
username='auth-user',
|
||||
password='auth-pass'
|
||||
)
|
||||
|
||||
# Verify cluster was configured with auth provider
|
||||
call_args = mock_cluster.call_args
|
||||
assert 'auth_provider' in call_args.kwargs
|
||||
assert call_args.kwargs['auth_provider'] == mock_auth_instance
|
||||
|
||||
|
||||
class TestTriplesQueryConfiguration:
|
||||
"""Test Cassandra configuration in triples query processor."""
|
||||
|
||||
@patch('trustgraph.direct.cassandra_kg.KnowledgeGraph')
|
||||
def test_environment_variable_configuration(self, mock_trust_graph):
|
||||
"""Test processor picks up configuration from environment variables."""
|
||||
env_vars = {
|
||||
'CASSANDRA_HOST': 'query-env-host1,query-env-host2',
|
||||
'CASSANDRA_USERNAME': 'query-env-user',
|
||||
'CASSANDRA_PASSWORD': 'query-env-pass'
|
||||
}
|
||||
|
||||
with patch.dict(os.environ, env_vars, clear=True):
|
||||
processor = TriplesQuery(taskgroup=MagicMock())
|
||||
|
||||
assert processor.cassandra_host == ['query-env-host1', 'query-env-host2']
|
||||
assert processor.cassandra_username == 'query-env-user'
|
||||
assert processor.cassandra_password == 'query-env-pass'
|
||||
|
||||
@patch('trustgraph.direct.cassandra_kg.KnowledgeGraph')
|
||||
def test_only_new_parameters_work(self, mock_trust_graph):
|
||||
"""Test that only new parameters work."""
|
||||
processor = TriplesQuery(
|
||||
taskgroup=MagicMock(),
|
||||
cassandra_host='new-host',
|
||||
graph_host='old-host', # Should be ignored
|
||||
cassandra_username='new-user',
|
||||
graph_username='old-user' # Should be ignored
|
||||
)
|
||||
|
||||
# Only new parameters should work
|
||||
assert processor.cassandra_host == ['new-host']
|
||||
assert processor.cassandra_username == 'new-user'
|
||||
|
||||
|
||||
class TestKgStoreConfiguration:
|
||||
"""Test Cassandra configuration in knowledge store processor."""
|
||||
|
||||
@patch('trustgraph.storage.knowledge.store.KnowledgeTableStore')
|
||||
def test_environment_variable_configuration(self, mock_table_store):
|
||||
"""Test kg-store picks up configuration from environment variables."""
|
||||
env_vars = {
|
||||
'CASSANDRA_HOST': 'kg-env-host1,kg-env-host2,kg-env-host3',
|
||||
'CASSANDRA_USERNAME': 'kg-env-user',
|
||||
'CASSANDRA_PASSWORD': 'kg-env-pass'
|
||||
}
|
||||
|
||||
mock_store_instance = MagicMock()
|
||||
mock_table_store.return_value = mock_store_instance
|
||||
|
||||
with patch.dict(os.environ, env_vars, clear=True):
|
||||
processor = KgStore(taskgroup=MagicMock())
|
||||
|
||||
# Verify KnowledgeTableStore was called with resolved config
|
||||
mock_table_store.assert_called_once_with(
|
||||
cassandra_host=['kg-env-host1', 'kg-env-host2', 'kg-env-host3'],
|
||||
cassandra_username='kg-env-user',
|
||||
cassandra_password='kg-env-pass',
|
||||
keyspace='knowledge'
|
||||
)
|
||||
|
||||
@patch('trustgraph.storage.knowledge.store.KnowledgeTableStore')
|
||||
def test_explicit_parameters(self, mock_table_store):
|
||||
"""Test kg-store with explicit parameters."""
|
||||
mock_store_instance = MagicMock()
|
||||
mock_table_store.return_value = mock_store_instance
|
||||
|
||||
processor = KgStore(
|
||||
taskgroup=MagicMock(),
|
||||
cassandra_host='explicit-host',
|
||||
cassandra_username='explicit-user',
|
||||
cassandra_password='explicit-pass'
|
||||
)
|
||||
|
||||
# Verify KnowledgeTableStore was called with explicit config
|
||||
mock_table_store.assert_called_once_with(
|
||||
cassandra_host=['explicit-host'],
|
||||
cassandra_username='explicit-user',
|
||||
cassandra_password='explicit-pass',
|
||||
keyspace='knowledge'
|
||||
)
|
||||
|
||||
@patch('trustgraph.storage.knowledge.store.KnowledgeTableStore')
|
||||
def test_no_backward_compatibility_cassandra_user(self, mock_table_store):
|
||||
"""Test that cassandra_user parameter is no longer supported."""
|
||||
mock_store_instance = MagicMock()
|
||||
mock_table_store.return_value = mock_store_instance
|
||||
|
||||
processor = KgStore(
|
||||
taskgroup=MagicMock(),
|
||||
cassandra_host='compat-host',
|
||||
cassandra_user='compat-user', # Old parameter name - should be ignored
|
||||
cassandra_password='compat-pass'
|
||||
)
|
||||
|
||||
# cassandra_user should be ignored
|
||||
mock_table_store.assert_called_once_with(
|
||||
cassandra_host=['compat-host'],
|
||||
cassandra_username=None, # Should be None since cassandra_user is ignored
|
||||
cassandra_password='compat-pass',
|
||||
keyspace='knowledge'
|
||||
)
|
||||
|
||||
@patch('trustgraph.storage.knowledge.store.KnowledgeTableStore')
|
||||
def test_default_configuration(self, mock_table_store):
|
||||
"""Test kg-store default configuration."""
|
||||
mock_store_instance = MagicMock()
|
||||
mock_table_store.return_value = mock_store_instance
|
||||
|
||||
with patch.dict(os.environ, {}, clear=True):
|
||||
processor = KgStore(taskgroup=MagicMock())
|
||||
|
||||
# Should use defaults
|
||||
mock_table_store.assert_called_once_with(
|
||||
cassandra_host=['cassandra'],
|
||||
cassandra_username=None,
|
||||
cassandra_password=None,
|
||||
keyspace='knowledge'
|
||||
)
|
||||
|
||||
|
||||
class TestCommandLineArgumentHandling:
|
||||
"""Test command-line argument parsing in processors."""
|
||||
|
||||
def test_triples_writer_add_args(self):
|
||||
"""Test that triples writer adds standard Cassandra arguments."""
|
||||
import argparse
|
||||
from trustgraph.storage.triples.cassandra.write import Processor as TriplesWriter
|
||||
|
||||
parser = argparse.ArgumentParser()
|
||||
TriplesWriter.add_args(parser)
|
||||
|
||||
# Parse empty args to check that arguments exist
|
||||
args = parser.parse_args([])
|
||||
|
||||
assert hasattr(args, 'cassandra_host')
|
||||
assert hasattr(args, 'cassandra_username')
|
||||
assert hasattr(args, 'cassandra_password')
|
||||
|
||||
def test_objects_writer_add_args(self):
|
||||
"""Test that objects writer adds standard Cassandra arguments."""
|
||||
import argparse
|
||||
from trustgraph.storage.objects.cassandra.write import Processor as ObjectsWriter
|
||||
|
||||
parser = argparse.ArgumentParser()
|
||||
ObjectsWriter.add_args(parser)
|
||||
|
||||
# Parse empty args to check that arguments exist
|
||||
args = parser.parse_args([])
|
||||
|
||||
assert hasattr(args, 'cassandra_host')
|
||||
assert hasattr(args, 'cassandra_username')
|
||||
assert hasattr(args, 'cassandra_password')
|
||||
assert hasattr(args, 'config_type') # Objects writer specific arg
|
||||
|
||||
def test_triples_query_add_args(self):
|
||||
"""Test that triples query adds standard Cassandra arguments."""
|
||||
import argparse
|
||||
from trustgraph.query.triples.cassandra.service import Processor as TriplesQuery
|
||||
|
||||
parser = argparse.ArgumentParser()
|
||||
TriplesQuery.add_args(parser)
|
||||
|
||||
# Parse empty args to check that arguments exist
|
||||
args = parser.parse_args([])
|
||||
|
||||
assert hasattr(args, 'cassandra_host')
|
||||
assert hasattr(args, 'cassandra_username')
|
||||
assert hasattr(args, 'cassandra_password')
|
||||
|
||||
def test_kg_store_add_args(self):
|
||||
"""Test that kg-store now adds Cassandra arguments (previously missing)."""
|
||||
import argparse
|
||||
from trustgraph.storage.knowledge.store import Processor as KgStore
|
||||
|
||||
parser = argparse.ArgumentParser()
|
||||
KgStore.add_args(parser)
|
||||
|
||||
# Parse empty args to check that arguments exist
|
||||
args = parser.parse_args([])
|
||||
|
||||
assert hasattr(args, 'cassandra_host')
|
||||
assert hasattr(args, 'cassandra_username')
|
||||
assert hasattr(args, 'cassandra_password')
|
||||
|
||||
def test_help_text_with_environment_variables(self):
|
||||
"""Test that help text shows environment variable values."""
|
||||
import argparse
|
||||
from trustgraph.storage.triples.cassandra.write import Processor as TriplesWriter
|
||||
|
||||
env_vars = {
|
||||
'CASSANDRA_HOST': 'help-host1,help-host2',
|
||||
'CASSANDRA_USERNAME': 'help-user',
|
||||
'CASSANDRA_PASSWORD': 'help-pass'
|
||||
}
|
||||
|
||||
with patch.dict(os.environ, env_vars, clear=True):
|
||||
parser = argparse.ArgumentParser()
|
||||
TriplesWriter.add_args(parser)
|
||||
|
||||
help_text = parser.format_help()
|
||||
|
||||
# Should show environment variable values (except password)
|
||||
# Help text may have line breaks - argparse breaks long lines
|
||||
# So check for the components that should be there
|
||||
assert 'help-' in help_text and 'host1' in help_text
|
||||
assert 'help-host2' in help_text
|
||||
assert 'help-user' in help_text
|
||||
assert '<set>' in help_text # Password should be hidden
|
||||
assert 'help-pass' not in help_text # Password value not shown
|
||||
assert '[from CASSANDRA_HOST]' in help_text
|
||||
# Check key components (may be split across lines by argparse)
|
||||
assert '[from' in help_text and 'CASSANDRA_USERNAME]' in help_text
|
||||
assert '[from' in help_text and 'CASSANDRA_PASSWORD]' in help_text
|
||||
|
||||
|
||||
class TestConfigurationPriorityIntegration:
|
||||
"""Test complete configuration priority chain in processors."""
|
||||
|
||||
@patch('trustgraph.direct.cassandra_kg.KnowledgeGraph')
|
||||
def test_complete_priority_chain(self, mock_trust_graph):
|
||||
"""Test CLI params > env vars > defaults priority in actual processor."""
|
||||
env_vars = {
|
||||
'CASSANDRA_HOST': 'env-host',
|
||||
'CASSANDRA_USERNAME': 'env-user',
|
||||
'CASSANDRA_PASSWORD': 'env-pass'
|
||||
}
|
||||
|
||||
with patch.dict(os.environ, env_vars, clear=True):
|
||||
# Explicit parameters should override environment
|
||||
processor = TriplesWriter(
|
||||
taskgroup=MagicMock(),
|
||||
cassandra_host='cli-host1,cli-host2',
|
||||
cassandra_username='cli-user'
|
||||
# Password not provided - should fall back to env
|
||||
)
|
||||
|
||||
assert processor.cassandra_host == ['cli-host1', 'cli-host2'] # From CLI
|
||||
assert processor.cassandra_username == 'cli-user' # From CLI
|
||||
assert processor.cassandra_password == 'env-pass' # From env
|
||||
|
||||
@patch('trustgraph.storage.knowledge.store.KnowledgeTableStore')
|
||||
def test_kg_store_priority_chain(self, mock_table_store):
|
||||
"""Test configuration priority chain in kg-store processor."""
|
||||
mock_store_instance = MagicMock()
|
||||
mock_table_store.return_value = mock_store_instance
|
||||
|
||||
env_vars = {
|
||||
'CASSANDRA_HOST': 'env-host1,env-host2',
|
||||
'CASSANDRA_USERNAME': 'env-user',
|
||||
'CASSANDRA_PASSWORD': 'env-pass'
|
||||
}
|
||||
|
||||
with patch.dict(os.environ, env_vars, clear=True):
|
||||
processor = KgStore(
|
||||
taskgroup=MagicMock(),
|
||||
cassandra_host='param-host'
|
||||
# username and password not provided - should use env
|
||||
)
|
||||
|
||||
# Verify correct priority resolution
|
||||
mock_table_store.assert_called_once_with(
|
||||
cassandra_host=['param-host'], # From parameter
|
||||
cassandra_username='env-user', # From environment
|
||||
cassandra_password='env-pass', # From environment
|
||||
keyspace='knowledge'
|
||||
)
|
||||
|
|
@ -91,37 +91,41 @@ class TestMilvusDocEmbeddingsStorageProcessor:
|
|||
|
||||
await processor.store_document_embeddings(message)
|
||||
|
||||
# Verify insert was called for each vector
|
||||
# Verify insert was called for each vector with user/collection parameters
|
||||
expected_calls = [
|
||||
([0.1, 0.2, 0.3], "Test document content"),
|
||||
([0.4, 0.5, 0.6], "Test document content"),
|
||||
([0.1, 0.2, 0.3], "Test document content", 'test_user', 'test_collection'),
|
||||
([0.4, 0.5, 0.6], "Test document content", 'test_user', 'test_collection'),
|
||||
]
|
||||
|
||||
assert processor.vecstore.insert.call_count == 2
|
||||
for i, (expected_vec, expected_doc) in enumerate(expected_calls):
|
||||
for i, (expected_vec, expected_doc, expected_user, expected_collection) in enumerate(expected_calls):
|
||||
actual_call = processor.vecstore.insert.call_args_list[i]
|
||||
assert actual_call[0][0] == expected_vec
|
||||
assert actual_call[0][1] == expected_doc
|
||||
assert actual_call[0][2] == expected_user
|
||||
assert actual_call[0][3] == expected_collection
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_store_document_embeddings_multiple_chunks(self, processor, mock_message):
|
||||
"""Test storing document embeddings for multiple chunks"""
|
||||
await processor.store_document_embeddings(mock_message)
|
||||
|
||||
# Verify insert was called for each vector of each chunk
|
||||
# Verify insert was called for each vector of each chunk with user/collection parameters
|
||||
expected_calls = [
|
||||
# Chunk 1 vectors
|
||||
([0.1, 0.2, 0.3], "This is the first document chunk"),
|
||||
([0.4, 0.5, 0.6], "This is the first document chunk"),
|
||||
([0.1, 0.2, 0.3], "This is the first document chunk", 'test_user', 'test_collection'),
|
||||
([0.4, 0.5, 0.6], "This is the first document chunk", 'test_user', 'test_collection'),
|
||||
# Chunk 2 vectors
|
||||
([0.7, 0.8, 0.9], "This is the second document chunk"),
|
||||
([0.7, 0.8, 0.9], "This is the second document chunk", 'test_user', 'test_collection'),
|
||||
]
|
||||
|
||||
assert processor.vecstore.insert.call_count == 3
|
||||
for i, (expected_vec, expected_doc) in enumerate(expected_calls):
|
||||
for i, (expected_vec, expected_doc, expected_user, expected_collection) in enumerate(expected_calls):
|
||||
actual_call = processor.vecstore.insert.call_args_list[i]
|
||||
assert actual_call[0][0] == expected_vec
|
||||
assert actual_call[0][1] == expected_doc
|
||||
assert actual_call[0][2] == expected_user
|
||||
assert actual_call[0][3] == expected_collection
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_store_document_embeddings_empty_chunk(self, processor):
|
||||
|
|
@ -185,9 +189,9 @@ class TestMilvusDocEmbeddingsStorageProcessor:
|
|||
|
||||
await processor.store_document_embeddings(message)
|
||||
|
||||
# Verify only valid chunk was inserted
|
||||
# Verify only valid chunk was inserted with user/collection parameters
|
||||
processor.vecstore.insert.assert_called_once_with(
|
||||
[0.1, 0.2, 0.3], "Valid document content"
|
||||
[0.1, 0.2, 0.3], "Valid document content", 'test_user', 'test_collection'
|
||||
)
|
||||
|
||||
@pytest.mark.asyncio
|
||||
|
|
@ -243,18 +247,20 @@ class TestMilvusDocEmbeddingsStorageProcessor:
|
|||
|
||||
await processor.store_document_embeddings(message)
|
||||
|
||||
# Verify all vectors were inserted regardless of dimension
|
||||
# Verify all vectors were inserted regardless of dimension with user/collection parameters
|
||||
expected_calls = [
|
||||
([0.1, 0.2], "Document with mixed dimensions"),
|
||||
([0.3, 0.4, 0.5, 0.6], "Document with mixed dimensions"),
|
||||
([0.7, 0.8, 0.9], "Document with mixed dimensions"),
|
||||
([0.1, 0.2], "Document with mixed dimensions", 'test_user', 'test_collection'),
|
||||
([0.3, 0.4, 0.5, 0.6], "Document with mixed dimensions", 'test_user', 'test_collection'),
|
||||
([0.7, 0.8, 0.9], "Document with mixed dimensions", 'test_user', 'test_collection'),
|
||||
]
|
||||
|
||||
assert processor.vecstore.insert.call_count == 3
|
||||
for i, (expected_vec, expected_doc) in enumerate(expected_calls):
|
||||
for i, (expected_vec, expected_doc, expected_user, expected_collection) in enumerate(expected_calls):
|
||||
actual_call = processor.vecstore.insert.call_args_list[i]
|
||||
assert actual_call[0][0] == expected_vec
|
||||
assert actual_call[0][1] == expected_doc
|
||||
assert actual_call[0][2] == expected_user
|
||||
assert actual_call[0][3] == expected_collection
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_store_document_embeddings_unicode_content(self, processor):
|
||||
|
|
@ -272,9 +278,9 @@ class TestMilvusDocEmbeddingsStorageProcessor:
|
|||
|
||||
await processor.store_document_embeddings(message)
|
||||
|
||||
# Verify Unicode content was properly decoded and inserted
|
||||
# Verify Unicode content was properly decoded and inserted with user/collection parameters
|
||||
processor.vecstore.insert.assert_called_once_with(
|
||||
[0.1, 0.2, 0.3], "Document with Unicode: éñ中文🚀"
|
||||
[0.1, 0.2, 0.3], "Document with Unicode: éñ中文🚀", 'test_user', 'test_collection'
|
||||
)
|
||||
|
||||
@pytest.mark.asyncio
|
||||
|
|
@ -295,9 +301,9 @@ class TestMilvusDocEmbeddingsStorageProcessor:
|
|||
|
||||
await processor.store_document_embeddings(message)
|
||||
|
||||
# Verify large content was inserted
|
||||
# Verify large content was inserted with user/collection parameters
|
||||
processor.vecstore.insert.assert_called_once_with(
|
||||
[0.1, 0.2, 0.3], large_content
|
||||
[0.1, 0.2, 0.3], large_content, 'test_user', 'test_collection'
|
||||
)
|
||||
|
||||
@pytest.mark.asyncio
|
||||
|
|
@ -316,9 +322,103 @@ class TestMilvusDocEmbeddingsStorageProcessor:
|
|||
|
||||
await processor.store_document_embeddings(message)
|
||||
|
||||
# Verify whitespace content was inserted (not filtered out)
|
||||
# Verify whitespace content was inserted (not filtered out) with user/collection parameters
|
||||
processor.vecstore.insert.assert_called_once_with(
|
||||
[0.1, 0.2, 0.3], " \n\t "
|
||||
[0.1, 0.2, 0.3], " \n\t ", 'test_user', 'test_collection'
|
||||
)
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_store_document_embeddings_different_user_collection_combinations(self, processor):
|
||||
"""Test storing document embeddings with different user/collection combinations"""
|
||||
test_cases = [
|
||||
('user1', 'collection1'),
|
||||
('user2', 'collection2'),
|
||||
('admin', 'production'),
|
||||
('test@domain.com', 'test-collection.v1'),
|
||||
]
|
||||
|
||||
for user, collection in test_cases:
|
||||
processor.vecstore.reset_mock() # Reset mock for each test case
|
||||
|
||||
message = MagicMock()
|
||||
message.metadata = MagicMock()
|
||||
message.metadata.user = user
|
||||
message.metadata.collection = collection
|
||||
|
||||
chunk = ChunkEmbeddings(
|
||||
chunk=b"Test content",
|
||||
vectors=[[0.1, 0.2, 0.3]]
|
||||
)
|
||||
message.chunks = [chunk]
|
||||
|
||||
await processor.store_document_embeddings(message)
|
||||
|
||||
# Verify insert was called with the correct user/collection
|
||||
processor.vecstore.insert.assert_called_once_with(
|
||||
[0.1, 0.2, 0.3], "Test content", user, collection
|
||||
)
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_store_document_embeddings_user_collection_parameter_isolation(self, processor):
|
||||
"""Test that different user/collection combinations are properly isolated"""
|
||||
# Store embeddings for user1/collection1
|
||||
message1 = MagicMock()
|
||||
message1.metadata = MagicMock()
|
||||
message1.metadata.user = 'user1'
|
||||
message1.metadata.collection = 'collection1'
|
||||
chunk1 = ChunkEmbeddings(
|
||||
chunk=b"User1 content",
|
||||
vectors=[[0.1, 0.2, 0.3]]
|
||||
)
|
||||
message1.chunks = [chunk1]
|
||||
|
||||
# Store embeddings for user2/collection2
|
||||
message2 = MagicMock()
|
||||
message2.metadata = MagicMock()
|
||||
message2.metadata.user = 'user2'
|
||||
message2.metadata.collection = 'collection2'
|
||||
chunk2 = ChunkEmbeddings(
|
||||
chunk=b"User2 content",
|
||||
vectors=[[0.4, 0.5, 0.6]]
|
||||
)
|
||||
message2.chunks = [chunk2]
|
||||
|
||||
await processor.store_document_embeddings(message1)
|
||||
await processor.store_document_embeddings(message2)
|
||||
|
||||
# Verify both calls were made with correct parameters
|
||||
expected_calls = [
|
||||
([0.1, 0.2, 0.3], "User1 content", 'user1', 'collection1'),
|
||||
([0.4, 0.5, 0.6], "User2 content", 'user2', 'collection2'),
|
||||
]
|
||||
|
||||
assert processor.vecstore.insert.call_count == 2
|
||||
for i, (expected_vec, expected_doc, expected_user, expected_collection) in enumerate(expected_calls):
|
||||
actual_call = processor.vecstore.insert.call_args_list[i]
|
||||
assert actual_call[0][0] == expected_vec
|
||||
assert actual_call[0][1] == expected_doc
|
||||
assert actual_call[0][2] == expected_user
|
||||
assert actual_call[0][3] == expected_collection
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_store_document_embeddings_special_character_user_collection(self, processor):
|
||||
"""Test storing document embeddings with special characters in user/collection names"""
|
||||
message = MagicMock()
|
||||
message.metadata = MagicMock()
|
||||
message.metadata.user = 'user@domain.com' # Email-like user
|
||||
message.metadata.collection = 'test-collection.v1' # Collection with special chars
|
||||
|
||||
chunk = ChunkEmbeddings(
|
||||
chunk=b"Special chars test",
|
||||
vectors=[[0.1, 0.2, 0.3]]
|
||||
)
|
||||
message.chunks = [chunk]
|
||||
|
||||
await processor.store_document_embeddings(message)
|
||||
|
||||
# Verify the exact user/collection strings are passed (sanitization happens in DocVectors)
|
||||
processor.vecstore.insert.assert_called_once_with(
|
||||
[0.1, 0.2, 0.3], "Special chars test", 'user@domain.com', 'test-collection.v1'
|
||||
)
|
||||
|
||||
def test_add_args_method(self):
|
||||
|
|
|
|||
|
|
@ -135,7 +135,7 @@ class TestPineconeDocEmbeddingsStorageProcessor:
|
|||
await processor.store_document_embeddings(message)
|
||||
|
||||
# Verify index name and operations
|
||||
expected_index_name = "d-test_user-test_collection-3"
|
||||
expected_index_name = "d-test_user-test_collection"
|
||||
processor.pinecone.Index.assert_called_with(expected_index_name)
|
||||
|
||||
# Verify upsert was called for each vector
|
||||
|
|
@ -203,7 +203,7 @@ class TestPineconeDocEmbeddingsStorageProcessor:
|
|||
await processor.store_document_embeddings(message)
|
||||
|
||||
# Verify index creation was called
|
||||
expected_index_name = "d-test_user-test_collection-3"
|
||||
expected_index_name = "d-test_user-test_collection"
|
||||
processor.pinecone.create_index.assert_called_once()
|
||||
create_call = processor.pinecone.create_index.call_args
|
||||
assert create_call[1]['name'] == expected_index_name
|
||||
|
|
@ -299,12 +299,11 @@ class TestPineconeDocEmbeddingsStorageProcessor:
|
|||
mock_index_3d = MagicMock()
|
||||
|
||||
def mock_index_side_effect(name):
|
||||
if name.endswith("-2"):
|
||||
return mock_index_2d
|
||||
elif name.endswith("-4"):
|
||||
return mock_index_4d
|
||||
elif name.endswith("-3"):
|
||||
return mock_index_3d
|
||||
# All dimensions now use the same index name pattern
|
||||
# Different dimensions will be handled within the same index
|
||||
if "test_user" in name and "test_collection" in name:
|
||||
return mock_index_2d # Just return one mock for all
|
||||
return MagicMock()
|
||||
|
||||
processor.pinecone.Index.side_effect = mock_index_side_effect
|
||||
processor.pinecone.has_index.return_value = True
|
||||
|
|
@ -312,11 +311,10 @@ class TestPineconeDocEmbeddingsStorageProcessor:
|
|||
with patch('uuid.uuid4', side_effect=['id1', 'id2', 'id3']):
|
||||
await processor.store_document_embeddings(message)
|
||||
|
||||
# Verify different indexes were used for different dimensions
|
||||
assert processor.pinecone.Index.call_count == 3
|
||||
mock_index_2d.upsert.assert_called_once()
|
||||
mock_index_4d.upsert.assert_called_once()
|
||||
mock_index_3d.upsert.assert_called_once()
|
||||
# Verify all vectors are now stored in the same index
|
||||
# (Pinecone can handle mixed dimensions in the same index)
|
||||
assert processor.pinecone.Index.call_count == 3 # Called once per vector
|
||||
mock_index_2d.upsert.call_count == 3 # All upserts go to same index
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_store_document_embeddings_empty_chunks_list(self, processor):
|
||||
|
|
|
|||
|
|
@ -106,7 +106,7 @@ class TestQdrantDocEmbeddingsStorage(IsolatedAsyncioTestCase):
|
|||
|
||||
# Assert
|
||||
# Verify collection existence was checked
|
||||
expected_collection = 'd_test_user_test_collection_3'
|
||||
expected_collection = 'd_test_user_test_collection'
|
||||
mock_qdrant_instance.collection_exists.assert_called_once_with(expected_collection)
|
||||
|
||||
# Verify upsert was called
|
||||
|
|
@ -309,7 +309,7 @@ class TestQdrantDocEmbeddingsStorage(IsolatedAsyncioTestCase):
|
|||
await processor.store_document_embeddings(mock_message)
|
||||
|
||||
# Assert
|
||||
expected_collection = 'd_new_user_new_collection_5'
|
||||
expected_collection = 'd_new_user_new_collection'
|
||||
|
||||
# Verify collection existence check and creation
|
||||
mock_qdrant_instance.collection_exists.assert_called_once_with(expected_collection)
|
||||
|
|
@ -408,7 +408,7 @@ class TestQdrantDocEmbeddingsStorage(IsolatedAsyncioTestCase):
|
|||
await processor.store_document_embeddings(mock_message2)
|
||||
|
||||
# Assert
|
||||
expected_collection = 'd_cache_user_cache_collection_3'
|
||||
expected_collection = 'd_cache_user_cache_collection'
|
||||
assert processor.last_collection == expected_collection
|
||||
|
||||
# Verify second call skipped existence check (cached)
|
||||
|
|
@ -455,17 +455,16 @@ class TestQdrantDocEmbeddingsStorage(IsolatedAsyncioTestCase):
|
|||
await processor.store_document_embeddings(mock_message)
|
||||
|
||||
# Assert
|
||||
# Should check existence of both collections
|
||||
expected_collections = ['d_dim_user_dim_collection_2', 'd_dim_user_dim_collection_3']
|
||||
actual_calls = [call.args[0] for call in mock_qdrant_instance.collection_exists.call_args_list]
|
||||
assert actual_calls == expected_collections
|
||||
|
||||
# Should upsert to both collections
|
||||
# Should check existence of the same collection (dimensions no longer create separate collections)
|
||||
expected_collection = 'd_dim_user_dim_collection'
|
||||
mock_qdrant_instance.collection_exists.assert_called_once_with(expected_collection)
|
||||
|
||||
# Should upsert to the same collection for both vectors
|
||||
assert mock_qdrant_instance.upsert.call_count == 2
|
||||
|
||||
|
||||
upsert_calls = mock_qdrant_instance.upsert.call_args_list
|
||||
assert upsert_calls[0][1]['collection_name'] == 'd_dim_user_dim_collection_2'
|
||||
assert upsert_calls[1][1]['collection_name'] == 'd_dim_user_dim_collection_3'
|
||||
assert upsert_calls[0][1]['collection_name'] == expected_collection
|
||||
assert upsert_calls[1][1]['collection_name'] == expected_collection
|
||||
|
||||
@patch('trustgraph.storage.doc_embeddings.qdrant.write.QdrantClient')
|
||||
@patch('trustgraph.base.DocumentEmbeddingsStoreService.__init__')
|
||||
|
|
|
|||
|
|
@ -91,37 +91,41 @@ class TestMilvusGraphEmbeddingsStorageProcessor:
|
|||
|
||||
await processor.store_graph_embeddings(message)
|
||||
|
||||
# Verify insert was called for each vector
|
||||
# Verify insert was called for each vector with user/collection parameters
|
||||
expected_calls = [
|
||||
([0.1, 0.2, 0.3], 'http://example.com/entity'),
|
||||
([0.4, 0.5, 0.6], 'http://example.com/entity'),
|
||||
([0.1, 0.2, 0.3], 'http://example.com/entity', 'test_user', 'test_collection'),
|
||||
([0.4, 0.5, 0.6], 'http://example.com/entity', 'test_user', 'test_collection'),
|
||||
]
|
||||
|
||||
assert processor.vecstore.insert.call_count == 2
|
||||
for i, (expected_vec, expected_entity) in enumerate(expected_calls):
|
||||
for i, (expected_vec, expected_entity, expected_user, expected_collection) in enumerate(expected_calls):
|
||||
actual_call = processor.vecstore.insert.call_args_list[i]
|
||||
assert actual_call[0][0] == expected_vec
|
||||
assert actual_call[0][1] == expected_entity
|
||||
assert actual_call[0][2] == expected_user
|
||||
assert actual_call[0][3] == expected_collection
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_store_graph_embeddings_multiple_entities(self, processor, mock_message):
|
||||
"""Test storing graph embeddings for multiple entities"""
|
||||
await processor.store_graph_embeddings(mock_message)
|
||||
|
||||
# Verify insert was called for each vector of each entity
|
||||
# Verify insert was called for each vector of each entity with user/collection parameters
|
||||
expected_calls = [
|
||||
# Entity 1 vectors
|
||||
([0.1, 0.2, 0.3], 'http://example.com/entity1'),
|
||||
([0.4, 0.5, 0.6], 'http://example.com/entity1'),
|
||||
([0.1, 0.2, 0.3], 'http://example.com/entity1', 'test_user', 'test_collection'),
|
||||
([0.4, 0.5, 0.6], 'http://example.com/entity1', 'test_user', 'test_collection'),
|
||||
# Entity 2 vectors
|
||||
([0.7, 0.8, 0.9], 'literal entity'),
|
||||
([0.7, 0.8, 0.9], 'literal entity', 'test_user', 'test_collection'),
|
||||
]
|
||||
|
||||
assert processor.vecstore.insert.call_count == 3
|
||||
for i, (expected_vec, expected_entity) in enumerate(expected_calls):
|
||||
for i, (expected_vec, expected_entity, expected_user, expected_collection) in enumerate(expected_calls):
|
||||
actual_call = processor.vecstore.insert.call_args_list[i]
|
||||
assert actual_call[0][0] == expected_vec
|
||||
assert actual_call[0][1] == expected_entity
|
||||
assert actual_call[0][2] == expected_user
|
||||
assert actual_call[0][3] == expected_collection
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_store_graph_embeddings_empty_entity_value(self, processor):
|
||||
|
|
@ -185,9 +189,9 @@ class TestMilvusGraphEmbeddingsStorageProcessor:
|
|||
|
||||
await processor.store_graph_embeddings(message)
|
||||
|
||||
# Verify only valid entity was inserted
|
||||
# Verify only valid entity was inserted with user/collection parameters
|
||||
processor.vecstore.insert.assert_called_once_with(
|
||||
[0.1, 0.2, 0.3], 'http://example.com/valid'
|
||||
[0.1, 0.2, 0.3], 'http://example.com/valid', 'test_user', 'test_collection'
|
||||
)
|
||||
|
||||
@pytest.mark.asyncio
|
||||
|
|
|
|||
|
|
@ -135,7 +135,7 @@ class TestPineconeGraphEmbeddingsStorageProcessor:
|
|||
await processor.store_graph_embeddings(message)
|
||||
|
||||
# Verify index name and operations
|
||||
expected_index_name = "t-test_user-test_collection-3"
|
||||
expected_index_name = "t-test_user-test_collection"
|
||||
processor.pinecone.Index.assert_called_with(expected_index_name)
|
||||
|
||||
# Verify upsert was called for each vector
|
||||
|
|
@ -203,7 +203,7 @@ class TestPineconeGraphEmbeddingsStorageProcessor:
|
|||
await processor.store_graph_embeddings(message)
|
||||
|
||||
# Verify index creation was called
|
||||
expected_index_name = "t-test_user-test_collection-3"
|
||||
expected_index_name = "t-test_user-test_collection"
|
||||
processor.pinecone.create_index.assert_called_once()
|
||||
create_call = processor.pinecone.create_index.call_args
|
||||
assert create_call[1]['name'] == expected_index_name
|
||||
|
|
@ -256,12 +256,12 @@ class TestPineconeGraphEmbeddingsStorageProcessor:
|
|||
|
||||
@pytest.mark.asyncio
|
||||
async def test_store_graph_embeddings_different_vector_dimensions(self, processor):
|
||||
"""Test storing graph embeddings with different vector dimensions"""
|
||||
"""Test storing graph embeddings with different vector dimensions to same index"""
|
||||
message = MagicMock()
|
||||
message.metadata = MagicMock()
|
||||
message.metadata.user = 'test_user'
|
||||
message.metadata.collection = 'test_collection'
|
||||
|
||||
|
||||
entity = EntityEmbeddings(
|
||||
entity=Value(value="test_entity", is_uri=False),
|
||||
vectors=[
|
||||
|
|
@ -271,30 +271,21 @@ class TestPineconeGraphEmbeddingsStorageProcessor:
|
|||
]
|
||||
)
|
||||
message.entities = [entity]
|
||||
|
||||
mock_index_2d = MagicMock()
|
||||
mock_index_4d = MagicMock()
|
||||
mock_index_3d = MagicMock()
|
||||
|
||||
def mock_index_side_effect(name):
|
||||
if name.endswith("-2"):
|
||||
return mock_index_2d
|
||||
elif name.endswith("-4"):
|
||||
return mock_index_4d
|
||||
elif name.endswith("-3"):
|
||||
return mock_index_3d
|
||||
|
||||
processor.pinecone.Index.side_effect = mock_index_side_effect
|
||||
|
||||
# All vectors now use the same index (no dimension in name)
|
||||
mock_index = MagicMock()
|
||||
processor.pinecone.Index.return_value = mock_index
|
||||
processor.pinecone.has_index.return_value = True
|
||||
|
||||
|
||||
with patch('uuid.uuid4', side_effect=['id1', 'id2', 'id3']):
|
||||
await processor.store_graph_embeddings(message)
|
||||
|
||||
# Verify different indexes were used for different dimensions
|
||||
assert processor.pinecone.Index.call_count == 3
|
||||
mock_index_2d.upsert.assert_called_once()
|
||||
mock_index_4d.upsert.assert_called_once()
|
||||
mock_index_3d.upsert.assert_called_once()
|
||||
|
||||
# Verify same index was used for all dimensions
|
||||
expected_index_name = 't-test_user-test_collection'
|
||||
processor.pinecone.Index.assert_called_with(expected_index_name)
|
||||
|
||||
# Verify all vectors were upserted to the same index
|
||||
assert mock_index.upsert.call_count == 3
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_store_graph_embeddings_empty_entities_list(self, processor):
|
||||
|
|
|
|||
|
|
@ -69,7 +69,7 @@ class TestQdrantGraphEmbeddingsStorage(IsolatedAsyncioTestCase):
|
|||
collection_name = processor.get_collection(dim=512, user='test_user', collection='test_collection')
|
||||
|
||||
# Assert
|
||||
expected_name = 't_test_user_test_collection_512'
|
||||
expected_name = 't_test_user_test_collection'
|
||||
assert collection_name == expected_name
|
||||
assert processor.last_collection == expected_name
|
||||
|
||||
|
|
@ -118,7 +118,7 @@ class TestQdrantGraphEmbeddingsStorage(IsolatedAsyncioTestCase):
|
|||
|
||||
# Assert
|
||||
# Verify collection existence was checked
|
||||
expected_collection = 't_test_user_test_collection_3'
|
||||
expected_collection = 't_test_user_test_collection'
|
||||
mock_qdrant_instance.collection_exists.assert_called_once_with(expected_collection)
|
||||
|
||||
# Verify upsert was called
|
||||
|
|
@ -156,7 +156,7 @@ class TestQdrantGraphEmbeddingsStorage(IsolatedAsyncioTestCase):
|
|||
collection_name = processor.get_collection(dim=256, user='existing_user', collection='existing_collection')
|
||||
|
||||
# Assert
|
||||
expected_name = 't_existing_user_existing_collection_256'
|
||||
expected_name = 't_existing_user_existing_collection'
|
||||
assert collection_name == expected_name
|
||||
assert processor.last_collection == expected_name
|
||||
|
||||
|
|
@ -194,7 +194,7 @@ class TestQdrantGraphEmbeddingsStorage(IsolatedAsyncioTestCase):
|
|||
collection_name2 = processor.get_collection(dim=128, user='cache_user', collection='cache_collection')
|
||||
|
||||
# Assert
|
||||
expected_name = 't_cache_user_cache_collection_128'
|
||||
expected_name = 't_cache_user_cache_collection'
|
||||
assert collection_name1 == expected_name
|
||||
assert collection_name2 == expected_name
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,363 @@
|
|||
"""
|
||||
Tests for Memgraph user/collection isolation in storage service
|
||||
"""
|
||||
|
||||
import pytest
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
from trustgraph.storage.triples.memgraph.write import Processor
|
||||
|
||||
|
||||
class TestMemgraphUserCollectionIsolation:
|
||||
"""Test cases for Memgraph storage service with user/collection isolation"""
|
||||
|
||||
@patch('trustgraph.storage.triples.memgraph.write.GraphDatabase')
|
||||
def test_storage_creates_indexes_with_user_collection(self, mock_graph_db):
|
||||
"""Test that storage creates both legacy and user/collection indexes"""
|
||||
mock_driver = MagicMock()
|
||||
mock_graph_db.driver.return_value = mock_driver
|
||||
mock_session = MagicMock()
|
||||
mock_driver.session.return_value.__enter__.return_value = mock_session
|
||||
|
||||
processor = Processor(taskgroup=MagicMock())
|
||||
|
||||
# Verify all indexes were attempted (4 legacy + 4 user/collection = 8 total)
|
||||
assert mock_session.run.call_count == 8
|
||||
|
||||
# Check some specific index creation calls
|
||||
expected_calls = [
|
||||
"CREATE INDEX ON :Node",
|
||||
"CREATE INDEX ON :Node(uri)",
|
||||
"CREATE INDEX ON :Literal",
|
||||
"CREATE INDEX ON :Literal(value)",
|
||||
"CREATE INDEX ON :Node(user)",
|
||||
"CREATE INDEX ON :Node(collection)",
|
||||
"CREATE INDEX ON :Literal(user)",
|
||||
"CREATE INDEX ON :Literal(collection)"
|
||||
]
|
||||
|
||||
for expected_call in expected_calls:
|
||||
mock_session.run.assert_any_call(expected_call)
|
||||
|
||||
@patch('trustgraph.storage.triples.memgraph.write.GraphDatabase')
|
||||
@pytest.mark.asyncio
|
||||
async def test_store_triples_with_user_collection(self, mock_graph_db):
|
||||
"""Test that store_triples includes user/collection in all operations"""
|
||||
mock_driver = MagicMock()
|
||||
mock_graph_db.driver.return_value = mock_driver
|
||||
mock_session = MagicMock()
|
||||
mock_driver.session.return_value.__enter__.return_value = mock_session
|
||||
|
||||
# Mock execute_query response
|
||||
mock_result = MagicMock()
|
||||
mock_summary = MagicMock()
|
||||
mock_summary.counters.nodes_created = 1
|
||||
mock_summary.result_available_after = 10
|
||||
mock_result.summary = mock_summary
|
||||
mock_driver.execute_query.return_value = mock_result
|
||||
|
||||
processor = Processor(taskgroup=MagicMock())
|
||||
|
||||
# Create mock triple with URI object
|
||||
triple = MagicMock()
|
||||
triple.s.value = "http://example.com/subject"
|
||||
triple.p.value = "http://example.com/predicate"
|
||||
triple.o.value = "http://example.com/object"
|
||||
triple.o.is_uri = True
|
||||
|
||||
# Create mock message with metadata
|
||||
mock_message = MagicMock()
|
||||
mock_message.triples = [triple]
|
||||
mock_message.metadata.user = "test_user"
|
||||
mock_message.metadata.collection = "test_collection"
|
||||
|
||||
await processor.store_triples(mock_message)
|
||||
|
||||
# Verify user/collection parameters were passed to all operations
|
||||
# Should have: create_node (subject), create_node (object), relate_node = 3 calls
|
||||
assert mock_driver.execute_query.call_count == 3
|
||||
|
||||
# Check that user and collection were included in all calls
|
||||
for call in mock_driver.execute_query.call_args_list:
|
||||
call_kwargs = call.kwargs if hasattr(call, 'kwargs') else call[1]
|
||||
assert 'user' in call_kwargs
|
||||
assert 'collection' in call_kwargs
|
||||
assert call_kwargs['user'] == "test_user"
|
||||
assert call_kwargs['collection'] == "test_collection"
|
||||
|
||||
@patch('trustgraph.storage.triples.memgraph.write.GraphDatabase')
|
||||
@pytest.mark.asyncio
|
||||
async def test_store_triples_with_default_user_collection(self, mock_graph_db):
|
||||
"""Test that defaults are used when user/collection not provided in metadata"""
|
||||
mock_driver = MagicMock()
|
||||
mock_graph_db.driver.return_value = mock_driver
|
||||
mock_session = MagicMock()
|
||||
mock_driver.session.return_value.__enter__.return_value = mock_session
|
||||
|
||||
# Mock execute_query response
|
||||
mock_result = MagicMock()
|
||||
mock_summary = MagicMock()
|
||||
mock_summary.counters.nodes_created = 1
|
||||
mock_summary.result_available_after = 10
|
||||
mock_result.summary = mock_summary
|
||||
mock_driver.execute_query.return_value = mock_result
|
||||
|
||||
processor = Processor(taskgroup=MagicMock())
|
||||
|
||||
# Create mock triple
|
||||
triple = MagicMock()
|
||||
triple.s.value = "http://example.com/subject"
|
||||
triple.p.value = "http://example.com/predicate"
|
||||
triple.o.value = "literal_value"
|
||||
triple.o.is_uri = False
|
||||
|
||||
# Create mock message without user/collection metadata
|
||||
mock_message = MagicMock()
|
||||
mock_message.triples = [triple]
|
||||
mock_message.metadata.user = None
|
||||
mock_message.metadata.collection = None
|
||||
|
||||
await processor.store_triples(mock_message)
|
||||
|
||||
# Verify defaults were used
|
||||
for call in mock_driver.execute_query.call_args_list:
|
||||
call_kwargs = call.kwargs if hasattr(call, 'kwargs') else call[1]
|
||||
assert call_kwargs['user'] == "default"
|
||||
assert call_kwargs['collection'] == "default"
|
||||
|
||||
@patch('trustgraph.storage.triples.memgraph.write.GraphDatabase')
|
||||
def test_create_node_includes_user_collection(self, mock_graph_db):
|
||||
"""Test that create_node includes user/collection properties"""
|
||||
mock_driver = MagicMock()
|
||||
mock_graph_db.driver.return_value = mock_driver
|
||||
mock_session = MagicMock()
|
||||
mock_driver.session.return_value.__enter__.return_value = mock_session
|
||||
|
||||
# Mock execute_query response
|
||||
mock_result = MagicMock()
|
||||
mock_summary = MagicMock()
|
||||
mock_summary.counters.nodes_created = 1
|
||||
mock_summary.result_available_after = 10
|
||||
mock_result.summary = mock_summary
|
||||
mock_driver.execute_query.return_value = mock_result
|
||||
|
||||
processor = Processor(taskgroup=MagicMock())
|
||||
|
||||
processor.create_node("http://example.com/node", "test_user", "test_collection")
|
||||
|
||||
mock_driver.execute_query.assert_called_with(
|
||||
"MERGE (n:Node {uri: $uri, user: $user, collection: $collection})",
|
||||
uri="http://example.com/node",
|
||||
user="test_user",
|
||||
collection="test_collection",
|
||||
database_="memgraph"
|
||||
)
|
||||
|
||||
@patch('trustgraph.storage.triples.memgraph.write.GraphDatabase')
|
||||
def test_create_literal_includes_user_collection(self, mock_graph_db):
|
||||
"""Test that create_literal includes user/collection properties"""
|
||||
mock_driver = MagicMock()
|
||||
mock_graph_db.driver.return_value = mock_driver
|
||||
mock_session = MagicMock()
|
||||
mock_driver.session.return_value.__enter__.return_value = mock_session
|
||||
|
||||
# Mock execute_query response
|
||||
mock_result = MagicMock()
|
||||
mock_summary = MagicMock()
|
||||
mock_summary.counters.nodes_created = 1
|
||||
mock_summary.result_available_after = 10
|
||||
mock_result.summary = mock_summary
|
||||
mock_driver.execute_query.return_value = mock_result
|
||||
|
||||
processor = Processor(taskgroup=MagicMock())
|
||||
|
||||
processor.create_literal("test_value", "test_user", "test_collection")
|
||||
|
||||
mock_driver.execute_query.assert_called_with(
|
||||
"MERGE (n:Literal {value: $value, user: $user, collection: $collection})",
|
||||
value="test_value",
|
||||
user="test_user",
|
||||
collection="test_collection",
|
||||
database_="memgraph"
|
||||
)
|
||||
|
||||
@patch('trustgraph.storage.triples.memgraph.write.GraphDatabase')
|
||||
def test_relate_node_includes_user_collection(self, mock_graph_db):
|
||||
"""Test that relate_node includes user/collection properties"""
|
||||
mock_driver = MagicMock()
|
||||
mock_graph_db.driver.return_value = mock_driver
|
||||
mock_session = MagicMock()
|
||||
mock_driver.session.return_value.__enter__.return_value = mock_session
|
||||
|
||||
# Mock execute_query response
|
||||
mock_result = MagicMock()
|
||||
mock_summary = MagicMock()
|
||||
mock_summary.counters.nodes_created = 0
|
||||
mock_summary.result_available_after = 10
|
||||
mock_result.summary = mock_summary
|
||||
mock_driver.execute_query.return_value = mock_result
|
||||
|
||||
processor = Processor(taskgroup=MagicMock())
|
||||
|
||||
processor.relate_node(
|
||||
"http://example.com/subject",
|
||||
"http://example.com/predicate",
|
||||
"http://example.com/object",
|
||||
"test_user",
|
||||
"test_collection"
|
||||
)
|
||||
|
||||
mock_driver.execute_query.assert_called_with(
|
||||
"MATCH (src:Node {uri: $src, user: $user, collection: $collection}) "
|
||||
"MATCH (dest:Node {uri: $dest, user: $user, collection: $collection}) "
|
||||
"MERGE (src)-[:Rel {uri: $uri, user: $user, collection: $collection}]->(dest)",
|
||||
src="http://example.com/subject",
|
||||
dest="http://example.com/object",
|
||||
uri="http://example.com/predicate",
|
||||
user="test_user",
|
||||
collection="test_collection",
|
||||
database_="memgraph"
|
||||
)
|
||||
|
||||
@patch('trustgraph.storage.triples.memgraph.write.GraphDatabase')
|
||||
def test_relate_literal_includes_user_collection(self, mock_graph_db):
|
||||
"""Test that relate_literal includes user/collection properties"""
|
||||
mock_driver = MagicMock()
|
||||
mock_graph_db.driver.return_value = mock_driver
|
||||
mock_session = MagicMock()
|
||||
mock_driver.session.return_value.__enter__.return_value = mock_session
|
||||
|
||||
# Mock execute_query response
|
||||
mock_result = MagicMock()
|
||||
mock_summary = MagicMock()
|
||||
mock_summary.counters.nodes_created = 0
|
||||
mock_summary.result_available_after = 10
|
||||
mock_result.summary = mock_summary
|
||||
mock_driver.execute_query.return_value = mock_result
|
||||
|
||||
processor = Processor(taskgroup=MagicMock())
|
||||
|
||||
processor.relate_literal(
|
||||
"http://example.com/subject",
|
||||
"http://example.com/predicate",
|
||||
"literal_value",
|
||||
"test_user",
|
||||
"test_collection"
|
||||
)
|
||||
|
||||
mock_driver.execute_query.assert_called_with(
|
||||
"MATCH (src:Node {uri: $src, user: $user, collection: $collection}) "
|
||||
"MATCH (dest:Literal {value: $dest, user: $user, collection: $collection}) "
|
||||
"MERGE (src)-[:Rel {uri: $uri, user: $user, collection: $collection}]->(dest)",
|
||||
src="http://example.com/subject",
|
||||
dest="literal_value",
|
||||
uri="http://example.com/predicate",
|
||||
user="test_user",
|
||||
collection="test_collection",
|
||||
database_="memgraph"
|
||||
)
|
||||
|
||||
def test_add_args_includes_memgraph_parameters(self):
|
||||
"""Test that add_args properly configures Memgraph-specific parameters"""
|
||||
from argparse import ArgumentParser
|
||||
from unittest.mock import patch
|
||||
|
||||
parser = ArgumentParser()
|
||||
|
||||
# Mock the parent class add_args method
|
||||
with patch('trustgraph.storage.triples.memgraph.write.TriplesStoreService.add_args') as mock_parent_add_args:
|
||||
Processor.add_args(parser)
|
||||
|
||||
# Verify parent add_args was called
|
||||
mock_parent_add_args.assert_called_once()
|
||||
|
||||
# Verify our specific arguments were added with Memgraph defaults
|
||||
args = parser.parse_args([])
|
||||
|
||||
assert hasattr(args, 'graph_host')
|
||||
assert args.graph_host == 'bolt://memgraph:7687'
|
||||
assert hasattr(args, 'username')
|
||||
assert args.username == 'memgraph'
|
||||
assert hasattr(args, 'password')
|
||||
assert args.password == 'password'
|
||||
assert hasattr(args, 'database')
|
||||
assert args.database == 'memgraph'
|
||||
|
||||
|
||||
class TestMemgraphUserCollectionRegression:
|
||||
"""Regression tests to ensure user/collection isolation prevents data leakage"""
|
||||
|
||||
@patch('trustgraph.storage.triples.memgraph.write.GraphDatabase')
|
||||
@pytest.mark.asyncio
|
||||
async def test_regression_no_cross_user_data_access(self, mock_graph_db):
|
||||
"""Regression test: Ensure users cannot access each other's data"""
|
||||
mock_driver = MagicMock()
|
||||
mock_graph_db.driver.return_value = mock_driver
|
||||
mock_session = MagicMock()
|
||||
mock_driver.session.return_value.__enter__.return_value = mock_session
|
||||
|
||||
# Mock execute_query response
|
||||
mock_result = MagicMock()
|
||||
mock_summary = MagicMock()
|
||||
mock_summary.counters.nodes_created = 1
|
||||
mock_summary.result_available_after = 10
|
||||
mock_result.summary = mock_summary
|
||||
mock_driver.execute_query.return_value = mock_result
|
||||
|
||||
processor = Processor(taskgroup=MagicMock())
|
||||
|
||||
# Store data for user1
|
||||
triple = MagicMock()
|
||||
triple.s.value = "http://example.com/subject"
|
||||
triple.p.value = "http://example.com/predicate"
|
||||
triple.o.value = "user1_data"
|
||||
triple.o.is_uri = False
|
||||
|
||||
message_user1 = MagicMock()
|
||||
message_user1.triples = [triple]
|
||||
message_user1.metadata.user = "user1"
|
||||
message_user1.metadata.collection = "collection1"
|
||||
|
||||
await processor.store_triples(message_user1)
|
||||
|
||||
# Verify that all storage operations included user1/collection1 parameters
|
||||
for call in mock_driver.execute_query.call_args_list:
|
||||
call_kwargs = call.kwargs if hasattr(call, 'kwargs') else call[1]
|
||||
if 'user' in call_kwargs:
|
||||
assert call_kwargs['user'] == "user1"
|
||||
assert call_kwargs['collection'] == "collection1"
|
||||
|
||||
@patch('trustgraph.storage.triples.memgraph.write.GraphDatabase')
|
||||
@pytest.mark.asyncio
|
||||
async def test_regression_same_uri_different_users(self, mock_graph_db):
|
||||
"""Regression test: Same URI can exist for different users without conflict"""
|
||||
mock_driver = MagicMock()
|
||||
mock_graph_db.driver.return_value = mock_driver
|
||||
mock_session = MagicMock()
|
||||
mock_driver.session.return_value.__enter__.return_value = mock_session
|
||||
|
||||
# Mock execute_query response
|
||||
mock_result = MagicMock()
|
||||
mock_summary = MagicMock()
|
||||
mock_summary.counters.nodes_created = 1
|
||||
mock_summary.result_available_after = 10
|
||||
mock_result.summary = mock_summary
|
||||
mock_driver.execute_query.return_value = mock_result
|
||||
|
||||
processor = Processor(taskgroup=MagicMock())
|
||||
|
||||
# Same URI for different users should create separate nodes
|
||||
processor.create_node("http://example.com/same-uri", "user1", "collection1")
|
||||
processor.create_node("http://example.com/same-uri", "user2", "collection2")
|
||||
|
||||
# Verify both calls were made with different user/collection parameters
|
||||
calls = mock_driver.execute_query.call_args_list[-2:] # Get last 2 calls
|
||||
|
||||
call1_kwargs = calls[0].kwargs if hasattr(calls[0], 'kwargs') else calls[0][1]
|
||||
call2_kwargs = calls[1].kwargs if hasattr(calls[1], 'kwargs') else calls[1][1]
|
||||
|
||||
assert call1_kwargs['user'] == "user1" and call1_kwargs['collection'] == "collection1"
|
||||
assert call2_kwargs['user'] == "user2" and call2_kwargs['collection'] == "collection2"
|
||||
|
||||
# Both should have the same URI but different user/collection
|
||||
assert call1_kwargs['uri'] == call2_kwargs['uri'] == "http://example.com/same-uri"
|
||||
470
tests/unit/test_storage/test_neo4j_user_collection_isolation.py
Normal file
470
tests/unit/test_storage/test_neo4j_user_collection_isolation.py
Normal file
|
|
@ -0,0 +1,470 @@
|
|||
"""
|
||||
Tests for Neo4j user/collection isolation in triples storage and query
|
||||
"""
|
||||
|
||||
import pytest
|
||||
from unittest.mock import MagicMock, patch, call
|
||||
|
||||
from trustgraph.storage.triples.neo4j.write import Processor as StorageProcessor
|
||||
from trustgraph.query.triples.neo4j.service import Processor as QueryProcessor
|
||||
from trustgraph.schema import Triples, Triple, Value, Metadata
|
||||
from trustgraph.schema import TriplesQueryRequest
|
||||
|
||||
|
||||
class TestNeo4jUserCollectionIsolation:
|
||||
"""Test cases for Neo4j user/collection isolation functionality"""
|
||||
|
||||
@patch('trustgraph.storage.triples.neo4j.write.GraphDatabase')
|
||||
def test_storage_creates_indexes_with_user_collection(self, mock_graph_db):
|
||||
"""Test that storage service creates compound indexes for user/collection"""
|
||||
taskgroup_mock = MagicMock()
|
||||
mock_driver = MagicMock()
|
||||
mock_graph_db.driver.return_value = mock_driver
|
||||
mock_session = MagicMock()
|
||||
mock_driver.session.return_value.__enter__.return_value = mock_session
|
||||
|
||||
processor = StorageProcessor(taskgroup=taskgroup_mock)
|
||||
|
||||
# Verify both legacy and new compound indexes are created
|
||||
expected_indexes = [
|
||||
"CREATE INDEX Node_uri FOR (n:Node) ON (n.uri)",
|
||||
"CREATE INDEX Literal_value FOR (n:Literal) ON (n.value)",
|
||||
"CREATE INDEX Rel_uri FOR ()-[r:Rel]-() ON (r.uri)",
|
||||
"CREATE INDEX node_user_collection_uri FOR (n:Node) ON (n.user, n.collection, n.uri)",
|
||||
"CREATE INDEX literal_user_collection_value FOR (n:Literal) ON (n.user, n.collection, n.value)",
|
||||
"CREATE INDEX rel_user FOR ()-[r:Rel]-() ON (r.user)",
|
||||
"CREATE INDEX rel_collection FOR ()-[r:Rel]-() ON (r.collection)"
|
||||
]
|
||||
|
||||
# Check that all expected indexes were created
|
||||
for expected_query in expected_indexes:
|
||||
mock_session.run.assert_any_call(expected_query)
|
||||
|
||||
@patch('trustgraph.storage.triples.neo4j.write.GraphDatabase')
|
||||
@pytest.mark.asyncio
|
||||
async def test_store_triples_with_user_collection(self, mock_graph_db):
|
||||
"""Test that triples are stored with user/collection properties"""
|
||||
taskgroup_mock = MagicMock()
|
||||
mock_driver = MagicMock()
|
||||
mock_graph_db.driver.return_value = mock_driver
|
||||
mock_session = MagicMock()
|
||||
mock_driver.session.return_value.__enter__.return_value = mock_session
|
||||
|
||||
processor = StorageProcessor(taskgroup=taskgroup_mock)
|
||||
|
||||
# Create test message with user/collection metadata
|
||||
metadata = Metadata(
|
||||
id="test-id",
|
||||
user="test_user",
|
||||
collection="test_collection"
|
||||
)
|
||||
|
||||
triple = Triple(
|
||||
s=Value(value="http://example.com/subject", is_uri=True),
|
||||
p=Value(value="http://example.com/predicate", is_uri=True),
|
||||
o=Value(value="literal_value", is_uri=False)
|
||||
)
|
||||
|
||||
message = Triples(
|
||||
metadata=metadata,
|
||||
triples=[triple]
|
||||
)
|
||||
|
||||
# Mock execute_query to return summaries
|
||||
mock_summary = MagicMock()
|
||||
mock_summary.counters.nodes_created = 1
|
||||
mock_summary.result_available_after = 10
|
||||
mock_driver.execute_query.return_value.summary = mock_summary
|
||||
|
||||
await processor.store_triples(message)
|
||||
|
||||
# Verify nodes and relationships were created with user/collection properties
|
||||
expected_calls = [
|
||||
call(
|
||||
"MERGE (n:Node {uri: $uri, user: $user, collection: $collection})",
|
||||
uri="http://example.com/subject",
|
||||
user="test_user",
|
||||
collection="test_collection",
|
||||
database_='neo4j'
|
||||
),
|
||||
call(
|
||||
"MERGE (n:Literal {value: $value, user: $user, collection: $collection})",
|
||||
value="literal_value",
|
||||
user="test_user",
|
||||
collection="test_collection",
|
||||
database_='neo4j'
|
||||
),
|
||||
call(
|
||||
"MATCH (src:Node {uri: $src, user: $user, collection: $collection}) "
|
||||
"MATCH (dest:Literal {value: $dest, user: $user, collection: $collection}) "
|
||||
"MERGE (src)-[:Rel {uri: $uri, user: $user, collection: $collection}]->(dest)",
|
||||
src="http://example.com/subject",
|
||||
dest="literal_value",
|
||||
uri="http://example.com/predicate",
|
||||
user="test_user",
|
||||
collection="test_collection",
|
||||
database_='neo4j'
|
||||
)
|
||||
]
|
||||
|
||||
for expected_call in expected_calls:
|
||||
mock_driver.execute_query.assert_any_call(*expected_call.args, **expected_call.kwargs)
|
||||
|
||||
@patch('trustgraph.storage.triples.neo4j.write.GraphDatabase')
|
||||
@pytest.mark.asyncio
|
||||
async def test_store_triples_with_default_user_collection(self, mock_graph_db):
|
||||
"""Test that default user/collection are used when not provided"""
|
||||
taskgroup_mock = MagicMock()
|
||||
mock_driver = MagicMock()
|
||||
mock_graph_db.driver.return_value = mock_driver
|
||||
mock_session = MagicMock()
|
||||
mock_driver.session.return_value.__enter__.return_value = mock_session
|
||||
|
||||
processor = StorageProcessor(taskgroup=taskgroup_mock)
|
||||
|
||||
# Create test message without user/collection
|
||||
metadata = Metadata(id="test-id")
|
||||
|
||||
triple = Triple(
|
||||
s=Value(value="http://example.com/subject", is_uri=True),
|
||||
p=Value(value="http://example.com/predicate", is_uri=True),
|
||||
o=Value(value="http://example.com/object", is_uri=True)
|
||||
)
|
||||
|
||||
message = Triples(
|
||||
metadata=metadata,
|
||||
triples=[triple]
|
||||
)
|
||||
|
||||
# Mock execute_query
|
||||
mock_summary = MagicMock()
|
||||
mock_summary.counters.nodes_created = 1
|
||||
mock_summary.result_available_after = 10
|
||||
mock_driver.execute_query.return_value.summary = mock_summary
|
||||
|
||||
await processor.store_triples(message)
|
||||
|
||||
# Verify defaults were used
|
||||
mock_driver.execute_query.assert_any_call(
|
||||
"MERGE (n:Node {uri: $uri, user: $user, collection: $collection})",
|
||||
uri="http://example.com/subject",
|
||||
user="default",
|
||||
collection="default",
|
||||
database_='neo4j'
|
||||
)
|
||||
|
||||
@patch('trustgraph.query.triples.neo4j.service.GraphDatabase')
|
||||
@pytest.mark.asyncio
|
||||
async def test_query_triples_filters_by_user_collection(self, mock_graph_db):
|
||||
"""Test that query service filters results by user/collection"""
|
||||
mock_driver = MagicMock()
|
||||
mock_graph_db.driver.return_value = mock_driver
|
||||
|
||||
processor = QueryProcessor(taskgroup=MagicMock())
|
||||
|
||||
# Create test query
|
||||
query = TriplesQueryRequest(
|
||||
user="test_user",
|
||||
collection="test_collection",
|
||||
s=Value(value="http://example.com/subject", is_uri=True),
|
||||
p=Value(value="http://example.com/predicate", is_uri=True),
|
||||
o=None
|
||||
)
|
||||
|
||||
# Mock query results
|
||||
mock_records = [
|
||||
MagicMock(data=lambda: {"dest": "http://example.com/object1"}),
|
||||
MagicMock(data=lambda: {"dest": "literal_value"})
|
||||
]
|
||||
|
||||
mock_driver.execute_query.return_value = (mock_records, MagicMock(), MagicMock())
|
||||
|
||||
result = await processor.query_triples(query)
|
||||
|
||||
# Verify queries include user/collection filters
|
||||
expected_literal_query = (
|
||||
"MATCH (src:Node {uri: $src, user: $user, collection: $collection})-"
|
||||
"[rel:Rel {uri: $rel, user: $user, collection: $collection}]->"
|
||||
"(dest:Literal {user: $user, collection: $collection}) "
|
||||
"RETURN dest.value as dest"
|
||||
)
|
||||
|
||||
expected_node_query = (
|
||||
"MATCH (src:Node {uri: $src, user: $user, collection: $collection})-"
|
||||
"[rel:Rel {uri: $rel, user: $user, collection: $collection}]->"
|
||||
"(dest:Node {user: $user, collection: $collection}) "
|
||||
"RETURN dest.uri as dest"
|
||||
)
|
||||
|
||||
# Check that queries were executed with user/collection parameters
|
||||
calls = mock_driver.execute_query.call_args_list
|
||||
assert any(
|
||||
expected_literal_query in str(call) and
|
||||
"user='test_user'" in str(call) and
|
||||
"collection='test_collection'" in str(call)
|
||||
for call in calls
|
||||
)
|
||||
|
||||
@patch('trustgraph.query.triples.neo4j.service.GraphDatabase')
|
||||
@pytest.mark.asyncio
|
||||
async def test_query_triples_with_default_user_collection(self, mock_graph_db):
|
||||
"""Test that query service uses defaults when user/collection not provided"""
|
||||
mock_driver = MagicMock()
|
||||
mock_graph_db.driver.return_value = mock_driver
|
||||
|
||||
processor = QueryProcessor(taskgroup=MagicMock())
|
||||
|
||||
# Create test query without user/collection
|
||||
query = TriplesQueryRequest(
|
||||
s=None,
|
||||
p=None,
|
||||
o=None
|
||||
)
|
||||
|
||||
# Mock empty results
|
||||
mock_driver.execute_query.return_value = ([], MagicMock(), MagicMock())
|
||||
|
||||
result = await processor.query_triples(query)
|
||||
|
||||
# Verify defaults were used in queries
|
||||
calls = mock_driver.execute_query.call_args_list
|
||||
assert any(
|
||||
"user='default'" in str(call) and "collection='default'" in str(call)
|
||||
for call in calls
|
||||
)
|
||||
|
||||
@patch('trustgraph.storage.triples.neo4j.write.GraphDatabase')
|
||||
@pytest.mark.asyncio
|
||||
async def test_data_isolation_between_users(self, mock_graph_db):
|
||||
"""Test that data from different users is properly isolated"""
|
||||
taskgroup_mock = MagicMock()
|
||||
mock_driver = MagicMock()
|
||||
mock_graph_db.driver.return_value = mock_driver
|
||||
mock_session = MagicMock()
|
||||
mock_driver.session.return_value.__enter__.return_value = mock_session
|
||||
|
||||
processor = StorageProcessor(taskgroup=taskgroup_mock)
|
||||
|
||||
# Create messages for different users
|
||||
message_user1 = Triples(
|
||||
metadata=Metadata(user="user1", collection="coll1"),
|
||||
triples=[
|
||||
Triple(
|
||||
s=Value(value="http://example.com/user1/subject", is_uri=True),
|
||||
p=Value(value="http://example.com/predicate", is_uri=True),
|
||||
o=Value(value="user1_data", is_uri=False)
|
||||
)
|
||||
]
|
||||
)
|
||||
|
||||
message_user2 = Triples(
|
||||
metadata=Metadata(user="user2", collection="coll2"),
|
||||
triples=[
|
||||
Triple(
|
||||
s=Value(value="http://example.com/user2/subject", is_uri=True),
|
||||
p=Value(value="http://example.com/predicate", is_uri=True),
|
||||
o=Value(value="user2_data", is_uri=False)
|
||||
)
|
||||
]
|
||||
)
|
||||
|
||||
# Mock execute_query
|
||||
mock_summary = MagicMock()
|
||||
mock_summary.counters.nodes_created = 1
|
||||
mock_summary.result_available_after = 10
|
||||
mock_driver.execute_query.return_value.summary = mock_summary
|
||||
|
||||
# Store data for both users
|
||||
await processor.store_triples(message_user1)
|
||||
await processor.store_triples(message_user2)
|
||||
|
||||
# Verify user1 data was stored with user1/coll1
|
||||
mock_driver.execute_query.assert_any_call(
|
||||
"MERGE (n:Literal {value: $value, user: $user, collection: $collection})",
|
||||
value="user1_data",
|
||||
user="user1",
|
||||
collection="coll1",
|
||||
database_='neo4j'
|
||||
)
|
||||
|
||||
# Verify user2 data was stored with user2/coll2
|
||||
mock_driver.execute_query.assert_any_call(
|
||||
"MERGE (n:Literal {value: $value, user: $user, collection: $collection})",
|
||||
value="user2_data",
|
||||
user="user2",
|
||||
collection="coll2",
|
||||
database_='neo4j'
|
||||
)
|
||||
|
||||
@patch('trustgraph.query.triples.neo4j.service.GraphDatabase')
|
||||
@pytest.mark.asyncio
|
||||
async def test_wildcard_query_respects_user_collection(self, mock_graph_db):
|
||||
"""Test that wildcard queries still filter by user/collection"""
|
||||
mock_driver = MagicMock()
|
||||
mock_graph_db.driver.return_value = mock_driver
|
||||
|
||||
processor = QueryProcessor(taskgroup=MagicMock())
|
||||
|
||||
# Create wildcard query (all nulls) with user/collection
|
||||
query = TriplesQueryRequest(
|
||||
user="test_user",
|
||||
collection="test_collection",
|
||||
s=None,
|
||||
p=None,
|
||||
o=None
|
||||
)
|
||||
|
||||
# Mock results
|
||||
mock_driver.execute_query.return_value = ([], MagicMock(), MagicMock())
|
||||
|
||||
result = await processor.query_triples(query)
|
||||
|
||||
# Verify wildcard queries include user/collection filters
|
||||
wildcard_query = (
|
||||
"MATCH (src:Node {user: $user, collection: $collection})-"
|
||||
"[rel:Rel {user: $user, collection: $collection}]->"
|
||||
"(dest:Literal {user: $user, collection: $collection}) "
|
||||
"RETURN src.uri as src, rel.uri as rel, dest.value as dest"
|
||||
)
|
||||
|
||||
calls = mock_driver.execute_query.call_args_list
|
||||
assert any(
|
||||
wildcard_query in str(call) and
|
||||
"user='test_user'" in str(call) and
|
||||
"collection='test_collection'" in str(call)
|
||||
for call in calls
|
||||
)
|
||||
|
||||
def test_add_args_includes_neo4j_parameters(self):
|
||||
"""Test that add_args includes Neo4j-specific parameters"""
|
||||
from argparse import ArgumentParser
|
||||
from unittest.mock import patch
|
||||
|
||||
parser = ArgumentParser()
|
||||
|
||||
with patch('trustgraph.storage.triples.neo4j.write.TriplesStoreService.add_args'):
|
||||
StorageProcessor.add_args(parser)
|
||||
|
||||
args = parser.parse_args([])
|
||||
|
||||
assert hasattr(args, 'graph_host')
|
||||
assert hasattr(args, 'username')
|
||||
assert hasattr(args, 'password')
|
||||
assert hasattr(args, 'database')
|
||||
|
||||
# Check defaults
|
||||
assert args.graph_host == 'bolt://neo4j:7687'
|
||||
assert args.username == 'neo4j'
|
||||
assert args.password == 'password'
|
||||
assert args.database == 'neo4j'
|
||||
|
||||
|
||||
class TestNeo4jUserCollectionRegression:
|
||||
"""Regression tests to ensure user/collection isolation prevents data leaks"""
|
||||
|
||||
@patch('trustgraph.query.triples.neo4j.service.GraphDatabase')
|
||||
@pytest.mark.asyncio
|
||||
async def test_regression_no_cross_user_data_access(self, mock_graph_db):
|
||||
"""
|
||||
Regression test: Ensure user1 cannot access user2's data
|
||||
|
||||
This test guards against the bug where all users shared the same
|
||||
Neo4j graph space, causing data contamination between users.
|
||||
"""
|
||||
mock_driver = MagicMock()
|
||||
mock_graph_db.driver.return_value = mock_driver
|
||||
|
||||
processor = QueryProcessor(taskgroup=MagicMock())
|
||||
|
||||
# User1 queries for all triples
|
||||
query_user1 = TriplesQueryRequest(
|
||||
user="user1",
|
||||
collection="collection1",
|
||||
s=None, p=None, o=None
|
||||
)
|
||||
|
||||
# Mock that the database has data but none matching user1/collection1
|
||||
mock_driver.execute_query.return_value = ([], MagicMock(), MagicMock())
|
||||
|
||||
result = await processor.query_triples(query_user1)
|
||||
|
||||
# Verify empty results (user1 cannot see other users' data)
|
||||
assert len(result) == 0
|
||||
|
||||
# Verify the query included user/collection filters
|
||||
calls = mock_driver.execute_query.call_args_list
|
||||
for call in calls:
|
||||
query_str = str(call)
|
||||
if "MATCH" in query_str:
|
||||
assert "user: $user" in query_str or "user='user1'" in query_str
|
||||
assert "collection: $collection" in query_str or "collection='collection1'" in query_str
|
||||
|
||||
@patch('trustgraph.storage.triples.neo4j.write.GraphDatabase')
|
||||
@pytest.mark.asyncio
|
||||
async def test_regression_same_uri_different_users(self, mock_graph_db):
|
||||
"""
|
||||
Regression test: Same URI in different user contexts should create separate nodes
|
||||
|
||||
This ensures that http://example.com/entity for user1 is completely separate
|
||||
from http://example.com/entity for user2.
|
||||
"""
|
||||
taskgroup_mock = MagicMock()
|
||||
mock_driver = MagicMock()
|
||||
mock_graph_db.driver.return_value = mock_driver
|
||||
mock_session = MagicMock()
|
||||
mock_driver.session.return_value.__enter__.return_value = mock_session
|
||||
|
||||
processor = StorageProcessor(taskgroup=taskgroup_mock)
|
||||
|
||||
# Same URI for different users
|
||||
shared_uri = "http://example.com/shared_entity"
|
||||
|
||||
message_user1 = Triples(
|
||||
metadata=Metadata(user="user1", collection="coll1"),
|
||||
triples=[
|
||||
Triple(
|
||||
s=Value(value=shared_uri, is_uri=True),
|
||||
p=Value(value="http://example.com/p", is_uri=True),
|
||||
o=Value(value="user1_value", is_uri=False)
|
||||
)
|
||||
]
|
||||
)
|
||||
|
||||
message_user2 = Triples(
|
||||
metadata=Metadata(user="user2", collection="coll2"),
|
||||
triples=[
|
||||
Triple(
|
||||
s=Value(value=shared_uri, is_uri=True),
|
||||
p=Value(value="http://example.com/p", is_uri=True),
|
||||
o=Value(value="user2_value", is_uri=False)
|
||||
)
|
||||
]
|
||||
)
|
||||
|
||||
# Mock execute_query
|
||||
mock_summary = MagicMock()
|
||||
mock_summary.counters.nodes_created = 1
|
||||
mock_summary.result_available_after = 10
|
||||
mock_driver.execute_query.return_value.summary = mock_summary
|
||||
|
||||
await processor.store_triples(message_user1)
|
||||
await processor.store_triples(message_user2)
|
||||
|
||||
# Verify two separate nodes were created with same URI but different user/collection
|
||||
user1_node_call = call(
|
||||
"MERGE (n:Node {uri: $uri, user: $user, collection: $collection})",
|
||||
uri=shared_uri,
|
||||
user="user1",
|
||||
collection="coll1",
|
||||
database_='neo4j'
|
||||
)
|
||||
|
||||
user2_node_call = call(
|
||||
"MERGE (n:Node {uri: $uri, user: $user, collection: $collection})",
|
||||
uri=shared_uri,
|
||||
user="user2",
|
||||
collection="coll2",
|
||||
database_='neo4j'
|
||||
)
|
||||
|
||||
mock_driver.execute_query.assert_has_calls([user1_node_call, user2_node_call], any_order=True)
|
||||
|
|
@ -261,7 +261,7 @@ class TestObjectsCassandraStorageLogic:
|
|||
metadata=[]
|
||||
),
|
||||
schema_name="test_schema",
|
||||
values={"id": "123", "value": "456"},
|
||||
values=[{"id": "123", "value": "456"}],
|
||||
confidence=0.9,
|
||||
source_span="test source"
|
||||
)
|
||||
|
|
@ -284,8 +284,8 @@ class TestObjectsCassandraStorageLogic:
|
|||
assert "INSERT INTO test_user.o_test_schema" in insert_cql
|
||||
assert "collection" in insert_cql
|
||||
assert values[0] == "test_collection" # collection value
|
||||
assert values[1] == "123" # id value
|
||||
assert values[2] == 456 # converted integer value
|
||||
assert values[1] == "123" # id value (from values[0])
|
||||
assert values[2] == 456 # converted integer value (from values[0])
|
||||
|
||||
def test_secondary_index_creation(self):
|
||||
"""Test that secondary indexes are created for indexed fields"""
|
||||
|
|
@ -325,4 +325,201 @@ class TestObjectsCassandraStorageLogic:
|
|||
index_calls = [call[0][0] for call in calls if "CREATE INDEX" in call[0][0]]
|
||||
assert len(index_calls) == 2
|
||||
assert any("o_products_category_idx" in call for call in index_calls)
|
||||
assert any("o_products_price_idx" in call for call in index_calls)
|
||||
assert any("o_products_price_idx" in call for call in index_calls)
|
||||
|
||||
|
||||
class TestObjectsCassandraStorageBatchLogic:
|
||||
"""Test batch processing logic in Cassandra storage"""
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_batch_object_processing_logic(self):
|
||||
"""Test processing of batch ExtractedObjects"""
|
||||
processor = MagicMock()
|
||||
processor.schemas = {
|
||||
"batch_schema": RowSchema(
|
||||
name="batch_schema",
|
||||
description="Test batch schema",
|
||||
fields=[
|
||||
Field(name="id", type="string", size=50, primary=True),
|
||||
Field(name="name", type="string", size=100),
|
||||
Field(name="value", type="integer", size=4)
|
||||
]
|
||||
)
|
||||
}
|
||||
processor.ensure_table = MagicMock()
|
||||
processor.sanitize_name = Processor.sanitize_name.__get__(processor, Processor)
|
||||
processor.sanitize_table = Processor.sanitize_table.__get__(processor, Processor)
|
||||
processor.convert_value = Processor.convert_value.__get__(processor, Processor)
|
||||
processor.session = MagicMock()
|
||||
processor.on_object = Processor.on_object.__get__(processor, Processor)
|
||||
|
||||
# Create batch object with multiple values
|
||||
batch_obj = ExtractedObject(
|
||||
metadata=Metadata(
|
||||
id="batch-001",
|
||||
user="test_user",
|
||||
collection="batch_collection",
|
||||
metadata=[]
|
||||
),
|
||||
schema_name="batch_schema",
|
||||
values=[
|
||||
{"id": "001", "name": "First", "value": "100"},
|
||||
{"id": "002", "name": "Second", "value": "200"},
|
||||
{"id": "003", "name": "Third", "value": "300"}
|
||||
],
|
||||
confidence=0.95,
|
||||
source_span="batch source"
|
||||
)
|
||||
|
||||
# Create mock message
|
||||
msg = MagicMock()
|
||||
msg.value.return_value = batch_obj
|
||||
|
||||
# Process batch object
|
||||
await processor.on_object(msg, None, None)
|
||||
|
||||
# Verify table was ensured once
|
||||
processor.ensure_table.assert_called_once_with("test_user", "batch_schema", processor.schemas["batch_schema"])
|
||||
|
||||
# Verify 3 separate insert calls (one per batch item)
|
||||
assert processor.session.execute.call_count == 3
|
||||
|
||||
# Check each insert call
|
||||
calls = processor.session.execute.call_args_list
|
||||
for i, call in enumerate(calls):
|
||||
insert_cql = call[0][0]
|
||||
values = call[0][1]
|
||||
|
||||
assert "INSERT INTO test_user.o_batch_schema" in insert_cql
|
||||
assert "collection" in insert_cql
|
||||
|
||||
# Check values for each batch item
|
||||
assert values[0] == "batch_collection" # collection
|
||||
assert values[1] == f"00{i+1}" # id from batch item i
|
||||
assert values[2] == f"First" if i == 0 else f"Second" if i == 1 else f"Third" # name
|
||||
assert values[3] == (i+1) * 100 # converted integer value
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_empty_batch_processing_logic(self):
|
||||
"""Test processing of empty batch ExtractedObjects"""
|
||||
processor = MagicMock()
|
||||
processor.schemas = {
|
||||
"empty_schema": RowSchema(
|
||||
name="empty_schema",
|
||||
fields=[Field(name="id", type="string", size=50, primary=True)]
|
||||
)
|
||||
}
|
||||
processor.ensure_table = MagicMock()
|
||||
processor.sanitize_name = Processor.sanitize_name.__get__(processor, Processor)
|
||||
processor.sanitize_table = Processor.sanitize_table.__get__(processor, Processor)
|
||||
processor.convert_value = Processor.convert_value.__get__(processor, Processor)
|
||||
processor.session = MagicMock()
|
||||
processor.on_object = Processor.on_object.__get__(processor, Processor)
|
||||
|
||||
# Create empty batch object
|
||||
empty_batch_obj = ExtractedObject(
|
||||
metadata=Metadata(
|
||||
id="empty-001",
|
||||
user="test_user",
|
||||
collection="empty_collection",
|
||||
metadata=[]
|
||||
),
|
||||
schema_name="empty_schema",
|
||||
values=[], # Empty batch
|
||||
confidence=1.0,
|
||||
source_span="empty source"
|
||||
)
|
||||
|
||||
msg = MagicMock()
|
||||
msg.value.return_value = empty_batch_obj
|
||||
|
||||
# Process empty batch object
|
||||
await processor.on_object(msg, None, None)
|
||||
|
||||
# Verify table was ensured
|
||||
processor.ensure_table.assert_called_once()
|
||||
|
||||
# Verify no insert calls for empty batch
|
||||
processor.session.execute.assert_not_called()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_single_item_batch_processing_logic(self):
|
||||
"""Test processing of single-item batch (backward compatibility)"""
|
||||
processor = MagicMock()
|
||||
processor.schemas = {
|
||||
"single_schema": RowSchema(
|
||||
name="single_schema",
|
||||
fields=[
|
||||
Field(name="id", type="string", size=50, primary=True),
|
||||
Field(name="data", type="string", size=100)
|
||||
]
|
||||
)
|
||||
}
|
||||
processor.ensure_table = MagicMock()
|
||||
processor.sanitize_name = Processor.sanitize_name.__get__(processor, Processor)
|
||||
processor.sanitize_table = Processor.sanitize_table.__get__(processor, Processor)
|
||||
processor.convert_value = Processor.convert_value.__get__(processor, Processor)
|
||||
processor.session = MagicMock()
|
||||
processor.on_object = Processor.on_object.__get__(processor, Processor)
|
||||
|
||||
# Create single-item batch object (backward compatibility case)
|
||||
single_batch_obj = ExtractedObject(
|
||||
metadata=Metadata(
|
||||
id="single-001",
|
||||
user="test_user",
|
||||
collection="single_collection",
|
||||
metadata=[]
|
||||
),
|
||||
schema_name="single_schema",
|
||||
values=[{"id": "single-1", "data": "single data"}], # Array with one item
|
||||
confidence=0.8,
|
||||
source_span="single source"
|
||||
)
|
||||
|
||||
msg = MagicMock()
|
||||
msg.value.return_value = single_batch_obj
|
||||
|
||||
# Process single-item batch object
|
||||
await processor.on_object(msg, None, None)
|
||||
|
||||
# Verify table was ensured
|
||||
processor.ensure_table.assert_called_once()
|
||||
|
||||
# Verify exactly one insert call
|
||||
processor.session.execute.assert_called_once()
|
||||
|
||||
insert_cql = processor.session.execute.call_args[0][0]
|
||||
values = processor.session.execute.call_args[0][1]
|
||||
|
||||
assert "INSERT INTO test_user.o_single_schema" in insert_cql
|
||||
assert values[0] == "single_collection" # collection
|
||||
assert values[1] == "single-1" # id value
|
||||
assert values[2] == "single data" # data value
|
||||
|
||||
def test_batch_value_conversion_logic(self):
|
||||
"""Test value conversion works correctly for batch items"""
|
||||
processor = MagicMock()
|
||||
processor.convert_value = Processor.convert_value.__get__(processor, Processor)
|
||||
|
||||
# Test various conversion scenarios that would occur in batch processing
|
||||
test_cases = [
|
||||
# Integer conversions for batch items
|
||||
("123", "integer", 123),
|
||||
("456", "integer", 456),
|
||||
("789", "integer", 789),
|
||||
# Float conversions for batch items
|
||||
("12.5", "float", 12.5),
|
||||
("34.7", "float", 34.7),
|
||||
# Boolean conversions for batch items
|
||||
("true", "boolean", True),
|
||||
("false", "boolean", False),
|
||||
("1", "boolean", True),
|
||||
("0", "boolean", False),
|
||||
# String conversions for batch items
|
||||
(123, "string", "123"),
|
||||
(45.6, "string", "45.6"),
|
||||
]
|
||||
|
||||
for input_val, field_type, expected_output in test_cases:
|
||||
result = processor.convert_value(input_val, field_type)
|
||||
assert result == expected_output, f"Failed for {input_val} -> {field_type}: got {result}, expected {expected_output}"
|
||||
|
|
@ -16,28 +16,30 @@ class TestCassandraStorageProcessor:
|
|||
"""Test processor initialization with default parameters"""
|
||||
taskgroup_mock = MagicMock()
|
||||
|
||||
processor = Processor(taskgroup=taskgroup_mock)
|
||||
# Patch environment to ensure clean defaults
|
||||
with patch.dict('os.environ', {}, clear=True):
|
||||
processor = Processor(taskgroup=taskgroup_mock)
|
||||
|
||||
assert processor.graph_host == ['localhost']
|
||||
assert processor.username is None
|
||||
assert processor.password is None
|
||||
assert processor.cassandra_host == ['cassandra'] # Updated default
|
||||
assert processor.cassandra_username is None
|
||||
assert processor.cassandra_password is None
|
||||
assert processor.table is None
|
||||
|
||||
def test_processor_initialization_with_custom_params(self):
|
||||
"""Test processor initialization with custom parameters"""
|
||||
"""Test processor initialization with custom parameters (new cassandra_* names)"""
|
||||
taskgroup_mock = MagicMock()
|
||||
|
||||
processor = Processor(
|
||||
taskgroup=taskgroup_mock,
|
||||
id='custom-storage',
|
||||
graph_host='cassandra.example.com',
|
||||
graph_username='testuser',
|
||||
graph_password='testpass'
|
||||
cassandra_host='cassandra.example.com',
|
||||
cassandra_username='testuser',
|
||||
cassandra_password='testpass'
|
||||
)
|
||||
|
||||
assert processor.graph_host == ['cassandra.example.com']
|
||||
assert processor.username == 'testuser'
|
||||
assert processor.password == 'testpass'
|
||||
assert processor.cassandra_host == ['cassandra.example.com']
|
||||
assert processor.cassandra_username == 'testuser'
|
||||
assert processor.cassandra_password == 'testpass'
|
||||
assert processor.table is None
|
||||
|
||||
def test_processor_initialization_with_partial_auth(self):
|
||||
|
|
@ -46,14 +48,45 @@ class TestCassandraStorageProcessor:
|
|||
|
||||
processor = Processor(
|
||||
taskgroup=taskgroup_mock,
|
||||
graph_username='testuser'
|
||||
cassandra_username='testuser'
|
||||
)
|
||||
|
||||
assert processor.username == 'testuser'
|
||||
assert processor.password is None
|
||||
assert processor.cassandra_username == 'testuser'
|
||||
assert processor.cassandra_password is None
|
||||
|
||||
def test_processor_no_backward_compatibility(self):
|
||||
"""Test that old graph_* parameters are no longer supported"""
|
||||
taskgroup_mock = MagicMock()
|
||||
|
||||
processor = Processor(
|
||||
taskgroup=taskgroup_mock,
|
||||
graph_host='old-host',
|
||||
graph_username='old-user',
|
||||
graph_password='old-pass'
|
||||
)
|
||||
|
||||
# Should use defaults since graph_* params are not recognized
|
||||
assert processor.cassandra_host == ['cassandra'] # Default
|
||||
assert processor.cassandra_username is None
|
||||
assert processor.cassandra_password is None
|
||||
|
||||
def test_processor_only_new_parameters_work(self):
|
||||
"""Test that only new cassandra_* parameters work"""
|
||||
taskgroup_mock = MagicMock()
|
||||
|
||||
processor = Processor(
|
||||
taskgroup=taskgroup_mock,
|
||||
cassandra_host='new-host',
|
||||
graph_host='old-host', # Should be ignored
|
||||
cassandra_username='new-user',
|
||||
graph_username='old-user' # Should be ignored
|
||||
)
|
||||
|
||||
assert processor.cassandra_host == ['new-host'] # Only cassandra_* params work
|
||||
assert processor.cassandra_username == 'new-user' # Only cassandra_* params work
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@patch('trustgraph.storage.triples.cassandra.write.TrustGraph')
|
||||
@patch('trustgraph.storage.triples.cassandra.write.KnowledgeGraph')
|
||||
async def test_table_switching_with_auth(self, mock_trustgraph):
|
||||
"""Test table switching logic when authentication is provided"""
|
||||
taskgroup_mock = MagicMock()
|
||||
|
|
@ -62,8 +95,8 @@ class TestCassandraStorageProcessor:
|
|||
|
||||
processor = Processor(
|
||||
taskgroup=taskgroup_mock,
|
||||
graph_username='testuser',
|
||||
graph_password='testpass'
|
||||
cassandra_username='testuser',
|
||||
cassandra_password='testpass'
|
||||
)
|
||||
|
||||
# Create mock message
|
||||
|
|
@ -74,18 +107,17 @@ class TestCassandraStorageProcessor:
|
|||
|
||||
await processor.store_triples(mock_message)
|
||||
|
||||
# Verify TrustGraph was called with auth parameters
|
||||
# Verify KnowledgeGraph was called with auth parameters
|
||||
mock_trustgraph.assert_called_once_with(
|
||||
hosts=['localhost'],
|
||||
hosts=['cassandra'], # Updated default
|
||||
keyspace='user1',
|
||||
table='collection1',
|
||||
username='testuser',
|
||||
password='testpass'
|
||||
)
|
||||
assert processor.table == ('user1', 'collection1')
|
||||
assert processor.table == 'user1'
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@patch('trustgraph.storage.triples.cassandra.write.TrustGraph')
|
||||
@patch('trustgraph.storage.triples.cassandra.write.KnowledgeGraph')
|
||||
async def test_table_switching_without_auth(self, mock_trustgraph):
|
||||
"""Test table switching logic when no authentication is provided"""
|
||||
taskgroup_mock = MagicMock()
|
||||
|
|
@ -102,16 +134,15 @@ class TestCassandraStorageProcessor:
|
|||
|
||||
await processor.store_triples(mock_message)
|
||||
|
||||
# Verify TrustGraph was called without auth parameters
|
||||
# Verify KnowledgeGraph was called without auth parameters
|
||||
mock_trustgraph.assert_called_once_with(
|
||||
hosts=['localhost'],
|
||||
keyspace='user2',
|
||||
table='collection2'
|
||||
hosts=['cassandra'], # Updated default
|
||||
keyspace='user2'
|
||||
)
|
||||
assert processor.table == ('user2', 'collection2')
|
||||
assert processor.table == 'user2'
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@patch('trustgraph.storage.triples.cassandra.write.TrustGraph')
|
||||
@patch('trustgraph.storage.triples.cassandra.write.KnowledgeGraph')
|
||||
async def test_table_reuse_when_same(self, mock_trustgraph):
|
||||
"""Test that TrustGraph is not recreated when table hasn't changed"""
|
||||
taskgroup_mock = MagicMock()
|
||||
|
|
@ -135,7 +166,7 @@ class TestCassandraStorageProcessor:
|
|||
assert mock_trustgraph.call_count == 1 # Should not increase
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@patch('trustgraph.storage.triples.cassandra.write.TrustGraph')
|
||||
@patch('trustgraph.storage.triples.cassandra.write.KnowledgeGraph')
|
||||
async def test_triple_insertion(self, mock_trustgraph):
|
||||
"""Test that triples are properly inserted into Cassandra"""
|
||||
taskgroup_mock = MagicMock()
|
||||
|
|
@ -165,11 +196,11 @@ class TestCassandraStorageProcessor:
|
|||
|
||||
# Verify both triples were inserted
|
||||
assert mock_tg_instance.insert.call_count == 2
|
||||
mock_tg_instance.insert.assert_any_call('subject1', 'predicate1', 'object1')
|
||||
mock_tg_instance.insert.assert_any_call('subject2', 'predicate2', 'object2')
|
||||
mock_tg_instance.insert.assert_any_call('collection1', 'subject1', 'predicate1', 'object1')
|
||||
mock_tg_instance.insert.assert_any_call('collection1', 'subject2', 'predicate2', 'object2')
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@patch('trustgraph.storage.triples.cassandra.write.TrustGraph')
|
||||
@patch('trustgraph.storage.triples.cassandra.write.KnowledgeGraph')
|
||||
async def test_triple_insertion_with_empty_list(self, mock_trustgraph):
|
||||
"""Test behavior when message has no triples"""
|
||||
taskgroup_mock = MagicMock()
|
||||
|
|
@ -190,7 +221,7 @@ class TestCassandraStorageProcessor:
|
|||
mock_tg_instance.insert.assert_not_called()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@patch('trustgraph.storage.triples.cassandra.write.TrustGraph')
|
||||
@patch('trustgraph.storage.triples.cassandra.write.KnowledgeGraph')
|
||||
@patch('trustgraph.storage.triples.cassandra.write.time.sleep')
|
||||
async def test_exception_handling_with_retry(self, mock_sleep, mock_trustgraph):
|
||||
"""Test exception handling during TrustGraph creation"""
|
||||
|
|
@ -225,16 +256,16 @@ class TestCassandraStorageProcessor:
|
|||
# Verify parent add_args was called
|
||||
mock_parent_add_args.assert_called_once_with(parser)
|
||||
|
||||
# Verify our specific arguments were added
|
||||
# Verify our specific arguments were added (now using cassandra_* names)
|
||||
# Parse empty args to check defaults
|
||||
args = parser.parse_args([])
|
||||
|
||||
assert hasattr(args, 'graph_host')
|
||||
assert args.graph_host == 'localhost'
|
||||
assert hasattr(args, 'graph_username')
|
||||
assert args.graph_username is None
|
||||
assert hasattr(args, 'graph_password')
|
||||
assert args.graph_password is None
|
||||
assert hasattr(args, 'cassandra_host')
|
||||
assert args.cassandra_host == 'cassandra' # Updated default
|
||||
assert hasattr(args, 'cassandra_username')
|
||||
assert args.cassandra_username is None
|
||||
assert hasattr(args, 'cassandra_password')
|
||||
assert args.cassandra_password is None
|
||||
|
||||
def test_add_args_with_custom_values(self):
|
||||
"""Test add_args with custom command line values"""
|
||||
|
|
@ -246,31 +277,44 @@ class TestCassandraStorageProcessor:
|
|||
with patch('trustgraph.storage.triples.cassandra.write.TriplesStoreService.add_args'):
|
||||
Processor.add_args(parser)
|
||||
|
||||
# Test parsing with custom values
|
||||
# Test parsing with custom values (new cassandra_* arguments)
|
||||
args = parser.parse_args([
|
||||
'--graph-host', 'cassandra.example.com',
|
||||
'--graph-username', 'testuser',
|
||||
'--graph-password', 'testpass'
|
||||
'--cassandra-host', 'cassandra.example.com',
|
||||
'--cassandra-username', 'testuser',
|
||||
'--cassandra-password', 'testpass'
|
||||
])
|
||||
|
||||
assert args.graph_host == 'cassandra.example.com'
|
||||
assert args.graph_username == 'testuser'
|
||||
assert args.graph_password == 'testpass'
|
||||
assert args.cassandra_host == 'cassandra.example.com'
|
||||
assert args.cassandra_username == 'testuser'
|
||||
assert args.cassandra_password == 'testpass'
|
||||
|
||||
def test_add_args_short_form(self):
|
||||
"""Test add_args with short form arguments"""
|
||||
def test_add_args_with_env_vars(self):
|
||||
"""Test add_args shows environment variables in help text"""
|
||||
from argparse import ArgumentParser
|
||||
from unittest.mock import patch
|
||||
import os
|
||||
|
||||
parser = ArgumentParser()
|
||||
|
||||
# Set environment variables
|
||||
env_vars = {
|
||||
'CASSANDRA_HOST': 'env-host1,env-host2',
|
||||
'CASSANDRA_USERNAME': 'env-user',
|
||||
'CASSANDRA_PASSWORD': 'env-pass'
|
||||
}
|
||||
|
||||
with patch('trustgraph.storage.triples.cassandra.write.TriplesStoreService.add_args'):
|
||||
Processor.add_args(parser)
|
||||
|
||||
# Test parsing with short form
|
||||
args = parser.parse_args(['-g', 'short.example.com'])
|
||||
|
||||
assert args.graph_host == 'short.example.com'
|
||||
with patch.dict(os.environ, env_vars, clear=True):
|
||||
Processor.add_args(parser)
|
||||
|
||||
# Check that help text includes environment variable info
|
||||
help_text = parser.format_help()
|
||||
# Argparse may break lines, so check for components
|
||||
assert 'env-' in help_text and 'host1' in help_text
|
||||
assert 'env-host2' in help_text
|
||||
assert 'env-user' in help_text
|
||||
assert '<set>' in help_text # Password should be hidden
|
||||
assert 'env-pass' not in help_text # Password value not shown
|
||||
|
||||
@patch('trustgraph.storage.triples.cassandra.write.Processor.launch')
|
||||
def test_run_function(self, mock_launch):
|
||||
|
|
@ -282,7 +326,7 @@ class TestCassandraStorageProcessor:
|
|||
mock_launch.assert_called_once_with(default_ident, '\nGraph writer. Input is graph edge. Writes edges to Cassandra graph.\n')
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@patch('trustgraph.storage.triples.cassandra.write.TrustGraph')
|
||||
@patch('trustgraph.storage.triples.cassandra.write.KnowledgeGraph')
|
||||
async def test_store_triples_table_switching_between_different_tables(self, mock_trustgraph):
|
||||
"""Test table switching when different tables are used in sequence"""
|
||||
taskgroup_mock = MagicMock()
|
||||
|
|
@ -299,7 +343,7 @@ class TestCassandraStorageProcessor:
|
|||
mock_message1.triples = []
|
||||
|
||||
await processor.store_triples(mock_message1)
|
||||
assert processor.table == ('user1', 'collection1')
|
||||
assert processor.table == 'user1'
|
||||
assert processor.tg == mock_tg_instance1
|
||||
|
||||
# Second message with different table
|
||||
|
|
@ -309,14 +353,14 @@ class TestCassandraStorageProcessor:
|
|||
mock_message2.triples = []
|
||||
|
||||
await processor.store_triples(mock_message2)
|
||||
assert processor.table == ('user2', 'collection2')
|
||||
assert processor.table == 'user2'
|
||||
assert processor.tg == mock_tg_instance2
|
||||
|
||||
# Verify TrustGraph was created twice for different tables
|
||||
assert mock_trustgraph.call_count == 2
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@patch('trustgraph.storage.triples.cassandra.write.TrustGraph')
|
||||
@patch('trustgraph.storage.triples.cassandra.write.KnowledgeGraph')
|
||||
async def test_store_triples_with_special_characters_in_values(self, mock_trustgraph):
|
||||
"""Test storing triples with special characters and unicode"""
|
||||
taskgroup_mock = MagicMock()
|
||||
|
|
@ -340,13 +384,14 @@ class TestCassandraStorageProcessor:
|
|||
|
||||
# Verify the triple was inserted with special characters preserved
|
||||
mock_tg_instance.insert.assert_called_once_with(
|
||||
'test_collection',
|
||||
'subject with spaces & symbols',
|
||||
'predicate:with/colons',
|
||||
'object with "quotes" and unicode: ñáéíóú'
|
||||
)
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@patch('trustgraph.storage.triples.cassandra.write.TrustGraph')
|
||||
@patch('trustgraph.storage.triples.cassandra.write.KnowledgeGraph')
|
||||
async def test_store_triples_preserves_old_table_on_exception(self, mock_trustgraph):
|
||||
"""Test that table remains unchanged when TrustGraph creation fails"""
|
||||
taskgroup_mock = MagicMock()
|
||||
|
|
@ -370,4 +415,99 @@ class TestCassandraStorageProcessor:
|
|||
# Table should remain unchanged since self.table = table happens after try/except
|
||||
assert processor.table == ('old_user', 'old_collection')
|
||||
# TrustGraph should be set to None though
|
||||
assert processor.tg is None
|
||||
assert processor.tg is None
|
||||
|
||||
|
||||
class TestCassandraPerformanceOptimizations:
|
||||
"""Test cases for multi-table performance optimizations"""
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@patch('trustgraph.storage.triples.cassandra.write.KnowledgeGraph')
|
||||
async def test_legacy_mode_uses_single_table(self, mock_trustgraph):
|
||||
"""Test that legacy mode still works with single table"""
|
||||
taskgroup_mock = MagicMock()
|
||||
mock_tg_instance = MagicMock()
|
||||
mock_trustgraph.return_value = mock_tg_instance
|
||||
|
||||
with patch.dict('os.environ', {'CASSANDRA_USE_LEGACY': 'true'}):
|
||||
processor = Processor(taskgroup=taskgroup_mock)
|
||||
|
||||
mock_message = MagicMock()
|
||||
mock_message.metadata.user = 'user1'
|
||||
mock_message.metadata.collection = 'collection1'
|
||||
mock_message.triples = []
|
||||
|
||||
await processor.store_triples(mock_message)
|
||||
|
||||
# Verify KnowledgeGraph instance uses legacy mode
|
||||
kg_instance = mock_trustgraph.return_value
|
||||
assert kg_instance is not None
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@patch('trustgraph.storage.triples.cassandra.write.KnowledgeGraph')
|
||||
async def test_optimized_mode_uses_multi_table(self, mock_trustgraph):
|
||||
"""Test that optimized mode uses multi-table schema"""
|
||||
taskgroup_mock = MagicMock()
|
||||
mock_tg_instance = MagicMock()
|
||||
mock_trustgraph.return_value = mock_tg_instance
|
||||
|
||||
with patch.dict('os.environ', {'CASSANDRA_USE_LEGACY': 'false'}):
|
||||
processor = Processor(taskgroup=taskgroup_mock)
|
||||
|
||||
mock_message = MagicMock()
|
||||
mock_message.metadata.user = 'user1'
|
||||
mock_message.metadata.collection = 'collection1'
|
||||
mock_message.triples = []
|
||||
|
||||
await processor.store_triples(mock_message)
|
||||
|
||||
# Verify KnowledgeGraph instance is in optimized mode
|
||||
kg_instance = mock_trustgraph.return_value
|
||||
assert kg_instance is not None
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@patch('trustgraph.storage.triples.cassandra.write.KnowledgeGraph')
|
||||
async def test_batch_write_consistency(self, mock_trustgraph):
|
||||
"""Test that all tables stay consistent during batch writes"""
|
||||
taskgroup_mock = MagicMock()
|
||||
mock_tg_instance = MagicMock()
|
||||
mock_trustgraph.return_value = mock_tg_instance
|
||||
|
||||
processor = Processor(taskgroup=taskgroup_mock)
|
||||
|
||||
# Create test triple
|
||||
triple = MagicMock()
|
||||
triple.s.value = 'test_subject'
|
||||
triple.p.value = 'test_predicate'
|
||||
triple.o.value = 'test_object'
|
||||
|
||||
mock_message = MagicMock()
|
||||
mock_message.metadata.user = 'user1'
|
||||
mock_message.metadata.collection = 'collection1'
|
||||
mock_message.triples = [triple]
|
||||
|
||||
await processor.store_triples(mock_message)
|
||||
|
||||
# Verify insert was called for the triple (implementation details tested in KnowledgeGraph)
|
||||
mock_tg_instance.insert.assert_called_once_with(
|
||||
'collection1', 'test_subject', 'test_predicate', 'test_object'
|
||||
)
|
||||
|
||||
def test_environment_variable_controls_mode(self):
|
||||
"""Test that CASSANDRA_USE_LEGACY environment variable controls operation mode"""
|
||||
taskgroup_mock = MagicMock()
|
||||
|
||||
# Test legacy mode
|
||||
with patch.dict('os.environ', {'CASSANDRA_USE_LEGACY': 'true'}):
|
||||
processor = Processor(taskgroup=taskgroup_mock)
|
||||
# Mode is determined in KnowledgeGraph initialization
|
||||
|
||||
# Test optimized mode
|
||||
with patch.dict('os.environ', {'CASSANDRA_USE_LEGACY': 'false'}):
|
||||
processor = Processor(taskgroup=taskgroup_mock)
|
||||
# Mode is determined in KnowledgeGraph initialization
|
||||
|
||||
# Test default mode (optimized when env var not set)
|
||||
with patch.dict('os.environ', {}, clear=True):
|
||||
processor = Processor(taskgroup=taskgroup_mock)
|
||||
# Mode is determined in KnowledgeGraph initialization
|
||||
|
|
@ -86,15 +86,17 @@ class TestFalkorDBStorageProcessor:
|
|||
mock_result = MagicMock()
|
||||
mock_result.nodes_created = 1
|
||||
mock_result.run_time_ms = 10
|
||||
|
||||
|
||||
processor.io.query.return_value = mock_result
|
||||
|
||||
processor.create_node(test_uri)
|
||||
|
||||
|
||||
processor.create_node(test_uri, 'test_user', 'test_collection')
|
||||
|
||||
processor.io.query.assert_called_once_with(
|
||||
"MERGE (n:Node {uri: $uri})",
|
||||
"MERGE (n:Node {uri: $uri, user: $user, collection: $collection})",
|
||||
params={
|
||||
"uri": test_uri,
|
||||
"user": 'test_user',
|
||||
"collection": 'test_collection',
|
||||
},
|
||||
)
|
||||
|
||||
|
|
@ -104,15 +106,17 @@ class TestFalkorDBStorageProcessor:
|
|||
mock_result = MagicMock()
|
||||
mock_result.nodes_created = 1
|
||||
mock_result.run_time_ms = 10
|
||||
|
||||
|
||||
processor.io.query.return_value = mock_result
|
||||
|
||||
processor.create_literal(test_value)
|
||||
|
||||
|
||||
processor.create_literal(test_value, 'test_user', 'test_collection')
|
||||
|
||||
processor.io.query.assert_called_once_with(
|
||||
"MERGE (n:Literal {value: $value})",
|
||||
"MERGE (n:Literal {value: $value, user: $user, collection: $collection})",
|
||||
params={
|
||||
"value": test_value,
|
||||
"user": 'test_user',
|
||||
"collection": 'test_collection',
|
||||
},
|
||||
)
|
||||
|
||||
|
|
@ -121,23 +125,25 @@ class TestFalkorDBStorageProcessor:
|
|||
src_uri = 'http://example.com/src'
|
||||
pred_uri = 'http://example.com/pred'
|
||||
dest_uri = 'http://example.com/dest'
|
||||
|
||||
|
||||
mock_result = MagicMock()
|
||||
mock_result.nodes_created = 0
|
||||
mock_result.run_time_ms = 5
|
||||
|
||||
|
||||
processor.io.query.return_value = mock_result
|
||||
|
||||
processor.relate_node(src_uri, pred_uri, dest_uri)
|
||||
|
||||
|
||||
processor.relate_node(src_uri, pred_uri, dest_uri, 'test_user', 'test_collection')
|
||||
|
||||
processor.io.query.assert_called_once_with(
|
||||
"MATCH (src:Node {uri: $src}) "
|
||||
"MATCH (dest:Node {uri: $dest}) "
|
||||
"MERGE (src)-[:Rel {uri: $uri}]->(dest)",
|
||||
"MATCH (src:Node {uri: $src, user: $user, collection: $collection}) "
|
||||
"MATCH (dest:Node {uri: $dest, user: $user, collection: $collection}) "
|
||||
"MERGE (src)-[:Rel {uri: $uri, user: $user, collection: $collection}]->(dest)",
|
||||
params={
|
||||
"src": src_uri,
|
||||
"dest": dest_uri,
|
||||
"uri": pred_uri,
|
||||
"user": 'test_user',
|
||||
"collection": 'test_collection',
|
||||
},
|
||||
)
|
||||
|
||||
|
|
@ -146,23 +152,25 @@ class TestFalkorDBStorageProcessor:
|
|||
src_uri = 'http://example.com/src'
|
||||
pred_uri = 'http://example.com/pred'
|
||||
literal_value = 'literal destination'
|
||||
|
||||
|
||||
mock_result = MagicMock()
|
||||
mock_result.nodes_created = 0
|
||||
mock_result.run_time_ms = 5
|
||||
|
||||
|
||||
processor.io.query.return_value = mock_result
|
||||
|
||||
processor.relate_literal(src_uri, pred_uri, literal_value)
|
||||
|
||||
|
||||
processor.relate_literal(src_uri, pred_uri, literal_value, 'test_user', 'test_collection')
|
||||
|
||||
processor.io.query.assert_called_once_with(
|
||||
"MATCH (src:Node {uri: $src}) "
|
||||
"MATCH (dest:Literal {value: $dest}) "
|
||||
"MERGE (src)-[:Rel {uri: $uri}]->(dest)",
|
||||
"MATCH (src:Node {uri: $src, user: $user, collection: $collection}) "
|
||||
"MATCH (dest:Literal {value: $dest, user: $user, collection: $collection}) "
|
||||
"MERGE (src)-[:Rel {uri: $uri, user: $user, collection: $collection}]->(dest)",
|
||||
params={
|
||||
"src": src_uri,
|
||||
"dest": literal_value,
|
||||
"uri": pred_uri,
|
||||
"user": 'test_user',
|
||||
"collection": 'test_collection',
|
||||
},
|
||||
)
|
||||
|
||||
|
|
@ -191,14 +199,16 @@ class TestFalkorDBStorageProcessor:
|
|||
# Verify queries were called in the correct order
|
||||
expected_calls = [
|
||||
# Create subject node
|
||||
(("MERGE (n:Node {uri: $uri})",), {"params": {"uri": "http://example.com/subject"}}),
|
||||
(("MERGE (n:Node {uri: $uri, user: $user, collection: $collection})",),
|
||||
{"params": {"uri": "http://example.com/subject", "user": "test_user", "collection": "test_collection"}}),
|
||||
# Create object node
|
||||
(("MERGE (n:Node {uri: $uri})",), {"params": {"uri": "http://example.com/object"}}),
|
||||
(("MERGE (n:Node {uri: $uri, user: $user, collection: $collection})",),
|
||||
{"params": {"uri": "http://example.com/object", "user": "test_user", "collection": "test_collection"}}),
|
||||
# Create relationship
|
||||
(("MATCH (src:Node {uri: $src}) "
|
||||
"MATCH (dest:Node {uri: $dest}) "
|
||||
"MERGE (src)-[:Rel {uri: $uri}]->(dest)",),
|
||||
{"params": {"src": "http://example.com/subject", "dest": "http://example.com/object", "uri": "http://example.com/predicate"}}),
|
||||
(("MATCH (src:Node {uri: $src, user: $user, collection: $collection}) "
|
||||
"MATCH (dest:Node {uri: $dest, user: $user, collection: $collection}) "
|
||||
"MERGE (src)-[:Rel {uri: $uri, user: $user, collection: $collection}]->(dest)",),
|
||||
{"params": {"src": "http://example.com/subject", "dest": "http://example.com/object", "uri": "http://example.com/predicate", "user": "test_user", "collection": "test_collection"}}),
|
||||
]
|
||||
|
||||
assert processor.io.query.call_count == 3
|
||||
|
|
@ -220,14 +230,16 @@ class TestFalkorDBStorageProcessor:
|
|||
# Verify queries were called in the correct order
|
||||
expected_calls = [
|
||||
# Create subject node
|
||||
(("MERGE (n:Node {uri: $uri})",), {"params": {"uri": "http://example.com/subject"}}),
|
||||
(("MERGE (n:Node {uri: $uri, user: $user, collection: $collection})",),
|
||||
{"params": {"uri": "http://example.com/subject", "user": "test_user", "collection": "test_collection"}}),
|
||||
# Create literal object
|
||||
(("MERGE (n:Literal {value: $value})",), {"params": {"value": "literal object"}}),
|
||||
(("MERGE (n:Literal {value: $value, user: $user, collection: $collection})",),
|
||||
{"params": {"value": "literal object", "user": "test_user", "collection": "test_collection"}}),
|
||||
# Create relationship
|
||||
(("MATCH (src:Node {uri: $src}) "
|
||||
"MATCH (dest:Literal {value: $dest}) "
|
||||
"MERGE (src)-[:Rel {uri: $uri}]->(dest)",),
|
||||
{"params": {"src": "http://example.com/subject", "dest": "literal object", "uri": "http://example.com/predicate"}}),
|
||||
(("MATCH (src:Node {uri: $src, user: $user, collection: $collection}) "
|
||||
"MATCH (dest:Literal {value: $dest, user: $user, collection: $collection}) "
|
||||
"MERGE (src)-[:Rel {uri: $uri, user: $user, collection: $collection}]->(dest)",),
|
||||
{"params": {"src": "http://example.com/subject", "dest": "literal object", "uri": "http://example.com/predicate", "user": "test_user", "collection": "test_collection"}}),
|
||||
]
|
||||
|
||||
assert processor.io.query.call_count == 3
|
||||
|
|
@ -408,12 +420,14 @@ class TestFalkorDBStorageProcessor:
|
|||
|
||||
processor.io.query.return_value = mock_result
|
||||
|
||||
processor.create_node(test_uri)
|
||||
processor.create_node(test_uri, 'test_user', 'test_collection')
|
||||
|
||||
processor.io.query.assert_called_once_with(
|
||||
"MERGE (n:Node {uri: $uri})",
|
||||
"MERGE (n:Node {uri: $uri, user: $user, collection: $collection})",
|
||||
params={
|
||||
"uri": test_uri,
|
||||
"user": 'test_user',
|
||||
"collection": 'test_collection',
|
||||
},
|
||||
)
|
||||
|
||||
|
|
@ -426,11 +440,13 @@ class TestFalkorDBStorageProcessor:
|
|||
|
||||
processor.io.query.return_value = mock_result
|
||||
|
||||
processor.create_literal(test_value)
|
||||
processor.create_literal(test_value, 'test_user', 'test_collection')
|
||||
|
||||
processor.io.query.assert_called_once_with(
|
||||
"MERGE (n:Literal {value: $value})",
|
||||
"MERGE (n:Literal {value: $value, user: $user, collection: $collection})",
|
||||
params={
|
||||
"value": test_value,
|
||||
"user": 'test_user',
|
||||
"collection": 'test_collection',
|
||||
},
|
||||
)
|
||||
|
|
@ -99,12 +99,16 @@ class TestMemgraphStorageProcessor:
|
|||
|
||||
processor = Processor(taskgroup=taskgroup_mock)
|
||||
|
||||
# Verify index creation calls
|
||||
# Verify index creation calls (now includes user/collection indexes)
|
||||
expected_calls = [
|
||||
"CREATE INDEX ON :Node",
|
||||
"CREATE INDEX ON :Node(uri)",
|
||||
"CREATE INDEX ON :Literal",
|
||||
"CREATE INDEX ON :Literal(value)"
|
||||
"CREATE INDEX ON :Literal(value)",
|
||||
"CREATE INDEX ON :Node(user)",
|
||||
"CREATE INDEX ON :Node(collection)",
|
||||
"CREATE INDEX ON :Literal(user)",
|
||||
"CREATE INDEX ON :Literal(collection)"
|
||||
]
|
||||
|
||||
assert mock_session.run.call_count == len(expected_calls)
|
||||
|
|
@ -127,8 +131,8 @@ class TestMemgraphStorageProcessor:
|
|||
# Should not raise an exception
|
||||
processor = Processor(taskgroup=taskgroup_mock)
|
||||
|
||||
# Verify all index creation calls were attempted
|
||||
assert mock_session.run.call_count == 4
|
||||
# Verify all index creation calls were attempted (8 total)
|
||||
assert mock_session.run.call_count == 8
|
||||
|
||||
def test_create_node(self, processor):
|
||||
"""Test node creation"""
|
||||
|
|
@ -141,11 +145,13 @@ class TestMemgraphStorageProcessor:
|
|||
|
||||
processor.io.execute_query.return_value = mock_result
|
||||
|
||||
processor.create_node(test_uri)
|
||||
processor.create_node(test_uri, "test_user", "test_collection")
|
||||
|
||||
processor.io.execute_query.assert_called_once_with(
|
||||
"MERGE (n:Node {uri: $uri})",
|
||||
"MERGE (n:Node {uri: $uri, user: $user, collection: $collection})",
|
||||
uri=test_uri,
|
||||
user="test_user",
|
||||
collection="test_collection",
|
||||
database_=processor.db
|
||||
)
|
||||
|
||||
|
|
@ -160,11 +166,13 @@ class TestMemgraphStorageProcessor:
|
|||
|
||||
processor.io.execute_query.return_value = mock_result
|
||||
|
||||
processor.create_literal(test_value)
|
||||
processor.create_literal(test_value, "test_user", "test_collection")
|
||||
|
||||
processor.io.execute_query.assert_called_once_with(
|
||||
"MERGE (n:Literal {value: $value})",
|
||||
"MERGE (n:Literal {value: $value, user: $user, collection: $collection})",
|
||||
value=test_value,
|
||||
user="test_user",
|
||||
collection="test_collection",
|
||||
database_=processor.db
|
||||
)
|
||||
|
||||
|
|
@ -182,13 +190,14 @@ class TestMemgraphStorageProcessor:
|
|||
|
||||
processor.io.execute_query.return_value = mock_result
|
||||
|
||||
processor.relate_node(src_uri, pred_uri, dest_uri)
|
||||
processor.relate_node(src_uri, pred_uri, dest_uri, "test_user", "test_collection")
|
||||
|
||||
processor.io.execute_query.assert_called_once_with(
|
||||
"MATCH (src:Node {uri: $src}) "
|
||||
"MATCH (dest:Node {uri: $dest}) "
|
||||
"MERGE (src)-[:Rel {uri: $uri}]->(dest)",
|
||||
"MATCH (src:Node {uri: $src, user: $user, collection: $collection}) "
|
||||
"MATCH (dest:Node {uri: $dest, user: $user, collection: $collection}) "
|
||||
"MERGE (src)-[:Rel {uri: $uri, user: $user, collection: $collection}]->(dest)",
|
||||
src=src_uri, dest=dest_uri, uri=pred_uri,
|
||||
user="test_user", collection="test_collection",
|
||||
database_=processor.db
|
||||
)
|
||||
|
||||
|
|
@ -206,13 +215,14 @@ class TestMemgraphStorageProcessor:
|
|||
|
||||
processor.io.execute_query.return_value = mock_result
|
||||
|
||||
processor.relate_literal(src_uri, pred_uri, literal_value)
|
||||
processor.relate_literal(src_uri, pred_uri, literal_value, "test_user", "test_collection")
|
||||
|
||||
processor.io.execute_query.assert_called_once_with(
|
||||
"MATCH (src:Node {uri: $src}) "
|
||||
"MATCH (dest:Literal {value: $dest}) "
|
||||
"MERGE (src)-[:Rel {uri: $uri}]->(dest)",
|
||||
"MATCH (src:Node {uri: $src, user: $user, collection: $collection}) "
|
||||
"MATCH (dest:Literal {value: $dest, user: $user, collection: $collection}) "
|
||||
"MERGE (src)-[:Rel {uri: $uri, user: $user, collection: $collection}]->(dest)",
|
||||
src=src_uri, dest=literal_value, uri=pred_uri,
|
||||
user="test_user", collection="test_collection",
|
||||
database_=processor.db
|
||||
)
|
||||
|
||||
|
|
@ -226,19 +236,22 @@ class TestMemgraphStorageProcessor:
|
|||
o=Value(value='http://example.com/object', is_uri=True)
|
||||
)
|
||||
|
||||
processor.create_triple(mock_tx, triple)
|
||||
processor.create_triple(mock_tx, triple, "test_user", "test_collection")
|
||||
|
||||
# Verify transaction calls
|
||||
expected_calls = [
|
||||
# Create subject node
|
||||
("MERGE (n:Node {uri: $uri})", {'uri': 'http://example.com/subject'}),
|
||||
("MERGE (n:Node {uri: $uri, user: $user, collection: $collection})",
|
||||
{'uri': 'http://example.com/subject', 'user': 'test_user', 'collection': 'test_collection'}),
|
||||
# Create object node
|
||||
("MERGE (n:Node {uri: $uri})", {'uri': 'http://example.com/object'}),
|
||||
("MERGE (n:Node {uri: $uri, user: $user, collection: $collection})",
|
||||
{'uri': 'http://example.com/object', 'user': 'test_user', 'collection': 'test_collection'}),
|
||||
# Create relationship
|
||||
("MATCH (src:Node {uri: $src}) "
|
||||
"MATCH (dest:Node {uri: $dest}) "
|
||||
"MERGE (src)-[:Rel {uri: $uri}]->(dest)",
|
||||
{'src': 'http://example.com/subject', 'dest': 'http://example.com/object', 'uri': 'http://example.com/predicate'})
|
||||
("MATCH (src:Node {uri: $src, user: $user, collection: $collection}) "
|
||||
"MATCH (dest:Node {uri: $dest, user: $user, collection: $collection}) "
|
||||
"MERGE (src)-[:Rel {uri: $uri, user: $user, collection: $collection}]->(dest)",
|
||||
{'src': 'http://example.com/subject', 'dest': 'http://example.com/object', 'uri': 'http://example.com/predicate',
|
||||
'user': 'test_user', 'collection': 'test_collection'})
|
||||
]
|
||||
|
||||
assert mock_tx.run.call_count == 3
|
||||
|
|
@ -257,19 +270,22 @@ class TestMemgraphStorageProcessor:
|
|||
o=Value(value='literal object', is_uri=False)
|
||||
)
|
||||
|
||||
processor.create_triple(mock_tx, triple)
|
||||
processor.create_triple(mock_tx, triple, "test_user", "test_collection")
|
||||
|
||||
# Verify transaction calls
|
||||
expected_calls = [
|
||||
# Create subject node
|
||||
("MERGE (n:Node {uri: $uri})", {'uri': 'http://example.com/subject'}),
|
||||
("MERGE (n:Node {uri: $uri, user: $user, collection: $collection})",
|
||||
{'uri': 'http://example.com/subject', 'user': 'test_user', 'collection': 'test_collection'}),
|
||||
# Create literal object
|
||||
("MERGE (n:Literal {value: $value})", {'value': 'literal object'}),
|
||||
("MERGE (n:Literal {value: $value, user: $user, collection: $collection})",
|
||||
{'value': 'literal object', 'user': 'test_user', 'collection': 'test_collection'}),
|
||||
# Create relationship
|
||||
("MATCH (src:Node {uri: $src}) "
|
||||
"MATCH (dest:Literal {value: $dest}) "
|
||||
"MERGE (src)-[:Rel {uri: $uri}]->(dest)",
|
||||
{'src': 'http://example.com/subject', 'dest': 'literal object', 'uri': 'http://example.com/predicate'})
|
||||
("MATCH (src:Node {uri: $src, user: $user, collection: $collection}) "
|
||||
"MATCH (dest:Literal {value: $dest, user: $user, collection: $collection}) "
|
||||
"MERGE (src)-[:Rel {uri: $uri, user: $user, collection: $collection}]->(dest)",
|
||||
{'src': 'http://example.com/subject', 'dest': 'literal object', 'uri': 'http://example.com/predicate',
|
||||
'user': 'test_user', 'collection': 'test_collection'})
|
||||
]
|
||||
|
||||
assert mock_tx.run.call_count == 3
|
||||
|
|
@ -281,33 +297,42 @@ class TestMemgraphStorageProcessor:
|
|||
@pytest.mark.asyncio
|
||||
async def test_store_triples_single_triple(self, processor, mock_message):
|
||||
"""Test storing a single triple"""
|
||||
mock_session = MagicMock()
|
||||
processor.io.session.return_value.__enter__.return_value = mock_session
|
||||
# Mock the execute_query method used by the direct methods
|
||||
mock_result = MagicMock()
|
||||
mock_summary = MagicMock()
|
||||
mock_summary.counters.nodes_created = 1
|
||||
mock_summary.result_available_after = 10
|
||||
mock_result.summary = mock_summary
|
||||
processor.io.execute_query.return_value = mock_result
|
||||
|
||||
# Reset the mock to clear the initialization call
|
||||
processor.io.session.reset_mock()
|
||||
# Reset the mock to clear initialization calls
|
||||
processor.io.execute_query.reset_mock()
|
||||
|
||||
await processor.store_triples(mock_message)
|
||||
|
||||
# Verify session was created with correct database
|
||||
processor.io.session.assert_called_once_with(database=processor.db)
|
||||
# Verify execute_query was called for create_node, create_literal, and relate_literal
|
||||
# (since mock_message has a literal object)
|
||||
assert processor.io.execute_query.call_count == 3
|
||||
|
||||
# Verify execute_write was called once per triple
|
||||
mock_session.execute_write.assert_called_once()
|
||||
|
||||
# Verify the triple was passed to create_triple
|
||||
call_args = mock_session.execute_write.call_args
|
||||
assert call_args[0][0] == processor.create_triple
|
||||
assert call_args[0][1] == mock_message.triples[0]
|
||||
# Verify user/collection parameters were included
|
||||
for call in processor.io.execute_query.call_args_list:
|
||||
call_kwargs = call.kwargs if hasattr(call, 'kwargs') else call[1]
|
||||
assert 'user' in call_kwargs
|
||||
assert 'collection' in call_kwargs
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_store_triples_multiple_triples(self, processor):
|
||||
"""Test storing multiple triples"""
|
||||
mock_session = MagicMock()
|
||||
processor.io.session.return_value.__enter__.return_value = mock_session
|
||||
# Mock the execute_query method used by the direct methods
|
||||
mock_result = MagicMock()
|
||||
mock_summary = MagicMock()
|
||||
mock_summary.counters.nodes_created = 1
|
||||
mock_summary.result_available_after = 10
|
||||
mock_result.summary = mock_summary
|
||||
processor.io.execute_query.return_value = mock_result
|
||||
|
||||
# Reset the mock to clear the initialization call
|
||||
processor.io.session.reset_mock()
|
||||
# Reset the mock to clear initialization calls
|
||||
processor.io.execute_query.reset_mock()
|
||||
|
||||
# Create message with multiple triples
|
||||
message = MagicMock()
|
||||
|
|
@ -329,16 +354,17 @@ class TestMemgraphStorageProcessor:
|
|||
|
||||
await processor.store_triples(message)
|
||||
|
||||
# Verify session was called twice (once per triple)
|
||||
assert processor.io.session.call_count == 2
|
||||
# Verify execute_query was called:
|
||||
# Triple1: create_node(s) + create_literal(o) + relate_literal = 3 calls
|
||||
# Triple2: create_node(s) + create_node(o) + relate_node = 3 calls
|
||||
# Total: 6 calls
|
||||
assert processor.io.execute_query.call_count == 6
|
||||
|
||||
# Verify execute_write was called once per triple
|
||||
assert mock_session.execute_write.call_count == 2
|
||||
|
||||
# Verify each triple was processed
|
||||
call_args_list = mock_session.execute_write.call_args_list
|
||||
assert call_args_list[0][0][1] == triple1
|
||||
assert call_args_list[1][0][1] == triple2
|
||||
# Verify user/collection parameters were included in all calls
|
||||
for call in processor.io.execute_query.call_args_list:
|
||||
call_kwargs = call.kwargs if hasattr(call, 'kwargs') else call[1]
|
||||
assert call_kwargs['user'] == 'test_user'
|
||||
assert call_kwargs['collection'] == 'test_collection'
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_store_triples_empty_list(self, processor):
|
||||
|
|
|
|||
|
|
@ -62,14 +62,18 @@ class TestNeo4jStorageProcessor:
|
|||
|
||||
processor = Processor(taskgroup=taskgroup_mock)
|
||||
|
||||
# Verify index creation queries were executed
|
||||
# Verify index creation queries were executed (now includes 7 indexes)
|
||||
expected_calls = [
|
||||
"CREATE INDEX Node_uri FOR (n:Node) ON (n.uri)",
|
||||
"CREATE INDEX Literal_value FOR (n:Literal) ON (n.value)",
|
||||
"CREATE INDEX Rel_uri FOR ()-[r:Rel]-() ON (r.uri)"
|
||||
"CREATE INDEX Rel_uri FOR ()-[r:Rel]-() ON (r.uri)",
|
||||
"CREATE INDEX node_user_collection_uri FOR (n:Node) ON (n.user, n.collection, n.uri)",
|
||||
"CREATE INDEX literal_user_collection_value FOR (n:Literal) ON (n.user, n.collection, n.value)",
|
||||
"CREATE INDEX rel_user FOR ()-[r:Rel]-() ON (r.user)",
|
||||
"CREATE INDEX rel_collection FOR ()-[r:Rel]-() ON (r.collection)"
|
||||
]
|
||||
|
||||
assert mock_session.run.call_count == 3
|
||||
assert mock_session.run.call_count == 7
|
||||
for expected_query in expected_calls:
|
||||
mock_session.run.assert_any_call(expected_query)
|
||||
|
||||
|
|
@ -88,8 +92,8 @@ class TestNeo4jStorageProcessor:
|
|||
# Should not raise exception - they should be caught and ignored
|
||||
processor = Processor(taskgroup=taskgroup_mock)
|
||||
|
||||
# Should have tried to create all 3 indexes despite exceptions
|
||||
assert mock_session.run.call_count == 3
|
||||
# Should have tried to create all 7 indexes despite exceptions
|
||||
assert mock_session.run.call_count == 7
|
||||
|
||||
@patch('trustgraph.storage.triples.neo4j.write.GraphDatabase')
|
||||
def test_create_node(self, mock_graph_db):
|
||||
|
|
@ -111,11 +115,13 @@ class TestNeo4jStorageProcessor:
|
|||
processor = Processor(taskgroup=taskgroup_mock)
|
||||
|
||||
# Test create_node
|
||||
processor.create_node("http://example.com/node")
|
||||
processor.create_node("http://example.com/node", "test_user", "test_collection")
|
||||
|
||||
mock_driver.execute_query.assert_called_with(
|
||||
"MERGE (n:Node {uri: $uri})",
|
||||
"MERGE (n:Node {uri: $uri, user: $user, collection: $collection})",
|
||||
uri="http://example.com/node",
|
||||
user="test_user",
|
||||
collection="test_collection",
|
||||
database_="neo4j"
|
||||
)
|
||||
|
||||
|
|
@ -139,11 +145,13 @@ class TestNeo4jStorageProcessor:
|
|||
processor = Processor(taskgroup=taskgroup_mock)
|
||||
|
||||
# Test create_literal
|
||||
processor.create_literal("literal value")
|
||||
processor.create_literal("literal value", "test_user", "test_collection")
|
||||
|
||||
mock_driver.execute_query.assert_called_with(
|
||||
"MERGE (n:Literal {value: $value})",
|
||||
"MERGE (n:Literal {value: $value, user: $user, collection: $collection})",
|
||||
value="literal value",
|
||||
user="test_user",
|
||||
collection="test_collection",
|
||||
database_="neo4j"
|
||||
)
|
||||
|
||||
|
|
@ -170,16 +178,20 @@ class TestNeo4jStorageProcessor:
|
|||
processor.relate_node(
|
||||
"http://example.com/subject",
|
||||
"http://example.com/predicate",
|
||||
"http://example.com/object"
|
||||
"http://example.com/object",
|
||||
"test_user",
|
||||
"test_collection"
|
||||
)
|
||||
|
||||
mock_driver.execute_query.assert_called_with(
|
||||
"MATCH (src:Node {uri: $src}) "
|
||||
"MATCH (dest:Node {uri: $dest}) "
|
||||
"MERGE (src)-[:Rel {uri: $uri}]->(dest)",
|
||||
"MATCH (src:Node {uri: $src, user: $user, collection: $collection}) "
|
||||
"MATCH (dest:Node {uri: $dest, user: $user, collection: $collection}) "
|
||||
"MERGE (src)-[:Rel {uri: $uri, user: $user, collection: $collection}]->(dest)",
|
||||
src="http://example.com/subject",
|
||||
dest="http://example.com/object",
|
||||
uri="http://example.com/predicate",
|
||||
user="test_user",
|
||||
collection="test_collection",
|
||||
database_="neo4j"
|
||||
)
|
||||
|
||||
|
|
@ -206,16 +218,20 @@ class TestNeo4jStorageProcessor:
|
|||
processor.relate_literal(
|
||||
"http://example.com/subject",
|
||||
"http://example.com/predicate",
|
||||
"literal value"
|
||||
"literal value",
|
||||
"test_user",
|
||||
"test_collection"
|
||||
)
|
||||
|
||||
mock_driver.execute_query.assert_called_with(
|
||||
"MATCH (src:Node {uri: $src}) "
|
||||
"MATCH (dest:Literal {value: $dest}) "
|
||||
"MERGE (src)-[:Rel {uri: $uri}]->(dest)",
|
||||
"MATCH (src:Node {uri: $src, user: $user, collection: $collection}) "
|
||||
"MATCH (dest:Literal {value: $dest, user: $user, collection: $collection}) "
|
||||
"MERGE (src)-[:Rel {uri: $uri, user: $user, collection: $collection}]->(dest)",
|
||||
src="http://example.com/subject",
|
||||
dest="literal value",
|
||||
uri="http://example.com/predicate",
|
||||
user="test_user",
|
||||
collection="test_collection",
|
||||
database_="neo4j"
|
||||
)
|
||||
|
||||
|
|
@ -246,9 +262,11 @@ class TestNeo4jStorageProcessor:
|
|||
triple.o.value = "http://example.com/object"
|
||||
triple.o.is_uri = True
|
||||
|
||||
# Create mock message
|
||||
# Create mock message with metadata
|
||||
mock_message = MagicMock()
|
||||
mock_message.triples = [triple]
|
||||
mock_message.metadata.user = "test_user"
|
||||
mock_message.metadata.collection = "test_collection"
|
||||
|
||||
await processor.store_triples(mock_message)
|
||||
|
||||
|
|
@ -257,23 +275,25 @@ class TestNeo4jStorageProcessor:
|
|||
expected_calls = [
|
||||
# Subject node creation
|
||||
(
|
||||
"MERGE (n:Node {uri: $uri})",
|
||||
{"uri": "http://example.com/subject", "database_": "neo4j"}
|
||||
"MERGE (n:Node {uri: $uri, user: $user, collection: $collection})",
|
||||
{"uri": "http://example.com/subject", "user": "test_user", "collection": "test_collection", "database_": "neo4j"}
|
||||
),
|
||||
# Object node creation
|
||||
(
|
||||
"MERGE (n:Node {uri: $uri})",
|
||||
{"uri": "http://example.com/object", "database_": "neo4j"}
|
||||
"MERGE (n:Node {uri: $uri, user: $user, collection: $collection})",
|
||||
{"uri": "http://example.com/object", "user": "test_user", "collection": "test_collection", "database_": "neo4j"}
|
||||
),
|
||||
# Relationship creation
|
||||
(
|
||||
"MATCH (src:Node {uri: $src}) "
|
||||
"MATCH (dest:Node {uri: $dest}) "
|
||||
"MERGE (src)-[:Rel {uri: $uri}]->(dest)",
|
||||
"MATCH (src:Node {uri: $src, user: $user, collection: $collection}) "
|
||||
"MATCH (dest:Node {uri: $dest, user: $user, collection: $collection}) "
|
||||
"MERGE (src)-[:Rel {uri: $uri, user: $user, collection: $collection}]->(dest)",
|
||||
{
|
||||
"src": "http://example.com/subject",
|
||||
"dest": "http://example.com/object",
|
||||
"uri": "http://example.com/predicate",
|
||||
"user": "test_user",
|
||||
"collection": "test_collection",
|
||||
"database_": "neo4j"
|
||||
}
|
||||
)
|
||||
|
|
@ -310,9 +330,11 @@ class TestNeo4jStorageProcessor:
|
|||
triple.o.value = "literal value"
|
||||
triple.o.is_uri = False
|
||||
|
||||
# Create mock message
|
||||
# Create mock message with metadata
|
||||
mock_message = MagicMock()
|
||||
mock_message.triples = [triple]
|
||||
mock_message.metadata.user = "test_user"
|
||||
mock_message.metadata.collection = "test_collection"
|
||||
|
||||
await processor.store_triples(mock_message)
|
||||
|
||||
|
|
@ -322,23 +344,25 @@ class TestNeo4jStorageProcessor:
|
|||
expected_calls = [
|
||||
# Subject node creation
|
||||
(
|
||||
"MERGE (n:Node {uri: $uri})",
|
||||
{"uri": "http://example.com/subject", "database_": "neo4j"}
|
||||
"MERGE (n:Node {uri: $uri, user: $user, collection: $collection})",
|
||||
{"uri": "http://example.com/subject", "user": "test_user", "collection": "test_collection", "database_": "neo4j"}
|
||||
),
|
||||
# Literal creation
|
||||
(
|
||||
"MERGE (n:Literal {value: $value})",
|
||||
{"value": "literal value", "database_": "neo4j"}
|
||||
"MERGE (n:Literal {value: $value, user: $user, collection: $collection})",
|
||||
{"value": "literal value", "user": "test_user", "collection": "test_collection", "database_": "neo4j"}
|
||||
),
|
||||
# Relationship creation
|
||||
(
|
||||
"MATCH (src:Node {uri: $src}) "
|
||||
"MATCH (dest:Literal {value: $dest}) "
|
||||
"MERGE (src)-[:Rel {uri: $uri}]->(dest)",
|
||||
"MATCH (src:Node {uri: $src, user: $user, collection: $collection}) "
|
||||
"MATCH (dest:Literal {value: $dest, user: $user, collection: $collection}) "
|
||||
"MERGE (src)-[:Rel {uri: $uri, user: $user, collection: $collection}]->(dest)",
|
||||
{
|
||||
"src": "http://example.com/subject",
|
||||
"dest": "literal value",
|
||||
"uri": "http://example.com/predicate",
|
||||
"user": "test_user",
|
||||
"collection": "test_collection",
|
||||
"database_": "neo4j"
|
||||
}
|
||||
)
|
||||
|
|
@ -381,9 +405,11 @@ class TestNeo4jStorageProcessor:
|
|||
triple2.o.value = "literal value"
|
||||
triple2.o.is_uri = False
|
||||
|
||||
# Create mock message
|
||||
# Create mock message with metadata
|
||||
mock_message = MagicMock()
|
||||
mock_message.triples = [triple1, triple2]
|
||||
mock_message.metadata.user = "test_user"
|
||||
mock_message.metadata.collection = "test_collection"
|
||||
|
||||
await processor.store_triples(mock_message)
|
||||
|
||||
|
|
@ -405,9 +431,11 @@ class TestNeo4jStorageProcessor:
|
|||
|
||||
processor = Processor(taskgroup=taskgroup_mock)
|
||||
|
||||
# Create mock message with empty triples
|
||||
# Create mock message with empty triples and metadata
|
||||
mock_message = MagicMock()
|
||||
mock_message.triples = []
|
||||
mock_message.metadata.user = "test_user"
|
||||
mock_message.metadata.collection = "test_collection"
|
||||
|
||||
await processor.store_triples(mock_message)
|
||||
|
||||
|
|
@ -521,28 +549,36 @@ class TestNeo4jStorageProcessor:
|
|||
|
||||
mock_message = MagicMock()
|
||||
mock_message.triples = [triple]
|
||||
mock_message.metadata.user = "test_user"
|
||||
mock_message.metadata.collection = "test_collection"
|
||||
|
||||
await processor.store_triples(mock_message)
|
||||
|
||||
# Verify the triple was processed with special characters preserved
|
||||
mock_driver.execute_query.assert_any_call(
|
||||
"MERGE (n:Node {uri: $uri})",
|
||||
"MERGE (n:Node {uri: $uri, user: $user, collection: $collection})",
|
||||
uri="http://example.com/subject with spaces",
|
||||
user="test_user",
|
||||
collection="test_collection",
|
||||
database_="neo4j"
|
||||
)
|
||||
|
||||
mock_driver.execute_query.assert_any_call(
|
||||
"MERGE (n:Literal {value: $value})",
|
||||
"MERGE (n:Literal {value: $value, user: $user, collection: $collection})",
|
||||
value='literal with "quotes" and unicode: ñáéíóú',
|
||||
user="test_user",
|
||||
collection="test_collection",
|
||||
database_="neo4j"
|
||||
)
|
||||
|
||||
mock_driver.execute_query.assert_any_call(
|
||||
"MATCH (src:Node {uri: $src}) "
|
||||
"MATCH (dest:Literal {value: $dest}) "
|
||||
"MERGE (src)-[:Rel {uri: $uri}]->(dest)",
|
||||
"MATCH (src:Node {uri: $src, user: $user, collection: $collection}) "
|
||||
"MATCH (dest:Literal {value: $dest, user: $user, collection: $collection}) "
|
||||
"MERGE (src)-[:Rel {uri: $uri, user: $user, collection: $collection}]->(dest)",
|
||||
src="http://example.com/subject with spaces",
|
||||
dest='literal with "quotes" and unicode: ñáéíóú',
|
||||
uri="http://example.com/predicate:with/symbols",
|
||||
user="test_user",
|
||||
collection="test_collection",
|
||||
database_="neo4j"
|
||||
)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue