Collection management (#520)

* Tech spec

* Refactored Cassanda knowledge graph for single table

* Collection management, librarian services to manage metadata and collection deletion
This commit is contained in:
cybermaggedon 2025-09-18 15:57:52 +01:00 committed by GitHub
parent 48016d8fb2
commit 13ff7d765d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
48 changed files with 2941 additions and 425 deletions

View file

@ -13,163 +13,146 @@ class TestMilvusCollectionNaming:
"""Test basic collection name creation"""
result = make_safe_collection_name(
user="test_user",
collection="test_collection",
dimension=384,
collection="test_collection",
prefix="doc"
)
assert result == "doc_test_user_test_collection_384"
assert result == "doc_test_user_test_collection"
def test_make_safe_collection_name_with_special_characters(self):
"""Test collection name creation with special characters that need sanitization"""
result = make_safe_collection_name(
user="user@domain.com",
collection="test-collection.v2",
dimension=768,
prefix="entity"
)
assert result == "entity_user_domain_com_test_collection_v2_768"
assert result == "entity_user_domain_com_test_collection_v2"
def test_make_safe_collection_name_with_unicode(self):
"""Test collection name creation with Unicode characters"""
result = make_safe_collection_name(
user="测试用户",
collection="colección_española",
dimension=512,
collection="colección_española",
prefix="doc"
)
assert result == "doc_default_colecci_n_espa_ola_512"
assert result == "doc_default_colecci_n_espa_ola"
def test_make_safe_collection_name_with_spaces(self):
"""Test collection name creation with spaces"""
result = make_safe_collection_name(
user="test user",
collection="my test collection",
dimension=256,
prefix="entity"
)
assert result == "entity_test_user_my_test_collection_256"
assert result == "entity_test_user_my_test_collection"
def test_make_safe_collection_name_with_multiple_consecutive_special_chars(self):
"""Test collection name creation with multiple consecutive special characters"""
result = make_safe_collection_name(
user="user@@@domain!!!",
collection="test---collection...v2",
dimension=384,
prefix="doc"
prefix="doc"
)
assert result == "doc_user_domain_test_collection_v2_384"
assert result == "doc_user_domain_test_collection_v2"
def test_make_safe_collection_name_with_leading_trailing_underscores(self):
"""Test collection name creation with leading/trailing special characters"""
result = make_safe_collection_name(
user="__test_user__",
collection="@@test_collection##",
dimension=128,
prefix="entity"
)
assert result == "entity_test_user_test_collection_128"
assert result == "entity_test_user_test_collection"
def test_make_safe_collection_name_empty_user(self):
"""Test collection name creation with empty user (should fallback to 'default')"""
result = make_safe_collection_name(
user="",
collection="test_collection",
dimension=384,
prefix="doc"
)
assert result == "doc_default_test_collection_384"
assert result == "doc_default_test_collection"
def test_make_safe_collection_name_empty_collection(self):
"""Test collection name creation with empty collection (should fallback to 'default')"""
result = make_safe_collection_name(
user="test_user",
collection="",
dimension=384,
prefix="doc"
)
assert result == "doc_test_user_default_384"
assert result == "doc_test_user_default"
def test_make_safe_collection_name_both_empty(self):
"""Test collection name creation with both user and collection empty"""
result = make_safe_collection_name(
user="",
collection="",
dimension=384,
prefix="doc"
)
assert result == "doc_default_default_384"
assert result == "doc_default_default"
def test_make_safe_collection_name_only_special_characters(self):
"""Test collection name creation with only special characters (should fallback to 'default')"""
result = make_safe_collection_name(
user="@@@!!!",
collection="---###",
dimension=512,
prefix="entity"
)
assert result == "entity_default_default_512"
assert result == "entity_default_default"
def test_make_safe_collection_name_whitespace_only(self):
"""Test collection name creation with whitespace-only strings"""
result = make_safe_collection_name(
user=" \n\t ",
collection=" \r\n ",
dimension=256,
prefix="doc"
)
assert result == "doc_default_default_256"
assert result == "doc_default_default"
def test_make_safe_collection_name_mixed_valid_invalid_chars(self):
"""Test collection name creation with mixed valid and invalid characters"""
result = make_safe_collection_name(
user="user123@test",
collection="coll_2023.v1",
dimension=384,
prefix="entity"
)
assert result == "entity_user123_test_coll_2023_v1_384"
assert result == "entity_user123_test_coll_2023_v1"
def test_make_safe_collection_name_different_prefixes(self):
"""Test collection name creation with different prefixes"""
user = "test_user"
collection = "test_collection"
dimension = 384
doc_result = make_safe_collection_name(user, collection, dimension, "doc")
entity_result = make_safe_collection_name(user, collection, dimension, "entity")
custom_result = make_safe_collection_name(user, collection, dimension, "custom")
assert doc_result == "doc_test_user_test_collection_384"
assert entity_result == "entity_test_user_test_collection_384"
assert custom_result == "custom_test_user_test_collection_384"
doc_result = make_safe_collection_name(user, collection, "doc")
entity_result = make_safe_collection_name(user, collection, "entity")
custom_result = make_safe_collection_name(user, collection, "custom")
assert doc_result == "doc_test_user_test_collection"
assert entity_result == "entity_test_user_test_collection"
assert custom_result == "custom_test_user_test_collection"
def test_make_safe_collection_name_different_dimensions(self):
"""Test collection name creation with different dimensions"""
"""Test collection name creation - dimension handling no longer part of function"""
user = "test_user"
collection = "test_collection"
prefix = "doc"
result_128 = make_safe_collection_name(user, collection, 128, prefix)
result_384 = make_safe_collection_name(user, collection, 384, prefix)
result_768 = make_safe_collection_name(user, collection, 768, prefix)
assert result_128 == "doc_test_user_test_collection_128"
assert result_384 == "doc_test_user_test_collection_384"
assert result_768 == "doc_test_user_test_collection_768"
# With new API, dimensions are handled separately, function always returns same result
result = make_safe_collection_name(user, collection, prefix)
assert result == "doc_test_user_test_collection"
def test_make_safe_collection_name_long_names(self):
"""Test collection name creation with very long user/collection names"""
long_user = "a" * 100
long_collection = "b" * 100
result = make_safe_collection_name(
user=long_user,
collection=long_collection,
dimension=384,
prefix="doc"
)
expected = f"doc_{long_user}_{long_collection}_384"
expected = f"doc_{long_user}_{long_collection}"
assert result == expected
assert len(result) > 200 # Verify it handles long names
@ -178,20 +161,18 @@ class TestMilvusCollectionNaming:
result = make_safe_collection_name(
user="user123",
collection="collection456",
dimension=384,
prefix="doc"
)
assert result == "doc_user123_collection456_384"
assert result == "doc_user123_collection456"
def test_make_safe_collection_name_case_sensitivity(self):
"""Test that collection name creation preserves case"""
result = make_safe_collection_name(
user="TestUser",
collection="TestCollection",
dimension=384,
prefix="Doc"
)
assert result == "Doc_TestUser_TestCollection_384"
assert result == "Doc_TestUser_TestCollection"
def test_make_safe_collection_name_realistic_examples(self):
"""Test collection name creation with realistic user/collection combinations"""
@ -202,30 +183,27 @@ class TestMilvusCollectionNaming:
("user_123", "test_collection", "user_123", "test_collection"),
("αβγ-user", "测试集合", "user", "default"),
]
for user, collection, expected_user, expected_collection in test_cases:
result = make_safe_collection_name(user, collection, 384, "doc")
assert result == f"doc_{expected_user}_{expected_collection}_384"
result = make_safe_collection_name(user, collection, "doc")
assert result == f"doc_{expected_user}_{expected_collection}"
def test_make_safe_collection_name_matches_qdrant_pattern(self):
"""Test that Milvus collection names follow similar pattern to Qdrant"""
"""Test that Milvus collection names follow similar pattern to Qdrant (but without dimension in name)"""
# Qdrant uses: "d_{user}_{collection}_{dimension}" and "t_{user}_{collection}_{dimension}"
# Milvus should use: "{prefix}_{safe_user}_{safe_collection}_{dimension}"
# New Milvus API uses: "{prefix}_{safe_user}_{safe_collection}" (dimension handled separately)
user = "test.user@domain.com"
collection = "test-collection.v2"
dimension = 384
doc_result = make_safe_collection_name(user, collection, dimension, "doc")
entity_result = make_safe_collection_name(user, collection, dimension, "entity")
# Should follow the pattern but with sanitized names
assert doc_result == "doc_test_user_domain_com_test_collection_v2_384"
assert entity_result == "entity_test_user_domain_com_test_collection_v2_384"
# Verify structure matches expected pattern (may have more underscores due to sanitization)
# The important thing is that it follows prefix_user_collection_dimension structure
doc_result = make_safe_collection_name(user, collection, "doc")
entity_result = make_safe_collection_name(user, collection, "entity")
# Should follow the pattern but with sanitized names and no dimension
assert doc_result == "doc_test_user_domain_com_test_collection_v2"
assert entity_result == "entity_test_user_domain_com_test_collection_v2"
# Verify structure matches expected pattern
assert doc_result.startswith("doc_")
assert doc_result.endswith("_384")
assert entity_result.startswith("entity_")
assert entity_result.endswith("_384")
# Dimension is no longer part of the collection name

View file

@ -32,7 +32,7 @@ class TestMilvusUserCollectionIntegration:
doc_vectors.insert(vector, "test document", user, collection)
expected_collection_name = make_safe_collection_name(
user, collection, len(vector), "doc"
user, collection, "doc"
)
# Verify collection was created with correct name
@ -58,7 +58,7 @@ class TestMilvusUserCollectionIntegration:
entity_vectors.insert(vector, "test entity", user, collection)
expected_collection_name = make_safe_collection_name(
user, collection, len(vector), "entity"
user, collection, "entity"
)
# Verify collection was created with correct name
@ -89,7 +89,7 @@ class TestMilvusUserCollectionIntegration:
result = doc_vectors.search(vector, user, collection, limit=5)
# Verify search was called with correct collection name
expected_collection_name = make_safe_collection_name(user, collection, 3, "doc")
expected_collection_name = make_safe_collection_name(user, collection, "doc")
mock_client.search.assert_called_once()
search_call = mock_client.search.call_args
assert search_call[1]["collection_name"] == expected_collection_name
@ -118,7 +118,7 @@ class TestMilvusUserCollectionIntegration:
result = entity_vectors.search(vector, user, collection, limit=5)
# Verify search was called with correct collection name
expected_collection_name = make_safe_collection_name(user, collection, 3, "entity")
expected_collection_name = make_safe_collection_name(user, collection, "entity")
mock_client.search.assert_called_once()
search_call = mock_client.search.call_args
assert search_call[1]["collection_name"] == expected_collection_name
@ -142,9 +142,9 @@ class TestMilvusUserCollectionIntegration:
collection_names = set(doc_vectors.collections.values())
expected_names = {
"doc_user1_collection1_3",
"doc_user2_collection2_3",
"doc_user1_collection2_3"
"doc_user1_collection1",
"doc_user2_collection2",
"doc_user1_collection2"
}
assert collection_names == expected_names
@ -167,9 +167,9 @@ class TestMilvusUserCollectionIntegration:
collection_names = set(entity_vectors.collections.values())
expected_names = {
"entity_user1_collection1_3",
"entity_user2_collection2_3",
"entity_user1_collection2_3"
"entity_user1_collection1",
"entity_user2_collection2",
"entity_user1_collection2"
}
assert collection_names == expected_names
@ -194,10 +194,13 @@ class TestMilvusUserCollectionIntegration:
collection_names = set(doc_vectors.collections.values())
expected_names = {
"doc_test_user_test_collection_3", # 3D
"doc_test_user_test_collection_4", # 4D
"doc_test_user_test_collection_2" # 2D
"doc_test_user_test_collection", # Same name for all dimensions
"doc_test_user_test_collection", # now stored per dimension in key
"doc_test_user_test_collection" # but collection name is the same
}
# Note: Now all dimensions use the same collection name, they are differentiated by the key
assert len(collection_names) == 1 # Only one unique collection name
assert "doc_test_user_test_collection" in collection_names
assert collection_names == expected_names
@patch('trustgraph.direct.milvus_doc_embeddings.MilvusClient')
@ -220,7 +223,7 @@ class TestMilvusUserCollectionIntegration:
# Verify only one collection was created
assert len(doc_vectors.collections) == 1
expected_collection_name = "doc_test_user_test_collection_3"
expected_collection_name = "doc_test_user_test_collection"
assert doc_vectors.collections[(3, user, collection)] == expected_collection_name
@patch('trustgraph.direct.milvus_doc_embeddings.MilvusClient')
@ -233,10 +236,10 @@ class TestMilvusUserCollectionIntegration:
# Test various special character combinations
test_cases = [
("user@domain.com", "test-collection.v1", "doc_user_domain_com_test_collection_v1_3"),
("user_123", "collection_456", "doc_user_123_collection_456_3"),
("user with spaces", "collection with spaces", "doc_user_with_spaces_collection_with_spaces_3"),
("user@@@test", "collection---test", "doc_user_test_collection_test_3"),
("user@domain.com", "test-collection.v1", "doc_user_domain_com_test_collection_v1"),
("user_123", "collection_456", "doc_user_123_collection_456"),
("user with spaces", "collection with spaces", "doc_user_with_spaces_collection_with_spaces"),
("user@@@test", "collection---test", "doc_user_test_collection_test"),
]
vector = [0.1, 0.2, 0.3]
@ -250,24 +253,24 @@ class TestMilvusUserCollectionIntegration:
def test_collection_name_backward_compatibility(self):
"""Test that new collection names don't conflict with old pattern"""
# Old pattern was: {prefix}_{dimension}
# New pattern is: {prefix}_{safe_user}_{safe_collection}_{dimension}
# New pattern is: {prefix}_{safe_user}_{safe_collection}
# The new pattern should never generate names that match the old pattern
old_pattern_examples = ["doc_384", "entity_768", "doc_512"]
test_cases = [
("user", "collection", 384, "doc"),
("test", "test", 768, "entity"),
("a", "b", 512, "doc"),
("user", "collection", "doc"),
("test", "test", "entity"),
("a", "b", "doc"),
]
for user, collection, dimension, prefix in test_cases:
new_name = make_safe_collection_name(user, collection, dimension, prefix)
# New names should have at least 4 underscores (prefix_user_collection_dimension)
for user, collection, prefix in test_cases:
new_name = make_safe_collection_name(user, collection, prefix)
# New names should have at least 2 underscores (prefix_user_collection)
# Old names had only 1 underscore (prefix_dimension)
assert new_name.count('_') >= 3, f"New name {new_name} doesn't have enough underscores"
assert new_name.count('_') >= 2, f"New name {new_name} doesn't have enough underscores"
# New names should not match old pattern
assert new_name not in old_pattern_examples, f"New name {new_name} conflicts with old pattern"
@ -286,23 +289,23 @@ class TestMilvusUserCollectionIntegration:
dimension = 384
# Generate collection names
doc_name1 = make_safe_collection_name(user1, collection1, dimension, "doc")
doc_name2 = make_safe_collection_name(user2, collection2, dimension, "doc")
entity_name1 = make_safe_collection_name(user1, collection1, dimension, "entity")
entity_name2 = make_safe_collection_name(user2, collection2, dimension, "entity")
doc_name1 = make_safe_collection_name(user1, collection1, "doc")
doc_name2 = make_safe_collection_name(user2, collection2, "doc")
entity_name1 = make_safe_collection_name(user1, collection1, "entity")
entity_name2 = make_safe_collection_name(user2, collection2, "entity")
# Verify complete isolation
assert doc_name1 != doc_name2, "Document collections should be isolated"
assert entity_name1 != entity_name2, "Entity collections should be isolated"
# Verify names match expected pattern from Qdrant
# Verify names match expected pattern from new API
# Qdrant uses: d_{user}_{collection}_{dimension}, t_{user}_{collection}_{dimension}
# Milvus uses: doc_{safe_user}_{safe_collection}_{dimension}, entity_{safe_user}_{safe_collection}_{dimension}
assert doc_name1 == "doc_my_user_test_coll_1_384"
assert doc_name2 == "doc_other_user_production_data_384"
assert entity_name1 == "entity_my_user_test_coll_1_384"
assert entity_name2 == "entity_other_user_production_data_384"
# New Milvus API uses: doc_{safe_user}_{safe_collection}, entity_{safe_user}_{safe_collection}
assert doc_name1 == "doc_my_user_test_coll_1"
assert doc_name2 == "doc_other_user_production_data"
assert entity_name1 == "entity_my_user_test_coll_1"
assert entity_name2 == "entity_other_user_production_data"
# This test would have FAILED with the old implementation that used:
# - doc_384 for all document embeddings (no user/collection differentiation)

View file

@ -120,7 +120,7 @@ class TestPineconeDocEmbeddingsQueryProcessor:
chunks = await processor.query_document_embeddings(message)
# Verify index was accessed correctly
expected_index_name = "d-test_user-test_collection-3"
expected_index_name = "d-test_user-test_collection"
processor.pinecone.Index.assert_called_once_with(expected_index_name)
# Verify query parameters
@ -239,7 +239,7 @@ class TestPineconeDocEmbeddingsQueryProcessor:
@pytest.mark.asyncio
async def test_query_document_embeddings_different_vector_dimensions(self, processor):
"""Test querying with vectors of different dimensions"""
"""Test querying with vectors of different dimensions using same index"""
message = MagicMock()
message.vectors = [
[0.1, 0.2], # 2D vector
@ -248,37 +248,33 @@ class TestPineconeDocEmbeddingsQueryProcessor:
message.limit = 5
message.user = 'test_user'
message.collection = 'test_collection'
mock_index_2d = MagicMock()
mock_index_4d = MagicMock()
def mock_index_side_effect(name):
if name.endswith("-2"):
return mock_index_2d
elif name.endswith("-4"):
return mock_index_4d
processor.pinecone.Index.side_effect = mock_index_side_effect
# Mock results for different dimensions
# Mock single index that handles all dimensions
mock_index = MagicMock()
processor.pinecone.Index.return_value = mock_index
# Mock results for different vector queries
mock_results_2d = MagicMock()
mock_results_2d.matches = [MagicMock(metadata={'doc': 'Document from 2D index'})]
mock_index_2d.query.return_value = mock_results_2d
mock_results_2d.matches = [MagicMock(metadata={'doc': 'Document from 2D query'})]
mock_results_4d = MagicMock()
mock_results_4d.matches = [MagicMock(metadata={'doc': 'Document from 4D index'})]
mock_index_4d.query.return_value = mock_results_4d
mock_results_4d.matches = [MagicMock(metadata={'doc': 'Document from 4D query'})]
mock_index.query.side_effect = [mock_results_2d, mock_results_4d]
chunks = await processor.query_document_embeddings(message)
# Verify different indexes were used
# Verify same index used for both vectors
expected_index_name = "d-test_user-test_collection"
assert processor.pinecone.Index.call_count == 2
mock_index_2d.query.assert_called_once()
mock_index_4d.query.assert_called_once()
processor.pinecone.Index.assert_called_with(expected_index_name)
# Verify both queries were made
assert mock_index.query.call_count == 2
# Verify results from both dimensions
assert 'Document from 2D index' in chunks
assert 'Document from 4D index' in chunks
assert 'Document from 2D query' in chunks
assert 'Document from 4D query' in chunks
@pytest.mark.asyncio
async def test_query_document_embeddings_empty_vectors_list(self, processor):

View file

@ -148,7 +148,7 @@ class TestPineconeGraphEmbeddingsQueryProcessor:
entities = await processor.query_graph_embeddings(message)
# Verify index was accessed correctly
expected_index_name = "t-test_user-test_collection-3"
expected_index_name = "t-test_user-test_collection"
processor.pinecone.Index.assert_called_once_with(expected_index_name)
# Verify query parameters
@ -265,7 +265,7 @@ class TestPineconeGraphEmbeddingsQueryProcessor:
@pytest.mark.asyncio
async def test_query_graph_embeddings_different_vector_dimensions(self, processor):
"""Test querying with vectors of different dimensions"""
"""Test querying with vectors of different dimensions using same index"""
message = MagicMock()
message.vectors = [
[0.1, 0.2], # 2D vector
@ -274,34 +274,30 @@ class TestPineconeGraphEmbeddingsQueryProcessor:
message.limit = 5
message.user = 'test_user'
message.collection = 'test_collection'
mock_index_2d = MagicMock()
mock_index_4d = MagicMock()
def mock_index_side_effect(name):
if name.endswith("-2"):
return mock_index_2d
elif name.endswith("-4"):
return mock_index_4d
processor.pinecone.Index.side_effect = mock_index_side_effect
# Mock results for different dimensions
# Mock single index that handles all dimensions
mock_index = MagicMock()
processor.pinecone.Index.return_value = mock_index
# Mock results for different vector queries
mock_results_2d = MagicMock()
mock_results_2d.matches = [MagicMock(metadata={'entity': 'entity_2d'})]
mock_index_2d.query.return_value = mock_results_2d
mock_results_4d = MagicMock()
mock_results_4d.matches = [MagicMock(metadata={'entity': 'entity_4d'})]
mock_index_4d.query.return_value = mock_results_4d
mock_index.query.side_effect = [mock_results_2d, mock_results_4d]
entities = await processor.query_graph_embeddings(message)
# Verify different indexes were used
# Verify same index used for both vectors
expected_index_name = "t-test_user-test_collection"
assert processor.pinecone.Index.call_count == 2
mock_index_2d.query.assert_called_once()
mock_index_4d.query.assert_called_once()
processor.pinecone.Index.assert_called_with(expected_index_name)
# Verify both queries were made
assert mock_index.query.call_count == 2
# Verify results from both dimensions
entity_values = [e.value for e in entities]
assert 'entity_2d' in entity_values

View file

@ -70,7 +70,7 @@ class TestCassandraQueryProcessor:
assert result.is_uri is False
@pytest.mark.asyncio
@patch('trustgraph.query.triples.cassandra.service.TrustGraph')
@patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
async def test_query_triples_spo_query(self, mock_trustgraph):
"""Test querying triples with subject, predicate, and object specified"""
from trustgraph.schema import TriplesQueryRequest, Value
@ -98,16 +98,15 @@ class TestCassandraQueryProcessor:
result = await processor.query_triples(query)
# Verify TrustGraph was created with correct parameters
# Verify KnowledgeGraph was created with correct parameters
mock_trustgraph.assert_called_once_with(
hosts=['localhost'],
keyspace='test_user',
table='test_collection'
keyspace='test_user'
)
# Verify get_spo was called with correct parameters
mock_tg_instance.get_spo.assert_called_once_with(
'test_subject', 'test_predicate', 'test_object', limit=100
'test_collection', 'test_subject', 'test_predicate', 'test_object', limit=100
)
# Verify result contains the queried triple
@ -144,7 +143,7 @@ class TestCassandraQueryProcessor:
assert processor.table is None
@pytest.mark.asyncio
@patch('trustgraph.query.triples.cassandra.service.TrustGraph')
@patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
async def test_query_triples_sp_pattern(self, mock_trustgraph):
"""Test SP query pattern (subject and predicate, no object)"""
from trustgraph.schema import TriplesQueryRequest, Value
@ -170,14 +169,14 @@ class TestCassandraQueryProcessor:
result = await processor.query_triples(query)
mock_tg_instance.get_sp.assert_called_once_with('test_subject', 'test_predicate', limit=50)
mock_tg_instance.get_sp.assert_called_once_with('test_collection', 'test_subject', 'test_predicate', limit=50)
assert len(result) == 1
assert result[0].s.value == 'test_subject'
assert result[0].p.value == 'test_predicate'
assert result[0].o.value == 'result_object'
@pytest.mark.asyncio
@patch('trustgraph.query.triples.cassandra.service.TrustGraph')
@patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
async def test_query_triples_s_pattern(self, mock_trustgraph):
"""Test S query pattern (subject only)"""
from trustgraph.schema import TriplesQueryRequest, Value
@ -203,14 +202,14 @@ class TestCassandraQueryProcessor:
result = await processor.query_triples(query)
mock_tg_instance.get_s.assert_called_once_with('test_subject', limit=25)
mock_tg_instance.get_s.assert_called_once_with('test_collection', 'test_subject', limit=25)
assert len(result) == 1
assert result[0].s.value == 'test_subject'
assert result[0].p.value == 'result_predicate'
assert result[0].o.value == 'result_object'
@pytest.mark.asyncio
@patch('trustgraph.query.triples.cassandra.service.TrustGraph')
@patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
async def test_query_triples_p_pattern(self, mock_trustgraph):
"""Test P query pattern (predicate only)"""
from trustgraph.schema import TriplesQueryRequest, Value
@ -236,14 +235,14 @@ class TestCassandraQueryProcessor:
result = await processor.query_triples(query)
mock_tg_instance.get_p.assert_called_once_with('test_predicate', limit=10)
mock_tg_instance.get_p.assert_called_once_with('test_collection', 'test_predicate', limit=10)
assert len(result) == 1
assert result[0].s.value == 'result_subject'
assert result[0].p.value == 'test_predicate'
assert result[0].o.value == 'result_object'
@pytest.mark.asyncio
@patch('trustgraph.query.triples.cassandra.service.TrustGraph')
@patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
async def test_query_triples_o_pattern(self, mock_trustgraph):
"""Test O query pattern (object only)"""
from trustgraph.schema import TriplesQueryRequest, Value
@ -269,14 +268,14 @@ class TestCassandraQueryProcessor:
result = await processor.query_triples(query)
mock_tg_instance.get_o.assert_called_once_with('test_object', limit=75)
mock_tg_instance.get_o.assert_called_once_with('test_collection', 'test_object', limit=75)
assert len(result) == 1
assert result[0].s.value == 'result_subject'
assert result[0].p.value == 'result_predicate'
assert result[0].o.value == 'test_object'
@pytest.mark.asyncio
@patch('trustgraph.query.triples.cassandra.service.TrustGraph')
@patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
async def test_query_triples_get_all_pattern(self, mock_trustgraph):
"""Test query pattern with no constraints (get all)"""
from trustgraph.schema import TriplesQueryRequest
@ -303,7 +302,7 @@ class TestCassandraQueryProcessor:
result = await processor.query_triples(query)
mock_tg_instance.get_all.assert_called_once_with(limit=1000)
mock_tg_instance.get_all.assert_called_once_with('test_collection', limit=1000)
assert len(result) == 1
assert result[0].s.value == 'all_subject'
assert result[0].p.value == 'all_predicate'
@ -376,7 +375,7 @@ class TestCassandraQueryProcessor:
mock_launch.assert_called_once_with(default_ident, '\nTriples query service. Input is a (s, p, o) triple, some values may be\nnull. Output is a list of triples.\n')
@pytest.mark.asyncio
@patch('trustgraph.query.triples.cassandra.service.TrustGraph')
@patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
async def test_query_triples_with_authentication(self, mock_trustgraph):
"""Test querying with username and password authentication"""
from trustgraph.schema import TriplesQueryRequest, Value
@ -402,17 +401,16 @@ class TestCassandraQueryProcessor:
await processor.query_triples(query)
# Verify TrustGraph was created with authentication
# Verify KnowledgeGraph was created with authentication
mock_trustgraph.assert_called_once_with(
hosts=['cassandra'], # Updated default
keyspace='test_user',
table='test_collection',
username='authuser',
password='authpass'
)
@pytest.mark.asyncio
@patch('trustgraph.query.triples.cassandra.service.TrustGraph')
@patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
async def test_query_triples_table_reuse(self, mock_trustgraph):
"""Test that TrustGraph is reused for same table"""
from trustgraph.schema import TriplesQueryRequest, Value
@ -441,7 +439,7 @@ class TestCassandraQueryProcessor:
assert mock_trustgraph.call_count == 1 # Should not increase
@pytest.mark.asyncio
@patch('trustgraph.query.triples.cassandra.service.TrustGraph')
@patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
async def test_query_triples_table_switching(self, mock_trustgraph):
"""Test table switching creates new TrustGraph"""
from trustgraph.schema import TriplesQueryRequest, Value
@ -463,7 +461,7 @@ class TestCassandraQueryProcessor:
)
await processor.query_triples(query1)
assert processor.table == ('user1', 'collection1')
assert processor.table == 'user1'
# Second query with different table
query2 = TriplesQueryRequest(
@ -476,13 +474,13 @@ class TestCassandraQueryProcessor:
)
await processor.query_triples(query2)
assert processor.table == ('user2', 'collection2')
assert processor.table == 'user2'
# Verify TrustGraph was created twice
assert mock_trustgraph.call_count == 2
@pytest.mark.asyncio
@patch('trustgraph.query.triples.cassandra.service.TrustGraph')
@patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
async def test_query_triples_exception_handling(self, mock_trustgraph):
"""Test exception handling during query execution"""
from trustgraph.schema import TriplesQueryRequest, Value
@ -506,7 +504,7 @@ class TestCassandraQueryProcessor:
await processor.query_triples(query)
@pytest.mark.asyncio
@patch('trustgraph.query.triples.cassandra.service.TrustGraph')
@patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
async def test_query_triples_multiple_results(self, mock_trustgraph):
"""Test query returning multiple results"""
from trustgraph.schema import TriplesQueryRequest, Value

View file

@ -18,7 +18,7 @@ from trustgraph.storage.knowledge.store import Processor as KgStore
class TestTriplesWriterConfiguration:
"""Test Cassandra configuration in triples writer processor."""
@patch('trustgraph.direct.cassandra.TrustGraph')
@patch('trustgraph.direct.cassandra_kg.KnowledgeGraph')
def test_environment_variable_configuration(self, mock_trust_graph):
"""Test processor picks up configuration from environment variables."""
env_vars = {
@ -34,7 +34,7 @@ class TestTriplesWriterConfiguration:
assert processor.cassandra_username == 'env-user'
assert processor.cassandra_password == 'env-pass'
@patch('trustgraph.direct.cassandra.TrustGraph')
@patch('trustgraph.direct.cassandra_kg.KnowledgeGraph')
def test_parameter_override_environment(self, mock_trust_graph):
"""Test explicit parameters override environment variables."""
env_vars = {
@ -55,7 +55,7 @@ class TestTriplesWriterConfiguration:
assert processor.cassandra_username == 'param-user'
assert processor.cassandra_password == 'param-pass'
@patch('trustgraph.direct.cassandra.TrustGraph')
@patch('trustgraph.direct.cassandra_kg.KnowledgeGraph')
def test_no_backward_compatibility_graph_params(self, mock_trust_graph):
"""Test that old graph_* parameter names are no longer supported."""
processor = TriplesWriter(
@ -70,7 +70,7 @@ class TestTriplesWriterConfiguration:
assert processor.cassandra_username is None
assert processor.cassandra_password is None
@patch('trustgraph.direct.cassandra.TrustGraph')
@patch('trustgraph.direct.cassandra_kg.KnowledgeGraph')
def test_default_configuration(self, mock_trust_graph):
"""Test default configuration when no params or env vars provided."""
with patch.dict(os.environ, {}, clear=True):
@ -163,7 +163,7 @@ class TestObjectsWriterConfiguration:
class TestTriplesQueryConfiguration:
"""Test Cassandra configuration in triples query processor."""
@patch('trustgraph.direct.cassandra.TrustGraph')
@patch('trustgraph.direct.cassandra_kg.KnowledgeGraph')
def test_environment_variable_configuration(self, mock_trust_graph):
"""Test processor picks up configuration from environment variables."""
env_vars = {
@ -179,7 +179,7 @@ class TestTriplesQueryConfiguration:
assert processor.cassandra_username == 'query-env-user'
assert processor.cassandra_password == 'query-env-pass'
@patch('trustgraph.direct.cassandra.TrustGraph')
@patch('trustgraph.direct.cassandra_kg.KnowledgeGraph')
def test_only_new_parameters_work(self, mock_trust_graph):
"""Test that only new parameters work."""
processor = TriplesQuery(
@ -379,7 +379,7 @@ class TestCommandLineArgumentHandling:
class TestConfigurationPriorityIntegration:
"""Test complete configuration priority chain in processors."""
@patch('trustgraph.direct.cassandra.TrustGraph')
@patch('trustgraph.direct.cassandra_kg.KnowledgeGraph')
def test_complete_priority_chain(self, mock_trust_graph):
"""Test CLI params > env vars > defaults priority in actual processor."""
env_vars = {

View file

@ -135,7 +135,7 @@ class TestPineconeDocEmbeddingsStorageProcessor:
await processor.store_document_embeddings(message)
# Verify index name and operations
expected_index_name = "d-test_user-test_collection-3"
expected_index_name = "d-test_user-test_collection"
processor.pinecone.Index.assert_called_with(expected_index_name)
# Verify upsert was called for each vector
@ -203,7 +203,7 @@ class TestPineconeDocEmbeddingsStorageProcessor:
await processor.store_document_embeddings(message)
# Verify index creation was called
expected_index_name = "d-test_user-test_collection-3"
expected_index_name = "d-test_user-test_collection"
processor.pinecone.create_index.assert_called_once()
create_call = processor.pinecone.create_index.call_args
assert create_call[1]['name'] == expected_index_name
@ -299,12 +299,11 @@ class TestPineconeDocEmbeddingsStorageProcessor:
mock_index_3d = MagicMock()
def mock_index_side_effect(name):
if name.endswith("-2"):
return mock_index_2d
elif name.endswith("-4"):
return mock_index_4d
elif name.endswith("-3"):
return mock_index_3d
# All dimensions now use the same index name pattern
# Different dimensions will be handled within the same index
if "test_user" in name and "test_collection" in name:
return mock_index_2d # Just return one mock for all
return MagicMock()
processor.pinecone.Index.side_effect = mock_index_side_effect
processor.pinecone.has_index.return_value = True
@ -312,11 +311,10 @@ class TestPineconeDocEmbeddingsStorageProcessor:
with patch('uuid.uuid4', side_effect=['id1', 'id2', 'id3']):
await processor.store_document_embeddings(message)
# Verify different indexes were used for different dimensions
assert processor.pinecone.Index.call_count == 3
mock_index_2d.upsert.assert_called_once()
mock_index_4d.upsert.assert_called_once()
mock_index_3d.upsert.assert_called_once()
# Verify all vectors are now stored in the same index
# (Pinecone can handle mixed dimensions in the same index)
assert processor.pinecone.Index.call_count == 3 # Called once per vector
mock_index_2d.upsert.call_count == 3 # All upserts go to same index
@pytest.mark.asyncio
async def test_store_document_embeddings_empty_chunks_list(self, processor):

View file

@ -106,7 +106,7 @@ class TestQdrantDocEmbeddingsStorage(IsolatedAsyncioTestCase):
# Assert
# Verify collection existence was checked
expected_collection = 'd_test_user_test_collection_3'
expected_collection = 'd_test_user_test_collection'
mock_qdrant_instance.collection_exists.assert_called_once_with(expected_collection)
# Verify upsert was called
@ -309,7 +309,7 @@ class TestQdrantDocEmbeddingsStorage(IsolatedAsyncioTestCase):
await processor.store_document_embeddings(mock_message)
# Assert
expected_collection = 'd_new_user_new_collection_5'
expected_collection = 'd_new_user_new_collection'
# Verify collection existence check and creation
mock_qdrant_instance.collection_exists.assert_called_once_with(expected_collection)
@ -408,7 +408,7 @@ class TestQdrantDocEmbeddingsStorage(IsolatedAsyncioTestCase):
await processor.store_document_embeddings(mock_message2)
# Assert
expected_collection = 'd_cache_user_cache_collection_3'
expected_collection = 'd_cache_user_cache_collection'
assert processor.last_collection == expected_collection
# Verify second call skipped existence check (cached)
@ -455,17 +455,16 @@ class TestQdrantDocEmbeddingsStorage(IsolatedAsyncioTestCase):
await processor.store_document_embeddings(mock_message)
# Assert
# Should check existence of both collections
expected_collections = ['d_dim_user_dim_collection_2', 'd_dim_user_dim_collection_3']
actual_calls = [call.args[0] for call in mock_qdrant_instance.collection_exists.call_args_list]
assert actual_calls == expected_collections
# Should upsert to both collections
# Should check existence of the same collection (dimensions no longer create separate collections)
expected_collection = 'd_dim_user_dim_collection'
mock_qdrant_instance.collection_exists.assert_called_once_with(expected_collection)
# Should upsert to the same collection for both vectors
assert mock_qdrant_instance.upsert.call_count == 2
upsert_calls = mock_qdrant_instance.upsert.call_args_list
assert upsert_calls[0][1]['collection_name'] == 'd_dim_user_dim_collection_2'
assert upsert_calls[1][1]['collection_name'] == 'd_dim_user_dim_collection_3'
assert upsert_calls[0][1]['collection_name'] == expected_collection
assert upsert_calls[1][1]['collection_name'] == expected_collection
@patch('trustgraph.storage.doc_embeddings.qdrant.write.QdrantClient')
@patch('trustgraph.base.DocumentEmbeddingsStoreService.__init__')

View file

@ -135,7 +135,7 @@ class TestPineconeGraphEmbeddingsStorageProcessor:
await processor.store_graph_embeddings(message)
# Verify index name and operations
expected_index_name = "t-test_user-test_collection-3"
expected_index_name = "t-test_user-test_collection"
processor.pinecone.Index.assert_called_with(expected_index_name)
# Verify upsert was called for each vector
@ -203,7 +203,7 @@ class TestPineconeGraphEmbeddingsStorageProcessor:
await processor.store_graph_embeddings(message)
# Verify index creation was called
expected_index_name = "t-test_user-test_collection-3"
expected_index_name = "t-test_user-test_collection"
processor.pinecone.create_index.assert_called_once()
create_call = processor.pinecone.create_index.call_args
assert create_call[1]['name'] == expected_index_name
@ -256,12 +256,12 @@ class TestPineconeGraphEmbeddingsStorageProcessor:
@pytest.mark.asyncio
async def test_store_graph_embeddings_different_vector_dimensions(self, processor):
"""Test storing graph embeddings with different vector dimensions"""
"""Test storing graph embeddings with different vector dimensions to same index"""
message = MagicMock()
message.metadata = MagicMock()
message.metadata.user = 'test_user'
message.metadata.collection = 'test_collection'
entity = EntityEmbeddings(
entity=Value(value="test_entity", is_uri=False),
vectors=[
@ -271,30 +271,21 @@ class TestPineconeGraphEmbeddingsStorageProcessor:
]
)
message.entities = [entity]
mock_index_2d = MagicMock()
mock_index_4d = MagicMock()
mock_index_3d = MagicMock()
def mock_index_side_effect(name):
if name.endswith("-2"):
return mock_index_2d
elif name.endswith("-4"):
return mock_index_4d
elif name.endswith("-3"):
return mock_index_3d
processor.pinecone.Index.side_effect = mock_index_side_effect
# All vectors now use the same index (no dimension in name)
mock_index = MagicMock()
processor.pinecone.Index.return_value = mock_index
processor.pinecone.has_index.return_value = True
with patch('uuid.uuid4', side_effect=['id1', 'id2', 'id3']):
await processor.store_graph_embeddings(message)
# Verify different indexes were used for different dimensions
assert processor.pinecone.Index.call_count == 3
mock_index_2d.upsert.assert_called_once()
mock_index_4d.upsert.assert_called_once()
mock_index_3d.upsert.assert_called_once()
# Verify same index was used for all dimensions
expected_index_name = 't-test_user-test_collection'
processor.pinecone.Index.assert_called_with(expected_index_name)
# Verify all vectors were upserted to the same index
assert mock_index.upsert.call_count == 3
@pytest.mark.asyncio
async def test_store_graph_embeddings_empty_entities_list(self, processor):

View file

@ -69,7 +69,7 @@ class TestQdrantGraphEmbeddingsStorage(IsolatedAsyncioTestCase):
collection_name = processor.get_collection(dim=512, user='test_user', collection='test_collection')
# Assert
expected_name = 't_test_user_test_collection_512'
expected_name = 't_test_user_test_collection'
assert collection_name == expected_name
assert processor.last_collection == expected_name
@ -118,7 +118,7 @@ class TestQdrantGraphEmbeddingsStorage(IsolatedAsyncioTestCase):
# Assert
# Verify collection existence was checked
expected_collection = 't_test_user_test_collection_3'
expected_collection = 't_test_user_test_collection'
mock_qdrant_instance.collection_exists.assert_called_once_with(expected_collection)
# Verify upsert was called
@ -156,7 +156,7 @@ class TestQdrantGraphEmbeddingsStorage(IsolatedAsyncioTestCase):
collection_name = processor.get_collection(dim=256, user='existing_user', collection='existing_collection')
# Assert
expected_name = 't_existing_user_existing_collection_256'
expected_name = 't_existing_user_existing_collection'
assert collection_name == expected_name
assert processor.last_collection == expected_name
@ -194,7 +194,7 @@ class TestQdrantGraphEmbeddingsStorage(IsolatedAsyncioTestCase):
collection_name2 = processor.get_collection(dim=128, user='cache_user', collection='cache_collection')
# Assert
expected_name = 't_cache_user_cache_collection_128'
expected_name = 't_cache_user_cache_collection'
assert collection_name1 == expected_name
assert collection_name2 == expected_name

View file

@ -86,7 +86,7 @@ class TestCassandraStorageProcessor:
assert processor.cassandra_username == 'new-user' # Only cassandra_* params work
@pytest.mark.asyncio
@patch('trustgraph.storage.triples.cassandra.write.TrustGraph')
@patch('trustgraph.storage.triples.cassandra.write.KnowledgeGraph')
async def test_table_switching_with_auth(self, mock_trustgraph):
"""Test table switching logic when authentication is provided"""
taskgroup_mock = MagicMock()
@ -107,18 +107,17 @@ class TestCassandraStorageProcessor:
await processor.store_triples(mock_message)
# Verify TrustGraph was called with auth parameters
# Verify KnowledgeGraph was called with auth parameters
mock_trustgraph.assert_called_once_with(
hosts=['cassandra'], # Updated default
keyspace='user1',
table='collection1',
username='testuser',
password='testpass'
)
assert processor.table == ('user1', 'collection1')
assert processor.table == 'user1'
@pytest.mark.asyncio
@patch('trustgraph.storage.triples.cassandra.write.TrustGraph')
@patch('trustgraph.storage.triples.cassandra.write.KnowledgeGraph')
async def test_table_switching_without_auth(self, mock_trustgraph):
"""Test table switching logic when no authentication is provided"""
taskgroup_mock = MagicMock()
@ -135,16 +134,15 @@ class TestCassandraStorageProcessor:
await processor.store_triples(mock_message)
# Verify TrustGraph was called without auth parameters
# Verify KnowledgeGraph was called without auth parameters
mock_trustgraph.assert_called_once_with(
hosts=['cassandra'], # Updated default
keyspace='user2',
table='collection2'
keyspace='user2'
)
assert processor.table == ('user2', 'collection2')
assert processor.table == 'user2'
@pytest.mark.asyncio
@patch('trustgraph.storage.triples.cassandra.write.TrustGraph')
@patch('trustgraph.storage.triples.cassandra.write.KnowledgeGraph')
async def test_table_reuse_when_same(self, mock_trustgraph):
"""Test that TrustGraph is not recreated when table hasn't changed"""
taskgroup_mock = MagicMock()
@ -168,7 +166,7 @@ class TestCassandraStorageProcessor:
assert mock_trustgraph.call_count == 1 # Should not increase
@pytest.mark.asyncio
@patch('trustgraph.storage.triples.cassandra.write.TrustGraph')
@patch('trustgraph.storage.triples.cassandra.write.KnowledgeGraph')
async def test_triple_insertion(self, mock_trustgraph):
"""Test that triples are properly inserted into Cassandra"""
taskgroup_mock = MagicMock()
@ -198,11 +196,11 @@ class TestCassandraStorageProcessor:
# Verify both triples were inserted
assert mock_tg_instance.insert.call_count == 2
mock_tg_instance.insert.assert_any_call('subject1', 'predicate1', 'object1')
mock_tg_instance.insert.assert_any_call('subject2', 'predicate2', 'object2')
mock_tg_instance.insert.assert_any_call('collection1', 'subject1', 'predicate1', 'object1')
mock_tg_instance.insert.assert_any_call('collection1', 'subject2', 'predicate2', 'object2')
@pytest.mark.asyncio
@patch('trustgraph.storage.triples.cassandra.write.TrustGraph')
@patch('trustgraph.storage.triples.cassandra.write.KnowledgeGraph')
async def test_triple_insertion_with_empty_list(self, mock_trustgraph):
"""Test behavior when message has no triples"""
taskgroup_mock = MagicMock()
@ -223,7 +221,7 @@ class TestCassandraStorageProcessor:
mock_tg_instance.insert.assert_not_called()
@pytest.mark.asyncio
@patch('trustgraph.storage.triples.cassandra.write.TrustGraph')
@patch('trustgraph.storage.triples.cassandra.write.KnowledgeGraph')
@patch('trustgraph.storage.triples.cassandra.write.time.sleep')
async def test_exception_handling_with_retry(self, mock_sleep, mock_trustgraph):
"""Test exception handling during TrustGraph creation"""
@ -328,7 +326,7 @@ class TestCassandraStorageProcessor:
mock_launch.assert_called_once_with(default_ident, '\nGraph writer. Input is graph edge. Writes edges to Cassandra graph.\n')
@pytest.mark.asyncio
@patch('trustgraph.storage.triples.cassandra.write.TrustGraph')
@patch('trustgraph.storage.triples.cassandra.write.KnowledgeGraph')
async def test_store_triples_table_switching_between_different_tables(self, mock_trustgraph):
"""Test table switching when different tables are used in sequence"""
taskgroup_mock = MagicMock()
@ -345,7 +343,7 @@ class TestCassandraStorageProcessor:
mock_message1.triples = []
await processor.store_triples(mock_message1)
assert processor.table == ('user1', 'collection1')
assert processor.table == 'user1'
assert processor.tg == mock_tg_instance1
# Second message with different table
@ -355,14 +353,14 @@ class TestCassandraStorageProcessor:
mock_message2.triples = []
await processor.store_triples(mock_message2)
assert processor.table == ('user2', 'collection2')
assert processor.table == 'user2'
assert processor.tg == mock_tg_instance2
# Verify TrustGraph was created twice for different tables
assert mock_trustgraph.call_count == 2
@pytest.mark.asyncio
@patch('trustgraph.storage.triples.cassandra.write.TrustGraph')
@patch('trustgraph.storage.triples.cassandra.write.KnowledgeGraph')
async def test_store_triples_with_special_characters_in_values(self, mock_trustgraph):
"""Test storing triples with special characters and unicode"""
taskgroup_mock = MagicMock()
@ -386,13 +384,14 @@ class TestCassandraStorageProcessor:
# Verify the triple was inserted with special characters preserved
mock_tg_instance.insert.assert_called_once_with(
'test_collection',
'subject with spaces & symbols',
'predicate:with/colons',
'object with "quotes" and unicode: ñáéíóú'
)
@pytest.mark.asyncio
@patch('trustgraph.storage.triples.cassandra.write.TrustGraph')
@patch('trustgraph.storage.triples.cassandra.write.KnowledgeGraph')
async def test_store_triples_preserves_old_table_on_exception(self, mock_trustgraph):
"""Test that table remains unchanged when TrustGraph creation fails"""
taskgroup_mock = MagicMock()

View file

@ -86,15 +86,17 @@ class TestFalkorDBStorageProcessor:
mock_result = MagicMock()
mock_result.nodes_created = 1
mock_result.run_time_ms = 10
processor.io.query.return_value = mock_result
processor.create_node(test_uri)
processor.create_node(test_uri, 'test_user', 'test_collection')
processor.io.query.assert_called_once_with(
"MERGE (n:Node {uri: $uri})",
"MERGE (n:Node {uri: $uri, user: $user, collection: $collection})",
params={
"uri": test_uri,
"user": 'test_user',
"collection": 'test_collection',
},
)
@ -104,15 +106,17 @@ class TestFalkorDBStorageProcessor:
mock_result = MagicMock()
mock_result.nodes_created = 1
mock_result.run_time_ms = 10
processor.io.query.return_value = mock_result
processor.create_literal(test_value)
processor.create_literal(test_value, 'test_user', 'test_collection')
processor.io.query.assert_called_once_with(
"MERGE (n:Literal {value: $value})",
"MERGE (n:Literal {value: $value, user: $user, collection: $collection})",
params={
"value": test_value,
"user": 'test_user',
"collection": 'test_collection',
},
)
@ -121,23 +125,25 @@ class TestFalkorDBStorageProcessor:
src_uri = 'http://example.com/src'
pred_uri = 'http://example.com/pred'
dest_uri = 'http://example.com/dest'
mock_result = MagicMock()
mock_result.nodes_created = 0
mock_result.run_time_ms = 5
processor.io.query.return_value = mock_result
processor.relate_node(src_uri, pred_uri, dest_uri)
processor.relate_node(src_uri, pred_uri, dest_uri, 'test_user', 'test_collection')
processor.io.query.assert_called_once_with(
"MATCH (src:Node {uri: $src}) "
"MATCH (dest:Node {uri: $dest}) "
"MERGE (src)-[:Rel {uri: $uri}]->(dest)",
"MATCH (src:Node {uri: $src, user: $user, collection: $collection}) "
"MATCH (dest:Node {uri: $dest, user: $user, collection: $collection}) "
"MERGE (src)-[:Rel {uri: $uri, user: $user, collection: $collection}]->(dest)",
params={
"src": src_uri,
"dest": dest_uri,
"uri": pred_uri,
"user": 'test_user',
"collection": 'test_collection',
},
)
@ -146,23 +152,25 @@ class TestFalkorDBStorageProcessor:
src_uri = 'http://example.com/src'
pred_uri = 'http://example.com/pred'
literal_value = 'literal destination'
mock_result = MagicMock()
mock_result.nodes_created = 0
mock_result.run_time_ms = 5
processor.io.query.return_value = mock_result
processor.relate_literal(src_uri, pred_uri, literal_value)
processor.relate_literal(src_uri, pred_uri, literal_value, 'test_user', 'test_collection')
processor.io.query.assert_called_once_with(
"MATCH (src:Node {uri: $src}) "
"MATCH (dest:Literal {value: $dest}) "
"MERGE (src)-[:Rel {uri: $uri}]->(dest)",
"MATCH (src:Node {uri: $src, user: $user, collection: $collection}) "
"MATCH (dest:Literal {value: $dest, user: $user, collection: $collection}) "
"MERGE (src)-[:Rel {uri: $uri, user: $user, collection: $collection}]->(dest)",
params={
"src": src_uri,
"dest": literal_value,
"uri": pred_uri,
"user": 'test_user',
"collection": 'test_collection',
},
)
@ -191,14 +199,16 @@ class TestFalkorDBStorageProcessor:
# Verify queries were called in the correct order
expected_calls = [
# Create subject node
(("MERGE (n:Node {uri: $uri})",), {"params": {"uri": "http://example.com/subject"}}),
(("MERGE (n:Node {uri: $uri, user: $user, collection: $collection})",),
{"params": {"uri": "http://example.com/subject", "user": "test_user", "collection": "test_collection"}}),
# Create object node
(("MERGE (n:Node {uri: $uri})",), {"params": {"uri": "http://example.com/object"}}),
(("MERGE (n:Node {uri: $uri, user: $user, collection: $collection})",),
{"params": {"uri": "http://example.com/object", "user": "test_user", "collection": "test_collection"}}),
# Create relationship
(("MATCH (src:Node {uri: $src}) "
"MATCH (dest:Node {uri: $dest}) "
"MERGE (src)-[:Rel {uri: $uri}]->(dest)",),
{"params": {"src": "http://example.com/subject", "dest": "http://example.com/object", "uri": "http://example.com/predicate"}}),
(("MATCH (src:Node {uri: $src, user: $user, collection: $collection}) "
"MATCH (dest:Node {uri: $dest, user: $user, collection: $collection}) "
"MERGE (src)-[:Rel {uri: $uri, user: $user, collection: $collection}]->(dest)",),
{"params": {"src": "http://example.com/subject", "dest": "http://example.com/object", "uri": "http://example.com/predicate", "user": "test_user", "collection": "test_collection"}}),
]
assert processor.io.query.call_count == 3
@ -220,14 +230,16 @@ class TestFalkorDBStorageProcessor:
# Verify queries were called in the correct order
expected_calls = [
# Create subject node
(("MERGE (n:Node {uri: $uri})",), {"params": {"uri": "http://example.com/subject"}}),
(("MERGE (n:Node {uri: $uri, user: $user, collection: $collection})",),
{"params": {"uri": "http://example.com/subject", "user": "test_user", "collection": "test_collection"}}),
# Create literal object
(("MERGE (n:Literal {value: $value})",), {"params": {"value": "literal object"}}),
(("MERGE (n:Literal {value: $value, user: $user, collection: $collection})",),
{"params": {"value": "literal object", "user": "test_user", "collection": "test_collection"}}),
# Create relationship
(("MATCH (src:Node {uri: $src}) "
"MATCH (dest:Literal {value: $dest}) "
"MERGE (src)-[:Rel {uri: $uri}]->(dest)",),
{"params": {"src": "http://example.com/subject", "dest": "literal object", "uri": "http://example.com/predicate"}}),
(("MATCH (src:Node {uri: $src, user: $user, collection: $collection}) "
"MATCH (dest:Literal {value: $dest, user: $user, collection: $collection}) "
"MERGE (src)-[:Rel {uri: $uri, user: $user, collection: $collection}]->(dest)",),
{"params": {"src": "http://example.com/subject", "dest": "literal object", "uri": "http://example.com/predicate", "user": "test_user", "collection": "test_collection"}}),
]
assert processor.io.query.call_count == 3
@ -408,12 +420,14 @@ class TestFalkorDBStorageProcessor:
processor.io.query.return_value = mock_result
processor.create_node(test_uri)
processor.create_node(test_uri, 'test_user', 'test_collection')
processor.io.query.assert_called_once_with(
"MERGE (n:Node {uri: $uri})",
"MERGE (n:Node {uri: $uri, user: $user, collection: $collection})",
params={
"uri": test_uri,
"user": 'test_user',
"collection": 'test_collection',
},
)
@ -426,11 +440,13 @@ class TestFalkorDBStorageProcessor:
processor.io.query.return_value = mock_result
processor.create_literal(test_value)
processor.create_literal(test_value, 'test_user', 'test_collection')
processor.io.query.assert_called_once_with(
"MERGE (n:Literal {value: $value})",
"MERGE (n:Literal {value: $value, user: $user, collection: $collection})",
params={
"value": test_value,
"user": 'test_user',
"collection": 'test_collection',
},
)