mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-05-04 21:02:37 +02:00
Collection management (#520)
* Tech spec * Refactored Cassanda knowledge graph for single table * Collection management, librarian services to manage metadata and collection deletion
This commit is contained in:
parent
48016d8fb2
commit
13ff7d765d
48 changed files with 2941 additions and 425 deletions
|
|
@ -13,163 +13,146 @@ class TestMilvusCollectionNaming:
|
|||
"""Test basic collection name creation"""
|
||||
result = make_safe_collection_name(
|
||||
user="test_user",
|
||||
collection="test_collection",
|
||||
dimension=384,
|
||||
collection="test_collection",
|
||||
prefix="doc"
|
||||
)
|
||||
assert result == "doc_test_user_test_collection_384"
|
||||
assert result == "doc_test_user_test_collection"
|
||||
|
||||
def test_make_safe_collection_name_with_special_characters(self):
|
||||
"""Test collection name creation with special characters that need sanitization"""
|
||||
result = make_safe_collection_name(
|
||||
user="user@domain.com",
|
||||
collection="test-collection.v2",
|
||||
dimension=768,
|
||||
prefix="entity"
|
||||
)
|
||||
assert result == "entity_user_domain_com_test_collection_v2_768"
|
||||
assert result == "entity_user_domain_com_test_collection_v2"
|
||||
|
||||
def test_make_safe_collection_name_with_unicode(self):
|
||||
"""Test collection name creation with Unicode characters"""
|
||||
result = make_safe_collection_name(
|
||||
user="测试用户",
|
||||
collection="colección_española",
|
||||
dimension=512,
|
||||
collection="colección_española",
|
||||
prefix="doc"
|
||||
)
|
||||
assert result == "doc_default_colecci_n_espa_ola_512"
|
||||
assert result == "doc_default_colecci_n_espa_ola"
|
||||
|
||||
def test_make_safe_collection_name_with_spaces(self):
|
||||
"""Test collection name creation with spaces"""
|
||||
result = make_safe_collection_name(
|
||||
user="test user",
|
||||
collection="my test collection",
|
||||
dimension=256,
|
||||
prefix="entity"
|
||||
)
|
||||
assert result == "entity_test_user_my_test_collection_256"
|
||||
assert result == "entity_test_user_my_test_collection"
|
||||
|
||||
def test_make_safe_collection_name_with_multiple_consecutive_special_chars(self):
|
||||
"""Test collection name creation with multiple consecutive special characters"""
|
||||
result = make_safe_collection_name(
|
||||
user="user@@@domain!!!",
|
||||
collection="test---collection...v2",
|
||||
dimension=384,
|
||||
prefix="doc"
|
||||
prefix="doc"
|
||||
)
|
||||
assert result == "doc_user_domain_test_collection_v2_384"
|
||||
assert result == "doc_user_domain_test_collection_v2"
|
||||
|
||||
def test_make_safe_collection_name_with_leading_trailing_underscores(self):
|
||||
"""Test collection name creation with leading/trailing special characters"""
|
||||
result = make_safe_collection_name(
|
||||
user="__test_user__",
|
||||
collection="@@test_collection##",
|
||||
dimension=128,
|
||||
prefix="entity"
|
||||
)
|
||||
assert result == "entity_test_user_test_collection_128"
|
||||
assert result == "entity_test_user_test_collection"
|
||||
|
||||
def test_make_safe_collection_name_empty_user(self):
|
||||
"""Test collection name creation with empty user (should fallback to 'default')"""
|
||||
result = make_safe_collection_name(
|
||||
user="",
|
||||
collection="test_collection",
|
||||
dimension=384,
|
||||
prefix="doc"
|
||||
)
|
||||
assert result == "doc_default_test_collection_384"
|
||||
assert result == "doc_default_test_collection"
|
||||
|
||||
def test_make_safe_collection_name_empty_collection(self):
|
||||
"""Test collection name creation with empty collection (should fallback to 'default')"""
|
||||
result = make_safe_collection_name(
|
||||
user="test_user",
|
||||
collection="",
|
||||
dimension=384,
|
||||
prefix="doc"
|
||||
)
|
||||
assert result == "doc_test_user_default_384"
|
||||
assert result == "doc_test_user_default"
|
||||
|
||||
def test_make_safe_collection_name_both_empty(self):
|
||||
"""Test collection name creation with both user and collection empty"""
|
||||
result = make_safe_collection_name(
|
||||
user="",
|
||||
collection="",
|
||||
dimension=384,
|
||||
prefix="doc"
|
||||
)
|
||||
assert result == "doc_default_default_384"
|
||||
assert result == "doc_default_default"
|
||||
|
||||
def test_make_safe_collection_name_only_special_characters(self):
|
||||
"""Test collection name creation with only special characters (should fallback to 'default')"""
|
||||
result = make_safe_collection_name(
|
||||
user="@@@!!!",
|
||||
collection="---###",
|
||||
dimension=512,
|
||||
prefix="entity"
|
||||
)
|
||||
assert result == "entity_default_default_512"
|
||||
assert result == "entity_default_default"
|
||||
|
||||
def test_make_safe_collection_name_whitespace_only(self):
|
||||
"""Test collection name creation with whitespace-only strings"""
|
||||
result = make_safe_collection_name(
|
||||
user=" \n\t ",
|
||||
collection=" \r\n ",
|
||||
dimension=256,
|
||||
prefix="doc"
|
||||
)
|
||||
assert result == "doc_default_default_256"
|
||||
assert result == "doc_default_default"
|
||||
|
||||
def test_make_safe_collection_name_mixed_valid_invalid_chars(self):
|
||||
"""Test collection name creation with mixed valid and invalid characters"""
|
||||
result = make_safe_collection_name(
|
||||
user="user123@test",
|
||||
collection="coll_2023.v1",
|
||||
dimension=384,
|
||||
prefix="entity"
|
||||
)
|
||||
assert result == "entity_user123_test_coll_2023_v1_384"
|
||||
assert result == "entity_user123_test_coll_2023_v1"
|
||||
|
||||
def test_make_safe_collection_name_different_prefixes(self):
|
||||
"""Test collection name creation with different prefixes"""
|
||||
user = "test_user"
|
||||
collection = "test_collection"
|
||||
dimension = 384
|
||||
|
||||
doc_result = make_safe_collection_name(user, collection, dimension, "doc")
|
||||
entity_result = make_safe_collection_name(user, collection, dimension, "entity")
|
||||
custom_result = make_safe_collection_name(user, collection, dimension, "custom")
|
||||
|
||||
assert doc_result == "doc_test_user_test_collection_384"
|
||||
assert entity_result == "entity_test_user_test_collection_384"
|
||||
assert custom_result == "custom_test_user_test_collection_384"
|
||||
|
||||
doc_result = make_safe_collection_name(user, collection, "doc")
|
||||
entity_result = make_safe_collection_name(user, collection, "entity")
|
||||
custom_result = make_safe_collection_name(user, collection, "custom")
|
||||
|
||||
assert doc_result == "doc_test_user_test_collection"
|
||||
assert entity_result == "entity_test_user_test_collection"
|
||||
assert custom_result == "custom_test_user_test_collection"
|
||||
|
||||
def test_make_safe_collection_name_different_dimensions(self):
|
||||
"""Test collection name creation with different dimensions"""
|
||||
"""Test collection name creation - dimension handling no longer part of function"""
|
||||
user = "test_user"
|
||||
collection = "test_collection"
|
||||
prefix = "doc"
|
||||
|
||||
result_128 = make_safe_collection_name(user, collection, 128, prefix)
|
||||
result_384 = make_safe_collection_name(user, collection, 384, prefix)
|
||||
result_768 = make_safe_collection_name(user, collection, 768, prefix)
|
||||
|
||||
assert result_128 == "doc_test_user_test_collection_128"
|
||||
assert result_384 == "doc_test_user_test_collection_384"
|
||||
assert result_768 == "doc_test_user_test_collection_768"
|
||||
|
||||
# With new API, dimensions are handled separately, function always returns same result
|
||||
result = make_safe_collection_name(user, collection, prefix)
|
||||
|
||||
assert result == "doc_test_user_test_collection"
|
||||
|
||||
def test_make_safe_collection_name_long_names(self):
|
||||
"""Test collection name creation with very long user/collection names"""
|
||||
long_user = "a" * 100
|
||||
long_collection = "b" * 100
|
||||
|
||||
|
||||
result = make_safe_collection_name(
|
||||
user=long_user,
|
||||
collection=long_collection,
|
||||
dimension=384,
|
||||
prefix="doc"
|
||||
)
|
||||
|
||||
expected = f"doc_{long_user}_{long_collection}_384"
|
||||
|
||||
expected = f"doc_{long_user}_{long_collection}"
|
||||
assert result == expected
|
||||
assert len(result) > 200 # Verify it handles long names
|
||||
|
||||
|
|
@ -178,20 +161,18 @@ class TestMilvusCollectionNaming:
|
|||
result = make_safe_collection_name(
|
||||
user="user123",
|
||||
collection="collection456",
|
||||
dimension=384,
|
||||
prefix="doc"
|
||||
)
|
||||
assert result == "doc_user123_collection456_384"
|
||||
assert result == "doc_user123_collection456"
|
||||
|
||||
def test_make_safe_collection_name_case_sensitivity(self):
|
||||
"""Test that collection name creation preserves case"""
|
||||
result = make_safe_collection_name(
|
||||
user="TestUser",
|
||||
collection="TestCollection",
|
||||
dimension=384,
|
||||
prefix="Doc"
|
||||
)
|
||||
assert result == "Doc_TestUser_TestCollection_384"
|
||||
assert result == "Doc_TestUser_TestCollection"
|
||||
|
||||
def test_make_safe_collection_name_realistic_examples(self):
|
||||
"""Test collection name creation with realistic user/collection combinations"""
|
||||
|
|
@ -202,30 +183,27 @@ class TestMilvusCollectionNaming:
|
|||
("user_123", "test_collection", "user_123", "test_collection"),
|
||||
("αβγ-user", "测试集合", "user", "default"),
|
||||
]
|
||||
|
||||
|
||||
for user, collection, expected_user, expected_collection in test_cases:
|
||||
result = make_safe_collection_name(user, collection, 384, "doc")
|
||||
assert result == f"doc_{expected_user}_{expected_collection}_384"
|
||||
result = make_safe_collection_name(user, collection, "doc")
|
||||
assert result == f"doc_{expected_user}_{expected_collection}"
|
||||
|
||||
def test_make_safe_collection_name_matches_qdrant_pattern(self):
|
||||
"""Test that Milvus collection names follow similar pattern to Qdrant"""
|
||||
"""Test that Milvus collection names follow similar pattern to Qdrant (but without dimension in name)"""
|
||||
# Qdrant uses: "d_{user}_{collection}_{dimension}" and "t_{user}_{collection}_{dimension}"
|
||||
# Milvus should use: "{prefix}_{safe_user}_{safe_collection}_{dimension}"
|
||||
|
||||
# New Milvus API uses: "{prefix}_{safe_user}_{safe_collection}" (dimension handled separately)
|
||||
|
||||
user = "test.user@domain.com"
|
||||
collection = "test-collection.v2"
|
||||
dimension = 384
|
||||
|
||||
doc_result = make_safe_collection_name(user, collection, dimension, "doc")
|
||||
entity_result = make_safe_collection_name(user, collection, dimension, "entity")
|
||||
|
||||
# Should follow the pattern but with sanitized names
|
||||
assert doc_result == "doc_test_user_domain_com_test_collection_v2_384"
|
||||
assert entity_result == "entity_test_user_domain_com_test_collection_v2_384"
|
||||
|
||||
# Verify structure matches expected pattern (may have more underscores due to sanitization)
|
||||
# The important thing is that it follows prefix_user_collection_dimension structure
|
||||
|
||||
doc_result = make_safe_collection_name(user, collection, "doc")
|
||||
entity_result = make_safe_collection_name(user, collection, "entity")
|
||||
|
||||
# Should follow the pattern but with sanitized names and no dimension
|
||||
assert doc_result == "doc_test_user_domain_com_test_collection_v2"
|
||||
assert entity_result == "entity_test_user_domain_com_test_collection_v2"
|
||||
|
||||
# Verify structure matches expected pattern
|
||||
assert doc_result.startswith("doc_")
|
||||
assert doc_result.endswith("_384")
|
||||
assert entity_result.startswith("entity_")
|
||||
assert entity_result.endswith("_384")
|
||||
# Dimension is no longer part of the collection name
|
||||
|
|
@ -32,7 +32,7 @@ class TestMilvusUserCollectionIntegration:
|
|||
doc_vectors.insert(vector, "test document", user, collection)
|
||||
|
||||
expected_collection_name = make_safe_collection_name(
|
||||
user, collection, len(vector), "doc"
|
||||
user, collection, "doc"
|
||||
)
|
||||
|
||||
# Verify collection was created with correct name
|
||||
|
|
@ -58,7 +58,7 @@ class TestMilvusUserCollectionIntegration:
|
|||
entity_vectors.insert(vector, "test entity", user, collection)
|
||||
|
||||
expected_collection_name = make_safe_collection_name(
|
||||
user, collection, len(vector), "entity"
|
||||
user, collection, "entity"
|
||||
)
|
||||
|
||||
# Verify collection was created with correct name
|
||||
|
|
@ -89,7 +89,7 @@ class TestMilvusUserCollectionIntegration:
|
|||
result = doc_vectors.search(vector, user, collection, limit=5)
|
||||
|
||||
# Verify search was called with correct collection name
|
||||
expected_collection_name = make_safe_collection_name(user, collection, 3, "doc")
|
||||
expected_collection_name = make_safe_collection_name(user, collection, "doc")
|
||||
mock_client.search.assert_called_once()
|
||||
search_call = mock_client.search.call_args
|
||||
assert search_call[1]["collection_name"] == expected_collection_name
|
||||
|
|
@ -118,7 +118,7 @@ class TestMilvusUserCollectionIntegration:
|
|||
result = entity_vectors.search(vector, user, collection, limit=5)
|
||||
|
||||
# Verify search was called with correct collection name
|
||||
expected_collection_name = make_safe_collection_name(user, collection, 3, "entity")
|
||||
expected_collection_name = make_safe_collection_name(user, collection, "entity")
|
||||
mock_client.search.assert_called_once()
|
||||
search_call = mock_client.search.call_args
|
||||
assert search_call[1]["collection_name"] == expected_collection_name
|
||||
|
|
@ -142,9 +142,9 @@ class TestMilvusUserCollectionIntegration:
|
|||
|
||||
collection_names = set(doc_vectors.collections.values())
|
||||
expected_names = {
|
||||
"doc_user1_collection1_3",
|
||||
"doc_user2_collection2_3",
|
||||
"doc_user1_collection2_3"
|
||||
"doc_user1_collection1",
|
||||
"doc_user2_collection2",
|
||||
"doc_user1_collection2"
|
||||
}
|
||||
assert collection_names == expected_names
|
||||
|
||||
|
|
@ -167,9 +167,9 @@ class TestMilvusUserCollectionIntegration:
|
|||
|
||||
collection_names = set(entity_vectors.collections.values())
|
||||
expected_names = {
|
||||
"entity_user1_collection1_3",
|
||||
"entity_user2_collection2_3",
|
||||
"entity_user1_collection2_3"
|
||||
"entity_user1_collection1",
|
||||
"entity_user2_collection2",
|
||||
"entity_user1_collection2"
|
||||
}
|
||||
assert collection_names == expected_names
|
||||
|
||||
|
|
@ -194,10 +194,13 @@ class TestMilvusUserCollectionIntegration:
|
|||
|
||||
collection_names = set(doc_vectors.collections.values())
|
||||
expected_names = {
|
||||
"doc_test_user_test_collection_3", # 3D
|
||||
"doc_test_user_test_collection_4", # 4D
|
||||
"doc_test_user_test_collection_2" # 2D
|
||||
"doc_test_user_test_collection", # Same name for all dimensions
|
||||
"doc_test_user_test_collection", # now stored per dimension in key
|
||||
"doc_test_user_test_collection" # but collection name is the same
|
||||
}
|
||||
# Note: Now all dimensions use the same collection name, they are differentiated by the key
|
||||
assert len(collection_names) == 1 # Only one unique collection name
|
||||
assert "doc_test_user_test_collection" in collection_names
|
||||
assert collection_names == expected_names
|
||||
|
||||
@patch('trustgraph.direct.milvus_doc_embeddings.MilvusClient')
|
||||
|
|
@ -220,7 +223,7 @@ class TestMilvusUserCollectionIntegration:
|
|||
# Verify only one collection was created
|
||||
assert len(doc_vectors.collections) == 1
|
||||
|
||||
expected_collection_name = "doc_test_user_test_collection_3"
|
||||
expected_collection_name = "doc_test_user_test_collection"
|
||||
assert doc_vectors.collections[(3, user, collection)] == expected_collection_name
|
||||
|
||||
@patch('trustgraph.direct.milvus_doc_embeddings.MilvusClient')
|
||||
|
|
@ -233,10 +236,10 @@ class TestMilvusUserCollectionIntegration:
|
|||
|
||||
# Test various special character combinations
|
||||
test_cases = [
|
||||
("user@domain.com", "test-collection.v1", "doc_user_domain_com_test_collection_v1_3"),
|
||||
("user_123", "collection_456", "doc_user_123_collection_456_3"),
|
||||
("user with spaces", "collection with spaces", "doc_user_with_spaces_collection_with_spaces_3"),
|
||||
("user@@@test", "collection---test", "doc_user_test_collection_test_3"),
|
||||
("user@domain.com", "test-collection.v1", "doc_user_domain_com_test_collection_v1"),
|
||||
("user_123", "collection_456", "doc_user_123_collection_456"),
|
||||
("user with spaces", "collection with spaces", "doc_user_with_spaces_collection_with_spaces"),
|
||||
("user@@@test", "collection---test", "doc_user_test_collection_test"),
|
||||
]
|
||||
|
||||
vector = [0.1, 0.2, 0.3]
|
||||
|
|
@ -250,24 +253,24 @@ class TestMilvusUserCollectionIntegration:
|
|||
def test_collection_name_backward_compatibility(self):
|
||||
"""Test that new collection names don't conflict with old pattern"""
|
||||
# Old pattern was: {prefix}_{dimension}
|
||||
# New pattern is: {prefix}_{safe_user}_{safe_collection}_{dimension}
|
||||
|
||||
# New pattern is: {prefix}_{safe_user}_{safe_collection}
|
||||
|
||||
# The new pattern should never generate names that match the old pattern
|
||||
old_pattern_examples = ["doc_384", "entity_768", "doc_512"]
|
||||
|
||||
|
||||
test_cases = [
|
||||
("user", "collection", 384, "doc"),
|
||||
("test", "test", 768, "entity"),
|
||||
("a", "b", 512, "doc"),
|
||||
("user", "collection", "doc"),
|
||||
("test", "test", "entity"),
|
||||
("a", "b", "doc"),
|
||||
]
|
||||
|
||||
for user, collection, dimension, prefix in test_cases:
|
||||
new_name = make_safe_collection_name(user, collection, dimension, prefix)
|
||||
|
||||
# New names should have at least 4 underscores (prefix_user_collection_dimension)
|
||||
|
||||
for user, collection, prefix in test_cases:
|
||||
new_name = make_safe_collection_name(user, collection, prefix)
|
||||
|
||||
# New names should have at least 2 underscores (prefix_user_collection)
|
||||
# Old names had only 1 underscore (prefix_dimension)
|
||||
assert new_name.count('_') >= 3, f"New name {new_name} doesn't have enough underscores"
|
||||
|
||||
assert new_name.count('_') >= 2, f"New name {new_name} doesn't have enough underscores"
|
||||
|
||||
# New names should not match old pattern
|
||||
assert new_name not in old_pattern_examples, f"New name {new_name} conflicts with old pattern"
|
||||
|
||||
|
|
@ -286,23 +289,23 @@ class TestMilvusUserCollectionIntegration:
|
|||
dimension = 384
|
||||
|
||||
# Generate collection names
|
||||
doc_name1 = make_safe_collection_name(user1, collection1, dimension, "doc")
|
||||
doc_name2 = make_safe_collection_name(user2, collection2, dimension, "doc")
|
||||
|
||||
entity_name1 = make_safe_collection_name(user1, collection1, dimension, "entity")
|
||||
entity_name2 = make_safe_collection_name(user2, collection2, dimension, "entity")
|
||||
doc_name1 = make_safe_collection_name(user1, collection1, "doc")
|
||||
doc_name2 = make_safe_collection_name(user2, collection2, "doc")
|
||||
|
||||
entity_name1 = make_safe_collection_name(user1, collection1, "entity")
|
||||
entity_name2 = make_safe_collection_name(user2, collection2, "entity")
|
||||
|
||||
# Verify complete isolation
|
||||
assert doc_name1 != doc_name2, "Document collections should be isolated"
|
||||
assert entity_name1 != entity_name2, "Entity collections should be isolated"
|
||||
|
||||
# Verify names match expected pattern from Qdrant
|
||||
# Verify names match expected pattern from new API
|
||||
# Qdrant uses: d_{user}_{collection}_{dimension}, t_{user}_{collection}_{dimension}
|
||||
# Milvus uses: doc_{safe_user}_{safe_collection}_{dimension}, entity_{safe_user}_{safe_collection}_{dimension}
|
||||
assert doc_name1 == "doc_my_user_test_coll_1_384"
|
||||
assert doc_name2 == "doc_other_user_production_data_384"
|
||||
assert entity_name1 == "entity_my_user_test_coll_1_384"
|
||||
assert entity_name2 == "entity_other_user_production_data_384"
|
||||
# New Milvus API uses: doc_{safe_user}_{safe_collection}, entity_{safe_user}_{safe_collection}
|
||||
assert doc_name1 == "doc_my_user_test_coll_1"
|
||||
assert doc_name2 == "doc_other_user_production_data"
|
||||
assert entity_name1 == "entity_my_user_test_coll_1"
|
||||
assert entity_name2 == "entity_other_user_production_data"
|
||||
|
||||
# This test would have FAILED with the old implementation that used:
|
||||
# - doc_384 for all document embeddings (no user/collection differentiation)
|
||||
|
|
|
|||
|
|
@ -120,7 +120,7 @@ class TestPineconeDocEmbeddingsQueryProcessor:
|
|||
chunks = await processor.query_document_embeddings(message)
|
||||
|
||||
# Verify index was accessed correctly
|
||||
expected_index_name = "d-test_user-test_collection-3"
|
||||
expected_index_name = "d-test_user-test_collection"
|
||||
processor.pinecone.Index.assert_called_once_with(expected_index_name)
|
||||
|
||||
# Verify query parameters
|
||||
|
|
@ -239,7 +239,7 @@ class TestPineconeDocEmbeddingsQueryProcessor:
|
|||
|
||||
@pytest.mark.asyncio
|
||||
async def test_query_document_embeddings_different_vector_dimensions(self, processor):
|
||||
"""Test querying with vectors of different dimensions"""
|
||||
"""Test querying with vectors of different dimensions using same index"""
|
||||
message = MagicMock()
|
||||
message.vectors = [
|
||||
[0.1, 0.2], # 2D vector
|
||||
|
|
@ -248,37 +248,33 @@ class TestPineconeDocEmbeddingsQueryProcessor:
|
|||
message.limit = 5
|
||||
message.user = 'test_user'
|
||||
message.collection = 'test_collection'
|
||||
|
||||
mock_index_2d = MagicMock()
|
||||
mock_index_4d = MagicMock()
|
||||
|
||||
def mock_index_side_effect(name):
|
||||
if name.endswith("-2"):
|
||||
return mock_index_2d
|
||||
elif name.endswith("-4"):
|
||||
return mock_index_4d
|
||||
|
||||
processor.pinecone.Index.side_effect = mock_index_side_effect
|
||||
|
||||
# Mock results for different dimensions
|
||||
|
||||
# Mock single index that handles all dimensions
|
||||
mock_index = MagicMock()
|
||||
processor.pinecone.Index.return_value = mock_index
|
||||
|
||||
# Mock results for different vector queries
|
||||
mock_results_2d = MagicMock()
|
||||
mock_results_2d.matches = [MagicMock(metadata={'doc': 'Document from 2D index'})]
|
||||
mock_index_2d.query.return_value = mock_results_2d
|
||||
|
||||
mock_results_2d.matches = [MagicMock(metadata={'doc': 'Document from 2D query'})]
|
||||
|
||||
mock_results_4d = MagicMock()
|
||||
mock_results_4d.matches = [MagicMock(metadata={'doc': 'Document from 4D index'})]
|
||||
mock_index_4d.query.return_value = mock_results_4d
|
||||
|
||||
mock_results_4d.matches = [MagicMock(metadata={'doc': 'Document from 4D query'})]
|
||||
|
||||
mock_index.query.side_effect = [mock_results_2d, mock_results_4d]
|
||||
|
||||
chunks = await processor.query_document_embeddings(message)
|
||||
|
||||
# Verify different indexes were used
|
||||
|
||||
# Verify same index used for both vectors
|
||||
expected_index_name = "d-test_user-test_collection"
|
||||
assert processor.pinecone.Index.call_count == 2
|
||||
mock_index_2d.query.assert_called_once()
|
||||
mock_index_4d.query.assert_called_once()
|
||||
|
||||
processor.pinecone.Index.assert_called_with(expected_index_name)
|
||||
|
||||
# Verify both queries were made
|
||||
assert mock_index.query.call_count == 2
|
||||
|
||||
# Verify results from both dimensions
|
||||
assert 'Document from 2D index' in chunks
|
||||
assert 'Document from 4D index' in chunks
|
||||
assert 'Document from 2D query' in chunks
|
||||
assert 'Document from 4D query' in chunks
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_query_document_embeddings_empty_vectors_list(self, processor):
|
||||
|
|
|
|||
|
|
@ -148,7 +148,7 @@ class TestPineconeGraphEmbeddingsQueryProcessor:
|
|||
entities = await processor.query_graph_embeddings(message)
|
||||
|
||||
# Verify index was accessed correctly
|
||||
expected_index_name = "t-test_user-test_collection-3"
|
||||
expected_index_name = "t-test_user-test_collection"
|
||||
processor.pinecone.Index.assert_called_once_with(expected_index_name)
|
||||
|
||||
# Verify query parameters
|
||||
|
|
@ -265,7 +265,7 @@ class TestPineconeGraphEmbeddingsQueryProcessor:
|
|||
|
||||
@pytest.mark.asyncio
|
||||
async def test_query_graph_embeddings_different_vector_dimensions(self, processor):
|
||||
"""Test querying with vectors of different dimensions"""
|
||||
"""Test querying with vectors of different dimensions using same index"""
|
||||
message = MagicMock()
|
||||
message.vectors = [
|
||||
[0.1, 0.2], # 2D vector
|
||||
|
|
@ -274,34 +274,30 @@ class TestPineconeGraphEmbeddingsQueryProcessor:
|
|||
message.limit = 5
|
||||
message.user = 'test_user'
|
||||
message.collection = 'test_collection'
|
||||
|
||||
mock_index_2d = MagicMock()
|
||||
mock_index_4d = MagicMock()
|
||||
|
||||
def mock_index_side_effect(name):
|
||||
if name.endswith("-2"):
|
||||
return mock_index_2d
|
||||
elif name.endswith("-4"):
|
||||
return mock_index_4d
|
||||
|
||||
processor.pinecone.Index.side_effect = mock_index_side_effect
|
||||
|
||||
# Mock results for different dimensions
|
||||
|
||||
# Mock single index that handles all dimensions
|
||||
mock_index = MagicMock()
|
||||
processor.pinecone.Index.return_value = mock_index
|
||||
|
||||
# Mock results for different vector queries
|
||||
mock_results_2d = MagicMock()
|
||||
mock_results_2d.matches = [MagicMock(metadata={'entity': 'entity_2d'})]
|
||||
mock_index_2d.query.return_value = mock_results_2d
|
||||
|
||||
|
||||
mock_results_4d = MagicMock()
|
||||
mock_results_4d.matches = [MagicMock(metadata={'entity': 'entity_4d'})]
|
||||
mock_index_4d.query.return_value = mock_results_4d
|
||||
|
||||
|
||||
mock_index.query.side_effect = [mock_results_2d, mock_results_4d]
|
||||
|
||||
entities = await processor.query_graph_embeddings(message)
|
||||
|
||||
# Verify different indexes were used
|
||||
|
||||
# Verify same index used for both vectors
|
||||
expected_index_name = "t-test_user-test_collection"
|
||||
assert processor.pinecone.Index.call_count == 2
|
||||
mock_index_2d.query.assert_called_once()
|
||||
mock_index_4d.query.assert_called_once()
|
||||
|
||||
processor.pinecone.Index.assert_called_with(expected_index_name)
|
||||
|
||||
# Verify both queries were made
|
||||
assert mock_index.query.call_count == 2
|
||||
|
||||
# Verify results from both dimensions
|
||||
entity_values = [e.value for e in entities]
|
||||
assert 'entity_2d' in entity_values
|
||||
|
|
|
|||
|
|
@ -70,7 +70,7 @@ class TestCassandraQueryProcessor:
|
|||
assert result.is_uri is False
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@patch('trustgraph.query.triples.cassandra.service.TrustGraph')
|
||||
@patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
|
||||
async def test_query_triples_spo_query(self, mock_trustgraph):
|
||||
"""Test querying triples with subject, predicate, and object specified"""
|
||||
from trustgraph.schema import TriplesQueryRequest, Value
|
||||
|
|
@ -98,16 +98,15 @@ class TestCassandraQueryProcessor:
|
|||
|
||||
result = await processor.query_triples(query)
|
||||
|
||||
# Verify TrustGraph was created with correct parameters
|
||||
# Verify KnowledgeGraph was created with correct parameters
|
||||
mock_trustgraph.assert_called_once_with(
|
||||
hosts=['localhost'],
|
||||
keyspace='test_user',
|
||||
table='test_collection'
|
||||
keyspace='test_user'
|
||||
)
|
||||
|
||||
# Verify get_spo was called with correct parameters
|
||||
mock_tg_instance.get_spo.assert_called_once_with(
|
||||
'test_subject', 'test_predicate', 'test_object', limit=100
|
||||
'test_collection', 'test_subject', 'test_predicate', 'test_object', limit=100
|
||||
)
|
||||
|
||||
# Verify result contains the queried triple
|
||||
|
|
@ -144,7 +143,7 @@ class TestCassandraQueryProcessor:
|
|||
assert processor.table is None
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@patch('trustgraph.query.triples.cassandra.service.TrustGraph')
|
||||
@patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
|
||||
async def test_query_triples_sp_pattern(self, mock_trustgraph):
|
||||
"""Test SP query pattern (subject and predicate, no object)"""
|
||||
from trustgraph.schema import TriplesQueryRequest, Value
|
||||
|
|
@ -170,14 +169,14 @@ class TestCassandraQueryProcessor:
|
|||
|
||||
result = await processor.query_triples(query)
|
||||
|
||||
mock_tg_instance.get_sp.assert_called_once_with('test_subject', 'test_predicate', limit=50)
|
||||
mock_tg_instance.get_sp.assert_called_once_with('test_collection', 'test_subject', 'test_predicate', limit=50)
|
||||
assert len(result) == 1
|
||||
assert result[0].s.value == 'test_subject'
|
||||
assert result[0].p.value == 'test_predicate'
|
||||
assert result[0].o.value == 'result_object'
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@patch('trustgraph.query.triples.cassandra.service.TrustGraph')
|
||||
@patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
|
||||
async def test_query_triples_s_pattern(self, mock_trustgraph):
|
||||
"""Test S query pattern (subject only)"""
|
||||
from trustgraph.schema import TriplesQueryRequest, Value
|
||||
|
|
@ -203,14 +202,14 @@ class TestCassandraQueryProcessor:
|
|||
|
||||
result = await processor.query_triples(query)
|
||||
|
||||
mock_tg_instance.get_s.assert_called_once_with('test_subject', limit=25)
|
||||
mock_tg_instance.get_s.assert_called_once_with('test_collection', 'test_subject', limit=25)
|
||||
assert len(result) == 1
|
||||
assert result[0].s.value == 'test_subject'
|
||||
assert result[0].p.value == 'result_predicate'
|
||||
assert result[0].o.value == 'result_object'
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@patch('trustgraph.query.triples.cassandra.service.TrustGraph')
|
||||
@patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
|
||||
async def test_query_triples_p_pattern(self, mock_trustgraph):
|
||||
"""Test P query pattern (predicate only)"""
|
||||
from trustgraph.schema import TriplesQueryRequest, Value
|
||||
|
|
@ -236,14 +235,14 @@ class TestCassandraQueryProcessor:
|
|||
|
||||
result = await processor.query_triples(query)
|
||||
|
||||
mock_tg_instance.get_p.assert_called_once_with('test_predicate', limit=10)
|
||||
mock_tg_instance.get_p.assert_called_once_with('test_collection', 'test_predicate', limit=10)
|
||||
assert len(result) == 1
|
||||
assert result[0].s.value == 'result_subject'
|
||||
assert result[0].p.value == 'test_predicate'
|
||||
assert result[0].o.value == 'result_object'
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@patch('trustgraph.query.triples.cassandra.service.TrustGraph')
|
||||
@patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
|
||||
async def test_query_triples_o_pattern(self, mock_trustgraph):
|
||||
"""Test O query pattern (object only)"""
|
||||
from trustgraph.schema import TriplesQueryRequest, Value
|
||||
|
|
@ -269,14 +268,14 @@ class TestCassandraQueryProcessor:
|
|||
|
||||
result = await processor.query_triples(query)
|
||||
|
||||
mock_tg_instance.get_o.assert_called_once_with('test_object', limit=75)
|
||||
mock_tg_instance.get_o.assert_called_once_with('test_collection', 'test_object', limit=75)
|
||||
assert len(result) == 1
|
||||
assert result[0].s.value == 'result_subject'
|
||||
assert result[0].p.value == 'result_predicate'
|
||||
assert result[0].o.value == 'test_object'
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@patch('trustgraph.query.triples.cassandra.service.TrustGraph')
|
||||
@patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
|
||||
async def test_query_triples_get_all_pattern(self, mock_trustgraph):
|
||||
"""Test query pattern with no constraints (get all)"""
|
||||
from trustgraph.schema import TriplesQueryRequest
|
||||
|
|
@ -303,7 +302,7 @@ class TestCassandraQueryProcessor:
|
|||
|
||||
result = await processor.query_triples(query)
|
||||
|
||||
mock_tg_instance.get_all.assert_called_once_with(limit=1000)
|
||||
mock_tg_instance.get_all.assert_called_once_with('test_collection', limit=1000)
|
||||
assert len(result) == 1
|
||||
assert result[0].s.value == 'all_subject'
|
||||
assert result[0].p.value == 'all_predicate'
|
||||
|
|
@ -376,7 +375,7 @@ class TestCassandraQueryProcessor:
|
|||
mock_launch.assert_called_once_with(default_ident, '\nTriples query service. Input is a (s, p, o) triple, some values may be\nnull. Output is a list of triples.\n')
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@patch('trustgraph.query.triples.cassandra.service.TrustGraph')
|
||||
@patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
|
||||
async def test_query_triples_with_authentication(self, mock_trustgraph):
|
||||
"""Test querying with username and password authentication"""
|
||||
from trustgraph.schema import TriplesQueryRequest, Value
|
||||
|
|
@ -402,17 +401,16 @@ class TestCassandraQueryProcessor:
|
|||
|
||||
await processor.query_triples(query)
|
||||
|
||||
# Verify TrustGraph was created with authentication
|
||||
# Verify KnowledgeGraph was created with authentication
|
||||
mock_trustgraph.assert_called_once_with(
|
||||
hosts=['cassandra'], # Updated default
|
||||
keyspace='test_user',
|
||||
table='test_collection',
|
||||
username='authuser',
|
||||
password='authpass'
|
||||
)
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@patch('trustgraph.query.triples.cassandra.service.TrustGraph')
|
||||
@patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
|
||||
async def test_query_triples_table_reuse(self, mock_trustgraph):
|
||||
"""Test that TrustGraph is reused for same table"""
|
||||
from trustgraph.schema import TriplesQueryRequest, Value
|
||||
|
|
@ -441,7 +439,7 @@ class TestCassandraQueryProcessor:
|
|||
assert mock_trustgraph.call_count == 1 # Should not increase
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@patch('trustgraph.query.triples.cassandra.service.TrustGraph')
|
||||
@patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
|
||||
async def test_query_triples_table_switching(self, mock_trustgraph):
|
||||
"""Test table switching creates new TrustGraph"""
|
||||
from trustgraph.schema import TriplesQueryRequest, Value
|
||||
|
|
@ -463,7 +461,7 @@ class TestCassandraQueryProcessor:
|
|||
)
|
||||
|
||||
await processor.query_triples(query1)
|
||||
assert processor.table == ('user1', 'collection1')
|
||||
assert processor.table == 'user1'
|
||||
|
||||
# Second query with different table
|
||||
query2 = TriplesQueryRequest(
|
||||
|
|
@ -476,13 +474,13 @@ class TestCassandraQueryProcessor:
|
|||
)
|
||||
|
||||
await processor.query_triples(query2)
|
||||
assert processor.table == ('user2', 'collection2')
|
||||
assert processor.table == 'user2'
|
||||
|
||||
# Verify TrustGraph was created twice
|
||||
assert mock_trustgraph.call_count == 2
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@patch('trustgraph.query.triples.cassandra.service.TrustGraph')
|
||||
@patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
|
||||
async def test_query_triples_exception_handling(self, mock_trustgraph):
|
||||
"""Test exception handling during query execution"""
|
||||
from trustgraph.schema import TriplesQueryRequest, Value
|
||||
|
|
@ -506,7 +504,7 @@ class TestCassandraQueryProcessor:
|
|||
await processor.query_triples(query)
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@patch('trustgraph.query.triples.cassandra.service.TrustGraph')
|
||||
@patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
|
||||
async def test_query_triples_multiple_results(self, mock_trustgraph):
|
||||
"""Test query returning multiple results"""
|
||||
from trustgraph.schema import TriplesQueryRequest, Value
|
||||
|
|
|
|||
|
|
@ -18,7 +18,7 @@ from trustgraph.storage.knowledge.store import Processor as KgStore
|
|||
class TestTriplesWriterConfiguration:
|
||||
"""Test Cassandra configuration in triples writer processor."""
|
||||
|
||||
@patch('trustgraph.direct.cassandra.TrustGraph')
|
||||
@patch('trustgraph.direct.cassandra_kg.KnowledgeGraph')
|
||||
def test_environment_variable_configuration(self, mock_trust_graph):
|
||||
"""Test processor picks up configuration from environment variables."""
|
||||
env_vars = {
|
||||
|
|
@ -34,7 +34,7 @@ class TestTriplesWriterConfiguration:
|
|||
assert processor.cassandra_username == 'env-user'
|
||||
assert processor.cassandra_password == 'env-pass'
|
||||
|
||||
@patch('trustgraph.direct.cassandra.TrustGraph')
|
||||
@patch('trustgraph.direct.cassandra_kg.KnowledgeGraph')
|
||||
def test_parameter_override_environment(self, mock_trust_graph):
|
||||
"""Test explicit parameters override environment variables."""
|
||||
env_vars = {
|
||||
|
|
@ -55,7 +55,7 @@ class TestTriplesWriterConfiguration:
|
|||
assert processor.cassandra_username == 'param-user'
|
||||
assert processor.cassandra_password == 'param-pass'
|
||||
|
||||
@patch('trustgraph.direct.cassandra.TrustGraph')
|
||||
@patch('trustgraph.direct.cassandra_kg.KnowledgeGraph')
|
||||
def test_no_backward_compatibility_graph_params(self, mock_trust_graph):
|
||||
"""Test that old graph_* parameter names are no longer supported."""
|
||||
processor = TriplesWriter(
|
||||
|
|
@ -70,7 +70,7 @@ class TestTriplesWriterConfiguration:
|
|||
assert processor.cassandra_username is None
|
||||
assert processor.cassandra_password is None
|
||||
|
||||
@patch('trustgraph.direct.cassandra.TrustGraph')
|
||||
@patch('trustgraph.direct.cassandra_kg.KnowledgeGraph')
|
||||
def test_default_configuration(self, mock_trust_graph):
|
||||
"""Test default configuration when no params or env vars provided."""
|
||||
with patch.dict(os.environ, {}, clear=True):
|
||||
|
|
@ -163,7 +163,7 @@ class TestObjectsWriterConfiguration:
|
|||
class TestTriplesQueryConfiguration:
|
||||
"""Test Cassandra configuration in triples query processor."""
|
||||
|
||||
@patch('trustgraph.direct.cassandra.TrustGraph')
|
||||
@patch('trustgraph.direct.cassandra_kg.KnowledgeGraph')
|
||||
def test_environment_variable_configuration(self, mock_trust_graph):
|
||||
"""Test processor picks up configuration from environment variables."""
|
||||
env_vars = {
|
||||
|
|
@ -179,7 +179,7 @@ class TestTriplesQueryConfiguration:
|
|||
assert processor.cassandra_username == 'query-env-user'
|
||||
assert processor.cassandra_password == 'query-env-pass'
|
||||
|
||||
@patch('trustgraph.direct.cassandra.TrustGraph')
|
||||
@patch('trustgraph.direct.cassandra_kg.KnowledgeGraph')
|
||||
def test_only_new_parameters_work(self, mock_trust_graph):
|
||||
"""Test that only new parameters work."""
|
||||
processor = TriplesQuery(
|
||||
|
|
@ -379,7 +379,7 @@ class TestCommandLineArgumentHandling:
|
|||
class TestConfigurationPriorityIntegration:
|
||||
"""Test complete configuration priority chain in processors."""
|
||||
|
||||
@patch('trustgraph.direct.cassandra.TrustGraph')
|
||||
@patch('trustgraph.direct.cassandra_kg.KnowledgeGraph')
|
||||
def test_complete_priority_chain(self, mock_trust_graph):
|
||||
"""Test CLI params > env vars > defaults priority in actual processor."""
|
||||
env_vars = {
|
||||
|
|
|
|||
|
|
@ -135,7 +135,7 @@ class TestPineconeDocEmbeddingsStorageProcessor:
|
|||
await processor.store_document_embeddings(message)
|
||||
|
||||
# Verify index name and operations
|
||||
expected_index_name = "d-test_user-test_collection-3"
|
||||
expected_index_name = "d-test_user-test_collection"
|
||||
processor.pinecone.Index.assert_called_with(expected_index_name)
|
||||
|
||||
# Verify upsert was called for each vector
|
||||
|
|
@ -203,7 +203,7 @@ class TestPineconeDocEmbeddingsStorageProcessor:
|
|||
await processor.store_document_embeddings(message)
|
||||
|
||||
# Verify index creation was called
|
||||
expected_index_name = "d-test_user-test_collection-3"
|
||||
expected_index_name = "d-test_user-test_collection"
|
||||
processor.pinecone.create_index.assert_called_once()
|
||||
create_call = processor.pinecone.create_index.call_args
|
||||
assert create_call[1]['name'] == expected_index_name
|
||||
|
|
@ -299,12 +299,11 @@ class TestPineconeDocEmbeddingsStorageProcessor:
|
|||
mock_index_3d = MagicMock()
|
||||
|
||||
def mock_index_side_effect(name):
|
||||
if name.endswith("-2"):
|
||||
return mock_index_2d
|
||||
elif name.endswith("-4"):
|
||||
return mock_index_4d
|
||||
elif name.endswith("-3"):
|
||||
return mock_index_3d
|
||||
# All dimensions now use the same index name pattern
|
||||
# Different dimensions will be handled within the same index
|
||||
if "test_user" in name and "test_collection" in name:
|
||||
return mock_index_2d # Just return one mock for all
|
||||
return MagicMock()
|
||||
|
||||
processor.pinecone.Index.side_effect = mock_index_side_effect
|
||||
processor.pinecone.has_index.return_value = True
|
||||
|
|
@ -312,11 +311,10 @@ class TestPineconeDocEmbeddingsStorageProcessor:
|
|||
with patch('uuid.uuid4', side_effect=['id1', 'id2', 'id3']):
|
||||
await processor.store_document_embeddings(message)
|
||||
|
||||
# Verify different indexes were used for different dimensions
|
||||
assert processor.pinecone.Index.call_count == 3
|
||||
mock_index_2d.upsert.assert_called_once()
|
||||
mock_index_4d.upsert.assert_called_once()
|
||||
mock_index_3d.upsert.assert_called_once()
|
||||
# Verify all vectors are now stored in the same index
|
||||
# (Pinecone can handle mixed dimensions in the same index)
|
||||
assert processor.pinecone.Index.call_count == 3 # Called once per vector
|
||||
mock_index_2d.upsert.call_count == 3 # All upserts go to same index
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_store_document_embeddings_empty_chunks_list(self, processor):
|
||||
|
|
|
|||
|
|
@ -106,7 +106,7 @@ class TestQdrantDocEmbeddingsStorage(IsolatedAsyncioTestCase):
|
|||
|
||||
# Assert
|
||||
# Verify collection existence was checked
|
||||
expected_collection = 'd_test_user_test_collection_3'
|
||||
expected_collection = 'd_test_user_test_collection'
|
||||
mock_qdrant_instance.collection_exists.assert_called_once_with(expected_collection)
|
||||
|
||||
# Verify upsert was called
|
||||
|
|
@ -309,7 +309,7 @@ class TestQdrantDocEmbeddingsStorage(IsolatedAsyncioTestCase):
|
|||
await processor.store_document_embeddings(mock_message)
|
||||
|
||||
# Assert
|
||||
expected_collection = 'd_new_user_new_collection_5'
|
||||
expected_collection = 'd_new_user_new_collection'
|
||||
|
||||
# Verify collection existence check and creation
|
||||
mock_qdrant_instance.collection_exists.assert_called_once_with(expected_collection)
|
||||
|
|
@ -408,7 +408,7 @@ class TestQdrantDocEmbeddingsStorage(IsolatedAsyncioTestCase):
|
|||
await processor.store_document_embeddings(mock_message2)
|
||||
|
||||
# Assert
|
||||
expected_collection = 'd_cache_user_cache_collection_3'
|
||||
expected_collection = 'd_cache_user_cache_collection'
|
||||
assert processor.last_collection == expected_collection
|
||||
|
||||
# Verify second call skipped existence check (cached)
|
||||
|
|
@ -455,17 +455,16 @@ class TestQdrantDocEmbeddingsStorage(IsolatedAsyncioTestCase):
|
|||
await processor.store_document_embeddings(mock_message)
|
||||
|
||||
# Assert
|
||||
# Should check existence of both collections
|
||||
expected_collections = ['d_dim_user_dim_collection_2', 'd_dim_user_dim_collection_3']
|
||||
actual_calls = [call.args[0] for call in mock_qdrant_instance.collection_exists.call_args_list]
|
||||
assert actual_calls == expected_collections
|
||||
|
||||
# Should upsert to both collections
|
||||
# Should check existence of the same collection (dimensions no longer create separate collections)
|
||||
expected_collection = 'd_dim_user_dim_collection'
|
||||
mock_qdrant_instance.collection_exists.assert_called_once_with(expected_collection)
|
||||
|
||||
# Should upsert to the same collection for both vectors
|
||||
assert mock_qdrant_instance.upsert.call_count == 2
|
||||
|
||||
|
||||
upsert_calls = mock_qdrant_instance.upsert.call_args_list
|
||||
assert upsert_calls[0][1]['collection_name'] == 'd_dim_user_dim_collection_2'
|
||||
assert upsert_calls[1][1]['collection_name'] == 'd_dim_user_dim_collection_3'
|
||||
assert upsert_calls[0][1]['collection_name'] == expected_collection
|
||||
assert upsert_calls[1][1]['collection_name'] == expected_collection
|
||||
|
||||
@patch('trustgraph.storage.doc_embeddings.qdrant.write.QdrantClient')
|
||||
@patch('trustgraph.base.DocumentEmbeddingsStoreService.__init__')
|
||||
|
|
|
|||
|
|
@ -135,7 +135,7 @@ class TestPineconeGraphEmbeddingsStorageProcessor:
|
|||
await processor.store_graph_embeddings(message)
|
||||
|
||||
# Verify index name and operations
|
||||
expected_index_name = "t-test_user-test_collection-3"
|
||||
expected_index_name = "t-test_user-test_collection"
|
||||
processor.pinecone.Index.assert_called_with(expected_index_name)
|
||||
|
||||
# Verify upsert was called for each vector
|
||||
|
|
@ -203,7 +203,7 @@ class TestPineconeGraphEmbeddingsStorageProcessor:
|
|||
await processor.store_graph_embeddings(message)
|
||||
|
||||
# Verify index creation was called
|
||||
expected_index_name = "t-test_user-test_collection-3"
|
||||
expected_index_name = "t-test_user-test_collection"
|
||||
processor.pinecone.create_index.assert_called_once()
|
||||
create_call = processor.pinecone.create_index.call_args
|
||||
assert create_call[1]['name'] == expected_index_name
|
||||
|
|
@ -256,12 +256,12 @@ class TestPineconeGraphEmbeddingsStorageProcessor:
|
|||
|
||||
@pytest.mark.asyncio
|
||||
async def test_store_graph_embeddings_different_vector_dimensions(self, processor):
|
||||
"""Test storing graph embeddings with different vector dimensions"""
|
||||
"""Test storing graph embeddings with different vector dimensions to same index"""
|
||||
message = MagicMock()
|
||||
message.metadata = MagicMock()
|
||||
message.metadata.user = 'test_user'
|
||||
message.metadata.collection = 'test_collection'
|
||||
|
||||
|
||||
entity = EntityEmbeddings(
|
||||
entity=Value(value="test_entity", is_uri=False),
|
||||
vectors=[
|
||||
|
|
@ -271,30 +271,21 @@ class TestPineconeGraphEmbeddingsStorageProcessor:
|
|||
]
|
||||
)
|
||||
message.entities = [entity]
|
||||
|
||||
mock_index_2d = MagicMock()
|
||||
mock_index_4d = MagicMock()
|
||||
mock_index_3d = MagicMock()
|
||||
|
||||
def mock_index_side_effect(name):
|
||||
if name.endswith("-2"):
|
||||
return mock_index_2d
|
||||
elif name.endswith("-4"):
|
||||
return mock_index_4d
|
||||
elif name.endswith("-3"):
|
||||
return mock_index_3d
|
||||
|
||||
processor.pinecone.Index.side_effect = mock_index_side_effect
|
||||
|
||||
# All vectors now use the same index (no dimension in name)
|
||||
mock_index = MagicMock()
|
||||
processor.pinecone.Index.return_value = mock_index
|
||||
processor.pinecone.has_index.return_value = True
|
||||
|
||||
|
||||
with patch('uuid.uuid4', side_effect=['id1', 'id2', 'id3']):
|
||||
await processor.store_graph_embeddings(message)
|
||||
|
||||
# Verify different indexes were used for different dimensions
|
||||
assert processor.pinecone.Index.call_count == 3
|
||||
mock_index_2d.upsert.assert_called_once()
|
||||
mock_index_4d.upsert.assert_called_once()
|
||||
mock_index_3d.upsert.assert_called_once()
|
||||
|
||||
# Verify same index was used for all dimensions
|
||||
expected_index_name = 't-test_user-test_collection'
|
||||
processor.pinecone.Index.assert_called_with(expected_index_name)
|
||||
|
||||
# Verify all vectors were upserted to the same index
|
||||
assert mock_index.upsert.call_count == 3
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_store_graph_embeddings_empty_entities_list(self, processor):
|
||||
|
|
|
|||
|
|
@ -69,7 +69,7 @@ class TestQdrantGraphEmbeddingsStorage(IsolatedAsyncioTestCase):
|
|||
collection_name = processor.get_collection(dim=512, user='test_user', collection='test_collection')
|
||||
|
||||
# Assert
|
||||
expected_name = 't_test_user_test_collection_512'
|
||||
expected_name = 't_test_user_test_collection'
|
||||
assert collection_name == expected_name
|
||||
assert processor.last_collection == expected_name
|
||||
|
||||
|
|
@ -118,7 +118,7 @@ class TestQdrantGraphEmbeddingsStorage(IsolatedAsyncioTestCase):
|
|||
|
||||
# Assert
|
||||
# Verify collection existence was checked
|
||||
expected_collection = 't_test_user_test_collection_3'
|
||||
expected_collection = 't_test_user_test_collection'
|
||||
mock_qdrant_instance.collection_exists.assert_called_once_with(expected_collection)
|
||||
|
||||
# Verify upsert was called
|
||||
|
|
@ -156,7 +156,7 @@ class TestQdrantGraphEmbeddingsStorage(IsolatedAsyncioTestCase):
|
|||
collection_name = processor.get_collection(dim=256, user='existing_user', collection='existing_collection')
|
||||
|
||||
# Assert
|
||||
expected_name = 't_existing_user_existing_collection_256'
|
||||
expected_name = 't_existing_user_existing_collection'
|
||||
assert collection_name == expected_name
|
||||
assert processor.last_collection == expected_name
|
||||
|
||||
|
|
@ -194,7 +194,7 @@ class TestQdrantGraphEmbeddingsStorage(IsolatedAsyncioTestCase):
|
|||
collection_name2 = processor.get_collection(dim=128, user='cache_user', collection='cache_collection')
|
||||
|
||||
# Assert
|
||||
expected_name = 't_cache_user_cache_collection_128'
|
||||
expected_name = 't_cache_user_cache_collection'
|
||||
assert collection_name1 == expected_name
|
||||
assert collection_name2 == expected_name
|
||||
|
||||
|
|
|
|||
|
|
@ -86,7 +86,7 @@ class TestCassandraStorageProcessor:
|
|||
assert processor.cassandra_username == 'new-user' # Only cassandra_* params work
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@patch('trustgraph.storage.triples.cassandra.write.TrustGraph')
|
||||
@patch('trustgraph.storage.triples.cassandra.write.KnowledgeGraph')
|
||||
async def test_table_switching_with_auth(self, mock_trustgraph):
|
||||
"""Test table switching logic when authentication is provided"""
|
||||
taskgroup_mock = MagicMock()
|
||||
|
|
@ -107,18 +107,17 @@ class TestCassandraStorageProcessor:
|
|||
|
||||
await processor.store_triples(mock_message)
|
||||
|
||||
# Verify TrustGraph was called with auth parameters
|
||||
# Verify KnowledgeGraph was called with auth parameters
|
||||
mock_trustgraph.assert_called_once_with(
|
||||
hosts=['cassandra'], # Updated default
|
||||
keyspace='user1',
|
||||
table='collection1',
|
||||
username='testuser',
|
||||
password='testpass'
|
||||
)
|
||||
assert processor.table == ('user1', 'collection1')
|
||||
assert processor.table == 'user1'
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@patch('trustgraph.storage.triples.cassandra.write.TrustGraph')
|
||||
@patch('trustgraph.storage.triples.cassandra.write.KnowledgeGraph')
|
||||
async def test_table_switching_without_auth(self, mock_trustgraph):
|
||||
"""Test table switching logic when no authentication is provided"""
|
||||
taskgroup_mock = MagicMock()
|
||||
|
|
@ -135,16 +134,15 @@ class TestCassandraStorageProcessor:
|
|||
|
||||
await processor.store_triples(mock_message)
|
||||
|
||||
# Verify TrustGraph was called without auth parameters
|
||||
# Verify KnowledgeGraph was called without auth parameters
|
||||
mock_trustgraph.assert_called_once_with(
|
||||
hosts=['cassandra'], # Updated default
|
||||
keyspace='user2',
|
||||
table='collection2'
|
||||
keyspace='user2'
|
||||
)
|
||||
assert processor.table == ('user2', 'collection2')
|
||||
assert processor.table == 'user2'
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@patch('trustgraph.storage.triples.cassandra.write.TrustGraph')
|
||||
@patch('trustgraph.storage.triples.cassandra.write.KnowledgeGraph')
|
||||
async def test_table_reuse_when_same(self, mock_trustgraph):
|
||||
"""Test that TrustGraph is not recreated when table hasn't changed"""
|
||||
taskgroup_mock = MagicMock()
|
||||
|
|
@ -168,7 +166,7 @@ class TestCassandraStorageProcessor:
|
|||
assert mock_trustgraph.call_count == 1 # Should not increase
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@patch('trustgraph.storage.triples.cassandra.write.TrustGraph')
|
||||
@patch('trustgraph.storage.triples.cassandra.write.KnowledgeGraph')
|
||||
async def test_triple_insertion(self, mock_trustgraph):
|
||||
"""Test that triples are properly inserted into Cassandra"""
|
||||
taskgroup_mock = MagicMock()
|
||||
|
|
@ -198,11 +196,11 @@ class TestCassandraStorageProcessor:
|
|||
|
||||
# Verify both triples were inserted
|
||||
assert mock_tg_instance.insert.call_count == 2
|
||||
mock_tg_instance.insert.assert_any_call('subject1', 'predicate1', 'object1')
|
||||
mock_tg_instance.insert.assert_any_call('subject2', 'predicate2', 'object2')
|
||||
mock_tg_instance.insert.assert_any_call('collection1', 'subject1', 'predicate1', 'object1')
|
||||
mock_tg_instance.insert.assert_any_call('collection1', 'subject2', 'predicate2', 'object2')
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@patch('trustgraph.storage.triples.cassandra.write.TrustGraph')
|
||||
@patch('trustgraph.storage.triples.cassandra.write.KnowledgeGraph')
|
||||
async def test_triple_insertion_with_empty_list(self, mock_trustgraph):
|
||||
"""Test behavior when message has no triples"""
|
||||
taskgroup_mock = MagicMock()
|
||||
|
|
@ -223,7 +221,7 @@ class TestCassandraStorageProcessor:
|
|||
mock_tg_instance.insert.assert_not_called()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@patch('trustgraph.storage.triples.cassandra.write.TrustGraph')
|
||||
@patch('trustgraph.storage.triples.cassandra.write.KnowledgeGraph')
|
||||
@patch('trustgraph.storage.triples.cassandra.write.time.sleep')
|
||||
async def test_exception_handling_with_retry(self, mock_sleep, mock_trustgraph):
|
||||
"""Test exception handling during TrustGraph creation"""
|
||||
|
|
@ -328,7 +326,7 @@ class TestCassandraStorageProcessor:
|
|||
mock_launch.assert_called_once_with(default_ident, '\nGraph writer. Input is graph edge. Writes edges to Cassandra graph.\n')
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@patch('trustgraph.storage.triples.cassandra.write.TrustGraph')
|
||||
@patch('trustgraph.storage.triples.cassandra.write.KnowledgeGraph')
|
||||
async def test_store_triples_table_switching_between_different_tables(self, mock_trustgraph):
|
||||
"""Test table switching when different tables are used in sequence"""
|
||||
taskgroup_mock = MagicMock()
|
||||
|
|
@ -345,7 +343,7 @@ class TestCassandraStorageProcessor:
|
|||
mock_message1.triples = []
|
||||
|
||||
await processor.store_triples(mock_message1)
|
||||
assert processor.table == ('user1', 'collection1')
|
||||
assert processor.table == 'user1'
|
||||
assert processor.tg == mock_tg_instance1
|
||||
|
||||
# Second message with different table
|
||||
|
|
@ -355,14 +353,14 @@ class TestCassandraStorageProcessor:
|
|||
mock_message2.triples = []
|
||||
|
||||
await processor.store_triples(mock_message2)
|
||||
assert processor.table == ('user2', 'collection2')
|
||||
assert processor.table == 'user2'
|
||||
assert processor.tg == mock_tg_instance2
|
||||
|
||||
# Verify TrustGraph was created twice for different tables
|
||||
assert mock_trustgraph.call_count == 2
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@patch('trustgraph.storage.triples.cassandra.write.TrustGraph')
|
||||
@patch('trustgraph.storage.triples.cassandra.write.KnowledgeGraph')
|
||||
async def test_store_triples_with_special_characters_in_values(self, mock_trustgraph):
|
||||
"""Test storing triples with special characters and unicode"""
|
||||
taskgroup_mock = MagicMock()
|
||||
|
|
@ -386,13 +384,14 @@ class TestCassandraStorageProcessor:
|
|||
|
||||
# Verify the triple was inserted with special characters preserved
|
||||
mock_tg_instance.insert.assert_called_once_with(
|
||||
'test_collection',
|
||||
'subject with spaces & symbols',
|
||||
'predicate:with/colons',
|
||||
'object with "quotes" and unicode: ñáéíóú'
|
||||
)
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@patch('trustgraph.storage.triples.cassandra.write.TrustGraph')
|
||||
@patch('trustgraph.storage.triples.cassandra.write.KnowledgeGraph')
|
||||
async def test_store_triples_preserves_old_table_on_exception(self, mock_trustgraph):
|
||||
"""Test that table remains unchanged when TrustGraph creation fails"""
|
||||
taskgroup_mock = MagicMock()
|
||||
|
|
|
|||
|
|
@ -86,15 +86,17 @@ class TestFalkorDBStorageProcessor:
|
|||
mock_result = MagicMock()
|
||||
mock_result.nodes_created = 1
|
||||
mock_result.run_time_ms = 10
|
||||
|
||||
|
||||
processor.io.query.return_value = mock_result
|
||||
|
||||
processor.create_node(test_uri)
|
||||
|
||||
|
||||
processor.create_node(test_uri, 'test_user', 'test_collection')
|
||||
|
||||
processor.io.query.assert_called_once_with(
|
||||
"MERGE (n:Node {uri: $uri})",
|
||||
"MERGE (n:Node {uri: $uri, user: $user, collection: $collection})",
|
||||
params={
|
||||
"uri": test_uri,
|
||||
"user": 'test_user',
|
||||
"collection": 'test_collection',
|
||||
},
|
||||
)
|
||||
|
||||
|
|
@ -104,15 +106,17 @@ class TestFalkorDBStorageProcessor:
|
|||
mock_result = MagicMock()
|
||||
mock_result.nodes_created = 1
|
||||
mock_result.run_time_ms = 10
|
||||
|
||||
|
||||
processor.io.query.return_value = mock_result
|
||||
|
||||
processor.create_literal(test_value)
|
||||
|
||||
|
||||
processor.create_literal(test_value, 'test_user', 'test_collection')
|
||||
|
||||
processor.io.query.assert_called_once_with(
|
||||
"MERGE (n:Literal {value: $value})",
|
||||
"MERGE (n:Literal {value: $value, user: $user, collection: $collection})",
|
||||
params={
|
||||
"value": test_value,
|
||||
"user": 'test_user',
|
||||
"collection": 'test_collection',
|
||||
},
|
||||
)
|
||||
|
||||
|
|
@ -121,23 +125,25 @@ class TestFalkorDBStorageProcessor:
|
|||
src_uri = 'http://example.com/src'
|
||||
pred_uri = 'http://example.com/pred'
|
||||
dest_uri = 'http://example.com/dest'
|
||||
|
||||
|
||||
mock_result = MagicMock()
|
||||
mock_result.nodes_created = 0
|
||||
mock_result.run_time_ms = 5
|
||||
|
||||
|
||||
processor.io.query.return_value = mock_result
|
||||
|
||||
processor.relate_node(src_uri, pred_uri, dest_uri)
|
||||
|
||||
|
||||
processor.relate_node(src_uri, pred_uri, dest_uri, 'test_user', 'test_collection')
|
||||
|
||||
processor.io.query.assert_called_once_with(
|
||||
"MATCH (src:Node {uri: $src}) "
|
||||
"MATCH (dest:Node {uri: $dest}) "
|
||||
"MERGE (src)-[:Rel {uri: $uri}]->(dest)",
|
||||
"MATCH (src:Node {uri: $src, user: $user, collection: $collection}) "
|
||||
"MATCH (dest:Node {uri: $dest, user: $user, collection: $collection}) "
|
||||
"MERGE (src)-[:Rel {uri: $uri, user: $user, collection: $collection}]->(dest)",
|
||||
params={
|
||||
"src": src_uri,
|
||||
"dest": dest_uri,
|
||||
"uri": pred_uri,
|
||||
"user": 'test_user',
|
||||
"collection": 'test_collection',
|
||||
},
|
||||
)
|
||||
|
||||
|
|
@ -146,23 +152,25 @@ class TestFalkorDBStorageProcessor:
|
|||
src_uri = 'http://example.com/src'
|
||||
pred_uri = 'http://example.com/pred'
|
||||
literal_value = 'literal destination'
|
||||
|
||||
|
||||
mock_result = MagicMock()
|
||||
mock_result.nodes_created = 0
|
||||
mock_result.run_time_ms = 5
|
||||
|
||||
|
||||
processor.io.query.return_value = mock_result
|
||||
|
||||
processor.relate_literal(src_uri, pred_uri, literal_value)
|
||||
|
||||
|
||||
processor.relate_literal(src_uri, pred_uri, literal_value, 'test_user', 'test_collection')
|
||||
|
||||
processor.io.query.assert_called_once_with(
|
||||
"MATCH (src:Node {uri: $src}) "
|
||||
"MATCH (dest:Literal {value: $dest}) "
|
||||
"MERGE (src)-[:Rel {uri: $uri}]->(dest)",
|
||||
"MATCH (src:Node {uri: $src, user: $user, collection: $collection}) "
|
||||
"MATCH (dest:Literal {value: $dest, user: $user, collection: $collection}) "
|
||||
"MERGE (src)-[:Rel {uri: $uri, user: $user, collection: $collection}]->(dest)",
|
||||
params={
|
||||
"src": src_uri,
|
||||
"dest": literal_value,
|
||||
"uri": pred_uri,
|
||||
"user": 'test_user',
|
||||
"collection": 'test_collection',
|
||||
},
|
||||
)
|
||||
|
||||
|
|
@ -191,14 +199,16 @@ class TestFalkorDBStorageProcessor:
|
|||
# Verify queries were called in the correct order
|
||||
expected_calls = [
|
||||
# Create subject node
|
||||
(("MERGE (n:Node {uri: $uri})",), {"params": {"uri": "http://example.com/subject"}}),
|
||||
(("MERGE (n:Node {uri: $uri, user: $user, collection: $collection})",),
|
||||
{"params": {"uri": "http://example.com/subject", "user": "test_user", "collection": "test_collection"}}),
|
||||
# Create object node
|
||||
(("MERGE (n:Node {uri: $uri})",), {"params": {"uri": "http://example.com/object"}}),
|
||||
(("MERGE (n:Node {uri: $uri, user: $user, collection: $collection})",),
|
||||
{"params": {"uri": "http://example.com/object", "user": "test_user", "collection": "test_collection"}}),
|
||||
# Create relationship
|
||||
(("MATCH (src:Node {uri: $src}) "
|
||||
"MATCH (dest:Node {uri: $dest}) "
|
||||
"MERGE (src)-[:Rel {uri: $uri}]->(dest)",),
|
||||
{"params": {"src": "http://example.com/subject", "dest": "http://example.com/object", "uri": "http://example.com/predicate"}}),
|
||||
(("MATCH (src:Node {uri: $src, user: $user, collection: $collection}) "
|
||||
"MATCH (dest:Node {uri: $dest, user: $user, collection: $collection}) "
|
||||
"MERGE (src)-[:Rel {uri: $uri, user: $user, collection: $collection}]->(dest)",),
|
||||
{"params": {"src": "http://example.com/subject", "dest": "http://example.com/object", "uri": "http://example.com/predicate", "user": "test_user", "collection": "test_collection"}}),
|
||||
]
|
||||
|
||||
assert processor.io.query.call_count == 3
|
||||
|
|
@ -220,14 +230,16 @@ class TestFalkorDBStorageProcessor:
|
|||
# Verify queries were called in the correct order
|
||||
expected_calls = [
|
||||
# Create subject node
|
||||
(("MERGE (n:Node {uri: $uri})",), {"params": {"uri": "http://example.com/subject"}}),
|
||||
(("MERGE (n:Node {uri: $uri, user: $user, collection: $collection})",),
|
||||
{"params": {"uri": "http://example.com/subject", "user": "test_user", "collection": "test_collection"}}),
|
||||
# Create literal object
|
||||
(("MERGE (n:Literal {value: $value})",), {"params": {"value": "literal object"}}),
|
||||
(("MERGE (n:Literal {value: $value, user: $user, collection: $collection})",),
|
||||
{"params": {"value": "literal object", "user": "test_user", "collection": "test_collection"}}),
|
||||
# Create relationship
|
||||
(("MATCH (src:Node {uri: $src}) "
|
||||
"MATCH (dest:Literal {value: $dest}) "
|
||||
"MERGE (src)-[:Rel {uri: $uri}]->(dest)",),
|
||||
{"params": {"src": "http://example.com/subject", "dest": "literal object", "uri": "http://example.com/predicate"}}),
|
||||
(("MATCH (src:Node {uri: $src, user: $user, collection: $collection}) "
|
||||
"MATCH (dest:Literal {value: $dest, user: $user, collection: $collection}) "
|
||||
"MERGE (src)-[:Rel {uri: $uri, user: $user, collection: $collection}]->(dest)",),
|
||||
{"params": {"src": "http://example.com/subject", "dest": "literal object", "uri": "http://example.com/predicate", "user": "test_user", "collection": "test_collection"}}),
|
||||
]
|
||||
|
||||
assert processor.io.query.call_count == 3
|
||||
|
|
@ -408,12 +420,14 @@ class TestFalkorDBStorageProcessor:
|
|||
|
||||
processor.io.query.return_value = mock_result
|
||||
|
||||
processor.create_node(test_uri)
|
||||
processor.create_node(test_uri, 'test_user', 'test_collection')
|
||||
|
||||
processor.io.query.assert_called_once_with(
|
||||
"MERGE (n:Node {uri: $uri})",
|
||||
"MERGE (n:Node {uri: $uri, user: $user, collection: $collection})",
|
||||
params={
|
||||
"uri": test_uri,
|
||||
"user": 'test_user',
|
||||
"collection": 'test_collection',
|
||||
},
|
||||
)
|
||||
|
||||
|
|
@ -426,11 +440,13 @@ class TestFalkorDBStorageProcessor:
|
|||
|
||||
processor.io.query.return_value = mock_result
|
||||
|
||||
processor.create_literal(test_value)
|
||||
processor.create_literal(test_value, 'test_user', 'test_collection')
|
||||
|
||||
processor.io.query.assert_called_once_with(
|
||||
"MERGE (n:Literal {value: $value})",
|
||||
"MERGE (n:Literal {value: $value, user: $user, collection: $collection})",
|
||||
params={
|
||||
"value": test_value,
|
||||
"user": 'test_user',
|
||||
"collection": 'test_collection',
|
||||
},
|
||||
)
|
||||
Loading…
Add table
Add a link
Reference in a new issue