diff --git a/tests/unit/test_query/test_doc_embeddings_milvus_query.py b/tests/unit/test_query/test_doc_embeddings_milvus_query.py index 10ea54d2..ae615bff 100644 --- a/tests/unit/test_query/test_doc_embeddings_milvus_query.py +++ b/tests/unit/test_query/test_doc_embeddings_milvus_query.py @@ -85,8 +85,10 @@ class TestMilvusDocEmbeddingsQueryProcessor: result = await processor.query_document_embeddings(query) - # Verify search was called with correct parameters - processor.vecstore.search.assert_called_once_with([0.1, 0.2, 0.3], limit=5) + # Verify search was called with correct parameters including user/collection + processor.vecstore.search.assert_called_once_with( + [0.1, 0.2, 0.3], 'test_user', 'test_collection', limit=5 + ) # Verify results are document chunks assert len(result) == 3 diff --git a/tests/unit/test_storage/test_doc_embeddings_milvus_storage.py b/tests/unit/test_storage/test_doc_embeddings_milvus_storage.py index 5e6bcfb9..d957d711 100644 --- a/tests/unit/test_storage/test_doc_embeddings_milvus_storage.py +++ b/tests/unit/test_storage/test_doc_embeddings_milvus_storage.py @@ -91,37 +91,41 @@ class TestMilvusDocEmbeddingsStorageProcessor: await processor.store_document_embeddings(message) - # Verify insert was called for each vector + # Verify insert was called for each vector with user/collection parameters expected_calls = [ - ([0.1, 0.2, 0.3], "Test document content"), - ([0.4, 0.5, 0.6], "Test document content"), + ([0.1, 0.2, 0.3], "Test document content", 'test_user', 'test_collection'), + ([0.4, 0.5, 0.6], "Test document content", 'test_user', 'test_collection'), ] assert processor.vecstore.insert.call_count == 2 - for i, (expected_vec, expected_doc) in enumerate(expected_calls): + for i, (expected_vec, expected_doc, expected_user, expected_collection) in enumerate(expected_calls): actual_call = processor.vecstore.insert.call_args_list[i] assert actual_call[0][0] == expected_vec assert actual_call[0][1] == expected_doc + assert actual_call[0][2] == expected_user + assert actual_call[0][3] == expected_collection @pytest.mark.asyncio async def test_store_document_embeddings_multiple_chunks(self, processor, mock_message): """Test storing document embeddings for multiple chunks""" await processor.store_document_embeddings(mock_message) - # Verify insert was called for each vector of each chunk + # Verify insert was called for each vector of each chunk with user/collection parameters expected_calls = [ # Chunk 1 vectors - ([0.1, 0.2, 0.3], "This is the first document chunk"), - ([0.4, 0.5, 0.6], "This is the first document chunk"), + ([0.1, 0.2, 0.3], "This is the first document chunk", 'test_user', 'test_collection'), + ([0.4, 0.5, 0.6], "This is the first document chunk", 'test_user', 'test_collection'), # Chunk 2 vectors - ([0.7, 0.8, 0.9], "This is the second document chunk"), + ([0.7, 0.8, 0.9], "This is the second document chunk", 'test_user', 'test_collection'), ] assert processor.vecstore.insert.call_count == 3 - for i, (expected_vec, expected_doc) in enumerate(expected_calls): + for i, (expected_vec, expected_doc, expected_user, expected_collection) in enumerate(expected_calls): actual_call = processor.vecstore.insert.call_args_list[i] assert actual_call[0][0] == expected_vec assert actual_call[0][1] == expected_doc + assert actual_call[0][2] == expected_user + assert actual_call[0][3] == expected_collection @pytest.mark.asyncio async def test_store_document_embeddings_empty_chunk(self, processor): @@ -185,9 +189,9 @@ class TestMilvusDocEmbeddingsStorageProcessor: await processor.store_document_embeddings(message) - # Verify only valid chunk was inserted + # Verify only valid chunk was inserted with user/collection parameters processor.vecstore.insert.assert_called_once_with( - [0.1, 0.2, 0.3], "Valid document content" + [0.1, 0.2, 0.3], "Valid document content", 'test_user', 'test_collection' ) @pytest.mark.asyncio @@ -243,18 +247,20 @@ class TestMilvusDocEmbeddingsStorageProcessor: await processor.store_document_embeddings(message) - # Verify all vectors were inserted regardless of dimension + # Verify all vectors were inserted regardless of dimension with user/collection parameters expected_calls = [ - ([0.1, 0.2], "Document with mixed dimensions"), - ([0.3, 0.4, 0.5, 0.6], "Document with mixed dimensions"), - ([0.7, 0.8, 0.9], "Document with mixed dimensions"), + ([0.1, 0.2], "Document with mixed dimensions", 'test_user', 'test_collection'), + ([0.3, 0.4, 0.5, 0.6], "Document with mixed dimensions", 'test_user', 'test_collection'), + ([0.7, 0.8, 0.9], "Document with mixed dimensions", 'test_user', 'test_collection'), ] assert processor.vecstore.insert.call_count == 3 - for i, (expected_vec, expected_doc) in enumerate(expected_calls): + for i, (expected_vec, expected_doc, expected_user, expected_collection) in enumerate(expected_calls): actual_call = processor.vecstore.insert.call_args_list[i] assert actual_call[0][0] == expected_vec assert actual_call[0][1] == expected_doc + assert actual_call[0][2] == expected_user + assert actual_call[0][3] == expected_collection @pytest.mark.asyncio async def test_store_document_embeddings_unicode_content(self, processor): @@ -272,9 +278,9 @@ class TestMilvusDocEmbeddingsStorageProcessor: await processor.store_document_embeddings(message) - # Verify Unicode content was properly decoded and inserted + # Verify Unicode content was properly decoded and inserted with user/collection parameters processor.vecstore.insert.assert_called_once_with( - [0.1, 0.2, 0.3], "Document with Unicode: éñ中文🚀" + [0.1, 0.2, 0.3], "Document with Unicode: éñ中文🚀", 'test_user', 'test_collection' ) @pytest.mark.asyncio @@ -295,9 +301,9 @@ class TestMilvusDocEmbeddingsStorageProcessor: await processor.store_document_embeddings(message) - # Verify large content was inserted + # Verify large content was inserted with user/collection parameters processor.vecstore.insert.assert_called_once_with( - [0.1, 0.2, 0.3], large_content + [0.1, 0.2, 0.3], large_content, 'test_user', 'test_collection' ) @pytest.mark.asyncio @@ -316,9 +322,103 @@ class TestMilvusDocEmbeddingsStorageProcessor: await processor.store_document_embeddings(message) - # Verify whitespace content was inserted (not filtered out) + # Verify whitespace content was inserted (not filtered out) with user/collection parameters processor.vecstore.insert.assert_called_once_with( - [0.1, 0.2, 0.3], " \n\t " + [0.1, 0.2, 0.3], " \n\t ", 'test_user', 'test_collection' + ) + + @pytest.mark.asyncio + async def test_store_document_embeddings_different_user_collection_combinations(self, processor): + """Test storing document embeddings with different user/collection combinations""" + test_cases = [ + ('user1', 'collection1'), + ('user2', 'collection2'), + ('admin', 'production'), + ('test@domain.com', 'test-collection.v1'), + ] + + for user, collection in test_cases: + processor.vecstore.reset_mock() # Reset mock for each test case + + message = MagicMock() + message.metadata = MagicMock() + message.metadata.user = user + message.metadata.collection = collection + + chunk = ChunkEmbeddings( + chunk=b"Test content", + vectors=[[0.1, 0.2, 0.3]] + ) + message.chunks = [chunk] + + await processor.store_document_embeddings(message) + + # Verify insert was called with the correct user/collection + processor.vecstore.insert.assert_called_once_with( + [0.1, 0.2, 0.3], "Test content", user, collection + ) + + @pytest.mark.asyncio + async def test_store_document_embeddings_user_collection_parameter_isolation(self, processor): + """Test that different user/collection combinations are properly isolated""" + # Store embeddings for user1/collection1 + message1 = MagicMock() + message1.metadata = MagicMock() + message1.metadata.user = 'user1' + message1.metadata.collection = 'collection1' + chunk1 = ChunkEmbeddings( + chunk=b"User1 content", + vectors=[[0.1, 0.2, 0.3]] + ) + message1.chunks = [chunk1] + + # Store embeddings for user2/collection2 + message2 = MagicMock() + message2.metadata = MagicMock() + message2.metadata.user = 'user2' + message2.metadata.collection = 'collection2' + chunk2 = ChunkEmbeddings( + chunk=b"User2 content", + vectors=[[0.4, 0.5, 0.6]] + ) + message2.chunks = [chunk2] + + await processor.store_document_embeddings(message1) + await processor.store_document_embeddings(message2) + + # Verify both calls were made with correct parameters + expected_calls = [ + ([0.1, 0.2, 0.3], "User1 content", 'user1', 'collection1'), + ([0.4, 0.5, 0.6], "User2 content", 'user2', 'collection2'), + ] + + assert processor.vecstore.insert.call_count == 2 + for i, (expected_vec, expected_doc, expected_user, expected_collection) in enumerate(expected_calls): + actual_call = processor.vecstore.insert.call_args_list[i] + assert actual_call[0][0] == expected_vec + assert actual_call[0][1] == expected_doc + assert actual_call[0][2] == expected_user + assert actual_call[0][3] == expected_collection + + @pytest.mark.asyncio + async def test_store_document_embeddings_special_character_user_collection(self, processor): + """Test storing document embeddings with special characters in user/collection names""" + message = MagicMock() + message.metadata = MagicMock() + message.metadata.user = 'user@domain.com' # Email-like user + message.metadata.collection = 'test-collection.v1' # Collection with special chars + + chunk = ChunkEmbeddings( + chunk=b"Special chars test", + vectors=[[0.1, 0.2, 0.3]] + ) + message.chunks = [chunk] + + await processor.store_document_embeddings(message) + + # Verify the exact user/collection strings are passed (sanitization happens in DocVectors) + processor.vecstore.insert.assert_called_once_with( + [0.1, 0.2, 0.3], "Special chars test", 'user@domain.com', 'test-collection.v1' ) def test_add_args_method(self): diff --git a/tests/unit/test_storage/test_graph_embeddings_milvus_storage.py b/tests/unit/test_storage/test_graph_embeddings_milvus_storage.py index ae300574..a22173ab 100644 --- a/tests/unit/test_storage/test_graph_embeddings_milvus_storage.py +++ b/tests/unit/test_storage/test_graph_embeddings_milvus_storage.py @@ -91,37 +91,41 @@ class TestMilvusGraphEmbeddingsStorageProcessor: await processor.store_graph_embeddings(message) - # Verify insert was called for each vector + # Verify insert was called for each vector with user/collection parameters expected_calls = [ - ([0.1, 0.2, 0.3], 'http://example.com/entity'), - ([0.4, 0.5, 0.6], 'http://example.com/entity'), + ([0.1, 0.2, 0.3], 'http://example.com/entity', 'test_user', 'test_collection'), + ([0.4, 0.5, 0.6], 'http://example.com/entity', 'test_user', 'test_collection'), ] assert processor.vecstore.insert.call_count == 2 - for i, (expected_vec, expected_entity) in enumerate(expected_calls): + for i, (expected_vec, expected_entity, expected_user, expected_collection) in enumerate(expected_calls): actual_call = processor.vecstore.insert.call_args_list[i] assert actual_call[0][0] == expected_vec assert actual_call[0][1] == expected_entity + assert actual_call[0][2] == expected_user + assert actual_call[0][3] == expected_collection @pytest.mark.asyncio async def test_store_graph_embeddings_multiple_entities(self, processor, mock_message): """Test storing graph embeddings for multiple entities""" await processor.store_graph_embeddings(mock_message) - # Verify insert was called for each vector of each entity + # Verify insert was called for each vector of each entity with user/collection parameters expected_calls = [ # Entity 1 vectors - ([0.1, 0.2, 0.3], 'http://example.com/entity1'), - ([0.4, 0.5, 0.6], 'http://example.com/entity1'), + ([0.1, 0.2, 0.3], 'http://example.com/entity1', 'test_user', 'test_collection'), + ([0.4, 0.5, 0.6], 'http://example.com/entity1', 'test_user', 'test_collection'), # Entity 2 vectors - ([0.7, 0.8, 0.9], 'literal entity'), + ([0.7, 0.8, 0.9], 'literal entity', 'test_user', 'test_collection'), ] assert processor.vecstore.insert.call_count == 3 - for i, (expected_vec, expected_entity) in enumerate(expected_calls): + for i, (expected_vec, expected_entity, expected_user, expected_collection) in enumerate(expected_calls): actual_call = processor.vecstore.insert.call_args_list[i] assert actual_call[0][0] == expected_vec assert actual_call[0][1] == expected_entity + assert actual_call[0][2] == expected_user + assert actual_call[0][3] == expected_collection @pytest.mark.asyncio async def test_store_graph_embeddings_empty_entity_value(self, processor): @@ -185,9 +189,9 @@ class TestMilvusGraphEmbeddingsStorageProcessor: await processor.store_graph_embeddings(message) - # Verify only valid entity was inserted + # Verify only valid entity was inserted with user/collection parameters processor.vecstore.insert.assert_called_once_with( - [0.1, 0.2, 0.3], 'http://example.com/valid' + [0.1, 0.2, 0.3], 'http://example.com/valid', 'test_user', 'test_collection' ) @pytest.mark.asyncio