Test suite executed from CI pipeline (#433)

* Test strategy & test cases

* Unit tests

* Integration tests
This commit is contained in:
cybermaggedon 2025-07-14 14:57:44 +01:00 committed by GitHub
parent 9c7a070681
commit 2f7fddd206
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
101 changed files with 17811 additions and 1 deletions

View file

@ -0,0 +1,162 @@
"""
Shared fixtures for storage tests
"""
import pytest
from unittest.mock import AsyncMock, MagicMock
@pytest.fixture
def base_storage_config():
"""Base configuration for storage processors"""
return {
'taskgroup': AsyncMock(),
'id': 'test-storage-processor'
}
@pytest.fixture
def qdrant_storage_config(base_storage_config):
"""Configuration for Qdrant storage processors"""
return base_storage_config | {
'store_uri': 'http://localhost:6333',
'api_key': 'test-api-key'
}
@pytest.fixture
def mock_qdrant_client():
"""Mock Qdrant client"""
mock_client = MagicMock()
mock_client.collection_exists.return_value = True
mock_client.create_collection.return_value = None
mock_client.upsert.return_value = None
return mock_client
@pytest.fixture
def mock_uuid():
"""Mock UUID generation"""
mock_uuid = MagicMock()
mock_uuid.uuid4.return_value = MagicMock()
mock_uuid.uuid4.return_value.__str__ = MagicMock(return_value='test-uuid-123')
return mock_uuid
# Document embeddings fixtures
@pytest.fixture
def mock_document_embeddings_message():
"""Mock document embeddings message"""
mock_message = MagicMock()
mock_message.metadata.user = 'test_user'
mock_message.metadata.collection = 'test_collection'
mock_chunk = MagicMock()
mock_chunk.chunk.decode.return_value = 'test document chunk'
mock_chunk.vectors = [[0.1, 0.2, 0.3]]
mock_message.chunks = [mock_chunk]
return mock_message
@pytest.fixture
def mock_document_embeddings_multiple_chunks():
"""Mock document embeddings message with multiple chunks"""
mock_message = MagicMock()
mock_message.metadata.user = 'multi_user'
mock_message.metadata.collection = 'multi_collection'
mock_chunk1 = MagicMock()
mock_chunk1.chunk.decode.return_value = 'first document chunk'
mock_chunk1.vectors = [[0.1, 0.2]]
mock_chunk2 = MagicMock()
mock_chunk2.chunk.decode.return_value = 'second document chunk'
mock_chunk2.vectors = [[0.3, 0.4]]
mock_message.chunks = [mock_chunk1, mock_chunk2]
return mock_message
@pytest.fixture
def mock_document_embeddings_multiple_vectors():
"""Mock document embeddings message with multiple vectors per chunk"""
mock_message = MagicMock()
mock_message.metadata.user = 'vector_user'
mock_message.metadata.collection = 'vector_collection'
mock_chunk = MagicMock()
mock_chunk.chunk.decode.return_value = 'multi-vector document chunk'
mock_chunk.vectors = [
[0.1, 0.2, 0.3],
[0.4, 0.5, 0.6],
[0.7, 0.8, 0.9]
]
mock_message.chunks = [mock_chunk]
return mock_message
@pytest.fixture
def mock_document_embeddings_empty_chunk():
"""Mock document embeddings message with empty chunk"""
mock_message = MagicMock()
mock_message.metadata.user = 'empty_user'
mock_message.metadata.collection = 'empty_collection'
mock_chunk = MagicMock()
mock_chunk.chunk.decode.return_value = "" # Empty string
mock_chunk.vectors = [[0.1, 0.2]]
mock_message.chunks = [mock_chunk]
return mock_message
# Graph embeddings fixtures
@pytest.fixture
def mock_graph_embeddings_message():
"""Mock graph embeddings message"""
mock_message = MagicMock()
mock_message.metadata.user = 'test_user'
mock_message.metadata.collection = 'test_collection'
mock_entity = MagicMock()
mock_entity.entity.value = 'test_entity'
mock_entity.vectors = [[0.1, 0.2, 0.3]]
mock_message.entities = [mock_entity]
return mock_message
@pytest.fixture
def mock_graph_embeddings_multiple_entities():
"""Mock graph embeddings message with multiple entities"""
mock_message = MagicMock()
mock_message.metadata.user = 'multi_user'
mock_message.metadata.collection = 'multi_collection'
mock_entity1 = MagicMock()
mock_entity1.entity.value = 'entity_one'
mock_entity1.vectors = [[0.1, 0.2]]
mock_entity2 = MagicMock()
mock_entity2.entity.value = 'entity_two'
mock_entity2.vectors = [[0.3, 0.4]]
mock_message.entities = [mock_entity1, mock_entity2]
return mock_message
@pytest.fixture
def mock_graph_embeddings_empty_entity():
"""Mock graph embeddings message with empty entity"""
mock_message = MagicMock()
mock_message.metadata.user = 'empty_user'
mock_message.metadata.collection = 'empty_collection'
mock_entity = MagicMock()
mock_entity.entity.value = "" # Empty string
mock_entity.vectors = [[0.1, 0.2]]
mock_message.entities = [mock_entity]
return mock_message

View file

@ -0,0 +1,569 @@
"""
Unit tests for trustgraph.storage.doc_embeddings.qdrant.write
Testing document embeddings storage functionality
"""
import pytest
from unittest.mock import AsyncMock, MagicMock, patch
from unittest import IsolatedAsyncioTestCase
# Import the service under test
from trustgraph.storage.doc_embeddings.qdrant.write import Processor
class TestQdrantDocEmbeddingsStorage(IsolatedAsyncioTestCase):
"""Test Qdrant document embeddings storage functionality"""
@patch('trustgraph.storage.doc_embeddings.qdrant.write.QdrantClient')
@patch('trustgraph.base.DocumentEmbeddingsStoreService.__init__')
async def test_processor_initialization_basic(self, mock_base_init, mock_qdrant_client):
"""Test basic Qdrant processor initialization"""
# Arrange
mock_base_init.return_value = None
mock_qdrant_instance = MagicMock()
mock_qdrant_client.return_value = mock_qdrant_instance
config = {
'store_uri': 'http://localhost:6333',
'api_key': 'test-api-key',
'taskgroup': AsyncMock(),
'id': 'test-doc-qdrant-processor'
}
# Act
processor = Processor(**config)
# Assert
# Verify base class initialization was called
mock_base_init.assert_called_once()
# Verify QdrantClient was created with correct parameters
mock_qdrant_client.assert_called_once_with(url='http://localhost:6333', api_key='test-api-key')
# Verify processor attributes
assert hasattr(processor, 'qdrant')
assert processor.qdrant == mock_qdrant_instance
assert hasattr(processor, 'last_collection')
assert processor.last_collection is None
@patch('trustgraph.storage.doc_embeddings.qdrant.write.QdrantClient')
@patch('trustgraph.base.DocumentEmbeddingsStoreService.__init__')
async def test_processor_initialization_with_defaults(self, mock_base_init, mock_qdrant_client):
"""Test processor initialization with default values"""
# Arrange
mock_base_init.return_value = None
mock_qdrant_instance = MagicMock()
mock_qdrant_client.return_value = mock_qdrant_instance
config = {
'taskgroup': AsyncMock(),
'id': 'test-doc-qdrant-processor'
# No store_uri or api_key provided - should use defaults
}
# Act
processor = Processor(**config)
# Assert
# Verify QdrantClient was created with default URI and None API key
mock_qdrant_client.assert_called_once_with(url='http://localhost:6333', api_key=None)
@patch('trustgraph.storage.doc_embeddings.qdrant.write.QdrantClient')
@patch('trustgraph.storage.doc_embeddings.qdrant.write.uuid')
@patch('trustgraph.base.DocumentEmbeddingsStoreService.__init__')
async def test_store_document_embeddings_basic(self, mock_base_init, mock_uuid, mock_qdrant_client):
"""Test storing document embeddings with basic message"""
# Arrange
mock_base_init.return_value = None
mock_qdrant_instance = MagicMock()
mock_qdrant_instance.collection_exists.return_value = True # Collection already exists
mock_qdrant_client.return_value = mock_qdrant_instance
mock_uuid.uuid4.return_value = MagicMock()
mock_uuid.uuid4.return_value.__str__ = MagicMock(return_value='test-uuid-123')
config = {
'store_uri': 'http://localhost:6333',
'api_key': 'test-api-key',
'taskgroup': AsyncMock(),
'id': 'test-doc-qdrant-processor'
}
processor = Processor(**config)
# Create mock message with chunks and vectors
mock_message = MagicMock()
mock_message.metadata.user = 'test_user'
mock_message.metadata.collection = 'test_collection'
mock_chunk = MagicMock()
mock_chunk.chunk.decode.return_value = 'test document chunk'
mock_chunk.vectors = [[0.1, 0.2, 0.3]] # Single vector with 3 dimensions
mock_message.chunks = [mock_chunk]
# Act
await processor.store_document_embeddings(mock_message)
# Assert
# Verify collection existence was checked
expected_collection = 'd_test_user_test_collection_3'
mock_qdrant_instance.collection_exists.assert_called_once_with(expected_collection)
# Verify upsert was called
mock_qdrant_instance.upsert.assert_called_once()
# Verify upsert parameters
upsert_call_args = mock_qdrant_instance.upsert.call_args
assert upsert_call_args[1]['collection_name'] == expected_collection
assert len(upsert_call_args[1]['points']) == 1
point = upsert_call_args[1]['points'][0]
assert point.vector == [0.1, 0.2, 0.3]
assert point.payload['doc'] == 'test document chunk'
@patch('trustgraph.storage.doc_embeddings.qdrant.write.QdrantClient')
@patch('trustgraph.storage.doc_embeddings.qdrant.write.uuid')
@patch('trustgraph.base.DocumentEmbeddingsStoreService.__init__')
async def test_store_document_embeddings_multiple_chunks(self, mock_base_init, mock_uuid, mock_qdrant_client):
"""Test storing document embeddings with multiple chunks"""
# Arrange
mock_base_init.return_value = None
mock_qdrant_instance = MagicMock()
mock_qdrant_instance.collection_exists.return_value = True
mock_qdrant_client.return_value = mock_qdrant_instance
mock_uuid.uuid4.return_value = MagicMock()
mock_uuid.uuid4.return_value.__str__ = MagicMock(return_value='test-uuid')
config = {
'store_uri': 'http://localhost:6333',
'api_key': 'test-api-key',
'taskgroup': AsyncMock(),
'id': 'test-doc-qdrant-processor'
}
processor = Processor(**config)
# Create mock message with multiple chunks
mock_message = MagicMock()
mock_message.metadata.user = 'multi_user'
mock_message.metadata.collection = 'multi_collection'
mock_chunk1 = MagicMock()
mock_chunk1.chunk.decode.return_value = 'first document chunk'
mock_chunk1.vectors = [[0.1, 0.2]]
mock_chunk2 = MagicMock()
mock_chunk2.chunk.decode.return_value = 'second document chunk'
mock_chunk2.vectors = [[0.3, 0.4]]
mock_message.chunks = [mock_chunk1, mock_chunk2]
# Act
await processor.store_document_embeddings(mock_message)
# Assert
# Should be called twice (once per chunk)
assert mock_qdrant_instance.upsert.call_count == 2
# Verify both chunks were processed
upsert_calls = mock_qdrant_instance.upsert.call_args_list
# First chunk
first_call = upsert_calls[0]
first_point = first_call[1]['points'][0]
assert first_point.vector == [0.1, 0.2]
assert first_point.payload['doc'] == 'first document chunk'
# Second chunk
second_call = upsert_calls[1]
second_point = second_call[1]['points'][0]
assert second_point.vector == [0.3, 0.4]
assert second_point.payload['doc'] == 'second document chunk'
@patch('trustgraph.storage.doc_embeddings.qdrant.write.QdrantClient')
@patch('trustgraph.storage.doc_embeddings.qdrant.write.uuid')
@patch('trustgraph.base.DocumentEmbeddingsStoreService.__init__')
async def test_store_document_embeddings_multiple_vectors_per_chunk(self, mock_base_init, mock_uuid, mock_qdrant_client):
"""Test storing document embeddings with multiple vectors per chunk"""
# Arrange
mock_base_init.return_value = None
mock_qdrant_instance = MagicMock()
mock_qdrant_instance.collection_exists.return_value = True
mock_qdrant_client.return_value = mock_qdrant_instance
mock_uuid.uuid4.return_value = MagicMock()
mock_uuid.uuid4.return_value.__str__ = MagicMock(return_value='test-uuid')
config = {
'store_uri': 'http://localhost:6333',
'api_key': 'test-api-key',
'taskgroup': AsyncMock(),
'id': 'test-doc-qdrant-processor'
}
processor = Processor(**config)
# Create mock message with chunk having multiple vectors
mock_message = MagicMock()
mock_message.metadata.user = 'vector_user'
mock_message.metadata.collection = 'vector_collection'
mock_chunk = MagicMock()
mock_chunk.chunk.decode.return_value = 'multi-vector document chunk'
mock_chunk.vectors = [
[0.1, 0.2, 0.3],
[0.4, 0.5, 0.6],
[0.7, 0.8, 0.9]
]
mock_message.chunks = [mock_chunk]
# Act
await processor.store_document_embeddings(mock_message)
# Assert
# Should be called 3 times (once per vector)
assert mock_qdrant_instance.upsert.call_count == 3
# Verify all vectors were processed
upsert_calls = mock_qdrant_instance.upsert.call_args_list
expected_vectors = [
[0.1, 0.2, 0.3],
[0.4, 0.5, 0.6],
[0.7, 0.8, 0.9]
]
for i, call in enumerate(upsert_calls):
point = call[1]['points'][0]
assert point.vector == expected_vectors[i]
assert point.payload['doc'] == 'multi-vector document chunk'
@patch('trustgraph.storage.doc_embeddings.qdrant.write.QdrantClient')
@patch('trustgraph.base.DocumentEmbeddingsStoreService.__init__')
async def test_store_document_embeddings_empty_chunk(self, mock_base_init, mock_qdrant_client):
"""Test storing document embeddings skips empty chunks"""
# Arrange
mock_base_init.return_value = None
mock_qdrant_instance = MagicMock()
mock_qdrant_client.return_value = mock_qdrant_instance
config = {
'store_uri': 'http://localhost:6333',
'api_key': 'test-api-key',
'taskgroup': AsyncMock(),
'id': 'test-doc-qdrant-processor'
}
processor = Processor(**config)
# Create mock message with empty chunk
mock_message = MagicMock()
mock_message.metadata.user = 'empty_user'
mock_message.metadata.collection = 'empty_collection'
mock_chunk_empty = MagicMock()
mock_chunk_empty.chunk.decode.return_value = "" # Empty string
mock_chunk_empty.vectors = [[0.1, 0.2]]
mock_message.chunks = [mock_chunk_empty]
# Act
await processor.store_document_embeddings(mock_message)
# Assert
# Should not call upsert for empty chunks
mock_qdrant_instance.upsert.assert_not_called()
mock_qdrant_instance.collection_exists.assert_not_called()
@patch('trustgraph.storage.doc_embeddings.qdrant.write.QdrantClient')
@patch('trustgraph.base.DocumentEmbeddingsStoreService.__init__')
async def test_collection_creation_when_not_exists(self, mock_base_init, mock_qdrant_client):
"""Test collection creation when it doesn't exist"""
# Arrange
mock_base_init.return_value = None
mock_qdrant_instance = MagicMock()
mock_qdrant_instance.collection_exists.return_value = False # Collection doesn't exist
mock_qdrant_client.return_value = mock_qdrant_instance
config = {
'store_uri': 'http://localhost:6333',
'api_key': 'test-api-key',
'taskgroup': AsyncMock(),
'id': 'test-doc-qdrant-processor'
}
processor = Processor(**config)
# Create mock message
mock_message = MagicMock()
mock_message.metadata.user = 'new_user'
mock_message.metadata.collection = 'new_collection'
mock_chunk = MagicMock()
mock_chunk.chunk.decode.return_value = 'test chunk'
mock_chunk.vectors = [[0.1, 0.2, 0.3, 0.4, 0.5]] # 5 dimensions
mock_message.chunks = [mock_chunk]
# Act
await processor.store_document_embeddings(mock_message)
# Assert
expected_collection = 'd_new_user_new_collection_5'
# Verify collection existence check and creation
mock_qdrant_instance.collection_exists.assert_called_once_with(expected_collection)
mock_qdrant_instance.create_collection.assert_called_once()
# Verify create_collection was called with correct parameters
create_call_args = mock_qdrant_instance.create_collection.call_args
assert create_call_args[1]['collection_name'] == expected_collection
# Verify upsert was still called after collection creation
mock_qdrant_instance.upsert.assert_called_once()
@patch('trustgraph.storage.doc_embeddings.qdrant.write.QdrantClient')
@patch('trustgraph.base.DocumentEmbeddingsStoreService.__init__')
async def test_collection_creation_exception(self, mock_base_init, mock_qdrant_client):
"""Test collection creation handles exceptions"""
# Arrange
mock_base_init.return_value = None
mock_qdrant_instance = MagicMock()
mock_qdrant_instance.collection_exists.return_value = False
mock_qdrant_instance.create_collection.side_effect = Exception("Qdrant connection failed")
mock_qdrant_client.return_value = mock_qdrant_instance
config = {
'store_uri': 'http://localhost:6333',
'api_key': 'test-api-key',
'taskgroup': AsyncMock(),
'id': 'test-doc-qdrant-processor'
}
processor = Processor(**config)
# Create mock message
mock_message = MagicMock()
mock_message.metadata.user = 'error_user'
mock_message.metadata.collection = 'error_collection'
mock_chunk = MagicMock()
mock_chunk.chunk.decode.return_value = 'test chunk'
mock_chunk.vectors = [[0.1, 0.2]]
mock_message.chunks = [mock_chunk]
# Act & Assert
with pytest.raises(Exception, match="Qdrant connection failed"):
await processor.store_document_embeddings(mock_message)
@patch('trustgraph.storage.doc_embeddings.qdrant.write.QdrantClient')
@patch('trustgraph.base.DocumentEmbeddingsStoreService.__init__')
async def test_collection_caching_behavior(self, mock_base_init, mock_qdrant_client):
"""Test collection caching with last_collection"""
# Arrange
mock_base_init.return_value = None
mock_qdrant_instance = MagicMock()
mock_qdrant_instance.collection_exists.return_value = True
mock_qdrant_client.return_value = mock_qdrant_instance
config = {
'store_uri': 'http://localhost:6333',
'api_key': 'test-api-key',
'taskgroup': AsyncMock(),
'id': 'test-doc-qdrant-processor'
}
processor = Processor(**config)
# Create first mock message
mock_message1 = MagicMock()
mock_message1.metadata.user = 'cache_user'
mock_message1.metadata.collection = 'cache_collection'
mock_chunk1 = MagicMock()
mock_chunk1.chunk.decode.return_value = 'first chunk'
mock_chunk1.vectors = [[0.1, 0.2, 0.3]]
mock_message1.chunks = [mock_chunk1]
# First call
await processor.store_document_embeddings(mock_message1)
# Reset mock to track second call
mock_qdrant_instance.reset_mock()
# Create second mock message with same dimensions
mock_message2 = MagicMock()
mock_message2.metadata.user = 'cache_user'
mock_message2.metadata.collection = 'cache_collection'
mock_chunk2 = MagicMock()
mock_chunk2.chunk.decode.return_value = 'second chunk'
mock_chunk2.vectors = [[0.4, 0.5, 0.6]] # Same dimension (3)
mock_message2.chunks = [mock_chunk2]
# Act - Second call with same collection
await processor.store_document_embeddings(mock_message2)
# Assert
expected_collection = 'd_cache_user_cache_collection_3'
assert processor.last_collection == expected_collection
# Verify second call skipped existence check (cached)
mock_qdrant_instance.collection_exists.assert_not_called()
mock_qdrant_instance.create_collection.assert_not_called()
# But upsert should still be called
mock_qdrant_instance.upsert.assert_called_once()
@patch('trustgraph.storage.doc_embeddings.qdrant.write.QdrantClient')
@patch('trustgraph.base.DocumentEmbeddingsStoreService.__init__')
async def test_different_dimensions_different_collections(self, mock_base_init, mock_qdrant_client):
"""Test that different vector dimensions create different collections"""
# Arrange
mock_base_init.return_value = None
mock_qdrant_instance = MagicMock()
mock_qdrant_instance.collection_exists.return_value = True
mock_qdrant_client.return_value = mock_qdrant_instance
config = {
'store_uri': 'http://localhost:6333',
'api_key': 'test-api-key',
'taskgroup': AsyncMock(),
'id': 'test-doc-qdrant-processor'
}
processor = Processor(**config)
# Create mock message with different dimension vectors
mock_message = MagicMock()
mock_message.metadata.user = 'dim_user'
mock_message.metadata.collection = 'dim_collection'
mock_chunk = MagicMock()
mock_chunk.chunk.decode.return_value = 'dimension test chunk'
mock_chunk.vectors = [
[0.1, 0.2], # 2 dimensions
[0.3, 0.4, 0.5] # 3 dimensions
]
mock_message.chunks = [mock_chunk]
# Act
await processor.store_document_embeddings(mock_message)
# Assert
# Should check existence of both collections
expected_collections = ['d_dim_user_dim_collection_2', 'd_dim_user_dim_collection_3']
actual_calls = [call.args[0] for call in mock_qdrant_instance.collection_exists.call_args_list]
assert actual_calls == expected_collections
# Should upsert to both collections
assert mock_qdrant_instance.upsert.call_count == 2
upsert_calls = mock_qdrant_instance.upsert.call_args_list
assert upsert_calls[0][1]['collection_name'] == 'd_dim_user_dim_collection_2'
assert upsert_calls[1][1]['collection_name'] == 'd_dim_user_dim_collection_3'
@patch('trustgraph.storage.doc_embeddings.qdrant.write.QdrantClient')
@patch('trustgraph.base.DocumentEmbeddingsStoreService.__init__')
async def test_add_args_calls_parent(self, mock_base_init, mock_qdrant_client):
"""Test that add_args() calls parent add_args method"""
# Arrange
mock_base_init.return_value = None
mock_qdrant_client.return_value = MagicMock()
mock_parser = MagicMock()
# Act
with patch('trustgraph.base.DocumentEmbeddingsStoreService.add_args') as mock_parent_add_args:
Processor.add_args(mock_parser)
# Assert
mock_parent_add_args.assert_called_once_with(mock_parser)
# Verify processor-specific arguments were added
assert mock_parser.add_argument.call_count >= 2 # At least store-uri and api-key
@patch('trustgraph.storage.doc_embeddings.qdrant.write.QdrantClient')
@patch('trustgraph.storage.doc_embeddings.qdrant.write.uuid')
@patch('trustgraph.base.DocumentEmbeddingsStoreService.__init__')
async def test_utf8_decoding_handling(self, mock_base_init, mock_uuid, mock_qdrant_client):
"""Test proper UTF-8 decoding of chunk text"""
# Arrange
mock_base_init.return_value = None
mock_qdrant_instance = MagicMock()
mock_qdrant_instance.collection_exists.return_value = True
mock_qdrant_client.return_value = mock_qdrant_instance
mock_uuid.uuid4.return_value = MagicMock()
mock_uuid.uuid4.return_value.__str__ = MagicMock(return_value='test-uuid')
config = {
'store_uri': 'http://localhost:6333',
'api_key': 'test-api-key',
'taskgroup': AsyncMock(),
'id': 'test-doc-qdrant-processor'
}
processor = Processor(**config)
# Create mock message with UTF-8 encoded text
mock_message = MagicMock()
mock_message.metadata.user = 'utf8_user'
mock_message.metadata.collection = 'utf8_collection'
mock_chunk = MagicMock()
mock_chunk.chunk.decode.return_value = 'UTF-8 text with special chars: café, naïve, résumé'
mock_chunk.vectors = [[0.1, 0.2]]
mock_message.chunks = [mock_chunk]
# Act
await processor.store_document_embeddings(mock_message)
# Assert
# Verify chunk.decode was called with 'utf-8'
mock_chunk.chunk.decode.assert_called_with('utf-8')
# Verify the decoded text was stored in payload
upsert_call_args = mock_qdrant_instance.upsert.call_args
point = upsert_call_args[1]['points'][0]
assert point.payload['doc'] == 'UTF-8 text with special chars: café, naïve, résumé'
@patch('trustgraph.storage.doc_embeddings.qdrant.write.QdrantClient')
@patch('trustgraph.base.DocumentEmbeddingsStoreService.__init__')
async def test_chunk_decode_exception_handling(self, mock_base_init, mock_qdrant_client):
"""Test handling of chunk decode exceptions"""
# Arrange
mock_base_init.return_value = None
mock_qdrant_instance = MagicMock()
mock_qdrant_client.return_value = mock_qdrant_instance
config = {
'store_uri': 'http://localhost:6333',
'api_key': 'test-api-key',
'taskgroup': AsyncMock(),
'id': 'test-doc-qdrant-processor'
}
processor = Processor(**config)
# Create mock message with decode error
mock_message = MagicMock()
mock_message.metadata.user = 'decode_user'
mock_message.metadata.collection = 'decode_collection'
mock_chunk = MagicMock()
mock_chunk.chunk.decode.side_effect = UnicodeDecodeError('utf-8', b'', 0, 1, 'invalid start byte')
mock_chunk.vectors = [[0.1, 0.2]]
mock_message.chunks = [mock_chunk]
# Act & Assert
with pytest.raises(UnicodeDecodeError):
await processor.store_document_embeddings(mock_message)
if __name__ == '__main__':
pytest.main([__file__])

View file

@ -0,0 +1,428 @@
"""
Unit tests for trustgraph.storage.graph_embeddings.qdrant.write
Starting small with a single test to verify basic functionality
"""
import pytest
from unittest.mock import AsyncMock, MagicMock, patch
from unittest import IsolatedAsyncioTestCase
# Import the service under test
from trustgraph.storage.graph_embeddings.qdrant.write import Processor
class TestQdrantGraphEmbeddingsStorage(IsolatedAsyncioTestCase):
"""Test Qdrant graph embeddings storage functionality"""
@patch('trustgraph.storage.graph_embeddings.qdrant.write.QdrantClient')
@patch('trustgraph.base.GraphEmbeddingsStoreService.__init__')
async def test_processor_initialization_basic(self, mock_base_init, mock_qdrant_client):
"""Test basic Qdrant processor initialization"""
# Arrange
mock_base_init.return_value = None
mock_qdrant_instance = MagicMock()
mock_qdrant_client.return_value = mock_qdrant_instance
config = {
'store_uri': 'http://localhost:6333',
'api_key': 'test-api-key',
'taskgroup': AsyncMock(),
'id': 'test-qdrant-processor'
}
# Act
processor = Processor(**config)
# Assert
# Verify base class initialization was called
mock_base_init.assert_called_once()
# Verify QdrantClient was created with correct parameters
mock_qdrant_client.assert_called_once_with(url='http://localhost:6333', api_key='test-api-key')
# Verify processor attributes
assert hasattr(processor, 'qdrant')
assert processor.qdrant == mock_qdrant_instance
assert hasattr(processor, 'last_collection')
assert processor.last_collection is None
@patch('trustgraph.storage.graph_embeddings.qdrant.write.QdrantClient')
@patch('trustgraph.base.GraphEmbeddingsStoreService.__init__')
async def test_get_collection_creates_new_collection(self, mock_base_init, mock_qdrant_client):
"""Test get_collection creates a new collection when it doesn't exist"""
# Arrange
mock_base_init.return_value = None
mock_qdrant_instance = MagicMock()
mock_qdrant_instance.collection_exists.return_value = False
mock_qdrant_client.return_value = mock_qdrant_instance
config = {
'store_uri': 'http://localhost:6333',
'api_key': 'test-api-key',
'taskgroup': AsyncMock(),
'id': 'test-qdrant-processor'
}
processor = Processor(**config)
# Act
collection_name = processor.get_collection(dim=512, user='test_user', collection='test_collection')
# Assert
expected_name = 't_test_user_test_collection_512'
assert collection_name == expected_name
assert processor.last_collection == expected_name
# Verify collection existence check and creation
mock_qdrant_instance.collection_exists.assert_called_once_with(expected_name)
mock_qdrant_instance.create_collection.assert_called_once()
# Verify create_collection was called with correct parameters
create_call_args = mock_qdrant_instance.create_collection.call_args
assert create_call_args[1]['collection_name'] == expected_name
@patch('trustgraph.storage.graph_embeddings.qdrant.write.QdrantClient')
@patch('trustgraph.storage.graph_embeddings.qdrant.write.uuid')
@patch('trustgraph.base.GraphEmbeddingsStoreService.__init__')
async def test_store_graph_embeddings_basic(self, mock_base_init, mock_uuid, mock_qdrant_client):
"""Test storing graph embeddings with basic message"""
# Arrange
mock_base_init.return_value = None
mock_qdrant_instance = MagicMock()
mock_qdrant_instance.collection_exists.return_value = True # Collection already exists
mock_qdrant_client.return_value = mock_qdrant_instance
mock_uuid.uuid4.return_value.return_value = 'test-uuid-123'
config = {
'store_uri': 'http://localhost:6333',
'api_key': 'test-api-key',
'taskgroup': AsyncMock(),
'id': 'test-qdrant-processor'
}
processor = Processor(**config)
# Create mock message with entities and vectors
mock_message = MagicMock()
mock_message.metadata.user = 'test_user'
mock_message.metadata.collection = 'test_collection'
mock_entity = MagicMock()
mock_entity.entity.value = 'test_entity'
mock_entity.vectors = [[0.1, 0.2, 0.3]] # Single vector with 3 dimensions
mock_message.entities = [mock_entity]
# Act
await processor.store_graph_embeddings(mock_message)
# Assert
# Verify collection existence was checked
expected_collection = 't_test_user_test_collection_3'
mock_qdrant_instance.collection_exists.assert_called_once_with(expected_collection)
# Verify upsert was called
mock_qdrant_instance.upsert.assert_called_once()
# Verify upsert parameters
upsert_call_args = mock_qdrant_instance.upsert.call_args
assert upsert_call_args[1]['collection_name'] == expected_collection
assert len(upsert_call_args[1]['points']) == 1
point = upsert_call_args[1]['points'][0]
assert point.vector == [0.1, 0.2, 0.3]
assert point.payload['entity'] == 'test_entity'
@patch('trustgraph.storage.graph_embeddings.qdrant.write.QdrantClient')
@patch('trustgraph.base.GraphEmbeddingsStoreService.__init__')
async def test_get_collection_uses_existing_collection(self, mock_base_init, mock_qdrant_client):
"""Test get_collection uses existing collection without creating new one"""
# Arrange
mock_base_init.return_value = None
mock_qdrant_instance = MagicMock()
mock_qdrant_instance.collection_exists.return_value = True # Collection exists
mock_qdrant_client.return_value = mock_qdrant_instance
config = {
'store_uri': 'http://localhost:6333',
'api_key': 'test-api-key',
'taskgroup': AsyncMock(),
'id': 'test-qdrant-processor'
}
processor = Processor(**config)
# Act
collection_name = processor.get_collection(dim=256, user='existing_user', collection='existing_collection')
# Assert
expected_name = 't_existing_user_existing_collection_256'
assert collection_name == expected_name
assert processor.last_collection == expected_name
# Verify collection existence check was performed
mock_qdrant_instance.collection_exists.assert_called_once_with(expected_name)
# Verify create_collection was NOT called
mock_qdrant_instance.create_collection.assert_not_called()
@patch('trustgraph.storage.graph_embeddings.qdrant.write.QdrantClient')
@patch('trustgraph.base.GraphEmbeddingsStoreService.__init__')
async def test_get_collection_caches_last_collection(self, mock_base_init, mock_qdrant_client):
"""Test get_collection skips checks when using same collection"""
# Arrange
mock_base_init.return_value = None
mock_qdrant_instance = MagicMock()
mock_qdrant_instance.collection_exists.return_value = True
mock_qdrant_client.return_value = mock_qdrant_instance
config = {
'store_uri': 'http://localhost:6333',
'api_key': 'test-api-key',
'taskgroup': AsyncMock(),
'id': 'test-qdrant-processor'
}
processor = Processor(**config)
# First call
collection_name1 = processor.get_collection(dim=128, user='cache_user', collection='cache_collection')
# Reset mock to track second call
mock_qdrant_instance.reset_mock()
# Act - Second call with same parameters
collection_name2 = processor.get_collection(dim=128, user='cache_user', collection='cache_collection')
# Assert
expected_name = 't_cache_user_cache_collection_128'
assert collection_name1 == expected_name
assert collection_name2 == expected_name
# Verify second call skipped existence check (cached)
mock_qdrant_instance.collection_exists.assert_not_called()
mock_qdrant_instance.create_collection.assert_not_called()
@patch('trustgraph.storage.graph_embeddings.qdrant.write.QdrantClient')
@patch('trustgraph.base.GraphEmbeddingsStoreService.__init__')
async def test_get_collection_creation_exception(self, mock_base_init, mock_qdrant_client):
"""Test get_collection handles collection creation exceptions"""
# Arrange
mock_base_init.return_value = None
mock_qdrant_instance = MagicMock()
mock_qdrant_instance.collection_exists.return_value = False
mock_qdrant_instance.create_collection.side_effect = Exception("Qdrant connection failed")
mock_qdrant_client.return_value = mock_qdrant_instance
config = {
'store_uri': 'http://localhost:6333',
'api_key': 'test-api-key',
'taskgroup': AsyncMock(),
'id': 'test-qdrant-processor'
}
processor = Processor(**config)
# Act & Assert
with pytest.raises(Exception, match="Qdrant connection failed"):
processor.get_collection(dim=512, user='error_user', collection='error_collection')
@patch('trustgraph.storage.graph_embeddings.qdrant.write.QdrantClient')
@patch('trustgraph.storage.graph_embeddings.qdrant.write.uuid')
@patch('trustgraph.base.GraphEmbeddingsStoreService.__init__')
async def test_store_graph_embeddings_multiple_entities(self, mock_base_init, mock_uuid, mock_qdrant_client):
"""Test storing graph embeddings with multiple entities"""
# Arrange
mock_base_init.return_value = None
mock_qdrant_instance = MagicMock()
mock_qdrant_instance.collection_exists.return_value = True
mock_qdrant_client.return_value = mock_qdrant_instance
mock_uuid.uuid4.return_value.return_value = 'test-uuid'
config = {
'store_uri': 'http://localhost:6333',
'api_key': 'test-api-key',
'taskgroup': AsyncMock(),
'id': 'test-qdrant-processor'
}
processor = Processor(**config)
# Create mock message with multiple entities
mock_message = MagicMock()
mock_message.metadata.user = 'multi_user'
mock_message.metadata.collection = 'multi_collection'
mock_entity1 = MagicMock()
mock_entity1.entity.value = 'entity_one'
mock_entity1.vectors = [[0.1, 0.2]]
mock_entity2 = MagicMock()
mock_entity2.entity.value = 'entity_two'
mock_entity2.vectors = [[0.3, 0.4]]
mock_message.entities = [mock_entity1, mock_entity2]
# Act
await processor.store_graph_embeddings(mock_message)
# Assert
# Should be called twice (once per entity)
assert mock_qdrant_instance.upsert.call_count == 2
# Verify both entities were processed
upsert_calls = mock_qdrant_instance.upsert.call_args_list
# First entity
first_call = upsert_calls[0]
first_point = first_call[1]['points'][0]
assert first_point.vector == [0.1, 0.2]
assert first_point.payload['entity'] == 'entity_one'
# Second entity
second_call = upsert_calls[1]
second_point = second_call[1]['points'][0]
assert second_point.vector == [0.3, 0.4]
assert second_point.payload['entity'] == 'entity_two'
@patch('trustgraph.storage.graph_embeddings.qdrant.write.QdrantClient')
@patch('trustgraph.storage.graph_embeddings.qdrant.write.uuid')
@patch('trustgraph.base.GraphEmbeddingsStoreService.__init__')
async def test_store_graph_embeddings_multiple_vectors_per_entity(self, mock_base_init, mock_uuid, mock_qdrant_client):
"""Test storing graph embeddings with multiple vectors per entity"""
# Arrange
mock_base_init.return_value = None
mock_qdrant_instance = MagicMock()
mock_qdrant_instance.collection_exists.return_value = True
mock_qdrant_client.return_value = mock_qdrant_instance
mock_uuid.uuid4.return_value.return_value = 'test-uuid'
config = {
'store_uri': 'http://localhost:6333',
'api_key': 'test-api-key',
'taskgroup': AsyncMock(),
'id': 'test-qdrant-processor'
}
processor = Processor(**config)
# Create mock message with entity having multiple vectors
mock_message = MagicMock()
mock_message.metadata.user = 'vector_user'
mock_message.metadata.collection = 'vector_collection'
mock_entity = MagicMock()
mock_entity.entity.value = 'multi_vector_entity'
mock_entity.vectors = [
[0.1, 0.2, 0.3],
[0.4, 0.5, 0.6],
[0.7, 0.8, 0.9]
]
mock_message.entities = [mock_entity]
# Act
await processor.store_graph_embeddings(mock_message)
# Assert
# Should be called 3 times (once per vector)
assert mock_qdrant_instance.upsert.call_count == 3
# Verify all vectors were processed
upsert_calls = mock_qdrant_instance.upsert.call_args_list
expected_vectors = [
[0.1, 0.2, 0.3],
[0.4, 0.5, 0.6],
[0.7, 0.8, 0.9]
]
for i, call in enumerate(upsert_calls):
point = call[1]['points'][0]
assert point.vector == expected_vectors[i]
assert point.payload['entity'] == 'multi_vector_entity'
@patch('trustgraph.storage.graph_embeddings.qdrant.write.QdrantClient')
@patch('trustgraph.base.GraphEmbeddingsStoreService.__init__')
async def test_store_graph_embeddings_empty_entity_value(self, mock_base_init, mock_qdrant_client):
"""Test storing graph embeddings skips empty entity values"""
# Arrange
mock_base_init.return_value = None
mock_qdrant_instance = MagicMock()
mock_qdrant_client.return_value = mock_qdrant_instance
config = {
'store_uri': 'http://localhost:6333',
'api_key': 'test-api-key',
'taskgroup': AsyncMock(),
'id': 'test-qdrant-processor'
}
processor = Processor(**config)
# Create mock message with empty entity value
mock_message = MagicMock()
mock_message.metadata.user = 'empty_user'
mock_message.metadata.collection = 'empty_collection'
mock_entity_empty = MagicMock()
mock_entity_empty.entity.value = "" # Empty string
mock_entity_empty.vectors = [[0.1, 0.2]]
mock_entity_none = MagicMock()
mock_entity_none.entity.value = None # None value
mock_entity_none.vectors = [[0.3, 0.4]]
mock_message.entities = [mock_entity_empty, mock_entity_none]
# Act
await processor.store_graph_embeddings(mock_message)
# Assert
# Should not call upsert for empty entities
mock_qdrant_instance.upsert.assert_not_called()
mock_qdrant_instance.collection_exists.assert_not_called()
@patch('trustgraph.storage.graph_embeddings.qdrant.write.QdrantClient')
@patch('trustgraph.base.GraphEmbeddingsStoreService.__init__')
async def test_processor_initialization_with_defaults(self, mock_base_init, mock_qdrant_client):
"""Test processor initialization with default values"""
# Arrange
mock_base_init.return_value = None
mock_qdrant_instance = MagicMock()
mock_qdrant_client.return_value = mock_qdrant_instance
config = {
'taskgroup': AsyncMock(),
'id': 'test-qdrant-processor'
# No store_uri or api_key provided - should use defaults
}
# Act
processor = Processor(**config)
# Assert
# Verify QdrantClient was created with default URI and None API key
mock_qdrant_client.assert_called_once_with(url='http://localhost:6333', api_key=None)
@patch('trustgraph.storage.graph_embeddings.qdrant.write.QdrantClient')
@patch('trustgraph.base.GraphEmbeddingsStoreService.__init__')
async def test_add_args_calls_parent(self, mock_base_init, mock_qdrant_client):
"""Test that add_args() calls parent add_args method"""
# Arrange
mock_base_init.return_value = None
mock_qdrant_client.return_value = MagicMock()
mock_parser = MagicMock()
# Act
with patch('trustgraph.base.GraphEmbeddingsStoreService.add_args') as mock_parent_add_args:
Processor.add_args(mock_parser)
# Assert
mock_parent_add_args.assert_called_once_with(mock_parser)
# Verify processor-specific arguments were added
assert mock_parser.add_argument.call_count >= 2 # At least store-uri and api-key
if __name__ == '__main__':
pytest.main([__file__])

View file

@ -0,0 +1,373 @@
"""
Tests for Cassandra triples storage service
"""
import pytest
from unittest.mock import MagicMock, patch, AsyncMock
from trustgraph.storage.triples.cassandra.write import Processor
from trustgraph.schema import Value, Triple
class TestCassandraStorageProcessor:
"""Test cases for Cassandra storage processor"""
def test_processor_initialization_with_defaults(self):
"""Test processor initialization with default parameters"""
taskgroup_mock = MagicMock()
processor = Processor(taskgroup=taskgroup_mock)
assert processor.graph_host == ['localhost']
assert processor.username is None
assert processor.password is None
assert processor.table is None
def test_processor_initialization_with_custom_params(self):
"""Test processor initialization with custom parameters"""
taskgroup_mock = MagicMock()
processor = Processor(
taskgroup=taskgroup_mock,
id='custom-storage',
graph_host='cassandra.example.com',
graph_username='testuser',
graph_password='testpass'
)
assert processor.graph_host == ['cassandra.example.com']
assert processor.username == 'testuser'
assert processor.password == 'testpass'
assert processor.table is None
def test_processor_initialization_with_partial_auth(self):
"""Test processor initialization with only username (no password)"""
taskgroup_mock = MagicMock()
processor = Processor(
taskgroup=taskgroup_mock,
graph_username='testuser'
)
assert processor.username == 'testuser'
assert processor.password is None
@pytest.mark.asyncio
@patch('trustgraph.storage.triples.cassandra.write.TrustGraph')
async def test_table_switching_with_auth(self, mock_trustgraph):
"""Test table switching logic when authentication is provided"""
taskgroup_mock = MagicMock()
mock_tg_instance = MagicMock()
mock_trustgraph.return_value = mock_tg_instance
processor = Processor(
taskgroup=taskgroup_mock,
graph_username='testuser',
graph_password='testpass'
)
# Create mock message
mock_message = MagicMock()
mock_message.metadata.user = 'user1'
mock_message.metadata.collection = 'collection1'
mock_message.triples = []
await processor.store_triples(mock_message)
# Verify TrustGraph was called with auth parameters
mock_trustgraph.assert_called_once_with(
hosts=['localhost'],
keyspace='user1',
table='collection1',
username='testuser',
password='testpass'
)
assert processor.table == ('user1', 'collection1')
@pytest.mark.asyncio
@patch('trustgraph.storage.triples.cassandra.write.TrustGraph')
async def test_table_switching_without_auth(self, mock_trustgraph):
"""Test table switching logic when no authentication is provided"""
taskgroup_mock = MagicMock()
mock_tg_instance = MagicMock()
mock_trustgraph.return_value = mock_tg_instance
processor = Processor(taskgroup=taskgroup_mock)
# Create mock message
mock_message = MagicMock()
mock_message.metadata.user = 'user2'
mock_message.metadata.collection = 'collection2'
mock_message.triples = []
await processor.store_triples(mock_message)
# Verify TrustGraph was called without auth parameters
mock_trustgraph.assert_called_once_with(
hosts=['localhost'],
keyspace='user2',
table='collection2'
)
assert processor.table == ('user2', 'collection2')
@pytest.mark.asyncio
@patch('trustgraph.storage.triples.cassandra.write.TrustGraph')
async def test_table_reuse_when_same(self, mock_trustgraph):
"""Test that TrustGraph is not recreated when table hasn't changed"""
taskgroup_mock = MagicMock()
mock_tg_instance = MagicMock()
mock_trustgraph.return_value = mock_tg_instance
processor = Processor(taskgroup=taskgroup_mock)
# Create mock message
mock_message = MagicMock()
mock_message.metadata.user = 'user1'
mock_message.metadata.collection = 'collection1'
mock_message.triples = []
# First call should create TrustGraph
await processor.store_triples(mock_message)
assert mock_trustgraph.call_count == 1
# Second call with same table should reuse TrustGraph
await processor.store_triples(mock_message)
assert mock_trustgraph.call_count == 1 # Should not increase
@pytest.mark.asyncio
@patch('trustgraph.storage.triples.cassandra.write.TrustGraph')
async def test_triple_insertion(self, mock_trustgraph):
"""Test that triples are properly inserted into Cassandra"""
taskgroup_mock = MagicMock()
mock_tg_instance = MagicMock()
mock_trustgraph.return_value = mock_tg_instance
processor = Processor(taskgroup=taskgroup_mock)
# Create mock triples
triple1 = MagicMock()
triple1.s.value = 'subject1'
triple1.p.value = 'predicate1'
triple1.o.value = 'object1'
triple2 = MagicMock()
triple2.s.value = 'subject2'
triple2.p.value = 'predicate2'
triple2.o.value = 'object2'
# Create mock message
mock_message = MagicMock()
mock_message.metadata.user = 'user1'
mock_message.metadata.collection = 'collection1'
mock_message.triples = [triple1, triple2]
await processor.store_triples(mock_message)
# Verify both triples were inserted
assert mock_tg_instance.insert.call_count == 2
mock_tg_instance.insert.assert_any_call('subject1', 'predicate1', 'object1')
mock_tg_instance.insert.assert_any_call('subject2', 'predicate2', 'object2')
@pytest.mark.asyncio
@patch('trustgraph.storage.triples.cassandra.write.TrustGraph')
async def test_triple_insertion_with_empty_list(self, mock_trustgraph):
"""Test behavior when message has no triples"""
taskgroup_mock = MagicMock()
mock_tg_instance = MagicMock()
mock_trustgraph.return_value = mock_tg_instance
processor = Processor(taskgroup=taskgroup_mock)
# Create mock message with empty triples
mock_message = MagicMock()
mock_message.metadata.user = 'user1'
mock_message.metadata.collection = 'collection1'
mock_message.triples = []
await processor.store_triples(mock_message)
# Verify no triples were inserted
mock_tg_instance.insert.assert_not_called()
@pytest.mark.asyncio
@patch('trustgraph.storage.triples.cassandra.write.TrustGraph')
@patch('trustgraph.storage.triples.cassandra.write.time.sleep')
async def test_exception_handling_with_retry(self, mock_sleep, mock_trustgraph):
"""Test exception handling during TrustGraph creation"""
taskgroup_mock = MagicMock()
mock_trustgraph.side_effect = Exception("Connection failed")
processor = Processor(taskgroup=taskgroup_mock)
# Create mock message
mock_message = MagicMock()
mock_message.metadata.user = 'user1'
mock_message.metadata.collection = 'collection1'
mock_message.triples = []
with pytest.raises(Exception, match="Connection failed"):
await processor.store_triples(mock_message)
# Verify sleep was called before re-raising
mock_sleep.assert_called_once_with(1)
def test_add_args_method(self):
"""Test that add_args properly configures argument parser"""
from argparse import ArgumentParser
from unittest.mock import patch
parser = ArgumentParser()
# Mock the parent class add_args method
with patch('trustgraph.storage.triples.cassandra.write.TriplesStoreService.add_args') as mock_parent_add_args:
Processor.add_args(parser)
# Verify parent add_args was called
mock_parent_add_args.assert_called_once_with(parser)
# Verify our specific arguments were added
# Parse empty args to check defaults
args = parser.parse_args([])
assert hasattr(args, 'graph_host')
assert args.graph_host == 'localhost'
assert hasattr(args, 'graph_username')
assert args.graph_username is None
assert hasattr(args, 'graph_password')
assert args.graph_password is None
def test_add_args_with_custom_values(self):
"""Test add_args with custom command line values"""
from argparse import ArgumentParser
from unittest.mock import patch
parser = ArgumentParser()
with patch('trustgraph.storage.triples.cassandra.write.TriplesStoreService.add_args'):
Processor.add_args(parser)
# Test parsing with custom values
args = parser.parse_args([
'--graph-host', 'cassandra.example.com',
'--graph-username', 'testuser',
'--graph-password', 'testpass'
])
assert args.graph_host == 'cassandra.example.com'
assert args.graph_username == 'testuser'
assert args.graph_password == 'testpass'
def test_add_args_short_form(self):
"""Test add_args with short form arguments"""
from argparse import ArgumentParser
from unittest.mock import patch
parser = ArgumentParser()
with patch('trustgraph.storage.triples.cassandra.write.TriplesStoreService.add_args'):
Processor.add_args(parser)
# Test parsing with short form
args = parser.parse_args(['-g', 'short.example.com'])
assert args.graph_host == 'short.example.com'
@patch('trustgraph.storage.triples.cassandra.write.Processor.launch')
def test_run_function(self, mock_launch):
"""Test the run function calls Processor.launch with correct parameters"""
from trustgraph.storage.triples.cassandra.write import run, default_ident
run()
mock_launch.assert_called_once_with(default_ident, '\nGraph writer. Input is graph edge. Writes edges to Cassandra graph.\n')
@pytest.mark.asyncio
@patch('trustgraph.storage.triples.cassandra.write.TrustGraph')
async def test_store_triples_table_switching_between_different_tables(self, mock_trustgraph):
"""Test table switching when different tables are used in sequence"""
taskgroup_mock = MagicMock()
mock_tg_instance1 = MagicMock()
mock_tg_instance2 = MagicMock()
mock_trustgraph.side_effect = [mock_tg_instance1, mock_tg_instance2]
processor = Processor(taskgroup=taskgroup_mock)
# First message with table1
mock_message1 = MagicMock()
mock_message1.metadata.user = 'user1'
mock_message1.metadata.collection = 'collection1'
mock_message1.triples = []
await processor.store_triples(mock_message1)
assert processor.table == ('user1', 'collection1')
assert processor.tg == mock_tg_instance1
# Second message with different table
mock_message2 = MagicMock()
mock_message2.metadata.user = 'user2'
mock_message2.metadata.collection = 'collection2'
mock_message2.triples = []
await processor.store_triples(mock_message2)
assert processor.table == ('user2', 'collection2')
assert processor.tg == mock_tg_instance2
# Verify TrustGraph was created twice for different tables
assert mock_trustgraph.call_count == 2
@pytest.mark.asyncio
@patch('trustgraph.storage.triples.cassandra.write.TrustGraph')
async def test_store_triples_with_special_characters_in_values(self, mock_trustgraph):
"""Test storing triples with special characters and unicode"""
taskgroup_mock = MagicMock()
mock_tg_instance = MagicMock()
mock_trustgraph.return_value = mock_tg_instance
processor = Processor(taskgroup=taskgroup_mock)
# Create triple with special characters
triple = MagicMock()
triple.s.value = 'subject with spaces & symbols'
triple.p.value = 'predicate:with/colons'
triple.o.value = 'object with "quotes" and unicode: ñáéíóú'
mock_message = MagicMock()
mock_message.metadata.user = 'test_user'
mock_message.metadata.collection = 'test_collection'
mock_message.triples = [triple]
await processor.store_triples(mock_message)
# Verify the triple was inserted with special characters preserved
mock_tg_instance.insert.assert_called_once_with(
'subject with spaces & symbols',
'predicate:with/colons',
'object with "quotes" and unicode: ñáéíóú'
)
@pytest.mark.asyncio
@patch('trustgraph.storage.triples.cassandra.write.TrustGraph')
async def test_store_triples_preserves_old_table_on_exception(self, mock_trustgraph):
"""Test that table remains unchanged when TrustGraph creation fails"""
taskgroup_mock = MagicMock()
processor = Processor(taskgroup=taskgroup_mock)
# Set an initial table
processor.table = ('old_user', 'old_collection')
# Mock TrustGraph to raise exception
mock_trustgraph.side_effect = Exception("Connection failed")
mock_message = MagicMock()
mock_message.metadata.user = 'new_user'
mock_message.metadata.collection = 'new_collection'
mock_message.triples = []
with pytest.raises(Exception, match="Connection failed"):
await processor.store_triples(mock_message)
# Table should remain unchanged since self.table = table happens after try/except
assert processor.table == ('old_user', 'old_collection')
# TrustGraph should be set to None though
assert processor.tg is None