Increase storage test coverage (#435)

* Fixing storage and adding tests

* PR pipeline only runs quick tests
This commit is contained in:
cybermaggedon 2025-07-15 09:33:35 +01:00 committed by GitHub
parent 4daa54abaf
commit f37decea2b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
33 changed files with 7606 additions and 754 deletions

View file

@ -0,0 +1,456 @@
"""
Tests for Milvus document embeddings query service
"""
import pytest
from unittest.mock import MagicMock, patch
from trustgraph.query.doc_embeddings.milvus.service import Processor
from trustgraph.schema import DocumentEmbeddingsRequest
class TestMilvusDocEmbeddingsQueryProcessor:
    """Unit tests for the Milvus document embeddings query ``Processor``.

    ``DocVectors`` is patched out wherever a ``Processor`` is built, so the
    tests only observe how the processor talks to its ``vecstore`` mock.
    """

    @staticmethod
    def _request(vectors, limit):
        """Build a ``DocumentEmbeddingsRequest`` for the shared test user/collection."""
        return DocumentEmbeddingsRequest(
            user='test_user',
            collection='test_collection',
            vectors=vectors,
            limit=limit,
        )

    @staticmethod
    def _hits(*docs):
        """Wrap document strings in the ``{"entity": {"doc": ...}}`` shape used for mocked search results."""
        return [{"entity": {"doc": doc}} for doc in docs]

    @pytest.fixture
    def processor(self):
        """Provide a ``Processor`` built while ``DocVectors`` is replaced by a mock."""
        with patch('trustgraph.query.doc_embeddings.milvus.service.DocVectors') as doc_vectors_cls:
            doc_vectors_cls.return_value = MagicMock()
            instance = Processor(
                taskgroup=MagicMock(),
                id='test-milvus-de-query',
                store_uri='http://localhost:19530',
            )
        return instance

    @pytest.fixture
    def mock_query_request(self):
        """Provide a two-vector request with a limit of 10."""
        return self._request([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]], 10)

    @patch('trustgraph.query.doc_embeddings.milvus.service.DocVectors')
    def test_processor_initialization_with_defaults(self, mock_doc_vectors):
        """Without ``store_uri`` the store is created with the localhost URI."""
        store = MagicMock()
        mock_doc_vectors.return_value = store

        instance = Processor(taskgroup=MagicMock())

        mock_doc_vectors.assert_called_once_with('http://localhost:19530')
        assert instance.vecstore == store

    @patch('trustgraph.query.doc_embeddings.milvus.service.DocVectors')
    def test_processor_initialization_with_custom_params(self, mock_doc_vectors):
        """An explicit ``store_uri`` is handed to the store constructor."""
        store = MagicMock()
        mock_doc_vectors.return_value = store

        instance = Processor(
            taskgroup=MagicMock(),
            store_uri='http://custom-milvus:19530',
        )

        mock_doc_vectors.assert_called_once_with('http://custom-milvus:19530')
        assert instance.vecstore == store

    @pytest.mark.asyncio
    async def test_query_document_embeddings_single_vector(self, processor):
        """One vector produces one search and the chunks come back in order."""
        request = self._request([[0.1, 0.2, 0.3]], 5)
        expected_docs = [
            "First document chunk",
            "Second document chunk",
            "Third document chunk",
        ]
        processor.vecstore.search.return_value = self._hits(*expected_docs)

        docs = await processor.query_document_embeddings(request)

        # The vector and limit must be forwarded unchanged.
        processor.vecstore.search.assert_called_once_with([0.1, 0.2, 0.3], limit=5)
        assert len(docs) == 3
        for position, expected in enumerate(expected_docs):
            assert docs[position] == expected

    @pytest.mark.asyncio
    async def test_query_document_embeddings_multiple_vectors(self, processor):
        """Each vector is searched separately and the results are combined."""
        request = self._request([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]], 3)
        # One canned result list per expected search call.
        processor.vecstore.search.side_effect = [
            self._hits("Document from first vector", "Another doc from first vector"),
            self._hits("Document from second vector"),
        ]

        docs = await processor.query_document_embeddings(request)

        wanted_calls = [
            (([0.1, 0.2, 0.3],), {"limit": 3}),
            (([0.4, 0.5, 0.6],), {"limit": 3}),
        ]
        recorded_calls = processor.vecstore.search.call_args_list
        assert processor.vecstore.search.call_count == 2
        for recorded, (wanted_args, wanted_kwargs) in zip(recorded_calls, wanted_calls):
            assert recorded[0] == wanted_args
            assert recorded[1] == wanted_kwargs

        assert len(docs) == 3
        assert "Document from first vector" in docs
        assert "Another doc from first vector" in docs
        assert "Document from second vector" in docs

    @pytest.mark.asyncio
    async def test_query_document_embeddings_with_limit(self, processor):
        """The request limit is forwarded to the search call."""
        request = self._request([[0.1, 0.2, 0.3]], 2)
        # Deliberately more hits than the limit.
        processor.vecstore.search.return_value = self._hits(
            "Document 1",
            "Document 2",
            "Document 3",
            "Document 4",
        )

        docs = await processor.query_document_embeddings(request)

        processor.vecstore.search.assert_called_once_with([0.1, 0.2, 0.3], limit=2)
        # Everything the store returned is passed through (Milvus handles
        # the limit internally).
        assert len(docs) == 4

    @pytest.mark.asyncio
    async def test_query_document_embeddings_empty_vectors(self, processor):
        """An empty vector list triggers no search and yields no chunks."""
        request = self._request([], 5)

        docs = await processor.query_document_embeddings(request)

        processor.vecstore.search.assert_not_called()
        assert len(docs) == 0

    @pytest.mark.asyncio
    async def test_query_document_embeddings_empty_search_results(self, processor):
        """A search that finds nothing yields no chunks."""
        request = self._request([[0.1, 0.2, 0.3]], 5)
        processor.vecstore.search.return_value = []

        docs = await processor.query_document_embeddings(request)

        processor.vecstore.search.assert_called_once_with([0.1, 0.2, 0.3], limit=5)
        assert len(docs) == 0

    @pytest.mark.asyncio
    async def test_query_document_embeddings_unicode_documents(self, processor):
        """Unicode document content is returned unchanged."""
        request = self._request([[0.1, 0.2, 0.3]], 5)
        processor.vecstore.search.return_value = self._hits(
            "Document with Unicode: éñ中文🚀",
            "Regular ASCII document",
            "Document with émojis: 😀🎉",
        )

        docs = await processor.query_document_embeddings(request)

        assert len(docs) == 3
        assert "Document with Unicode: éñ中文🚀" in docs
        assert "Regular ASCII document" in docs
        assert "Document with émojis: 😀🎉" in docs

    @pytest.mark.asyncio
    async def test_query_document_embeddings_large_documents(self, processor):
        """A 10,000-character document is returned unchanged."""
        request = self._request([[0.1, 0.2, 0.3]], 5)
        big_doc = "A" * 10000
        processor.vecstore.search.return_value = self._hits(big_doc, "Small document")

        docs = await processor.query_document_embeddings(request)

        assert len(docs) == 2
        assert big_doc in docs
        assert "Small document" in docs

    @pytest.mark.asyncio
    async def test_query_document_embeddings_special_characters(self, processor):
        """Quotes, control characters and symbols survive the round trip."""
        request = self._request([[0.1, 0.2, 0.3]], 5)
        processor.vecstore.search.return_value = self._hits(
            "Document with \"quotes\" and 'apostrophes'",
            "Document with\nnewlines\tand\ttabs",
            "Document with special chars: @#$%^&*()",
        )

        docs = await processor.query_document_embeddings(request)

        assert len(docs) == 3
        assert "Document with \"quotes\" and 'apostrophes'" in docs
        assert "Document with\nnewlines\tand\ttabs" in docs
        assert "Document with special chars: @#$%^&*()" in docs

    @pytest.mark.asyncio
    async def test_query_document_embeddings_zero_limit(self, processor):
        """A limit of zero skips the search entirely."""
        request = self._request([[0.1, 0.2, 0.3]], 0)

        docs = await processor.query_document_embeddings(request)

        processor.vecstore.search.assert_not_called()
        assert len(docs) == 0

    @pytest.mark.asyncio
    async def test_query_document_embeddings_negative_limit(self, processor):
        """A negative limit skips the search entirely."""
        request = self._request([[0.1, 0.2, 0.3]], -1)

        docs = await processor.query_document_embeddings(request)

        processor.vecstore.search.assert_not_called()
        assert len(docs) == 0

    @pytest.mark.asyncio
    async def test_query_document_embeddings_exception_handling(self, processor):
        """A failure raised by the store propagates to the caller."""
        request = self._request([[0.1, 0.2, 0.3]], 5)
        processor.vecstore.search.side_effect = Exception("Milvus connection failed")

        with pytest.raises(Exception, match="Milvus connection failed"):
            await processor.query_document_embeddings(request)

    @pytest.mark.asyncio
    async def test_query_document_embeddings_different_vector_dimensions(self, processor):
        """Vectors of differing lengths are each searched."""
        request = self._request(
            [
                [0.1, 0.2],             # 2D vector
                [0.3, 0.4, 0.5, 0.6],   # 4D vector
                [0.7, 0.8, 0.9],        # 3D vector
            ],
            5,
        )
        # Results are consumed in the same order as the vectors above.
        processor.vecstore.search.side_effect = [
            self._hits("Document from 2D vector"),
            self._hits("Document from 4D vector"),
            self._hits("Document from 3D vector"),
        ]

        docs = await processor.query_document_embeddings(request)

        assert processor.vecstore.search.call_count == 3
        assert len(docs) == 3
        assert "Document from 2D vector" in docs
        assert "Document from 4D vector" in docs
        assert "Document from 3D vector" in docs

    @pytest.mark.asyncio
    async def test_query_document_embeddings_duplicate_documents(self, processor):
        """A document returned for two vectors appears twice in the output."""
        request = self._request([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]], 5)
        processor.vecstore.search.side_effect = [
            self._hits("Document A", "Document B"),
            self._hits("Document B", "Document C"),  # "Document B" repeats
        ]

        docs = await processor.query_document_embeddings(request)

        # Unlike graph embeddings, doc embeddings are not deduplicated; this
        # preserves ranking and allows multiple occurrences.
        assert len(docs) == 4
        assert docs.count("Document B") == 2
        assert "Document A" in docs
        assert "Document C" in docs

    def test_add_args_method(self):
        """``add_args`` delegates to the parent and registers ``store_uri``."""
        from argparse import ArgumentParser

        parser = ArgumentParser()
        with patch('trustgraph.query.doc_embeddings.milvus.service.DocumentEmbeddingsQueryService.add_args') as parent_add_args:
            Processor.add_args(parser)

        parent_add_args.assert_called_once()

        # Parsing an empty command line exposes the defaults.
        namespace = parser.parse_args([])
        assert hasattr(namespace, 'store_uri')
        assert namespace.store_uri == 'http://localhost:19530'

    def test_add_args_with_custom_values(self):
        """The long ``--store-uri`` option overrides the default."""
        from argparse import ArgumentParser

        parser = ArgumentParser()
        with patch('trustgraph.query.doc_embeddings.milvus.service.DocumentEmbeddingsQueryService.add_args'):
            Processor.add_args(parser)

        namespace = parser.parse_args([
            '--store-uri', 'http://custom-milvus:19530',
        ])
        assert namespace.store_uri == 'http://custom-milvus:19530'

    def test_add_args_short_form(self):
        """The short ``-t`` option sets ``store_uri``."""
        from argparse import ArgumentParser

        parser = ArgumentParser()
        with patch('trustgraph.query.doc_embeddings.milvus.service.DocumentEmbeddingsQueryService.add_args'):
            Processor.add_args(parser)

        namespace = parser.parse_args(['-t', 'http://short-milvus:19530'])
        assert namespace.store_uri == 'http://short-milvus:19530'

    @patch('trustgraph.query.doc_embeddings.milvus.service.Processor.launch')
    def test_run_function(self, mock_launch):
        """``run`` launches the processor with the default ident and module doc text."""
        from trustgraph.query.doc_embeddings.milvus.service import run, default_ident

        run()

        mock_launch.assert_called_once_with(
            default_ident,
            "\nDocument embeddings query service. Input is vector, output is an array\nof chunks\n"
        )

View file

@ -0,0 +1,558 @@
"""
Tests for Pinecone document embeddings query service
"""
import pytest
from unittest.mock import MagicMock, patch
from trustgraph.query.doc_embeddings.pinecone.service import Processor
class TestPineconeDocEmbeddingsQueryProcessor:
    """Unit tests for the Pinecone document embeddings query ``Processor``.

    The Pinecone client classes are patched out wherever a ``Processor`` is
    built, so the tests only observe calls made on the ``processor.pinecone``
    mock and the index mocks it hands out.
    """

    @staticmethod
    def _message(vectors, limit):
        """Build a mock query message for the shared test user/collection."""
        message = MagicMock()
        message.vectors = vectors
        message.limit = limit
        message.user = 'test_user'
        message.collection = 'test_collection'
        return message

    @staticmethod
    def _results(*docs):
        """Build a mock query response whose ``matches`` carry ``docs`` as ``metadata['doc']``."""
        results = MagicMock()
        results.matches = [MagicMock(metadata={'doc': doc}) for doc in docs]
        return results

    @staticmethod
    def _install_index(processor):
        """Make ``processor.pinecone.Index(...)`` return a fresh mock and return that mock."""
        index = MagicMock()
        processor.pinecone.Index.return_value = index
        return index

    @pytest.fixture
    def mock_query_message(self):
        """Provide a two-vector query message with a limit of 5."""
        return self._message(
            [
                [0.1, 0.2, 0.3],
                [0.4, 0.5, 0.6],
            ],
            5,
        )

    @pytest.fixture
    def processor(self):
        """Provide a ``Processor`` built while ``Pinecone`` is replaced by a mock."""
        with patch('trustgraph.query.doc_embeddings.pinecone.service.Pinecone') as mock_pinecone_class:
            mock_pinecone_class.return_value = MagicMock()
            processor = Processor(
                taskgroup=MagicMock(),
                id='test-pinecone-de-query',
                api_key='test-api-key'
            )
        return processor

    @patch('trustgraph.query.doc_embeddings.pinecone.service.Pinecone')
    @patch('trustgraph.query.doc_embeddings.pinecone.service.default_api_key', 'env-api-key')
    def test_processor_initialization_with_defaults(self, mock_pinecone_class):
        """Without ``api_key`` the module-level default key is used."""
        mock_pinecone = MagicMock()
        mock_pinecone_class.return_value = mock_pinecone

        processor = Processor(taskgroup=MagicMock())

        mock_pinecone_class.assert_called_once_with(api_key='env-api-key')
        assert processor.pinecone == mock_pinecone
        assert processor.api_key == 'env-api-key'

    @patch('trustgraph.query.doc_embeddings.pinecone.service.Pinecone')
    def test_processor_initialization_with_custom_params(self, mock_pinecone_class):
        """An explicit ``api_key`` is handed to the Pinecone client."""
        mock_pinecone_class.return_value = MagicMock()

        processor = Processor(
            taskgroup=MagicMock(),
            api_key='custom-api-key'
        )

        mock_pinecone_class.assert_called_once_with(api_key='custom-api-key')
        assert processor.api_key == 'custom-api-key'

    @patch('trustgraph.query.doc_embeddings.pinecone.service.PineconeGRPC')
    def test_processor_initialization_with_url(self, mock_pinecone_grpc_class):
        """Supplying ``url`` builds the client through ``PineconeGRPC`` with ``host`` set."""
        mock_pinecone = MagicMock()
        mock_pinecone_grpc_class.return_value = mock_pinecone

        processor = Processor(
            taskgroup=MagicMock(),
            api_key='test-api-key',
            url='https://custom-host.pinecone.io'
        )

        mock_pinecone_grpc_class.assert_called_once_with(
            api_key='test-api-key',
            host='https://custom-host.pinecone.io'
        )
        assert processor.pinecone == mock_pinecone
        assert processor.url == 'https://custom-host.pinecone.io'

    @patch('trustgraph.query.doc_embeddings.pinecone.service.default_api_key', 'not-specified')
    def test_processor_initialization_missing_api_key(self):
        """Construction fails with ``RuntimeError`` when no API key is available."""
        with pytest.raises(RuntimeError, match="Pinecone API key must be specified"):
            Processor(taskgroup=MagicMock())

    @pytest.mark.asyncio
    async def test_query_document_embeddings_single_vector(self, processor):
        """One vector yields one index lookup, one query, and chunks in order."""
        message = self._message([[0.1, 0.2, 0.3]], 3)
        mock_index = self._install_index(processor)
        mock_index.query.return_value = self._results(
            'First document chunk',
            'Second document chunk',
            'Third document chunk',
        )

        chunks = await processor.query_document_embeddings(message)

        # The expected index name ends in the vector length (3 here).
        processor.pinecone.Index.assert_called_once_with("d-test_user-test_collection-3")
        mock_index.query.assert_called_once_with(
            vector=[0.1, 0.2, 0.3],
            top_k=3,
            include_values=False,
            include_metadata=True
        )
        assert len(chunks) == 3
        assert chunks[0] == 'First document chunk'
        assert chunks[1] == 'Second document chunk'
        assert chunks[2] == 'Third document chunk'

    @pytest.mark.asyncio
    async def test_query_document_embeddings_multiple_vectors(self, processor, mock_query_message):
        """Each vector is queried and the chunks from all queries are returned."""
        mock_index = self._install_index(processor)
        # One canned response per expected query, in vector order.
        mock_index.query.side_effect = [
            self._results('Document chunk 1', 'Document chunk 2'),
            self._results('Document chunk 3', 'Document chunk 4'),
        ]

        chunks = await processor.query_document_embeddings(mock_query_message)

        assert mock_index.query.call_count == 2
        assert len(chunks) == 4
        assert 'Document chunk 1' in chunks
        assert 'Document chunk 2' in chunks
        assert 'Document chunk 3' in chunks
        assert 'Document chunk 4' in chunks

    @pytest.mark.asyncio
    async def test_query_document_embeddings_limit_handling(self, processor):
        """The message limit is forwarded to the query as ``top_k``."""
        message = self._message([[0.1, 0.2, 0.3]], 2)
        mock_index = self._install_index(processor)
        # Deliberately more matches than the limit.
        mock_index.query.return_value = self._results(
            *[f'Document chunk {i}' for i in range(10)]
        )

        chunks = await processor.query_document_embeddings(message)

        mock_index.query.assert_called_once()
        _args, kwargs = mock_index.query.call_args
        assert kwargs['top_k'] == 2
        # All returned matches are passed through (the limit is applied by
        # Pinecone itself).
        assert len(chunks) == 10

    @pytest.mark.asyncio
    async def test_query_document_embeddings_zero_limit(self, processor):
        """A limit of zero makes no query and returns an empty list."""
        message = self._message([[0.1, 0.2, 0.3]], 0)
        mock_index = self._install_index(processor)

        chunks = await processor.query_document_embeddings(message)

        mock_index.query.assert_not_called()
        assert chunks == []

    @pytest.mark.asyncio
    async def test_query_document_embeddings_negative_limit(self, processor):
        """A negative limit makes no query and returns an empty list."""
        message = self._message([[0.1, 0.2, 0.3]], -1)
        mock_index = self._install_index(processor)

        chunks = await processor.query_document_embeddings(message)

        mock_index.query.assert_not_called()
        assert chunks == []

    @pytest.mark.asyncio
    async def test_query_document_embeddings_different_vector_dimensions(self, processor):
        """Vectors of different lengths are routed to differently named indexes."""
        message = self._message(
            [
                [0.1, 0.2],             # 2D vector
                [0.3, 0.4, 0.5, 0.6],   # 4D vector
            ],
            5,
        )
        mock_index_2d = MagicMock()
        mock_index_4d = MagicMock()

        def mock_index_side_effect(name):
            if name.endswith("-2"):
                return mock_index_2d
            if name.endswith("-4"):
                return mock_index_4d
            # Fail loudly instead of returning None, which would surface as
            # an unrelated AttributeError later on.
            raise AssertionError(f"unexpected Pinecone index name: {name!r}")

        processor.pinecone.Index.side_effect = mock_index_side_effect
        mock_index_2d.query.return_value = self._results('Document from 2D index')
        mock_index_4d.query.return_value = self._results('Document from 4D index')

        chunks = await processor.query_document_embeddings(message)

        assert processor.pinecone.Index.call_count == 2
        mock_index_2d.query.assert_called_once()
        mock_index_4d.query.assert_called_once()
        assert 'Document from 2D index' in chunks
        assert 'Document from 4D index' in chunks

    @pytest.mark.asyncio
    async def test_query_document_embeddings_empty_vectors_list(self, processor):
        """An empty vector list touches no index and returns an empty list."""
        message = self._message([], 5)
        mock_index = self._install_index(processor)

        chunks = await processor.query_document_embeddings(message)

        processor.pinecone.Index.assert_not_called()
        mock_index.query.assert_not_called()
        assert chunks == []

    @pytest.mark.asyncio
    async def test_query_document_embeddings_no_results(self, processor):
        """A query with no matches returns an empty list."""
        message = self._message([[0.1, 0.2, 0.3]], 5)
        mock_index = self._install_index(processor)
        mock_index.query.return_value = self._results()

        chunks = await processor.query_document_embeddings(message)

        assert chunks == []

    @pytest.mark.asyncio
    async def test_query_document_embeddings_unicode_content(self, processor):
        """Unicode document content is returned unchanged."""
        message = self._message([[0.1, 0.2, 0.3]], 2)
        mock_index = self._install_index(processor)
        mock_index.query.return_value = self._results(
            'Document with Unicode: éñ中文🚀',
            'Regular ASCII document',
        )

        chunks = await processor.query_document_embeddings(message)

        assert len(chunks) == 2
        assert 'Document with Unicode: éñ中文🚀' in chunks
        assert 'Regular ASCII document' in chunks

    @pytest.mark.asyncio
    async def test_query_document_embeddings_large_content(self, processor):
        """A 10,000-character document is returned unchanged."""
        message = self._message([[0.1, 0.2, 0.3]], 1)
        mock_index = self._install_index(processor)
        large_content = "A" * 10000
        mock_index.query.return_value = self._results(large_content)

        chunks = await processor.query_document_embeddings(message)

        assert len(chunks) == 1
        assert chunks[0] == large_content

    @pytest.mark.asyncio
    async def test_query_document_embeddings_mixed_content_types(self, processor):
        """Short, long, symbol-laden, padded and empty strings are all returned."""
        message = self._message([[0.1, 0.2, 0.3]], 5)
        mock_index = self._install_index(processor)
        mock_index.query.return_value = self._results(
            'Short text',
            'A' * 1000,  # Long text
            'Text with numbers: 123 and symbols: @#$',
            ' Whitespace text ',
            '',  # Empty string
        )

        chunks = await processor.query_document_embeddings(message)

        assert len(chunks) == 5
        assert 'Short text' in chunks
        assert 'A' * 1000 in chunks
        assert 'Text with numbers: 123 and symbols: @#$' in chunks
        assert ' Whitespace text ' in chunks
        assert '' in chunks

    @pytest.mark.asyncio
    async def test_query_document_embeddings_exception_handling(self, processor):
        """A failure raised by ``index.query`` propagates to the caller."""
        message = self._message([[0.1, 0.2, 0.3]], 5)
        mock_index = self._install_index(processor)
        mock_index.query.side_effect = Exception("Query failed")

        with pytest.raises(Exception, match="Query failed"):
            await processor.query_document_embeddings(message)

    @pytest.mark.asyncio
    async def test_query_document_embeddings_index_access_failure(self, processor):
        """A failure raised while obtaining the index propagates to the caller."""
        message = self._message([[0.1, 0.2, 0.3]], 5)
        processor.pinecone.Index.side_effect = Exception("Index access failed")

        with pytest.raises(Exception, match="Index access failed"):
            await processor.query_document_embeddings(message)

    @pytest.mark.asyncio
    async def test_query_document_embeddings_vector_accumulation(self, processor):
        """Chunks from three queries of differing sizes are all accumulated."""
        message = self._message(
            [
                [0.1, 0.2, 0.3],
                [0.4, 0.5, 0.6],
                [0.7, 0.8, 0.9],
            ],
            2,
        )
        mock_index = self._install_index(processor)
        # One canned response per expected query, in vector order.
        mock_index.query.side_effect = [
            self._results('Doc from vector 1.1', 'Doc from vector 1.2'),
            self._results('Doc from vector 2.1'),
            self._results(
                'Doc from vector 3.1',
                'Doc from vector 3.2',
                'Doc from vector 3.3',
            ),
        ]

        chunks = await processor.query_document_embeddings(message)

        assert mock_index.query.call_count == 3
        assert len(chunks) == 6
        assert 'Doc from vector 1.1' in chunks
        assert 'Doc from vector 1.2' in chunks
        assert 'Doc from vector 2.1' in chunks
        assert 'Doc from vector 3.1' in chunks
        assert 'Doc from vector 3.2' in chunks
        assert 'Doc from vector 3.3' in chunks

    def test_add_args_method(self):
        """``add_args`` delegates to the parent and registers ``api_key`` and ``url``."""
        from argparse import ArgumentParser

        parser = ArgumentParser()
        # Pin the module-level default key so the default asserted below does
        # not depend on whether the surrounding environment supplies a key.
        with patch('trustgraph.query.doc_embeddings.pinecone.service.default_api_key', 'not-specified'):
            with patch('trustgraph.query.doc_embeddings.pinecone.service.DocumentEmbeddingsQueryService.add_args') as mock_parent_add_args:
                Processor.add_args(parser)

        mock_parent_add_args.assert_called_once()

        # Parsing an empty command line exposes the defaults.
        args = parser.parse_args([])
        assert hasattr(args, 'api_key')
        assert args.api_key == 'not-specified'
        assert hasattr(args, 'url')
        assert args.url is None

    def test_add_args_with_custom_values(self):
        """The long ``--api-key`` and ``--url`` options override the defaults."""
        from argparse import ArgumentParser

        parser = ArgumentParser()
        with patch('trustgraph.query.doc_embeddings.pinecone.service.DocumentEmbeddingsQueryService.add_args'):
            Processor.add_args(parser)

        args = parser.parse_args([
            '--api-key', 'custom-api-key',
            '--url', 'https://custom-host.pinecone.io'
        ])
        assert args.api_key == 'custom-api-key'
        assert args.url == 'https://custom-host.pinecone.io'

    def test_add_args_short_form(self):
        """The short ``-a`` and ``-u`` options set ``api_key`` and ``url``."""
        from argparse import ArgumentParser

        parser = ArgumentParser()
        with patch('trustgraph.query.doc_embeddings.pinecone.service.DocumentEmbeddingsQueryService.add_args'):
            Processor.add_args(parser)

        args = parser.parse_args([
            '-a', 'short-api-key',
            '-u', 'https://short-host.pinecone.io'
        ])
        assert args.api_key == 'short-api-key'
        assert args.url == 'https://short-host.pinecone.io'

    @patch('trustgraph.query.doc_embeddings.pinecone.service.Processor.launch')
    def test_run_function(self, mock_launch):
        """``run`` launches the processor with the default ident and module doc text."""
        from trustgraph.query.doc_embeddings.pinecone.service import run, default_ident

        run()

        mock_launch.assert_called_once_with(
            default_ident,
            "\nDocument embeddings query service. Input is vector, output is an array\nof chunks. Pinecone implementation.\n"
        )

View file

@ -0,0 +1,484 @@
"""
Tests for Milvus graph embeddings query service
"""
import pytest
from unittest.mock import MagicMock, patch
from trustgraph.query.graph_embeddings.milvus.service import Processor
from trustgraph.schema import Value, GraphEmbeddingsRequest
class TestMilvusGraphEmbeddingsQueryProcessor:
"""Test cases for Milvus graph embeddings query processor"""
@pytest.fixture
def processor(self):
    """Provide a ``Processor`` built while ``EntityVectors`` is replaced by a mock."""
    with patch('trustgraph.query.graph_embeddings.milvus.service.EntityVectors') as entity_vectors_cls:
        entity_vectors_cls.return_value = MagicMock()
        instance = Processor(
            taskgroup=MagicMock(),
            id='test-milvus-ge-query',
            store_uri='http://localhost:19530',
        )
    return instance
@pytest.fixture
def mock_query_request(self):
"""Create a mock query request for testing"""
query = GraphEmbeddingsRequest(
user='test_user',
collection='test_collection',
vectors=[[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]],
limit=10
)
return query
@patch('trustgraph.query.graph_embeddings.milvus.service.EntityVectors')
def test_processor_initialization_with_defaults(self, mock_entity_vectors):
"""Test processor initialization with default parameters"""
taskgroup_mock = MagicMock()
mock_vecstore = MagicMock()
mock_entity_vectors.return_value = mock_vecstore
processor = Processor(taskgroup=taskgroup_mock)
mock_entity_vectors.assert_called_once_with('http://localhost:19530')
assert processor.vecstore == mock_vecstore
@patch('trustgraph.query.graph_embeddings.milvus.service.EntityVectors')
def test_processor_initialization_with_custom_params(self, mock_entity_vectors):
"""Test processor initialization with custom parameters"""
taskgroup_mock = MagicMock()
mock_vecstore = MagicMock()
mock_entity_vectors.return_value = mock_vecstore
processor = Processor(
taskgroup=taskgroup_mock,
store_uri='http://custom-milvus:19530'
)
mock_entity_vectors.assert_called_once_with('http://custom-milvus:19530')
assert processor.vecstore == mock_vecstore
def test_create_value_with_http_uri(self, processor):
"""Test create_value with HTTP URI"""
result = processor.create_value("http://example.com/resource")
assert isinstance(result, Value)
assert result.value == "http://example.com/resource"
assert result.is_uri is True
def test_create_value_with_https_uri(self, processor):
"""Test create_value with HTTPS URI"""
result = processor.create_value("https://example.com/resource")
assert isinstance(result, Value)
assert result.value == "https://example.com/resource"
assert result.is_uri is True
def test_create_value_with_literal(self, processor):
"""Test create_value with literal value"""
result = processor.create_value("just a literal string")
assert isinstance(result, Value)
assert result.value == "just a literal string"
assert result.is_uri is False
def test_create_value_with_empty_string(self, processor):
"""Test create_value with empty string"""
result = processor.create_value("")
assert isinstance(result, Value)
assert result.value == ""
assert result.is_uri is False
def test_create_value_with_partial_uri(self, processor):
"""Test create_value with string that looks like URI but isn't complete"""
result = processor.create_value("http")
assert isinstance(result, Value)
assert result.value == "http"
assert result.is_uri is False
def test_create_value_with_ftp_uri(self, processor):
"""Test create_value with FTP URI (should not be detected as URI)"""
result = processor.create_value("ftp://example.com/file")
assert isinstance(result, Value)
assert result.value == "ftp://example.com/file"
assert result.is_uri is False
@pytest.mark.asyncio
async def test_query_graph_embeddings_single_vector(self, processor):
"""Test querying graph embeddings with a single vector"""
query = GraphEmbeddingsRequest(
user='test_user',
collection='test_collection',
vectors=[[0.1, 0.2, 0.3]],
limit=5
)
# Mock search results
mock_results = [
{"entity": {"entity": "http://example.com/entity1"}},
{"entity": {"entity": "http://example.com/entity2"}},
{"entity": {"entity": "literal entity"}},
]
processor.vecstore.search.return_value = mock_results
result = await processor.query_graph_embeddings(query)
# Verify search was called with correct parameters
processor.vecstore.search.assert_called_once_with([0.1, 0.2, 0.3], limit=10)
# Verify results are converted to Value objects
assert len(result) == 3
assert isinstance(result[0], Value)
assert result[0].value == "http://example.com/entity1"
assert result[0].is_uri is True
assert isinstance(result[1], Value)
assert result[1].value == "http://example.com/entity2"
assert result[1].is_uri is True
assert isinstance(result[2], Value)
assert result[2].value == "literal entity"
assert result[2].is_uri is False
@pytest.mark.asyncio
async def test_query_graph_embeddings_multiple_vectors(self, processor):
"""Test querying graph embeddings with multiple vectors"""
query = GraphEmbeddingsRequest(
user='test_user',
collection='test_collection',
vectors=[[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]],
limit=3
)
# Mock search results - different results for each vector
mock_results_1 = [
{"entity": {"entity": "http://example.com/entity1"}},
{"entity": {"entity": "http://example.com/entity2"}},
]
mock_results_2 = [
{"entity": {"entity": "http://example.com/entity2"}}, # Duplicate
{"entity": {"entity": "http://example.com/entity3"}},
]
processor.vecstore.search.side_effect = [mock_results_1, mock_results_2]
result = await processor.query_graph_embeddings(query)
# Verify search was called twice with correct parameters
expected_calls = [
(([0.1, 0.2, 0.3],), {"limit": 6}),
(([0.4, 0.5, 0.6],), {"limit": 6}),
]
assert processor.vecstore.search.call_count == 2
for i, (expected_args, expected_kwargs) in enumerate(expected_calls):
actual_call = processor.vecstore.search.call_args_list[i]
assert actual_call[0] == expected_args
assert actual_call[1] == expected_kwargs
# Verify results are deduplicated and limited
assert len(result) == 3
entity_values = [r.value for r in result]
assert "http://example.com/entity1" in entity_values
assert "http://example.com/entity2" in entity_values
assert "http://example.com/entity3" in entity_values
@pytest.mark.asyncio
async def test_query_graph_embeddings_with_limit(self, processor):
"""Test querying graph embeddings respects limit parameter"""
query = GraphEmbeddingsRequest(
user='test_user',
collection='test_collection',
vectors=[[0.1, 0.2, 0.3]],
limit=2
)
# Mock search results - more results than limit
mock_results = [
{"entity": {"entity": "http://example.com/entity1"}},
{"entity": {"entity": "http://example.com/entity2"}},
{"entity": {"entity": "http://example.com/entity3"}},
{"entity": {"entity": "http://example.com/entity4"}},
]
processor.vecstore.search.return_value = mock_results
result = await processor.query_graph_embeddings(query)
# Verify search was called with 2*limit for better deduplication
processor.vecstore.search.assert_called_once_with([0.1, 0.2, 0.3], limit=4)
# Verify results are limited to the requested limit
assert len(result) == 2
@pytest.mark.asyncio
async def test_query_graph_embeddings_deduplication(self, processor):
"""Test that duplicate entities are properly deduplicated"""
query = GraphEmbeddingsRequest(
user='test_user',
collection='test_collection',
vectors=[[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]],
limit=5
)
# Mock search results with duplicates
mock_results_1 = [
{"entity": {"entity": "http://example.com/entity1"}},
{"entity": {"entity": "http://example.com/entity2"}},
]
mock_results_2 = [
{"entity": {"entity": "http://example.com/entity2"}}, # Duplicate
{"entity": {"entity": "http://example.com/entity1"}}, # Duplicate
{"entity": {"entity": "http://example.com/entity3"}}, # New
]
processor.vecstore.search.side_effect = [mock_results_1, mock_results_2]
result = await processor.query_graph_embeddings(query)
# Verify duplicates are removed
assert len(result) == 3
entity_values = [r.value for r in result]
assert len(set(entity_values)) == 3 # All unique
assert "http://example.com/entity1" in entity_values
assert "http://example.com/entity2" in entity_values
assert "http://example.com/entity3" in entity_values
@pytest.mark.asyncio
async def test_query_graph_embeddings_early_termination_on_limit(self, processor):
"""Test that querying stops early when limit is reached"""
query = GraphEmbeddingsRequest(
user='test_user',
collection='test_collection',
vectors=[[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]],
limit=2
)
# Mock search results - first vector returns enough results
mock_results_1 = [
{"entity": {"entity": "http://example.com/entity1"}},
{"entity": {"entity": "http://example.com/entity2"}},
{"entity": {"entity": "http://example.com/entity3"}},
]
processor.vecstore.search.return_value = mock_results_1
result = await processor.query_graph_embeddings(query)
# Verify only first vector was searched (limit reached)
processor.vecstore.search.assert_called_once_with([0.1, 0.2, 0.3], limit=4)
# Verify results are limited
assert len(result) == 2
@pytest.mark.asyncio
async def test_query_graph_embeddings_empty_vectors(self, processor):
"""Test querying graph embeddings with empty vectors list"""
query = GraphEmbeddingsRequest(
user='test_user',
collection='test_collection',
vectors=[],
limit=5
)
result = await processor.query_graph_embeddings(query)
# Verify no search was called
processor.vecstore.search.assert_not_called()
# Verify empty results
assert len(result) == 0
@pytest.mark.asyncio
async def test_query_graph_embeddings_empty_search_results(self, processor):
"""Test querying graph embeddings with empty search results"""
query = GraphEmbeddingsRequest(
user='test_user',
collection='test_collection',
vectors=[[0.1, 0.2, 0.3]],
limit=5
)
# Mock empty search results
processor.vecstore.search.return_value = []
result = await processor.query_graph_embeddings(query)
# Verify search was called
processor.vecstore.search.assert_called_once_with([0.1, 0.2, 0.3], limit=10)
# Verify empty results
assert len(result) == 0
@pytest.mark.asyncio
async def test_query_graph_embeddings_mixed_uri_literal_results(self, processor):
"""Test querying graph embeddings with mixed URI and literal results"""
query = GraphEmbeddingsRequest(
user='test_user',
collection='test_collection',
vectors=[[0.1, 0.2, 0.3]],
limit=5
)
# Mock search results with mixed types
mock_results = [
{"entity": {"entity": "http://example.com/uri_entity"}},
{"entity": {"entity": "literal entity text"}},
{"entity": {"entity": "https://example.com/another_uri"}},
{"entity": {"entity": "another literal"}},
]
processor.vecstore.search.return_value = mock_results
result = await processor.query_graph_embeddings(query)
# Verify all results are properly typed
assert len(result) == 4
# Check URI entities
uri_results = [r for r in result if r.is_uri]
assert len(uri_results) == 2
uri_values = [r.value for r in uri_results]
assert "http://example.com/uri_entity" in uri_values
assert "https://example.com/another_uri" in uri_values
# Check literal entities
literal_results = [r for r in result if not r.is_uri]
assert len(literal_results) == 2
literal_values = [r.value for r in literal_results]
assert "literal entity text" in literal_values
assert "another literal" in literal_values
@pytest.mark.asyncio
async def test_query_graph_embeddings_exception_handling(self, processor):
"""Test exception handling during query processing"""
query = GraphEmbeddingsRequest(
user='test_user',
collection='test_collection',
vectors=[[0.1, 0.2, 0.3]],
limit=5
)
# Mock search to raise exception
processor.vecstore.search.side_effect = Exception("Milvus connection failed")
# Should raise the exception
with pytest.raises(Exception, match="Milvus connection failed"):
await processor.query_graph_embeddings(query)
def test_add_args_method(self):
"""Test that add_args properly configures argument parser"""
from argparse import ArgumentParser
from unittest.mock import patch
parser = ArgumentParser()
# Mock the parent class add_args method
with patch('trustgraph.query.graph_embeddings.milvus.service.GraphEmbeddingsQueryService.add_args') as mock_parent_add_args:
Processor.add_args(parser)
# Verify parent add_args was called
mock_parent_add_args.assert_called_once()
# Verify our specific arguments were added
# Parse empty args to check defaults
args = parser.parse_args([])
assert hasattr(args, 'store_uri')
assert args.store_uri == 'http://localhost:19530'
def test_add_args_with_custom_values(self):
"""Test add_args with custom command line values"""
from argparse import ArgumentParser
from unittest.mock import patch
parser = ArgumentParser()
with patch('trustgraph.query.graph_embeddings.milvus.service.GraphEmbeddingsQueryService.add_args'):
Processor.add_args(parser)
# Test parsing with custom values
args = parser.parse_args([
'--store-uri', 'http://custom-milvus:19530'
])
assert args.store_uri == 'http://custom-milvus:19530'
def test_add_args_short_form(self):
"""Test add_args with short form arguments"""
from argparse import ArgumentParser
from unittest.mock import patch
parser = ArgumentParser()
with patch('trustgraph.query.graph_embeddings.milvus.service.GraphEmbeddingsQueryService.add_args'):
Processor.add_args(parser)
# Test parsing with short form
args = parser.parse_args(['-t', 'http://short-milvus:19530'])
assert args.store_uri == 'http://short-milvus:19530'
@patch('trustgraph.query.graph_embeddings.milvus.service.Processor.launch')
def test_run_function(self, mock_launch):
"""Test the run function calls Processor.launch with correct parameters"""
from trustgraph.query.graph_embeddings.milvus.service import run, default_ident
run()
mock_launch.assert_called_once_with(
default_ident,
"\nGraph embeddings query service. Input is vector, output is list of\nentities\n"
)
@pytest.mark.asyncio
async def test_query_graph_embeddings_zero_limit(self, processor):
"""Test querying graph embeddings with zero limit"""
query = GraphEmbeddingsRequest(
user='test_user',
collection='test_collection',
vectors=[[0.1, 0.2, 0.3]],
limit=0
)
result = await processor.query_graph_embeddings(query)
# Verify no search was called (optimization for zero limit)
processor.vecstore.search.assert_not_called()
# Verify empty results due to zero limit
assert len(result) == 0
@pytest.mark.asyncio
async def test_query_graph_embeddings_different_vector_dimensions(self, processor):
"""Test querying graph embeddings with different vector dimensions"""
query = GraphEmbeddingsRequest(
user='test_user',
collection='test_collection',
vectors=[
[0.1, 0.2], # 2D vector
[0.3, 0.4, 0.5, 0.6], # 4D vector
[0.7, 0.8, 0.9] # 3D vector
],
limit=5
)
# Mock search results for each vector
mock_results_1 = [{"entity": {"entity": "entity_2d"}}]
mock_results_2 = [{"entity": {"entity": "entity_4d"}}]
mock_results_3 = [{"entity": {"entity": "entity_3d"}}]
processor.vecstore.search.side_effect = [mock_results_1, mock_results_2, mock_results_3]
result = await processor.query_graph_embeddings(query)
# Verify all vectors were searched
assert processor.vecstore.search.call_count == 3
# Verify results from all dimensions
assert len(result) == 3
entity_values = [r.value for r in result]
assert "entity_2d" in entity_values
assert "entity_4d" in entity_values
assert "entity_3d" in entity_values

View file

@ -0,0 +1,507 @@
"""
Tests for Pinecone graph embeddings query service
"""
import pytest
from unittest.mock import MagicMock, patch
from trustgraph.query.graph_embeddings.pinecone.service import Processor
from trustgraph.schema import Value
class TestPineconeGraphEmbeddingsQueryProcessor:
    """Unit tests for the Pinecone graph embeddings query processor.

    The Pinecone client classes are patched wherever a Processor is built,
    so ``processor.pinecone`` is a mock and no network access takes place.
    """

    # Patch target for the parent service's argument registration.
    _PARENT_ADD_ARGS = 'trustgraph.query.graph_embeddings.pinecone.service.GraphEmbeddingsQueryService.add_args'

    @staticmethod
    def _message(vectors, limit):
        """Build a mock query message for the fixed test user/collection."""
        message = MagicMock()
        message.vectors = vectors
        message.limit = limit
        message.user = 'test_user'
        message.collection = 'test_collection'
        return message

    @staticmethod
    def _query_result(*names):
        """Build a mock query response whose matches carry the given entity names."""
        response = MagicMock()
        response.matches = [
            MagicMock(metadata={'entity': name}) for name in names
        ]
        return response

    @pytest.fixture
    def mock_query_message(self):
        """Create a mock two-vector query message with a limit of 5."""
        message = MagicMock()
        message.vectors = [
            [0.1, 0.2, 0.3],
            [0.4, 0.5, 0.6]
        ]
        message.limit = 5
        message.user = 'test_user'
        message.collection = 'test_collection'
        return message

    @pytest.fixture
    def processor(self):
        """Create a Processor whose Pinecone client is a MagicMock."""
        with patch('trustgraph.query.graph_embeddings.pinecone.service.Pinecone') as mock_pinecone_class:
            mock_pinecone_class.return_value = MagicMock()
            return Processor(
                taskgroup=MagicMock(),
                id='test-pinecone-ge-query',
                api_key='test-api-key'
            )

    @patch('trustgraph.query.graph_embeddings.pinecone.service.Pinecone')
    @patch('trustgraph.query.graph_embeddings.pinecone.service.default_api_key', 'env-api-key')
    def test_processor_initialization_with_defaults(self, mock_pinecone_class):
        """Without an api_key argument, the module default key is used."""
        mock_pinecone = MagicMock()
        mock_pinecone_class.return_value = mock_pinecone

        processor = Processor(taskgroup=MagicMock())

        mock_pinecone_class.assert_called_once_with(api_key='env-api-key')
        assert processor.pinecone == mock_pinecone
        assert processor.api_key == 'env-api-key'

    @patch('trustgraph.query.graph_embeddings.pinecone.service.Pinecone')
    def test_processor_initialization_with_custom_params(self, mock_pinecone_class):
        """An explicit api_key is passed to the Pinecone client."""
        mock_pinecone_class.return_value = MagicMock()

        processor = Processor(
            taskgroup=MagicMock(),
            api_key='custom-api-key'
        )

        mock_pinecone_class.assert_called_once_with(api_key='custom-api-key')
        assert processor.api_key == 'custom-api-key'

    @patch('trustgraph.query.graph_embeddings.pinecone.service.PineconeGRPC')
    def test_processor_initialization_with_url(self, mock_pinecone_grpc_class):
        """Supplying a URL selects the GRPC client, with the URL as host."""
        mock_pinecone = MagicMock()
        mock_pinecone_grpc_class.return_value = mock_pinecone

        processor = Processor(
            taskgroup=MagicMock(),
            api_key='test-api-key',
            url='https://custom-host.pinecone.io'
        )

        mock_pinecone_grpc_class.assert_called_once_with(
            api_key='test-api-key',
            host='https://custom-host.pinecone.io'
        )
        assert processor.pinecone == mock_pinecone
        assert processor.url == 'https://custom-host.pinecone.io'

    @patch('trustgraph.query.graph_embeddings.pinecone.service.default_api_key', 'not-specified')
    def test_processor_initialization_missing_api_key(self):
        """Construction fails when the key is left at its 'not-specified' default."""
        with pytest.raises(RuntimeError, match="Pinecone API key must be specified"):
            Processor(taskgroup=MagicMock())

    def test_create_value_uri(self, processor):
        """An ``http://`` string becomes a URI Value."""
        uri_entity = "http://example.org/entity"

        value = processor.create_value(uri_entity)

        assert isinstance(value, Value)
        assert value.value == uri_entity
        # Identity check: the flag must be the bool True, not merely truthy.
        assert value.is_uri is True

    def test_create_value_https_uri(self, processor):
        """An ``https://`` string becomes a URI Value."""
        uri_entity = "https://example.org/entity"

        value = processor.create_value(uri_entity)

        assert isinstance(value, Value)
        assert value.value == uri_entity
        assert value.is_uri is True

    def test_create_value_literal(self, processor):
        """A plain string becomes a non-URI Value."""
        literal_entity = "literal_entity"

        value = processor.create_value(literal_entity)

        assert isinstance(value, Value)
        assert value.value == literal_entity
        assert value.is_uri is False

    @pytest.mark.asyncio
    async def test_query_graph_embeddings_single_vector(self, processor):
        """One vector: one index lookup, one query, matches converted in order."""
        message = self._message([[0.1, 0.2, 0.3]], limit=3)

        mock_index = MagicMock()
        processor.pinecone.Index.return_value = mock_index
        mock_index.query.return_value = self._query_result(
            'http://example.org/entity1',
            'entity2',
            'http://example.org/entity3',
        )

        entities = await processor.query_graph_embeddings(message)

        # The expected index name ends in the vector length (3 here).
        expected_index_name = "t-test_user-test_collection-3"
        processor.pinecone.Index.assert_called_once_with(expected_index_name)

        mock_index.query.assert_called_once_with(
            vector=[0.1, 0.2, 0.3],
            top_k=6,  # 2 * limit
            include_values=False,
            include_metadata=True
        )

        assert len(entities) == 3
        assert entities[0].value == 'http://example.org/entity1'
        assert entities[0].is_uri is True
        assert entities[1].value == 'entity2'
        assert entities[1].is_uri is False
        assert entities[2].value == 'http://example.org/entity3'
        assert entities[2].is_uri is True

    @pytest.mark.asyncio
    async def test_query_graph_embeddings_multiple_vectors(self, processor, mock_query_message):
        """Two vectors: two queries, with the overlapping entity reported once."""
        mock_index = MagicMock()
        processor.pinecone.Index.return_value = mock_index
        mock_index.query.side_effect = [
            self._query_result('entity1', 'entity2'),
            # entity2 repeats the first response; entity3 is new.
            self._query_result('entity2', 'entity3'),
        ]

        entities = await processor.query_graph_embeddings(mock_query_message)

        assert mock_index.query.call_count == 2

        entity_values = [e.value for e in entities]
        assert len(entity_values) == 3
        assert 'entity1' in entity_values
        assert 'entity2' in entity_values
        assert 'entity3' in entity_values

    @pytest.mark.asyncio
    async def test_query_graph_embeddings_limit_handling(self, processor):
        """Ten matches with limit 2: only two entities come back."""
        message = self._message([[0.1, 0.2, 0.3]], limit=2)

        mock_index = MagicMock()
        processor.pinecone.Index.return_value = mock_index
        mock_index.query.return_value = self._query_result(
            *(f'entity{i}' for i in range(10))
        )

        entities = await processor.query_graph_embeddings(message)

        assert len(entities) == 2

    @pytest.mark.asyncio
    async def test_query_graph_embeddings_zero_limit(self, processor):
        """A zero limit returns an empty list without querying."""
        message = self._message([[0.1, 0.2, 0.3]], limit=0)

        mock_index = MagicMock()
        processor.pinecone.Index.return_value = mock_index

        entities = await processor.query_graph_embeddings(message)

        mock_index.query.assert_not_called()
        assert entities == []

    @pytest.mark.asyncio
    async def test_query_graph_embeddings_negative_limit(self, processor):
        """A negative limit returns an empty list without querying."""
        message = self._message([[0.1, 0.2, 0.3]], limit=-1)

        mock_index = MagicMock()
        processor.pinecone.Index.return_value = mock_index

        entities = await processor.query_graph_embeddings(message)

        mock_index.query.assert_not_called()
        assert entities == []

    @pytest.mark.asyncio
    async def test_query_graph_embeddings_different_vector_dimensions(self, processor):
        """Vectors of different lengths are routed to different indexes."""
        message = self._message(
            [
                [0.1, 0.2],            # 2D vector
                [0.3, 0.4, 0.5, 0.6],  # 4D vector
            ],
            limit=5,
        )

        mock_index_2d = MagicMock()
        mock_index_4d = MagicMock()

        def mock_index_side_effect(name):
            """Hand back the index mock matching the name's dimension suffix."""
            if name.endswith("-2"):
                return mock_index_2d
            if name.endswith("-4"):
                return mock_index_4d
            # Fail loudly instead of returning None, which would surface
            # later as an unrelated AttributeError.
            raise AssertionError(f"unexpected index name: {name!r}")

        processor.pinecone.Index.side_effect = mock_index_side_effect

        mock_index_2d.query.return_value = self._query_result('entity_2d')
        mock_index_4d.query.return_value = self._query_result('entity_4d')

        entities = await processor.query_graph_embeddings(message)

        assert processor.pinecone.Index.call_count == 2
        mock_index_2d.query.assert_called_once()
        mock_index_4d.query.assert_called_once()

        entity_values = [e.value for e in entities]
        assert 'entity_2d' in entity_values
        assert 'entity_4d' in entity_values

    @pytest.mark.asyncio
    async def test_query_graph_embeddings_empty_vectors_list(self, processor):
        """No vectors: no index lookup, no query, empty result."""
        message = self._message([], limit=5)

        mock_index = MagicMock()
        processor.pinecone.Index.return_value = mock_index

        entities = await processor.query_graph_embeddings(message)

        processor.pinecone.Index.assert_not_called()
        mock_index.query.assert_not_called()
        assert entities == []

    @pytest.mark.asyncio
    async def test_query_graph_embeddings_no_results(self, processor):
        """A query with no matches yields an empty list."""
        message = self._message([[0.1, 0.2, 0.3]], limit=5)

        mock_index = MagicMock()
        processor.pinecone.Index.return_value = mock_index
        mock_index.query.return_value = self._query_result()

        entities = await processor.query_graph_embeddings(message)

        assert entities == []

    @pytest.mark.asyncio
    async def test_query_graph_embeddings_deduplication_across_vectors(self, processor):
        """Overlapping matches from two queries give unique entities, capped at limit."""
        message = self._message(
            [
                [0.1, 0.2, 0.3],
                [0.4, 0.5, 0.6],
            ],
            limit=3,
        )

        mock_index = MagicMock()
        processor.pinecone.Index.return_value = mock_index
        mock_index.query.side_effect = [
            self._query_result('entity1', 'entity2', 'entity3', 'entity4'),
            # entity2 and entity3 repeat the first response.
            self._query_result('entity2', 'entity3', 'entity5'),
        ]

        entities = await processor.query_graph_embeddings(message)

        assert len(entities) == 3
        entity_values = [e.value for e in entities]
        assert len(set(entity_values)) == 3  # All unique

    @pytest.mark.asyncio
    async def test_query_graph_embeddings_early_termination_on_limit(self, processor):
        """Once the first query fills the limit, remaining vectors are skipped."""
        message = self._message(
            [
                [0.1, 0.2, 0.3],
                [0.4, 0.5, 0.6],
                [0.7, 0.8, 0.9],
            ],
            limit=2,
        )

        mock_index = MagicMock()
        processor.pinecone.Index.return_value = mock_index
        mock_index.query.return_value = self._query_result(
            'entity1', 'entity2', 'entity3'
        )

        entities = await processor.query_graph_embeddings(message)

        mock_index.query.assert_called_once()
        assert len(entities) == 2

    @pytest.mark.asyncio
    async def test_query_graph_embeddings_exception_handling(self, processor):
        """A failure raised by the index query propagates to the caller."""
        message = self._message([[0.1, 0.2, 0.3]], limit=5)

        mock_index = MagicMock()
        processor.pinecone.Index.return_value = mock_index
        mock_index.query.side_effect = Exception("Query failed")

        with pytest.raises(Exception, match="Query failed"):
            await processor.query_graph_embeddings(message)

    def test_add_args_method(self):
        """add_args delegates to the parent and registers api_key/url defaults."""
        from argparse import ArgumentParser

        parser = ArgumentParser()

        # Pin the module-level default key so the assertion below does not
        # depend on the environment the tests run in. Presumably add_args
        # uses default_api_key as the option default (verify against the
        # service module); if it does not, this patch is a no-op.
        with patch(self._PARENT_ADD_ARGS) as mock_parent_add_args, patch(
            'trustgraph.query.graph_embeddings.pinecone.service.default_api_key',
            'not-specified',
        ):
            Processor.add_args(parser)

        mock_parent_add_args.assert_called_once()

        # Parsing an empty command line exposes the defaults.
        args = parser.parse_args([])
        assert hasattr(args, 'api_key')
        assert args.api_key == 'not-specified'
        assert hasattr(args, 'url')
        assert args.url is None

    def test_add_args_with_custom_values(self):
        """``--api-key`` and ``--url`` override the defaults."""
        from argparse import ArgumentParser

        parser = ArgumentParser()

        with patch(self._PARENT_ADD_ARGS):
            Processor.add_args(parser)

        args = parser.parse_args([
            '--api-key', 'custom-api-key',
            '--url', 'https://custom-host.pinecone.io'
        ])

        assert args.api_key == 'custom-api-key'
        assert args.url == 'https://custom-host.pinecone.io'

    def test_add_args_short_form(self):
        """``-a`` and ``-u`` are accepted as short forms."""
        from argparse import ArgumentParser

        parser = ArgumentParser()

        with patch(self._PARENT_ADD_ARGS):
            Processor.add_args(parser)

        args = parser.parse_args([
            '-a', 'short-api-key',
            '-u', 'https://short-host.pinecone.io'
        ])

        assert args.api_key == 'short-api-key'
        assert args.url == 'https://short-host.pinecone.io'

    @patch('trustgraph.query.graph_embeddings.pinecone.service.Processor.launch')
    def test_run_function(self, mock_launch):
        """``run()`` forwards the default ident and module doc to launch."""
        from trustgraph.query.graph_embeddings.pinecone.service import run, default_ident

        run()

        mock_launch.assert_called_once_with(
            default_ident,
            "\nGraph embeddings query service. Input is vector, output is list of\nentities. Pinecone implementation.\n"
        )

View file

@ -0,0 +1,556 @@
"""
Tests for FalkorDB triples query service
"""
import pytest
from unittest.mock import MagicMock, patch
from trustgraph.query.triples.falkordb.service import Processor
from trustgraph.schema import Value, TriplesQueryRequest
class TestFalkorDBQueryProcessor:
"""Test cases for FalkorDB query processor"""
@pytest.fixture
def processor(self):
    """Return a Processor built while the FalkorDB client class is patched out."""
    with patch('trustgraph.query.triples.falkordb.service.FalkorDB'):
        instance = Processor(
            taskgroup=MagicMock(),
            id='test-falkordb-query',
            graph_url='falkor://localhost:6379',
        )
        return instance
def test_create_value_with_http_uri(self, processor):
    """An ``http://`` string becomes a URI Value."""
    text = "http://example.com/resource"

    value = processor.create_value(text)

    assert isinstance(value, Value)
    assert value.value == text
    assert value.is_uri is True
def test_create_value_with_https_uri(self, processor):
    """An ``https://`` string becomes a URI Value."""
    text = "https://example.com/resource"

    value = processor.create_value(text)

    assert isinstance(value, Value)
    assert value.value == text
    assert value.is_uri is True
def test_create_value_with_literal(self, processor):
    """Free text becomes a non-URI Value."""
    text = "just a literal string"

    value = processor.create_value(text)

    assert isinstance(value, Value)
    assert value.value == text
    assert value.is_uri is False
def test_create_value_with_empty_string(self, processor):
    """The empty string becomes a non-URI Value."""
    text = ""

    value = processor.create_value(text)

    assert isinstance(value, Value)
    assert value.value == text
    assert value.is_uri is False
def test_create_value_with_partial_uri(self, processor):
    """A bare scheme name without ``://`` is not treated as a URI."""
    text = "http"

    value = processor.create_value(text)

    assert isinstance(value, Value)
    assert value.value == text
    assert value.is_uri is False
def test_create_value_with_ftp_uri(self, processor):
    """An ``ftp://`` string is expected to be classified as a literal."""
    text = "ftp://example.com/file"

    value = processor.create_value(text)

    assert isinstance(value, Value)
    assert value.value == text
    assert value.is_uri is False
@patch('trustgraph.query.triples.falkordb.service.FalkorDB')
def test_processor_initialization_with_defaults(self, mock_falkordb):
    """With no arguments, the default URL and database name are used."""
    # Wire the mocked client chain: FalkorDB.from_url() -> client,
    # client.select_graph() -> graph.
    client = MagicMock()
    graph = MagicMock()
    client.select_graph.return_value = graph
    mock_falkordb.from_url.return_value = client

    built = Processor(taskgroup=MagicMock())

    assert built.db == 'falkordb'
    mock_falkordb.from_url.assert_called_once_with('falkor://falkordb:6379')
    client.select_graph.assert_called_once_with('falkordb')
@patch('trustgraph.query.triples.falkordb.service.FalkorDB')
def test_processor_initialization_with_custom_params(self, mock_falkordb):
    """Explicit ``graph_url`` and ``database`` must reach the FalkorDB client.

    Asserts the URL is passed to ``FalkorDB.from_url`` and the database
    name to ``select_graph``, and that ``db`` holds the database name.
    """
    graph = MagicMock()
    client = MagicMock()
    client.select_graph.return_value = graph
    mock_falkordb.from_url.return_value = client

    built = Processor(
        taskgroup=MagicMock(),
        graph_url='falkor://custom:6379',
        database='customdb',
    )

    assert built.db == 'customdb'
    mock_falkordb.from_url.assert_called_once_with('falkor://custom:6379')
    client.select_graph.assert_called_once_with('customdb')
@patch('trustgraph.query.triples.falkordb.service.FalkorDB')
@pytest.mark.asyncio
async def test_query_triples_spo_query(self, mock_falkordb):
    """Query with subject, predicate and object all specified.

    Every graph query returns the same one-row result. The test expects
    two graph queries and two triples, and checks that the first triple
    echoes the requested s/p/o values.
    """
    graph = MagicMock()
    # Same single-row result object is handed back for every query call.
    graph.query.return_value = MagicMock(result_set=[["record1"]])
    client = MagicMock()
    client.select_graph.return_value = graph
    mock_falkordb.from_url.return_value = client

    service = Processor(taskgroup=MagicMock())
    request = TriplesQueryRequest(
        user='test_user',
        collection='test_collection',
        s=Value(value="http://example.com/subject", is_uri=True),
        p=Value(value="http://example.com/predicate", is_uri=True),
        o=Value(value="literal object", is_uri=False),
        limit=100,
    )

    triples = await service.query_triples(request)

    assert graph.query.call_count == 2
    assert len(triples) == 2
    first = triples[0]
    assert (first.s.value, first.p.value, first.o.value) == (
        "http://example.com/subject",
        "http://example.com/predicate",
        "literal object",
    )
@patch('trustgraph.query.triples.falkordb.service.FalkorDB')
@pytest.mark.asyncio
async def test_query_triples_sp_query(self, mock_falkordb):
    """Query with subject and predicate specified, object left as None.

    The two mocked results each hold one single-column row. The test
    expects two graph queries and two triples whose objects are those row
    values, in call order.
    """
    graph = MagicMock()
    # Consecutive query calls return these results in order.
    graph.query.side_effect = [
        MagicMock(result_set=[["literal result"]]),
        MagicMock(result_set=[["http://example.com/uri_result"]]),
    ]
    client = MagicMock()
    client.select_graph.return_value = graph
    mock_falkordb.from_url.return_value = client

    service = Processor(taskgroup=MagicMock())
    request = TriplesQueryRequest(
        user='test_user',
        collection='test_collection',
        s=Value(value="http://example.com/subject", is_uri=True),
        p=Value(value="http://example.com/predicate", is_uri=True),
        o=None,
        limit=100,
    )

    triples = await service.query_triples(request)

    assert graph.query.call_count == 2
    assert len(triples) == 2
    found = [(t.s.value, t.p.value, t.o.value) for t in triples]
    assert found == [
        ("http://example.com/subject", "http://example.com/predicate", "literal result"),
        ("http://example.com/subject", "http://example.com/predicate", "http://example.com/uri_result"),
    ]
@patch('trustgraph.query.triples.falkordb.service.FalkorDB')
@pytest.mark.asyncio
async def test_query_triples_so_query(self, mock_falkordb):
    """Query with subject and object specified, predicate left as None.

    The two mocked results each hold one single-column row. The test
    expects two graph queries and two triples whose predicates are those
    row values, in call order.
    """
    graph = MagicMock()
    # Consecutive query calls return these results in order.
    graph.query.side_effect = [
        MagicMock(result_set=[["http://example.com/pred1"]]),
        MagicMock(result_set=[["http://example.com/pred2"]]),
    ]
    client = MagicMock()
    client.select_graph.return_value = graph
    mock_falkordb.from_url.return_value = client

    service = Processor(taskgroup=MagicMock())
    request = TriplesQueryRequest(
        user='test_user',
        collection='test_collection',
        s=Value(value="http://example.com/subject", is_uri=True),
        p=None,
        o=Value(value="literal object", is_uri=False),
        limit=100,
    )

    triples = await service.query_triples(request)

    assert graph.query.call_count == 2
    assert len(triples) == 2
    found = [(t.s.value, t.p.value, t.o.value) for t in triples]
    assert found == [
        ("http://example.com/subject", "http://example.com/pred1", "literal object"),
        ("http://example.com/subject", "http://example.com/pred2", "literal object"),
    ]
@patch('trustgraph.query.triples.falkordb.service.FalkorDB')
@pytest.mark.asyncio
async def test_query_triples_s_query(self, mock_falkordb):
    """Query with only the subject specified.

    The two mocked results each hold one two-column row. The test expects
    two graph queries and two triples whose predicate and object are the
    row's columns, in that order.
    """
    graph = MagicMock()
    # Consecutive query calls return these results in order.
    graph.query.side_effect = [
        MagicMock(result_set=[["http://example.com/pred1", "literal1"]]),
        MagicMock(result_set=[["http://example.com/pred2", "http://example.com/uri2"]]),
    ]
    client = MagicMock()
    client.select_graph.return_value = graph
    mock_falkordb.from_url.return_value = client

    service = Processor(taskgroup=MagicMock())
    request = TriplesQueryRequest(
        user='test_user',
        collection='test_collection',
        s=Value(value="http://example.com/subject", is_uri=True),
        p=None,
        o=None,
        limit=100,
    )

    triples = await service.query_triples(request)

    assert graph.query.call_count == 2
    assert len(triples) == 2
    found = [(t.s.value, t.p.value, t.o.value) for t in triples]
    assert found == [
        ("http://example.com/subject", "http://example.com/pred1", "literal1"),
        ("http://example.com/subject", "http://example.com/pred2", "http://example.com/uri2"),
    ]
@patch('trustgraph.query.triples.falkordb.service.FalkorDB')
@pytest.mark.asyncio
async def test_query_triples_po_query(self, mock_falkordb):
    """Query with predicate and object specified, subject left as None.

    The two mocked results each hold one single-column row. The test
    expects two graph queries and two triples whose subjects are those row
    values, in call order.
    """
    graph = MagicMock()
    # Consecutive query calls return these results in order.
    graph.query.side_effect = [
        MagicMock(result_set=[["http://example.com/subj1"]]),
        MagicMock(result_set=[["http://example.com/subj2"]]),
    ]
    client = MagicMock()
    client.select_graph.return_value = graph
    mock_falkordb.from_url.return_value = client

    service = Processor(taskgroup=MagicMock())
    request = TriplesQueryRequest(
        user='test_user',
        collection='test_collection',
        s=None,
        p=Value(value="http://example.com/predicate", is_uri=True),
        o=Value(value="literal object", is_uri=False),
        limit=100,
    )

    triples = await service.query_triples(request)

    assert graph.query.call_count == 2
    assert len(triples) == 2
    found = [(t.s.value, t.p.value, t.o.value) for t in triples]
    assert found == [
        ("http://example.com/subj1", "http://example.com/predicate", "literal object"),
        ("http://example.com/subj2", "http://example.com/predicate", "literal object"),
    ]
@patch('trustgraph.query.triples.falkordb.service.FalkorDB')
@pytest.mark.asyncio
async def test_query_triples_p_query(self, mock_falkordb):
    """Query with only the predicate specified.

    The two mocked results each hold one two-column row. The test expects
    two graph queries and two triples whose subject and object are the
    row's columns, in that order.
    """
    graph = MagicMock()
    # Consecutive query calls return these results in order.
    graph.query.side_effect = [
        MagicMock(result_set=[["http://example.com/subj1", "literal1"]]),
        MagicMock(result_set=[["http://example.com/subj2", "http://example.com/uri2"]]),
    ]
    client = MagicMock()
    client.select_graph.return_value = graph
    mock_falkordb.from_url.return_value = client

    service = Processor(taskgroup=MagicMock())
    request = TriplesQueryRequest(
        user='test_user',
        collection='test_collection',
        s=None,
        p=Value(value="http://example.com/predicate", is_uri=True),
        o=None,
        limit=100,
    )

    triples = await service.query_triples(request)

    assert graph.query.call_count == 2
    assert len(triples) == 2
    found = [(t.s.value, t.p.value, t.o.value) for t in triples]
    assert found == [
        ("http://example.com/subj1", "http://example.com/predicate", "literal1"),
        ("http://example.com/subj2", "http://example.com/predicate", "http://example.com/uri2"),
    ]
@patch('trustgraph.query.triples.falkordb.service.FalkorDB')
@pytest.mark.asyncio
async def test_query_triples_o_query(self, mock_falkordb):
    """Query with only the object specified.

    The two mocked results each hold one two-column row. The test expects
    two graph queries and two triples whose subject and predicate are the
    row's columns, in that order.
    """
    graph = MagicMock()
    # Consecutive query calls return these results in order.
    graph.query.side_effect = [
        MagicMock(result_set=[["http://example.com/subj1", "http://example.com/pred1"]]),
        MagicMock(result_set=[["http://example.com/subj2", "http://example.com/pred2"]]),
    ]
    client = MagicMock()
    client.select_graph.return_value = graph
    mock_falkordb.from_url.return_value = client

    service = Processor(taskgroup=MagicMock())
    request = TriplesQueryRequest(
        user='test_user',
        collection='test_collection',
        s=None,
        p=None,
        o=Value(value="literal object", is_uri=False),
        limit=100,
    )

    triples = await service.query_triples(request)

    assert graph.query.call_count == 2
    assert len(triples) == 2
    found = [(t.s.value, t.p.value, t.o.value) for t in triples]
    assert found == [
        ("http://example.com/subj1", "http://example.com/pred1", "literal object"),
        ("http://example.com/subj2", "http://example.com/pred2", "literal object"),
    ]
@patch('trustgraph.query.triples.falkordb.service.FalkorDB')
@pytest.mark.asyncio
async def test_query_triples_wildcard_query(self, mock_falkordb):
    """Query with subject, predicate and object all left as None.

    The two mocked results each hold one three-column row. The test
    expects two graph queries and two triples built from the row columns
    as (subject, predicate, object).
    """
    graph = MagicMock()
    # Consecutive query calls return these results in order.
    graph.query.side_effect = [
        MagicMock(result_set=[["http://example.com/s1", "http://example.com/p1", "literal1"]]),
        MagicMock(result_set=[["http://example.com/s2", "http://example.com/p2", "http://example.com/o2"]]),
    ]
    client = MagicMock()
    client.select_graph.return_value = graph
    mock_falkordb.from_url.return_value = client

    service = Processor(taskgroup=MagicMock())
    request = TriplesQueryRequest(
        user='test_user',
        collection='test_collection',
        s=None,
        p=None,
        o=None,
        limit=100,
    )

    triples = await service.query_triples(request)

    assert graph.query.call_count == 2
    assert len(triples) == 2
    found = [(t.s.value, t.p.value, t.o.value) for t in triples]
    assert found == [
        ("http://example.com/s1", "http://example.com/p1", "literal1"),
        ("http://example.com/s2", "http://example.com/p2", "http://example.com/o2"),
    ]
@patch('trustgraph.query.triples.falkordb.service.FalkorDB')
@pytest.mark.asyncio
async def test_query_triples_exception_handling(self, mock_falkordb):
    """An exception raised by the graph query must reach the caller.

    ``graph.query`` is rigged to raise; the test expects ``query_triples``
    to raise an exception carrying the same message.
    """
    graph = MagicMock()
    graph.query.side_effect = Exception("Database connection failed")
    client = MagicMock()
    client.select_graph.return_value = graph
    mock_falkordb.from_url.return_value = client

    service = Processor(taskgroup=MagicMock())
    request = TriplesQueryRequest(
        user='test_user',
        collection='test_collection',
        s=Value(value="http://example.com/subject", is_uri=True),
        p=None,
        o=None,
        limit=100,
    )

    with pytest.raises(Exception, match="Database connection failed"):
        await service.query_triples(request)
def test_add_args_method(self):
    """add_args must call the parent hook and register the FalkorDB options.

    With the parent ``TriplesQueryService.add_args`` patched out, parsing
    an empty command line must yield the default ``graph_url`` and
    ``database`` values.
    """
    from argparse import ArgumentParser
    from unittest.mock import patch

    parent_hook = 'trustgraph.query.triples.falkordb.service.TriplesQueryService.add_args'
    arg_parser = ArgumentParser()

    with patch(parent_hook) as parent_add_args:
        Processor.add_args(arg_parser)

    parent_add_args.assert_called_once()

    # An empty argv exposes the registered defaults.
    defaults = arg_parser.parse_args([])
    assert hasattr(defaults, 'graph_url')
    assert defaults.graph_url == 'falkor://falkordb:6379'
    assert hasattr(defaults, 'database')
    assert defaults.database == 'falkordb'
def test_add_args_with_custom_values(self):
    """Long-form ``--graph-url`` / ``--database`` options must be parsed."""
    from argparse import ArgumentParser
    from unittest.mock import patch

    parent_hook = 'trustgraph.query.triples.falkordb.service.TriplesQueryService.add_args'
    arg_parser = ArgumentParser()

    with patch(parent_hook):
        Processor.add_args(arg_parser)

    argv = [
        '--graph-url', 'falkor://custom:6379',
        '--database', 'querydb',
    ]
    parsed = arg_parser.parse_args(argv)

    assert parsed.graph_url == 'falkor://custom:6379'
    assert parsed.database == 'querydb'
def test_add_args_short_form(self):
    """The short ``-g`` option must populate ``graph_url``."""
    from argparse import ArgumentParser
    from unittest.mock import patch

    parent_hook = 'trustgraph.query.triples.falkordb.service.TriplesQueryService.add_args'
    arg_parser = ArgumentParser()

    with patch(parent_hook):
        Processor.add_args(arg_parser)

    parsed = arg_parser.parse_args(['-g', 'falkor://short:6379'])
    assert parsed.graph_url == 'falkor://short:6379'
@patch('trustgraph.query.triples.falkordb.service.Processor.launch')
def test_run_function(self, mock_launch):
    """run() must call Processor.launch once with the ident and description."""
    from trustgraph.query.triples.falkordb.service import run, default_ident

    # Exact description text expected as the second positional argument.
    expected_description = "\nTriples query service for FalkorDB.\nInput is a (s, p, o) triple, some values may be null. Output is a list of\ntriples.\n"

    run()

    mock_launch.assert_called_once_with(default_ident, expected_description)

View file

@ -0,0 +1,568 @@
"""
Tests for Memgraph triples query service
"""
import pytest
from unittest.mock import MagicMock, patch
from trustgraph.query.triples.memgraph.service import Processor
from trustgraph.schema import Value, TriplesQueryRequest
class TestMemgraphQueryProcessor:
"""Test cases for Memgraph query processor"""
@pytest.fixture
def processor(self):
    """Provide a Processor built while ``GraphDatabase`` is patched.

    The patch is active only for the duration of the constructor call.
    """
    target = 'trustgraph.query.triples.memgraph.service.GraphDatabase'
    with patch(target):
        instance = Processor(
            taskgroup=MagicMock(),
            id='test-memgraph-query',
            graph_host='bolt://localhost:7687',
        )
    return instance
def test_create_value_with_http_uri(self, processor):
    """An ``http://`` string must come back as a Value flagged as a URI."""
    text = "http://example.com/resource"
    created = processor.create_value(text)

    assert isinstance(created, Value)
    assert created.value == text
    assert created.is_uri is True
def test_create_value_with_https_uri(self, processor):
    """An ``https://`` string must come back as a Value flagged as a URI."""
    text = "https://example.com/resource"
    created = processor.create_value(text)

    assert isinstance(created, Value)
    assert created.value == text
    assert created.is_uri is True
def test_create_value_with_literal(self, processor):
    """Plain text must come back as a Value that is not flagged as a URI."""
    text = "just a literal string"
    created = processor.create_value(text)

    assert isinstance(created, Value)
    assert created.value == text
    assert created.is_uri is False
def test_create_value_with_empty_string(self, processor):
    """The empty string must yield an empty, non-URI Value."""
    text = ""
    created = processor.create_value(text)

    assert isinstance(created, Value)
    assert created.value == text
    assert created.is_uri is False
def test_create_value_with_partial_uri(self, processor):
    """A bare scheme name ("http") is expected to be treated as a literal."""
    text = "http"
    created = processor.create_value(text)

    assert isinstance(created, Value)
    assert created.value == text
    assert created.is_uri is False
def test_create_value_with_ftp_uri(self, processor):
    """An ``ftp://`` string is expected NOT to be flagged as a URI."""
    text = "ftp://example.com/file"
    created = processor.create_value(text)

    assert isinstance(created, Value)
    assert created.value == text
    assert created.is_uri is False
@patch('trustgraph.query.triples.memgraph.service.GraphDatabase')
def test_processor_initialization_with_defaults(self, mock_graph_db):
    """Constructing with only a taskgroup must use the default settings.

    Asserts that ``GraphDatabase.driver`` receives 'bolt://memgraph:7687'
    with auth ('memgraph', 'password') and that ``db`` is 'memgraph'.
    """
    driver = MagicMock()
    mock_graph_db.driver.return_value = driver

    built = Processor(taskgroup=MagicMock())

    assert built.db == 'memgraph'
    mock_graph_db.driver.assert_called_once_with(
        'bolt://memgraph:7687',
        auth=('memgraph', 'password'),
    )
@patch('trustgraph.query.triples.memgraph.service.GraphDatabase')
def test_processor_initialization_with_custom_params(self, mock_graph_db):
    """Explicit host, credentials and database must reach the driver.

    Asserts the host and (username, password) pair are passed to
    ``GraphDatabase.driver`` and that ``db`` holds the database name.
    """
    driver = MagicMock()
    mock_graph_db.driver.return_value = driver

    built = Processor(
        taskgroup=MagicMock(),
        graph_host='bolt://custom:7687',
        username='queryuser',
        password='querypass',
        database='customdb',
    )

    assert built.db == 'customdb'
    mock_graph_db.driver.assert_called_once_with(
        'bolt://custom:7687',
        auth=('queryuser', 'querypass'),
    )
@patch('trustgraph.query.triples.memgraph.service.GraphDatabase')
@pytest.mark.asyncio
async def test_query_triples_spo_query(self, mock_graph_db):
    """Query with subject, predicate and object all specified.

    Every ``execute_query`` call returns the same one-record tuple. The
    test expects two driver queries and two triples, and checks that the
    first triple echoes the requested s/p/o values.
    """
    driver = MagicMock()
    # Same (records, None, None) tuple is handed back for every call.
    driver.execute_query.return_value = ([MagicMock()], None, None)
    mock_graph_db.driver.return_value = driver

    service = Processor(taskgroup=MagicMock())
    request = TriplesQueryRequest(
        user='test_user',
        collection='test_collection',
        s=Value(value="http://example.com/subject", is_uri=True),
        p=Value(value="http://example.com/predicate", is_uri=True),
        o=Value(value="literal object", is_uri=False),
        limit=100,
    )

    triples = await service.query_triples(request)

    assert driver.execute_query.call_count == 2
    assert len(triples) == 2
    first = triples[0]
    assert (first.s.value, first.p.value, first.o.value) == (
        "http://example.com/subject",
        "http://example.com/predicate",
        "literal object",
    )
@patch('trustgraph.query.triples.memgraph.service.GraphDatabase')
@pytest.mark.asyncio
async def test_query_triples_sp_query(self, mock_graph_db):
    """Query with subject and predicate specified, object left as None.

    Each mocked record's ``data()`` exposes a "dest" value. The test
    expects two driver queries and two triples whose objects are those
    values, in call order.
    """
    literal_row = MagicMock()
    literal_row.data.return_value = {"dest": "literal result"}
    uri_row = MagicMock()
    uri_row.data.return_value = {"dest": "http://example.com/uri_result"}

    driver = MagicMock()
    # Consecutive execute_query calls return these tuples in order.
    driver.execute_query.side_effect = [
        ([literal_row], None, None),
        ([uri_row], None, None),
    ]
    mock_graph_db.driver.return_value = driver

    service = Processor(taskgroup=MagicMock())
    request = TriplesQueryRequest(
        user='test_user',
        collection='test_collection',
        s=Value(value="http://example.com/subject", is_uri=True),
        p=Value(value="http://example.com/predicate", is_uri=True),
        o=None,
        limit=100,
    )

    triples = await service.query_triples(request)

    assert driver.execute_query.call_count == 2
    assert len(triples) == 2
    found = [(t.s.value, t.p.value, t.o.value) for t in triples]
    assert found == [
        ("http://example.com/subject", "http://example.com/predicate", "literal result"),
        ("http://example.com/subject", "http://example.com/predicate", "http://example.com/uri_result"),
    ]
@patch('trustgraph.query.triples.memgraph.service.GraphDatabase')
@pytest.mark.asyncio
async def test_query_triples_so_query(self, mock_graph_db):
    """Query with subject and object specified, predicate left as None.

    Each mocked record's ``data()`` exposes a "rel" value. The test
    expects two driver queries and two triples whose predicates are those
    values, in call order.
    """
    first_row = MagicMock()
    first_row.data.return_value = {"rel": "http://example.com/pred1"}
    second_row = MagicMock()
    second_row.data.return_value = {"rel": "http://example.com/pred2"}

    driver = MagicMock()
    # Consecutive execute_query calls return these tuples in order.
    driver.execute_query.side_effect = [
        ([first_row], None, None),
        ([second_row], None, None),
    ]
    mock_graph_db.driver.return_value = driver

    service = Processor(taskgroup=MagicMock())
    request = TriplesQueryRequest(
        user='test_user',
        collection='test_collection',
        s=Value(value="http://example.com/subject", is_uri=True),
        p=None,
        o=Value(value="literal object", is_uri=False),
        limit=100,
    )

    triples = await service.query_triples(request)

    assert driver.execute_query.call_count == 2
    assert len(triples) == 2
    found = [(t.s.value, t.p.value, t.o.value) for t in triples]
    assert found == [
        ("http://example.com/subject", "http://example.com/pred1", "literal object"),
        ("http://example.com/subject", "http://example.com/pred2", "literal object"),
    ]
@patch('trustgraph.query.triples.memgraph.service.GraphDatabase')
@pytest.mark.asyncio
async def test_query_triples_s_query(self, mock_graph_db):
    """Query with only the subject specified.

    Each mocked record's ``data()`` exposes "rel" and "dest" values. The
    test expects two driver queries and two triples whose predicate and
    object are those values.
    """
    first_row = MagicMock()
    first_row.data.return_value = {"rel": "http://example.com/pred1", "dest": "literal1"}
    second_row = MagicMock()
    second_row.data.return_value = {"rel": "http://example.com/pred2", "dest": "http://example.com/uri2"}

    driver = MagicMock()
    # Consecutive execute_query calls return these tuples in order.
    driver.execute_query.side_effect = [
        ([first_row], None, None),
        ([second_row], None, None),
    ]
    mock_graph_db.driver.return_value = driver

    service = Processor(taskgroup=MagicMock())
    request = TriplesQueryRequest(
        user='test_user',
        collection='test_collection',
        s=Value(value="http://example.com/subject", is_uri=True),
        p=None,
        o=None,
        limit=100,
    )

    triples = await service.query_triples(request)

    assert driver.execute_query.call_count == 2
    assert len(triples) == 2
    found = [(t.s.value, t.p.value, t.o.value) for t in triples]
    assert found == [
        ("http://example.com/subject", "http://example.com/pred1", "literal1"),
        ("http://example.com/subject", "http://example.com/pred2", "http://example.com/uri2"),
    ]
@patch('trustgraph.query.triples.memgraph.service.GraphDatabase')
@pytest.mark.asyncio
async def test_query_triples_po_query(self, mock_graph_db):
    """Query with predicate and object specified, subject left as None.

    Each mocked record's ``data()`` exposes a "src" value. The test
    expects two driver queries and two triples whose subjects are those
    values, in call order.
    """
    first_row = MagicMock()
    first_row.data.return_value = {"src": "http://example.com/subj1"}
    second_row = MagicMock()
    second_row.data.return_value = {"src": "http://example.com/subj2"}

    driver = MagicMock()
    # Consecutive execute_query calls return these tuples in order.
    driver.execute_query.side_effect = [
        ([first_row], None, None),
        ([second_row], None, None),
    ]
    mock_graph_db.driver.return_value = driver

    service = Processor(taskgroup=MagicMock())
    request = TriplesQueryRequest(
        user='test_user',
        collection='test_collection',
        s=None,
        p=Value(value="http://example.com/predicate", is_uri=True),
        o=Value(value="literal object", is_uri=False),
        limit=100,
    )

    triples = await service.query_triples(request)

    assert driver.execute_query.call_count == 2
    assert len(triples) == 2
    found = [(t.s.value, t.p.value, t.o.value) for t in triples]
    assert found == [
        ("http://example.com/subj1", "http://example.com/predicate", "literal object"),
        ("http://example.com/subj2", "http://example.com/predicate", "literal object"),
    ]
@patch('trustgraph.query.triples.memgraph.service.GraphDatabase')
@pytest.mark.asyncio
async def test_query_triples_p_query(self, mock_graph_db):
    """Query with only the predicate specified.

    Each mocked record's ``data()`` exposes "src" and "dest" values. The
    test expects two driver queries and two triples whose subject and
    object are those values.
    """
    first_row = MagicMock()
    first_row.data.return_value = {"src": "http://example.com/subj1", "dest": "literal1"}
    second_row = MagicMock()
    second_row.data.return_value = {"src": "http://example.com/subj2", "dest": "http://example.com/uri2"}

    driver = MagicMock()
    # Consecutive execute_query calls return these tuples in order.
    driver.execute_query.side_effect = [
        ([first_row], None, None),
        ([second_row], None, None),
    ]
    mock_graph_db.driver.return_value = driver

    service = Processor(taskgroup=MagicMock())
    request = TriplesQueryRequest(
        user='test_user',
        collection='test_collection',
        s=None,
        p=Value(value="http://example.com/predicate", is_uri=True),
        o=None,
        limit=100,
    )

    triples = await service.query_triples(request)

    assert driver.execute_query.call_count == 2
    assert len(triples) == 2
    found = [(t.s.value, t.p.value, t.o.value) for t in triples]
    assert found == [
        ("http://example.com/subj1", "http://example.com/predicate", "literal1"),
        ("http://example.com/subj2", "http://example.com/predicate", "http://example.com/uri2"),
    ]
@patch('trustgraph.query.triples.memgraph.service.GraphDatabase')
@pytest.mark.asyncio
async def test_query_triples_o_query(self, mock_graph_db):
    """Query with only the object specified.

    Each mocked record's ``data()`` exposes "src" and "rel" values. The
    test expects two driver queries and two triples whose subject and
    predicate are those values.
    """
    first_row = MagicMock()
    first_row.data.return_value = {"src": "http://example.com/subj1", "rel": "http://example.com/pred1"}
    second_row = MagicMock()
    second_row.data.return_value = {"src": "http://example.com/subj2", "rel": "http://example.com/pred2"}

    driver = MagicMock()
    # Consecutive execute_query calls return these tuples in order.
    driver.execute_query.side_effect = [
        ([first_row], None, None),
        ([second_row], None, None),
    ]
    mock_graph_db.driver.return_value = driver

    service = Processor(taskgroup=MagicMock())
    request = TriplesQueryRequest(
        user='test_user',
        collection='test_collection',
        s=None,
        p=None,
        o=Value(value="literal object", is_uri=False),
        limit=100,
    )

    triples = await service.query_triples(request)

    assert driver.execute_query.call_count == 2
    assert len(triples) == 2
    found = [(t.s.value, t.p.value, t.o.value) for t in triples]
    assert found == [
        ("http://example.com/subj1", "http://example.com/pred1", "literal object"),
        ("http://example.com/subj2", "http://example.com/pred2", "literal object"),
    ]
@patch('trustgraph.query.triples.memgraph.service.GraphDatabase')
@pytest.mark.asyncio
async def test_query_triples_wildcard_query(self, mock_graph_db):
    """Query with subject, predicate and object all left as None.

    Each mocked record's ``data()`` exposes "src", "rel" and "dest"
    values. The test expects two driver queries and two triples built
    from those values as (subject, predicate, object).
    """
    first_row = MagicMock()
    first_row.data.return_value = {"src": "http://example.com/s1", "rel": "http://example.com/p1", "dest": "literal1"}
    second_row = MagicMock()
    second_row.data.return_value = {"src": "http://example.com/s2", "rel": "http://example.com/p2", "dest": "http://example.com/o2"}

    driver = MagicMock()
    # Consecutive execute_query calls return these tuples in order.
    driver.execute_query.side_effect = [
        ([first_row], None, None),
        ([second_row], None, None),
    ]
    mock_graph_db.driver.return_value = driver

    service = Processor(taskgroup=MagicMock())
    request = TriplesQueryRequest(
        user='test_user',
        collection='test_collection',
        s=None,
        p=None,
        o=None,
        limit=100,
    )

    triples = await service.query_triples(request)

    assert driver.execute_query.call_count == 2
    assert len(triples) == 2
    found = [(t.s.value, t.p.value, t.o.value) for t in triples]
    assert found == [
        ("http://example.com/s1", "http://example.com/p1", "literal1"),
        ("http://example.com/s2", "http://example.com/p2", "http://example.com/o2"),
    ]
@patch('trustgraph.query.triples.memgraph.service.GraphDatabase')
@pytest.mark.asyncio
async def test_query_triples_exception_handling(self, mock_graph_db):
    """An exception raised by the driver query must reach the caller.

    ``execute_query`` is rigged to raise; the test expects
    ``query_triples`` to raise an exception carrying the same message.
    """
    driver = MagicMock()
    driver.execute_query.side_effect = Exception("Database connection failed")
    mock_graph_db.driver.return_value = driver

    service = Processor(taskgroup=MagicMock())
    request = TriplesQueryRequest(
        user='test_user',
        collection='test_collection',
        s=Value(value="http://example.com/subject", is_uri=True),
        p=None,
        o=None,
        limit=100,
    )

    with pytest.raises(Exception, match="Database connection failed"):
        await service.query_triples(request)
def test_add_args_method(self):
    """add_args must call the parent hook and register the Memgraph options.

    With the parent ``TriplesQueryService.add_args`` patched out, parsing
    an empty command line must yield the default ``graph_host``,
    ``username``, ``password`` and ``database`` values.
    """
    from argparse import ArgumentParser
    from unittest.mock import patch

    parent_hook = 'trustgraph.query.triples.memgraph.service.TriplesQueryService.add_args'
    arg_parser = ArgumentParser()

    with patch(parent_hook) as parent_add_args:
        Processor.add_args(arg_parser)

    parent_add_args.assert_called_once()

    # An empty argv exposes the registered defaults.
    defaults = arg_parser.parse_args([])
    assert hasattr(defaults, 'graph_host')
    assert defaults.graph_host == 'bolt://memgraph:7687'
    assert hasattr(defaults, 'username')
    assert defaults.username == 'memgraph'
    assert hasattr(defaults, 'password')
    assert defaults.password == 'password'
    assert hasattr(defaults, 'database')
    assert defaults.database == 'memgraph'
def test_add_args_with_custom_values(self):
    """Check that long-form options given on the command line are parsed.

    The parent ``TriplesQueryService.add_args`` is patched out while
    ``Processor.add_args`` populates the parser.
    """
    # ``patch`` is already in scope (the decorators in this class use it),
    # so only ArgumentParser needs a local import.
    from argparse import ArgumentParser

    parser = ArgumentParser()

    with patch('trustgraph.query.triples.memgraph.service.TriplesQueryService.add_args'):
        Processor.add_args(parser)

    # Test parsing with custom values
    args = parser.parse_args([
        '--graph-host', 'bolt://custom:7687',
        '--username', 'queryuser',
        '--password', 'querypass',
        '--database', 'querydb',
    ])

    expected = {
        'graph_host': 'bolt://custom:7687',
        'username': 'queryuser',
        'password': 'querypass',
        'database': 'querydb',
    }
    for option, value in expected.items():
        assert getattr(args, option) == value
def test_add_args_short_form(self):
    """Check that ``-g`` is accepted and stored as ``graph_host``.

    The parent ``TriplesQueryService.add_args`` is patched out while
    ``Processor.add_args`` populates the parser.
    """
    # ``patch`` is already in scope (the decorators in this class use it),
    # so only ArgumentParser needs a local import.
    from argparse import ArgumentParser

    parser = ArgumentParser()

    with patch('trustgraph.query.triples.memgraph.service.TriplesQueryService.add_args'):
        Processor.add_args(parser)

    # Test parsing with short form
    args = parser.parse_args(['-g', 'bolt://short:7687'])

    assert args.graph_host == 'bolt://short:7687'
@patch('trustgraph.query.triples.memgraph.service.Processor.launch')
def test_run_function(self, mock_launch):
    """run() must call Processor.launch once with the default ident and description text."""
    from trustgraph.query.triples.memgraph.service import run, default_ident

    # Expected description, split per line of text for readability; the
    # concatenation is the exact string passed to launch.
    expected_description = (
        "\nTriples query service for memgraph.\n"
        "Input is a (s, p, o) triple, some values may be null. Output is a list of\n"
        "triples.\n"
    )

    run()

    mock_launch.assert_called_once_with(default_ident, expected_description)

View file

@ -0,0 +1,338 @@
"""
Tests for Neo4j triples query service
"""
import pytest
from unittest.mock import MagicMock, patch
from trustgraph.query.triples.neo4j.service import Processor
from trustgraph.schema import Value, TriplesQueryRequest
class TestNeo4jQueryProcessor:
"""Test cases for Neo4j query processor"""
@pytest.fixture
def processor(self):
    """Provide a Processor constructed while GraphDatabase is patched out."""
    graph_db_patch = patch('trustgraph.query.triples.neo4j.service.GraphDatabase')
    with graph_db_patch:
        instance = Processor(
            taskgroup=MagicMock(),
            id='test-neo4j-query',
            graph_host='bolt://localhost:7687',
        )
    return instance
def test_create_value_with_http_uri(self, processor):
    """An http:// string comes back as a Value flagged as a URI."""
    text = "http://example.com/resource"

    value = processor.create_value(text)

    assert isinstance(value, Value)
    assert value.value == text
    assert value.is_uri is True
def test_create_value_with_https_uri(self, processor):
    """An https:// string comes back as a Value flagged as a URI."""
    text = "https://example.com/resource"

    value = processor.create_value(text)

    assert isinstance(value, Value)
    assert value.value == text
    assert value.is_uri is True
def test_create_value_with_literal(self, processor):
    """Plain text comes back as a Value that is not flagged as a URI."""
    text = "just a literal string"

    value = processor.create_value(text)

    assert isinstance(value, Value)
    assert value.value == text
    assert value.is_uri is False
def test_create_value_with_empty_string(self, processor):
    """The empty string comes back as an empty, non-URI Value."""
    text = ""

    value = processor.create_value(text)

    assert isinstance(value, Value)
    assert value.value == text
    assert value.is_uri is False
def test_create_value_with_partial_uri(self, processor):
    """The bare word "http" (no scheme separator) is not flagged as a URI."""
    text = "http"

    value = processor.create_value(text)

    assert isinstance(value, Value)
    assert value.value == text
    assert value.is_uri is False
def test_create_value_with_ftp_uri(self, processor):
    """An ftp:// string is expected to come back as a non-URI Value."""
    text = "ftp://example.com/file"

    value = processor.create_value(text)

    assert isinstance(value, Value)
    assert value.value == text
    assert value.is_uri is False
@patch('trustgraph.query.triples.neo4j.service.GraphDatabase')
def test_processor_initialization_with_defaults(self, mock_graph_db):
    """With only a taskgroup supplied, the default connection settings are used."""
    mock_graph_db.driver.return_value = MagicMock()

    service = Processor(taskgroup=MagicMock())

    # The driver must be created exactly once, with the default host and
    # credentials.
    mock_graph_db.driver.assert_called_once_with(
        'bolt://neo4j:7687',
        auth=('neo4j', 'password'),
    )
    assert service.db == 'neo4j'
@patch('trustgraph.query.triples.neo4j.service.GraphDatabase')
def test_processor_initialization_with_custom_params(self, mock_graph_db):
    """Explicit host, credentials and database are passed through on construction."""
    mock_graph_db.driver.return_value = MagicMock()

    service = Processor(
        taskgroup=MagicMock(),
        graph_host='bolt://custom:7687',
        username='queryuser',
        password='querypass',
        database='customdb',
    )

    # Host and credentials go to the driver factory; the database name is
    # kept on the processor.
    mock_graph_db.driver.assert_called_once_with(
        'bolt://custom:7687',
        auth=('queryuser', 'querypass'),
    )
    assert service.db == 'customdb'
@patch('trustgraph.query.triples.neo4j.service.GraphDatabase')
@pytest.mark.asyncio
async def test_query_triples_spo_query(self, mock_graph_db):
    """Test SPO query (all values specified).

    The patched driver hands back the same one-record result for every
    ``execute_query`` call. The test expects two such calls (a literal and
    a URI lookup) and therefore two result triples, each carrying the
    subject, predicate and object values from the request.
    """
    taskgroup_mock = MagicMock()
    mock_driver = MagicMock()
    mock_graph_db.driver.return_value = mock_driver

    # Mock query results - both queries return one record each
    mock_records = [MagicMock()]
    mock_driver.execute_query.return_value = (mock_records, None, None)

    processor = Processor(taskgroup=taskgroup_mock)

    # Create query request
    query = TriplesQueryRequest(
        user='test_user',
        collection='test_collection',
        s=Value(value="http://example.com/subject", is_uri=True),
        p=Value(value="http://example.com/predicate", is_uri=True),
        o=Value(value="literal object", is_uri=False),
        limit=100
    )

    result = await processor.query_triples(query)

    # Verify both literal and URI queries were executed
    assert mock_driver.execute_query.call_count == 2

    # The queried triple appears twice - once from each query. Check every
    # returned triple, not just the first, so a wrong second entry is caught.
    assert len(result) == 2
    for triple in result:
        assert triple.s.value == "http://example.com/subject"
        assert triple.p.value == "http://example.com/predicate"
        assert triple.o.value == "literal object"
@patch('trustgraph.query.triples.neo4j.service.GraphDatabase')
@pytest.mark.asyncio
async def test_query_triples_sp_query(self, mock_graph_db):
    """SP query: subject and predicate fixed, object taken from the records."""
    driver = MagicMock()
    mock_graph_db.driver.return_value = driver

    # First execute_query call yields a literal object, second a URI object.
    literal_hit = MagicMock()
    literal_hit.data.return_value = {"dest": "literal result"}
    uri_hit = MagicMock()
    uri_hit.data.return_value = {"dest": "http://example.com/uri_result"}
    driver.execute_query.side_effect = [
        ([literal_hit], None, None),
        ([uri_hit], None, None),
    ]

    service = Processor(taskgroup=MagicMock())

    request = TriplesQueryRequest(
        user='test_user',
        collection='test_collection',
        s=Value(value="http://example.com/subject", is_uri=True),
        p=Value(value="http://example.com/predicate", is_uri=True),
        o=None,
        limit=100,
    )

    result = await service.query_triples(request)

    # Both the literal and the URI query must have run.
    assert driver.execute_query.call_count == 2

    # Two triples, sharing subject and predicate, in query order.
    assert len(result) == 2
    observed = [(t.s.value, t.p.value, t.o.value) for t in result]
    assert observed == [
        ("http://example.com/subject", "http://example.com/predicate", "literal result"),
        ("http://example.com/subject", "http://example.com/predicate", "http://example.com/uri_result"),
    ]
@patch('trustgraph.query.triples.neo4j.service.GraphDatabase')
@pytest.mark.asyncio
async def test_query_triples_wildcard_query(self, mock_graph_db):
    """Wildcard query: no s/p/o constraints, whole triples taken from the records."""
    driver = MagicMock()
    mock_graph_db.driver.return_value = driver

    # First execute_query call yields a triple with a literal object,
    # second a triple with a URI object.
    literal_hit = MagicMock()
    literal_hit.data.return_value = {"src": "http://example.com/s1", "rel": "http://example.com/p1", "dest": "literal1"}
    uri_hit = MagicMock()
    uri_hit.data.return_value = {"src": "http://example.com/s2", "rel": "http://example.com/p2", "dest": "http://example.com/o2"}
    driver.execute_query.side_effect = [
        ([literal_hit], None, None),
        ([uri_hit], None, None),
    ]

    service = Processor(taskgroup=MagicMock())

    request = TriplesQueryRequest(
        user='test_user',
        collection='test_collection',
        s=None,
        p=None,
        o=None,
        limit=100,
    )

    result = await service.query_triples(request)

    # Both the literal and the URI query must have run.
    assert driver.execute_query.call_count == 2

    # Two distinct triples, in query order.
    assert len(result) == 2
    observed = [(t.s.value, t.p.value, t.o.value) for t in result]
    assert observed == [
        ("http://example.com/s1", "http://example.com/p1", "literal1"),
        ("http://example.com/s2", "http://example.com/p2", "http://example.com/o2"),
    ]
@patch('trustgraph.query.triples.neo4j.service.GraphDatabase')
@pytest.mark.asyncio
async def test_query_triples_exception_handling(self, mock_graph_db):
    """A driver failure during a query must propagate out of query_triples."""
    # Driver whose execute_query raises on every call.
    failing_driver = MagicMock()
    failing_driver.execute_query.side_effect = Exception("Database connection failed")
    mock_graph_db.driver.return_value = failing_driver

    service = Processor(taskgroup=MagicMock())

    # Subject-only request: predicate and object are left unset.
    subject = Value(value="http://example.com/subject", is_uri=True)
    request = TriplesQueryRequest(
        user='test_user',
        collection='test_collection',
        s=subject,
        p=None,
        o=None,
        limit=100,
    )

    # The original error message must reach the caller unchanged.
    with pytest.raises(Exception, match="Database connection failed"):
        await service.query_triples(request)
def test_add_args_method(self):
    """Check that add_args registers the connection options with their defaults.

    The parent ``TriplesQueryService.add_args`` is replaced by a mock and the
    test confirms it is invoked exactly once. An empty command line is then
    parsed so that each option reports its default value.
    """
    # ``patch`` is already imported at module level, so only ArgumentParser
    # needs a local import.
    from argparse import ArgumentParser

    parser = ArgumentParser()

    with patch('trustgraph.query.triples.neo4j.service.TriplesQueryService.add_args') as mock_parent_add_args:
        Processor.add_args(parser)

    # Call records stay on the mock after the patch has been undone.
    mock_parent_add_args.assert_called_once()

    # Parse empty args to check defaults
    args = parser.parse_args([])

    expected_defaults = {
        'graph_host': 'bolt://neo4j:7687',
        'username': 'neo4j',
        'password': 'password',
        'database': 'neo4j',
    }
    for option, default in expected_defaults.items():
        assert hasattr(args, option)
        assert getattr(args, option) == default
def test_add_args_with_custom_values(self):
    """Check that long-form options given on the command line are parsed.

    The parent ``TriplesQueryService.add_args`` is patched out while
    ``Processor.add_args`` populates the parser.
    """
    # ``patch`` is already imported at module level, so only ArgumentParser
    # needs a local import.
    from argparse import ArgumentParser

    parser = ArgumentParser()

    with patch('trustgraph.query.triples.neo4j.service.TriplesQueryService.add_args'):
        Processor.add_args(parser)

    # Test parsing with custom values
    args = parser.parse_args([
        '--graph-host', 'bolt://custom:7687',
        '--username', 'queryuser',
        '--password', 'querypass',
        '--database', 'querydb',
    ])

    expected = {
        'graph_host': 'bolt://custom:7687',
        'username': 'queryuser',
        'password': 'querypass',
        'database': 'querydb',
    }
    for option, value in expected.items():
        assert getattr(args, option) == value
def test_add_args_short_form(self):
    """Check that ``-g`` is accepted and stored as ``graph_host``.

    The parent ``TriplesQueryService.add_args`` is patched out while
    ``Processor.add_args`` populates the parser.
    """
    # ``patch`` is already imported at module level, so only ArgumentParser
    # needs a local import.
    from argparse import ArgumentParser

    parser = ArgumentParser()

    with patch('trustgraph.query.triples.neo4j.service.TriplesQueryService.add_args'):
        Processor.add_args(parser)

    # Test parsing with short form
    args = parser.parse_args(['-g', 'bolt://short:7687'])

    assert args.graph_host == 'bolt://short:7687'
@patch('trustgraph.query.triples.neo4j.service.Processor.launch')
def test_run_function(self, mock_launch):
    """run() must call Processor.launch once with the default ident and description text."""
    from trustgraph.query.triples.neo4j.service import run, default_ident

    # Expected description, split per line of text for readability; the
    # concatenation is the exact string passed to launch.
    expected_description = (
        "\nTriples query service for neo4j.\n"
        "Input is a (s, p, o) triple, some values may be null. Output is a list of\n"
        "triples.\n"
    )

    run()

    mock_launch.assert_called_once_with(default_ident, expected_description)