mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-04-25 08:26:21 +02:00
Fix tests (#666)
This commit is contained in:
parent
24bbe94136
commit
3bf8a65409
10 changed files with 510 additions and 446 deletions
|
|
@ -77,9 +77,9 @@ class TestMilvusDocEmbeddingsQueryProcessor:
|
|||
|
||||
# Mock search results
|
||||
mock_results = [
|
||||
{"entity": {"doc": "First document chunk"}},
|
||||
{"entity": {"doc": "Second document chunk"}},
|
||||
{"entity": {"doc": "Third document chunk"}},
|
||||
{"entity": {"chunk_id": "First document chunk"}},
|
||||
{"entity": {"chunk_id": "Second document chunk"}},
|
||||
{"entity": {"chunk_id": "Third document chunk"}},
|
||||
]
|
||||
processor.vecstore.search.return_value = mock_results
|
||||
|
||||
|
|
@ -108,11 +108,11 @@ class TestMilvusDocEmbeddingsQueryProcessor:
|
|||
|
||||
# Mock search results - different results for each vector
|
||||
mock_results_1 = [
|
||||
{"entity": {"doc": "Document from first vector"}},
|
||||
{"entity": {"doc": "Another doc from first vector"}},
|
||||
{"entity": {"chunk_id": "Document from first vector"}},
|
||||
{"entity": {"chunk_id": "Another doc from first vector"}},
|
||||
]
|
||||
mock_results_2 = [
|
||||
{"entity": {"doc": "Document from second vector"}},
|
||||
{"entity": {"chunk_id": "Document from second vector"}},
|
||||
]
|
||||
processor.vecstore.search.side_effect = [mock_results_1, mock_results_2]
|
||||
|
||||
|
|
@ -147,10 +147,10 @@ class TestMilvusDocEmbeddingsQueryProcessor:
|
|||
|
||||
# Mock search results - more results than limit
|
||||
mock_results = [
|
||||
{"entity": {"doc": "Document 1"}},
|
||||
{"entity": {"doc": "Document 2"}},
|
||||
{"entity": {"doc": "Document 3"}},
|
||||
{"entity": {"doc": "Document 4"}},
|
||||
{"entity": {"chunk_id": "Document 1"}},
|
||||
{"entity": {"chunk_id": "Document 2"}},
|
||||
{"entity": {"chunk_id": "Document 3"}},
|
||||
{"entity": {"chunk_id": "Document 4"}},
|
||||
]
|
||||
processor.vecstore.search.return_value = mock_results
|
||||
|
||||
|
|
@ -217,9 +217,9 @@ class TestMilvusDocEmbeddingsQueryProcessor:
|
|||
|
||||
# Mock search results with Unicode content
|
||||
mock_results = [
|
||||
{"entity": {"doc": "Document with Unicode: éñ中文🚀"}},
|
||||
{"entity": {"doc": "Regular ASCII document"}},
|
||||
{"entity": {"doc": "Document with émojis: 😀🎉"}},
|
||||
{"entity": {"chunk_id": "Document with Unicode: éñ中文🚀"}},
|
||||
{"entity": {"chunk_id": "Regular ASCII document"}},
|
||||
{"entity": {"chunk_id": "Document with émojis: 😀🎉"}},
|
||||
]
|
||||
processor.vecstore.search.return_value = mock_results
|
||||
|
||||
|
|
@ -244,8 +244,8 @@ class TestMilvusDocEmbeddingsQueryProcessor:
|
|||
# Mock search results with large content
|
||||
large_doc = "A" * 10000 # 10KB of content
|
||||
mock_results = [
|
||||
{"entity": {"doc": large_doc}},
|
||||
{"entity": {"doc": "Small document"}},
|
||||
{"entity": {"chunk_id": large_doc}},
|
||||
{"entity": {"chunk_id": "Small document"}},
|
||||
]
|
||||
processor.vecstore.search.return_value = mock_results
|
||||
|
||||
|
|
@ -268,9 +268,9 @@ class TestMilvusDocEmbeddingsQueryProcessor:
|
|||
|
||||
# Mock search results with special characters
|
||||
mock_results = [
|
||||
{"entity": {"doc": "Document with \"quotes\" and 'apostrophes'"}},
|
||||
{"entity": {"doc": "Document with\nnewlines\tand\ttabs"}},
|
||||
{"entity": {"doc": "Document with special chars: @#$%^&*()"}},
|
||||
{"entity": {"chunk_id": "Document with \"quotes\" and 'apostrophes'"}},
|
||||
{"entity": {"chunk_id": "Document with\nnewlines\tand\ttabs"}},
|
||||
{"entity": {"chunk_id": "Document with special chars: @#$%^&*()"}},
|
||||
]
|
||||
processor.vecstore.search.return_value = mock_results
|
||||
|
||||
|
|
@ -350,9 +350,9 @@ class TestMilvusDocEmbeddingsQueryProcessor:
|
|||
)
|
||||
|
||||
# Mock search results for each vector
|
||||
mock_results_1 = [{"entity": {"doc": "Document from 2D vector"}}]
|
||||
mock_results_2 = [{"entity": {"doc": "Document from 4D vector"}}]
|
||||
mock_results_3 = [{"entity": {"doc": "Document from 3D vector"}}]
|
||||
mock_results_1 = [{"entity": {"chunk_id": "Document from 2D vector"}}]
|
||||
mock_results_2 = [{"entity": {"chunk_id": "Document from 4D vector"}}]
|
||||
mock_results_3 = [{"entity": {"chunk_id": "Document from 3D vector"}}]
|
||||
processor.vecstore.search.side_effect = [mock_results_1, mock_results_2, mock_results_3]
|
||||
|
||||
result = await processor.query_document_embeddings(query)
|
||||
|
|
@ -378,12 +378,12 @@ class TestMilvusDocEmbeddingsQueryProcessor:
|
|||
|
||||
# Mock search results with duplicates across vectors
|
||||
mock_results_1 = [
|
||||
{"entity": {"doc": "Document A"}},
|
||||
{"entity": {"doc": "Document B"}},
|
||||
{"entity": {"chunk_id": "Document A"}},
|
||||
{"entity": {"chunk_id": "Document B"}},
|
||||
]
|
||||
mock_results_2 = [
|
||||
{"entity": {"doc": "Document B"}}, # Duplicate
|
||||
{"entity": {"doc": "Document C"}},
|
||||
{"entity": {"chunk_id": "Document B"}}, # Duplicate
|
||||
{"entity": {"chunk_id": "Document C"}},
|
||||
]
|
||||
processor.vecstore.search.side_effect = [mock_results_1, mock_results_2]
|
||||
|
||||
|
|
@ -458,5 +458,5 @@ class TestMilvusDocEmbeddingsQueryProcessor:
|
|||
|
||||
mock_launch.assert_called_once_with(
|
||||
default_ident,
|
||||
"\nDocument embeddings query service. Input is vector, output is an array\nof chunks\n"
|
||||
"\nDocument embeddings query service. Input is vector, output is an array\nof chunk_ids\n"
|
||||
)
|
||||
|
|
@ -77,9 +77,9 @@ class TestQdrantDocEmbeddingsQuery(IsolatedAsyncioTestCase):
|
|||
|
||||
# Mock query response
|
||||
mock_point1 = MagicMock()
|
||||
mock_point1.payload = {'doc': 'first document chunk'}
|
||||
mock_point1.payload = {'chunk_id': 'first document chunk'}
|
||||
mock_point2 = MagicMock()
|
||||
mock_point2.payload = {'doc': 'second document chunk'}
|
||||
mock_point2.payload = {'chunk_id': 'second document chunk'}
|
||||
|
||||
mock_response = MagicMock()
|
||||
mock_response.points = [mock_point1, mock_point2]
|
||||
|
|
@ -132,11 +132,11 @@ class TestQdrantDocEmbeddingsQuery(IsolatedAsyncioTestCase):
|
|||
|
||||
# Mock query responses for different vectors
|
||||
mock_point1 = MagicMock()
|
||||
mock_point1.payload = {'doc': 'document from vector 1'}
|
||||
mock_point1.payload = {'chunk_id': 'document from vector 1'}
|
||||
mock_point2 = MagicMock()
|
||||
mock_point2.payload = {'doc': 'document from vector 2'}
|
||||
mock_point2.payload = {'chunk_id': 'document from vector 2'}
|
||||
mock_point3 = MagicMock()
|
||||
mock_point3.payload = {'doc': 'another document from vector 2'}
|
||||
mock_point3.payload = {'chunk_id': 'another document from vector 2'}
|
||||
|
||||
mock_response1 = MagicMock()
|
||||
mock_response1.points = [mock_point1]
|
||||
|
|
@ -192,7 +192,7 @@ class TestQdrantDocEmbeddingsQuery(IsolatedAsyncioTestCase):
|
|||
mock_points = []
|
||||
for i in range(10):
|
||||
mock_point = MagicMock()
|
||||
mock_point.payload = {'doc': f'document chunk {i}'}
|
||||
mock_point.payload = {'chunk_id': f'document chunk {i}'}
|
||||
mock_points.append(mock_point)
|
||||
|
||||
mock_response = MagicMock()
|
||||
|
|
@ -270,9 +270,9 @@ class TestQdrantDocEmbeddingsQuery(IsolatedAsyncioTestCase):
|
|||
|
||||
# Mock query responses
|
||||
mock_point1 = MagicMock()
|
||||
mock_point1.payload = {'doc': 'document from 2D vector'}
|
||||
mock_point1.payload = {'chunk_id': 'document from 2D vector'}
|
||||
mock_point2 = MagicMock()
|
||||
mock_point2.payload = {'doc': 'document from 3D vector'}
|
||||
mock_point2.payload = {'chunk_id': 'document from 3D vector'}
|
||||
|
||||
mock_response1 = MagicMock()
|
||||
mock_response1.points = [mock_point1]
|
||||
|
|
@ -326,9 +326,9 @@ class TestQdrantDocEmbeddingsQuery(IsolatedAsyncioTestCase):
|
|||
|
||||
# Mock query response with UTF-8 content
|
||||
mock_point1 = MagicMock()
|
||||
mock_point1.payload = {'doc': 'Document with UTF-8: café, naïve, résumé'}
|
||||
mock_point1.payload = {'chunk_id': 'Document with UTF-8: café, naïve, résumé'}
|
||||
mock_point2 = MagicMock()
|
||||
mock_point2.payload = {'doc': 'Chinese text: 你好世界'}
|
||||
mock_point2.payload = {'chunk_id': 'Chinese text: 你好世界'}
|
||||
|
||||
mock_response = MagicMock()
|
||||
mock_response.points = [mock_point1, mock_point2]
|
||||
|
|
@ -399,7 +399,7 @@ class TestQdrantDocEmbeddingsQuery(IsolatedAsyncioTestCase):
|
|||
|
||||
# Mock query response
|
||||
mock_point = MagicMock()
|
||||
mock_point.payload = {'doc': 'document chunk'}
|
||||
mock_point.payload = {'chunk_id': 'document chunk'}
|
||||
mock_response = MagicMock()
|
||||
mock_response.points = [mock_point]
|
||||
mock_qdrant_instance.query_points.return_value = mock_response
|
||||
|
|
@ -442,9 +442,9 @@ class TestQdrantDocEmbeddingsQuery(IsolatedAsyncioTestCase):
|
|||
|
||||
# Mock query response with fewer results than limit
|
||||
mock_point1 = MagicMock()
|
||||
mock_point1.payload = {'doc': 'document 1'}
|
||||
mock_point1.payload = {'chunk_id': 'document 1'}
|
||||
mock_point2 = MagicMock()
|
||||
mock_point2.payload = {'doc': 'document 2'}
|
||||
mock_point2.payload = {'chunk_id': 'document 2'}
|
||||
|
||||
mock_response = MagicMock()
|
||||
mock_response.points = [mock_point1, mock_point2]
|
||||
|
|
@ -487,11 +487,11 @@ class TestQdrantDocEmbeddingsQuery(IsolatedAsyncioTestCase):
|
|||
mock_qdrant_instance = MagicMock()
|
||||
mock_qdrant_client.return_value = mock_qdrant_instance
|
||||
|
||||
# Mock query response with missing 'doc' key
|
||||
# Mock query response with missing 'chunk_id' key
|
||||
mock_point1 = MagicMock()
|
||||
mock_point1.payload = {'doc': 'valid document'}
|
||||
mock_point1.payload = {'chunk_id': 'valid document'}
|
||||
mock_point2 = MagicMock()
|
||||
mock_point2.payload = {} # Missing 'doc' key
|
||||
mock_point2.payload = {} # Missing 'chunk_id' key
|
||||
mock_point3 = MagicMock()
|
||||
mock_point3.payload = {'other_key': 'invalid'} # Wrong key
|
||||
|
||||
|
|
@ -514,7 +514,7 @@ class TestQdrantDocEmbeddingsQuery(IsolatedAsyncioTestCase):
|
|||
mock_message.collection = 'payload_collection'
|
||||
|
||||
# Act & Assert
|
||||
# This should raise a KeyError when trying to access payload['doc']
|
||||
# This should raise a KeyError when trying to access payload['chunk_id']
|
||||
with pytest.raises(KeyError):
|
||||
await processor.query_document_embeddings(mock_message)
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue