Fix tests (#666)

This commit is contained in:
cybermaggedon 2026-03-07 23:38:09 +00:00 committed by GitHub
parent 24bbe94136
commit 3bf8a65409
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 510 additions and 446 deletions

View file

@@ -77,9 +77,9 @@ class TestMilvusDocEmbeddingsQueryProcessor:
# Mock search results
mock_results = [
{"entity": {"doc": "First document chunk"}},
{"entity": {"doc": "Second document chunk"}},
{"entity": {"doc": "Third document chunk"}},
{"entity": {"chunk_id": "First document chunk"}},
{"entity": {"chunk_id": "Second document chunk"}},
{"entity": {"chunk_id": "Third document chunk"}},
]
processor.vecstore.search.return_value = mock_results
@@ -108,11 +108,11 @@ class TestMilvusDocEmbeddingsQueryProcessor:
# Mock search results - different results for each vector
mock_results_1 = [
{"entity": {"doc": "Document from first vector"}},
{"entity": {"doc": "Another doc from first vector"}},
{"entity": {"chunk_id": "Document from first vector"}},
{"entity": {"chunk_id": "Another doc from first vector"}},
]
mock_results_2 = [
{"entity": {"doc": "Document from second vector"}},
{"entity": {"chunk_id": "Document from second vector"}},
]
processor.vecstore.search.side_effect = [mock_results_1, mock_results_2]
@@ -147,10 +147,10 @@ class TestMilvusDocEmbeddingsQueryProcessor:
# Mock search results - more results than limit
mock_results = [
{"entity": {"doc": "Document 1"}},
{"entity": {"doc": "Document 2"}},
{"entity": {"doc": "Document 3"}},
{"entity": {"doc": "Document 4"}},
{"entity": {"chunk_id": "Document 1"}},
{"entity": {"chunk_id": "Document 2"}},
{"entity": {"chunk_id": "Document 3"}},
{"entity": {"chunk_id": "Document 4"}},
]
processor.vecstore.search.return_value = mock_results
@@ -217,9 +217,9 @@ class TestMilvusDocEmbeddingsQueryProcessor:
# Mock search results with Unicode content
mock_results = [
{"entity": {"doc": "Document with Unicode: éñ中文🚀"}},
{"entity": {"doc": "Regular ASCII document"}},
{"entity": {"doc": "Document with émojis: 😀🎉"}},
{"entity": {"chunk_id": "Document with Unicode: éñ中文🚀"}},
{"entity": {"chunk_id": "Regular ASCII document"}},
{"entity": {"chunk_id": "Document with émojis: 😀🎉"}},
]
processor.vecstore.search.return_value = mock_results
@@ -244,8 +244,8 @@ class TestMilvusDocEmbeddingsQueryProcessor:
# Mock search results with large content
large_doc = "A" * 10000 # 10KB of content
mock_results = [
{"entity": {"doc": large_doc}},
{"entity": {"doc": "Small document"}},
{"entity": {"chunk_id": large_doc}},
{"entity": {"chunk_id": "Small document"}},
]
processor.vecstore.search.return_value = mock_results
@@ -268,9 +268,9 @@ class TestMilvusDocEmbeddingsQueryProcessor:
# Mock search results with special characters
mock_results = [
{"entity": {"doc": "Document with \"quotes\" and 'apostrophes'"}},
{"entity": {"doc": "Document with\nnewlines\tand\ttabs"}},
{"entity": {"doc": "Document with special chars: @#$%^&*()"}},
{"entity": {"chunk_id": "Document with \"quotes\" and 'apostrophes'"}},
{"entity": {"chunk_id": "Document with\nnewlines\tand\ttabs"}},
{"entity": {"chunk_id": "Document with special chars: @#$%^&*()"}},
]
processor.vecstore.search.return_value = mock_results
@@ -350,9 +350,9 @@ class TestMilvusDocEmbeddingsQueryProcessor:
)
# Mock search results for each vector
mock_results_1 = [{"entity": {"doc": "Document from 2D vector"}}]
mock_results_2 = [{"entity": {"doc": "Document from 4D vector"}}]
mock_results_3 = [{"entity": {"doc": "Document from 3D vector"}}]
mock_results_1 = [{"entity": {"chunk_id": "Document from 2D vector"}}]
mock_results_2 = [{"entity": {"chunk_id": "Document from 4D vector"}}]
mock_results_3 = [{"entity": {"chunk_id": "Document from 3D vector"}}]
processor.vecstore.search.side_effect = [mock_results_1, mock_results_2, mock_results_3]
result = await processor.query_document_embeddings(query)
@@ -378,12 +378,12 @@ class TestMilvusDocEmbeddingsQueryProcessor:
# Mock search results with duplicates across vectors
mock_results_1 = [
{"entity": {"doc": "Document A"}},
{"entity": {"doc": "Document B"}},
{"entity": {"chunk_id": "Document A"}},
{"entity": {"chunk_id": "Document B"}},
]
mock_results_2 = [
{"entity": {"doc": "Document B"}}, # Duplicate
{"entity": {"doc": "Document C"}},
{"entity": {"chunk_id": "Document B"}}, # Duplicate
{"entity": {"chunk_id": "Document C"}},
]
processor.vecstore.search.side_effect = [mock_results_1, mock_results_2]
@@ -458,5 +458,5 @@ class TestMilvusDocEmbeddingsQueryProcessor:
mock_launch.assert_called_once_with(
default_ident,
"\nDocument embeddings query service. Input is vector, output is an array\nof chunks\n"
"\nDocument embeddings query service. Input is vector, output is an array\nof chunk_ids\n"
)

View file

@@ -77,9 +77,9 @@ class TestQdrantDocEmbeddingsQuery(IsolatedAsyncioTestCase):
# Mock query response
mock_point1 = MagicMock()
mock_point1.payload = {'doc': 'first document chunk'}
mock_point1.payload = {'chunk_id': 'first document chunk'}
mock_point2 = MagicMock()
mock_point2.payload = {'doc': 'second document chunk'}
mock_point2.payload = {'chunk_id': 'second document chunk'}
mock_response = MagicMock()
mock_response.points = [mock_point1, mock_point2]
@@ -132,11 +132,11 @@ class TestQdrantDocEmbeddingsQuery(IsolatedAsyncioTestCase):
# Mock query responses for different vectors
mock_point1 = MagicMock()
mock_point1.payload = {'doc': 'document from vector 1'}
mock_point1.payload = {'chunk_id': 'document from vector 1'}
mock_point2 = MagicMock()
mock_point2.payload = {'doc': 'document from vector 2'}
mock_point2.payload = {'chunk_id': 'document from vector 2'}
mock_point3 = MagicMock()
mock_point3.payload = {'doc': 'another document from vector 2'}
mock_point3.payload = {'chunk_id': 'another document from vector 2'}
mock_response1 = MagicMock()
mock_response1.points = [mock_point1]
@@ -192,7 +192,7 @@ class TestQdrantDocEmbeddingsQuery(IsolatedAsyncioTestCase):
mock_points = []
for i in range(10):
mock_point = MagicMock()
mock_point.payload = {'doc': f'document chunk {i}'}
mock_point.payload = {'chunk_id': f'document chunk {i}'}
mock_points.append(mock_point)
mock_response = MagicMock()
@@ -270,9 +270,9 @@ class TestQdrantDocEmbeddingsQuery(IsolatedAsyncioTestCase):
# Mock query responses
mock_point1 = MagicMock()
mock_point1.payload = {'doc': 'document from 2D vector'}
mock_point1.payload = {'chunk_id': 'document from 2D vector'}
mock_point2 = MagicMock()
mock_point2.payload = {'doc': 'document from 3D vector'}
mock_point2.payload = {'chunk_id': 'document from 3D vector'}
mock_response1 = MagicMock()
mock_response1.points = [mock_point1]
@@ -326,9 +326,9 @@ class TestQdrantDocEmbeddingsQuery(IsolatedAsyncioTestCase):
# Mock query response with UTF-8 content
mock_point1 = MagicMock()
mock_point1.payload = {'doc': 'Document with UTF-8: café, naïve, résumé'}
mock_point1.payload = {'chunk_id': 'Document with UTF-8: café, naïve, résumé'}
mock_point2 = MagicMock()
mock_point2.payload = {'doc': 'Chinese text: 你好世界'}
mock_point2.payload = {'chunk_id': 'Chinese text: 你好世界'}
mock_response = MagicMock()
mock_response.points = [mock_point1, mock_point2]
@@ -399,7 +399,7 @@ class TestQdrantDocEmbeddingsQuery(IsolatedAsyncioTestCase):
# Mock query response
mock_point = MagicMock()
mock_point.payload = {'doc': 'document chunk'}
mock_point.payload = {'chunk_id': 'document chunk'}
mock_response = MagicMock()
mock_response.points = [mock_point]
mock_qdrant_instance.query_points.return_value = mock_response
@@ -442,9 +442,9 @@ class TestQdrantDocEmbeddingsQuery(IsolatedAsyncioTestCase):
# Mock query response with fewer results than limit
mock_point1 = MagicMock()
mock_point1.payload = {'doc': 'document 1'}
mock_point1.payload = {'chunk_id': 'document 1'}
mock_point2 = MagicMock()
mock_point2.payload = {'doc': 'document 2'}
mock_point2.payload = {'chunk_id': 'document 2'}
mock_response = MagicMock()
mock_response.points = [mock_point1, mock_point2]
@@ -487,11 +487,11 @@ class TestQdrantDocEmbeddingsQuery(IsolatedAsyncioTestCase):
mock_qdrant_instance = MagicMock()
mock_qdrant_client.return_value = mock_qdrant_instance
# Mock query response with missing 'doc' key
# Mock query response with missing 'chunk_id' key
mock_point1 = MagicMock()
mock_point1.payload = {'doc': 'valid document'}
mock_point1.payload = {'chunk_id': 'valid document'}
mock_point2 = MagicMock()
mock_point2.payload = {} # Missing 'doc' key
mock_point2.payload = {} # Missing 'chunk_id' key
mock_point3 = MagicMock()
mock_point3.payload = {'other_key': 'invalid'} # Wrong key
@@ -514,7 +514,7 @@ class TestQdrantDocEmbeddingsQuery(IsolatedAsyncioTestCase):
mock_message.collection = 'payload_collection'
# Act & Assert
# This should raise a KeyError when trying to access payload['doc']
# This should raise a KeyError when trying to access payload['chunk_id']
with pytest.raises(KeyError):
await processor.query_document_embeddings(mock_message)