mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-04-26 08:56:21 +02:00
Fix hard coded vector size (#555)
* Fixed hard-coded embeddings store size * Vector store lazy-creates collections, different collections for different dimension lengths. * Added tech spec for vector store lifecycle * Fixed some tests for the new spec
This commit is contained in:
parent
05b9063fea
commit
6129bb68c1
22 changed files with 793 additions and 572 deletions
|
|
@ -119,8 +119,8 @@ class TestPineconeDocEmbeddingsQueryProcessor:
|
|||
|
||||
chunks = await processor.query_document_embeddings(message)
|
||||
|
||||
# Verify index was accessed correctly
|
||||
expected_index_name = "d-test_user-test_collection"
|
||||
# Verify index was accessed correctly (with dimension suffix)
|
||||
expected_index_name = "d-test_user-test_collection-3" # 3 dimensions
|
||||
processor.pinecone.Index.assert_called_once_with(expected_index_name)
|
||||
|
||||
# Verify query parameters
|
||||
|
|
@ -264,10 +264,12 @@ class TestPineconeDocEmbeddingsQueryProcessor:
|
|||
|
||||
chunks = await processor.query_document_embeddings(message)
|
||||
|
||||
# Verify same index used for both vectors
|
||||
expected_index_name = "d-test_user-test_collection"
|
||||
# Verify different indexes used for different dimensions
|
||||
assert processor.pinecone.Index.call_count == 2
|
||||
processor.pinecone.Index.assert_called_with(expected_index_name)
|
||||
index_calls = processor.pinecone.Index.call_args_list
|
||||
index_names = [call[0][0] for call in index_calls]
|
||||
assert "d-test_user-test_collection-2" in index_names # 2D vector
|
||||
assert "d-test_user-test_collection-4" in index_names # 4D vector
|
||||
|
||||
# Verify both queries were made
|
||||
assert mock_index.query.call_count == 2
|
||||
|
|
|
|||
|
|
@ -103,8 +103,8 @@ class TestQdrantDocEmbeddingsQuery(IsolatedAsyncioTestCase):
|
|||
result = await processor.query_document_embeddings(mock_message)
|
||||
|
||||
# Assert
|
||||
# Verify query was called with correct parameters
|
||||
expected_collection = 'd_test_user_test_collection'
|
||||
# Verify query was called with correct parameters (with dimension suffix)
|
||||
expected_collection = 'd_test_user_test_collection_3' # 3 dimensions
|
||||
mock_qdrant_instance.query_points.assert_called_once_with(
|
||||
collection_name=expected_collection,
|
||||
query=[0.1, 0.2, 0.3],
|
||||
|
|
@ -164,9 +164,9 @@ class TestQdrantDocEmbeddingsQuery(IsolatedAsyncioTestCase):
|
|||
# Assert
|
||||
# Verify query was called twice
|
||||
assert mock_qdrant_instance.query_points.call_count == 2
|
||||
|
||||
# Verify both collections were queried
|
||||
expected_collection = 'd_multi_user_multi_collection'
|
||||
|
||||
# Verify both collections were queried (both 2-dimensional vectors)
|
||||
expected_collection = 'd_multi_user_multi_collection_2' # 2 dimensions
|
||||
calls = mock_qdrant_instance.query_points.call_args_list
|
||||
assert calls[0][1]['collection_name'] == expected_collection
|
||||
assert calls[1][1]['collection_name'] == expected_collection
|
||||
|
|
@ -301,13 +301,13 @@ class TestQdrantDocEmbeddingsQuery(IsolatedAsyncioTestCase):
|
|||
# Verify query was called twice with different collections
|
||||
assert mock_qdrant_instance.query_points.call_count == 2
|
||||
calls = mock_qdrant_instance.query_points.call_args_list
|
||||
|
||||
|
||||
# First call should use 2D collection
|
||||
assert calls[0][1]['collection_name'] == 'd_dim_user_dim_collection'
|
||||
assert calls[0][1]['collection_name'] == 'd_dim_user_dim_collection_2' # 2 dimensions
|
||||
assert calls[0][1]['query'] == [0.1, 0.2]
|
||||
|
||||
|
||||
# Second call should use 3D collection
|
||||
assert calls[1][1]['collection_name'] == 'd_dim_user_dim_collection'
|
||||
assert calls[1][1]['collection_name'] == 'd_dim_user_dim_collection_3' # 3 dimensions
|
||||
assert calls[1][1]['query'] == [0.3, 0.4, 0.5]
|
||||
|
||||
# Verify results
|
||||
|
|
|
|||
|
|
@ -147,8 +147,8 @@ class TestPineconeGraphEmbeddingsQueryProcessor:
|
|||
|
||||
entities = await processor.query_graph_embeddings(message)
|
||||
|
||||
# Verify index was accessed correctly
|
||||
expected_index_name = "t-test_user-test_collection"
|
||||
# Verify index was accessed correctly (with dimension suffix)
|
||||
expected_index_name = "t-test_user-test_collection-3" # 3 dimensions
|
||||
processor.pinecone.Index.assert_called_once_with(expected_index_name)
|
||||
|
||||
# Verify query parameters
|
||||
|
|
@ -290,10 +290,12 @@ class TestPineconeGraphEmbeddingsQueryProcessor:
|
|||
|
||||
entities = await processor.query_graph_embeddings(message)
|
||||
|
||||
# Verify same index used for both vectors
|
||||
expected_index_name = "t-test_user-test_collection"
|
||||
# Verify different indexes used for different dimensions
|
||||
assert processor.pinecone.Index.call_count == 2
|
||||
processor.pinecone.Index.assert_called_with(expected_index_name)
|
||||
index_calls = processor.pinecone.Index.call_args_list
|
||||
index_names = [call[0][0] for call in index_calls]
|
||||
assert "t-test_user-test_collection-2" in index_names # 2D vector
|
||||
assert "t-test_user-test_collection-4" in index_names # 4D vector
|
||||
|
||||
# Verify both queries were made
|
||||
assert mock_index.query.call_count == 2
|
||||
|
|
|
|||
|
|
@ -175,8 +175,8 @@ class TestQdrantGraphEmbeddingsQuery(IsolatedAsyncioTestCase):
|
|||
result = await processor.query_graph_embeddings(mock_message)
|
||||
|
||||
# Assert
|
||||
# Verify query was called with correct parameters
|
||||
expected_collection = 't_test_user_test_collection'
|
||||
# Verify query was called with correct parameters (with dimension suffix)
|
||||
expected_collection = 't_test_user_test_collection_3' # 3 dimensions
|
||||
mock_qdrant_instance.query_points.assert_called_once_with(
|
||||
collection_name=expected_collection,
|
||||
query=[0.1, 0.2, 0.3],
|
||||
|
|
@ -234,9 +234,9 @@ class TestQdrantGraphEmbeddingsQuery(IsolatedAsyncioTestCase):
|
|||
# Assert
|
||||
# Verify query was called twice
|
||||
assert mock_qdrant_instance.query_points.call_count == 2
|
||||
|
||||
# Verify both collections were queried
|
||||
expected_collection = 't_multi_user_multi_collection'
|
||||
|
||||
# Verify both collections were queried (both 2-dimensional vectors)
|
||||
expected_collection = 't_multi_user_multi_collection_2' # 2 dimensions
|
||||
calls = mock_qdrant_instance.query_points.call_args_list
|
||||
assert calls[0][1]['collection_name'] == expected_collection
|
||||
assert calls[1][1]['collection_name'] == expected_collection
|
||||
|
|
@ -372,13 +372,13 @@ class TestQdrantGraphEmbeddingsQuery(IsolatedAsyncioTestCase):
|
|||
# Verify query was called twice with different collections
|
||||
assert mock_qdrant_instance.query_points.call_count == 2
|
||||
calls = mock_qdrant_instance.query_points.call_args_list
|
||||
|
||||
|
||||
# First call should use 2D collection
|
||||
assert calls[0][1]['collection_name'] == 't_dim_user_dim_collection'
|
||||
assert calls[0][1]['collection_name'] == 't_dim_user_dim_collection_2' # 2 dimensions
|
||||
assert calls[0][1]['query'] == [0.1, 0.2]
|
||||
|
||||
|
||||
# Second call should use 3D collection
|
||||
assert calls[1][1]['collection_name'] == 't_dim_user_dim_collection'
|
||||
assert calls[1][1]['collection_name'] == 't_dim_user_dim_collection_3' # 3 dimensions
|
||||
assert calls[1][1]['query'] == [0.3, 0.4, 0.5]
|
||||
|
||||
# Verify results
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue