mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-04-25 08:26:21 +02:00
Collection management (#520)
* Tech spec * Refactored Cassanda knowledge graph for single table * Collection management, librarian services to manage metadata and collection deletion
This commit is contained in:
parent
48016d8fb2
commit
13ff7d765d
48 changed files with 2941 additions and 425 deletions
|
|
@ -120,7 +120,7 @@ class TestPineconeDocEmbeddingsQueryProcessor:
|
|||
chunks = await processor.query_document_embeddings(message)
|
||||
|
||||
# Verify index was accessed correctly
|
||||
expected_index_name = "d-test_user-test_collection-3"
|
||||
expected_index_name = "d-test_user-test_collection"
|
||||
processor.pinecone.Index.assert_called_once_with(expected_index_name)
|
||||
|
||||
# Verify query parameters
|
||||
|
|
@ -239,7 +239,7 @@ class TestPineconeDocEmbeddingsQueryProcessor:
|
|||
|
||||
@pytest.mark.asyncio
|
||||
async def test_query_document_embeddings_different_vector_dimensions(self, processor):
|
||||
"""Test querying with vectors of different dimensions"""
|
||||
"""Test querying with vectors of different dimensions using same index"""
|
||||
message = MagicMock()
|
||||
message.vectors = [
|
||||
[0.1, 0.2], # 2D vector
|
||||
|
|
@ -248,37 +248,33 @@ class TestPineconeDocEmbeddingsQueryProcessor:
|
|||
message.limit = 5
|
||||
message.user = 'test_user'
|
||||
message.collection = 'test_collection'
|
||||
|
||||
mock_index_2d = MagicMock()
|
||||
mock_index_4d = MagicMock()
|
||||
|
||||
def mock_index_side_effect(name):
|
||||
if name.endswith("-2"):
|
||||
return mock_index_2d
|
||||
elif name.endswith("-4"):
|
||||
return mock_index_4d
|
||||
|
||||
processor.pinecone.Index.side_effect = mock_index_side_effect
|
||||
|
||||
# Mock results for different dimensions
|
||||
|
||||
# Mock single index that handles all dimensions
|
||||
mock_index = MagicMock()
|
||||
processor.pinecone.Index.return_value = mock_index
|
||||
|
||||
# Mock results for different vector queries
|
||||
mock_results_2d = MagicMock()
|
||||
mock_results_2d.matches = [MagicMock(metadata={'doc': 'Document from 2D index'})]
|
||||
mock_index_2d.query.return_value = mock_results_2d
|
||||
|
||||
mock_results_2d.matches = [MagicMock(metadata={'doc': 'Document from 2D query'})]
|
||||
|
||||
mock_results_4d = MagicMock()
|
||||
mock_results_4d.matches = [MagicMock(metadata={'doc': 'Document from 4D index'})]
|
||||
mock_index_4d.query.return_value = mock_results_4d
|
||||
|
||||
mock_results_4d.matches = [MagicMock(metadata={'doc': 'Document from 4D query'})]
|
||||
|
||||
mock_index.query.side_effect = [mock_results_2d, mock_results_4d]
|
||||
|
||||
chunks = await processor.query_document_embeddings(message)
|
||||
|
||||
# Verify different indexes were used
|
||||
|
||||
# Verify same index used for both vectors
|
||||
expected_index_name = "d-test_user-test_collection"
|
||||
assert processor.pinecone.Index.call_count == 2
|
||||
mock_index_2d.query.assert_called_once()
|
||||
mock_index_4d.query.assert_called_once()
|
||||
|
||||
processor.pinecone.Index.assert_called_with(expected_index_name)
|
||||
|
||||
# Verify both queries were made
|
||||
assert mock_index.query.call_count == 2
|
||||
|
||||
# Verify results from both dimensions
|
||||
assert 'Document from 2D index' in chunks
|
||||
assert 'Document from 4D index' in chunks
|
||||
assert 'Document from 2D query' in chunks
|
||||
assert 'Document from 4D query' in chunks
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_query_document_embeddings_empty_vectors_list(self, processor):
|
||||
|
|
|
|||
|
|
@ -148,7 +148,7 @@ class TestPineconeGraphEmbeddingsQueryProcessor:
|
|||
entities = await processor.query_graph_embeddings(message)
|
||||
|
||||
# Verify index was accessed correctly
|
||||
expected_index_name = "t-test_user-test_collection-3"
|
||||
expected_index_name = "t-test_user-test_collection"
|
||||
processor.pinecone.Index.assert_called_once_with(expected_index_name)
|
||||
|
||||
# Verify query parameters
|
||||
|
|
@ -265,7 +265,7 @@ class TestPineconeGraphEmbeddingsQueryProcessor:
|
|||
|
||||
@pytest.mark.asyncio
|
||||
async def test_query_graph_embeddings_different_vector_dimensions(self, processor):
|
||||
"""Test querying with vectors of different dimensions"""
|
||||
"""Test querying with vectors of different dimensions using same index"""
|
||||
message = MagicMock()
|
||||
message.vectors = [
|
||||
[0.1, 0.2], # 2D vector
|
||||
|
|
@ -274,34 +274,30 @@ class TestPineconeGraphEmbeddingsQueryProcessor:
|
|||
message.limit = 5
|
||||
message.user = 'test_user'
|
||||
message.collection = 'test_collection'
|
||||
|
||||
mock_index_2d = MagicMock()
|
||||
mock_index_4d = MagicMock()
|
||||
|
||||
def mock_index_side_effect(name):
|
||||
if name.endswith("-2"):
|
||||
return mock_index_2d
|
||||
elif name.endswith("-4"):
|
||||
return mock_index_4d
|
||||
|
||||
processor.pinecone.Index.side_effect = mock_index_side_effect
|
||||
|
||||
# Mock results for different dimensions
|
||||
|
||||
# Mock single index that handles all dimensions
|
||||
mock_index = MagicMock()
|
||||
processor.pinecone.Index.return_value = mock_index
|
||||
|
||||
# Mock results for different vector queries
|
||||
mock_results_2d = MagicMock()
|
||||
mock_results_2d.matches = [MagicMock(metadata={'entity': 'entity_2d'})]
|
||||
mock_index_2d.query.return_value = mock_results_2d
|
||||
|
||||
|
||||
mock_results_4d = MagicMock()
|
||||
mock_results_4d.matches = [MagicMock(metadata={'entity': 'entity_4d'})]
|
||||
mock_index_4d.query.return_value = mock_results_4d
|
||||
|
||||
|
||||
mock_index.query.side_effect = [mock_results_2d, mock_results_4d]
|
||||
|
||||
entities = await processor.query_graph_embeddings(message)
|
||||
|
||||
# Verify different indexes were used
|
||||
|
||||
# Verify same index used for both vectors
|
||||
expected_index_name = "t-test_user-test_collection"
|
||||
assert processor.pinecone.Index.call_count == 2
|
||||
mock_index_2d.query.assert_called_once()
|
||||
mock_index_4d.query.assert_called_once()
|
||||
|
||||
processor.pinecone.Index.assert_called_with(expected_index_name)
|
||||
|
||||
# Verify both queries were made
|
||||
assert mock_index.query.call_count == 2
|
||||
|
||||
# Verify results from both dimensions
|
||||
entity_values = [e.value for e in entities]
|
||||
assert 'entity_2d' in entity_values
|
||||
|
|
|
|||
|
|
@ -70,7 +70,7 @@ class TestCassandraQueryProcessor:
|
|||
assert result.is_uri is False
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@patch('trustgraph.query.triples.cassandra.service.TrustGraph')
|
||||
@patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
|
||||
async def test_query_triples_spo_query(self, mock_trustgraph):
|
||||
"""Test querying triples with subject, predicate, and object specified"""
|
||||
from trustgraph.schema import TriplesQueryRequest, Value
|
||||
|
|
@ -98,16 +98,15 @@ class TestCassandraQueryProcessor:
|
|||
|
||||
result = await processor.query_triples(query)
|
||||
|
||||
# Verify TrustGraph was created with correct parameters
|
||||
# Verify KnowledgeGraph was created with correct parameters
|
||||
mock_trustgraph.assert_called_once_with(
|
||||
hosts=['localhost'],
|
||||
keyspace='test_user',
|
||||
table='test_collection'
|
||||
keyspace='test_user'
|
||||
)
|
||||
|
||||
# Verify get_spo was called with correct parameters
|
||||
mock_tg_instance.get_spo.assert_called_once_with(
|
||||
'test_subject', 'test_predicate', 'test_object', limit=100
|
||||
'test_collection', 'test_subject', 'test_predicate', 'test_object', limit=100
|
||||
)
|
||||
|
||||
# Verify result contains the queried triple
|
||||
|
|
@ -144,7 +143,7 @@ class TestCassandraQueryProcessor:
|
|||
assert processor.table is None
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@patch('trustgraph.query.triples.cassandra.service.TrustGraph')
|
||||
@patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
|
||||
async def test_query_triples_sp_pattern(self, mock_trustgraph):
|
||||
"""Test SP query pattern (subject and predicate, no object)"""
|
||||
from trustgraph.schema import TriplesQueryRequest, Value
|
||||
|
|
@ -170,14 +169,14 @@ class TestCassandraQueryProcessor:
|
|||
|
||||
result = await processor.query_triples(query)
|
||||
|
||||
mock_tg_instance.get_sp.assert_called_once_with('test_subject', 'test_predicate', limit=50)
|
||||
mock_tg_instance.get_sp.assert_called_once_with('test_collection', 'test_subject', 'test_predicate', limit=50)
|
||||
assert len(result) == 1
|
||||
assert result[0].s.value == 'test_subject'
|
||||
assert result[0].p.value == 'test_predicate'
|
||||
assert result[0].o.value == 'result_object'
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@patch('trustgraph.query.triples.cassandra.service.TrustGraph')
|
||||
@patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
|
||||
async def test_query_triples_s_pattern(self, mock_trustgraph):
|
||||
"""Test S query pattern (subject only)"""
|
||||
from trustgraph.schema import TriplesQueryRequest, Value
|
||||
|
|
@ -203,14 +202,14 @@ class TestCassandraQueryProcessor:
|
|||
|
||||
result = await processor.query_triples(query)
|
||||
|
||||
mock_tg_instance.get_s.assert_called_once_with('test_subject', limit=25)
|
||||
mock_tg_instance.get_s.assert_called_once_with('test_collection', 'test_subject', limit=25)
|
||||
assert len(result) == 1
|
||||
assert result[0].s.value == 'test_subject'
|
||||
assert result[0].p.value == 'result_predicate'
|
||||
assert result[0].o.value == 'result_object'
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@patch('trustgraph.query.triples.cassandra.service.TrustGraph')
|
||||
@patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
|
||||
async def test_query_triples_p_pattern(self, mock_trustgraph):
|
||||
"""Test P query pattern (predicate only)"""
|
||||
from trustgraph.schema import TriplesQueryRequest, Value
|
||||
|
|
@ -236,14 +235,14 @@ class TestCassandraQueryProcessor:
|
|||
|
||||
result = await processor.query_triples(query)
|
||||
|
||||
mock_tg_instance.get_p.assert_called_once_with('test_predicate', limit=10)
|
||||
mock_tg_instance.get_p.assert_called_once_with('test_collection', 'test_predicate', limit=10)
|
||||
assert len(result) == 1
|
||||
assert result[0].s.value == 'result_subject'
|
||||
assert result[0].p.value == 'test_predicate'
|
||||
assert result[0].o.value == 'result_object'
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@patch('trustgraph.query.triples.cassandra.service.TrustGraph')
|
||||
@patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
|
||||
async def test_query_triples_o_pattern(self, mock_trustgraph):
|
||||
"""Test O query pattern (object only)"""
|
||||
from trustgraph.schema import TriplesQueryRequest, Value
|
||||
|
|
@ -269,14 +268,14 @@ class TestCassandraQueryProcessor:
|
|||
|
||||
result = await processor.query_triples(query)
|
||||
|
||||
mock_tg_instance.get_o.assert_called_once_with('test_object', limit=75)
|
||||
mock_tg_instance.get_o.assert_called_once_with('test_collection', 'test_object', limit=75)
|
||||
assert len(result) == 1
|
||||
assert result[0].s.value == 'result_subject'
|
||||
assert result[0].p.value == 'result_predicate'
|
||||
assert result[0].o.value == 'test_object'
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@patch('trustgraph.query.triples.cassandra.service.TrustGraph')
|
||||
@patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
|
||||
async def test_query_triples_get_all_pattern(self, mock_trustgraph):
|
||||
"""Test query pattern with no constraints (get all)"""
|
||||
from trustgraph.schema import TriplesQueryRequest
|
||||
|
|
@ -303,7 +302,7 @@ class TestCassandraQueryProcessor:
|
|||
|
||||
result = await processor.query_triples(query)
|
||||
|
||||
mock_tg_instance.get_all.assert_called_once_with(limit=1000)
|
||||
mock_tg_instance.get_all.assert_called_once_with('test_collection', limit=1000)
|
||||
assert len(result) == 1
|
||||
assert result[0].s.value == 'all_subject'
|
||||
assert result[0].p.value == 'all_predicate'
|
||||
|
|
@ -376,7 +375,7 @@ class TestCassandraQueryProcessor:
|
|||
mock_launch.assert_called_once_with(default_ident, '\nTriples query service. Input is a (s, p, o) triple, some values may be\nnull. Output is a list of triples.\n')
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@patch('trustgraph.query.triples.cassandra.service.TrustGraph')
|
||||
@patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
|
||||
async def test_query_triples_with_authentication(self, mock_trustgraph):
|
||||
"""Test querying with username and password authentication"""
|
||||
from trustgraph.schema import TriplesQueryRequest, Value
|
||||
|
|
@ -402,17 +401,16 @@ class TestCassandraQueryProcessor:
|
|||
|
||||
await processor.query_triples(query)
|
||||
|
||||
# Verify TrustGraph was created with authentication
|
||||
# Verify KnowledgeGraph was created with authentication
|
||||
mock_trustgraph.assert_called_once_with(
|
||||
hosts=['cassandra'], # Updated default
|
||||
keyspace='test_user',
|
||||
table='test_collection',
|
||||
username='authuser',
|
||||
password='authpass'
|
||||
)
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@patch('trustgraph.query.triples.cassandra.service.TrustGraph')
|
||||
@patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
|
||||
async def test_query_triples_table_reuse(self, mock_trustgraph):
|
||||
"""Test that TrustGraph is reused for same table"""
|
||||
from trustgraph.schema import TriplesQueryRequest, Value
|
||||
|
|
@ -441,7 +439,7 @@ class TestCassandraQueryProcessor:
|
|||
assert mock_trustgraph.call_count == 1 # Should not increase
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@patch('trustgraph.query.triples.cassandra.service.TrustGraph')
|
||||
@patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
|
||||
async def test_query_triples_table_switching(self, mock_trustgraph):
|
||||
"""Test table switching creates new TrustGraph"""
|
||||
from trustgraph.schema import TriplesQueryRequest, Value
|
||||
|
|
@ -463,7 +461,7 @@ class TestCassandraQueryProcessor:
|
|||
)
|
||||
|
||||
await processor.query_triples(query1)
|
||||
assert processor.table == ('user1', 'collection1')
|
||||
assert processor.table == 'user1'
|
||||
|
||||
# Second query with different table
|
||||
query2 = TriplesQueryRequest(
|
||||
|
|
@ -476,13 +474,13 @@ class TestCassandraQueryProcessor:
|
|||
)
|
||||
|
||||
await processor.query_triples(query2)
|
||||
assert processor.table == ('user2', 'collection2')
|
||||
assert processor.table == 'user2'
|
||||
|
||||
# Verify TrustGraph was created twice
|
||||
assert mock_trustgraph.call_count == 2
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@patch('trustgraph.query.triples.cassandra.service.TrustGraph')
|
||||
@patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
|
||||
async def test_query_triples_exception_handling(self, mock_trustgraph):
|
||||
"""Test exception handling during query execution"""
|
||||
from trustgraph.schema import TriplesQueryRequest, Value
|
||||
|
|
@ -506,7 +504,7 @@ class TestCassandraQueryProcessor:
|
|||
await processor.query_triples(query)
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@patch('trustgraph.query.triples.cassandra.service.TrustGraph')
|
||||
@patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
|
||||
async def test_query_triples_multiple_results(self, mock_trustgraph):
|
||||
"""Test query returning multiple results"""
|
||||
from trustgraph.schema import TriplesQueryRequest, Value
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue