2025-07-14 14:57:44 +01:00
"""
Tests for Cassandra triples query service
"""
import pytest
from unittest . mock import MagicMock , patch
from trustgraph . query . triples . cassandra . service import Processor
from trustgraph . schema import Value
class TestCassandraQueryProcessor:
    """Test cases for Cassandra query processor.

    Every query test patches ``KnowledgeGraph`` in the service module, so
    the processor talks to a ``MagicMock`` instead of a real graph. The
    tests then check which ``get_*`` method was called, with which
    arguments, and what triples ``query_triples`` returned.
    """

    @staticmethod
    def _literal(value):
        """Wrap ``value`` in a non-URI ``Value``; ``None`` is passed through.

        ``None`` is how the tests express "this term is unconstrained".
        """
        if value is None:
            return None
        return Value(value=value, is_uri=False)

    @classmethod
    def _make_query(cls, s=None, p=None, o=None, limit=100,
                    user='test_user', collection='test_collection'):
        """Build a ``TriplesQueryRequest`` whose terms are literal values.

        Args:
            s, p, o: Term strings, or ``None`` to leave the term out.
            limit: Result limit forwarded in the request.
            user: Request user (asserted as the keyspace in these tests).
            collection: Request collection.
        """
        from trustgraph.schema import TriplesQueryRequest

        return TriplesQueryRequest(
            user=user,
            collection=collection,
            s=cls._literal(s),
            p=cls._literal(p),
            o=cls._literal(o),
            limit=limit,
        )

    @pytest.fixture
    def processor(self):
        """Create a processor instance for testing"""
        # Uses cassandra_host like every other test in this class; the
        # previous graph_host keyword looks like a leftover from before the
        # cassandra_* parameter naming.
        return Processor(
            taskgroup=MagicMock(),
            id='test-cassandra-query',
            cassandra_host='localhost',
        )

    def test_create_value_with_http_uri(self, processor):
        """Test create_value with HTTP URI"""
        result = processor.create_value("http://example.com/resource")

        assert isinstance(result, Value)
        assert result.value == "http://example.com/resource"
        assert result.is_uri is True

    def test_create_value_with_https_uri(self, processor):
        """Test create_value with HTTPS URI"""
        result = processor.create_value("https://example.com/resource")

        assert isinstance(result, Value)
        assert result.value == "https://example.com/resource"
        assert result.is_uri is True

    def test_create_value_with_literal(self, processor):
        """Test create_value with literal value"""
        result = processor.create_value("just a literal string")

        assert isinstance(result, Value)
        assert result.value == "just a literal string"
        assert result.is_uri is False

    def test_create_value_with_empty_string(self, processor):
        """Test create_value with empty string"""
        result = processor.create_value("")

        assert isinstance(result, Value)
        assert result.value == ""
        assert result.is_uri is False

    def test_create_value_with_partial_uri(self, processor):
        """Test create_value with string that looks like URI but isn't complete"""
        result = processor.create_value("http")

        assert isinstance(result, Value)
        assert result.value == "http"
        assert result.is_uri is False

    def test_create_value_with_ftp_uri(self, processor):
        """Test create_value with FTP URI (should not be detected as URI)"""
        result = processor.create_value("ftp://example.com/file")

        assert isinstance(result, Value)
        assert result.value == "ftp://example.com/file"
        assert result.is_uri is False

    @pytest.mark.asyncio
    @patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
    async def test_query_triples_spo_query(self, mock_trustgraph):
        """Test querying triples with subject, predicate, and object specified"""
        # Setup mock TrustGraph
        mock_tg_instance = MagicMock()
        mock_trustgraph.return_value = mock_tg_instance
        mock_tg_instance.get_spo.return_value = None  # SPO query returns None if found

        processor = Processor(
            taskgroup=MagicMock(),
            id='test-cassandra-query',
            cassandra_host='localhost',
        )

        # Create query request with all SPO values
        query = self._make_query(
            s='test_subject', p='test_predicate', o='test_object', limit=100
        )

        result = await processor.query_triples(query)

        # Verify KnowledgeGraph was created with correct parameters
        mock_trustgraph.assert_called_once_with(
            hosts=['localhost'],
            keyspace='test_user',
        )

        # Verify get_spo was called with correct parameters
        mock_tg_instance.get_spo.assert_called_once_with(
            'test_collection', 'test_subject', 'test_predicate', 'test_object',
            limit=100,
        )

        # Verify result contains the queried triple
        assert len(result) == 1
        assert result[0].s.value == 'test_subject'
        assert result[0].p.value == 'test_predicate'
        assert result[0].o.value == 'test_object'

    def test_processor_initialization_with_defaults(self):
        """Test processor initialization with default parameters"""
        processor = Processor(taskgroup=MagicMock())

        assert processor.cassandra_host == ['cassandra']  # Updated default
        assert processor.cassandra_username is None
        assert processor.cassandra_password is None
        assert processor.table is None

    def test_processor_initialization_with_custom_params(self):
        """Test processor initialization with custom parameters"""
        processor = Processor(
            taskgroup=MagicMock(),
            cassandra_host='cassandra.example.com',
            cassandra_username='queryuser',
            cassandra_password='querypass',
        )

        # A single host string is exposed as a one-element list.
        assert processor.cassandra_host == ['cassandra.example.com']
        assert processor.cassandra_username == 'queryuser'
        assert processor.cassandra_password == 'querypass'
        assert processor.table is None

    @pytest.mark.asyncio
    @patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
    async def test_query_triples_sp_pattern(self, mock_trustgraph):
        """Test SP query pattern (subject and predicate, no object)"""
        # Setup mock TrustGraph and response
        mock_tg_instance = MagicMock()
        mock_trustgraph.return_value = mock_tg_instance
        mock_tg_instance.get_sp.return_value = [MagicMock(o='result_object')]

        processor = Processor(taskgroup=MagicMock())
        query = self._make_query(s='test_subject', p='test_predicate', limit=50)

        result = await processor.query_triples(query)

        mock_tg_instance.get_sp.assert_called_once_with(
            'test_collection', 'test_subject', 'test_predicate', limit=50
        )
        assert len(result) == 1
        assert result[0].s.value == 'test_subject'
        assert result[0].p.value == 'test_predicate'
        assert result[0].o.value == 'result_object'

    @pytest.mark.asyncio
    @patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
    async def test_query_triples_s_pattern(self, mock_trustgraph):
        """Test S query pattern (subject only)"""
        mock_tg_instance = MagicMock()
        mock_trustgraph.return_value = mock_tg_instance
        mock_tg_instance.get_s.return_value = [
            MagicMock(p='result_predicate', o='result_object')
        ]

        processor = Processor(taskgroup=MagicMock())
        query = self._make_query(s='test_subject', limit=25)

        result = await processor.query_triples(query)

        mock_tg_instance.get_s.assert_called_once_with(
            'test_collection', 'test_subject', limit=25
        )
        assert len(result) == 1
        assert result[0].s.value == 'test_subject'
        assert result[0].p.value == 'result_predicate'
        assert result[0].o.value == 'result_object'

    @pytest.mark.asyncio
    @patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
    async def test_query_triples_p_pattern(self, mock_trustgraph):
        """Test P query pattern (predicate only)"""
        mock_tg_instance = MagicMock()
        mock_trustgraph.return_value = mock_tg_instance
        mock_tg_instance.get_p.return_value = [
            MagicMock(s='result_subject', o='result_object')
        ]

        processor = Processor(taskgroup=MagicMock())
        query = self._make_query(p='test_predicate', limit=10)

        result = await processor.query_triples(query)

        mock_tg_instance.get_p.assert_called_once_with(
            'test_collection', 'test_predicate', limit=10
        )
        assert len(result) == 1
        assert result[0].s.value == 'result_subject'
        assert result[0].p.value == 'test_predicate'
        assert result[0].o.value == 'result_object'

    @pytest.mark.asyncio
    @patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
    async def test_query_triples_o_pattern(self, mock_trustgraph):
        """Test O query pattern (object only)"""
        mock_tg_instance = MagicMock()
        mock_trustgraph.return_value = mock_tg_instance
        mock_tg_instance.get_o.return_value = [
            MagicMock(s='result_subject', p='result_predicate')
        ]

        processor = Processor(taskgroup=MagicMock())
        query = self._make_query(o='test_object', limit=75)

        result = await processor.query_triples(query)

        mock_tg_instance.get_o.assert_called_once_with(
            'test_collection', 'test_object', limit=75
        )
        assert len(result) == 1
        assert result[0].s.value == 'result_subject'
        assert result[0].p.value == 'result_predicate'
        assert result[0].o.value == 'test_object'

    @pytest.mark.asyncio
    @patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
    async def test_query_triples_get_all_pattern(self, mock_trustgraph):
        """Test query pattern with no constraints (get all)"""
        mock_tg_instance = MagicMock()
        mock_trustgraph.return_value = mock_tg_instance
        mock_tg_instance.get_all.return_value = [
            MagicMock(s='all_subject', p='all_predicate', o='all_object')
        ]

        processor = Processor(taskgroup=MagicMock())
        query = self._make_query(limit=1000)

        result = await processor.query_triples(query)

        mock_tg_instance.get_all.assert_called_once_with(
            'test_collection', limit=1000
        )
        assert len(result) == 1
        assert result[0].s.value == 'all_subject'
        assert result[0].p.value == 'all_predicate'
        assert result[0].o.value == 'all_object'

    def test_add_args_method(self):
        """Test that add_args properly configures argument parser"""
        from argparse import ArgumentParser

        parser = ArgumentParser()

        # Mock the parent class add_args method
        with patch(
            'trustgraph.query.triples.cassandra.service.TriplesQueryService.add_args'
        ) as mock_parent_add_args:
            Processor.add_args(parser)

        # Verify parent add_args was called
        mock_parent_add_args.assert_called_once_with(parser)

        # Verify our specific arguments were added
        args = parser.parse_args([])

        assert hasattr(args, 'cassandra_host')
        assert args.cassandra_host == 'cassandra'  # Updated to new parameter name and default
        assert hasattr(args, 'cassandra_username')
        assert args.cassandra_username is None
        assert hasattr(args, 'cassandra_password')
        assert args.cassandra_password is None

    def test_add_args_with_custom_values(self):
        """Test add_args with custom command line values"""
        from argparse import ArgumentParser

        parser = ArgumentParser()

        with patch(
            'trustgraph.query.triples.cassandra.service.TriplesQueryService.add_args'
        ):
            Processor.add_args(parser)

        # Test parsing with custom values (new cassandra_* arguments)
        args = parser.parse_args([
            '--cassandra-host', 'query.cassandra.com',
            '--cassandra-username', 'queryuser',
            '--cassandra-password', 'querypass',
        ])

        assert args.cassandra_host == 'query.cassandra.com'
        assert args.cassandra_username == 'queryuser'
        assert args.cassandra_password == 'querypass'

    def test_add_args_short_form(self):
        """Test add_args with only the host option given.

        The test name is historical: the cassandra arguments have no short
        form, so the long ``--cassandra-host`` option is used here.
        """
        from argparse import ArgumentParser

        parser = ArgumentParser()

        with patch(
            'trustgraph.query.triples.cassandra.service.TriplesQueryService.add_args'
        ):
            Processor.add_args(parser)

        # Test parsing with cassandra arguments (no short form)
        args = parser.parse_args(['--cassandra-host', 'short.query.com'])

        assert args.cassandra_host == 'short.query.com'

    @patch('trustgraph.query.triples.cassandra.service.Processor.launch')
    def test_run_function(self, mock_launch):
        """Test the run function calls Processor.launch with correct parameters"""
        from trustgraph.query.triples.cassandra.service import run, default_ident

        run()

        mock_launch.assert_called_once()
        positional, keyword = mock_launch.call_args
        assert keyword == {}
        ident, description = positional
        assert ident == default_ident

        # Compare the description whitespace-insensitively: this pins the
        # text without depending on how the multi-line description is
        # wrapped or spaced in the service module.
        assert " ".join(description.split()) == (
            "Triples query service. Input is a (s, p, o) triple, "
            "some values may be null. Output is a list of triples."
        )

    @pytest.mark.asyncio
    @patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
    async def test_query_triples_with_authentication(self, mock_trustgraph):
        """Test querying with username and password authentication"""
        mock_tg_instance = MagicMock()
        mock_trustgraph.return_value = mock_tg_instance
        mock_tg_instance.get_spo.return_value = None

        processor = Processor(
            taskgroup=MagicMock(),
            cassandra_username='authuser',
            cassandra_password='authpass',
        )
        query = self._make_query(
            s='test_subject', p='test_predicate', o='test_object', limit=100
        )

        await processor.query_triples(query)

        # Verify KnowledgeGraph was created with authentication
        mock_trustgraph.assert_called_once_with(
            hosts=['cassandra'],  # Updated default
            keyspace='test_user',
            username='authuser',
            password='authpass',
        )

    @pytest.mark.asyncio
    @patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
    async def test_query_triples_table_reuse(self, mock_trustgraph):
        """Test that TrustGraph is reused for same table"""
        mock_tg_instance = MagicMock()
        mock_trustgraph.return_value = mock_tg_instance
        mock_tg_instance.get_spo.return_value = None

        processor = Processor(taskgroup=MagicMock())
        query = self._make_query(
            s='test_subject', p='test_predicate', o='test_object', limit=100
        )

        # First query should create TrustGraph
        await processor.query_triples(query)
        assert mock_trustgraph.call_count == 1

        # Second query with same table should reuse TrustGraph
        await processor.query_triples(query)
        assert mock_trustgraph.call_count == 1  # Should not increase

    @pytest.mark.asyncio
    @patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
    async def test_query_triples_table_switching(self, mock_trustgraph):
        """Test table switching creates new TrustGraph"""
        # One distinct mock per expected KnowledgeGraph construction.
        mock_trustgraph.side_effect = [MagicMock(), MagicMock()]

        processor = Processor(taskgroup=MagicMock())

        # First query
        query1 = self._make_query(
            s='test_subject', limit=100,
            user='user1', collection='collection1',
        )
        await processor.query_triples(query1)
        assert processor.table == 'user1'

        # Second query with different table
        query2 = self._make_query(
            s='test_subject', limit=100,
            user='user2', collection='collection2',
        )
        await processor.query_triples(query2)
        assert processor.table == 'user2'

        # Verify TrustGraph was created twice
        assert mock_trustgraph.call_count == 2

    @pytest.mark.asyncio
    @patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
    async def test_query_triples_exception_handling(self, mock_trustgraph):
        """Test exception handling during query execution"""
        mock_tg_instance = MagicMock()
        mock_trustgraph.return_value = mock_tg_instance
        mock_tg_instance.get_spo.side_effect = Exception("Query failed")

        processor = Processor(taskgroup=MagicMock())
        query = self._make_query(
            s='test_subject', p='test_predicate', o='test_object', limit=100
        )

        # The failure from the graph layer must surface to the caller.
        with pytest.raises(Exception, match="Query failed"):
            await processor.query_triples(query)

    @pytest.mark.asyncio
    @patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
    async def test_query_triples_multiple_results(self, mock_trustgraph):
        """Test query returning multiple results"""
        mock_tg_instance = MagicMock()
        mock_trustgraph.return_value = mock_tg_instance

        # Mock multiple results
        mock_tg_instance.get_sp.return_value = [
            MagicMock(o='object1'),
            MagicMock(o='object2'),
        ]

        processor = Processor(taskgroup=MagicMock())
        query = self._make_query(s='test_subject', p='test_predicate', limit=100)

        result = await processor.query_triples(query)

        # Results come back in the order the graph returned them.
        assert len(result) == 2
        assert result[0].o.value == 'object1'
        assert result[1].o.value == 'object2'
class TestCassandraQueryPerformanceOptimizations:
    """Test cases for multi-table performance optimizations in query service.

    ``KnowledgeGraph`` is patched throughout, so these tests check which
    ``get_*`` method the processor routes each query pattern to; they do
    not exercise the Cassandra tables themselves.
    """

    @pytest.mark.asyncio
    @patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
    async def test_get_po_query_optimization(self, mock_trustgraph):
        """Test that get_po queries use optimized table (no ALLOW FILTERING)"""
        from trustgraph.schema import TriplesQueryRequest

        mock_tg_instance = MagicMock()
        mock_trustgraph.return_value = mock_tg_instance
        mock_tg_instance.get_po.return_value = [MagicMock(s='result_subject')]

        processor = Processor(taskgroup=MagicMock())

        # PO query pattern (predicate + object, find subjects)
        query = TriplesQueryRequest(
            user='test_user',
            collection='test_collection',
            s=None,
            p=Value(value='test_predicate', is_uri=False),
            o=Value(value='test_object', is_uri=False),
            limit=50,
        )

        result = await processor.query_triples(query)

        # Verify get_po was called (should use optimized po_table)
        mock_tg_instance.get_po.assert_called_once_with(
            'test_collection', 'test_predicate', 'test_object', limit=50
        )

        assert len(result) == 1
        assert result[0].s.value == 'result_subject'
        assert result[0].p.value == 'test_predicate'
        assert result[0].o.value == 'test_object'

    @pytest.mark.asyncio
    @patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
    async def test_get_os_query_optimization(self, mock_trustgraph):
        """Test that get_os queries use optimized table (no ALLOW FILTERING)"""
        from trustgraph.schema import TriplesQueryRequest

        mock_tg_instance = MagicMock()
        mock_trustgraph.return_value = mock_tg_instance
        mock_tg_instance.get_os.return_value = [MagicMock(p='result_predicate')]

        processor = Processor(taskgroup=MagicMock())

        # OS query pattern (object + subject, find predicates)
        query = TriplesQueryRequest(
            user='test_user',
            collection='test_collection',
            s=Value(value='test_subject', is_uri=False),
            p=None,
            o=Value(value='test_object', is_uri=False),
            limit=25,
        )

        result = await processor.query_triples(query)

        # Verify get_os was called (should use optimized subject_table with clustering)
        # Note the argument order: object before subject.
        mock_tg_instance.get_os.assert_called_once_with(
            'test_collection', 'test_object', 'test_subject', limit=25
        )

        assert len(result) == 1
        assert result[0].s.value == 'test_subject'
        assert result[0].p.value == 'result_predicate'
        assert result[0].o.value == 'test_object'

    @pytest.mark.asyncio
    @patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
    async def test_all_query_patterns_use_correct_tables(self, mock_trustgraph):
        """Test that all query patterns route to their optimal tables.

        For every combination of constrained terms, exactly one ``get_*``
        method must be called, and it must be the expected one.
        """
        from trustgraph.schema import TriplesQueryRequest

        mock_tg_instance = MagicMock()
        mock_trustgraph.return_value = mock_tg_instance

        query_methods = [
            'get_all', 'get_s', 'get_p', 'get_o',
            'get_sp', 'get_po', 'get_os', 'get_spo',
        ]

        # Mock empty results for all queries
        for name in query_methods:
            getattr(mock_tg_instance, name).return_value = []

        processor = Processor(taskgroup=MagicMock())

        # Test each query pattern
        test_patterns = [
            # (s, p, o, expected_method)
            (None, None, None, 'get_all'),  # All triples
            ('s1', None, None, 'get_s'),    # Subject only
            (None, 'p1', None, 'get_p'),    # Predicate only
            (None, None, 'o1', 'get_o'),    # Object only
            ('s1', 'p1', None, 'get_sp'),   # Subject + Predicate
            (None, 'p1', 'o1', 'get_po'),   # Predicate + Object (CRITICAL OPTIMIZATION)
            ('s1', None, 'o1', 'get_os'),   # Object + Subject
            ('s1', 'p1', 'o1', 'get_spo'),  # All three
        ]

        for s, p, o, expected_method in test_patterns:
            # Reset mock call counts (configured return values are kept)
            mock_tg_instance.reset_mock()

            query = TriplesQueryRequest(
                user='test_user',
                collection='test_collection',
                s=Value(value=s, is_uri=False) if s else None,
                p=Value(value=p, is_uri=False) if p else None,
                o=Value(value=o, is_uri=False) if o else None,
                limit=10,
            )

            await processor.query_triples(query)

            # Verify the correct method -- and only that method -- was called.
            # Checking `.called` on the expected method alone would miss a
            # pattern that also hits a second table.
            called = [
                name for name in query_methods
                if getattr(mock_tg_instance, name).called
            ]
            assert called == [expected_method], (
                f"Expected only {expected_method} to be called for pattern "
                f"s={s}, p={p}, o={o}; called: {called}"
            )
            getattr(mock_tg_instance, expected_method).assert_called_once()

    def test_legacy_vs_optimized_mode_configuration(self):
        """Test processor construction under each CASSANDRA_USE_LEGACY setting.

        The mode itself is determined in KnowledgeGraph initialization, not
        in the processor, so this only checks that the processor can be
        constructed in each environment and has not selected a table yet.
        """
        taskgroup_mock = MagicMock()

        env_cases = [
            # (environment overrides, clear the rest of the environment?)
            ({}, True),                                 # optimized mode (default)
            ({'CASSANDRA_USE_LEGACY': 'true'}, False),  # legacy mode
            ({'CASSANDRA_USE_LEGACY': 'false'}, False), # explicit optimized mode
        ]

        for env, clear in env_cases:
            with patch.dict('os.environ', env, clear=clear):
                processor = Processor(taskgroup=taskgroup_mock)
                # Mode is determined in KnowledgeGraph initialization
                assert processor.table is None

    @pytest.mark.asyncio
    @patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
    async def test_performance_critical_po_query_no_filtering(self, mock_trustgraph):
        """Test the performance-critical PO query that eliminates ALLOW FILTERING"""
        from trustgraph.schema import TriplesQueryRequest

        rdf_type = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type'
        person = 'http://example.com/Person'

        mock_tg_instance = MagicMock()
        mock_trustgraph.return_value = mock_tg_instance

        # Mock multiple subjects for the same predicate-object pair
        mock_tg_instance.get_po.return_value = [
            MagicMock(s=f'subject_{i}') for i in range(5)
        ]

        processor = Processor(taskgroup=MagicMock())

        # This is the query pattern that was slow with ALLOW FILTERING
        query = TriplesQueryRequest(
            user='large_dataset_user',
            collection='massive_collection',
            s=None,
            p=Value(value=rdf_type, is_uri=True),
            o=Value(value=person, is_uri=True),
            limit=1000,
        )

        result = await processor.query_triples(query)

        # Verify optimized get_po was used (no ALLOW FILTERING needed!)
        mock_tg_instance.get_po.assert_called_once_with(
            'massive_collection',
            rdf_type,
            person,
            limit=1000,
        )

        # Verify all results were returned
        assert len(result) == 5
        for i, triple in enumerate(result):
            assert triple.s.value == f'subject_{i}'
            assert triple.p.value == rdf_type
            assert triple.p.is_uri is True
            assert triple.o.value == person
            assert triple.o.is_uri is True