# trustgraph/tests/integration/test_cassandra_config_end_to_end.py

"""
End-to-end integration tests for Cassandra configuration.

Tests complete configuration flow from environment variables
through processors to Cassandra connections.
"""

import os
import pytest
from unittest.mock import Mock, patch, MagicMock, call
from argparse import ArgumentParser

# Import processors that use Cassandra configuration
from trustgraph.storage.triples.cassandra.write import Processor as TriplesWriter
from trustgraph.storage.rows.cassandra.write import Processor as RowsWriter
from trustgraph.query.triples.cassandra.service import Processor as TriplesQuery
from trustgraph.storage.knowledge.store import Processor as KgStore


class TestEndToEndConfigurationFlow:
    """Follow CASSANDRA_* environment variables through each processor to the driver call."""

    @pytest.mark.asyncio
    @patch('trustgraph.direct.cassandra_kg.Cluster')
    async def test_triples_writer_env_to_connection(self, mock_cluster):
        """Triples writer: environment-only config must reach the patched Cluster."""
        # Stand-in cluster whose connect() hands back a stand-in session.
        fake_cluster = MagicMock()
        fake_cluster.connect.return_value = MagicMock()
        mock_cluster.return_value = fake_cluster

        environment = {
            'CASSANDRA_HOST': 'integration-host1,integration-host2,integration-host3',
            'CASSANDRA_USERNAME': 'integration-user',
            'CASSANDRA_PASSWORD': 'integration-pass'
        }

        with patch.dict(os.environ, environment, clear=True):
            writer = TriplesWriter(taskgroup=MagicMock())

            # A message with no triples is enough to drive store_triples().
            message = MagicMock()
            message.metadata.collection = 'test_collection'
            message.triples = []

            # Report the collection as already existing.
            with patch(
                'trustgraph.direct.cassandra_kg.KnowledgeGraph.collection_exists',
                return_value=True,
            ):
                await writer.store_triples('test_user', message)

            # Exactly one Cluster, built from the split host list and with auth.
            mock_cluster.assert_called_once()
            positional, keywords = mock_cluster.call_args
            assert positional[0] == ['integration-host1', 'integration-host2', 'integration-host3']
            assert 'auth_provider' in keywords  # credentials were supplied via env

    @patch('trustgraph.storage.rows.cassandra.write.Cluster')
    @patch('trustgraph.storage.rows.cassandra.write.PlainTextAuthProvider')
    def test_objects_writer_env_to_cluster_connection(self, mock_auth_provider, mock_cluster):
        """Rows writer: environment-only config must reach PlainTextAuthProvider and Cluster."""
        fake_auth = MagicMock()
        mock_auth_provider.return_value = fake_auth

        fake_cluster = MagicMock()
        fake_cluster.connect.return_value = MagicMock()
        mock_cluster.return_value = fake_cluster

        environment = {
            'CASSANDRA_HOST': 'obj-host1,obj-host2',
            'CASSANDRA_USERNAME': 'obj-user',
            'CASSANDRA_PASSWORD': 'obj-pass'
        }

        with patch.dict(os.environ, environment, clear=True):
            writer = RowsWriter(taskgroup=MagicMock())

            # Explicitly open the connection so the driver classes get called.
            writer.connect_cassandra()

            # Credentials from the environment go to the auth provider ...
            mock_auth_provider.assert_called_once_with(
                username='obj-user',
                password='obj-pass'
            )

            # ... and the resulting provider plus the host list go to Cluster.
            mock_cluster.assert_called_once()
            _, keywords = mock_cluster.call_args
            assert keywords['contact_points'] == ['obj-host1', 'obj-host2']
            assert keywords['auth_provider'] == fake_auth

    @pytest.mark.asyncio
    @patch('trustgraph.storage.knowledge.store.KnowledgeTableStore')
    async def test_kg_store_env_to_table_store(self, mock_table_store):
        """Knowledge store: environment-only config must reach KnowledgeTableStore."""
        mock_table_store.return_value = MagicMock()

        environment = {
            'CASSANDRA_HOST': 'kg-host1,kg-host2,kg-host3,kg-host4',
            'CASSANDRA_USERNAME': 'kg-user',
            'CASSANDRA_PASSWORD': 'kg-pass'
        }

        with patch.dict(os.environ, environment, clear=True):
            # Constructing the processor is what builds the table store.
            store = KgStore(taskgroup=MagicMock())

            expected_kwargs = dict(
                cassandra_host=['kg-host1', 'kg-host2', 'kg-host3', 'kg-host4'],
                cassandra_username='kg-user',
                cassandra_password='kg-pass',
                keyspace='knowledge',
                replication_factor=1,
            )
            mock_table_store.assert_called_once_with(**expected_kwargs)


class TestConfigurationPriorityEndToEnd:
    """Check which source wins end-to-end: explicit parameters, environment, or defaults."""

    @pytest.mark.asyncio
    @patch('trustgraph.direct.cassandra_kg.Cluster')
    async def test_cli_override_env_end_to_end(self, mock_cluster):
        """Explicit cassandra_* arguments must beat the CASSANDRA_* environment values."""
        fake_cluster = MagicMock()
        fake_cluster.connect.return_value = MagicMock()
        mock_cluster.return_value = fake_cluster

        environment = {
            'CASSANDRA_HOST': 'env-host',
            'CASSANDRA_USERNAME': 'env-user',
            'CASSANDRA_PASSWORD': 'env-pass'
        }

        with patch.dict(os.environ, environment, clear=True):
            # Both sources are present; the keyword arguments are expected to win.
            writer = TriplesWriter(
                taskgroup=MagicMock(),
                cassandra_host='cli-host1,cli-host2',
                cassandra_username='cli-user',
                cassandra_password='cli-pass'
            )

            # A message with no triples is enough to drive store_triples().
            message = MagicMock()
            message.metadata.collection = 'test_collection'
            message.triples = []

            # Report the collection as already existing.
            with patch(
                'trustgraph.direct.cassandra_kg.KnowledgeGraph.collection_exists',
                return_value=True,
            ):
                await writer.store_triples('test_user', message)

            mock_cluster.assert_called_once()
            positional, keywords = mock_cluster.call_args
            assert positional[0] == ['cli-host1', 'cli-host2']  # parameter value, not env-host
            assert 'auth_provider' in keywords  # credentials were supplied

    @pytest.mark.asyncio
    @patch('trustgraph.storage.knowledge.store.KnowledgeTableStore')
    async def test_partial_cli_with_env_fallback_end_to_end(self, mock_table_store):
        """A host given as a parameter combines with credentials taken from the environment."""
        mock_table_store.return_value = MagicMock()

        environment = {
            'CASSANDRA_HOST': 'fallback-host1,fallback-host2',
            'CASSANDRA_USERNAME': 'fallback-user',
            'CASSANDRA_PASSWORD': 'fallback-pass'
        }

        with patch.dict(os.environ, environment, clear=True):
            # Only the host is passed; username and password are left to the env.
            store = KgStore(
                taskgroup=MagicMock(),
                cassandra_host='partial-host'
            )

            expected_kwargs = dict(
                cassandra_host=['partial-host'],         # from the parameter
                cassandra_username='fallback-user',      # from the environment
                cassandra_password='fallback-pass',      # from the environment
                keyspace='knowledge',
                replication_factor=1,
            )
            mock_table_store.assert_called_once_with(**expected_kwargs)

    @pytest.mark.asyncio
    @patch('trustgraph.query.triples.cassandra.service.EntityCentricKnowledgeGraph')
    async def test_no_config_defaults_end_to_end(self, mock_kg_class):
        """With neither parameters nor environment, the built-in defaults are used."""
        from unittest.mock import AsyncMock

        # The graph object's async_get_all is awaitable and yields no rows.
        fake_graph = MagicMock()
        fake_graph.async_get_all = AsyncMock(return_value=[])
        mock_kg_class.return_value = fake_graph

        with patch.dict(os.environ, {}, clear=True):
            service = TriplesQuery(taskgroup=MagicMock())

            # Fully unconstrained query: every term is None.
            query = MagicMock()
            query.collection = 'default_collection'
            query.limit = 100
            for term in ('s', 'p', 'o', 'g'):
                setattr(query, term, None)

            await service.query_triples('default_user', query)

            # Default host list, keyspace named after the user, no credentials.
            mock_kg_class.assert_called_once_with(
                hosts=['cassandra'],
                keyspace='default_user'
            )


class TestNoBackwardCompatibilityEndToEnd:
    """Test that backward compatibility with old parameter names is removed.

    Tests that assert on default or ``None`` values run with an emptied
    ``os.environ``: the processors fall back to ``CASSANDRA_*`` variables when
    a parameter is missing, so values present in the developer's or CI
    environment would otherwise leak into the assertions.
    """

    @pytest.mark.asyncio
    @patch('trustgraph.direct.cassandra_kg.Cluster')
    async def test_old_graph_params_no_longer_work_end_to_end(self, mock_cluster):
        """Test that old graph_* parameters no longer work end-to-end."""
        mock_cluster_instance = MagicMock()
        mock_session = MagicMock()
        mock_cluster_instance.connect.return_value = mock_session
        mock_cluster.return_value = mock_cluster_instance

        # Empty environment so that only built-in defaults can apply; without
        # this an ambient CASSANDRA_HOST/USERNAME/PASSWORD breaks the asserts.
        with patch.dict(os.environ, {}, clear=True):
            # Use old parameter names (should be ignored)
            processor = TriplesWriter(
                taskgroup=MagicMock(),
                graph_host='legacy-host',
                graph_username='legacy-user',
                graph_password='legacy-pass'
            )

            # Empty message used only to drive store_triples()
            mock_message = MagicMock()
            mock_message.metadata.collection = 'legacy_collection'
            mock_message.triples = []

            # Mock collection_exists to return True
            with patch('trustgraph.direct.cassandra_kg.KnowledgeGraph.collection_exists', return_value=True):
                await processor.store_triples('legacy_user', mock_message)

        # Should use defaults since old parameters are not recognized
        mock_cluster.assert_called_once()
        call_args = mock_cluster.call_args
        assert call_args.args[0] == ['cassandra']  # Default, not legacy-host
        assert 'auth_provider' not in call_args.kwargs  # No auth since no valid credentials

    @patch('trustgraph.storage.knowledge.store.KnowledgeTableStore')
    def test_old_cassandra_user_param_no_longer_works_end_to_end(self, mock_table_store):
        """Test that old cassandra_user parameter no longer works."""
        mock_store_instance = MagicMock()
        mock_table_store.return_value = mock_store_instance

        # Empty environment so the username can only come from a parameter;
        # an ambient CASSANDRA_USERNAME would otherwise replace the expected None.
        with patch.dict(os.environ, {}, clear=True):
            # Use old cassandra_user parameter (should be ignored).
            # Construction alone creates the table store, so the instance
            # itself is not needed afterwards.
            KgStore(
                taskgroup=MagicMock(),
                cassandra_host='legacy-kg-host',
                cassandra_user='legacy-kg-user',  # Old parameter name - not supported
                cassandra_password='legacy-kg-pass'
            )

        # cassandra_user should be ignored, only cassandra_username works
        mock_table_store.assert_called_once_with(
            cassandra_host=['legacy-kg-host'],
            cassandra_username=None,  # Should be None since cassandra_user is not recognized
            cassandra_password='legacy-kg-pass',
            keyspace='knowledge',
            replication_factor=1,
        )

    @pytest.mark.asyncio
    @patch('trustgraph.direct.cassandra_kg.Cluster')
    async def test_new_params_override_old_params_end_to_end(self, mock_cluster):
        """Test that new parameters override old ones when both are present end-to-end."""
        mock_cluster_instance = MagicMock()
        mock_session = MagicMock()
        mock_cluster_instance.connect.return_value = mock_session
        mock_cluster.return_value = mock_cluster_instance

        # Provide both old and new parameters. All three cassandra_* values
        # are given explicitly, so no environment fallback is exercised here.
        processor = TriplesWriter(
            taskgroup=MagicMock(),
            cassandra_host='new-host',
            graph_host='old-host',          # Should be ignored
            cassandra_username='new-user',
            graph_username='old-user',      # Should be ignored
            cassandra_password='new-pass',
            graph_password='old-pass'       # Should be ignored
        )

        # Empty message used only to drive store_triples()
        mock_message = MagicMock()
        mock_message.metadata.collection = 'precedence_collection'
        mock_message.triples = []

        # Mock collection_exists to return True
        with patch('trustgraph.direct.cassandra_kg.KnowledgeGraph.collection_exists', return_value=True):
            await processor.store_triples('precedence_user', mock_message)

        # Should use new parameters, not old ones
        mock_cluster.assert_called_once()
        call_args = mock_cluster.call_args
        assert call_args.args[0] == ['new-host']    # New parameter wins
        assert 'auth_provider' in call_args.kwargs  # Should have auth since credentials provided


class TestMultipleHostsHandling:
    """Test multiple Cassandra hosts handling end-to-end."""

    @patch('trustgraph.storage.rows.cassandra.write.Cluster')
    def test_multiple_hosts_passed_to_cluster(self, mock_cluster):
        """Test that multiple hosts are correctly passed to Cassandra cluster."""
        env_vars = {
            'CASSANDRA_HOST': 'host1,host2,host3,host4,host5'
        }

        mock_cluster_instance = MagicMock()
        mock_session = MagicMock()
        mock_cluster_instance.connect.return_value = mock_session
        mock_cluster.return_value = mock_cluster_instance

        with patch.dict(os.environ, env_vars, clear=True):
            processor = RowsWriter(taskgroup=MagicMock())
            processor.connect_cassandra()

            # Verify all hosts were passed to Cluster
            mock_cluster.assert_called_once()
            call_args = mock_cluster.call_args
            assert call_args.kwargs['contact_points'] == ['host1', 'host2', 'host3', 'host4', 'host5']

    @pytest.mark.asyncio
    @patch('trustgraph.direct.cassandra_kg.Cluster')
    async def test_single_host_converted_to_list(self, mock_cluster):
        """Test that a single host string reaches Cluster as a one-element list."""
        mock_cluster_instance = MagicMock()
        mock_session = MagicMock()
        mock_cluster_instance.connect.return_value = mock_session
        mock_cluster.return_value = mock_cluster_instance

        # Empty environment: the "no auth_provider" assertion below only holds
        # if no CASSANDRA_USERNAME/CASSANDRA_PASSWORD leak in from outside.
        with patch.dict(os.environ, {}, clear=True):
            processor = TriplesWriter(taskgroup=MagicMock(), cassandra_host='single-host')

            # Empty message used only to drive store_triples()
            mock_message = MagicMock()
            mock_message.metadata.collection = 'single_collection'
            mock_message.triples = []

            # Mock collection_exists to return True
            with patch('trustgraph.direct.cassandra_kg.KnowledgeGraph.collection_exists', return_value=True):
                await processor.store_triples('single_user', mock_message)

        # Single host should be converted to list
        mock_cluster.assert_called_once()
        call_args = mock_cluster.call_args
        assert call_args.args[0] == ['single-host']  # Converted to list
        assert 'auth_provider' not in call_args.kwargs  # No auth since no credentials provided

    def test_whitespace_handling_in_host_list(self):
        """Test that whitespace and empty entries in host lists are stripped."""
        from trustgraph.base.cassandra_config import resolve_cassandra_config

        # (raw host string, expected parsed host list)
        cases = [
            ('host1, host2 ,  host3', ['host1', 'host2', 'host3']),  # spaces around commas
            ('host1,host2,host3,', ['host1', 'host2', 'host3']),     # trailing comma
            ('  host1  ,  host2  ', ['host1', 'host2']),             # leading/trailing spaces
        ]

        for raw_hosts, expected_hosts in cases:
            # The resolver returns a 5-tuple; only the first element (hosts) matters here.
            hosts, _, _, _, _ = resolve_cassandra_config(host=raw_hosts)
            assert hosts == expected_hosts, f"unexpected parse of {raw_hosts!r}"


class TestAuthenticationFlow:
    """Test authentication configuration flow end-to-end.

    Each test patches the ``Cluster`` and ``PlainTextAuthProvider`` names used
    by the rows writer module and runs with a controlled ``os.environ`` so that
    only the credentials stated in the test can influence the outcome.
    """

    @patch('trustgraph.storage.rows.cassandra.write.Cluster')
    @patch('trustgraph.storage.rows.cassandra.write.PlainTextAuthProvider')
    def test_authentication_enabled_when_both_credentials_provided(self, mock_auth_provider, mock_cluster):
        """Test that authentication is enabled when both username and password are provided."""
        env_vars = {
            'CASSANDRA_HOST': 'auth-host',
            'CASSANDRA_USERNAME': 'auth-user',
            'CASSANDRA_PASSWORD': 'auth-secret'
        }

        mock_auth_instance = MagicMock()
        mock_auth_provider.return_value = mock_auth_instance
        mock_cluster_instance = MagicMock()
        mock_cluster.return_value = mock_cluster_instance

        with patch.dict(os.environ, env_vars, clear=True):
            processor = RowsWriter(taskgroup=MagicMock())
            processor.connect_cassandra()

            # Auth provider should be created
            mock_auth_provider.assert_called_once_with(
                username='auth-user',
                password='auth-secret'
            )

            # Cluster should be created with auth provider. Assert the call
            # first so a missing call fails clearly instead of as an
            # AttributeError on a None call_args.
            mock_cluster.assert_called_once()
            call_args = mock_cluster.call_args
            assert 'auth_provider' in call_args.kwargs
            assert call_args.kwargs['auth_provider'] == mock_auth_instance

    @patch('trustgraph.storage.rows.cassandra.write.Cluster')
    @patch('trustgraph.storage.rows.cassandra.write.PlainTextAuthProvider')
    def test_no_authentication_when_credentials_missing(self, mock_auth_provider, mock_cluster):
        """Test that authentication is not used when credentials are missing."""
        env_vars = {
            'CASSANDRA_HOST': 'no-auth-host'
            # No username/password
        }

        mock_cluster_instance = MagicMock()
        mock_cluster.return_value = mock_cluster_instance

        with patch.dict(os.environ, env_vars, clear=True):
            processor = RowsWriter(taskgroup=MagicMock())
            processor.connect_cassandra()

            # Auth provider should not be created
            mock_auth_provider.assert_not_called()

            # Cluster should be created without auth provider
            mock_cluster.assert_called_once()
            call_args = mock_cluster.call_args
            assert 'auth_provider' not in call_args.kwargs

    @patch('trustgraph.storage.rows.cassandra.write.Cluster')
    @patch('trustgraph.storage.rows.cassandra.write.PlainTextAuthProvider')
    def test_no_authentication_when_only_username_provided(self, mock_auth_provider, mock_cluster):
        """Test that authentication is not used when only username is provided."""
        mock_cluster_instance = MagicMock()
        mock_cluster.return_value = mock_cluster_instance

        # Empty environment: an ambient CASSANDRA_PASSWORD would otherwise
        # complete the credential pair and turn authentication on.
        with patch.dict(os.environ, {}, clear=True):
            processor = RowsWriter(
                taskgroup=MagicMock(),
                cassandra_host='partial-auth-host',
                cassandra_username='partial-user'
                # No password
            )

            processor.connect_cassandra()

        # Auth provider should not be created (needs both username AND password)
        mock_auth_provider.assert_not_called()

        # Cluster should be created without auth provider
        mock_cluster.assert_called_once()
        call_args = mock_cluster.call_args
        assert 'auth_provider' not in call_args.kwargs