# trustgraph/tests/unit/test_direct/test_entity_centric_kg.py

"""
Unit tests for EntityCentricKnowledgeGraph class

Tests the entity-centric knowledge graph implementation without requiring
an actual Cassandra connection. Uses mocking to verify correct behavior.
"""

import pytest
from unittest.mock import MagicMock, patch, call
import os


class TestEntityCentricKnowledgeGraph:
    """Test cases for EntityCentricKnowledgeGraph.

    The Cassandra ``Cluster`` class is patched inside
    ``trustgraph.direct.cassandra_kg`` so no real connection is opened; the
    tests inspect the calls recorded on the mocked session instead.
    """

    @pytest.fixture
    def mock_cluster(self):
        """Patch the Cassandra ``Cluster`` class for the duration of one test.

        Yields:
            A ``(cluster_cls, cluster, session)`` tuple: the patched class,
            the instance it returns when called, and the session returned by
            that instance's ``connect()``.
        """
        with patch('trustgraph.direct.cassandra_kg.Cluster') as mock_cluster_cls:
            mock_cluster = MagicMock()
            mock_session = MagicMock()
            mock_cluster.connect.return_value = mock_session
            mock_cluster_cls.return_value = mock_cluster
            yield mock_cluster_cls, mock_cluster, mock_session

    @pytest.fixture
    def entity_kg(self, mock_cluster):
        """Build an EntityCentricKnowledgeGraph on top of the mocked cluster.

        Returns:
            A ``(kg, session)`` tuple so tests can drive the graph and then
            inspect what was sent to the mocked session.
        """
        from trustgraph.direct.cassandra_kg import EntityCentricKnowledgeGraph
        _, _, mock_session = mock_cluster

        kg = EntityCentricKnowledgeGraph(hosts=['localhost'], keyspace='test_keyspace')
        return kg, mock_session

    def test_init_creates_entity_centric_schema(self, mock_cluster):
        """Initialization issues statements for the keyspace and schema tables.

        Checks that the keyspace plus the ``quads_by_entity``,
        ``quads_by_collection`` and ``collection_metadata`` tables are each
        mentioned in at least one ``session.execute`` call.
        """
        from trustgraph.direct.cassandra_kg import EntityCentricKnowledgeGraph
        _, _, mock_session = mock_cluster

        # Only the constructor's side effects on the session matter here.
        EntityCentricKnowledgeGraph(hosts=['localhost'], keyspace='test_keyspace')

        # Render every recorded execute() call once and search the text.
        executed_statements = [
            str(recorded) for recorded in mock_session.execute.call_args_list
        ]

        # Keyspace creation is matched case-insensitively.
        assert any(
            'create keyspace' in statement.lower()
            for statement in executed_statements
        ), "no CREATE KEYSPACE statement was executed"

        for table in ('quads_by_entity', 'quads_by_collection', 'collection_metadata'):
            assert any(
                table in statement for statement in executed_statements
            ), f"no executed statement mentions {table}"

    def test_prepare_statements_initialized(self, entity_kg):
        """Construction prepares INSERT statements for both quad tables."""
        _, mock_session = entity_kg

        assert mock_session.prepare.called

        # Render every recorded prepare() call once and search the text.
        prepared_queries = [
            str(recorded) for recorded in mock_session.prepare.call_args_list
        ]

        for table in ('quads_by_entity', 'quads_by_collection'):
            assert any(
                'INSERT INTO' in query and table in query
                for query in prepared_queries
            ), f"no INSERT statement was prepared for {table}"

    def test_insert_uri_object_creates_4_entity_rows(self, entity_kg):
        """Inserting a quad with a URI object reaches the session.

        NOTE(review): the test name promises 4 entity rows, but only the fact
        that ``execute`` was called is asserted; the row count is not
        inspected.
        """
        kg, mock_session = entity_kg

        # Drop the calls recorded during construction so only insert() counts.
        mock_session.reset_mock()

        kg.insert(
            collection='test_collection',
            s='http://example.org/Alice',
            p='http://example.org/knows',
            o='http://example.org/Bob',
            g='http://example.org/graph1',
            otype='u'
        )

        mock_session.execute.assert_called()

    def test_insert_literal_object_creates_3_entity_rows(self, entity_kg):
        """Inserting a quad with a literal object reaches the session.

        NOTE(review): the test name promises 3 entity rows, but only the fact
        that ``execute`` was called is asserted; the row count is not
        inspected.
        """
        kg, mock_session = entity_kg

        # Drop the calls recorded during construction so only insert() counts.
        mock_session.reset_mock()

        kg.insert(
            collection='test_collection',
            s='http://example.org/Alice',
            p='http://www.w3.org/2000/01/rdf-schema#label',
            o='Alice Smith',
            g=None,
            otype='l',
            dtype='xsd:string',
            lang='en'
        )

        mock_session.execute.assert_called()

    def test_insert_default_graph(self, entity_kg):
        """Inserting with ``g=None`` reaches the session.

        NOTE(review): the stored graph value is not inspected; only that
        ``execute`` was called.
        """
        kg, mock_session = entity_kg

        mock_session.reset_mock()

        kg.insert(
            collection='test_collection',
            s='http://example.org/Alice',
            p='http://example.org/knows',
            o='http://example.org/Bob',
            g=None,
            otype='u'
        )

        mock_session.execute.assert_called()

    def test_insert_auto_detects_otype(self, entity_kg):
        """Inserting without ``otype`` works for URI-like and plain objects.

        NOTE(review): the detected otype itself is not inspected; each insert
        is only checked to have called ``execute``.
        """
        kg, mock_session = entity_kg

        mock_session.reset_mock()

        # Object looks like a URI
        kg.insert(
            collection='test_collection',
            s='http://example.org/Alice',
            p='http://example.org/knows',
            o='http://example.org/Bob'
        )
        mock_session.execute.assert_called()

        # Start from a clean slate so the second assertion is independent.
        mock_session.reset_mock()

        # Object is a plain string
        kg.insert(
            collection='test_collection',
            s='http://example.org/Alice',
            p='http://example.org/name',
            o='Alice'
        )
        mock_session.execute.assert_called()

    def test_get_s_returns_quads_for_subject(self, entity_kg):
        """get_s executes a query and returns one result per mocked row."""
        kg, mock_session = entity_kg

        # Row the mocked session hands back for any execute() call
        mock_result = [
            MagicMock(p='http://example.org/knows', o='http://example.org/Bob',
                      d='', otype='u', dtype='', lang='', s='http://example.org/Alice')
        ]
        mock_session.execute.return_value = mock_result

        results = kg.get_s('test_collection', 'http://example.org/Alice')

        mock_session.execute.assert_called()

        # The returned item exposes the quad's s/p/o values
        assert len(results) == 1
        assert results[0].s == 'http://example.org/Alice'
        assert results[0].p == 'http://example.org/knows'
        assert results[0].o == 'http://example.org/Bob'

    def test_get_p_returns_quads_for_predicate(self, entity_kg):
        """get_p executes a query and returns one result per mocked row."""
        kg, mock_session = entity_kg

        mock_result = [
            MagicMock(s='http://example.org/Alice', o='http://example.org/Bob',
                      d='', otype='u', dtype='', lang='', p='http://example.org/knows')
        ]
        mock_session.execute.return_value = mock_result

        results = kg.get_p('test_collection', 'http://example.org/knows')

        mock_session.execute.assert_called()
        assert len(results) == 1

    def test_get_o_returns_quads_for_object(self, entity_kg):
        """get_o executes a query and returns one result per mocked row."""
        kg, mock_session = entity_kg

        mock_result = [
            MagicMock(s='http://example.org/Alice', p='http://example.org/knows',
                      d='', otype='u', dtype='', lang='', o='http://example.org/Bob')
        ]
        mock_session.execute.return_value = mock_result

        results = kg.get_o('test_collection', 'http://example.org/Bob')

        mock_session.execute.assert_called()
        assert len(results) == 1

    def test_get_sp_returns_quads_for_subject_predicate(self, entity_kg):
        """get_sp executes a query and returns one result per mocked row."""
        kg, mock_session = entity_kg

        mock_result = [
            MagicMock(o='http://example.org/Bob', d='', otype='u', dtype='', lang='')
        ]
        mock_session.execute.return_value = mock_result

        results = kg.get_sp('test_collection', 'http://example.org/Alice',
                           'http://example.org/knows')

        mock_session.execute.assert_called()
        assert len(results) == 1

    def test_get_po_returns_quads_for_predicate_object(self, entity_kg):
        """get_po executes a query and returns one result per mocked row."""
        kg, mock_session = entity_kg

        mock_result = [
            MagicMock(s='http://example.org/Alice', d='', otype='u', dtype='', lang='',
                      o='http://example.org/Bob')
        ]
        mock_session.execute.return_value = mock_result

        results = kg.get_po('test_collection', 'http://example.org/knows',
                           'http://example.org/Bob')

        mock_session.execute.assert_called()
        assert len(results) == 1

    def test_get_os_returns_quads_for_object_subject(self, entity_kg):
        """get_os executes a query and returns one result per mocked row."""
        kg, mock_session = entity_kg

        mock_result = [
            MagicMock(p='http://example.org/knows', d='', otype='u', dtype='', lang='',
                      s='http://example.org/Alice', o='http://example.org/Bob')
        ]
        mock_session.execute.return_value = mock_result

        results = kg.get_os('test_collection', 'http://example.org/Bob',
                           'http://example.org/Alice')

        mock_session.execute.assert_called()
        assert len(results) == 1

    def test_get_spo_returns_quads_for_subject_predicate_object(self, entity_kg):
        """get_spo executes a query and returns one result per mocked row."""
        kg, mock_session = entity_kg

        mock_result = [
            MagicMock(d='', otype='u', dtype='', lang='',
                      o='http://example.org/Bob')
        ]
        mock_session.execute.return_value = mock_result

        results = kg.get_spo('test_collection', 'http://example.org/Alice',
                            'http://example.org/knows', 'http://example.org/Bob')

        mock_session.execute.assert_called()
        assert len(results) == 1

    def test_get_g_returns_quads_for_graph(self, entity_kg):
        """get_g executes a query against the session.

        Only the execute() call is verified; the returned value is not
        inspected by this test.
        """
        kg, mock_session = entity_kg

        mock_result = [
            MagicMock(s='http://example.org/Alice', p='http://example.org/knows',
                      o='http://example.org/Bob', otype='u', dtype='', lang='')
        ]
        mock_session.execute.return_value = mock_result

        kg.get_g('test_collection', 'http://example.org/graph1')

        mock_session.execute.assert_called()

    def test_get_all_returns_all_quads_in_collection(self, entity_kg):
        """get_all executes a query against the session.

        Only the execute() call is verified; the returned value is not
        inspected by this test.
        """
        kg, mock_session = entity_kg

        mock_result = [
            MagicMock(d='', s='http://example.org/Alice', p='http://example.org/knows',
                      o='http://example.org/Bob', otype='u', dtype='', lang='')
        ]
        mock_session.execute.return_value = mock_result

        kg.get_all('test_collection')

        mock_session.execute.assert_called()

    def test_graph_none_returns_all_graphs(self, entity_kg):
        """get_s with ``g=None`` keeps rows from every graph."""
        kg, mock_session = entity_kg

        # Two rows that differ in their ``d`` (graph) value
        mock_result = [
            MagicMock(p='http://example.org/knows', d='http://example.org/graph1',
                      otype='u', dtype='', lang='', s='http://example.org/Alice',
                      o='http://example.org/Bob'),
            MagicMock(p='http://example.org/knows', d='http://example.org/graph2',
                      otype='u', dtype='', lang='', s='http://example.org/Alice',
                      o='http://example.org/Charlie')
        ]
        mock_session.execute.return_value = mock_result

        results = kg.get_s('test_collection', 'http://example.org/Alice', g=None)

        # Both rows come back
        assert len(results) == 2

    def test_specific_graph_filters_results(self, entity_kg):
        """get_s with an explicit graph keeps only rows from that graph."""
        kg, mock_session = entity_kg

        # Two rows that differ in their ``d`` (graph) value
        mock_result = [
            MagicMock(p='http://example.org/knows', d='http://example.org/graph1',
                      otype='u', dtype='', lang='', s='http://example.org/Alice',
                      o='http://example.org/Bob'),
            MagicMock(p='http://example.org/knows', d='http://example.org/graph2',
                      otype='u', dtype='', lang='', s='http://example.org/Alice',
                      o='http://example.org/Charlie')
        ]
        mock_session.execute.return_value = mock_result

        results = kg.get_s('test_collection', 'http://example.org/Alice',
                          g='http://example.org/graph1')

        # Only the graph1 row survives
        assert len(results) == 1
        assert results[0].g == 'http://example.org/graph1'

    def test_collection_exists_returns_true_when_exists(self, entity_kg):
        """collection_exists is True when the session returns a row."""
        kg, mock_session = entity_kg

        mock_session.execute.return_value = [MagicMock(collection='test_collection')]

        assert kg.collection_exists('test_collection') is True

    def test_collection_exists_returns_false_when_not_exists(self, entity_kg):
        """collection_exists is False when the session returns no rows."""
        kg, mock_session = entity_kg

        mock_session.execute.return_value = []

        assert kg.collection_exists('nonexistent_collection') is False

    def test_create_collection_inserts_metadata(self, entity_kg):
        """create_collection reaches the session.

        NOTE(review): the statement text is not inspected; only that
        ``execute`` was called after the mock was reset.
        """
        kg, mock_session = entity_kg

        mock_session.reset_mock()
        kg.create_collection('test_collection')

        mock_session.execute.assert_called()

    def test_delete_collection_removes_all_data(self, entity_kg):
        """delete_collection reaches the session.

        NOTE(review): the individual delete statements are not inspected;
        only that ``execute`` was called after the mock was reset.
        """
        kg, mock_session = entity_kg

        # Clear construction-time calls first, then configure the stubbed
        # rows. reset_mock() leaves return_value alone by default, so this
        # order is equivalent to configuring first but reads unambiguously.
        mock_session.reset_mock()

        # Rows handed back when the collection's quads are read
        mock_quads = [
            MagicMock(d='', s='http://example.org/Alice', p='http://example.org/knows',
                      o='http://example.org/Bob', otype='u')
        ]
        mock_session.execute.return_value = mock_quads

        kg.delete_collection('test_collection')

        assert mock_session.execute.called

    def test_close_shuts_down_connections(self, entity_kg):
        """close() shuts down both the session and the cluster exactly once."""
        kg, mock_session = entity_kg

        kg.close()

        mock_session.shutdown.assert_called_once()
        kg.cluster.shutdown.assert_called_once()


class TestQuadResult:
    """Checks for the QuadResult class: field storage and default values"""

    def test_quad_result_stores_all_fields(self):
        """Every constructor argument can be read back as an attribute"""
        from trustgraph.direct.cassandra_kg import QuadResult

        expected_fields = {
            's': 'http://example.org/Alice',
            'p': 'http://example.org/knows',
            'o': 'http://example.org/Bob',
            'g': 'http://example.org/graph1',
            'otype': 'u',
            'dtype': '',
            'lang': '',
        }

        quad = QuadResult(**expected_fields)

        # Compare attribute by attribute, in declaration order
        for field_name, field_value in expected_fields.items():
            assert getattr(quad, field_name) == field_value

    def test_quad_result_defaults(self):
        """Omitted otype/dtype/lang fall back to 'u', '' and ''"""
        from trustgraph.direct.cassandra_kg import QuadResult

        required_only = dict(
            s='http://example.org/s',
            p='http://example.org/p',
            o='literal value',
            g='',
        )
        quad = QuadResult(**required_only)

        # otype defaults to 'u' even though the object here is not a URI
        assert quad.otype == 'u'
        assert (quad.dtype, quad.lang) == ('', '')

    def test_quad_result_with_literal_metadata(self):
        """Literal type, datatype and language are stored as given"""
        from trustgraph.direct.cassandra_kg import QuadResult

        quad = QuadResult(
            s='http://example.org/Alice',
            p='http://www.w3.org/2000/01/rdf-schema#label',
            o='Alice Smith',
            g='',
            otype='l',
            dtype='xsd:string',
            lang='en',
        )

        literal_metadata = (quad.otype, quad.dtype, quad.lang)
        assert literal_metadata == ('l', 'xsd:string', 'en')


class TestWriteHelperFunctions:
    """Checks for the get_term_* helpers exposed by the Cassandra write module"""

    def test_get_term_otype_for_iri(self):
        """An IRI term maps to otype 'u'"""
        from trustgraph.storage.triples.cassandra.write import get_term_otype
        from trustgraph.schema import IRI, Term

        iri_term = Term(type=IRI, iri='http://example.org/Alice')
        otype = get_term_otype(iri_term)
        assert otype == 'u'

    def test_get_term_otype_for_literal(self):
        """A LITERAL term maps to otype 'l'"""
        from trustgraph.storage.triples.cassandra.write import get_term_otype
        from trustgraph.schema import LITERAL, Term

        literal_term = Term(type=LITERAL, value='Alice Smith')
        otype = get_term_otype(literal_term)
        assert otype == 'l'

    def test_get_term_otype_for_blank(self):
        """A BLANK term maps to otype 'u'"""
        from trustgraph.storage.triples.cassandra.write import get_term_otype
        from trustgraph.schema import BLANK, Term

        blank_term = Term(type=BLANK, id='_:b1')
        otype = get_term_otype(blank_term)
        assert otype == 'u'

    def test_get_term_otype_for_triple(self):
        """A TRIPLE term maps to otype 't'"""
        from trustgraph.storage.triples.cassandra.write import get_term_otype
        from trustgraph.schema import TRIPLE, Term

        triple_term = Term(type=TRIPLE)
        otype = get_term_otype(triple_term)
        assert otype == 't'

    def test_get_term_otype_for_none(self):
        """A missing term (None) maps to otype 'u'"""
        from trustgraph.storage.triples.cassandra.write import get_term_otype

        otype = get_term_otype(None)
        assert otype == 'u'

    def test_get_term_dtype_for_literal(self):
        """The datatype of a LITERAL term is returned unchanged"""
        from trustgraph.storage.triples.cassandra.write import get_term_dtype
        from trustgraph.schema import LITERAL, Term

        typed_literal = Term(type=LITERAL, value='42', datatype='xsd:integer')
        dtype = get_term_dtype(typed_literal)
        assert dtype == 'xsd:integer'

    def test_get_term_dtype_for_non_literal(self):
        """A non-LITERAL term yields an empty datatype"""
        from trustgraph.storage.triples.cassandra.write import get_term_dtype
        from trustgraph.schema import IRI, Term

        iri_term = Term(type=IRI, iri='http://example.org/Alice')
        dtype = get_term_dtype(iri_term)
        assert dtype == ''

    def test_get_term_dtype_for_none(self):
        """A missing term (None) yields an empty datatype"""
        from trustgraph.storage.triples.cassandra.write import get_term_dtype

        dtype = get_term_dtype(None)
        assert dtype == ''

    def test_get_term_lang_for_literal(self):
        """The language tag of a LITERAL term is returned unchanged"""
        from trustgraph.storage.triples.cassandra.write import get_term_lang
        from trustgraph.schema import LITERAL, Term

        tagged_literal = Term(type=LITERAL, value='Alice Smith', language='en')
        lang = get_term_lang(tagged_literal)
        assert lang == 'en'

    def test_get_term_lang_for_non_literal(self):
        """A non-LITERAL term yields an empty language tag"""
        from trustgraph.storage.triples.cassandra.write import get_term_lang
        from trustgraph.schema import IRI, Term

        iri_term = Term(type=IRI, iri='http://example.org/Alice')
        lang = get_term_lang(iri_term)
        assert lang == ''


class TestServiceHelperFunctions:
    """Checks for the create_term helper exposed by the Cassandra query service"""

    def test_create_term_with_uri_otype(self):
        """term_type='u' produces an IRI term carrying the given value"""
        from trustgraph.query.triples.cassandra.service import create_term
        from trustgraph.schema import IRI

        alice_uri = 'http://example.org/Alice'
        built = create_term(alice_uri, term_type='u')

        assert built.type == IRI
        assert built.iri == alice_uri

    def test_create_term_with_literal_otype(self):
        """term_type='l' produces a LITERAL term with datatype and language"""
        from trustgraph.query.triples.cassandra.service import create_term
        from trustgraph.schema import LITERAL

        built = create_term(
            'Alice Smith',
            term_type='l',
            datatype='xsd:string',
            language='en',
        )

        assert built.type == LITERAL
        assert built.value == 'Alice Smith'
        assert (built.datatype, built.language) == ('xsd:string', 'en')

    def test_create_term_with_triple_otype(self):
        """term_type='t' with a valid JSON payload produces a TRIPLE term"""
        from trustgraph.query.triples.cassandra.service import create_term
        from trustgraph.schema import TRIPLE, IRI
        import json

        # Serialized s/p/o positions, each an IRI-typed term
        positions = (
            ("s", "http://example.org/Alice"),
            ("p", "http://example.org/knows"),
            ("o", "http://example.org/Bob"),
        )
        payload = json.dumps({
            position: {"type": "i", "iri": iri}
            for position, iri in positions
        })

        built = create_term(payload, term_type='t')

        assert built.type == TRIPLE
        assert built.triple is not None

        subject = built.triple.s
        assert subject.type == IRI
        assert subject.iri == "http://example.org/Alice"

    def test_create_term_heuristic_fallback_uri(self):
        """Without a term_type, a URL-shaped value becomes an IRI term"""
        from trustgraph.query.triples.cassandra.service import create_term
        from trustgraph.schema import IRI

        alice_uri = 'http://example.org/Alice'
        built = create_term(alice_uri)

        assert built.type == IRI
        assert built.iri == alice_uri

    def test_create_term_heuristic_fallback_literal(self):
        """Without a term_type, a plain string becomes a LITERAL term"""
        from trustgraph.query.triples.cassandra.service import create_term
        from trustgraph.schema import LITERAL

        plain_text = 'Alice Smith'
        built = create_term(plain_text)

        assert built.type == LITERAL
        assert built.value == plain_text