Structured data 2 (#645)

* Structured data refactor: multi-index tables; removes the need for manual modifications to the Cassandra tables
* Tech spec updated to track the implementation
2026-04-26 00:46:22 +02:00 · 2026-02-23 15:56:29 +00:00 · 2026-02-23 15:56:29 +00:00 · 1809c1f56d
commit 1809c1f56d
parent 5ffad92345
87 changed files with 5233 additions and 3235 deletions
--- a/tests/unit/test_query/test_objects_cassandra_query.py
+++ b/tests/unit/test_query/test_objects_cassandra_query.py
@ -1,10 +1,11 @@
 """
-Unit tests for Cassandra Objects GraphQL Query Processor
+Unit tests for Cassandra Rows GraphQL Query Processor (Unified Table Implementation)

 Tests the business logic of the GraphQL query processor including:
- GraphQL schema generation from RowSchema
- Query execution and validation
- CQL translation logic
+- Schema configuration handling
+- Query execution using unified rows table
+- Name sanitization
+- GraphQL query execution
 - Message processing logic
 """

@ -12,119 +13,91 @@ import pytest
 from unittest.mock import MagicMock, AsyncMock, patch
 import json

-import strawberry
-from strawberry import Schema
-
-from trustgraph.query.objects.cassandra.service import Processor
-from trustgraph.schema import ObjectsQueryRequest, ObjectsQueryResponse, GraphQLError
+from trustgraph.query.rows.cassandra.service import Processor
+from trustgraph.schema import RowsQueryRequest, RowsQueryResponse, GraphQLError
 from trustgraph.schema import RowSchema, Field


-class TestObjectsGraphQLQueryLogic:
-    """Test business logic without external dependencies"""
-
-    def test_get_python_type_mapping(self):
-        """Test schema field type conversion to Python types"""
-        processor = MagicMock()
-        processor.get_python_type = Processor.get_python_type.__get__(processor, Processor)
-        
-        # Basic type mappings
-        assert processor.get_python_type("string") == str
-        assert processor.get_python_type("integer") == int
-        assert processor.get_python_type("float") == float
-        assert processor.get_python_type("boolean") == bool
-        assert processor.get_python_type("timestamp") == str
-        assert processor.get_python_type("date") == str
-        assert processor.get_python_type("time") == str
-        assert processor.get_python_type("uuid") == str
-        
-        # Unknown type defaults to str
-        assert processor.get_python_type("unknown_type") == str
-
-    def test_create_graphql_type_basic_fields(self):
-        """Test GraphQL type creation for basic field types"""
-        processor = MagicMock()
-        processor.get_python_type = Processor.get_python_type.__get__(processor, Processor)
-        processor.create_graphql_type = Processor.create_graphql_type.__get__(processor, Processor)
-        
-        # Create test schema
-        schema = RowSchema(
-            name="test_table",
-            description="Test table",
-            fields=[
-                Field(
-                    name="id",
-                    type="string",
-                    primary=True,
-                    required=True,
-                    description="Primary key"
-                ),
-                Field(
-                    name="name",
-                    type="string",
-                    required=True,
-                    description="Name field"
-                ),
-                Field(
-                    name="age",
-                    type="integer",
-                    required=False,
-                    description="Optional age"
-                ),
-                Field(
-                    name="active",
-                    type="boolean",
-                    required=False,
-                    description="Status flag"
-                )
-            ]
-        )
-        
-        # Create GraphQL type
-        graphql_type = processor.create_graphql_type("test_table", schema)
-        
-        # Verify type was created
-        assert graphql_type is not None
-        assert hasattr(graphql_type, '__name__')
-        assert "TestTable" in graphql_type.__name__ or "test_table" in graphql_type.__name__.lower()
+class TestRowsGraphQLQueryLogic:
+    """Test business logic for unified table query implementation"""

    def test_sanitize_name_cassandra_compatibility(self):
        """Test name sanitization for Cassandra field names"""
        processor = MagicMock()
        processor.sanitize_name = Processor.sanitize_name.__get__(processor, Processor)
-        
-        # Test field name sanitization (matches storage processor)
+
+        # Test field name sanitization (uses r_ prefix like storage processor)
        assert processor.sanitize_name("simple_field") == "simple_field"
        assert processor.sanitize_name("Field-With-Dashes") == "field_with_dashes"
        assert processor.sanitize_name("field.with.dots") == "field_with_dots"
-        assert processor.sanitize_name("123_field") == "o_123_field"
+        assert processor.sanitize_name("123_field") == "r_123_field"
        assert processor.sanitize_name("field with spaces") == "field_with_spaces"
        assert processor.sanitize_name("special!@#chars") == "special___chars"
        assert processor.sanitize_name("UPPERCASE") == "uppercase"
        assert processor.sanitize_name("CamelCase") == "camelcase"

-    def test_sanitize_table_name(self):
-        """Test table name sanitization (always gets o_ prefix)"""
+    def test_get_index_names(self):
+        """Test extraction of index names from schema"""
        processor = MagicMock()
-        processor.sanitize_table = Processor.sanitize_table.__get__(processor, Processor)
-        
-        # Table names always get o_ prefix
-        assert processor.sanitize_table("simple_table") == "o_simple_table"
-        assert processor.sanitize_table("Table-Name") == "o_table_name"
-        assert processor.sanitize_table("123table") == "o_123table"
-        assert processor.sanitize_table("") == "o_"
+        processor.get_index_names = Processor.get_index_names.__get__(processor, Processor)
+
+        schema = RowSchema(
+            name="test_schema",
+            fields=[
+                Field(name="id", type="string", primary=True),
+                Field(name="category", type="string", indexed=True),
+                Field(name="name", type="string"),  # Not indexed
+                Field(name="status", type="string", indexed=True)
+            ]
+        )
+
+        index_names = processor.get_index_names(schema)
+
+        assert "id" in index_names
+        assert "category" in index_names
+        assert "status" in index_names
+        assert "name" not in index_names
+        assert len(index_names) == 3
+
+    def test_find_matching_index_exact_match(self):
+        """Test finding matching index for exact match query"""
+        processor = MagicMock()
+        processor.get_index_names = Processor.get_index_names.__get__(processor, Processor)
+        processor.find_matching_index = Processor.find_matching_index.__get__(processor, Processor)
+
+        schema = RowSchema(
+            name="test_schema",
+            fields=[
+                Field(name="id", type="string", primary=True),
+                Field(name="category", type="string", indexed=True),
+                Field(name="name", type="string")  # Not indexed
+            ]
+        )
+
+        # Filter on indexed field should return match
+        filters = {"category": "electronics"}
+        result = processor.find_matching_index(schema, filters)
+        assert result is not None
+        assert result[0] == "category"
+        assert result[1] == ["electronics"]
+
+        # Filter on non-indexed field should return None
+        filters = {"name": "test"}
+        result = processor.find_matching_index(schema, filters)
+        assert result is None

    @pytest.mark.asyncio
    async def test_schema_config_parsing(self):
        """Test parsing of schema configuration"""
        processor = MagicMock()
        processor.schemas = {}
-        processor.graphql_types = {}
-        processor.graphql_schema = None
-        processor.config_key = "schema"  # Set the config key
-        processor.generate_graphql_schema = AsyncMock()
+        processor.config_key = "schema"
+        processor.schema_builder = MagicMock()
+        processor.schema_builder.clear = MagicMock()
+        processor.schema_builder.add_schema = MagicMock()
+        processor.schema_builder.build = MagicMock(return_value=MagicMock())
        processor.on_schema_config = Processor.on_schema_config.__get__(processor, Processor)
-        
+
        # Create test config
        schema_config = {
            "schema": {
@ -154,96 +127,29 @@ class TestObjectsGraphQLQueryLogic:
                })
            }
        }
-        
+
        # Process config
        await processor.on_schema_config(schema_config, version=1)
-        
+
        # Verify schema was loaded
        assert "customer" in processor.schemas
        schema = processor.schemas["customer"]
        assert schema.name == "customer"
        assert len(schema.fields) == 3
-        
+
        # Verify fields
        id_field = next(f for f in schema.fields if f.name == "id")
        assert id_field.primary is True
-        # The field should have been created correctly from JSON  
-        # Let's test what we can verify - that the field has the right attributes
-        assert hasattr(id_field, 'required')  # Has the required attribute
-        assert hasattr(id_field, 'primary')   # Has the primary attribute
-        
+
        email_field = next(f for f in schema.fields if f.name == "email")
        assert email_field.indexed is True
-        
+
        status_field = next(f for f in schema.fields if f.name == "status")
        assert status_field.enum_values == ["active", "inactive"]
-        
-        # Verify GraphQL schema regeneration was called
-        processor.generate_graphql_schema.assert_called_once()

-    def test_cql_query_building_basic(self):
-        """Test basic CQL query construction"""
-        processor = MagicMock()
-        processor.session = MagicMock()
-        processor.connect_cassandra = MagicMock()
-        processor.sanitize_name = Processor.sanitize_name.__get__(processor, Processor)
-        processor.sanitize_table = Processor.sanitize_table.__get__(processor, Processor)
-        processor.parse_filter_key = Processor.parse_filter_key.__get__(processor, Processor)
-        processor.query_cassandra = Processor.query_cassandra.__get__(processor, Processor)
-        
-        # Mock session execute to capture the query
-        mock_result = []
-        processor.session.execute.return_value = mock_result
-        
-        # Create test schema
-        schema = RowSchema(
-            name="test_table",
-            fields=[
-                Field(name="id", type="string", primary=True),
-                Field(name="name", type="string", indexed=True),
-                Field(name="status", type="string")
-            ]
-        )
-        
-        # Test query building
-        asyncio = pytest.importorskip("asyncio")
-        
-        async def run_test():
-            await processor.query_cassandra(
-                user="test_user",
-                collection="test_collection", 
-                schema_name="test_table",
-                row_schema=schema,
-                filters={"name": "John", "invalid_filter": "ignored"},
-                limit=10
-            )
-        
-        # Run the async test
-        loop = asyncio.new_event_loop()
-        asyncio.set_event_loop(loop)
-        try:
-            loop.run_until_complete(run_test())
-        finally:
-            loop.close()
-        
-        # Verify Cassandra connection and query execution
-        processor.connect_cassandra.assert_called_once()
-        processor.session.execute.assert_called_once()
-        
-        # Verify the query structure (can't easily test exact query without complex mocking)
-        call_args = processor.session.execute.call_args
-        query = call_args[0][0]  # First positional argument is the query
-        params = call_args[0][1]  # Second positional argument is parameters
-        
-        # Basic query structure checks
-        assert "SELECT * FROM test_user.o_test_table" in query
-        assert "WHERE" in query
-        assert "collection = %s" in query
-        assert "LIMIT 10" in query
-        
-        # Parameters should include collection and name filter
-        assert "test_collection" in params
-        assert "John" in params
+        # Verify schema builder was called
+        processor.schema_builder.add_schema.assert_called_once()
+        processor.schema_builder.build.assert_called_once()

    @pytest.mark.asyncio
    async def test_graphql_context_handling(self):
@ -251,13 +157,13 @@ class TestObjectsGraphQLQueryLogic:
        processor = MagicMock()
        processor.graphql_schema = AsyncMock()
        processor.execute_graphql_query = Processor.execute_graphql_query.__get__(processor, Processor)
-        
+
        # Mock schema execution
        mock_result = MagicMock()
        mock_result.data = {"customers": [{"id": "1", "name": "Test"}]}
        mock_result.errors = None
        processor.graphql_schema.execute.return_value = mock_result
-        
+
        result = await processor.execute_graphql_query(
            query='{ customers { id name } }',
            variables={},
@ -265,17 +171,17 @@ class TestObjectsGraphQLQueryLogic:
            user="test_user",
            collection="test_collection"
        )
-        
+
        # Verify schema.execute was called with correct context
        processor.graphql_schema.execute.assert_called_once()
        call_args = processor.graphql_schema.execute.call_args
-        
+
        # Verify context was passed
-        context = call_args[1]['context_value']  # keyword argument
+        context = call_args[1]['context_value']
        assert context["processor"] == processor
        assert context["user"] == "test_user"
        assert context["collection"] == "test_collection"
-        
+
        # Verify result structure
        assert "data" in result
        assert result["data"] == {"customers": [{"id": "1", "name": "Test"}]}
@ -286,104 +192,79 @@ class TestObjectsGraphQLQueryLogic:
        processor = MagicMock()
        processor.graphql_schema = AsyncMock()
        processor.execute_graphql_query = Processor.execute_graphql_query.__get__(processor, Processor)
-        
-        # Create a simple object to simulate GraphQL error instead of MagicMock
+
+        # Create a simple object to simulate GraphQL error
        class MockError:
            def __init__(self, message, path, extensions):
                self.message = message
                self.path = path
                self.extensions = extensions
-            
+
            def __str__(self):
                return self.message
-        
+
        mock_error = MockError(
            message="Field 'invalid_field' doesn't exist",
            path=["customers", "0", "invalid_field"],
            extensions={"code": "FIELD_NOT_FOUND"}
        )
-        
+
        mock_result = MagicMock()
        mock_result.data = None
        mock_result.errors = [mock_error]
        processor.graphql_schema.execute.return_value = mock_result
-        
+
        result = await processor.execute_graphql_query(
            query='{ customers { invalid_field } }',
            variables={},
            operation_name=None,
-            user="test_user", 
+            user="test_user",
            collection="test_collection"
        )
-        
+
        # Verify error handling
        assert "errors" in result
        assert len(result["errors"]) == 1
-        
+
        error = result["errors"][0]
        assert error["message"] == "Field 'invalid_field' doesn't exist"
-        assert error["path"] == ["customers", "0", "invalid_field"]  # Fixed to match string path
+        assert error["path"] == ["customers", "0", "invalid_field"]
        assert error["extensions"] == {"code": "FIELD_NOT_FOUND"}

-    def test_schema_generation_basic_structure(self):
-        """Test basic GraphQL schema generation structure"""
-        processor = MagicMock()
-        processor.schemas = {
-            "customer": RowSchema(
-                name="customer",
-                fields=[
-                    Field(name="id", type="string", primary=True),
-                    Field(name="name", type="string")
-                ]
-            )
-        }
-        processor.graphql_types = {}
-        processor.get_python_type = Processor.get_python_type.__get__(processor, Processor)
-        processor.create_graphql_type = Processor.create_graphql_type.__get__(processor, Processor)
-        
-        # Test individual type creation (avoiding the full schema generation which has annotation issues)
-        graphql_type = processor.create_graphql_type("customer", processor.schemas["customer"])
-        processor.graphql_types["customer"] = graphql_type
-        
-        # Verify type was created
-        assert len(processor.graphql_types) == 1
-        assert "customer" in processor.graphql_types
-        assert processor.graphql_types["customer"] is not None
-
    @pytest.mark.asyncio
    async def test_message_processing_success(self):
        """Test successful message processing flow"""
        processor = MagicMock()
        processor.execute_graphql_query = AsyncMock()
        processor.on_message = Processor.on_message.__get__(processor, Processor)
-        
+
        # Mock successful query result
        processor.execute_graphql_query.return_value = {
            "data": {"customers": [{"id": "1", "name": "John"}]},
            "errors": [],
-            "extensions": {"execution_time": "0.1"}  # Extensions must be strings for Map(String())
+            "extensions": {}
        }
-        
+
        # Create mock message
        mock_msg = MagicMock()
-        mock_request = ObjectsQueryRequest(
+        mock_request = RowsQueryRequest(
            user="test_user",
-            collection="test_collection", 
+            collection="test_collection",
            query='{ customers { id name } }',
            variables={},
            operation_name=None
        )
        mock_msg.value.return_value = mock_request
        mock_msg.properties.return_value = {"id": "test-123"}
-        
+
        # Mock flow
        mock_flow = MagicMock()
        mock_response_flow = AsyncMock()
        mock_flow.return_value = mock_response_flow
-        
+
        # Process message
        await processor.on_message(mock_msg, None, mock_flow)
-        
+
        # Verify query was executed
        processor.execute_graphql_query.assert_called_once_with(
            query='{ customers { id name } }',
@ -392,13 +273,13 @@ class TestObjectsGraphQLQueryLogic:
            user="test_user",
            collection="test_collection"
        )
-        
+
        # Verify response was sent
        mock_response_flow.send.assert_called_once()
        response_call = mock_response_flow.send.call_args[0][0]
-        
+
        # Verify response structure
-        assert isinstance(response_call, ObjectsQueryResponse)
+        assert isinstance(response_call, RowsQueryResponse)
        assert response_call.error is None
        assert '"customers"' in response_call.data  # JSON encoded
        assert len(response_call.errors) == 0
@ -409,13 +290,13 @@ class TestObjectsGraphQLQueryLogic:
        processor = MagicMock()
        processor.execute_graphql_query = AsyncMock()
        processor.on_message = Processor.on_message.__get__(processor, Processor)
-        
+
        # Mock query execution error
        processor.execute_graphql_query.side_effect = RuntimeError("No schema available")
-        
+
        # Create mock message
        mock_msg = MagicMock()
-        mock_request = ObjectsQueryRequest(
+        mock_request = RowsQueryRequest(
            user="test_user",
            collection="test_collection",
            query='{ invalid_query }',
@ -424,67 +305,225 @@ class TestObjectsGraphQLQueryLogic:
        )
        mock_msg.value.return_value = mock_request
        mock_msg.properties.return_value = {"id": "test-456"}
-        
+
        # Mock flow
        mock_flow = MagicMock()
        mock_response_flow = AsyncMock()
        mock_flow.return_value = mock_response_flow
-        
+
        # Process message
        await processor.on_message(mock_msg, None, mock_flow)
-        
+
        # Verify error response was sent
        mock_response_flow.send.assert_called_once()
        response_call = mock_response_flow.send.call_args[0][0]
-        
+
        # Verify error response structure
-        assert isinstance(response_call, ObjectsQueryResponse)
+        assert isinstance(response_call, RowsQueryResponse)
        assert response_call.error is not None
-        assert response_call.error.type == "objects-query-error"
+        assert response_call.error.type == "rows-query-error"
        assert "No schema available" in response_call.error.message
        assert response_call.data is None


-class TestCQLQueryGeneration:
-    """Test CQL query generation logic in isolation"""
-    
-    def test_partition_key_inclusion(self):
-        """Test that collection is always included in queries"""
+class TestUnifiedTableQueries:
+    """Test queries against the unified rows table"""
+
+    @pytest.mark.asyncio
+    async def test_query_with_index_match(self):
+        """Test query execution with matching index"""
        processor = MagicMock()
+        processor.session = MagicMock()
+        processor.connect_cassandra = MagicMock()
        processor.sanitize_name = Processor.sanitize_name.__get__(processor, Processor)
-        processor.sanitize_table = Processor.sanitize_table.__get__(processor, Processor)
-        
-        # Mock the query building (simplified version)
-        keyspace = processor.sanitize_name("test_user")
-        table = processor.sanitize_table("test_table")
-        
-        query = f"SELECT * FROM {keyspace}.{table}"
-        where_clauses = ["collection = %s"]
-        
-        assert "collection = %s" in where_clauses
-        assert keyspace == "test_user"
-        assert table == "o_test_table"
-    
+        processor.get_index_names = Processor.get_index_names.__get__(processor, Processor)
+        processor.find_matching_index = Processor.find_matching_index.__get__(processor, Processor)
+        processor.query_cassandra = Processor.query_cassandra.__get__(processor, Processor)
+
+        # Mock session execute to return test data
+        mock_row = MagicMock()
+        mock_row.data = {"id": "123", "name": "Test Product", "category": "electronics"}
+        processor.session.execute.return_value = [mock_row]
+
+        schema = RowSchema(
+            name="products",
+            fields=[
+                Field(name="id", type="string", primary=True),
+                Field(name="category", type="string", indexed=True),
+                Field(name="name", type="string")
+            ]
+        )
+
+        # Query with filter on indexed field
+        results = await processor.query_cassandra(
+            user="test_user",
+            collection="test_collection",
+            schema_name="products",
+            row_schema=schema,
+            filters={"category": "electronics"},
+            limit=10
+        )
+
+        # Verify Cassandra was connected and queried
+        processor.connect_cassandra.assert_called_once()
+        processor.session.execute.assert_called_once()
+
+        # Verify query structure - should query unified rows table
+        call_args = processor.session.execute.call_args
+        query = call_args[0][0]
+        params = call_args[0][1]
+
+        assert "SELECT data, source FROM test_user.rows" in query
+        assert "collection = %s" in query
+        assert "schema_name = %s" in query
+        assert "index_name = %s" in query
+        assert "index_value = %s" in query
+
+        assert params[0] == "test_collection"
+        assert params[1] == "products"
+        assert params[2] == "category"
+        assert params[3] == ["electronics"]
+
+        # Verify results
+        assert len(results) == 1
+        assert results[0]["id"] == "123"
+        assert results[0]["category"] == "electronics"
+
+    @pytest.mark.asyncio
+    async def test_query_without_index_match(self):
+        """Test query execution without matching index (scan mode)"""
+        processor = MagicMock()
+        processor.session = MagicMock()
+        processor.connect_cassandra = MagicMock()
+        processor.sanitize_name = Processor.sanitize_name.__get__(processor, Processor)
+        processor.get_index_names = Processor.get_index_names.__get__(processor, Processor)
+        processor.find_matching_index = Processor.find_matching_index.__get__(processor, Processor)
+        processor._matches_filters = Processor._matches_filters.__get__(processor, Processor)
+        processor.query_cassandra = Processor.query_cassandra.__get__(processor, Processor)
+
+        # Mock session execute to return test data
+        mock_row1 = MagicMock()
+        mock_row1.data = {"id": "1", "name": "Product A", "price": "100"}
+        mock_row2 = MagicMock()
+        mock_row2.data = {"id": "2", "name": "Product B", "price": "200"}
+        processor.session.execute.return_value = [mock_row1, mock_row2]
+
+        schema = RowSchema(
+            name="products",
+            fields=[
+                Field(name="id", type="string", primary=True),
+                Field(name="name", type="string"),  # Not indexed
+                Field(name="price", type="string")  # Not indexed
+            ]
+        )
+
+        # Query with filter on non-indexed field
+        results = await processor.query_cassandra(
+            user="test_user",
+            collection="test_collection",
+            schema_name="products",
+            row_schema=schema,
+            filters={"name": "Product A"},
+            limit=10
+        )
+
+        # Query should use ALLOW FILTERING for scan
+        call_args = processor.session.execute.call_args
+        query = call_args[0][0]
+
+        assert "ALLOW FILTERING" in query
+
+        # Should post-filter results
+        assert len(results) == 1
+        assert results[0]["name"] == "Product A"
+
+
+class TestFilterMatching:
+    """Test filter matching logic"""
+
+    def test_matches_filters_exact_match(self):
+        """Test exact match filter"""
+        processor = MagicMock()
+        processor._matches_filters = Processor._matches_filters.__get__(processor, Processor)
+
+        schema = RowSchema(name="test", fields=[Field(name="status", type="string")])
+
+        row = {"status": "active", "name": "test"}
+        assert processor._matches_filters(row, {"status": "active"}, schema) is True
+        assert processor._matches_filters(row, {"status": "inactive"}, schema) is False
+
+    def test_matches_filters_comparison_operators(self):
+        """Test comparison operators in filters"""
+        processor = MagicMock()
+        processor._matches_filters = Processor._matches_filters.__get__(processor, Processor)
+
+        schema = RowSchema(name="test", fields=[Field(name="price", type="float")])
+
+        row = {"price": "100.0"}
+
+        # Greater than
+        assert processor._matches_filters(row, {"price_gt": 50}, schema) is True
+        assert processor._matches_filters(row, {"price_gt": 150}, schema) is False
+
+        # Less than
+        assert processor._matches_filters(row, {"price_lt": 150}, schema) is True
+        assert processor._matches_filters(row, {"price_lt": 50}, schema) is False
+
+        # Greater than or equal
+        assert processor._matches_filters(row, {"price_gte": 100}, schema) is True
+        assert processor._matches_filters(row, {"price_gte": 101}, schema) is False
+
+        # Less than or equal
+        assert processor._matches_filters(row, {"price_lte": 100}, schema) is True
+        assert processor._matches_filters(row, {"price_lte": 99}, schema) is False
+
+    def test_matches_filters_contains(self):
+        """Test contains filter"""
+        processor = MagicMock()
+        processor._matches_filters = Processor._matches_filters.__get__(processor, Processor)
+
+        schema = RowSchema(name="test", fields=[Field(name="description", type="string")])
+
+        row = {"description": "A great product for everyone"}
+
+        assert processor._matches_filters(row, {"description_contains": "great"}, schema) is True
+        assert processor._matches_filters(row, {"description_contains": "terrible"}, schema) is False
+
+    def test_matches_filters_in_list(self):
+        """Test in-list filter"""
+        processor = MagicMock()
+        processor._matches_filters = Processor._matches_filters.__get__(processor, Processor)
+
+        schema = RowSchema(name="test", fields=[Field(name="status", type="string")])
+
+        row = {"status": "active"}
+
+        assert processor._matches_filters(row, {"status_in": ["active", "pending"]}, schema) is True
+        assert processor._matches_filters(row, {"status_in": ["inactive", "deleted"]}, schema) is False
+
+
+class TestIndexedFieldFiltering:
+    """Test that only indexed or primary key fields can be directly filtered"""
+
    def test_indexed_field_filtering(self):
        """Test that only indexed or primary key fields can be filtered"""
-        # Create schema with mixed field types
        schema = RowSchema(
            name="test",
            fields=[
                Field(name="id", type="string", primary=True),
-                Field(name="indexed_field", type="string", indexed=True), 
+                Field(name="indexed_field", type="string", indexed=True),
                Field(name="normal_field", type="string", indexed=False),
                Field(name="another_field", type="string")
            ]
        )
-        
+
        filters = {
            "id": "test123",  # Primary key - should be included
            "indexed_field": "value",  # Indexed - should be included
            "normal_field": "ignored",  # Not indexed - should be ignored
            "another_field": "also_ignored"  # Not indexed - should be ignored
        }
-        
+
        # Simulate the filtering logic from the processor
        valid_filters = []
        for field_name, value in filters.items():
@ -492,7 +531,7 @@ class TestCQLQueryGeneration:
                schema_field = next((f for f in schema.fields if f.name == field_name), None)
                if schema_field and (schema_field.indexed or schema_field.primary):
                    valid_filters.append((field_name, value))
-        
+
        # Only id and indexed_field should be included
        assert len(valid_filters) == 2
        field_names = [f[0] for f in valid_filters]
@ -500,52 +539,3 @@ class TestCQLQueryGeneration:
        assert "indexed_field" in field_names
        assert "normal_field" not in field_names
        assert "another_field" not in field_names
-
-
-class TestGraphQLSchemaGeneration:
-    """Test GraphQL schema generation in detail"""
-    
-    def test_field_type_annotations(self):
-        """Test that GraphQL types have correct field annotations"""
-        processor = MagicMock()
-        processor.get_python_type = Processor.get_python_type.__get__(processor, Processor)
-        processor.create_graphql_type = Processor.create_graphql_type.__get__(processor, Processor)
-        
-        # Create schema with various field types
-        schema = RowSchema(
-            name="test",
-            fields=[
-                Field(name="id", type="string", required=True, primary=True),
-                Field(name="count", type="integer", required=True),
-                Field(name="price", type="float", required=False),
-                Field(name="active", type="boolean", required=False),
-                Field(name="optional_text", type="string", required=False)
-            ]
-        )
-        
-        # Create GraphQL type
-        graphql_type = processor.create_graphql_type("test", schema)
-        
-        # Verify type was created successfully
-        assert graphql_type is not None
-    
-    def test_basic_type_creation(self):
-        """Test that GraphQL types are created correctly"""
-        processor = MagicMock()
-        processor.schemas = {
-            "customer": RowSchema(
-                name="customer",
-                fields=[Field(name="id", type="string", primary=True)]
-            )
-        }
-        processor.graphql_types = {}
-        processor.get_python_type = Processor.get_python_type.__get__(processor, Processor)
-        processor.create_graphql_type = Processor.create_graphql_type.__get__(processor, Processor)
-        
-        # Create GraphQL type directly
-        graphql_type = processor.create_graphql_type("customer", processor.schemas["customer"])
-        processor.graphql_types["customer"] = graphql_type
-        
-        # Verify customer type was created
-        assert "customer" in processor.graphql_types
-        assert processor.graphql_types["customer"] is not None