Structured data 2 (#645)

* Structured data refactor - multi-index tables, remove need for manual mods to the Cassandra tables

* Tech spec updated to track implementation
This commit is contained in:
cybermaggedon 2026-02-23 15:56:29 +00:00 committed by GitHub
parent 5ffad92345
commit 1809c1f56d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
87 changed files with 5233 additions and 3235 deletions

View file

@ -10,7 +10,7 @@ import pytest
from unittest.mock import Mock, patch, MagicMock
from trustgraph.storage.triples.cassandra.write import Processor as TriplesWriter
from trustgraph.storage.objects.cassandra.write import Processor as ObjectsWriter
from trustgraph.storage.rows.cassandra.write import Processor as RowsWriter
from trustgraph.query.triples.cassandra.service import Processor as TriplesQuery
from trustgraph.storage.knowledge.store import Processor as KgStore
@ -81,10 +81,10 @@ class TestTriplesWriterConfiguration:
assert processor.cassandra_password is None
class TestObjectsWriterConfiguration:
class TestRowsWriterConfiguration:
"""Test Cassandra configuration in objects writer processor."""
@patch('trustgraph.storage.objects.cassandra.write.Cluster')
@patch('trustgraph.storage.rows.cassandra.write.Cluster')
def test_environment_variable_configuration(self, mock_cluster):
"""Test processor picks up configuration from environment variables."""
env_vars = {
@ -97,13 +97,13 @@ class TestObjectsWriterConfiguration:
mock_cluster.return_value = mock_cluster_instance
with patch.dict(os.environ, env_vars, clear=True):
processor = ObjectsWriter(taskgroup=MagicMock())
processor = RowsWriter(taskgroup=MagicMock())
assert processor.cassandra_host == ['obj-env-host1', 'obj-env-host2']
assert processor.cassandra_username == 'obj-env-user'
assert processor.cassandra_password == 'obj-env-pass'
@patch('trustgraph.storage.objects.cassandra.write.Cluster')
@patch('trustgraph.storage.rows.cassandra.write.Cluster')
def test_cassandra_connection_with_hosts_list(self, mock_cluster):
"""Test that Cassandra connection uses hosts list correctly."""
env_vars = {
@ -118,7 +118,7 @@ class TestObjectsWriterConfiguration:
mock_cluster.return_value = mock_cluster_instance
with patch.dict(os.environ, env_vars, clear=True):
processor = ObjectsWriter(taskgroup=MagicMock())
processor = RowsWriter(taskgroup=MagicMock())
processor.connect_cassandra()
# Verify cluster was called with hosts list
@ -129,8 +129,8 @@ class TestObjectsWriterConfiguration:
assert 'contact_points' in call_args.kwargs
assert call_args.kwargs['contact_points'] == ['conn-host1', 'conn-host2', 'conn-host3']
@patch('trustgraph.storage.objects.cassandra.write.Cluster')
@patch('trustgraph.storage.objects.cassandra.write.PlainTextAuthProvider')
@patch('trustgraph.storage.rows.cassandra.write.Cluster')
@patch('trustgraph.storage.rows.cassandra.write.PlainTextAuthProvider')
def test_authentication_configuration(self, mock_auth_provider, mock_cluster):
"""Test authentication is configured when credentials are provided."""
env_vars = {
@ -145,7 +145,7 @@ class TestObjectsWriterConfiguration:
mock_cluster.return_value = mock_cluster_instance
with patch.dict(os.environ, env_vars, clear=True):
processor = ObjectsWriter(taskgroup=MagicMock())
processor = RowsWriter(taskgroup=MagicMock())
processor.connect_cassandra()
# Verify auth provider was created with correct credentials
@ -302,10 +302,10 @@ class TestCommandLineArgumentHandling:
def test_objects_writer_add_args(self):
"""Test that objects writer adds standard Cassandra arguments."""
import argparse
from trustgraph.storage.objects.cassandra.write import Processor as ObjectsWriter
from trustgraph.storage.rows.cassandra.write import Processor as RowsWriter
parser = argparse.ArgumentParser()
ObjectsWriter.add_args(parser)
RowsWriter.add_args(parser)
# Parse empty args to check that arguments exist
args = parser.parse_args([])

View file

@ -1,533 +0,0 @@
"""
Unit tests for Cassandra Object Storage Processor
Tests the business logic of the object storage processor including:
- Schema configuration handling
- Type conversions
- Name sanitization
- Table structure generation
"""
import pytest
from unittest.mock import MagicMock, AsyncMock, patch
import json
from trustgraph.storage.objects.cassandra.write import Processor
from trustgraph.schema import ExtractedObject, Metadata, RowSchema, Field
class TestObjectsCassandraStorageLogic:
    """Test business logic without FlowProcessor dependencies"""

    def test_sanitize_name(self) -> None:
        """Test name sanitization for Cassandra compatibility"""
        processor = MagicMock()
        # Bind the real implementation onto a mock so only this method runs
        processor.sanitize_name = Processor.sanitize_name.__get__(processor, Processor)
        # Test various name patterns (back to original logic)
        assert processor.sanitize_name("simple_name") == "simple_name"
        assert processor.sanitize_name("Name-With-Dashes") == "name_with_dashes"
        assert processor.sanitize_name("name.with.dots") == "name_with_dots"
        assert processor.sanitize_name("123_starts_with_number") == "o_123_starts_with_number"
        assert processor.sanitize_name("name with spaces") == "name_with_spaces"
        assert processor.sanitize_name("special!@#$%^chars") == "special______chars"

    def test_get_cassandra_type(self) -> None:
        """Test field type conversion to Cassandra types"""
        processor = MagicMock()
        processor.get_cassandra_type = Processor.get_cassandra_type.__get__(processor, Processor)
        # Basic type mappings
        assert processor.get_cassandra_type("string") == "text"
        assert processor.get_cassandra_type("boolean") == "boolean"
        assert processor.get_cassandra_type("timestamp") == "timestamp"
        assert processor.get_cassandra_type("uuid") == "uuid"
        # Integer types with size hints
        assert processor.get_cassandra_type("integer", size=2) == "int"
        assert processor.get_cassandra_type("integer", size=8) == "bigint"
        # Float types with size hints
        assert processor.get_cassandra_type("float", size=2) == "float"
        assert processor.get_cassandra_type("float", size=8) == "double"
        # Unknown type defaults to text
        assert processor.get_cassandra_type("unknown_type") == "text"

    def test_convert_value(self) -> None:
        """Test value conversion for different field types"""
        processor = MagicMock()
        processor.convert_value = Processor.convert_value.__get__(processor, Processor)
        # Integer conversions
        assert processor.convert_value("123", "integer") == 123
        assert processor.convert_value(123.5, "integer") == 123
        assert processor.convert_value(None, "integer") is None
        # Float conversions
        assert processor.convert_value("123.45", "float") == 123.45
        assert processor.convert_value(123, "float") == 123.0
        # Boolean conversions
        assert processor.convert_value("true", "boolean") is True
        assert processor.convert_value("false", "boolean") is False
        assert processor.convert_value("1", "boolean") is True
        assert processor.convert_value("0", "boolean") is False
        assert processor.convert_value("yes", "boolean") is True
        assert processor.convert_value("no", "boolean") is False
        # String conversions
        assert processor.convert_value(123, "string") == "123"
        assert processor.convert_value(True, "string") == "True"

    def test_table_creation_cql_generation(self) -> None:
        """Test CQL generation for table creation"""
        processor = MagicMock()
        processor.schemas = {}
        processor.known_keyspaces = set()
        processor.known_tables = {}
        processor.session = MagicMock()
        # Bind the real helpers that ensure_table depends on
        processor.sanitize_name = Processor.sanitize_name.__get__(processor, Processor)
        processor.sanitize_table = Processor.sanitize_table.__get__(processor, Processor)
        processor.get_cassandra_type = Processor.get_cassandra_type.__get__(processor, Processor)

        # Stub keyspace creation so no real session round-trip is needed
        def mock_ensure_keyspace(keyspace):
            processor.known_keyspaces.add(keyspace)
            processor.known_tables[keyspace] = set()

        processor.ensure_keyspace = mock_ensure_keyspace
        processor.ensure_table = Processor.ensure_table.__get__(processor, Processor)
        # Create test schema
        schema = RowSchema(
            name="customer_records",
            description="Test customer schema",
            fields=[
                Field(
                    name="customer_id",
                    type="string",
                    size=50,
                    primary=True,
                    required=True,
                    indexed=False
                ),
                Field(
                    name="email",
                    type="string",
                    size=100,
                    required=True,
                    indexed=True
                ),
                Field(
                    name="age",
                    type="integer",
                    size=4,
                    required=False,
                    indexed=False
                )
            ]
        )
        # Call ensure_table
        processor.ensure_table("test_user", "customer_records", schema)
        # Verify keyspace was ensured (check that it was added to known_keyspaces)
        assert "test_user" in processor.known_keyspaces
        # Check the CQL that was executed (first call should be table creation)
        all_calls = processor.session.execute.call_args_list
        table_creation_cql = all_calls[0][0][0]  # First call
        # Verify table structure (keyspace uses sanitize_name, table uses sanitize_table)
        assert "CREATE TABLE IF NOT EXISTS test_user.o_customer_records" in table_creation_cql
        assert "collection text" in table_creation_cql
        assert "customer_id text" in table_creation_cql
        assert "email text" in table_creation_cql
        assert "age int" in table_creation_cql
        assert "PRIMARY KEY ((collection, customer_id))" in table_creation_cql

    def test_table_creation_without_primary_key(self) -> None:
        """Test table creation when no primary key is defined"""
        processor = MagicMock()
        processor.schemas = {}
        processor.known_keyspaces = set()
        processor.known_tables = {}
        processor.session = MagicMock()
        processor.sanitize_name = Processor.sanitize_name.__get__(processor, Processor)
        processor.sanitize_table = Processor.sanitize_table.__get__(processor, Processor)
        processor.get_cassandra_type = Processor.get_cassandra_type.__get__(processor, Processor)

        def mock_ensure_keyspace(keyspace):
            processor.known_keyspaces.add(keyspace)
            processor.known_tables[keyspace] = set()

        processor.ensure_keyspace = mock_ensure_keyspace
        processor.ensure_table = Processor.ensure_table.__get__(processor, Processor)
        # Create schema without primary key
        schema = RowSchema(
            name="events",
            description="Event log",
            fields=[
                Field(name="event_type", type="string", size=50),
                Field(name="timestamp", type="timestamp", size=0)
            ]
        )
        # Call ensure_table
        processor.ensure_table("test_user", "events", schema)
        # Check the CQL includes synthetic_id (field names don't get o_ prefix)
        executed_cql = processor.session.execute.call_args[0][0]
        assert "synthetic_id uuid" in executed_cql
        assert "PRIMARY KEY ((collection, synthetic_id))" in executed_cql

    @pytest.mark.asyncio
    async def test_schema_config_parsing(self) -> None:
        """Test parsing of schema configurations"""
        processor = MagicMock()
        processor.schemas = {}
        processor.config_key = "schema"
        processor.on_schema_config = Processor.on_schema_config.__get__(processor, Processor)
        # Create test configuration
        config = {
            "schema": {
                "customer_records": json.dumps({
                    "name": "customer_records",
                    "description": "Customer data",
                    "fields": [
                        {
                            "name": "id",
                            "type": "string",
                            "primary_key": True,
                            "required": True
                        },
                        {
                            "name": "name",
                            "type": "string",
                            "required": True
                        },
                        {
                            "name": "balance",
                            "type": "float",
                            "size": 8
                        }
                    ]
                })
            }
        }
        # Process configuration
        await processor.on_schema_config(config, version=1)
        # Verify schema was loaded
        assert "customer_records" in processor.schemas
        schema = processor.schemas["customer_records"]
        assert schema.name == "customer_records"
        assert len(schema.fields) == 3
        # Check field properties
        id_field = schema.fields[0]
        assert id_field.name == "id"
        assert id_field.type == "string"
        assert id_field.primary is True
        # Note: Field.required always returns False due to Pulsar schema limitations
        # The actual required value is tracked during schema parsing

    @pytest.mark.asyncio
    async def test_object_processing_logic(self) -> None:
        """Test the logic for processing ExtractedObject"""
        processor = MagicMock()
        processor.schemas = {
            "test_schema": RowSchema(
                name="test_schema",
                description="Test",
                fields=[
                    Field(name="id", type="string", size=50, primary=True),
                    Field(name="value", type="integer", size=4)
                ]
            )
        }
        processor.ensure_table = MagicMock()
        processor.sanitize_name = Processor.sanitize_name.__get__(processor, Processor)
        processor.sanitize_table = Processor.sanitize_table.__get__(processor, Processor)
        processor.convert_value = Processor.convert_value.__get__(processor, Processor)
        processor.session = MagicMock()
        processor.on_object = Processor.on_object.__get__(processor, Processor)
        processor.known_keyspaces = {"test_user"}  # Pre-populate to skip validation query
        processor.known_tables = {"test_user": set()}  # Pre-populate
        # Create test object
        test_obj = ExtractedObject(
            metadata=Metadata(
                id="test-001",
                user="test_user",
                collection="test_collection",
                metadata=[]
            ),
            schema_name="test_schema",
            values=[{"id": "123", "value": "456"}],
            confidence=0.9,
            source_span="test source"
        )
        # Create mock message
        msg = MagicMock()
        msg.value.return_value = test_obj
        # Process object
        await processor.on_object(msg, None, None)
        # Verify table was ensured
        processor.ensure_table.assert_called_once_with("test_user", "test_schema", processor.schemas["test_schema"])
        # Verify insert was executed (keyspace normal, table with o_ prefix)
        processor.session.execute.assert_called_once()
        insert_cql = processor.session.execute.call_args[0][0]
        values = processor.session.execute.call_args[0][1]
        assert "INSERT INTO test_user.o_test_schema" in insert_cql
        assert "collection" in insert_cql
        assert values[0] == "test_collection"  # collection value
        assert values[1] == "123"  # id value (from values[0])
        assert values[2] == 456  # converted integer value (from values[0])

    def test_secondary_index_creation(self) -> None:
        """Test that secondary indexes are created for indexed fields"""
        processor = MagicMock()
        processor.schemas = {}
        processor.known_keyspaces = {"test_user"}  # Pre-populate to skip validation query
        processor.known_tables = {"test_user": set()}  # Pre-populate
        processor.session = MagicMock()
        processor.sanitize_name = Processor.sanitize_name.__get__(processor, Processor)
        processor.sanitize_table = Processor.sanitize_table.__get__(processor, Processor)
        processor.get_cassandra_type = Processor.get_cassandra_type.__get__(processor, Processor)

        def mock_ensure_keyspace(keyspace):
            processor.known_keyspaces.add(keyspace)
            if keyspace not in processor.known_tables:
                processor.known_tables[keyspace] = set()

        processor.ensure_keyspace = mock_ensure_keyspace
        processor.ensure_table = Processor.ensure_table.__get__(processor, Processor)
        # Create schema with indexed field
        schema = RowSchema(
            name="products",
            description="Product catalog",
            fields=[
                Field(name="product_id", type="string", size=50, primary=True),
                Field(name="category", type="string", size=30, indexed=True),
                Field(name="price", type="float", size=8, indexed=True)
            ]
        )
        # Call ensure_table
        processor.ensure_table("test_user", "products", schema)
        # Should have 3 calls: create table + 2 indexes
        assert processor.session.execute.call_count == 3
        # Check index creation calls (table has o_ prefix, fields don't)
        calls = processor.session.execute.call_args_list
        index_calls = [call[0][0] for call in calls if "CREATE INDEX" in call[0][0]]
        assert len(index_calls) == 2
        assert any("o_products_category_idx" in call for call in index_calls)
        assert any("o_products_price_idx" in call for call in index_calls)
class TestObjectsCassandraStorageBatchLogic:
    """Test batch processing logic in Cassandra storage.

    Each test binds real Processor methods onto a MagicMock instance so the
    business logic runs while all Cassandra I/O goes through a mocked session.
    """

    @pytest.mark.asyncio
    async def test_batch_object_processing_logic(self) -> None:
        """Test processing of batch ExtractedObjects"""
        processor = MagicMock()
        processor.schemas = {
            "batch_schema": RowSchema(
                name="batch_schema",
                description="Test batch schema",
                fields=[
                    Field(name="id", type="string", size=50, primary=True),
                    Field(name="name", type="string", size=100),
                    Field(name="value", type="integer", size=4)
                ]
            )
        }
        processor.known_keyspaces = {"test_user"}  # Pre-populate to skip validation query
        processor.known_tables = {"test_user": set()}  # Pre-populate (consistent with sibling tests)
        processor.ensure_table = MagicMock()
        processor.sanitize_name = Processor.sanitize_name.__get__(processor, Processor)
        processor.sanitize_table = Processor.sanitize_table.__get__(processor, Processor)
        processor.convert_value = Processor.convert_value.__get__(processor, Processor)
        processor.session = MagicMock()
        processor.on_object = Processor.on_object.__get__(processor, Processor)
        # Create batch object with multiple values
        batch_obj = ExtractedObject(
            metadata=Metadata(
                id="batch-001",
                user="test_user",
                collection="batch_collection",
                metadata=[]
            ),
            schema_name="batch_schema",
            values=[
                {"id": "001", "name": "First", "value": "100"},
                {"id": "002", "name": "Second", "value": "200"},
                {"id": "003", "name": "Third", "value": "300"}
            ],
            confidence=0.95,
            source_span="batch source"
        )
        # Create mock message
        msg = MagicMock()
        msg.value.return_value = batch_obj
        # Process batch object
        await processor.on_object(msg, None, None)
        # Verify table was ensured once
        processor.ensure_table.assert_called_once_with("test_user", "batch_schema", processor.schemas["batch_schema"])
        # Verify 3 separate insert calls (one per batch item)
        assert processor.session.execute.call_count == 3
        # Check each insert call against the corresponding batch item.
        # NOTE: the previous version used a chained conditional expression here
        # (`assert values[2] == "First" if i == 0 else ...`) which, due to
        # operator precedence, only actually asserted anything for i == 0.
        expected_names = ["First", "Second", "Third"]
        calls = processor.session.execute.call_args_list
        for i, call in enumerate(calls):
            insert_cql = call[0][0]
            values = call[0][1]
            assert "INSERT INTO test_user.o_batch_schema" in insert_cql
            assert "collection" in insert_cql
            # Check values for each batch item
            assert values[0] == "batch_collection"  # collection
            assert values[1] == f"00{i+1}"  # id from batch item i
            assert values[2] == expected_names[i]  # name from batch item i
            assert values[3] == (i + 1) * 100  # converted integer value

    @pytest.mark.asyncio
    async def test_empty_batch_processing_logic(self) -> None:
        """Test processing of empty batch ExtractedObjects"""
        processor = MagicMock()
        processor.schemas = {
            "empty_schema": RowSchema(
                name="empty_schema",
                fields=[Field(name="id", type="string", size=50, primary=True)]
            )
        }
        processor.ensure_table = MagicMock()
        processor.sanitize_name = Processor.sanitize_name.__get__(processor, Processor)
        processor.sanitize_table = Processor.sanitize_table.__get__(processor, Processor)
        processor.convert_value = Processor.convert_value.__get__(processor, Processor)
        processor.session = MagicMock()
        processor.on_object = Processor.on_object.__get__(processor, Processor)
        processor.known_keyspaces = {"test_user"}  # Pre-populate to skip validation query
        processor.known_tables = {"test_user": set()}  # Pre-populate
        # Create empty batch object
        empty_batch_obj = ExtractedObject(
            metadata=Metadata(
                id="empty-001",
                user="test_user",
                collection="empty_collection",
                metadata=[]
            ),
            schema_name="empty_schema",
            values=[],  # Empty batch
            confidence=1.0,
            source_span="empty source"
        )
        msg = MagicMock()
        msg.value.return_value = empty_batch_obj
        # Process empty batch object
        await processor.on_object(msg, None, None)
        # Verify table was ensured
        processor.ensure_table.assert_called_once()
        # Verify no insert calls for empty batch
        processor.session.execute.assert_not_called()

    @pytest.mark.asyncio
    async def test_single_item_batch_processing_logic(self) -> None:
        """Test processing of single-item batch (backward compatibility)"""
        processor = MagicMock()
        processor.schemas = {
            "single_schema": RowSchema(
                name="single_schema",
                fields=[
                    Field(name="id", type="string", size=50, primary=True),
                    Field(name="data", type="string", size=100)
                ]
            )
        }
        processor.ensure_table = MagicMock()
        processor.sanitize_name = Processor.sanitize_name.__get__(processor, Processor)
        processor.sanitize_table = Processor.sanitize_table.__get__(processor, Processor)
        processor.convert_value = Processor.convert_value.__get__(processor, Processor)
        processor.session = MagicMock()
        processor.on_object = Processor.on_object.__get__(processor, Processor)
        processor.known_keyspaces = {"test_user"}  # Pre-populate to skip validation query
        processor.known_tables = {"test_user": set()}  # Pre-populate
        # Create single-item batch object (backward compatibility case)
        single_batch_obj = ExtractedObject(
            metadata=Metadata(
                id="single-001",
                user="test_user",
                collection="single_collection",
                metadata=[]
            ),
            schema_name="single_schema",
            values=[{"id": "single-1", "data": "single data"}],  # Array with one item
            confidence=0.8,
            source_span="single source"
        )
        msg = MagicMock()
        msg.value.return_value = single_batch_obj
        # Process single-item batch object
        await processor.on_object(msg, None, None)
        # Verify table was ensured
        processor.ensure_table.assert_called_once()
        # Verify exactly one insert call
        processor.session.execute.assert_called_once()
        insert_cql = processor.session.execute.call_args[0][0]
        values = processor.session.execute.call_args[0][1]
        assert "INSERT INTO test_user.o_single_schema" in insert_cql
        assert values[0] == "single_collection"  # collection
        assert values[1] == "single-1"  # id value
        assert values[2] == "single data"  # data value

    def test_batch_value_conversion_logic(self) -> None:
        """Test value conversion works correctly for batch items"""
        processor = MagicMock()
        processor.convert_value = Processor.convert_value.__get__(processor, Processor)
        # Test various conversion scenarios that would occur in batch processing
        test_cases = [
            # Integer conversions for batch items
            ("123", "integer", 123),
            ("456", "integer", 456),
            ("789", "integer", 789),
            # Float conversions for batch items
            ("12.5", "float", 12.5),
            ("34.7", "float", 34.7),
            # Boolean conversions for batch items
            ("true", "boolean", True),
            ("false", "boolean", False),
            ("1", "boolean", True),
            ("0", "boolean", False),
            # String conversions for batch items
            (123, "string", "123"),
            (45.6, "string", "45.6"),
        ]
        for input_val, field_type, expected_output in test_cases:
            result = processor.convert_value(input_val, field_type)
            assert result == expected_output, f"Failed for {input_val} -> {field_type}: got {result}, expected {expected_output}"

View file

@ -0,0 +1,435 @@
"""
Unit tests for trustgraph.storage.row_embeddings.qdrant.write
Tests the Stage 2 processor that stores pre-computed row embeddings in Qdrant.
"""
import pytest
from unittest.mock import AsyncMock, MagicMock, patch
from unittest import IsolatedAsyncioTestCase
class TestQdrantRowEmbeddingsStorage(IsolatedAsyncioTestCase):
"""Test Qdrant row embeddings storage functionality"""
@patch('trustgraph.storage.row_embeddings.qdrant.write.QdrantClient')
async def test_processor_initialization_basic(self, mock_qdrant_client):
"""Test basic Qdrant processor initialization"""
from trustgraph.storage.row_embeddings.qdrant.write import Processor
mock_qdrant_instance = MagicMock()
mock_qdrant_client.return_value = mock_qdrant_instance
config = {
'store_uri': 'http://localhost:6333',
'api_key': 'test-api-key',
'taskgroup': AsyncMock(),
'id': 'test-qdrant-processor'
}
processor = Processor(**config)
mock_qdrant_client.assert_called_once_with(
url='http://localhost:6333', api_key='test-api-key'
)
assert hasattr(processor, 'qdrant')
assert processor.qdrant == mock_qdrant_instance
@patch('trustgraph.storage.row_embeddings.qdrant.write.QdrantClient')
async def test_processor_initialization_with_defaults(self, mock_qdrant_client):
"""Test processor initialization with default values"""
from trustgraph.storage.row_embeddings.qdrant.write import Processor
mock_qdrant_instance = MagicMock()
mock_qdrant_client.return_value = mock_qdrant_instance
config = {
'taskgroup': AsyncMock(),
'id': 'test-qdrant-processor'
}
processor = Processor(**config)
mock_qdrant_client.assert_called_once_with(
url='http://localhost:6333', api_key=None
)
@patch('trustgraph.storage.row_embeddings.qdrant.write.QdrantClient')
async def test_sanitize_name(self, mock_qdrant_client):
"""Test name sanitization for Qdrant collections"""
from trustgraph.storage.row_embeddings.qdrant.write import Processor
mock_qdrant_client.return_value = MagicMock()
config = {
'taskgroup': AsyncMock(),
'id': 'test-processor'
}
processor = Processor(**config)
# Test basic sanitization
assert processor.sanitize_name("simple") == "simple"
assert processor.sanitize_name("with-dash") == "with_dash"
assert processor.sanitize_name("with.dot") == "with_dot"
assert processor.sanitize_name("UPPERCASE") == "uppercase"
# Test numeric prefix handling
assert processor.sanitize_name("123start") == "r_123start"
assert processor.sanitize_name("_underscore") == "r__underscore"
@patch('trustgraph.storage.row_embeddings.qdrant.write.QdrantClient')
async def test_get_collection_name(self, mock_qdrant_client):
"""Test Qdrant collection name generation"""
from trustgraph.storage.row_embeddings.qdrant.write import Processor
mock_qdrant_client.return_value = MagicMock()
config = {
'taskgroup': AsyncMock(),
'id': 'test-processor'
}
processor = Processor(**config)
collection_name = processor.get_collection_name(
user="test_user",
collection="test_collection",
schema_name="customer_data",
dimension=384
)
assert collection_name == "rows_test_user_test_collection_customer_data_384"
@patch('trustgraph.storage.row_embeddings.qdrant.write.QdrantClient')
async def test_ensure_collection_creates_new(self, mock_qdrant_client):
"""Test that ensure_collection creates a new collection when needed"""
from trustgraph.storage.row_embeddings.qdrant.write import Processor
mock_qdrant_instance = MagicMock()
mock_qdrant_instance.collection_exists.return_value = False
mock_qdrant_client.return_value = mock_qdrant_instance
config = {
'taskgroup': AsyncMock(),
'id': 'test-processor'
}
processor = Processor(**config)
processor.ensure_collection("test_collection", 384)
mock_qdrant_instance.collection_exists.assert_called_once_with("test_collection")
mock_qdrant_instance.create_collection.assert_called_once()
# Verify the collection is cached
assert "test_collection" in processor.created_collections
@patch('trustgraph.storage.row_embeddings.qdrant.write.QdrantClient')
async def test_ensure_collection_skips_existing(self, mock_qdrant_client):
"""Test that ensure_collection skips creation when collection exists"""
from trustgraph.storage.row_embeddings.qdrant.write import Processor
mock_qdrant_instance = MagicMock()
mock_qdrant_instance.collection_exists.return_value = True
mock_qdrant_client.return_value = mock_qdrant_instance
config = {
'taskgroup': AsyncMock(),
'id': 'test-processor'
}
processor = Processor(**config)
processor.ensure_collection("existing_collection", 384)
mock_qdrant_instance.collection_exists.assert_called_once()
mock_qdrant_instance.create_collection.assert_not_called()
@patch('trustgraph.storage.row_embeddings.qdrant.write.QdrantClient')
async def test_ensure_collection_uses_cache(self, mock_qdrant_client):
"""Test that ensure_collection uses cache for previously created collections"""
from trustgraph.storage.row_embeddings.qdrant.write import Processor
mock_qdrant_instance = MagicMock()
mock_qdrant_client.return_value = mock_qdrant_instance
config = {
'taskgroup': AsyncMock(),
'id': 'test-processor'
}
processor = Processor(**config)
processor.created_collections.add("cached_collection")
processor.ensure_collection("cached_collection", 384)
# Should not check or create - just return
mock_qdrant_instance.collection_exists.assert_not_called()
mock_qdrant_instance.create_collection.assert_not_called()
@patch('trustgraph.storage.row_embeddings.qdrant.write.QdrantClient')
@patch('trustgraph.storage.row_embeddings.qdrant.write.uuid')
async def test_on_embeddings_basic(self, mock_uuid, mock_qdrant_client):
"""Test processing basic row embeddings message"""
from trustgraph.storage.row_embeddings.qdrant.write import Processor
from trustgraph.schema import RowEmbeddings, RowIndexEmbedding, Metadata
mock_qdrant_instance = MagicMock()
mock_qdrant_instance.collection_exists.return_value = True
mock_qdrant_client.return_value = mock_qdrant_instance
mock_uuid.uuid4.return_value = 'test-uuid-123'
config = {
'taskgroup': AsyncMock(),
'id': 'test-processor'
}
processor = Processor(**config)
processor.known_collections[('test_user', 'test_collection')] = {}
# Create embeddings message
metadata = MagicMock()
metadata.user = 'test_user'
metadata.collection = 'test_collection'
metadata.id = 'doc-123'
embedding = RowIndexEmbedding(
index_name='customer_id',
index_value=['CUST001'],
text='CUST001',
vectors=[[0.1, 0.2, 0.3]]
)
embeddings_msg = RowEmbeddings(
metadata=metadata,
schema_name='customers',
embeddings=[embedding]
)
# Mock message wrapper
mock_msg = MagicMock()
mock_msg.value.return_value = embeddings_msg
await processor.on_embeddings(mock_msg, MagicMock(), MagicMock())
# Verify upsert was called
mock_qdrant_instance.upsert.assert_called_once()
# Verify upsert parameters
upsert_call_args = mock_qdrant_instance.upsert.call_args
assert upsert_call_args[1]['collection_name'] == 'rows_test_user_test_collection_customers_3'
point = upsert_call_args[1]['points'][0]
assert point.vector == [0.1, 0.2, 0.3]
assert point.payload['index_name'] == 'customer_id'
assert point.payload['index_value'] == ['CUST001']
assert point.payload['text'] == 'CUST001'
@patch('trustgraph.storage.row_embeddings.qdrant.write.QdrantClient')
@patch('trustgraph.storage.row_embeddings.qdrant.write.uuid')
async def test_on_embeddings_multiple_vectors(self, mock_uuid, mock_qdrant_client):
"""Test processing embeddings with multiple vectors"""
from trustgraph.storage.row_embeddings.qdrant.write import Processor
from trustgraph.schema import RowEmbeddings, RowIndexEmbedding
mock_qdrant_instance = MagicMock()
mock_qdrant_instance.collection_exists.return_value = True
mock_qdrant_client.return_value = mock_qdrant_instance
mock_uuid.uuid4.return_value = 'test-uuid'
config = {
'taskgroup': AsyncMock(),
'id': 'test-processor'
}
processor = Processor(**config)
processor.known_collections[('test_user', 'test_collection')] = {}
metadata = MagicMock()
metadata.user = 'test_user'
metadata.collection = 'test_collection'
metadata.id = 'doc-123'
# Embedding with multiple vectors
embedding = RowIndexEmbedding(
index_name='name',
index_value=['John Doe'],
text='John Doe',
vectors=[[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]]
)
embeddings_msg = RowEmbeddings(
metadata=metadata,
schema_name='people',
embeddings=[embedding]
)
mock_msg = MagicMock()
mock_msg.value.return_value = embeddings_msg
await processor.on_embeddings(mock_msg, MagicMock(), MagicMock())
# Should be called 3 times (once per vector)
assert mock_qdrant_instance.upsert.call_count == 3
@patch('trustgraph.storage.row_embeddings.qdrant.write.QdrantClient')
async def test_on_embeddings_skips_empty_vectors(self, mock_qdrant_client):
"""Test that embeddings with no vectors are skipped"""
from trustgraph.storage.row_embeddings.qdrant.write import Processor
from trustgraph.schema import RowEmbeddings, RowIndexEmbedding
mock_qdrant_instance = MagicMock()
mock_qdrant_client.return_value = mock_qdrant_instance
config = {
'taskgroup': AsyncMock(),
'id': 'test-processor'
}
processor = Processor(**config)
processor.known_collections[('test_user', 'test_collection')] = {}
metadata = MagicMock()
metadata.user = 'test_user'
metadata.collection = 'test_collection'
metadata.id = 'doc-123'
# Embedding with no vectors
embedding = RowIndexEmbedding(
index_name='id',
index_value=['123'],
text='123',
vectors=[] # Empty vectors
)
embeddings_msg = RowEmbeddings(
metadata=metadata,
schema_name='items',
embeddings=[embedding]
)
mock_msg = MagicMock()
mock_msg.value.return_value = embeddings_msg
await processor.on_embeddings(mock_msg, MagicMock(), MagicMock())
# Should not call upsert for empty vectors
mock_qdrant_instance.upsert.assert_not_called()
@patch('trustgraph.storage.row_embeddings.qdrant.write.QdrantClient')
async def test_on_embeddings_drops_unknown_collection(self, mock_qdrant_client):
"""Test that messages for unknown collections are dropped"""
from trustgraph.storage.row_embeddings.qdrant.write import Processor
from trustgraph.schema import RowEmbeddings, RowIndexEmbedding
mock_qdrant_instance = MagicMock()
mock_qdrant_client.return_value = mock_qdrant_instance
config = {
'taskgroup': AsyncMock(),
'id': 'test-processor'
}
processor = Processor(**config)
# No collections registered
metadata = MagicMock()
metadata.user = 'unknown_user'
metadata.collection = 'unknown_collection'
metadata.id = 'doc-123'
embedding = RowIndexEmbedding(
index_name='id',
index_value=['123'],
text='123',
vectors=[[0.1, 0.2]]
)
embeddings_msg = RowEmbeddings(
metadata=metadata,
schema_name='items',
embeddings=[embedding]
)
mock_msg = MagicMock()
mock_msg.value.return_value = embeddings_msg
await processor.on_embeddings(mock_msg, MagicMock(), MagicMock())
# Should not call upsert for unknown collection
mock_qdrant_instance.upsert.assert_not_called()
@patch('trustgraph.storage.row_embeddings.qdrant.write.QdrantClient')
async def test_delete_collection(self, mock_qdrant_client):
    """Deleting a user/collection removes only its matching Qdrant collections."""
    from trustgraph.storage.row_embeddings.qdrant.write import Processor

    qdrant_api = MagicMock()

    # Three listed collections: two belong to test_user/test_collection,
    # one belongs to a different user/collection and must survive.
    listed = []
    for collection_name in (
        'rows_test_user_test_collection_schema1_384',
        'rows_test_user_test_collection_schema2_384',
        'rows_other_user_other_collection_schema_384',
    ):
        entry = MagicMock()
        entry.name = collection_name  # .name can't be set via MagicMock(name=...)
        listed.append(entry)

    collections_response = MagicMock()
    collections_response.collections = listed
    qdrant_api.get_collections.return_value = collections_response
    mock_qdrant_client.return_value = qdrant_api

    processor = Processor(taskgroup=AsyncMock(), id='test-processor')
    processor.created_collections.add('rows_test_user_test_collection_schema1_384')

    await processor.delete_collection('test_user', 'test_collection')

    # Only the two matching collections are deleted.
    assert qdrant_api.delete_collection.call_count == 2
    # The locally cached name must also have been evicted.
    assert 'rows_test_user_test_collection_schema1_384' not in processor.created_collections
@patch('trustgraph.storage.row_embeddings.qdrant.write.QdrantClient')
async def test_delete_collection_schema(self, mock_qdrant_client):
    """Schema-scoped deletion removes only the named schema's collection."""
    from trustgraph.storage.row_embeddings.qdrant.write import Processor

    qdrant_api = MagicMock()

    listed = []
    for collection_name in (
        'rows_test_user_test_collection_customers_384',
        'rows_test_user_test_collection_orders_384',
    ):
        entry = MagicMock()
        entry.name = collection_name  # assign after construction; name= is special
        listed.append(entry)

    collections_response = MagicMock()
    collections_response.collections = listed
    qdrant_api.get_collections.return_value = collections_response
    mock_qdrant_client.return_value = qdrant_api

    processor = Processor(taskgroup=AsyncMock(), id='test-processor')

    await processor.delete_collection_schema(
        'test_user', 'test_collection', 'customers'
    )

    # Exactly one deletion, and it targets the 'customers' schema collection.
    qdrant_api.delete_collection.assert_called_once()
    deleted_name = qdrant_api.delete_collection.call_args[0][0]
    assert deleted_name == 'rows_test_user_test_collection_customers_384'
# Allow running this test module directly (python <file>) in addition to
# invoking it through the pytest CLI.
if __name__ == '__main__':
    pytest.main([__file__])

# View file
# @ -0,0 +1,474 @@
"""
Unit tests for Cassandra Row Storage Processor (Unified Table Implementation)
Tests the business logic of the row storage processor including:
- Schema configuration handling
- Name sanitization
- Unified table structure
- Index management
- Row storage with multi-index support
"""
import pytest
from unittest.mock import MagicMock, AsyncMock, patch
import json
from trustgraph.storage.rows.cassandra.write import Processor
from trustgraph.schema import ExtractedObject, Metadata, RowSchema, Field
class TestRowsCassandraStorageLogic:
    """Test business logic for the unified-table row storage implementation.

    Each test builds a MagicMock stand-in for the Processor and binds the
    real methods under test onto it, so only the targeted logic runs.
    """

    @staticmethod
    def _bind(processor, *method_names):
        """Bind real (unbound) Processor methods onto a MagicMock stand-in.

        This replaces the repeated ``Processor.x.__get__(processor, Processor)``
        wiring that previously appeared in every test.
        """
        for name in method_names:
            setattr(
                processor, name,
                getattr(Processor, name).__get__(processor, Processor)
            )

    @classmethod
    def _make_storage_processor(cls, schemas):
        """Build a mocked processor wired for on_object() tests.

        Real sanitize/index/on_object logic is bound in; table creation,
        partition registration and collection checks are mocked out so only
        the row-insert behaviour is exercised.
        """
        processor = MagicMock()
        processor.schemas = schemas
        processor.tables_initialized = {"test_user"}
        processor.registered_partitions = set()
        processor.session = MagicMock()
        cls._bind(
            processor,
            "sanitize_name", "get_index_names", "build_index_value",
        )
        processor.ensure_tables = MagicMock()
        processor.register_partitions = MagicMock()
        processor.collection_exists = MagicMock(return_value=True)
        cls._bind(processor, "on_object")
        return processor

    def test_sanitize_name(self):
        """Test name sanitization for Cassandra compatibility."""
        processor = MagicMock()
        self._bind(processor, "sanitize_name")
        # Test various name patterns
        assert processor.sanitize_name("simple_name") == "simple_name"
        assert processor.sanitize_name("Name-With-Dashes") == "name_with_dashes"
        assert processor.sanitize_name("name.with.dots") == "name_with_dots"
        # Names that don't start with a letter get an 'r_' prefix
        assert processor.sanitize_name("123_starts_with_number") == "r_123_starts_with_number"
        assert processor.sanitize_name("name with spaces") == "name_with_spaces"
        assert processor.sanitize_name("special!@#$%^chars") == "special______chars"
        assert processor.sanitize_name("UPPERCASE") == "uppercase"
        assert processor.sanitize_name("CamelCase") == "camelcase"
        assert processor.sanitize_name("_underscore_start") == "r__underscore_start"

    def test_get_index_names(self):
        """Test extraction of index names from a schema."""
        processor = MagicMock()
        self._bind(processor, "get_index_names")
        # Schema with primary and indexed fields
        schema = RowSchema(
            name="test_schema",
            description="Test",
            fields=[
                Field(name="id", type="string", primary=True),
                Field(name="category", type="string", indexed=True),
                Field(name="name", type="string"),  # Not indexed
                Field(name="status", type="string", indexed=True)
            ]
        )
        index_names = processor.get_index_names(schema)
        # Should include the primary key and every indexed field
        assert "id" in index_names
        assert "category" in index_names
        assert "status" in index_names
        assert "name" not in index_names  # Not indexed
        assert len(index_names) == 3

    def test_get_index_names_no_indexes(self):
        """A schema with no primary/indexed fields yields no index names."""
        processor = MagicMock()
        self._bind(processor, "get_index_names")
        schema = RowSchema(
            name="no_index_schema",
            fields=[
                Field(name="data1", type="string"),
                Field(name="data2", type="string")
            ]
        )
        index_names = processor.get_index_names(schema)
        assert len(index_names) == 0

    def test_build_index_value(self):
        """Test building single-field index values from row data."""
        processor = MagicMock()
        self._bind(processor, "build_index_value")
        value_map = {"id": "123", "category": "electronics", "name": "Widget"}
        # Single field index
        result = processor.build_index_value(value_map, "id")
        assert result == ["123"]
        result = processor.build_index_value(value_map, "category")
        assert result == ["electronics"]
        # Missing field returns empty string (not an error)
        result = processor.build_index_value(value_map, "missing")
        assert result == [""]

    def test_build_index_value_composite(self):
        """Composite (comma-separated) index names yield multi-element values."""
        processor = MagicMock()
        self._bind(processor, "build_index_value")
        value_map = {"region": "us-west", "category": "electronics", "id": "123"}
        # Composite index (comma-separated field names)
        result = processor.build_index_value(value_map, "region,category")
        assert result == ["us-west", "electronics"]

    @pytest.mark.asyncio
    async def test_schema_config_parsing(self):
        """Test parsing of schema configurations from the config channel."""
        processor = MagicMock()
        processor.schemas = {}
        processor.config_key = "schema"
        processor.registered_partitions = set()
        self._bind(processor, "on_schema_config")
        # Create test configuration (schemas arrive JSON-encoded)
        config = {
            "schema": {
                "customer_records": json.dumps({
                    "name": "customer_records",
                    "description": "Customer data",
                    "fields": [
                        {
                            "name": "id",
                            "type": "string",
                            "primary_key": True,
                            "required": True
                        },
                        {
                            "name": "name",
                            "type": "string",
                            "required": True
                        },
                        {
                            "name": "category",
                            "type": "string",
                            "indexed": True
                        }
                    ]
                })
            }
        }
        # Process configuration
        await processor.on_schema_config(config, version=1)
        # Verify schema was loaded
        assert "customer_records" in processor.schemas
        schema = processor.schemas["customer_records"]
        assert schema.name == "customer_records"
        assert len(schema.fields) == 3
        # Check field properties: JSON "primary_key" maps onto Field.primary
        id_field = schema.fields[0]
        assert id_field.name == "id"
        assert id_field.type == "string"
        assert id_field.primary is True

    @pytest.mark.asyncio
    async def test_object_processing_stores_data_map(self):
        """Row processing stores data as a map<text, text> in the rows table."""
        processor = self._make_storage_processor({
            "test_schema": RowSchema(
                name="test_schema",
                description="Test",
                fields=[
                    Field(name="id", type="string", size=50, primary=True),
                    Field(name="value", type="string", size=100)
                ]
            )
        })
        # Create test object
        test_obj = ExtractedObject(
            metadata=Metadata(
                id="test-001",
                user="test_user",
                collection="test_collection",
                metadata=[]
            ),
            schema_name="test_schema",
            values=[{"id": "123", "value": "test_data"}],
            confidence=0.9,
            source_span="test source"
        )
        # Create mock message
        msg = MagicMock()
        msg.value.return_value = test_obj
        # Process object
        await processor.on_object(msg, None, None)
        # Verify insert was executed
        processor.session.execute.assert_called()
        insert_call = processor.session.execute.call_args
        insert_cql = insert_call[0][0]
        values = insert_call[0][1]
        # Verify using unified rows table
        assert "INSERT INTO test_user.rows" in insert_cql
        # Values should be: (collection, schema_name, index_name, index_value, data, source)
        assert values[0] == "test_collection"  # collection
        assert values[1] == "test_schema"  # schema_name
        assert values[2] == "id"  # index_name (primary key field)
        assert values[3] == ["123"]  # index_value as list
        assert values[4] == {"id": "123", "value": "test_data"}  # data map
        assert values[5] == ""  # source

    @pytest.mark.asyncio
    async def test_object_processing_multiple_indexes(self):
        """Each row is written once per indexed field (fan-out on insert)."""
        processor = self._make_storage_processor({
            "multi_index_schema": RowSchema(
                name="multi_index_schema",
                fields=[
                    Field(name="id", type="string", primary=True),
                    Field(name="category", type="string", indexed=True),
                    Field(name="status", type="string", indexed=True)
                ]
            )
        })
        test_obj = ExtractedObject(
            metadata=Metadata(
                id="test-001",
                user="test_user",
                collection="test_collection",
                metadata=[]
            ),
            schema_name="multi_index_schema",
            values=[{"id": "123", "category": "electronics", "status": "active"}],
            confidence=0.9,
            source_span=""
        )
        msg = MagicMock()
        msg.value.return_value = test_obj
        await processor.on_object(msg, None, None)
        # Should have 3 inserts (one per indexed field: id, category, status)
        assert processor.session.execute.call_count == 3
        # Check that different index_names were used
        index_names_used = set()
        for call in processor.session.execute.call_args_list:
            values = call[0][1]
            index_names_used.add(values[2])  # index_name is 3rd value
        assert index_names_used == {"id", "category", "status"}
class TestRowsCassandraStorageBatchLogic:
    """Test batch processing logic for the unified-table implementation."""

    @staticmethod
    def _make_processor(schemas):
        """Build a mocked processor with the real on_object() logic bound in.

        Shared by the batch tests below; only the schema map differs between
        them, so the duplicated mock wiring lives here.
        """
        processor = MagicMock()
        processor.schemas = schemas
        processor.tables_initialized = {"test_user"}
        processor.registered_partitions = set()
        processor.session = MagicMock()
        # Bind the real helper methods so genuine logic is exercised
        for name in ("sanitize_name", "get_index_names", "build_index_value"):
            setattr(
                processor, name,
                getattr(Processor, name).__get__(processor, Processor)
            )
        processor.ensure_tables = MagicMock()
        processor.register_partitions = MagicMock()
        processor.collection_exists = MagicMock(return_value=True)
        processor.on_object = Processor.on_object.__get__(processor, Processor)
        return processor

    @pytest.mark.asyncio
    async def test_batch_object_processing(self):
        """A batch ExtractedObject produces one insert per contained row."""
        processor = self._make_processor({
            "batch_schema": RowSchema(
                name="batch_schema",
                fields=[
                    Field(name="id", type="string", primary=True),
                    Field(name="name", type="string")
                ]
            )
        })
        # Create batch object with multiple values
        batch_obj = ExtractedObject(
            metadata=Metadata(
                id="batch-001",
                user="test_user",
                collection="batch_collection",
                metadata=[]
            ),
            schema_name="batch_schema",
            values=[
                {"id": "001", "name": "First"},
                {"id": "002", "name": "Second"},
                {"id": "003", "name": "Third"}
            ],
            confidence=0.95,
            source_span=""
        )
        msg = MagicMock()
        msg.value.return_value = batch_obj
        await processor.on_object(msg, None, None)
        # 3 inserts: one per row, a single index each (only the primary key)
        assert processor.session.execute.call_count == 3
        # Each insert carries a distinct primary-key index value
        ids_inserted = set()
        for call in processor.session.execute.call_args_list:
            values = call[0][1]
            ids_inserted.add(tuple(values[3]))  # index_value is 4th value
        assert ids_inserted == {("001",), ("002",), ("003",)}

    @pytest.mark.asyncio
    async def test_empty_batch_processing(self):
        """An ExtractedObject with no values triggers no CQL at all."""
        processor = self._make_processor({
            "empty_schema": RowSchema(
                name="empty_schema",
                fields=[Field(name="id", type="string", primary=True)]
            )
        })
        # Create empty batch object
        empty_batch_obj = ExtractedObject(
            metadata=Metadata(
                id="empty-001",
                user="test_user",
                collection="empty_collection",
                metadata=[]
            ),
            schema_name="empty_schema",
            values=[],  # Empty batch
            confidence=1.0,
            source_span=""
        )
        msg = MagicMock()
        msg.value.return_value = empty_batch_obj
        await processor.on_object(msg, None, None)
        # Verify no insert calls for the empty batch
        processor.session.execute.assert_not_called()
class TestUnifiedTableStructure:
    """Tests for the unified `rows` table DDL."""

    def test_ensure_tables_creates_unified_structure(self):
        """ensure_tables() creates the rows table plus its partitions table."""
        proc = MagicMock()
        proc.known_keyspaces = {"test_user"}
        proc.tables_initialized = set()
        proc.session = MagicMock()
        proc.sanitize_name = Processor.sanitize_name.__get__(proc, Processor)
        proc.ensure_keyspace = MagicMock()
        proc.ensure_tables = Processor.ensure_tables.__get__(proc, Processor)

        proc.ensure_tables("test_user")

        # Exactly two DDL statements: rows table, then row_partitions table
        assert proc.session.execute.call_count == 2
        rows_cql, partitions_cql = (
            call[0][0] for call in proc.session.execute.call_args_list
        )

        # The unified rows table must carry all expected columns and key
        for fragment in (
            "CREATE TABLE IF NOT EXISTS test_user.rows",
            "collection text",
            "schema_name text",
            "index_name text",
            "index_value frozen<list<text>>",
            "data map<text, text>",
            "source text",
            "PRIMARY KEY ((collection, schema_name, index_name), index_value)",
        ):
            assert fragment in rows_cql

        # Companion partitions-tracking table
        assert "CREATE TABLE IF NOT EXISTS test_user.row_partitions" in partitions_cql
        assert "PRIMARY KEY ((collection), schema_name, index_name)" in partitions_cql

        # The keyspace is remembered as initialized
        assert "test_user" in proc.tables_initialized

    def test_ensure_tables_idempotent(self):
        """ensure_tables() is a no-op for an already-initialized keyspace."""
        proc = MagicMock()
        proc.tables_initialized = {"test_user"}  # Already initialized
        proc.session = MagicMock()
        proc.ensure_tables = Processor.ensure_tables.__get__(proc, Processor)

        proc.ensure_tables("test_user")

        # No CQL runs on a repeated initialization attempt
        proc.session.execute.assert_not_called()
class TestPartitionRegistration:
    """Tests for partition registration (tracking what has been stored)."""

    def test_register_partitions(self):
        """Registering a collection/schema pair inserts one row per index."""
        proc = MagicMock()
        proc.registered_partitions = set()
        proc.session = MagicMock()
        proc.schemas = {
            "test_schema": RowSchema(
                name="test_schema",
                fields=[
                    Field(name="id", type="string", primary=True),
                    Field(name="category", type="string", indexed=True),
                ],
            )
        }
        # Bind the real methods under test onto the mock stand-in
        for meth in ("sanitize_name", "get_index_names", "register_partitions"):
            setattr(proc, meth, getattr(Processor, meth).__get__(proc, Processor))

        proc.register_partitions("test_user", "test_collection", "test_schema")

        # One insert per index: id (primary) and category (indexed)
        assert proc.session.execute.call_count == 2
        # The pair is cached so subsequent calls can short-circuit
        assert ("test_collection", "test_schema") in proc.registered_partitions

    def test_register_partitions_idempotent(self):
        """A pair already present in the cache triggers no CQL."""
        proc = MagicMock()
        proc.registered_partitions = {("test_collection", "test_schema")}  # Already registered
        proc.session = MagicMock()
        proc.register_partitions = Processor.register_partitions.__get__(proc, Processor)

        proc.register_partitions("test_user", "test_collection", "test_schema")

        proc.session.execute.assert_not_called()