Release 1.4 -> master (#524)

Catch up
2026-07-12 14:52:11 +02:00 · 2025-09-20 16:00:37 +01:00 · 2025-09-20 16:00:37 +01:00 · 6c7af8789d
commit 6c7af8789d
parent a8e437fc7f
216 changed files with 31360 additions and 1611 deletions
--- a/tests/contract/conftest.py
+++ b/tests/contract/conftest.py
@ -82,8 +82,8 @@ def sample_message_data():
        },
        "AgentRequest": {
            "question": "What is machine learning?",
-            "plan": "",
            "state": "",
+            "group": [],
            "history": []
        },
        "AgentResponse": {
--- a/tests/contract/test_document_embeddings_contract.py
+++ b/tests/contract/test_document_embeddings_contract.py
@ -0,0 +1,261 @@
+"""
+Contract tests for document embeddings message schemas and translators
+Ensures that message formats remain consistent across services
+"""
+
+import pytest
+from unittest.mock import MagicMock
+
+from trustgraph.schema import DocumentEmbeddingsRequest, DocumentEmbeddingsResponse, Error
+from trustgraph.messaging.translators.embeddings_query import (
+    DocumentEmbeddingsRequestTranslator,
+    DocumentEmbeddingsResponseTranslator
+)
+
+
+class TestDocumentEmbeddingsRequestContract:
+    """Test DocumentEmbeddingsRequest schema contract"""
+
+    def test_request_schema_fields(self):
+        """Test that DocumentEmbeddingsRequest has expected fields"""
+        # Build a request with every field set explicitly
+        request = DocumentEmbeddingsRequest(
+            vectors=[[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]],
+            limit=10,
+            user="test_user",
+            collection="test_collection"
+        )
+        
+        # Verify all four contract fields exist as attributes
+        assert hasattr(request, 'vectors')
+        assert hasattr(request, 'limit')
+        assert hasattr(request, 'user')
+        assert hasattr(request, 'collection')
+        
+        # Verify each attribute holds the value passed to the constructor
+        assert request.vectors == [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]
+        assert request.limit == 10
+        assert request.user == "test_user"
+        assert request.collection == "test_collection"
+
+    def test_request_translator_to_pulsar(self):
+        """Test request translator converts dict to Pulsar schema"""
+        translator = DocumentEmbeddingsRequestTranslator()
+        
+        data = {
+            "vectors": [[0.1, 0.2], [0.3, 0.4]],
+            "limit": 5,
+            "user": "custom_user",
+            "collection": "custom_collection"
+        }
+        
+        result = translator.to_pulsar(data)
+        
+        assert isinstance(result, DocumentEmbeddingsRequest)
+        assert result.vectors == [[0.1, 0.2], [0.3, 0.4]]
+        assert result.limit == 5
+        assert result.user == "custom_user"
+        assert result.collection == "custom_collection"
+
+    def test_request_translator_to_pulsar_with_defaults(self):
+        """Test request translator uses correct defaults"""
+        translator = DocumentEmbeddingsRequestTranslator()
+        
+        data = {
+            "vectors": [[0.1, 0.2]]
+            # limit, user and collection deliberately omitted to exercise the defaults
+        }
+        
+        result = translator.to_pulsar(data)
+        
+        assert isinstance(result, DocumentEmbeddingsRequest)
+        assert result.vectors == [[0.1, 0.2]]
+        assert result.limit == 10  # Expected when "limit" is omitted
+        assert result.user == "trustgraph"  # Expected when "user" is omitted
+        assert result.collection == "default"  # Expected when "collection" is omitted
+
+    def test_request_translator_from_pulsar(self):
+        """Test request translator converts Pulsar schema to dict"""
+        translator = DocumentEmbeddingsRequestTranslator()
+        
+        request = DocumentEmbeddingsRequest(
+            vectors=[[0.5, 0.6]],
+            limit=20,
+            user="test_user",
+            collection="test_collection"
+        )
+        
+        result = translator.from_pulsar(request)
+        
+        assert isinstance(result, dict)
+        assert result["vectors"] == [[0.5, 0.6]]
+        assert result["limit"] == 20
+        assert result["user"] == "test_user"
+        assert result["collection"] == "test_collection"
+
+
+class TestDocumentEmbeddingsResponseContract:
+    """Test DocumentEmbeddingsResponse schema contract"""
+
+    def test_response_schema_fields(self):
+        """Test that DocumentEmbeddingsResponse has expected fields"""
+        # Build a successful response: no error, three chunks
+        response = DocumentEmbeddingsResponse(
+            error=None,
+            chunks=["chunk1", "chunk2", "chunk3"]
+        )
+        
+        # Verify both contract fields exist as attributes
+        assert hasattr(response, 'error')
+        assert hasattr(response, 'chunks')
+        
+        # Verify each attribute holds the value passed to the constructor
+        assert response.error is None
+        assert response.chunks == ["chunk1", "chunk2", "chunk3"]
+
+    def test_response_schema_with_error(self):
+        """Test response schema with error"""
+        error = Error(
+            type="query_error",
+            message="Database connection failed"
+        )
+        
+        response = DocumentEmbeddingsResponse(
+            error=error,
+            chunks=None
+        )
+        
+        assert response.error == error
+        assert response.chunks is None
+
+    def test_response_translator_from_pulsar_with_chunks(self):
+        """Test response translator converts Pulsar schema with chunks to dict"""
+        translator = DocumentEmbeddingsResponseTranslator()
+        
+        response = DocumentEmbeddingsResponse(
+            error=None,
+            chunks=["doc1", "doc2", "doc3"]
+        )
+        
+        result = translator.from_pulsar(response)
+        
+        assert isinstance(result, dict)
+        assert "chunks" in result
+        assert result["chunks"] == ["doc1", "doc2", "doc3"]
+
+    def test_response_translator_from_pulsar_with_bytes(self):
+        """Test response translator handles byte chunks correctly"""
+        translator = DocumentEmbeddingsResponseTranslator()
+        
+        response = MagicMock()  # mock stands in for the schema object so chunks can hold raw bytes
+        response.chunks = [b"byte_chunk1", b"byte_chunk2"]
+        
+        result = translator.from_pulsar(response)
+        
+        assert isinstance(result, dict)
+        assert "chunks" in result
+        assert result["chunks"] == ["byte_chunk1", "byte_chunk2"]  # bytes are expected back as str
+
+    def test_response_translator_from_pulsar_with_empty_chunks(self):
+        """Test response translator handles empty chunks list"""
+        translator = DocumentEmbeddingsResponseTranslator()
+        
+        response = MagicMock()
+        response.chunks = []
+        
+        result = translator.from_pulsar(response)
+        
+        assert isinstance(result, dict)
+        assert "chunks" in result  # an empty list must still produce the key
+        assert result["chunks"] == []
+
+    def test_response_translator_from_pulsar_with_none_chunks(self):
+        """Test response translator handles None chunks"""
+        translator = DocumentEmbeddingsResponseTranslator()
+        
+        response = MagicMock()
+        response.chunks = None
+        
+        result = translator.from_pulsar(response)
+        
+        assert isinstance(result, dict)
+        assert "chunks" not in result or result.get("chunks") is None  # omitting the key or mapping it to None both pass
+
+    def test_response_translator_from_response_with_completion(self):
+        """Test response translator with completion flag"""
+        translator = DocumentEmbeddingsResponseTranslator()
+        
+        response = DocumentEmbeddingsResponse(
+            error=None,
+            chunks=["chunk1", "chunk2"]
+        )
+        
+        result, is_final = translator.from_response_with_completion(response)
+        
+        assert isinstance(result, dict)
+        assert "chunks" in result
+        assert result["chunks"] == ["chunk1", "chunk2"]
+        assert is_final is True  # Document embeddings responses are expected to always be final
+
+    def test_response_translator_to_pulsar_not_implemented(self):
+        """Test that to_pulsar raises NotImplementedError for responses"""
+        translator = DocumentEmbeddingsResponseTranslator()
+        
+        with pytest.raises(NotImplementedError):
+            translator.to_pulsar({"chunks": ["test"]})
+
+
+class TestDocumentEmbeddingsMessageCompatibility:
+    """Test compatibility between request and response messages"""
+
+    def test_request_response_flow(self):
+        """Test complete request-response flow maintains data integrity"""
+        # Request payload as a plain dict
+        request_data = {
+            "vectors": [[0.1, 0.2, 0.3]],
+            "limit": 5,
+            "user": "test_user",
+            "collection": "test_collection"
+        }
+        
+        # Convert to Pulsar request
+        req_translator = DocumentEmbeddingsRequestTranslator()
+        pulsar_request = req_translator.to_pulsar(request_data)
+        
+        # Stand-in for the service: the response is built directly, not derived from pulsar_request
+        response = DocumentEmbeddingsResponse(
+            error=None,
+            chunks=["relevant chunk 1", "relevant chunk 2"]
+        )
+        
+        # Convert response back to dict
+        resp_translator = DocumentEmbeddingsResponseTranslator()
+        response_data = resp_translator.from_pulsar(response)
+        
+        # Verify both translations produced the expected types and the chunk count
+        assert isinstance(pulsar_request, DocumentEmbeddingsRequest)
+        assert isinstance(response_data, dict)
+        assert "chunks" in response_data
+        assert len(response_data["chunks"]) == 2
+
+    def test_error_response_flow(self):
+        """Test error response flow"""
+        # Build a response that carries an error and no chunks
+        error = Error(
+            type="vector_db_error",
+            message="Collection not found"
+        )
+        
+        response = DocumentEmbeddingsResponse(
+            error=error,
+            chunks=None
+        )
+        
+        # Convert response to dict
+        translator = DocumentEmbeddingsResponseTranslator()
+        response_data = translator.from_pulsar(response)
+        
+        # Only the chunks side is checked below
+        assert isinstance(response_data, dict)
+        # NOTE(review): assumes the translator omits error from the dict - that is not asserted here
+        assert "chunks" not in response_data or response_data.get("chunks") is None
--- a/tests/contract/test_message_contracts.py
+++ b/tests/contract/test_message_contracts.py
@ -20,7 +20,7 @@ from trustgraph.schema import (
    GraphEmbeddings, EntityEmbeddings,
    Metadata, Field, RowSchema,
    StructuredDataSubmission, ExtractedObject,
-    NLPToStructuredQueryRequest, NLPToStructuredQueryResponse,
+    QuestionToStructuredQueryRequest, QuestionToStructuredQueryResponse,
    StructuredQueryRequest, StructuredQueryResponse,
    StructuredObjectEmbedding
 )
@ -198,8 +198,8 @@ class TestAgentMessageContracts:
        # Test required fields
        request = AgentRequest(**request_data)
        assert hasattr(request, 'question')
-        assert hasattr(request, 'plan')
        assert hasattr(request, 'state')
+        assert hasattr(request, 'group')
        assert hasattr(request, 'history')

    def test_agent_response_schema_contract(self, sample_message_data):
--- a/tests/contract/test_objects_cassandra_contracts.py
+++ b/tests/contract/test_objects_cassandra_contracts.py
@ -30,11 +30,11 @@ class TestObjectsCassandraContracts:
        test_object = ExtractedObject(
            metadata=test_metadata,
            schema_name="customer_records",
-            values={
+            values=[{
                "customer_id": "CUST123",
                "name": "Test Customer",
                "email": "test@example.com"
-            },
+            }],
            confidence=0.95,
            source_span="Customer data from document..."
        )
@ -54,7 +54,7 @@ class TestObjectsCassandraContracts:
        
        # Verify types
        assert isinstance(test_object.schema_name, str)
-        assert isinstance(test_object.values, dict)
+        assert isinstance(test_object.values, list)
        assert isinstance(test_object.confidence, float)
        assert isinstance(test_object.source_span, str)

@ -200,7 +200,7 @@ class TestObjectsCassandraContracts:
                metadata=[]
            ),
            schema_name="test_schema",
-            values={"field1": "value1", "field2": "123"},
+            values=[{"field1": "value1", "field2": "123"}],
            confidence=0.85,
            source_span="Test span"
        )
@ -292,7 +292,7 @@ class TestObjectsCassandraContracts:
                metadata=[{"key": "value"}]
            ),
            schema_name="table789",  # -> table name
-            values={"field": "value"},
+            values=[{"field": "value"}],
            confidence=0.9,
            source_span="Source"
        )
@ -303,4 +303,215 @@ class TestObjectsCassandraContracts:
        # - metadata.collection -> Part of primary key
        assert test_obj.metadata.user  # Required for keyspace
        assert test_obj.schema_name  # Required for table
-        assert test_obj.metadata.collection  # Required for partition key
+        assert test_obj.metadata.collection  # Required for partition key
+
+
+@pytest.mark.contract
+class TestObjectsCassandraContractsBatch:
+    """Contract tests for Cassandra object storage batch processing"""
+
+    def test_extracted_object_batch_input_contract(self):
+        """Test that batched ExtractedObject schema matches expected input format"""
+        # Build an object whose values list carries three records
+        test_metadata = Metadata(
+            id="batch-doc-001",
+            user="test_user",
+            collection="test_collection",
+            metadata=[]
+        )
+        
+        batch_object = ExtractedObject(
+            metadata=test_metadata,
+            schema_name="customer_records",
+            values=[
+                {
+                    "customer_id": "CUST123",
+                    "name": "Test Customer 1",
+                    "email": "test1@example.com"
+                },
+                {
+                    "customer_id": "CUST124", 
+                    "name": "Test Customer 2",
+                    "email": "test2@example.com"
+                },
+                {
+                    "customer_id": "CUST125",
+                    "name": "Test Customer 3", 
+                    "email": "test3@example.com"
+                }
+            ],
+            confidence=0.88,
+            source_span="Multiple customer data from document..."
+        )
+        
+        # Verify batch structure: values is a list of length three
+        assert hasattr(batch_object, 'values')
+        assert isinstance(batch_object.values, list)
+        assert len(batch_object.values) == 3
+        
+        # Verify each batch item is a dict with the expected keys, in input order
+        for i, batch_item in enumerate(batch_object.values):
+            assert isinstance(batch_item, dict)
+            assert "customer_id" in batch_item
+            assert "name" in batch_item
+            assert "email" in batch_item
+            assert batch_item["customer_id"] == f"CUST12{3+i}"  # CUST123, CUST124, CUST125
+            assert f"Test Customer {i+1}" in batch_item["name"]  # substring check, not equality
+
+    def test_extracted_object_empty_batch_contract(self):
+        """Test empty batch ExtractedObject contract"""
+        test_metadata = Metadata(
+            id="empty-batch-001",
+            user="test_user",
+            collection="test_collection", 
+            metadata=[]
+        )
+        
+        empty_batch_object = ExtractedObject(
+            metadata=test_metadata,
+            schema_name="empty_schema",
+            values=[],  # Empty batch: a list with no records
+            confidence=1.0,
+            source_span="No objects found in document"
+        )
+        
+        # Verify empty batch structure: still a list, just with zero items
+        assert hasattr(empty_batch_object, 'values')
+        assert isinstance(empty_batch_object.values, list)
+        assert len(empty_batch_object.values) == 0
+        assert empty_batch_object.confidence == 1.0
+
+    def test_extracted_object_single_item_batch_contract(self):
+        """Test single-item batch (backward compatibility) contract"""
+        test_metadata = Metadata(
+            id="single-batch-001",
+            user="test_user",
+            collection="test_collection",
+            metadata=[]
+        )
+        
+        single_batch_object = ExtractedObject(
+            metadata=test_metadata,
+            schema_name="customer_records",
+            values=[{  # One-element list: the batch form of what used to be a single dict
+                "customer_id": "CUST999",
+                "name": "Single Customer",
+                "email": "single@example.com"
+            }],
+            confidence=0.95,
+            source_span="Single customer data from document..."
+        )
+        
+        # Verify single-item batch structure: a list wrapping exactly one dict
+        assert isinstance(single_batch_object.values, list)
+        assert len(single_batch_object.values) == 1
+        assert isinstance(single_batch_object.values[0], dict)
+        assert single_batch_object.values[0]["customer_id"] == "CUST999"
+
+    def test_extracted_object_batch_serialization_contract(self):
+        """Test that batched ExtractedObject can be serialized/deserialized correctly"""
+        # Build a three-record batch to push through the schema
+        original = ExtractedObject(
+            metadata=Metadata(
+                id="batch-serial-001",
+                user="test_user",
+                collection="test_coll",
+                metadata=[]
+            ),
+            schema_name="test_schema",
+            values=[
+                {"field1": "value1", "field2": "123"},
+                {"field1": "value2", "field2": "456"},  
+                {"field1": "value3", "field2": "789"}
+            ],
+            confidence=0.92,
+            source_span="Batch test span"
+        )
+        
+        # Round-trip through the Avro schema for ExtractedObject
+        schema = AvroSchema(ExtractedObject)  # NOTE(review): AvroSchema import is not visible in this hunk - confirm it exists at file top
+        
+        # Encode and decode
+        encoded = schema.encode(original)
+        decoded = schema.decode(encoded)
+        
+        # Verify metadata, schema name and batch length survive the round-trip
+        assert decoded.metadata.id == original.metadata.id
+        assert decoded.metadata.user == original.metadata.user
+        assert decoded.metadata.collection == original.metadata.collection
+        assert decoded.schema_name == original.schema_name
+        assert len(decoded.values) == len(original.values)
+        assert len(decoded.values) == 3
+        
+        # Verify each batch item, both against the original and against literal values
+        for i in range(3):
+            assert decoded.values[i] == original.values[i]
+            assert decoded.values[i]["field1"] == f"value{i+1}"  # "value1", "value2", "value3"
+            assert decoded.values[i]["field2"] == f"{123 + i*333}"  # "123", "456", "789"
+            
+        assert decoded.confidence == original.confidence
+        assert decoded.source_span == original.source_span
+
+    def test_batch_processing_field_validation_contract(self):
+        """Test that batch processing validates field consistency"""
+        # All batch items should have consistent field structure
+        # NOTE(review): only local literals are compared here; no application code is exercised
+        
+        # Valid batch - all items have same fields
+        valid_batch_values = [
+            {"id": "1", "name": "Item 1", "value": "100"},
+            {"id": "2", "name": "Item 2", "value": "200"},
+            {"id": "3", "name": "Item 3", "value": "300"}
+        ]
+        
+        # Compare every item's key set against the first item's key set
+        field_sets = [set(item.keys()) for item in valid_batch_values]
+        assert all(fields == field_sets[0] for fields in field_sets), "All batch items should have consistent fields"
+        
+        # Invalid batch - inconsistent fields (NOTE(review): application-side rejection is assumed, not tested here)
+        invalid_batch_values = [
+            {"id": "1", "name": "Item 1", "value": "100"},
+            {"id": "2", "name": "Item 2"},  # Missing 'value' field
+            {"id": "3", "name": "Item 3", "value": "300", "extra": "field"}  # Extra field
+        ]
+        
+        # Demonstrate the inconsistency: at least one key set differs from the first
+        invalid_field_sets = [set(item.keys()) for item in invalid_batch_values]
+        assert not all(fields == invalid_field_sets[0] for fields in invalid_field_sets), "Invalid batch should have inconsistent fields"
+
+    def test_batch_storage_partition_key_contract(self):
+        """Test that batch objects maintain partition key consistency"""
+        # In Cassandra storage, all objects in a batch should:
+        # 1. Belong to the same collection (partition key component)
+        # 2. Have unique primary keys within the batch
+        # 3. Be stored in the same keyspace (user)
+        
+        test_metadata = Metadata(
+            id="partition-test-001",
+            user="consistent_user",  # Same keyspace
+            collection="consistent_collection",  # Same partition
+            metadata=[]
+        )
+        
+        batch_object = ExtractedObject(
+            metadata=test_metadata,
+            schema_name="partition_test",
+            values=[
+                {"id": "pk1", "data": "data1"},  # Unique primary key
+                {"id": "pk2", "data": "data2"},  # Unique primary key
+                {"id": "pk3", "data": "data3"}   # Unique primary key
+            ],
+            confidence=0.95,
+            source_span="Partition consistency test"
+        )
+        
+        # Verify consistency contract: both metadata fields must be truthy (non-empty)
+        assert batch_object.metadata.user  # Must have user for keyspace
+        assert batch_object.metadata.collection  # Must have collection for partition key
+        
+        # Verify unique primary keys in batch: de-duplicating must not shrink the list
+        primary_keys = [item["id"] for item in batch_object.values]
+        assert len(primary_keys) == len(set(primary_keys)), "Primary keys must be unique within batch"
+        
+        # NOTE(review): storage in one keyspace/partition is assumed from the shared
+        # metadata.user and metadata.collection; no storage code runs in this test
--- a/tests/contract/test_objects_graphql_query_contracts.py
+++ b/tests/contract/test_objects_graphql_query_contracts.py
@ -0,0 +1,427 @@
+"""
+Contract tests for Objects GraphQL Query Service
+
+These tests verify the message contracts and schema compatibility
+for the objects GraphQL query processor.
+"""
+
+import pytest
+import json
+from pulsar.schema import AvroSchema
+
+from trustgraph.schema import ObjectsQueryRequest, ObjectsQueryResponse, GraphQLError
+from trustgraph.query.objects.cassandra.service import Processor
+
+
+@pytest.mark.contract
+class TestObjectsGraphQLQueryContracts:
+    """Contract tests for GraphQL query service messages"""
+
+    def test_objects_query_request_contract(self):
+        """Test ObjectsQueryRequest schema structure and required fields"""
+        # Build a request with every field populated
+        test_request = ObjectsQueryRequest(
+            user="test_user",
+            collection="test_collection",
+            query='{ customers { id name email } }',
+            variables={"status": "active", "limit": "10"},  # values are strings, including the numeric limit
+            operation_name="GetCustomers"
+        )
+        
+        # Verify all five contract fields exist as attributes
+        assert hasattr(test_request, 'user')
+        assert hasattr(test_request, 'collection') 
+        assert hasattr(test_request, 'query')
+        assert hasattr(test_request, 'variables')
+        assert hasattr(test_request, 'operation_name')
+        
+        # Verify field types: four strings and one dict
+        assert isinstance(test_request.user, str)
+        assert isinstance(test_request.collection, str)
+        assert isinstance(test_request.query, str)
+        assert isinstance(test_request.variables, dict)
+        assert isinstance(test_request.operation_name, str)
+        
+        # Verify the stored values match what was passed in
+        assert test_request.user == "test_user"
+        assert test_request.collection == "test_collection"
+        assert "customers" in test_request.query
+        assert test_request.variables["status"] == "active"
+        assert test_request.operation_name == "GetCustomers"
+
+    def test_objects_query_request_minimal(self):
+        """Test ObjectsQueryRequest with minimal required fields"""
+        # All five fields are still passed; variables and operation_name are left empty
+        minimal_request = ObjectsQueryRequest(
+            user="user",
+            collection="collection",
+            query='{ test }',
+            variables={},
+            operation_name=""
+        )
+        
+        # Verify the empty values are stored as given
+        assert minimal_request.user == "user"
+        assert minimal_request.collection == "collection"
+        assert minimal_request.query == '{ test }'
+        assert minimal_request.variables == {}
+        assert minimal_request.operation_name == ""
+
+    def test_graphql_error_contract(self):
+        """Test GraphQLError schema structure"""
+        # Build an error with message, path and extensions all populated
+        test_error = GraphQLError(
+            message="Field 'nonexistent' doesn't exist on type 'Customer'",
+            path=["customers", "0", "nonexistent"],  # All strings per Array(String()) schema, so the list index is "0"
+            extensions={"code": "FIELD_ERROR", "timestamp": "2024-01-01T00:00:00Z"}
+        )
+        
+        # Verify all three contract fields exist as attributes
+        assert hasattr(test_error, 'message')
+        assert hasattr(test_error, 'path')
+        assert hasattr(test_error, 'extensions')
+        
+        # Verify field types: str, list and dict respectively
+        assert isinstance(test_error.message, str)
+        assert isinstance(test_error.path, list)
+        assert isinstance(test_error.extensions, dict)
+        
+        # Verify the stored values match what was passed in
+        assert "doesn't exist" in test_error.message
+        assert test_error.path == ["customers", "0", "nonexistent"]
+        assert test_error.extensions["code"] == "FIELD_ERROR"
+
+    def test_objects_query_response_success_contract(self):
+        """Test ObjectsQueryResponse schema for successful queries"""
+        # Successful response: data is a JSON-encoded string and errors is empty
+        success_response = ObjectsQueryResponse(
+            error=None,
+            data='{"customers": [{"id": "1", "name": "John", "email": "john@example.com"}]}',
+            errors=[],
+            extensions={"execution_time": "0.045", "query_complexity": "5"}
+        )
+        
+        # Verify all four contract fields exist as attributes
+        assert hasattr(success_response, 'error')
+        assert hasattr(success_response, 'data')
+        assert hasattr(success_response, 'errors')
+        assert hasattr(success_response, 'extensions')
+        
+        # Verify field types: data stays a str, it is not parsed by the schema
+        assert success_response.error is None
+        assert isinstance(success_response.data, str)
+        assert isinstance(success_response.errors, list)
+        assert isinstance(success_response.extensions, dict)
+        
+        # Verify data can be parsed as JSON and holds the single customer record
+        parsed_data = json.loads(success_response.data)
+        assert "customers" in parsed_data
+        assert len(parsed_data["customers"]) == 1
+        assert parsed_data["customers"][0]["id"] == "1"
+
+    def test_objects_query_response_error_contract(self):
+        """Test ObjectsQueryResponse schema for error cases"""
+        # The response is built with an empty errors array to work around a Pulsar
+        # Array(Record) validation bug; the GraphQLError objects below are never attached to it
+        error_response = ObjectsQueryResponse(
+            error=None,  # System error is None - these are GraphQL errors
+            data=None,   # No data due to errors
+            errors=[],   # Empty errors array to avoid Pulsar bug
+            extensions={"execution_time": "0.012"}
+        )
+        
+        # Build GraphQLError objects standalone (not attached to the response, bypassing Pulsar validation)
+        graphql_errors = [
+            GraphQLError(
+                message="Syntax error near 'invalid'",
+                path=["query"],
+                extensions={"code": "SYNTAX_ERROR"}
+            ),
+            GraphQLError(
+                message="Field validation failed", 
+                path=["customers", "email"],
+                extensions={"code": "VALIDATION_ERROR", "details": "Invalid email format"}
+            )
+        ]
+        
+        # Verify response structure (basic fields only; errors is empty by construction)
+        assert error_response.error is None
+        assert error_response.data is None
+        assert len(error_response.errors) == 0  # Empty due to Pulsar bug workaround
+        assert error_response.extensions["execution_time"] == "0.012"
+        
+        # Verify the standalone GraphQLError objects hold the values they were built with
+        syntax_error = graphql_errors[0]
+        assert "Syntax error" in syntax_error.message
+        assert syntax_error.extensions["code"] == "SYNTAX_ERROR"
+        
+        validation_error = graphql_errors[1]
+        assert "validation failed" in validation_error.message
+        assert validation_error.path == ["customers", "email"]
+        assert validation_error.extensions["details"] == "Invalid email format"
+
+    def test_objects_query_response_system_error_contract(self):
+        """Test ObjectsQueryResponse schema for system errors"""
+        from trustgraph.schema import Error
+        
+        # System-level failure: error is set, data is None and errors stays empty
+        system_error_response = ObjectsQueryResponse(
+            error=Error(
+                type="objects-query-error",
+                message="Failed to connect to Cassandra cluster"
+            ),
+            data=None,
+            errors=[],
+            extensions={}
+        )
+        
+        # Verify the nested Error is readable through the response and no data is carried
+        assert system_error_response.error is not None
+        assert system_error_response.error.type == "objects-query-error"
+        assert "Cassandra" in system_error_response.error.message
+        assert system_error_response.data is None
+        assert len(system_error_response.errors) == 0
+
+    @pytest.mark.skip(reason="Pulsar Array(Record) validation bug - Record.type() missing self argument")
+    def test_request_response_serialization_contract(self):
+        """Test that request/response can be serialized/deserialized correctly"""
+        # Build the request that will be round-tripped (test is currently skipped, see decorator)
+        original_request = ObjectsQueryRequest(
+            user="serialization_test",
+            collection="test_data",
+            query='{ orders(limit: 5) { id total customer { name } } }',
+            variables={"limit": "5", "status": "active"},
+            operation_name="GetRecentOrders"
+        )
+        
+        # Test request serialization using the Pulsar Avro schema
+        request_schema = AvroSchema(ObjectsQueryRequest)
+        
+        # Encode and decode request
+        encoded_request = request_schema.encode(original_request)
+        decoded_request = request_schema.decode(encoded_request)
+        
+        # Verify every request field survives the round-trip
+        assert decoded_request.user == original_request.user
+        assert decoded_request.collection == original_request.collection
+        assert decoded_request.query == original_request.query
+        assert decoded_request.variables == original_request.variables
+        assert decoded_request.operation_name == original_request.operation_name
+        
+        # Build the response with an empty errors list - work around Pulsar Array(Record) bug
+        original_response = ObjectsQueryResponse(
+            error=None,
+            data='{"orders": []}',
+            errors=[],  # Empty to avoid Pulsar validation bug
+            extensions={"rate_limit_remaining": "0"}
+        )
+        
+        # GraphQLError is built standalone and never serialized; only its attributes are checked below
+        graphql_error = GraphQLError(
+            message="Rate limit exceeded",
+            path=["orders"],
+            extensions={"code": "RATE_LIMIT", "retry_after": "60"}
+        )
+        
+        # Test response serialization
+        response_schema = AvroSchema(ObjectsQueryResponse)
+        
+        # Encode and decode response
+        encoded_response = response_schema.encode(original_response)
+        decoded_response = response_schema.decode(encoded_response)
+        
+        # Verify response round-trip (basic fields)
+        assert decoded_response.error == original_response.error
+        assert decoded_response.data == original_response.data
+        assert len(decoded_response.errors) == 0  # Empty due to Pulsar bug workaround
+        assert decoded_response.extensions["rate_limit_remaining"] == "0"
+        
+        # Verify the standalone GraphQLError holds the values it was built with
+        assert graphql_error.message == "Rate limit exceeded"
+        assert graphql_error.extensions["code"] == "RATE_LIMIT"
+        assert graphql_error.extensions["retry_after"] == "60"
+
+    def test_graphql_query_format_contract(self):
+        """Test that the request carries shorthand, parameterized and nested GraphQL queries"""
+        # Test basic shorthand query (no variables, empty operation name)
+        basic_query = ObjectsQueryRequest(
+            user="test", collection="test", query='{ customers { id } }',
+            variables={}, operation_name=""
+        )
+        assert "customers" in basic_query.query
+        assert basic_query.query.strip().startswith('{')
+        assert basic_query.query.strip().endswith('}')
+        
+        # Test named query with variables (variable values are passed as strings)
+        parameterized_query = ObjectsQueryRequest(
+            user="test", collection="test", 
+            query='query GetCustomers($status: String, $limit: Int) { customers(status: $status, limit: $limit) { id name } }',
+            variables={"status": "active", "limit": "10"}, 
+            operation_name="GetCustomers"
+        )
+        assert "$status" in parameterized_query.query
+        assert "$limit" in parameterized_query.query
+        assert parameterized_query.variables["status"] == "active"
+        assert parameterized_query.operation_name == "GetCustomers"
+        
+        # Test multi-line nested query (customers > orders > items)
+        nested_query = ObjectsQueryRequest(
+            user="test", collection="test",
+            query='''
+            {
+                customers(limit: 10) {
+                    id
+                    name
+                    email
+                    orders {
+                        order_id
+                        total
+                        items {
+                            product_name
+                            quantity
+                        }
+                    }
+                }
+            }
+            ''',
+            variables={}, operation_name=""
+        )
+        assert "customers" in nested_query.query
+        assert "orders" in nested_query.query
+        assert "items" in nested_query.query
+        assert "items" in nested_query.query
+
+    def test_variables_type_support_contract(self):
+        """Test that variables are carried as strings, with non-string values given in string form"""
+        # Variables should support string values (as per schema definition)
+        # Note: Current schema uses Map(String()) which only supports string values
+        # This test verifies the current contract, though ideally we'd support all JSON types
+        
+        variables_test = ObjectsQueryRequest(
+            user="test", collection="test", query='{ test }',
+            variables={
+                "string_var": "test_value",
+                "numeric_var": "123",  # Numbers as strings due to Map(String()) limitation
+                "boolean_var": "true",  # Booleans as strings
+                "array_var": '["item1", "item2"]',  # Arrays as JSON strings
+                "object_var": '{"key": "value"}'  # Objects as JSON strings
+            },
+            operation_name=""
+        )
+        
+        # Verify all variables are strings (current contract limitation)
+        for key, value in variables_test.variables.items():
+            assert isinstance(value, str), f"Variable {key} should be string, got {type(value)}"
+        
+        # Verify the JSON-encoded array/object variables decode back to their original values
+        assert json.loads(variables_test.variables["array_var"]) == ["item1", "item2"]
+        assert json.loads(variables_test.variables["object_var"]) == {"key": "value"}
+
+    def test_cassandra_context_fields_contract(self):
+        """Test that the request carries the user/collection fields used for Cassandra targeting"""
+        # Verify request has fields needed for Cassandra keyspace/table targeting
+        request = ObjectsQueryRequest(
+            user="keyspace_name",  # Maps to Cassandra keyspace
+            collection="partition_collection",  # Used in partition key
+            query='{ objects { id } }',
+            variables={}, operation_name=""
+        )
+        
+        # Both fields must be present and non-empty (the asserts check truthiness)
+        assert request.user  # Required for keyspace identification
+        assert request.collection  # Required for partition key
+        
+        # Verify field naming follows TrustGraph patterns (matching other query services)
+        # This matches TriplesQueryRequest, DocumentEmbeddingsRequest patterns
+        assert hasattr(request, 'user')  # Same as TriplesQueryRequest.user
+        assert hasattr(request, 'collection')  # Same as TriplesQueryRequest.collection
+
+    def test_graphql_extensions_contract(self):
+        """Test that response extensions hold query metadata as a dict with string values"""
+        # Extensions should support query metadata
+        response_with_extensions = ObjectsQueryResponse(
+            error=None,
+            data='{"test": "data"}',
+            errors=[],
+            extensions={
+                "execution_time": "0.142",
+                "query_complexity": "8", 
+                "cache_hit": "false",
+                "data_source": "cassandra",
+                "schema_version": "1.2.3"
+            }
+        )
+        
+        # Verify extensions structure
+        assert isinstance(response_with_extensions.extensions, dict)
+        
+        # Every extension key supplied above must be present on the response
+        expected_extensions = {
+            "execution_time", "query_complexity", "cache_hit", 
+            "data_source", "schema_version"
+        }
+        actual_extensions = set(response_with_extensions.extensions.keys())
+        assert expected_extensions.issubset(actual_extensions)
+        
+        # Verify extension values are strings (Map(String()) constraint)
+        for key, value in response_with_extensions.extensions.items():
+            assert isinstance(value, str), f"Extension {key} should be string"
+
+    def test_error_path_format_contract(self):
+        """Test that GraphQLError.path is a list whose segments are all strings"""
+        # Test various path formats that can occur in GraphQL errors
+        # Note: All path segments must be strings due to Array(String()) schema constraint
+        path_test_cases = [
+            # Field error path
+            ["customers", "0", "email"],
+            # Nested field error  
+            ["customers", "0", "orders", "1", "total"],
+            # Root level error
+            ["customers"],
+            # Complex nested path
+            ["orders", "items", "2", "product", "details", "price"]
+        ]
+        
+        for path in path_test_cases:
+            error = GraphQLError(
+                message=f"Error at path {path}",
+                path=path,
+                extensions={"code": "PATH_ERROR"}
+            )
+            
+            # Verify path is a list; array indices are carried as strings (e.g. "0")
+            assert isinstance(error.path, list)
+            for segment in error.path:
+                # Path segments can be field names (strings) or array indices (ints)
+                # But our schema uses Array(String()) so all are strings
+                assert isinstance(segment, str)
+
+    def test_operation_name_usage_contract(self):
+        """Test operation_name field usage for multi-operation documents"""
+        # Document containing two named operations
+        multi_op_query = '''
+        query GetCustomers { customers { id name } }
+        query GetOrders { orders { order_id total } }
+        '''
+        
+        # Request to execute specific operation
+        multi_op_request = ObjectsQueryRequest(
+            user="test", collection="test",
+            query=multi_op_query,
+            variables={}, 
+            operation_name="GetCustomers"
+        )
+        
+        # Verify operation name is preserved and both operations remain in the query text
+        assert multi_op_request.operation_name == "GetCustomers"
+        assert "GetCustomers" in multi_op_request.query
+        assert "GetOrders" in multi_op_request.query
+        
+        # Test single operation (operation_name optional)
+        single_op_request = ObjectsQueryRequest(
+            user="test", collection="test",
+            query='{ customers { id } }',
+            variables={}, operation_name=""
+        )
+        
+        # Operation name can be empty for single operations
+        assert single_op_request.operation_name == ""
--- a/tests/contract/test_structured_data_contracts.py
+++ b/tests/contract/test_structured_data_contracts.py
@ -12,7 +12,7 @@ from typing import Dict, Any

 from trustgraph.schema import (
    StructuredDataSubmission, ExtractedObject,
-    NLPToStructuredQueryRequest, NLPToStructuredQueryResponse,
+    QuestionToStructuredQueryRequest, QuestionToStructuredQueryResponse,
    StructuredQueryRequest, StructuredQueryResponse,
    StructuredObjectEmbedding, Field, RowSchema,
    Metadata, Error, Value
@ -128,41 +128,98 @@ class TestStructuredDataSchemaContracts:
        obj = ExtractedObject(
            metadata=metadata,
            schema_name="customer_records",
-            values={"id": "123", "name": "John Doe", "email": "john@example.com"},
+            values=[{"id": "123", "name": "John Doe", "email": "john@example.com"}],
            confidence=0.95,
            source_span="John Doe (john@example.com) customer ID 123"
        )

        # Assert
        assert obj.schema_name == "customer_records"
-        assert obj.values["name"] == "John Doe"
+        assert obj.values[0]["name"] == "John Doe"
        assert obj.confidence == 0.95
        assert len(obj.source_span) > 0
        assert obj.metadata.id == "extracted-obj-001"

+    def test_extracted_object_batch_contract(self):
+        """Test ExtractedObject schema contract for a batch of three value records"""
+        # Arrange
+        metadata = Metadata(
+            id="extracted-batch-001",
+            user="test_user",
+            collection="test_collection",
+            metadata=[]
+        )
+        
+        # Act - create object with multiple values
+        obj = ExtractedObject(
+            metadata=metadata,
+            schema_name="customer_records",
+            values=[
+                {"id": "123", "name": "John Doe", "email": "john@example.com"},
+                {"id": "124", "name": "Jane Smith", "email": "jane@example.com"},
+                {"id": "125", "name": "Bob Johnson", "email": "bob@example.com"}
+            ],
+            confidence=0.85,
+            source_span="Multiple customers found in document"
+        )
+
+        # Assert - batch size, record order and per-record fields are as supplied
+        assert obj.schema_name == "customer_records"
+        assert len(obj.values) == 3
+        assert obj.values[0]["name"] == "John Doe"
+        assert obj.values[1]["name"] == "Jane Smith" 
+        assert obj.values[2]["name"] == "Bob Johnson"
+        assert obj.values[0]["id"] == "123"
+        assert obj.values[1]["id"] == "124"
+        assert obj.values[2]["id"] == "125"
+        assert obj.confidence == 0.85
+        assert "Multiple customers" in obj.source_span
+
+    def test_extracted_object_empty_batch_contract(self):
+        """Test ExtractedObject schema contract for empty values array"""
+        # Arrange
+        metadata = Metadata(
+            id="extracted-empty-001",
+            user="test_user", 
+            collection="test_collection",
+            metadata=[]
+        )
+        
+        # Act - create object with empty values array
+        obj = ExtractedObject(
+            metadata=metadata,
+            schema_name="empty_schema",
+            values=[],
+            confidence=1.0,
+            source_span="No objects found"
+        )
+
+        # Assert - an empty values list is accepted and stays empty
+        assert obj.schema_name == "empty_schema"
+        assert len(obj.values) == 0
+        assert obj.confidence == 1.0
+

@pytest.mark.contract
 class TestStructuredQueryServiceContracts:
    """Contract tests for structured query services"""

    def test_nlp_to_structured_query_request_contract(self):
-        """Test NLPToStructuredQueryRequest schema contract"""
+        """Test QuestionToStructuredQueryRequest schema contract"""
        # Act
-        request = NLPToStructuredQueryRequest(
-            natural_language_query="Show me all customers who registered last month",
-            max_results=100,
-            context_hints={"time_range": "last_month", "entity_type": "customer"}
+        request = QuestionToStructuredQueryRequest(
+            question="Show me all customers who registered last month",
+            max_results=100
        )

        # Assert
-        assert "customers" in request.natural_language_query
+        assert "customers" in request.question
        assert request.max_results == 100
-        assert request.context_hints["time_range"] == "last_month"

    def test_nlp_to_structured_query_response_contract(self):
-        """Test NLPToStructuredQueryResponse schema contract"""
+        """Test QuestionToStructuredQueryResponse schema contract"""
        # Act
-        response = NLPToStructuredQueryResponse(
+        response = QuestionToStructuredQueryResponse(
            error=None,
            graphql_query="query { customers(filter: {registered: {gte: \"2024-01-01\"}}) { id name email } }",
            variables={"start_date": "2024-01-01"},
@ -180,15 +237,11 @@ class TestStructuredQueryServiceContracts:
        """Test StructuredQueryRequest schema contract"""
        # Act
        request = StructuredQueryRequest(
-            query="query GetCustomers($limit: Int) { customers(limit: $limit) { id name email } }",
-            variables={"limit": "10"},
-            operation_name="GetCustomers"
+            question="Show me customers with limit 10"
        )

        # Assert
-        assert "customers" in request.query
-        assert request.variables["limit"] == "10"
-        assert request.operation_name == "GetCustomers"
+        assert "customers" in request.question

    def test_structured_query_response_contract(self):
        """Test StructuredQueryResponse schema contract"""
@ -279,7 +332,7 @@ class TestStructuredDataSerializationContracts:
        object_data = {
            "metadata": metadata,
            "schema_name": "test_schema",
-            "values": {"field1": "value1"},
+            "values": [{"field1": "value1"}],
            "confidence": 0.8,
            "source_span": "test span"
        }
@ -291,11 +344,10 @@ class TestStructuredDataSerializationContracts:
        """Test NLP query request/response serialization contract"""
        # Test request
        request_data = {
-            "natural_language_query": "test query",
-            "max_results": 10,
-            "context_hints": {}
+            "question": "test query",
+            "max_results": 10
        }
-        assert serialize_deserialize_test(NLPToStructuredQueryRequest, request_data)
+        assert serialize_deserialize_test(QuestionToStructuredQueryRequest, request_data)

        # Test response
        response_data = {
@ -305,4 +357,54 @@ class TestStructuredDataSerializationContracts:
            "detected_schemas": ["test"],
            "confidence": 0.9
        }
-        assert serialize_deserialize_test(NLPToStructuredQueryResponse, response_data)
+        assert serialize_deserialize_test(QuestionToStructuredQueryResponse, response_data)
+
+    def test_structured_query_serialization(self):
+        """Test structured query request/response serialization contract"""
+        # Test request built from a question only
+        request_data = {
+            "question": "Show me all customers"
+        }
+        assert serialize_deserialize_test(StructuredQueryRequest, request_data)
+
+        # Test response with JSON-encoded data and an empty errors list
+        response_data = {
+            "error": None,
+            "data": '{"customers": [{"id": "1", "name": "John"}]}',
+            "errors": []
+        }
+        assert serialize_deserialize_test(StructuredQueryResponse, response_data)
+
+    def test_extracted_object_batch_serialization(self):
+        """Test ExtractedObject batch serialization contract"""
+        # Arrange - three value records sharing the same field names
+        metadata = Metadata(id="test", user="user", collection="col", metadata=[])
+        batch_object_data = {
+            "metadata": metadata,
+            "schema_name": "test_schema",
+            "values": [
+                {"field1": "value1", "field2": "value2"},
+                {"field1": "value3", "field2": "value4"},
+                {"field1": "value5", "field2": "value6"}
+            ],
+            "confidence": 0.9,
+            "source_span": "batch test span"
+        }
+
+        # Act & Assert
+        assert serialize_deserialize_test(ExtractedObject, batch_object_data)
+
+    def test_extracted_object_empty_batch_serialization(self):
+        """Test ExtractedObject empty batch serialization contract"""
+        # Arrange - object data whose values list is empty
+        metadata = Metadata(id="test", user="user", collection="col", metadata=[])
+        empty_batch_data = {
+            "metadata": metadata,
+            "schema_name": "test_schema", 
+            "values": [],
+            "confidence": 1.0,
+            "source_span": "empty batch"
+        }
+
+        # Act & Assert
+        assert serialize_deserialize_test(ExtractedObject, empty_batch_data)