trustgraph/tests/contract/test_rows_graphql_query_contracts.py

427 lines
18 KiB
Python
Raw Normal View History

2025-09-20 16:00:37 +01:00
"""
2026-02-28 11:03:14 +00:00
Contract tests for Rows GraphQL Query Service
2025-09-20 16:00:37 +01:00
These tests verify the message contracts and schema compatibility
2026-02-28 11:03:14 +00:00
for the rows GraphQL query processor.
2025-09-20 16:00:37 +01:00
"""
import pytest
import json
from pulsar.schema import AvroSchema
2026-02-28 11:03:14 +00:00
from trustgraph.schema import RowsQueryRequest, RowsQueryResponse, GraphQLError
from trustgraph.query.rows.cassandra.service import Processor
2025-09-20 16:00:37 +01:00
@pytest.mark.contract
2026-02-28 11:03:14 +00:00
class TestRowsGraphQLQueryContracts:
2025-09-20 16:00:37 +01:00
"""Contract tests for GraphQL query service messages"""
2026-02-28 11:03:14 +00:00
def test_rows_query_request_contract(self):
"""Test RowsQueryRequest schema structure and required fields"""
2025-09-20 16:00:37 +01:00
# Create test request with all required fields
2026-02-28 11:03:14 +00:00
test_request = RowsQueryRequest(
2025-09-20 16:00:37 +01:00
user="test_user",
collection="test_collection",
query='{ customers { id name email } }',
variables={"status": "active", "limit": "10"},
operation_name="GetCustomers"
)
# Verify all required fields are present
assert hasattr(test_request, 'user')
assert hasattr(test_request, 'collection')
assert hasattr(test_request, 'query')
assert hasattr(test_request, 'variables')
assert hasattr(test_request, 'operation_name')
# Verify field types
assert isinstance(test_request.user, str)
assert isinstance(test_request.collection, str)
assert isinstance(test_request.query, str)
assert isinstance(test_request.variables, dict)
assert isinstance(test_request.operation_name, str)
# Verify content
assert test_request.user == "test_user"
assert test_request.collection == "test_collection"
assert "customers" in test_request.query
assert test_request.variables["status"] == "active"
assert test_request.operation_name == "GetCustomers"
2026-02-28 11:03:14 +00:00
def test_rows_query_request_minimal(self):
"""Test RowsQueryRequest with minimal required fields"""
2025-09-20 16:00:37 +01:00
# Create request with only essential fields
2026-02-28 11:03:14 +00:00
minimal_request = RowsQueryRequest(
2025-09-20 16:00:37 +01:00
user="user",
collection="collection",
query='{ test }',
variables={},
operation_name=""
)
# Verify minimal request is valid
assert minimal_request.user == "user"
assert minimal_request.collection == "collection"
assert minimal_request.query == '{ test }'
assert minimal_request.variables == {}
assert minimal_request.operation_name == ""
def test_graphql_error_contract(self):
"""Test GraphQLError schema structure"""
# Create test error with all fields
test_error = GraphQLError(
message="Field 'nonexistent' doesn't exist on type 'Customer'",
path=["customers", "0", "nonexistent"], # All strings per Array(String()) schema
extensions={"code": "FIELD_ERROR", "timestamp": "2024-01-01T00:00:00Z"}
)
# Verify all fields are present
assert hasattr(test_error, 'message')
assert hasattr(test_error, 'path')
assert hasattr(test_error, 'extensions')
# Verify field types
assert isinstance(test_error.message, str)
assert isinstance(test_error.path, list)
assert isinstance(test_error.extensions, dict)
# Verify content
assert "doesn't exist" in test_error.message
assert test_error.path == ["customers", "0", "nonexistent"]
assert test_error.extensions["code"] == "FIELD_ERROR"
2026-02-28 11:03:14 +00:00
def test_rows_query_response_success_contract(self):
"""Test RowsQueryResponse schema for successful queries"""
2025-09-20 16:00:37 +01:00
# Create successful response
2026-02-28 11:03:14 +00:00
success_response = RowsQueryResponse(
2025-09-20 16:00:37 +01:00
error=None,
data='{"customers": [{"id": "1", "name": "John", "email": "john@example.com"}]}',
errors=[],
extensions={"execution_time": "0.045", "query_complexity": "5"}
)
# Verify all fields are present
assert hasattr(success_response, 'error')
assert hasattr(success_response, 'data')
assert hasattr(success_response, 'errors')
assert hasattr(success_response, 'extensions')
# Verify field types
assert success_response.error is None
assert isinstance(success_response.data, str)
assert isinstance(success_response.errors, list)
assert isinstance(success_response.extensions, dict)
# Verify data can be parsed as JSON
parsed_data = json.loads(success_response.data)
assert "customers" in parsed_data
assert len(parsed_data["customers"]) == 1
assert parsed_data["customers"][0]["id"] == "1"
2026-02-28 11:03:14 +00:00
def test_rows_query_response_error_contract(self):
"""Test RowsQueryResponse schema for error cases"""
2025-09-20 16:00:37 +01:00
# Create GraphQL errors - work around Pulsar Array(Record) validation bug
# by creating a response without the problematic errors array first
2026-02-28 11:03:14 +00:00
error_response = RowsQueryResponse(
2025-09-20 16:00:37 +01:00
error=None, # System error is None - these are GraphQL errors
data=None, # No data due to errors
errors=[], # Empty errors array to avoid Pulsar bug
extensions={"execution_time": "0.012"}
)
# Manually create GraphQL errors for testing (bypassing Pulsar validation)
graphql_errors = [
GraphQLError(
message="Syntax error near 'invalid'",
path=["query"],
extensions={"code": "SYNTAX_ERROR"}
),
GraphQLError(
message="Field validation failed",
path=["customers", "email"],
extensions={"code": "VALIDATION_ERROR", "details": "Invalid email format"}
)
]
# Verify response structure (basic fields work)
assert error_response.error is None
assert error_response.data is None
assert len(error_response.errors) == 0 # Empty due to Pulsar bug workaround
assert error_response.extensions["execution_time"] == "0.012"
# Verify individual GraphQL error structure (bypassing Pulsar)
syntax_error = graphql_errors[0]
assert "Syntax error" in syntax_error.message
assert syntax_error.extensions["code"] == "SYNTAX_ERROR"
validation_error = graphql_errors[1]
assert "validation failed" in validation_error.message
assert validation_error.path == ["customers", "email"]
assert validation_error.extensions["details"] == "Invalid email format"
2026-02-28 11:03:14 +00:00
def test_rows_query_response_system_error_contract(self):
"""Test RowsQueryResponse schema for system errors"""
2025-09-20 16:00:37 +01:00
from trustgraph.schema import Error
# Create system error response
2026-02-28 11:03:14 +00:00
system_error_response = RowsQueryResponse(
2025-09-20 16:00:37 +01:00
error=Error(
2026-02-28 11:03:14 +00:00
type="rows-query-error",
2025-09-20 16:00:37 +01:00
message="Failed to connect to Cassandra cluster"
),
data=None,
errors=[],
extensions={}
)
# Verify system error structure
assert system_error_response.error is not None
2026-02-28 11:03:14 +00:00
assert system_error_response.error.type == "rows-query-error"
2025-09-20 16:00:37 +01:00
assert "Cassandra" in system_error_response.error.message
assert system_error_response.data is None
assert len(system_error_response.errors) == 0
@pytest.mark.skip(reason="Pulsar Array(Record) validation bug - Record.type() missing self argument")
def test_request_response_serialization_contract(self):
"""Test that request/response can be serialized/deserialized correctly"""
# Create original request
2026-02-28 11:03:14 +00:00
original_request = RowsQueryRequest(
2025-09-20 16:00:37 +01:00
user="serialization_test",
collection="test_data",
query='{ orders(limit: 5) { id total customer { name } } }',
variables={"limit": "5", "status": "active"},
operation_name="GetRecentOrders"
)
# Test request serialization using Pulsar schema
2026-02-28 11:03:14 +00:00
request_schema = AvroSchema(RowsQueryRequest)
2025-09-20 16:00:37 +01:00
# Encode and decode request
encoded_request = request_schema.encode(original_request)
decoded_request = request_schema.decode(encoded_request)
# Verify request round-trip
assert decoded_request.user == original_request.user
assert decoded_request.collection == original_request.collection
assert decoded_request.query == original_request.query
assert decoded_request.variables == original_request.variables
assert decoded_request.operation_name == original_request.operation_name
# Create original response - work around Pulsar Array(Record) bug
2026-02-28 11:03:14 +00:00
original_response = RowsQueryResponse(
2025-09-20 16:00:37 +01:00
error=None,
data='{"orders": []}',
errors=[], # Empty to avoid Pulsar validation bug
extensions={"rate_limit_remaining": "0"}
)
# Create GraphQL error separately (for testing error structure)
graphql_error = GraphQLError(
message="Rate limit exceeded",
path=["orders"],
extensions={"code": "RATE_LIMIT", "retry_after": "60"}
)
# Test response serialization
2026-02-28 11:03:14 +00:00
response_schema = AvroSchema(RowsQueryResponse)
2025-09-20 16:00:37 +01:00
# Encode and decode response
encoded_response = response_schema.encode(original_response)
decoded_response = response_schema.decode(encoded_response)
# Verify response round-trip (basic fields)
assert decoded_response.error == original_response.error
assert decoded_response.data == original_response.data
assert len(decoded_response.errors) == 0 # Empty due to Pulsar bug workaround
assert decoded_response.extensions["rate_limit_remaining"] == "0"
# Verify GraphQL error structure separately
assert graphql_error.message == "Rate limit exceeded"
assert graphql_error.extensions["code"] == "RATE_LIMIT"
assert graphql_error.extensions["retry_after"] == "60"
def test_graphql_query_format_contract(self):
"""Test supported GraphQL query formats"""
# Test basic query
2026-02-28 11:03:14 +00:00
basic_query = RowsQueryRequest(
2025-09-20 16:00:37 +01:00
user="test", collection="test", query='{ customers { id } }',
variables={}, operation_name=""
)
assert "customers" in basic_query.query
assert basic_query.query.strip().startswith('{')
assert basic_query.query.strip().endswith('}')
# Test query with variables
2026-02-28 11:03:14 +00:00
parameterized_query = RowsQueryRequest(
2025-09-20 16:00:37 +01:00
user="test", collection="test",
query='query GetCustomers($status: String, $limit: Int) { customers(status: $status, limit: $limit) { id name } }',
variables={"status": "active", "limit": "10"},
operation_name="GetCustomers"
)
assert "$status" in parameterized_query.query
assert "$limit" in parameterized_query.query
assert parameterized_query.variables["status"] == "active"
assert parameterized_query.operation_name == "GetCustomers"
# Test complex nested query
2026-02-28 11:03:14 +00:00
nested_query = RowsQueryRequest(
2025-09-20 16:00:37 +01:00
user="test", collection="test",
query='''
{
customers(limit: 10) {
id
name
email
orders {
order_id
total
items {
product_name
quantity
}
}
}
}
''',
variables={}, operation_name=""
)
assert "customers" in nested_query.query
assert "orders" in nested_query.query
assert "items" in nested_query.query
def test_variables_type_support_contract(self):
"""Test that various variable types are supported correctly"""
# Variables should support string values (as per schema definition)
# Note: Current schema uses Map(String()) which only supports string values
# This test verifies the current contract, though ideally we'd support all JSON types
2026-02-28 11:03:14 +00:00
variables_test = RowsQueryRequest(
2025-09-20 16:00:37 +01:00
user="test", collection="test", query='{ test }',
variables={
"string_var": "test_value",
"numeric_var": "123", # Numbers as strings due to Map(String()) limitation
"boolean_var": "true", # Booleans as strings
"array_var": '["item1", "item2"]', # Arrays as JSON strings
"object_var": '{"key": "value"}' # Objects as JSON strings
},
operation_name=""
)
# Verify all variables are strings (current contract limitation)
for key, value in variables_test.variables.items():
assert isinstance(value, str), f"Variable {key} should be string, got {type(value)}"
# Verify JSON string variables can be parsed
assert json.loads(variables_test.variables["array_var"]) == ["item1", "item2"]
assert json.loads(variables_test.variables["object_var"]) == {"key": "value"}
def test_cassandra_context_fields_contract(self):
"""Test that request contains necessary fields for Cassandra operations"""
# Verify request has fields needed for Cassandra keyspace/table targeting
2026-02-28 11:03:14 +00:00
request = RowsQueryRequest(
2025-09-20 16:00:37 +01:00
user="keyspace_name", # Maps to Cassandra keyspace
collection="partition_collection", # Used in partition key
query='{ objects { id } }',
variables={}, operation_name=""
)
# These fields are required for proper Cassandra operations
assert request.user # Required for keyspace identification
assert request.collection # Required for partition key
# Verify field naming follows TrustGraph patterns (matching other query services)
# This matches TriplesQueryRequest, DocumentEmbeddingsRequest patterns
assert hasattr(request, 'user') # Same as TriplesQueryRequest.user
assert hasattr(request, 'collection') # Same as TriplesQueryRequest.collection
def test_graphql_extensions_contract(self):
"""Test GraphQL extensions field format and usage"""
# Extensions should support query metadata
2026-02-28 11:03:14 +00:00
response_with_extensions = RowsQueryResponse(
2025-09-20 16:00:37 +01:00
error=None,
data='{"test": "data"}',
errors=[],
extensions={
"execution_time": "0.142",
"query_complexity": "8",
"cache_hit": "false",
"data_source": "cassandra",
"schema_version": "1.2.3"
}
)
# Verify extensions structure
assert isinstance(response_with_extensions.extensions, dict)
# Common extension fields that should be supported
expected_extensions = {
"execution_time", "query_complexity", "cache_hit",
"data_source", "schema_version"
}
actual_extensions = set(response_with_extensions.extensions.keys())
assert expected_extensions.issubset(actual_extensions)
# Verify extension values are strings (Map(String()) constraint)
for key, value in response_with_extensions.extensions.items():
assert isinstance(value, str), f"Extension {key} should be string"
def test_error_path_format_contract(self):
"""Test GraphQL error path format and structure"""
# Test various path formats that can occur in GraphQL errors
# Note: All path segments must be strings due to Array(String()) schema constraint
path_test_cases = [
# Field error path
["customers", "0", "email"],
# Nested field error
["customers", "0", "orders", "1", "total"],
# Root level error
["customers"],
# Complex nested path
["orders", "items", "2", "product", "details", "price"]
]
for path in path_test_cases:
error = GraphQLError(
message=f"Error at path {path}",
path=path,
extensions={"code": "PATH_ERROR"}
)
# Verify path is array of strings/ints as per GraphQL spec
assert isinstance(error.path, list)
for segment in error.path:
# Path segments can be field names (strings) or array indices (ints)
# But our schema uses Array(String()) so all are strings
assert isinstance(segment, str)
def test_operation_name_usage_contract(self):
"""Test operation_name field usage for multi-operation documents"""
# Test query with multiple operations
multi_op_query = '''
query GetCustomers { customers { id name } }
query GetOrders { orders { order_id total } }
'''
# Request to execute specific operation
2026-02-28 11:03:14 +00:00
multi_op_request = RowsQueryRequest(
2025-09-20 16:00:37 +01:00
user="test", collection="test",
query=multi_op_query,
variables={},
operation_name="GetCustomers"
)
# Verify operation name is preserved
assert multi_op_request.operation_name == "GetCustomers"
assert "GetCustomers" in multi_op_request.query
assert "GetOrders" in multi_op_request.query
# Test single operation (operation_name optional)
2026-02-28 11:03:14 +00:00
single_op_request = RowsQueryRequest(
2025-09-20 16:00:37 +01:00
user="test", collection="test",
query='{ customers { id } }',
variables={}, operation_name=""
)
# Operation name can be empty for single operations
assert single_op_request.operation_name == ""