Release 1.4 -> master (#524)

Catch up
This commit is contained in:
cybermaggedon 2025-09-20 16:00:37 +01:00 committed by GitHub
parent a8e437fc7f
commit 6c7af8789d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
216 changed files with 31360 additions and 1611 deletions

View file

@ -82,8 +82,8 @@ def sample_message_data():
},
"AgentRequest": {
"question": "What is machine learning?",
"plan": "",
"state": "",
"group": [],
"history": []
},
"AgentResponse": {

View file

@ -0,0 +1,261 @@
"""
Contract tests for document embeddings message schemas and translators
Ensures that message formats remain consistent across services
"""
import pytest
from unittest.mock import MagicMock
from trustgraph.schema import DocumentEmbeddingsRequest, DocumentEmbeddingsResponse, Error
from trustgraph.messaging.translators.embeddings_query import (
DocumentEmbeddingsRequestTranslator,
DocumentEmbeddingsResponseTranslator
)
class TestDocumentEmbeddingsRequestContract:
    """Contract checks for DocumentEmbeddingsRequest and its dict translator."""

    def test_request_schema_fields(self):
        """A constructed request exposes every contract field with the given value."""
        request = DocumentEmbeddingsRequest(
            vectors=[[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]],
            limit=10,
            user="test_user",
            collection="test_collection",
        )

        # Every field named by the contract must be present on the object.
        for field_name in ("vectors", "limit", "user", "collection"):
            assert hasattr(request, field_name)

        # Construction must not alter the supplied values.
        assert request.vectors == [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]
        assert request.limit == 10
        assert request.user == "test_user"
        assert request.collection == "test_collection"

    def test_request_translator_to_pulsar(self):
        """to_pulsar turns a fully populated dict into a DocumentEmbeddingsRequest."""
        payload = {
            "vectors": [[0.1, 0.2], [0.3, 0.4]],
            "limit": 5,
            "user": "custom_user",
            "collection": "custom_collection",
        }

        converted = DocumentEmbeddingsRequestTranslator().to_pulsar(payload)

        assert isinstance(converted, DocumentEmbeddingsRequest)
        assert converted.vectors == [[0.1, 0.2], [0.3, 0.4]]
        assert converted.limit == 5
        assert converted.user == "custom_user"
        assert converted.collection == "custom_collection"

    def test_request_translator_to_pulsar_with_defaults(self):
        """to_pulsar fills in limit, user and collection when only vectors are given."""
        # Deliberately omit limit, user and collection.
        payload = {"vectors": [[0.1, 0.2]]}

        converted = DocumentEmbeddingsRequestTranslator().to_pulsar(payload)

        assert isinstance(converted, DocumentEmbeddingsRequest)
        assert converted.vectors == [[0.1, 0.2]]
        # The three values below are the defaults this contract pins down.
        assert converted.limit == 10
        assert converted.user == "trustgraph"
        assert converted.collection == "default"

    def test_request_translator_from_pulsar(self):
        """from_pulsar turns a DocumentEmbeddingsRequest back into a plain dict."""
        source_request = DocumentEmbeddingsRequest(
            vectors=[[0.5, 0.6]],
            limit=20,
            user="test_user",
            collection="test_collection",
        )

        as_dict = DocumentEmbeddingsRequestTranslator().from_pulsar(source_request)

        assert isinstance(as_dict, dict)
        assert as_dict["vectors"] == [[0.5, 0.6]]
        assert as_dict["limit"] == 20
        assert as_dict["user"] == "test_user"
        assert as_dict["collection"] == "test_collection"
class TestDocumentEmbeddingsResponseContract:
    """Contract checks for DocumentEmbeddingsResponse and its dict translator."""

    def test_response_schema_fields(self):
        """A successful response exposes `error` (None) and the supplied `chunks`."""
        response = DocumentEmbeddingsResponse(
            error=None,
            chunks=["chunk1", "chunk2", "chunk3"],
        )

        for field_name in ("error", "chunks"):
            assert hasattr(response, field_name)

        assert response.error is None
        assert response.chunks == ["chunk1", "chunk2", "chunk3"]

    def test_response_schema_with_error(self):
        """A response can carry an Error object together with chunks=None."""
        failure = Error(
            type="query_error",
            message="Database connection failed",
        )

        response = DocumentEmbeddingsResponse(error=failure, chunks=None)

        assert response.error == failure
        assert response.chunks is None

    def test_response_translator_from_pulsar_with_chunks(self):
        """from_pulsar copies string chunks into the result dict unchanged."""
        response = DocumentEmbeddingsResponse(
            error=None,
            chunks=["doc1", "doc2", "doc3"],
        )

        as_dict = DocumentEmbeddingsResponseTranslator().from_pulsar(response)

        assert isinstance(as_dict, dict)
        assert "chunks" in as_dict
        assert as_dict["chunks"] == ["doc1", "doc2", "doc3"]

    def test_response_translator_from_pulsar_with_bytes(self):
        """from_pulsar yields str chunks when the response holds bytes chunks."""
        # A mock stands in for the schema object so bytes can be supplied.
        response = MagicMock(chunks=[b"byte_chunk1", b"byte_chunk2"])

        as_dict = DocumentEmbeddingsResponseTranslator().from_pulsar(response)

        assert isinstance(as_dict, dict)
        assert "chunks" in as_dict
        assert as_dict["chunks"] == ["byte_chunk1", "byte_chunk2"]

    def test_response_translator_from_pulsar_with_empty_chunks(self):
        """from_pulsar keeps an empty chunks list as an empty list."""
        response = MagicMock(chunks=[])

        as_dict = DocumentEmbeddingsResponseTranslator().from_pulsar(response)

        assert isinstance(as_dict, dict)
        assert "chunks" in as_dict
        assert as_dict["chunks"] == []

    def test_response_translator_from_pulsar_with_none_chunks(self):
        """from_pulsar with chunks=None gives no usable `chunks` entry."""
        response = MagicMock(chunks=None)

        as_dict = DocumentEmbeddingsResponseTranslator().from_pulsar(response)

        assert isinstance(as_dict, dict)
        # dict.get covers both accepted outcomes: key absent, or key present
        # with a None value.
        assert as_dict.get("chunks") is None

    def test_response_translator_from_response_with_completion(self):
        """from_response_with_completion returns the dict plus a final flag of True."""
        response = DocumentEmbeddingsResponse(
            error=None,
            chunks=["chunk1", "chunk2"],
        )

        translator = DocumentEmbeddingsResponseTranslator()
        as_dict, final_flag = translator.from_response_with_completion(response)

        assert isinstance(as_dict, dict)
        assert "chunks" in as_dict
        assert as_dict["chunks"] == ["chunk1", "chunk2"]
        # This contract expects document embeddings responses to be final.
        assert final_flag is True

    def test_response_translator_to_pulsar_not_implemented(self):
        """to_pulsar on the response translator raises NotImplementedError."""
        translator = DocumentEmbeddingsResponseTranslator()

        with pytest.raises(NotImplementedError):
            translator.to_pulsar({"chunks": ["test"]})
class TestDocumentEmbeddingsMessageCompatibility:
    """Checks that request and response messages work together end to end."""

    def test_request_response_flow(self):
        """A dict request converts to a schema object and a response converts back to a dict."""
        incoming = {
            "vectors": [[0.1, 0.2, 0.3]],
            "limit": 5,
            "user": "test_user",
            "collection": "test_collection",
        }

        # Inbound leg: dict -> Pulsar request.
        pulsar_request = DocumentEmbeddingsRequestTranslator().to_pulsar(incoming)

        # Stand-in for what the service would produce.
        service_reply = DocumentEmbeddingsResponse(
            error=None,
            chunks=["relevant chunk 1", "relevant chunk 2"],
        )

        # Outbound leg: Pulsar response -> dict.
        outgoing = DocumentEmbeddingsResponseTranslator().from_pulsar(service_reply)

        assert isinstance(pulsar_request, DocumentEmbeddingsRequest)
        assert isinstance(outgoing, dict)
        assert "chunks" in outgoing
        assert len(outgoing["chunks"]) == 2

    def test_error_response_flow(self):
        """An error response with chunks=None translates to a dict with no usable chunks."""
        failure = Error(
            type="vector_db_error",
            message="Collection not found",
        )
        service_reply = DocumentEmbeddingsResponse(error=failure, chunks=None)

        outgoing = DocumentEmbeddingsResponseTranslator().from_pulsar(service_reply)

        assert isinstance(outgoing, dict)
        # Only the chunks entry is checked here; it must be absent or None.
        assert outgoing.get("chunks") is None

View file

@ -20,7 +20,7 @@ from trustgraph.schema import (
GraphEmbeddings, EntityEmbeddings,
Metadata, Field, RowSchema,
StructuredDataSubmission, ExtractedObject,
NLPToStructuredQueryRequest, NLPToStructuredQueryResponse,
QuestionToStructuredQueryRequest, QuestionToStructuredQueryResponse,
StructuredQueryRequest, StructuredQueryResponse,
StructuredObjectEmbedding
)
@ -198,8 +198,8 @@ class TestAgentMessageContracts:
# Test required fields
request = AgentRequest(**request_data)
assert hasattr(request, 'question')
assert hasattr(request, 'plan')
assert hasattr(request, 'state')
assert hasattr(request, 'group')
assert hasattr(request, 'history')
def test_agent_response_schema_contract(self, sample_message_data):

View file

@ -30,11 +30,11 @@ class TestObjectsCassandraContracts:
test_object = ExtractedObject(
metadata=test_metadata,
schema_name="customer_records",
values={
values=[{
"customer_id": "CUST123",
"name": "Test Customer",
"email": "test@example.com"
},
}],
confidence=0.95,
source_span="Customer data from document..."
)
@ -54,7 +54,7 @@ class TestObjectsCassandraContracts:
# Verify types
assert isinstance(test_object.schema_name, str)
assert isinstance(test_object.values, dict)
assert isinstance(test_object.values, list)
assert isinstance(test_object.confidence, float)
assert isinstance(test_object.source_span, str)
@ -200,7 +200,7 @@ class TestObjectsCassandraContracts:
metadata=[]
),
schema_name="test_schema",
values={"field1": "value1", "field2": "123"},
values=[{"field1": "value1", "field2": "123"}],
confidence=0.85,
source_span="Test span"
)
@ -292,7 +292,7 @@ class TestObjectsCassandraContracts:
metadata=[{"key": "value"}]
),
schema_name="table789", # -> table name
values={"field": "value"},
values=[{"field": "value"}],
confidence=0.9,
source_span="Source"
)
@ -303,4 +303,215 @@ class TestObjectsCassandraContracts:
# - metadata.collection -> Part of primary key
assert test_obj.metadata.user # Required for keyspace
assert test_obj.schema_name # Required for table
assert test_obj.metadata.collection # Required for partition key
assert test_obj.metadata.collection # Required for partition key
@pytest.mark.contract
class TestObjectsCassandraContractsBatch:
    """Contract tests for batched ExtractedObject values in Cassandra object storage."""

    def test_extracted_object_batch_input_contract(self):
        """An ExtractedObject holding three value dicts keeps them as a list of dicts."""
        doc_metadata = Metadata(
            id="batch-doc-001",
            user="test_user",
            collection="test_collection",
            metadata=[],
        )
        customers = [
            {
                "customer_id": "CUST123",
                "name": "Test Customer 1",
                "email": "test1@example.com",
            },
            {
                "customer_id": "CUST124",
                "name": "Test Customer 2",
                "email": "test2@example.com",
            },
            {
                "customer_id": "CUST125",
                "name": "Test Customer 3",
                "email": "test3@example.com",
            },
        ]

        batch_object = ExtractedObject(
            metadata=doc_metadata,
            schema_name="customer_records",
            values=customers,
            confidence=0.88,
            source_span="Multiple customer data from document...",
        )

        # Shape of the batch.
        assert hasattr(batch_object, 'values')
        assert isinstance(batch_object.values, list)
        assert len(batch_object.values) == 3

        # Each entry is a dict with the expected keys and positional content.
        for i, batch_item in enumerate(batch_object.values):
            assert isinstance(batch_item, dict)
            for required_key in ("customer_id", "name", "email"):
                assert required_key in batch_item
            assert batch_item["customer_id"] == f"CUST12{3+i}"
            assert f"Test Customer {i+1}" in batch_item["name"]

    def test_extracted_object_empty_batch_contract(self):
        """An ExtractedObject may carry an empty values list."""
        doc_metadata = Metadata(
            id="empty-batch-001",
            user="test_user",
            collection="test_collection",
            metadata=[],
        )

        empty_batch_object = ExtractedObject(
            metadata=doc_metadata,
            schema_name="empty_schema",
            values=[],
            confidence=1.0,
            source_span="No objects found in document",
        )

        assert hasattr(empty_batch_object, 'values')
        assert isinstance(empty_batch_object.values, list)
        assert len(empty_batch_object.values) == 0
        assert empty_batch_object.confidence == 1.0

    def test_extracted_object_single_item_batch_contract(self):
        """A single object is expressed as a one-element list (backward compatibility)."""
        doc_metadata = Metadata(
            id="single-batch-001",
            user="test_user",
            collection="test_collection",
            metadata=[],
        )
        only_customer = {
            "customer_id": "CUST999",
            "name": "Single Customer",
            "email": "single@example.com",
        }

        single_batch_object = ExtractedObject(
            metadata=doc_metadata,
            schema_name="customer_records",
            values=[only_customer],
            confidence=0.95,
            source_span="Single customer data from document...",
        )

        assert isinstance(single_batch_object.values, list)
        assert len(single_batch_object.values) == 1
        first_entry = single_batch_object.values[0]
        assert isinstance(first_entry, dict)
        assert first_entry["customer_id"] == "CUST999"

    def test_extracted_object_batch_serialization_contract(self):
        """A batched ExtractedObject survives an AvroSchema encode/decode round trip."""
        original = ExtractedObject(
            metadata=Metadata(
                id="batch-serial-001",
                user="test_user",
                collection="test_coll",
                metadata=[],
            ),
            schema_name="test_schema",
            values=[
                {"field1": "value1", "field2": "123"},
                {"field1": "value2", "field2": "456"},
                {"field1": "value3", "field2": "789"},
            ],
            confidence=0.92,
            source_span="Batch test span",
        )

        codec = AvroSchema(ExtractedObject)
        decoded = codec.decode(codec.encode(original))

        # Metadata and schema name round-trip.
        assert decoded.metadata.id == original.metadata.id
        assert decoded.metadata.user == original.metadata.user
        assert decoded.metadata.collection == original.metadata.collection
        assert decoded.schema_name == original.schema_name

        # Batch size round-trips.
        assert len(decoded.values) == len(original.values)
        assert len(decoded.values) == 3

        # Each item round-trips; field2 steps 123 -> 456 -> 789, i.e. by 333.
        for i in range(3):
            decoded_item = decoded.values[i]
            assert decoded_item == original.values[i]
            assert decoded_item["field1"] == f"value{i+1}"
            assert decoded_item["field2"] == f"{123 + i*333}"

        assert decoded.confidence == original.confidence
        assert decoded.source_span == original.source_span

    def test_batch_processing_field_validation_contract(self):
        """Illustrates the consistent-fields rule for batch items using literal data.

        This test only inspects local lists; it documents the expectation that
        the application enforces and does not call any application code.
        """
        # Every row shares the same key set.
        valid_batch_values = [
            {"id": "1", "name": "Item 1", "value": "100"},
            {"id": "2", "name": "Item 2", "value": "200"},
            {"id": "3", "name": "Item 3", "value": "300"},
        ]
        field_sets = [set(item.keys()) for item in valid_batch_values]
        reference_fields = field_sets[0]
        assert all(fields == reference_fields for fields in field_sets), "All batch items should have consistent fields"

        # Second row lacks 'value'; third row has an extra key.
        invalid_batch_values = [
            {"id": "1", "name": "Item 1", "value": "100"},
            {"id": "2", "name": "Item 2"},
            {"id": "3", "name": "Item 3", "value": "300", "extra": "field"},
        ]
        invalid_field_sets = [set(item.keys()) for item in invalid_batch_values]
        invalid_reference = invalid_field_sets[0]
        assert any(fields != invalid_reference for fields in invalid_field_sets), "Invalid batch should have inconsistent fields"

    def test_batch_storage_partition_key_contract(self):
        """Batch items share one user/collection and carry unique ids.

        Per the storage design notes for this test: the user selects the
        keyspace, the collection is part of the partition key, and the ids
        must be unique within the batch.
        """
        shared_metadata = Metadata(
            id="partition-test-001",
            user="consistent_user",
            collection="consistent_collection",
            metadata=[],
        )

        batch_object = ExtractedObject(
            metadata=shared_metadata,
            schema_name="partition_test",
            values=[
                {"id": "pk1", "data": "data1"},
                {"id": "pk2", "data": "data2"},
                {"id": "pk3", "data": "data3"},
            ],
            confidence=0.95,
            source_span="Partition consistency test",
        )

        # Both must be non-empty for the object to be addressable.
        assert batch_object.metadata.user
        assert batch_object.metadata.collection

        # A set drops duplicates, so equal lengths mean all ids are distinct.
        primary_keys = [item["id"] for item in batch_object.values]
        assert len(primary_keys) == len(set(primary_keys)), "Primary keys must be unique within batch"

View file

@ -0,0 +1,427 @@
"""
Contract tests for Objects GraphQL Query Service
These tests verify the message contracts and schema compatibility
for the objects GraphQL query processor.
"""
import pytest
import json
from pulsar.schema import AvroSchema
from trustgraph.schema import ObjectsQueryRequest, ObjectsQueryResponse, GraphQLError
from trustgraph.query.objects.cassandra.service import Processor
@pytest.mark.contract
class TestObjectsGraphQLQueryContracts:
    """Contract tests for the request/response messages of the objects GraphQL query service."""

    def test_objects_query_request_contract(self):
        """A fully populated ObjectsQueryRequest exposes typed fields with the given content."""
        test_request = ObjectsQueryRequest(
            user="test_user",
            collection="test_collection",
            query='{ customers { id name email } }',
            variables={"status": "active", "limit": "10"},
            operation_name="GetCustomers",
        )

        # Presence.
        for field_name in ('user', 'collection', 'query', 'variables', 'operation_name'):
            assert hasattr(test_request, field_name)

        # Types.
        assert isinstance(test_request.user, str)
        assert isinstance(test_request.collection, str)
        assert isinstance(test_request.query, str)
        assert isinstance(test_request.variables, dict)
        assert isinstance(test_request.operation_name, str)

        # Content.
        assert test_request.user == "test_user"
        assert test_request.collection == "test_collection"
        assert "customers" in test_request.query
        assert test_request.variables["status"] == "active"
        assert test_request.operation_name == "GetCustomers"

    def test_objects_query_request_minimal(self):
        """A request with empty variables and an empty operation name is accepted."""
        minimal_request = ObjectsQueryRequest(
            user="user",
            collection="collection",
            query='{ test }',
            variables={},
            operation_name="",
        )

        assert minimal_request.user == "user"
        assert minimal_request.collection == "collection"
        assert minimal_request.query == '{ test }'
        assert minimal_request.variables == {}
        assert minimal_request.operation_name == ""

    def test_graphql_error_contract(self):
        """GraphQLError exposes message (str), path (list) and extensions (dict)."""
        # Path segments are all strings, including the "0" index.
        error_path = ["customers", "0", "nonexistent"]
        test_error = GraphQLError(
            message="Field 'nonexistent' doesn't exist on type 'Customer'",
            path=error_path,
            extensions={"code": "FIELD_ERROR", "timestamp": "2024-01-01T00:00:00Z"},
        )

        for field_name in ('message', 'path', 'extensions'):
            assert hasattr(test_error, field_name)

        assert isinstance(test_error.message, str)
        assert isinstance(test_error.path, list)
        assert isinstance(test_error.extensions, dict)

        assert "doesn't exist" in test_error.message
        assert test_error.path == ["customers", "0", "nonexistent"]
        assert test_error.extensions["code"] == "FIELD_ERROR"

    def test_objects_query_response_success_contract(self):
        """A successful response carries JSON text in `data`, no errors, and string extensions."""
        success_response = ObjectsQueryResponse(
            error=None,
            data='{"customers": [{"id": "1", "name": "John", "email": "john@example.com"}]}',
            errors=[],
            extensions={"execution_time": "0.045", "query_complexity": "5"},
        )

        for field_name in ('error', 'data', 'errors', 'extensions'):
            assert hasattr(success_response, field_name)

        assert success_response.error is None
        assert isinstance(success_response.data, str)
        assert isinstance(success_response.errors, list)
        assert isinstance(success_response.extensions, dict)

        # `data` is a JSON document serialized as a string.
        customers = json.loads(success_response.data)
        assert "customers" in customers
        assert len(customers["customers"]) == 1
        assert customers["customers"][0]["id"] == "1"

    def test_objects_query_response_error_contract(self):
        """Checks the response envelope and GraphQLError objects separately.

        The errors list on the response is left empty as a workaround for a
        Pulsar Array(Record) validation problem, so the GraphQLError objects
        are built and inspected on their own rather than via the response.
        """
        error_response = ObjectsQueryResponse(
            error=None,   # no system error; the failures are GraphQL-level
            data=None,    # no data because the query failed
            errors=[],    # kept empty to sidestep the Pulsar validation issue
            extensions={"execution_time": "0.012"},
        )

        syntax_error = GraphQLError(
            message="Syntax error near 'invalid'",
            path=["query"],
            extensions={"code": "SYNTAX_ERROR"},
        )
        validation_error = GraphQLError(
            message="Field validation failed",
            path=["customers", "email"],
            extensions={"code": "VALIDATION_ERROR", "details": "Invalid email format"},
        )

        # Envelope fields.
        assert error_response.error is None
        assert error_response.data is None
        assert len(error_response.errors) == 0
        assert error_response.extensions["execution_time"] == "0.012"

        # Stand-alone GraphQL error objects.
        assert "Syntax error" in syntax_error.message
        assert syntax_error.extensions["code"] == "SYNTAX_ERROR"

        assert "validation failed" in validation_error.message
        assert validation_error.path == ["customers", "email"]
        assert validation_error.extensions["details"] == "Invalid email format"

    def test_objects_query_response_system_error_contract(self):
        """A system failure is reported through the `error` field with no data."""
        from trustgraph.schema import Error

        cassandra_failure = Error(
            type="objects-query-error",
            message="Failed to connect to Cassandra cluster",
        )
        system_error_response = ObjectsQueryResponse(
            error=cassandra_failure,
            data=None,
            errors=[],
            extensions={},
        )

        assert system_error_response.error is not None
        assert system_error_response.error.type == "objects-query-error"
        assert "Cassandra" in system_error_response.error.message
        assert system_error_response.data is None
        assert len(system_error_response.errors) == 0

    @pytest.mark.skip(reason="Pulsar Array(Record) validation bug - Record.type() missing self argument")
    def test_request_response_serialization_contract(self):
        """Round-trips a request and a response through AvroSchema (currently skipped)."""
        # --- request round trip ---
        original_request = ObjectsQueryRequest(
            user="serialization_test",
            collection="test_data",
            query='{ orders(limit: 5) { id total customer { name } } }',
            variables={"limit": "5", "status": "active"},
            operation_name="GetRecentOrders",
        )

        request_schema = AvroSchema(ObjectsQueryRequest)
        decoded_request = request_schema.decode(request_schema.encode(original_request))

        assert decoded_request.user == original_request.user
        assert decoded_request.collection == original_request.collection
        assert decoded_request.query == original_request.query
        assert decoded_request.variables == original_request.variables
        assert decoded_request.operation_name == original_request.operation_name

        # --- response round trip (errors kept empty, same Pulsar workaround) ---
        original_response = ObjectsQueryResponse(
            error=None,
            data='{"orders": []}',
            errors=[],
            extensions={"rate_limit_remaining": "0"},
        )

        # Built separately so its structure can still be checked.
        graphql_error = GraphQLError(
            message="Rate limit exceeded",
            path=["orders"],
            extensions={"code": "RATE_LIMIT", "retry_after": "60"},
        )

        response_schema = AvroSchema(ObjectsQueryResponse)
        decoded_response = response_schema.decode(response_schema.encode(original_response))

        assert decoded_response.error == original_response.error
        assert decoded_response.data == original_response.data
        assert len(decoded_response.errors) == 0
        assert decoded_response.extensions["rate_limit_remaining"] == "0"

        assert graphql_error.message == "Rate limit exceeded"
        assert graphql_error.extensions["code"] == "RATE_LIMIT"
        assert graphql_error.extensions["retry_after"] == "60"

    def test_graphql_query_format_contract(self):
        """The request stores shorthand, parameterized and nested GraphQL documents as given."""
        # Shorthand query: a bare selection set.
        basic_query = ObjectsQueryRequest(
            user="test", collection="test", query='{ customers { id } }',
            variables={}, operation_name=""
        )
        trimmed = basic_query.query.strip()
        assert "customers" in basic_query.query
        assert trimmed.startswith('{')
        assert trimmed.endswith('}')

        # Named operation with variable definitions.
        parameterized_query = ObjectsQueryRequest(
            user="test", collection="test",
            query='query GetCustomers($status: String, $limit: Int) { customers(status: $status, limit: $limit) { id name } }',
            variables={"status": "active", "limit": "10"},
            operation_name="GetCustomers"
        )
        assert "$status" in parameterized_query.query
        assert "$limit" in parameterized_query.query
        assert parameterized_query.variables["status"] == "active"
        assert parameterized_query.operation_name == "GetCustomers"

        # Multi-level nested selection.
        nested_query = ObjectsQueryRequest(
            user="test", collection="test",
            query='''
{
customers(limit: 10) {
id
name
email
orders {
order_id
total
items {
product_name
quantity
}
}
}
}
''',
            variables={}, operation_name=""
        )
        for selection in ("customers", "orders", "items"):
            assert selection in nested_query.query

    def test_variables_type_support_contract(self):
        """Variables are a string-to-string map; non-string values travel as text.

        The checks below pin that contract: numbers and booleans are passed as
        their string form, and arrays/objects as JSON-encoded strings.
        """
        variables_test = ObjectsQueryRequest(
            user="test", collection="test", query='{ test }',
            variables={
                "string_var": "test_value",
                "numeric_var": "123",               # number as string
                "boolean_var": "true",              # boolean as string
                "array_var": '["item1", "item2"]',  # array as JSON text
                "object_var": '{"key": "value"}',   # object as JSON text
            },
            operation_name=""
        )

        # Every value must be a plain string.
        for key, value in variables_test.variables.items():
            assert isinstance(value, str), f"Variable {key} should be string, got {type(value)}"

        # The JSON-encoded entries decode back to the intended structures.
        stored = variables_test.variables
        assert json.loads(stored["array_var"]) == ["item1", "item2"]
        assert json.loads(stored["object_var"]) == {"key": "value"}

    def test_cassandra_context_fields_contract(self):
        """The request carries the user and collection needed to target Cassandra data."""
        request = ObjectsQueryRequest(
            user="keyspace_name",               # maps to the Cassandra keyspace
            collection="partition_collection",  # used in the partition key
            query='{ objects { id } }',
            variables={}, operation_name=""
        )

        # Both must be non-empty.
        assert request.user
        assert request.collection

        # Same field names as the other query request types use.
        for field_name in ('user', 'collection'):
            assert hasattr(request, field_name)

    def test_graphql_extensions_contract(self):
        """Response extensions are a dict of string values holding query metadata."""
        response_with_extensions = ObjectsQueryResponse(
            error=None,
            data='{"test": "data"}',
            errors=[],
            extensions={
                "execution_time": "0.142",
                "query_complexity": "8",
                "cache_hit": "false",
                "data_source": "cassandra",
                "schema_version": "1.2.3",
            }
        )

        assert isinstance(response_with_extensions.extensions, dict)

        # All commonly used extension keys must be retained.
        expected_extensions = {
            "execution_time", "query_complexity", "cache_hit",
            "data_source", "schema_version"
        }
        actual_extensions = set(response_with_extensions.extensions.keys())
        assert expected_extensions.issubset(actual_extensions)

        # Values are strings only.
        for key, value in response_with_extensions.extensions.items():
            assert isinstance(value, str), f"Extension {key} should be string"

    def test_error_path_format_contract(self):
        """GraphQLError.path is a list whose segments are all strings.

        Array indices are therefore written as strings such as "0", not ints.
        """
        path_test_cases = [
            ["customers", "0", "email"],                             # field error
            ["customers", "0", "orders", "1", "total"],              # nested field
            ["customers"],                                           # root level
            ["orders", "items", "2", "product", "details", "price"]  # deep nesting
        ]

        for path in path_test_cases:
            error = GraphQLError(
                message=f"Error at path {path}",
                path=path,
                extensions={"code": "PATH_ERROR"}
            )

            assert isinstance(error.path, list)
            for segment in error.path:
                assert isinstance(segment, str)

    def test_operation_name_usage_contract(self):
        """operation_name selects one operation of a multi-operation document, or is empty."""
        # Document containing two named operations.
        multi_op_query = '''
query GetCustomers { customers { id name } }
query GetOrders { orders { order_id total } }
'''

        multi_op_request = ObjectsQueryRequest(
            user="test", collection="test",
            query=multi_op_query,
            variables={},
            operation_name="GetCustomers"
        )

        # The chosen name is kept, and the document still holds both operations.
        assert multi_op_request.operation_name == "GetCustomers"
        assert "GetCustomers" in multi_op_request.query
        assert "GetOrders" in multi_op_request.query

        # With a single operation the name may be left empty.
        single_op_request = ObjectsQueryRequest(
            user="test", collection="test",
            query='{ customers { id } }',
            variables={}, operation_name=""
        )
        assert single_op_request.operation_name == ""

View file

@ -12,7 +12,7 @@ from typing import Dict, Any
from trustgraph.schema import (
StructuredDataSubmission, ExtractedObject,
NLPToStructuredQueryRequest, NLPToStructuredQueryResponse,
QuestionToStructuredQueryRequest, QuestionToStructuredQueryResponse,
StructuredQueryRequest, StructuredQueryResponse,
StructuredObjectEmbedding, Field, RowSchema,
Metadata, Error, Value
@ -128,41 +128,98 @@ class TestStructuredDataSchemaContracts:
obj = ExtractedObject(
metadata=metadata,
schema_name="customer_records",
values={"id": "123", "name": "John Doe", "email": "john@example.com"},
values=[{"id": "123", "name": "John Doe", "email": "john@example.com"}],
confidence=0.95,
source_span="John Doe (john@example.com) customer ID 123"
)
# Assert
assert obj.schema_name == "customer_records"
assert obj.values["name"] == "John Doe"
assert obj.values[0]["name"] == "John Doe"
assert obj.confidence == 0.95
assert len(obj.source_span) > 0
assert obj.metadata.id == "extracted-obj-001"
def test_extracted_object_batch_contract(self):
    """Contract: ExtractedObject.values holds a batch of several records.

    Creates an ExtractedObject with three customer records and checks that
    the records come back in order with their names and ids, along with
    the scalar fields set on the object.
    """
    # Arrange - the three records to place in the batch
    customers = [
        {"id": "123", "name": "John Doe", "email": "john@example.com"},
        {"id": "124", "name": "Jane Smith", "email": "jane@example.com"},
        {"id": "125", "name": "Bob Johnson", "email": "bob@example.com"},
    ]
    batch_metadata = Metadata(
        id="extracted-batch-001",
        user="test_user",
        collection="test_collection",
        metadata=[],
    )

    # Act - build the object around the multi-record batch
    batch = ExtractedObject(
        metadata=batch_metadata,
        schema_name="customer_records",
        values=customers,
        confidence=0.85,
        source_span="Multiple customers found in document",
    )

    # Assert - scalar fields and batch size
    assert batch.schema_name == "customer_records"
    assert len(batch.values) == 3

    # Assert - per-record names, then ids, by position in the batch
    for position, expected_name in enumerate(["John Doe", "Jane Smith", "Bob Johnson"]):
        assert batch.values[position]["name"] == expected_name
    for position, expected_id in enumerate(["123", "124", "125"]):
        assert batch.values[position]["id"] == expected_id

    assert batch.confidence == 0.85
    assert "Multiple customers" in batch.source_span
def test_extracted_object_empty_batch_contract(self):
    """Contract: ExtractedObject can be built with an empty values list."""
    # Arrange
    empty_metadata = Metadata(
        id="extracted-empty-001",
        user="test_user",
        collection="test_collection",
        metadata=[],
    )
    no_records = []

    # Act - build the object with nothing in the batch
    holder = ExtractedObject(
        metadata=empty_metadata,
        schema_name="empty_schema",
        values=no_records,
        confidence=1.0,
        source_span="No objects found",
    )

    # Assert - the batch is empty and the scalar fields are as given
    assert holder.schema_name == "empty_schema"
    assert len(holder.values) == 0
    assert holder.confidence == 1.0
@pytest.mark.contract
class TestStructuredQueryServiceContracts:
"""Contract tests for structured query services"""
def test_nlp_to_structured_query_request_contract(self):
"""Test NLPToStructuredQueryRequest schema contract"""
"""Test QuestionToStructuredQueryRequest schema contract"""
# Act
request = NLPToStructuredQueryRequest(
natural_language_query="Show me all customers who registered last month",
max_results=100,
context_hints={"time_range": "last_month", "entity_type": "customer"}
request = QuestionToStructuredQueryRequest(
question="Show me all customers who registered last month",
max_results=100
)
# Assert
assert "customers" in request.natural_language_query
assert "customers" in request.question
assert request.max_results == 100
assert request.context_hints["time_range"] == "last_month"
def test_nlp_to_structured_query_response_contract(self):
"""Test NLPToStructuredQueryResponse schema contract"""
"""Test QuestionToStructuredQueryResponse schema contract"""
# Act
response = NLPToStructuredQueryResponse(
response = QuestionToStructuredQueryResponse(
error=None,
graphql_query="query { customers(filter: {registered: {gte: \"2024-01-01\"}}) { id name email } }",
variables={"start_date": "2024-01-01"},
@ -180,15 +237,11 @@ class TestStructuredQueryServiceContracts:
"""Test StructuredQueryRequest schema contract"""
# Act
request = StructuredQueryRequest(
query="query GetCustomers($limit: Int) { customers(limit: $limit) { id name email } }",
variables={"limit": "10"},
operation_name="GetCustomers"
question="Show me customers with limit 10"
)
# Assert
assert "customers" in request.query
assert request.variables["limit"] == "10"
assert request.operation_name == "GetCustomers"
assert "customers" in request.question
def test_structured_query_response_contract(self):
"""Test StructuredQueryResponse schema contract"""
@ -279,7 +332,7 @@ class TestStructuredDataSerializationContracts:
object_data = {
"metadata": metadata,
"schema_name": "test_schema",
"values": {"field1": "value1"},
"values": [{"field1": "value1"}],
"confidence": 0.8,
"source_span": "test span"
}
@ -291,11 +344,10 @@ class TestStructuredDataSerializationContracts:
"""Test NLP query request/response serialization contract"""
# Test request
request_data = {
"natural_language_query": "test query",
"max_results": 10,
"context_hints": {}
"question": "test query",
"max_results": 10
}
assert serialize_deserialize_test(NLPToStructuredQueryRequest, request_data)
assert serialize_deserialize_test(QuestionToStructuredQueryRequest, request_data)
# Test response
response_data = {
@ -305,4 +357,54 @@ class TestStructuredDataSerializationContracts:
"detected_schemas": ["test"],
"confidence": 0.9
}
assert serialize_deserialize_test(NLPToStructuredQueryResponse, response_data)
assert serialize_deserialize_test(QuestionToStructuredQueryResponse, response_data)
def test_structured_query_serialization(self):
    """Contract: structured query request and response pass the round-trip helper.

    Each payload is handed to serialize_deserialize_test together with its
    schema class; the request is checked first, then the response.
    """
    request_fields = {"question": "Show me all customers"}
    response_fields = {
        "error": None,
        "data": '{"customers": [{"id": "1", "name": "John"}]}',
        "errors": [],
    }

    # Request first, response second - same order as the message exchange
    round_trips = (
        (StructuredQueryRequest, request_fields),
        (StructuredQueryResponse, response_fields),
    )
    for schema_class, payload in round_trips:
        assert serialize_deserialize_test(schema_class, payload)
def test_extracted_object_batch_serialization(self):
    """Contract: an ExtractedObject with several values passes the round-trip helper."""
    # Arrange - three records, each with the same two fields
    records = [
        {"field1": "value1", "field2": "value2"},
        {"field1": "value3", "field2": "value4"},
        {"field1": "value5", "field2": "value6"},
    ]
    payload = {
        "metadata": Metadata(id="test", user="user", collection="col", metadata=[]),
        "schema_name": "test_schema",
        "values": records,
        "confidence": 0.9,
        "source_span": "batch test span",
    }

    # Act
    round_trip_ok = serialize_deserialize_test(ExtractedObject, payload)

    # Assert
    assert round_trip_ok
def test_extracted_object_empty_batch_serialization(self):
    """Contract: an ExtractedObject with no values passes the round-trip helper."""
    # Arrange - same shape as a populated batch, but with an empty values list
    payload = {
        "metadata": Metadata(id="test", user="user", collection="col", metadata=[]),
        "schema_name": "test_schema",
        "values": [],
        "confidence": 1.0,
        "source_span": "empty batch",
    }

    # Act
    round_trip_ok = serialize_deserialize_test(ExtractedObject, payload)

    # Assert
    assert round_trip_ok