""" Contract tests for Pulsar Message Schemas These tests verify the contracts for all Pulsar message schemas used in TrustGraph, ensuring schema compatibility, serialization contracts, and service interface stability. Following the TEST_STRATEGY.md approach for contract testing. """ import pytest import json from typing import Dict, Any, Type from pulsar.schema import Record from trustgraph.schema import ( TextCompletionRequest, TextCompletionResponse, DocumentRagQuery, DocumentRagResponse, AgentRequest, AgentResponse, AgentStep, Chunk, Triple, Triples, Term, Error, EntityContext, EntityContexts, GraphEmbeddings, EntityEmbeddings, Metadata, Field, RowSchema, StructuredDataSubmission, ExtractedObject, QuestionToStructuredQueryRequest, QuestionToStructuredQueryResponse, StructuredQueryRequest, StructuredQueryResponse, StructuredObjectEmbedding, IRI, LITERAL ) from .conftest import validate_schema_contract, serialize_deserialize_test @pytest.mark.contract class TestTextCompletionMessageContracts: """Contract tests for Text Completion message schemas""" def test_text_completion_request_schema_contract(self, sample_message_data): """Test TextCompletionRequest schema contract""" # Arrange request_data = sample_message_data["TextCompletionRequest"] # Act & Assert assert validate_schema_contract(TextCompletionRequest, request_data) # Test required fields request = TextCompletionRequest(**request_data) assert hasattr(request, 'system') assert hasattr(request, 'prompt') assert isinstance(request.system, str) assert isinstance(request.prompt, str) def test_text_completion_response_schema_contract(self, sample_message_data): """Test TextCompletionResponse schema contract""" # Arrange response_data = sample_message_data["TextCompletionResponse"] # Act & Assert assert validate_schema_contract(TextCompletionResponse, response_data) # Test required fields response = TextCompletionResponse(**response_data) assert hasattr(response, 'error') assert hasattr(response, 'response') assert hasattr(response, 'in_token') assert hasattr(response, 'out_token') assert hasattr(response, 'model') def test_text_completion_request_serialization_contract(self, sample_message_data): """Test TextCompletionRequest serialization/deserialization contract""" # Arrange request_data = sample_message_data["TextCompletionRequest"] # Act & Assert assert serialize_deserialize_test(TextCompletionRequest, request_data) def test_text_completion_response_serialization_contract(self, sample_message_data): """Test TextCompletionResponse serialization/deserialization contract""" # Arrange response_data = sample_message_data["TextCompletionResponse"] # Act & Assert assert serialize_deserialize_test(TextCompletionResponse, response_data) def test_text_completion_request_field_constraints(self): """Test TextCompletionRequest field type constraints""" # Test valid data valid_request = TextCompletionRequest( system="You are helpful.", prompt="Test prompt" ) assert valid_request.system == "You are helpful." assert valid_request.prompt == "Test prompt" def test_text_completion_response_field_constraints(self): """Test TextCompletionResponse field type constraints""" # Test valid response with no error valid_response = TextCompletionResponse( error=None, response="Test response", in_token=50, out_token=100, model="gpt-3.5-turbo" ) assert valid_response.error is None assert valid_response.response == "Test response" assert valid_response.in_token == 50 assert valid_response.out_token == 100 assert valid_response.model == "gpt-3.5-turbo" # Test response with error error_response = TextCompletionResponse( error=Error(type="rate-limit", message="Rate limit exceeded"), response=None, in_token=None, out_token=None, model=None ) assert error_response.error is not None assert error_response.error.type == "rate-limit" assert error_response.response is None @pytest.mark.contract class TestDocumentRagMessageContracts: """Contract tests for Document RAG message schemas""" def test_document_rag_query_schema_contract(self, sample_message_data): """Test DocumentRagQuery schema contract""" # Arrange query_data = sample_message_data["DocumentRagQuery"] # Act & Assert assert validate_schema_contract(DocumentRagQuery, query_data) # Test required fields query = DocumentRagQuery(**query_data) assert hasattr(query, 'query') assert hasattr(query, 'user') assert hasattr(query, 'collection') assert hasattr(query, 'doc_limit') def test_document_rag_response_schema_contract(self, sample_message_data): """Test DocumentRagResponse schema contract""" # Arrange response_data = sample_message_data["DocumentRagResponse"] # Act & Assert assert validate_schema_contract(DocumentRagResponse, response_data) # Test required fields response = DocumentRagResponse(**response_data) assert hasattr(response, 'error') assert hasattr(response, 'response') def test_document_rag_query_field_constraints(self): """Test DocumentRagQuery field constraints""" # Test valid query valid_query = DocumentRagQuery( query="What is AI?", user="test_user", collection="test_collection", doc_limit=5 ) assert valid_query.query == "What is AI?" assert valid_query.user == "test_user" assert valid_query.collection == "test_collection" assert valid_query.doc_limit == 5 def test_document_rag_response_error_contract(self): """Test DocumentRagResponse error handling contract""" # Test successful response success_response = DocumentRagResponse( error=None, response="AI is artificial intelligence." ) assert success_response.error is None assert success_response.response == "AI is artificial intelligence." # Test error response error_response = DocumentRagResponse( error=Error(type="no-documents", message="No documents found"), response=None ) assert error_response.error is not None assert error_response.error.type == "no-documents" assert error_response.response is None @pytest.mark.contract class TestAgentMessageContracts: """Contract tests for Agent message schemas""" def test_agent_request_schema_contract(self, sample_message_data): """Test AgentRequest schema contract""" # Arrange request_data = sample_message_data["AgentRequest"] # Act & Assert assert validate_schema_contract(AgentRequest, request_data) # Test required fields request = AgentRequest(**request_data) assert hasattr(request, 'question') assert hasattr(request, 'state') assert hasattr(request, 'group') assert hasattr(request, 'history') def test_agent_response_schema_contract(self, sample_message_data): """Test AgentResponse schema contract""" # Arrange response_data = sample_message_data["AgentResponse"] # Act & Assert assert validate_schema_contract(AgentResponse, response_data) # Test required fields response = AgentResponse(**response_data) assert hasattr(response, 'chunk_type') assert hasattr(response, 'content') assert hasattr(response, 'end_of_message') assert hasattr(response, 'end_of_dialog') assert hasattr(response, 'error') def test_agent_step_schema_contract(self): """Test AgentStep schema contract""" # Arrange step_data = { "thought": "I need to search for information", "action": "knowledge_query", "arguments": {"question": "What is AI?"}, "observation": "AI is artificial intelligence" } # Act & Assert assert validate_schema_contract(AgentStep, step_data) step = AgentStep(**step_data) assert step.thought == "I need to search for information" assert step.action == "knowledge_query" assert step.arguments == {"question": "What is AI?"} assert step.observation == "AI is artificial intelligence" def test_agent_request_with_history_contract(self): """Test AgentRequest with conversation history contract""" # Arrange history_steps = [ AgentStep( thought="First thought", action="first_action", arguments={"param": "value"}, observation="First observation" ), AgentStep( thought="Second thought", action="second_action", arguments={"param2": "value2"}, observation="Second observation" ) ] # Act request = AgentRequest( question="What comes next?", state="processing", history=history_steps ) # Assert assert len(request.history) == 2 assert request.history[0].thought == "First thought" assert request.history[1].action == "second_action" @pytest.mark.contract class TestGraphMessageContracts: """Contract tests for Graph/Knowledge message schemas""" def test_term_schema_contract(self, sample_message_data): """Test Term schema contract""" # Arrange term_data = sample_message_data["Term"] # Act & Assert assert validate_schema_contract(Term, term_data) # Test URI term uri_term = Term(**term_data) assert uri_term.iri == "http://example.com/entity" assert uri_term.type == IRI # Test literal term literal_term = Term( type=LITERAL, value="Literal text value" ) assert literal_term.value == "Literal text value" assert literal_term.type == LITERAL def test_triple_schema_contract(self, sample_message_data): """Test Triple schema contract""" # Arrange triple_data = sample_message_data["Triple"] # Act & Assert - Triple uses Term objects, not dict validation triple = Triple( s=triple_data["s"], p=triple_data["p"], o=triple_data["o"] ) assert triple.s.iri == "http://example.com/subject" assert triple.p.iri == "http://example.com/predicate" assert triple.o.value == "Object value" assert triple.s.type == IRI assert triple.p.type == IRI assert triple.o.type == LITERAL def test_triples_schema_contract(self, sample_message_data): """Test Triples (batch) schema contract""" # Arrange metadata = Metadata(**sample_message_data["Metadata"]) triple = Triple(**sample_message_data["Triple"]) triples_data = { "metadata": metadata, "triples": [triple] } # Act & Assert assert validate_schema_contract(Triples, triples_data) triples = Triples(**triples_data) assert triples.metadata.id == "test-doc-123" assert len(triples.triples) == 1 assert triples.triples[0].s.iri == "http://example.com/subject" def test_chunk_schema_contract(self, sample_message_data): """Test Chunk schema contract""" # Arrange metadata = Metadata(**sample_message_data["Metadata"]) chunk_data = { "metadata": metadata, "chunk": b"This is a text chunk for processing" } # Act & Assert assert validate_schema_contract(Chunk, chunk_data) chunk = Chunk(**chunk_data) assert chunk.metadata.id == "test-doc-123" assert chunk.chunk == b"This is a text chunk for processing" def test_entity_context_schema_contract(self): """Test EntityContext schema contract""" # Arrange entity_term = Term(type=IRI, iri="http://example.com/entity") entity_context_data = { "entity": entity_term, "context": "Context information about the entity" } # Act & Assert assert validate_schema_contract(EntityContext, entity_context_data) entity_context = EntityContext(**entity_context_data) assert entity_context.entity.iri == "http://example.com/entity" assert entity_context.context == "Context information about the entity" def test_entity_contexts_batch_schema_contract(self, sample_message_data): """Test EntityContexts (batch) schema contract""" # Arrange metadata = Metadata(**sample_message_data["Metadata"]) entity_term = Term(type=IRI, iri="http://example.com/entity") entity_context = EntityContext( entity=entity_term, context="Entity context" ) entity_contexts_data = { "metadata": metadata, "entities": [entity_context] } # Act & Assert assert validate_schema_contract(EntityContexts, entity_contexts_data) entity_contexts = EntityContexts(**entity_contexts_data) assert entity_contexts.metadata.id == "test-doc-123" assert len(entity_contexts.entities) == 1 assert entity_contexts.entities[0].context == "Entity context" @pytest.mark.contract class TestMetadataMessageContracts: """Contract tests for Metadata and common message schemas""" def test_metadata_schema_contract(self, sample_message_data): """Test Metadata schema contract""" # Arrange metadata_data = sample_message_data["Metadata"] # Act & Assert assert validate_schema_contract(Metadata, metadata_data) metadata = Metadata(**metadata_data) assert metadata.id == "test-doc-123" assert metadata.user == "test_user" assert metadata.collection == "test_collection" def test_error_schema_contract(self): """Test Error schema contract""" # Arrange error_data = { "type": "validation-error", "message": "Invalid input data provided" } # Act & Assert assert validate_schema_contract(Error, error_data) error = Error(**error_data) assert error.type == "validation-error" assert error.message == "Invalid input data provided" @pytest.mark.contract class TestMessageRoutingContracts: """Contract tests for message routing and properties""" def test_message_property_contracts(self, message_properties): """Test standard message property contracts""" # Act & Assert required_properties = ["id", "routing_key", "timestamp", "source_service"] for prop in required_properties: assert prop in message_properties assert message_properties[prop] is not None assert isinstance(message_properties[prop], str) def test_message_id_format_contract(self, message_properties): """Test message ID format contract""" # Act & Assert message_id = message_properties["id"] assert isinstance(message_id, str) assert len(message_id) > 0 # Message IDs should follow a consistent format assert "test-message-" in message_id def test_routing_key_format_contract(self, message_properties): """Test routing key format contract""" # Act & Assert routing_key = message_properties["routing_key"] assert isinstance(routing_key, str) assert "." in routing_key # Should use dot notation assert routing_key.count(".") >= 2 # Should have at least 3 parts def test_correlation_id_contract(self, message_properties): """Test correlation ID contract for request/response tracking""" # Act & Assert correlation_id = message_properties.get("correlation_id") if correlation_id is not None: assert isinstance(correlation_id, str) assert len(correlation_id) > 0 @pytest.mark.contract class TestSchemaEvolutionContracts: """Contract tests for schema evolution and backward compatibility""" def test_schema_backward_compatibility(self, schema_evolution_data): """Test schema backward compatibility""" # Test that v1 data can still be processed v1_request = schema_evolution_data["TextCompletionRequest_v1"] # Should work with current schema (optional fields default) request = TextCompletionRequest(**v1_request) assert request.system == "You are helpful." assert request.prompt == "Test prompt" def test_schema_forward_compatibility(self, schema_evolution_data): """Test schema forward compatibility with new fields""" # Test that v2 data works with additional fields v2_request = schema_evolution_data["TextCompletionRequest_v2"] # Current schema should handle new fields gracefully # (This would require actual schema versioning implementation) base_fields = {"system": v2_request["system"], "prompt": v2_request["prompt"]} request = TextCompletionRequest(**base_fields) assert request.system == "You are helpful." assert request.prompt == "Test prompt" def test_required_field_stability_contract(self): """Test that required fields remain stable across versions""" # These fields should never become optional or be removed required_fields = { "TextCompletionRequest": ["system", "prompt"], "TextCompletionResponse": ["error", "response", "model"], "DocumentRagQuery": ["query", "user", "collection"], "DocumentRagResponse": ["error", "response"], "AgentRequest": ["question", "history"], "AgentResponse": ["error"], } # Verify required fields are present in schema definitions for schema_name, fields in required_fields.items(): # This would be implemented with actual schema introspection # For now, we verify by attempting to create instances assert len(fields) > 0 # Ensure we have defined required fields @pytest.mark.contract class TestSerializationContracts: """Contract tests for message serialization/deserialization""" def test_all_schemas_serialization_contract(self, schema_registry, sample_message_data): """Test serialization contract for all schemas""" # Test each schema in the registry for schema_name, schema_class in schema_registry.items(): if schema_name in sample_message_data: # Skip Triple schema as it requires special handling with Term objects if schema_name == "Triple": continue # Act & Assert data = sample_message_data[schema_name] assert serialize_deserialize_test(schema_class, data), f"Serialization failed for {schema_name}" def test_triple_serialization_contract(self, sample_message_data): """Test Triple schema serialization contract with Term objects""" # Arrange triple_data = sample_message_data["Triple"] # Act triple = Triple( s=triple_data["s"], p=triple_data["p"], o=triple_data["o"] ) # Assert - Test that Term objects are properly constructed and accessible assert triple.s.iri == "http://example.com/subject" assert triple.p.iri == "http://example.com/predicate" assert triple.o.value == "Object value" assert isinstance(triple.s, Term) assert isinstance(triple.p, Term) assert isinstance(triple.o, Term) def test_nested_schema_serialization_contract(self, sample_message_data): """Test serialization of nested schemas""" # Test Triples (contains Metadata and Triple objects) metadata = Metadata(**sample_message_data["Metadata"]) triple = Triple(**sample_message_data["Triple"]) triples = Triples(metadata=metadata, triples=[triple]) # Verify nested objects maintain their contracts assert triples.metadata.id == "test-doc-123" assert triples.triples[0].s.iri == "http://example.com/subject" def test_array_field_serialization_contract(self): """Test serialization of array fields""" # Test AgentRequest with history array steps = [ AgentStep( thought=f"Step {i}", action=f"action_{i}", arguments={f"param_{i}": f"value_{i}"}, observation=f"Observation {i}" ) for i in range(3) ] request = AgentRequest( question="Test with array", state="Test state", history=steps ) # Verify array serialization maintains order and content assert len(request.history) == 3 assert request.history[0].thought == "Step 0" assert request.history[2].action == "action_2" def test_optional_field_serialization_contract(self): """Test serialization contract for optional fields""" # Test with minimal required fields minimal_response = TextCompletionResponse( error=None, response="Test", in_token=None, # Optional field out_token=None, # Optional field model="test-model" ) assert minimal_response.response == "Test" assert minimal_response.in_token is None assert minimal_response.out_token is None