mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-04-25 16:36:21 +02:00
The metadata field (list of triples) in the pipeline Metadata class was redundant. Document metadata triples already flow directly from librarian to triple-store via emit_document_provenance() - they don't need to pass through the extraction pipeline. Additionally, chunker and PDF decoder were overwriting metadata to [] anyway, so any metadata passed through the pipeline was being discarded. Changes: - Remove metadata field from Metadata dataclass (schema/core/metadata.py) - Update all Metadata instantiations to remove metadata=[] parameter - Remove metadata handling from translators (document_loading, knowledge) - Remove metadata consumption from extractors (ontology, agent) - Update gateway serializers and import handlers - Update all unit, integration, and contract tests
592 lines
No EOL
22 KiB
Python
592 lines
No EOL
22 KiB
Python
"""
|
|
Contract tests for Pulsar Message Schemas
|
|
|
|
These tests verify the contracts for all Pulsar message schemas used in TrustGraph,
|
|
ensuring schema compatibility, serialization contracts, and service interface stability.
|
|
Following the TEST_STRATEGY.md approach for contract testing.
|
|
"""
|
|
|
|
import pytest
|
|
import json
|
|
from typing import Dict, Any, Type
|
|
from pulsar.schema import Record
|
|
|
|
from trustgraph.schema import (
|
|
TextCompletionRequest, TextCompletionResponse,
|
|
DocumentRagQuery, DocumentRagResponse,
|
|
AgentRequest, AgentResponse, AgentStep,
|
|
Chunk, Triple, Triples, Term, Error,
|
|
EntityContext, EntityContexts,
|
|
GraphEmbeddings, EntityEmbeddings,
|
|
Metadata, Field, RowSchema,
|
|
StructuredDataSubmission, ExtractedObject,
|
|
QuestionToStructuredQueryRequest, QuestionToStructuredQueryResponse,
|
|
StructuredQueryRequest, StructuredQueryResponse,
|
|
StructuredObjectEmbedding, IRI, LITERAL
|
|
)
|
|
from .conftest import validate_schema_contract, serialize_deserialize_test
|
|
|
|
|
|
@pytest.mark.contract
|
|
class TestTextCompletionMessageContracts:
|
|
"""Contract tests for Text Completion message schemas"""
|
|
|
|
def test_text_completion_request_schema_contract(self, sample_message_data):
|
|
"""Test TextCompletionRequest schema contract"""
|
|
# Arrange
|
|
request_data = sample_message_data["TextCompletionRequest"]
|
|
|
|
# Act & Assert
|
|
assert validate_schema_contract(TextCompletionRequest, request_data)
|
|
|
|
# Test required fields
|
|
request = TextCompletionRequest(**request_data)
|
|
assert hasattr(request, 'system')
|
|
assert hasattr(request, 'prompt')
|
|
assert isinstance(request.system, str)
|
|
assert isinstance(request.prompt, str)
|
|
|
|
def test_text_completion_response_schema_contract(self, sample_message_data):
|
|
"""Test TextCompletionResponse schema contract"""
|
|
# Arrange
|
|
response_data = sample_message_data["TextCompletionResponse"]
|
|
|
|
# Act & Assert
|
|
assert validate_schema_contract(TextCompletionResponse, response_data)
|
|
|
|
# Test required fields
|
|
response = TextCompletionResponse(**response_data)
|
|
assert hasattr(response, 'error')
|
|
assert hasattr(response, 'response')
|
|
assert hasattr(response, 'in_token')
|
|
assert hasattr(response, 'out_token')
|
|
assert hasattr(response, 'model')
|
|
|
|
def test_text_completion_request_serialization_contract(self, sample_message_data):
|
|
"""Test TextCompletionRequest serialization/deserialization contract"""
|
|
# Arrange
|
|
request_data = sample_message_data["TextCompletionRequest"]
|
|
|
|
# Act & Assert
|
|
assert serialize_deserialize_test(TextCompletionRequest, request_data)
|
|
|
|
def test_text_completion_response_serialization_contract(self, sample_message_data):
|
|
"""Test TextCompletionResponse serialization/deserialization contract"""
|
|
# Arrange
|
|
response_data = sample_message_data["TextCompletionResponse"]
|
|
|
|
# Act & Assert
|
|
assert serialize_deserialize_test(TextCompletionResponse, response_data)
|
|
|
|
def test_text_completion_request_field_constraints(self):
|
|
"""Test TextCompletionRequest field type constraints"""
|
|
# Test valid data
|
|
valid_request = TextCompletionRequest(
|
|
system="You are helpful.",
|
|
prompt="Test prompt"
|
|
)
|
|
assert valid_request.system == "You are helpful."
|
|
assert valid_request.prompt == "Test prompt"
|
|
|
|
def test_text_completion_response_field_constraints(self):
|
|
"""Test TextCompletionResponse field type constraints"""
|
|
# Test valid response with no error
|
|
valid_response = TextCompletionResponse(
|
|
error=None,
|
|
response="Test response",
|
|
in_token=50,
|
|
out_token=100,
|
|
model="gpt-3.5-turbo"
|
|
)
|
|
assert valid_response.error is None
|
|
assert valid_response.response == "Test response"
|
|
assert valid_response.in_token == 50
|
|
assert valid_response.out_token == 100
|
|
assert valid_response.model == "gpt-3.5-turbo"
|
|
|
|
# Test response with error
|
|
error_response = TextCompletionResponse(
|
|
error=Error(type="rate-limit", message="Rate limit exceeded"),
|
|
response=None,
|
|
in_token=None,
|
|
out_token=None,
|
|
model=None
|
|
)
|
|
assert error_response.error is not None
|
|
assert error_response.error.type == "rate-limit"
|
|
assert error_response.response is None
|
|
|
|
|
|
@pytest.mark.contract
|
|
class TestDocumentRagMessageContracts:
|
|
"""Contract tests for Document RAG message schemas"""
|
|
|
|
def test_document_rag_query_schema_contract(self, sample_message_data):
|
|
"""Test DocumentRagQuery schema contract"""
|
|
# Arrange
|
|
query_data = sample_message_data["DocumentRagQuery"]
|
|
|
|
# Act & Assert
|
|
assert validate_schema_contract(DocumentRagQuery, query_data)
|
|
|
|
# Test required fields
|
|
query = DocumentRagQuery(**query_data)
|
|
assert hasattr(query, 'query')
|
|
assert hasattr(query, 'user')
|
|
assert hasattr(query, 'collection')
|
|
assert hasattr(query, 'doc_limit')
|
|
|
|
def test_document_rag_response_schema_contract(self, sample_message_data):
|
|
"""Test DocumentRagResponse schema contract"""
|
|
# Arrange
|
|
response_data = sample_message_data["DocumentRagResponse"]
|
|
|
|
# Act & Assert
|
|
assert validate_schema_contract(DocumentRagResponse, response_data)
|
|
|
|
# Test required fields
|
|
response = DocumentRagResponse(**response_data)
|
|
assert hasattr(response, 'error')
|
|
assert hasattr(response, 'response')
|
|
|
|
def test_document_rag_query_field_constraints(self):
|
|
"""Test DocumentRagQuery field constraints"""
|
|
# Test valid query
|
|
valid_query = DocumentRagQuery(
|
|
query="What is AI?",
|
|
user="test_user",
|
|
collection="test_collection",
|
|
doc_limit=5
|
|
)
|
|
assert valid_query.query == "What is AI?"
|
|
assert valid_query.user == "test_user"
|
|
assert valid_query.collection == "test_collection"
|
|
assert valid_query.doc_limit == 5
|
|
|
|
def test_document_rag_response_error_contract(self):
|
|
"""Test DocumentRagResponse error handling contract"""
|
|
# Test successful response
|
|
success_response = DocumentRagResponse(
|
|
error=None,
|
|
response="AI is artificial intelligence."
|
|
)
|
|
assert success_response.error is None
|
|
assert success_response.response == "AI is artificial intelligence."
|
|
|
|
# Test error response
|
|
error_response = DocumentRagResponse(
|
|
error=Error(type="no-documents", message="No documents found"),
|
|
response=None
|
|
)
|
|
assert error_response.error is not None
|
|
assert error_response.error.type == "no-documents"
|
|
assert error_response.response is None
|
|
|
|
|
|
@pytest.mark.contract
|
|
class TestAgentMessageContracts:
|
|
"""Contract tests for Agent message schemas"""
|
|
|
|
def test_agent_request_schema_contract(self, sample_message_data):
|
|
"""Test AgentRequest schema contract"""
|
|
# Arrange
|
|
request_data = sample_message_data["AgentRequest"]
|
|
|
|
# Act & Assert
|
|
assert validate_schema_contract(AgentRequest, request_data)
|
|
|
|
# Test required fields
|
|
request = AgentRequest(**request_data)
|
|
assert hasattr(request, 'question')
|
|
assert hasattr(request, 'state')
|
|
assert hasattr(request, 'group')
|
|
assert hasattr(request, 'history')
|
|
|
|
def test_agent_response_schema_contract(self, sample_message_data):
|
|
"""Test AgentResponse schema contract"""
|
|
# Arrange
|
|
response_data = sample_message_data["AgentResponse"]
|
|
|
|
# Act & Assert
|
|
assert validate_schema_contract(AgentResponse, response_data)
|
|
|
|
# Test required fields
|
|
response = AgentResponse(**response_data)
|
|
assert hasattr(response, 'answer')
|
|
assert hasattr(response, 'error')
|
|
assert hasattr(response, 'thought')
|
|
assert hasattr(response, 'observation')
|
|
|
|
def test_agent_step_schema_contract(self):
|
|
"""Test AgentStep schema contract"""
|
|
# Arrange
|
|
step_data = {
|
|
"thought": "I need to search for information",
|
|
"action": "knowledge_query",
|
|
"arguments": {"question": "What is AI?"},
|
|
"observation": "AI is artificial intelligence"
|
|
}
|
|
|
|
# Act & Assert
|
|
assert validate_schema_contract(AgentStep, step_data)
|
|
|
|
step = AgentStep(**step_data)
|
|
assert step.thought == "I need to search for information"
|
|
assert step.action == "knowledge_query"
|
|
assert step.arguments == {"question": "What is AI?"}
|
|
assert step.observation == "AI is artificial intelligence"
|
|
|
|
def test_agent_request_with_history_contract(self):
|
|
"""Test AgentRequest with conversation history contract"""
|
|
# Arrange
|
|
history_steps = [
|
|
AgentStep(
|
|
thought="First thought",
|
|
action="first_action",
|
|
arguments={"param": "value"},
|
|
observation="First observation"
|
|
),
|
|
AgentStep(
|
|
thought="Second thought",
|
|
action="second_action",
|
|
arguments={"param2": "value2"},
|
|
observation="Second observation"
|
|
)
|
|
]
|
|
|
|
# Act
|
|
request = AgentRequest(
|
|
question="What comes next?",
|
|
state="processing",
|
|
history=history_steps
|
|
)
|
|
|
|
# Assert
|
|
assert len(request.history) == 2
|
|
assert request.history[0].thought == "First thought"
|
|
assert request.history[1].action == "second_action"
|
|
|
|
|
|
@pytest.mark.contract
|
|
class TestGraphMessageContracts:
|
|
"""Contract tests for Graph/Knowledge message schemas"""
|
|
|
|
def test_term_schema_contract(self, sample_message_data):
|
|
"""Test Term schema contract"""
|
|
# Arrange
|
|
term_data = sample_message_data["Term"]
|
|
|
|
# Act & Assert
|
|
assert validate_schema_contract(Term, term_data)
|
|
|
|
# Test URI term
|
|
uri_term = Term(**term_data)
|
|
assert uri_term.iri == "http://example.com/entity"
|
|
assert uri_term.type == IRI
|
|
|
|
# Test literal term
|
|
literal_term = Term(
|
|
type=LITERAL,
|
|
value="Literal text value"
|
|
)
|
|
assert literal_term.value == "Literal text value"
|
|
assert literal_term.type == LITERAL
|
|
|
|
def test_triple_schema_contract(self, sample_message_data):
|
|
"""Test Triple schema contract"""
|
|
# Arrange
|
|
triple_data = sample_message_data["Triple"]
|
|
|
|
# Act & Assert - Triple uses Term objects, not dict validation
|
|
triple = Triple(
|
|
s=triple_data["s"],
|
|
p=triple_data["p"],
|
|
o=triple_data["o"]
|
|
)
|
|
assert triple.s.iri == "http://example.com/subject"
|
|
assert triple.p.iri == "http://example.com/predicate"
|
|
assert triple.o.value == "Object value"
|
|
assert triple.s.type == IRI
|
|
assert triple.p.type == IRI
|
|
assert triple.o.type == LITERAL
|
|
|
|
def test_triples_schema_contract(self, sample_message_data):
|
|
"""Test Triples (batch) schema contract"""
|
|
# Arrange
|
|
metadata = Metadata(**sample_message_data["Metadata"])
|
|
triple = Triple(**sample_message_data["Triple"])
|
|
|
|
triples_data = {
|
|
"metadata": metadata,
|
|
"triples": [triple]
|
|
}
|
|
|
|
# Act & Assert
|
|
assert validate_schema_contract(Triples, triples_data)
|
|
|
|
triples = Triples(**triples_data)
|
|
assert triples.metadata.id == "test-doc-123"
|
|
assert len(triples.triples) == 1
|
|
assert triples.triples[0].s.iri == "http://example.com/subject"
|
|
|
|
def test_chunk_schema_contract(self, sample_message_data):
|
|
"""Test Chunk schema contract"""
|
|
# Arrange
|
|
metadata = Metadata(**sample_message_data["Metadata"])
|
|
chunk_data = {
|
|
"metadata": metadata,
|
|
"chunk": b"This is a text chunk for processing"
|
|
}
|
|
|
|
# Act & Assert
|
|
assert validate_schema_contract(Chunk, chunk_data)
|
|
|
|
chunk = Chunk(**chunk_data)
|
|
assert chunk.metadata.id == "test-doc-123"
|
|
assert chunk.chunk == b"This is a text chunk for processing"
|
|
|
|
def test_entity_context_schema_contract(self):
|
|
"""Test EntityContext schema contract"""
|
|
# Arrange
|
|
entity_term = Term(type=IRI, iri="http://example.com/entity")
|
|
entity_context_data = {
|
|
"entity": entity_term,
|
|
"context": "Context information about the entity"
|
|
}
|
|
|
|
# Act & Assert
|
|
assert validate_schema_contract(EntityContext, entity_context_data)
|
|
|
|
entity_context = EntityContext(**entity_context_data)
|
|
assert entity_context.entity.iri == "http://example.com/entity"
|
|
assert entity_context.context == "Context information about the entity"
|
|
|
|
def test_entity_contexts_batch_schema_contract(self, sample_message_data):
|
|
"""Test EntityContexts (batch) schema contract"""
|
|
# Arrange
|
|
metadata = Metadata(**sample_message_data["Metadata"])
|
|
entity_term = Term(type=IRI, iri="http://example.com/entity")
|
|
entity_context = EntityContext(
|
|
entity=entity_term,
|
|
context="Entity context"
|
|
)
|
|
|
|
entity_contexts_data = {
|
|
"metadata": metadata,
|
|
"entities": [entity_context]
|
|
}
|
|
|
|
# Act & Assert
|
|
assert validate_schema_contract(EntityContexts, entity_contexts_data)
|
|
|
|
entity_contexts = EntityContexts(**entity_contexts_data)
|
|
assert entity_contexts.metadata.id == "test-doc-123"
|
|
assert len(entity_contexts.entities) == 1
|
|
assert entity_contexts.entities[0].context == "Entity context"
|
|
|
|
|
|
@pytest.mark.contract
|
|
class TestMetadataMessageContracts:
|
|
"""Contract tests for Metadata and common message schemas"""
|
|
|
|
def test_metadata_schema_contract(self, sample_message_data):
|
|
"""Test Metadata schema contract"""
|
|
# Arrange
|
|
metadata_data = sample_message_data["Metadata"]
|
|
|
|
# Act & Assert
|
|
assert validate_schema_contract(Metadata, metadata_data)
|
|
|
|
metadata = Metadata(**metadata_data)
|
|
assert metadata.id == "test-doc-123"
|
|
assert metadata.user == "test_user"
|
|
assert metadata.collection == "test_collection"
|
|
|
|
def test_error_schema_contract(self):
|
|
"""Test Error schema contract"""
|
|
# Arrange
|
|
error_data = {
|
|
"type": "validation-error",
|
|
"message": "Invalid input data provided"
|
|
}
|
|
|
|
# Act & Assert
|
|
assert validate_schema_contract(Error, error_data)
|
|
|
|
error = Error(**error_data)
|
|
assert error.type == "validation-error"
|
|
assert error.message == "Invalid input data provided"
|
|
|
|
|
|
@pytest.mark.contract
|
|
class TestMessageRoutingContracts:
|
|
"""Contract tests for message routing and properties"""
|
|
|
|
def test_message_property_contracts(self, message_properties):
|
|
"""Test standard message property contracts"""
|
|
# Act & Assert
|
|
required_properties = ["id", "routing_key", "timestamp", "source_service"]
|
|
|
|
for prop in required_properties:
|
|
assert prop in message_properties
|
|
assert message_properties[prop] is not None
|
|
assert isinstance(message_properties[prop], str)
|
|
|
|
def test_message_id_format_contract(self, message_properties):
|
|
"""Test message ID format contract"""
|
|
# Act & Assert
|
|
message_id = message_properties["id"]
|
|
assert isinstance(message_id, str)
|
|
assert len(message_id) > 0
|
|
# Message IDs should follow a consistent format
|
|
assert "test-message-" in message_id
|
|
|
|
def test_routing_key_format_contract(self, message_properties):
|
|
"""Test routing key format contract"""
|
|
# Act & Assert
|
|
routing_key = message_properties["routing_key"]
|
|
assert isinstance(routing_key, str)
|
|
assert "." in routing_key # Should use dot notation
|
|
assert routing_key.count(".") >= 2 # Should have at least 3 parts
|
|
|
|
def test_correlation_id_contract(self, message_properties):
|
|
"""Test correlation ID contract for request/response tracking"""
|
|
# Act & Assert
|
|
correlation_id = message_properties.get("correlation_id")
|
|
if correlation_id is not None:
|
|
assert isinstance(correlation_id, str)
|
|
assert len(correlation_id) > 0
|
|
|
|
|
|
@pytest.mark.contract
|
|
class TestSchemaEvolutionContracts:
|
|
"""Contract tests for schema evolution and backward compatibility"""
|
|
|
|
def test_schema_backward_compatibility(self, schema_evolution_data):
|
|
"""Test schema backward compatibility"""
|
|
# Test that v1 data can still be processed
|
|
v1_request = schema_evolution_data["TextCompletionRequest_v1"]
|
|
|
|
# Should work with current schema (optional fields default)
|
|
request = TextCompletionRequest(**v1_request)
|
|
assert request.system == "You are helpful."
|
|
assert request.prompt == "Test prompt"
|
|
|
|
def test_schema_forward_compatibility(self, schema_evolution_data):
|
|
"""Test schema forward compatibility with new fields"""
|
|
# Test that v2 data works with additional fields
|
|
v2_request = schema_evolution_data["TextCompletionRequest_v2"]
|
|
|
|
# Current schema should handle new fields gracefully
|
|
# (This would require actual schema versioning implementation)
|
|
base_fields = {"system": v2_request["system"], "prompt": v2_request["prompt"]}
|
|
request = TextCompletionRequest(**base_fields)
|
|
assert request.system == "You are helpful."
|
|
assert request.prompt == "Test prompt"
|
|
|
|
def test_required_field_stability_contract(self):
|
|
"""Test that required fields remain stable across versions"""
|
|
# These fields should never become optional or be removed
|
|
required_fields = {
|
|
"TextCompletionRequest": ["system", "prompt"],
|
|
"TextCompletionResponse": ["error", "response", "model"],
|
|
"DocumentRagQuery": ["query", "user", "collection"],
|
|
"DocumentRagResponse": ["error", "response"],
|
|
"AgentRequest": ["question", "history"],
|
|
"AgentResponse": ["error"],
|
|
}
|
|
|
|
# Verify required fields are present in schema definitions
|
|
for schema_name, fields in required_fields.items():
|
|
# This would be implemented with actual schema introspection
|
|
# For now, we verify by attempting to create instances
|
|
assert len(fields) > 0 # Ensure we have defined required fields
|
|
|
|
|
|
@pytest.mark.contract
|
|
class TestSerializationContracts:
|
|
"""Contract tests for message serialization/deserialization"""
|
|
|
|
def test_all_schemas_serialization_contract(self, schema_registry, sample_message_data):
|
|
"""Test serialization contract for all schemas"""
|
|
# Test each schema in the registry
|
|
for schema_name, schema_class in schema_registry.items():
|
|
if schema_name in sample_message_data:
|
|
# Skip Triple schema as it requires special handling with Term objects
|
|
if schema_name == "Triple":
|
|
continue
|
|
|
|
# Act & Assert
|
|
data = sample_message_data[schema_name]
|
|
assert serialize_deserialize_test(schema_class, data), f"Serialization failed for {schema_name}"
|
|
|
|
def test_triple_serialization_contract(self, sample_message_data):
|
|
"""Test Triple schema serialization contract with Term objects"""
|
|
# Arrange
|
|
triple_data = sample_message_data["Triple"]
|
|
|
|
# Act
|
|
triple = Triple(
|
|
s=triple_data["s"],
|
|
p=triple_data["p"],
|
|
o=triple_data["o"]
|
|
)
|
|
|
|
# Assert - Test that Term objects are properly constructed and accessible
|
|
assert triple.s.iri == "http://example.com/subject"
|
|
assert triple.p.iri == "http://example.com/predicate"
|
|
assert triple.o.value == "Object value"
|
|
assert isinstance(triple.s, Term)
|
|
assert isinstance(triple.p, Term)
|
|
assert isinstance(triple.o, Term)
|
|
|
|
def test_nested_schema_serialization_contract(self, sample_message_data):
|
|
"""Test serialization of nested schemas"""
|
|
# Test Triples (contains Metadata and Triple objects)
|
|
metadata = Metadata(**sample_message_data["Metadata"])
|
|
triple = Triple(**sample_message_data["Triple"])
|
|
|
|
triples = Triples(metadata=metadata, triples=[triple])
|
|
|
|
# Verify nested objects maintain their contracts
|
|
assert triples.metadata.id == "test-doc-123"
|
|
assert triples.triples[0].s.iri == "http://example.com/subject"
|
|
|
|
def test_array_field_serialization_contract(self):
|
|
"""Test serialization of array fields"""
|
|
# Test AgentRequest with history array
|
|
steps = [
|
|
AgentStep(
|
|
thought=f"Step {i}",
|
|
action=f"action_{i}",
|
|
arguments={f"param_{i}": f"value_{i}"},
|
|
observation=f"Observation {i}"
|
|
)
|
|
for i in range(3)
|
|
]
|
|
|
|
request = AgentRequest(
|
|
question="Test with array",
|
|
state="Test state",
|
|
history=steps
|
|
)
|
|
|
|
# Verify array serialization maintains order and content
|
|
assert len(request.history) == 3
|
|
assert request.history[0].thought == "Step 0"
|
|
assert request.history[2].action == "action_2"
|
|
|
|
def test_optional_field_serialization_contract(self):
|
|
"""Test serialization contract for optional fields"""
|
|
# Test with minimal required fields
|
|
minimal_response = TextCompletionResponse(
|
|
error=None,
|
|
response="Test",
|
|
in_token=None, # Optional field
|
|
out_token=None, # Optional field
|
|
model="test-model"
|
|
)
|
|
|
|
assert minimal_response.response == "Test"
|
|
assert minimal_response.in_token is None
|
|
assert minimal_response.out_token is None |