# Mirror of https://github.com/trustgraph-ai/trustgraph.git
# Synced 2026-04-25 08:26:21 +02:00
# 410 lines, no EOL, 14 KiB, Python
"""
Contract tests for Structured Data Pulsar Message Schemas

These tests verify the contracts for all structured data Pulsar message schemas,
ensuring schema compatibility, serialization contracts, and service interface stability.
Following the TEST_STRATEGY.md approach for contract testing.
"""

# Standard library
import json
from typing import Any, Dict

# Third-party
import pytest

# Schema types under contract
from trustgraph.schema import (
    Error,
    ExtractedObject,
    Field,
    Metadata,
    QuestionToStructuredQueryRequest,
    QuestionToStructuredQueryResponse,
    RowSchema,
    StructuredDataSubmission,
    StructuredObjectEmbedding,
    StructuredQueryRequest,
    StructuredQueryResponse,
    Value,
)

# Round-trip helper shared by the contract test package
from .conftest import serialize_deserialize_test


@pytest.mark.contract
class TestStructuredDataSchemaContracts:
    """Construction and attribute-access contracts for the structured data schemas.

    Exercises Field, RowSchema, StructuredDataSubmission and ExtractedObject.
    """

    @staticmethod
    def _metadata(record_id):
        """Return the Metadata used by these tests; only the id varies."""
        return Metadata(
            id=record_id,
            user="test_user",
            collection="test_collection",
            metadata=[],
        )

    def test_field_schema_contract(self):
        """Field exposes name, type, primary, indexed and enum_values as built."""
        # Field without any enum restriction
        id_field = Field(
            name="customer_id",
            type="string",
            size=0,
            primary=True,
            description="Unique customer identifier",
            required=True,
            enum_values=[],
            indexed=True,
        )

        assert id_field.name == "customer_id"
        assert id_field.type == "string"
        assert id_field.primary is True
        assert id_field.indexed is True
        assert isinstance(id_field.enum_values, list)
        assert len(id_field.enum_values) == 0

        # Field restricted to two enum values
        status_field = Field(
            name="status",
            type="string",
            size=0,
            primary=False,
            description="Status field",
            required=False,
            enum_values=["active", "inactive"],
            indexed=True,
        )

        assert len(status_field.enum_values) == 2
        assert "active" in status_field.enum_values

    def test_row_schema_contract(self):
        """RowSchema carries its name, description and list of Field objects."""
        email_field = Field(
            name="email",
            type="string",
            size=255,
            primary=False,
            description="Customer email",
            required=True,
            enum_values=[],
            indexed=True,
        )
        row_schema = RowSchema(
            name="customers",
            description="Customer records schema",
            fields=[email_field],
        )

        assert row_schema.name == "customers"
        assert row_schema.description == "Customer records schema"
        assert len(row_schema.fields) == 1

        only_field = row_schema.fields[0]
        assert only_field.name == "email"
        assert only_field.indexed is True

    def test_structured_data_submission_contract(self):
        """StructuredDataSubmission keeps format, schema name, options, metadata and data."""
        meta = self._metadata("structured-data-001")

        csv_submission = StructuredDataSubmission(
            metadata=meta,
            format="csv",
            schema_name="customer_records",
            data=b"id,name,email\n1,John,john@example.com",
            options={"delimiter": ",", "header": "true"},
        )

        assert csv_submission.format == "csv"
        assert csv_submission.schema_name == "customer_records"
        assert csv_submission.options["delimiter"] == ","
        assert csv_submission.metadata.id == "structured-data-001"
        assert len(csv_submission.data) > 0

    def test_extracted_object_contract(self):
        """ExtractedObject with a single entry in ``values``."""
        meta = self._metadata("extracted-obj-001")

        extracted = ExtractedObject(
            metadata=meta,
            schema_name="customer_records",
            values=[{"id": "123", "name": "John Doe", "email": "john@example.com"}],
            confidence=0.95,
            source_span="John Doe (john@example.com) customer ID 123",
        )

        assert extracted.schema_name == "customer_records"
        assert extracted.values[0]["name"] == "John Doe"
        assert extracted.confidence == 0.95
        assert len(extracted.source_span) > 0
        assert extracted.metadata.id == "extracted-obj-001"

    def test_extracted_object_batch_contract(self):
        """ExtractedObject with several entries in ``values``."""
        meta = self._metadata("extracted-batch-001")

        extracted = ExtractedObject(
            metadata=meta,
            schema_name="customer_records",
            values=[
                {"id": "123", "name": "John Doe", "email": "john@example.com"},
                {"id": "124", "name": "Jane Smith", "email": "jane@example.com"},
                {"id": "125", "name": "Bob Johnson", "email": "bob@example.com"},
            ],
            confidence=0.85,
            source_span="Multiple customers found in document",
        )

        # Expected values are spelled out separately from the input above
        expected_names = ("John Doe", "Jane Smith", "Bob Johnson")
        expected_ids = ("123", "124", "125")

        assert extracted.schema_name == "customer_records"
        assert len(extracted.values) == 3
        for position, expected_name in enumerate(expected_names):
            assert extracted.values[position]["name"] == expected_name
        for position, expected_id in enumerate(expected_ids):
            assert extracted.values[position]["id"] == expected_id
        assert extracted.confidence == 0.85
        assert "Multiple customers" in extracted.source_span

    def test_extracted_object_empty_batch_contract(self):
        """ExtractedObject with an empty ``values`` list."""
        meta = self._metadata("extracted-empty-001")

        extracted = ExtractedObject(
            metadata=meta,
            schema_name="empty_schema",
            values=[],
            confidence=1.0,
            source_span="No objects found",
        )

        assert extracted.schema_name == "empty_schema"
        assert len(extracted.values) == 0
        assert extracted.confidence == 1.0


@pytest.mark.contract
class TestStructuredQueryServiceContracts:
    """Construction contracts for the structured query request/response schemas."""

    def test_nlp_to_structured_query_request_contract(self):
        """QuestionToStructuredQueryRequest keeps its question and max_results."""
        nlp_request = QuestionToStructuredQueryRequest(
            question="Show me all customers who registered last month",
            max_results=100,
        )

        assert "customers" in nlp_request.question
        assert nlp_request.max_results == 100

    def test_nlp_to_structured_query_response_contract(self):
        """QuestionToStructuredQueryResponse built without an error."""
        nlp_response = QuestionToStructuredQueryResponse(
            error=None,
            graphql_query="query { customers(filter: {registered: {gte: \"2024-01-01\"}}) { id name email } }",
            variables={"start_date": "2024-01-01"},
            detected_schemas=["customers"],
            confidence=0.92,
        )

        assert nlp_response.error is None
        assert "customers" in nlp_response.graphql_query
        assert nlp_response.detected_schemas[0] == "customers"
        assert nlp_response.confidence > 0.9

    def test_structured_query_request_contract(self):
        """StructuredQueryRequest keeps the question it was given."""
        query_request = StructuredQueryRequest(
            question="Show me customers with limit 10",
        )

        assert "customers" in query_request.question

    def test_structured_query_response_contract(self):
        """StructuredQueryResponse carrying data and an empty ``errors`` list."""
        query_response = StructuredQueryResponse(
            error=None,
            data='{"customers": [{"id": "1", "name": "John", "email": "john@example.com"}]}',
            errors=[],
        )

        assert query_response.error is None
        assert "customers" in query_response.data
        assert len(query_response.errors) == 0

    def test_structured_query_response_with_errors_contract(self):
        """StructuredQueryResponse carrying no data and one entry in ``errors``."""
        failed_response = StructuredQueryResponse(
            error=None,
            data=None,
            errors=["Field 'invalid_field' not found in schema 'customers'"],
        )

        assert failed_response.data is None
        assert len(failed_response.errors) == 1
        assert "invalid_field" in failed_response.errors[0]


@pytest.mark.contract
class TestStructuredEmbeddingsContracts:
    """Construction contract for the StructuredObjectEmbedding schema."""

    def test_structured_object_embedding_contract(self):
        """StructuredObjectEmbedding keeps schema name, object id, vectors and field embeddings."""
        meta = Metadata(
            id="struct-embed-001",
            user="test_user",
            collection="test_collection",
            metadata=[],
        )
        object_vectors = [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]
        per_field_vectors = {
            "name": [0.1, 0.2, 0.3],
            "email": [0.4, 0.5, 0.6],
        }

        object_embedding = StructuredObjectEmbedding(
            metadata=meta,
            vectors=object_vectors,
            schema_name="customer_records",
            object_id="customer_123",
            field_embeddings=per_field_vectors,
        )

        assert object_embedding.schema_name == "customer_records"
        assert object_embedding.object_id == "customer_123"
        assert len(object_embedding.vectors) == 2
        assert len(object_embedding.field_embeddings) == 2
        assert "name" in object_embedding.field_embeddings


@pytest.mark.contract
class TestStructuredDataSerializationContracts:
    """Serialization contracts for the structured data schemas.

    Each test passes a schema class and a dict of field values to
    ``serialize_deserialize_test`` (imported from conftest) and asserts
    that the result is truthy.
    """

    @staticmethod
    def _metadata():
        """Return the minimal Metadata used by every payload in this class."""
        return Metadata(id="test", user="user", collection="col", metadata=[])

    def test_structured_data_submission_serialization(self):
        """StructuredDataSubmission with a JSON bytes payload."""
        payload = {
            "metadata": self._metadata(),
            "format": "json",
            "schema_name": "test_schema",
            "data": b'{"test": "data"}',
            "options": {"encoding": "utf-8"},
        }

        assert serialize_deserialize_test(StructuredDataSubmission, payload)

    def test_extracted_object_serialization(self):
        """ExtractedObject with a single entry in ``values``."""
        payload = {
            "metadata": self._metadata(),
            "schema_name": "test_schema",
            "values": [{"field1": "value1"}],
            "confidence": 0.8,
            "source_span": "test span",
        }

        assert serialize_deserialize_test(ExtractedObject, payload)

    def test_nlp_query_serialization(self):
        """QuestionToStructuredQueryRequest and QuestionToStructuredQueryResponse."""
        # Request side
        assert serialize_deserialize_test(
            QuestionToStructuredQueryRequest,
            {
                "question": "test query",
                "max_results": 10,
            },
        )

        # Response side
        assert serialize_deserialize_test(
            QuestionToStructuredQueryResponse,
            {
                "error": None,
                "graphql_query": "query { test }",
                "variables": {},
                "detected_schemas": ["test"],
                "confidence": 0.9,
            },
        )

    def test_structured_query_serialization(self):
        """StructuredQueryRequest and StructuredQueryResponse."""
        # Request side
        assert serialize_deserialize_test(
            StructuredQueryRequest,
            {"question": "Show me all customers"},
        )

        # Response side
        assert serialize_deserialize_test(
            StructuredQueryResponse,
            {
                "error": None,
                "data": '{"customers": [{"id": "1", "name": "John"}]}',
                "errors": [],
            },
        )

    def test_extracted_object_batch_serialization(self):
        """ExtractedObject with three entries in ``values``."""
        payload = {
            "metadata": self._metadata(),
            "schema_name": "test_schema",
            "values": [
                {"field1": "value1", "field2": "value2"},
                {"field1": "value3", "field2": "value4"},
                {"field1": "value5", "field2": "value6"},
            ],
            "confidence": 0.9,
            "source_span": "batch test span",
        }

        assert serialize_deserialize_test(ExtractedObject, payload)

    def test_extracted_object_empty_batch_serialization(self):
        """ExtractedObject with an empty ``values`` list."""
        payload = {
            "metadata": self._metadata(),
            "schema_name": "test_schema",
            "values": [],
            "confidence": 1.0,
            "source_span": "empty batch",
        }

        assert serialize_deserialize_test(ExtractedObject, payload)