Merge 2.0 to master (#651)

This commit is contained in:
cybermaggedon 2026-02-28 11:03:14 +00:00 committed by GitHub
parent 3666ece2c5
commit b9d7bf9a8b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
212 changed files with 13940 additions and 6180 deletions

View file

@ -15,10 +15,10 @@ from trustgraph.schema import (
TextCompletionRequest, TextCompletionResponse,
DocumentRagQuery, DocumentRagResponse,
AgentRequest, AgentResponse, AgentStep,
Chunk, Triple, Triples, Value, Error,
Chunk, Triple, Triples, Term, Error,
EntityContext, EntityContexts,
GraphEmbeddings, EntityEmbeddings,
Metadata
Metadata, IRI, LITERAL
)
@ -43,7 +43,7 @@ def schema_registry():
"Chunk": Chunk,
"Triple": Triple,
"Triples": Triples,
"Value": Value,
"Term": Term,
"Error": Error,
"EntityContext": EntityContext,
"EntityContexts": EntityContexts,
@ -98,26 +98,22 @@ def sample_message_data():
"collection": "test_collection",
"metadata": []
},
"Value": {
"value": "http://example.com/entity",
"is_uri": True,
"type": ""
"Term": {
"type": IRI,
"iri": "http://example.com/entity"
},
"Triple": {
"s": Value(
value="http://example.com/subject",
is_uri=True,
type=""
"s": Term(
type=IRI,
iri="http://example.com/subject"
),
"p": Value(
value="http://example.com/predicate",
is_uri=True,
type=""
"p": Term(
type=IRI,
iri="http://example.com/predicate"
),
"o": Value(
value="Object value",
is_uri=False,
type=""
"o": Term(
type=LITERAL,
value="Object value"
)
}
}
@ -139,10 +135,10 @@ def invalid_message_data():
{"query": "test", "user": "test", "collection": "test", "doc_limit": -1}, # Invalid doc_limit
{"query": "test"}, # Missing required fields
],
"Value": [
{"value": None, "is_uri": True, "type": ""}, # Invalid value (None)
{"value": "test", "is_uri": "not_boolean", "type": ""}, # Invalid is_uri
{"value": 123, "is_uri": True, "type": ""}, # Invalid value (not string)
"Term": [
{"type": IRI, "iri": None}, # Invalid iri (None)
{"type": "invalid_type", "value": "test"}, # Invalid type
{"type": LITERAL, "value": 123}, # Invalid value (not string)
]
}

View file

@ -15,14 +15,14 @@ from trustgraph.schema import (
TextCompletionRequest, TextCompletionResponse,
DocumentRagQuery, DocumentRagResponse,
AgentRequest, AgentResponse, AgentStep,
Chunk, Triple, Triples, Value, Error,
Chunk, Triple, Triples, Term, Error,
EntityContext, EntityContexts,
GraphEmbeddings, EntityEmbeddings,
Metadata, Field, RowSchema,
StructuredDataSubmission, ExtractedObject,
QuestionToStructuredQueryRequest, QuestionToStructuredQueryResponse,
StructuredQueryRequest, StructuredQueryResponse,
StructuredObjectEmbedding
StructuredObjectEmbedding, IRI, LITERAL
)
from .conftest import validate_schema_contract, serialize_deserialize_test
@ -271,52 +271,51 @@ class TestAgentMessageContracts:
class TestGraphMessageContracts:
"""Contract tests for Graph/Knowledge message schemas"""
def test_value_schema_contract(self, sample_message_data):
"""Test Value schema contract"""
def test_term_schema_contract(self, sample_message_data):
"""Test Term schema contract"""
# Arrange
value_data = sample_message_data["Value"]
term_data = sample_message_data["Term"]
# Act & Assert
assert validate_schema_contract(Value, value_data)
# Test URI value
uri_value = Value(**value_data)
assert uri_value.value == "http://example.com/entity"
assert uri_value.is_uri is True
assert validate_schema_contract(Term, term_data)
# Test literal value
literal_value = Value(
value="Literal text value",
is_uri=False,
type=""
# Test URI term
uri_term = Term(**term_data)
assert uri_term.iri == "http://example.com/entity"
assert uri_term.type == IRI
# Test literal term
literal_term = Term(
type=LITERAL,
value="Literal text value"
)
assert literal_value.value == "Literal text value"
assert literal_value.is_uri is False
assert literal_term.value == "Literal text value"
assert literal_term.type == LITERAL
def test_triple_schema_contract(self, sample_message_data):
"""Test Triple schema contract"""
# Arrange
triple_data = sample_message_data["Triple"]
# Act & Assert - Triple uses Value objects, not dict validation
# Act & Assert - Triple uses Term objects, not dict validation
triple = Triple(
s=triple_data["s"],
p=triple_data["p"],
p=triple_data["p"],
o=triple_data["o"]
)
assert triple.s.value == "http://example.com/subject"
assert triple.p.value == "http://example.com/predicate"
assert triple.s.iri == "http://example.com/subject"
assert triple.p.iri == "http://example.com/predicate"
assert triple.o.value == "Object value"
assert triple.s.is_uri is True
assert triple.p.is_uri is True
assert triple.o.is_uri is False
assert triple.s.type == IRI
assert triple.p.type == IRI
assert triple.o.type == LITERAL
def test_triples_schema_contract(self, sample_message_data):
"""Test Triples (batch) schema contract"""
# Arrange
metadata = Metadata(**sample_message_data["Metadata"])
triple = Triple(**sample_message_data["Triple"])
triples_data = {
"metadata": metadata,
"triples": [triple]
@ -324,11 +323,11 @@ class TestGraphMessageContracts:
# Act & Assert
assert validate_schema_contract(Triples, triples_data)
triples = Triples(**triples_data)
assert triples.metadata.id == "test-doc-123"
assert len(triples.triples) == 1
assert triples.triples[0].s.value == "http://example.com/subject"
assert triples.triples[0].s.iri == "http://example.com/subject"
def test_chunk_schema_contract(self, sample_message_data):
"""Test Chunk schema contract"""
@ -349,29 +348,29 @@ class TestGraphMessageContracts:
def test_entity_context_schema_contract(self):
"""Test EntityContext schema contract"""
# Arrange
entity_value = Value(value="http://example.com/entity", is_uri=True, type="")
entity_term = Term(type=IRI, iri="http://example.com/entity")
entity_context_data = {
"entity": entity_value,
"entity": entity_term,
"context": "Context information about the entity"
}
# Act & Assert
assert validate_schema_contract(EntityContext, entity_context_data)
entity_context = EntityContext(**entity_context_data)
assert entity_context.entity.value == "http://example.com/entity"
assert entity_context.entity.iri == "http://example.com/entity"
assert entity_context.context == "Context information about the entity"
def test_entity_contexts_batch_schema_contract(self, sample_message_data):
"""Test EntityContexts (batch) schema contract"""
# Arrange
metadata = Metadata(**sample_message_data["Metadata"])
entity_value = Value(value="http://example.com/entity", is_uri=True, type="")
entity_term = Term(type=IRI, iri="http://example.com/entity")
entity_context = EntityContext(
entity=entity_value,
entity=entity_term,
context="Entity context"
)
entity_contexts_data = {
"metadata": metadata,
"entities": [entity_context]
@ -379,7 +378,7 @@ class TestGraphMessageContracts:
# Act & Assert
assert validate_schema_contract(EntityContexts, entity_contexts_data)
entity_contexts = EntityContexts(**entity_contexts_data)
assert entity_contexts.metadata.id == "test-doc-123"
assert len(entity_contexts.entities) == 1
@ -417,10 +416,10 @@ class TestMetadataMessageContracts:
# Act & Assert
assert validate_schema_contract(Metadata, metadata_data)
metadata = Metadata(**metadata_data)
assert len(metadata.metadata) == 1
assert metadata.metadata[0].s.value == "http://example.com/subject"
assert metadata.metadata[0].s.iri == "http://example.com/subject"
def test_error_schema_contract(self):
"""Test Error schema contract"""
@ -532,7 +531,7 @@ class TestSerializationContracts:
# Test each schema in the registry
for schema_name, schema_class in schema_registry.items():
if schema_name in sample_message_data:
# Skip Triple schema as it requires special handling with Value objects
# Skip Triple schema as it requires special handling with Term objects
if schema_name == "Triple":
continue
@ -541,36 +540,36 @@ class TestSerializationContracts:
assert serialize_deserialize_test(schema_class, data), f"Serialization failed for {schema_name}"
def test_triple_serialization_contract(self, sample_message_data):
"""Test Triple schema serialization contract with Value objects"""
"""Test Triple schema serialization contract with Term objects"""
# Arrange
triple_data = sample_message_data["Triple"]
# Act
triple = Triple(
s=triple_data["s"],
p=triple_data["p"],
p=triple_data["p"],
o=triple_data["o"]
)
# Assert - Test that Value objects are properly constructed and accessible
assert triple.s.value == "http://example.com/subject"
assert triple.p.value == "http://example.com/predicate"
# Assert - Test that Term objects are properly constructed and accessible
assert triple.s.iri == "http://example.com/subject"
assert triple.p.iri == "http://example.com/predicate"
assert triple.o.value == "Object value"
assert isinstance(triple.s, Value)
assert isinstance(triple.p, Value)
assert isinstance(triple.o, Value)
assert isinstance(triple.s, Term)
assert isinstance(triple.p, Term)
assert isinstance(triple.o, Term)
def test_nested_schema_serialization_contract(self, sample_message_data):
"""Test serialization of nested schemas"""
# Test Triples (contains Metadata and Triple objects)
metadata = Metadata(**sample_message_data["Metadata"])
triple = Triple(**sample_message_data["Triple"])
triples = Triples(metadata=metadata, triples=[triple])
# Verify nested objects maintain their contracts
assert triples.metadata.id == "test-doc-123"
assert triples.triples[0].s.value == "http://example.com/subject"
assert triples.triples[0].s.iri == "http://example.com/subject"
def test_array_field_serialization_contract(self):
"""Test serialization of array fields"""

View file

@ -1,8 +1,8 @@
"""
Contract tests for Cassandra Object Storage
Contract tests for Cassandra Row Storage
These tests verify the message contracts and schema compatibility
for the objects storage processor.
for the rows storage processor.
"""
import pytest
@ -10,12 +10,12 @@ import json
from pulsar.schema import AvroSchema
from trustgraph.schema import ExtractedObject, Metadata, RowSchema, Field
from trustgraph.storage.objects.cassandra.write import Processor
from trustgraph.storage.rows.cassandra.write import Processor
@pytest.mark.contract
class TestObjectsCassandraContracts:
"""Contract tests for Cassandra object storage messages"""
class TestRowsCassandraContracts:
"""Contract tests for Cassandra row storage messages"""
def test_extracted_object_input_contract(self):
"""Test that ExtractedObject schema matches expected input format"""
@ -145,50 +145,6 @@ class TestObjectsCassandraContracts:
assert required_field_keys.issubset(field.keys())
assert set(field.keys()).issubset(required_field_keys | optional_field_keys)
def test_cassandra_type_mapping_contract(self):
"""Test that all supported field types have Cassandra mappings"""
processor = Processor.__new__(Processor)
# All field types that should be supported
supported_types = [
("string", "text"),
("integer", "int"), # or bigint based on size
("float", "float"), # or double based on size
("boolean", "boolean"),
("timestamp", "timestamp"),
("date", "date"),
("time", "time"),
("uuid", "uuid")
]
for field_type, expected_cassandra_type in supported_types:
cassandra_type = processor.get_cassandra_type(field_type)
# For integer and float, the exact type depends on size
if field_type in ["integer", "float"]:
assert cassandra_type in ["int", "bigint", "float", "double"]
else:
assert cassandra_type == expected_cassandra_type
def test_value_conversion_contract(self):
"""Test value conversion for all supported types"""
processor = Processor.__new__(Processor)
# Test conversions maintain data integrity
test_cases = [
# (input_value, field_type, expected_output, expected_type)
("123", "integer", 123, int),
("123.45", "float", 123.45, float),
("true", "boolean", True, bool),
("false", "boolean", False, bool),
("test string", "string", "test string", str),
(None, "string", None, type(None)),
]
for input_val, field_type, expected_val, expected_type in test_cases:
result = processor.convert_value(input_val, field_type)
assert result == expected_val
assert isinstance(result, expected_type) or result is None
@pytest.mark.skip(reason="ExtractedObject is a dataclass, not a Pulsar Record type")
def test_extracted_object_serialization_contract(self):
"""Test that ExtractedObject can be serialized/deserialized correctly"""
@ -222,43 +178,31 @@ class TestObjectsCassandraContracts:
assert decoded.confidence == original.confidence
assert decoded.source_span == original.source_span
def test_cassandra_table_naming_contract(self):
def test_cassandra_name_sanitization_contract(self):
"""Test Cassandra naming conventions and constraints"""
processor = Processor.__new__(Processor)
# Test table naming (always gets o_ prefix)
table_test_names = [
("simple_name", "o_simple_name"),
("Name-With-Dashes", "o_name_with_dashes"),
("name.with.dots", "o_name_with_dots"),
("123_numbers", "o_123_numbers"),
("special!@#chars", "o_special___chars"), # 3 special chars become 3 underscores
("UPPERCASE", "o_uppercase"),
("CamelCase", "o_camelcase"),
("", "o_"), # Edge case - empty string becomes o_
]
for input_name, expected_name in table_test_names:
result = processor.sanitize_table(input_name)
assert result == expected_name
# Verify result is valid Cassandra identifier (starts with letter)
assert result.startswith('o_')
assert result.replace('o_', '').replace('_', '').isalnum() or result == 'o_'
# Test regular name sanitization (only adds o_ prefix if starts with number)
# Test name sanitization for Cassandra identifiers
# - Non-alphanumeric chars (except underscore) become underscores
# - Names starting with non-letter get 'r_' prefix
# - All names converted to lowercase
name_test_cases = [
("simple_name", "simple_name"),
("Name-With-Dashes", "name_with_dashes"),
("name.with.dots", "name_with_dots"),
("123_numbers", "o_123_numbers"), # Only this gets o_ prefix
("123_numbers", "r_123_numbers"), # Gets r_ prefix (starts with number)
("special!@#chars", "special___chars"), # 3 special chars become 3 underscores
("UPPERCASE", "uppercase"),
("CamelCase", "camelcase"),
("_underscore_start", "r__underscore_start"), # Gets r_ prefix (starts with underscore)
]
for input_name, expected_name in name_test_cases:
result = processor.sanitize_name(input_name)
assert result == expected_name
assert result == expected_name, f"Expected {expected_name} but got {result} for input {input_name}"
# Verify result is valid Cassandra identifier (starts with letter)
if result: # Skip empty string case
assert result[0].isalpha(), f"Result {result} should start with a letter"
def test_primary_key_structure_contract(self):
"""Test that primary key structure follows Cassandra best practices"""
@ -308,8 +252,8 @@ class TestObjectsCassandraContracts:
@pytest.mark.contract
class TestObjectsCassandraContractsBatch:
"""Contract tests for Cassandra object storage batch processing"""
class TestRowsCassandraContractsBatch:
"""Contract tests for Cassandra row storage batch processing"""
def test_extracted_object_batch_input_contract(self):
"""Test that batched ExtractedObject schema matches expected input format"""

View file

@ -1,26 +1,26 @@
"""
Contract tests for Objects GraphQL Query Service
Contract tests for Rows GraphQL Query Service
These tests verify the message contracts and schema compatibility
for the objects GraphQL query processor.
for the rows GraphQL query processor.
"""
import pytest
import json
from pulsar.schema import AvroSchema
from trustgraph.schema import ObjectsQueryRequest, ObjectsQueryResponse, GraphQLError
from trustgraph.query.objects.cassandra.service import Processor
from trustgraph.schema import RowsQueryRequest, RowsQueryResponse, GraphQLError
from trustgraph.query.rows.cassandra.service import Processor
@pytest.mark.contract
class TestObjectsGraphQLQueryContracts:
class TestRowsGraphQLQueryContracts:
"""Contract tests for GraphQL query service messages"""
def test_objects_query_request_contract(self):
"""Test ObjectsQueryRequest schema structure and required fields"""
def test_rows_query_request_contract(self):
"""Test RowsQueryRequest schema structure and required fields"""
# Create test request with all required fields
test_request = ObjectsQueryRequest(
test_request = RowsQueryRequest(
user="test_user",
collection="test_collection",
query='{ customers { id name email } }',
@ -49,10 +49,10 @@ class TestObjectsGraphQLQueryContracts:
assert test_request.variables["status"] == "active"
assert test_request.operation_name == "GetCustomers"
def test_objects_query_request_minimal(self):
"""Test ObjectsQueryRequest with minimal required fields"""
def test_rows_query_request_minimal(self):
"""Test RowsQueryRequest with minimal required fields"""
# Create request with only essential fields
minimal_request = ObjectsQueryRequest(
minimal_request = RowsQueryRequest(
user="user",
collection="collection",
query='{ test }',
@ -91,10 +91,10 @@ class TestObjectsGraphQLQueryContracts:
assert test_error.path == ["customers", "0", "nonexistent"]
assert test_error.extensions["code"] == "FIELD_ERROR"
def test_objects_query_response_success_contract(self):
"""Test ObjectsQueryResponse schema for successful queries"""
def test_rows_query_response_success_contract(self):
"""Test RowsQueryResponse schema for successful queries"""
# Create successful response
success_response = ObjectsQueryResponse(
success_response = RowsQueryResponse(
error=None,
data='{"customers": [{"id": "1", "name": "John", "email": "john@example.com"}]}',
errors=[],
@ -119,11 +119,11 @@ class TestObjectsGraphQLQueryContracts:
assert len(parsed_data["customers"]) == 1
assert parsed_data["customers"][0]["id"] == "1"
def test_objects_query_response_error_contract(self):
"""Test ObjectsQueryResponse schema for error cases"""
def test_rows_query_response_error_contract(self):
"""Test RowsQueryResponse schema for error cases"""
# Create GraphQL errors - work around Pulsar Array(Record) validation bug
# by creating a response without the problematic errors array first
error_response = ObjectsQueryResponse(
error_response = RowsQueryResponse(
error=None, # System error is None - these are GraphQL errors
data=None, # No data due to errors
errors=[], # Empty errors array to avoid Pulsar bug
@ -160,14 +160,14 @@ class TestObjectsGraphQLQueryContracts:
assert validation_error.path == ["customers", "email"]
assert validation_error.extensions["details"] == "Invalid email format"
def test_objects_query_response_system_error_contract(self):
"""Test ObjectsQueryResponse schema for system errors"""
def test_rows_query_response_system_error_contract(self):
"""Test RowsQueryResponse schema for system errors"""
from trustgraph.schema import Error
# Create system error response
system_error_response = ObjectsQueryResponse(
system_error_response = RowsQueryResponse(
error=Error(
type="objects-query-error",
type="rows-query-error",
message="Failed to connect to Cassandra cluster"
),
data=None,
@ -177,7 +177,7 @@ class TestObjectsGraphQLQueryContracts:
# Verify system error structure
assert system_error_response.error is not None
assert system_error_response.error.type == "objects-query-error"
assert system_error_response.error.type == "rows-query-error"
assert "Cassandra" in system_error_response.error.message
assert system_error_response.data is None
assert len(system_error_response.errors) == 0
@ -186,7 +186,7 @@ class TestObjectsGraphQLQueryContracts:
def test_request_response_serialization_contract(self):
"""Test that request/response can be serialized/deserialized correctly"""
# Create original request
original_request = ObjectsQueryRequest(
original_request = RowsQueryRequest(
user="serialization_test",
collection="test_data",
query='{ orders(limit: 5) { id total customer { name } } }',
@ -195,7 +195,7 @@ class TestObjectsGraphQLQueryContracts:
)
# Test request serialization using Pulsar schema
request_schema = AvroSchema(ObjectsQueryRequest)
request_schema = AvroSchema(RowsQueryRequest)
# Encode and decode request
encoded_request = request_schema.encode(original_request)
@ -209,7 +209,7 @@ class TestObjectsGraphQLQueryContracts:
assert decoded_request.operation_name == original_request.operation_name
# Create original response - work around Pulsar Array(Record) bug
original_response = ObjectsQueryResponse(
original_response = RowsQueryResponse(
error=None,
data='{"orders": []}',
errors=[], # Empty to avoid Pulsar validation bug
@ -224,7 +224,7 @@ class TestObjectsGraphQLQueryContracts:
)
# Test response serialization
response_schema = AvroSchema(ObjectsQueryResponse)
response_schema = AvroSchema(RowsQueryResponse)
# Encode and decode response
encoded_response = response_schema.encode(original_response)
@ -244,7 +244,7 @@ class TestObjectsGraphQLQueryContracts:
def test_graphql_query_format_contract(self):
"""Test supported GraphQL query formats"""
# Test basic query
basic_query = ObjectsQueryRequest(
basic_query = RowsQueryRequest(
user="test", collection="test", query='{ customers { id } }',
variables={}, operation_name=""
)
@ -253,7 +253,7 @@ class TestObjectsGraphQLQueryContracts:
assert basic_query.query.strip().endswith('}')
# Test query with variables
parameterized_query = ObjectsQueryRequest(
parameterized_query = RowsQueryRequest(
user="test", collection="test",
query='query GetCustomers($status: String, $limit: Int) { customers(status: $status, limit: $limit) { id name } }',
variables={"status": "active", "limit": "10"},
@ -265,7 +265,7 @@ class TestObjectsGraphQLQueryContracts:
assert parameterized_query.operation_name == "GetCustomers"
# Test complex nested query
nested_query = ObjectsQueryRequest(
nested_query = RowsQueryRequest(
user="test", collection="test",
query='''
{
@ -296,7 +296,7 @@ class TestObjectsGraphQLQueryContracts:
# Note: Current schema uses Map(String()) which only supports string values
# This test verifies the current contract, though ideally we'd support all JSON types
variables_test = ObjectsQueryRequest(
variables_test = RowsQueryRequest(
user="test", collection="test", query='{ test }',
variables={
"string_var": "test_value",
@ -319,7 +319,7 @@ class TestObjectsGraphQLQueryContracts:
def test_cassandra_context_fields_contract(self):
"""Test that request contains necessary fields for Cassandra operations"""
# Verify request has fields needed for Cassandra keyspace/table targeting
request = ObjectsQueryRequest(
request = RowsQueryRequest(
user="keyspace_name", # Maps to Cassandra keyspace
collection="partition_collection", # Used in partition key
query='{ objects { id } }',
@ -338,7 +338,7 @@ class TestObjectsGraphQLQueryContracts:
def test_graphql_extensions_contract(self):
"""Test GraphQL extensions field format and usage"""
# Extensions should support query metadata
response_with_extensions = ObjectsQueryResponse(
response_with_extensions = RowsQueryResponse(
error=None,
data='{"test": "data"}',
errors=[],
@ -404,7 +404,7 @@ class TestObjectsGraphQLQueryContracts:
'''
# Request to execute specific operation
multi_op_request = ObjectsQueryRequest(
multi_op_request = RowsQueryRequest(
user="test", collection="test",
query=multi_op_query,
variables={},
@ -417,7 +417,7 @@ class TestObjectsGraphQLQueryContracts:
assert "GetOrders" in multi_op_request.query
# Test single operation (operation_name optional)
single_op_request = ObjectsQueryRequest(
single_op_request = RowsQueryRequest(
user="test", collection="test",
query='{ customers { id } }',
variables={}, operation_name=""

View file

@ -15,7 +15,7 @@ from trustgraph.schema import (
QuestionToStructuredQueryRequest, QuestionToStructuredQueryResponse,
StructuredQueryRequest, StructuredQueryResponse,
StructuredObjectEmbedding, Field, RowSchema,
Metadata, Error, Value
Metadata, Error
)
from .conftest import serialize_deserialize_test