mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-04-25 16:36:21 +02:00
* Tech spec * Object query service for Cassandra * Gateway support for objects-query * GraphQL query utility * Filters, ordering
624 lines
No EOL
22 KiB
Python
624 lines
No EOL
22 KiB
Python
"""
|
|
Integration tests for Objects GraphQL Query Service
|
|
|
|
These tests verify end-to-end functionality including:
|
|
- Real Cassandra database operations
|
|
- Full GraphQL query execution
|
|
- Schema generation and configuration handling
|
|
- Message processing with actual Pulsar schemas
|
|
"""
|
|
|
|
import pytest
|
|
import json
|
|
import asyncio
|
|
from unittest.mock import MagicMock, AsyncMock
|
|
|
|
# Check if Docker/testcontainers is available
|
|
# Probe for Docker/testcontainers once at import time. The result gates every
# test class in this module through ``pytest.mark.skipif``.
try:
    from testcontainers.cassandra import CassandraContainer
    import docker

    # Ping the daemon to prove it is reachable, then release the client so the
    # probe does not leave an open connection pool behind for the whole run.
    _docker_client = docker.from_env()
    try:
        _docker_client.ping()
    finally:
        _docker_client.close()
    DOCKER_AVAILABLE = True
except Exception:
    # Deliberately broad: a missing package (ImportError) and an unreachable
    # daemon (docker/requests errors) both mean "skip the integration tests",
    # and must never break test collection.
    DOCKER_AVAILABLE = False
    CassandraContainer = None
|
|
|
|
from trustgraph.query.objects.cassandra.service import Processor
|
|
from trustgraph.schema import ObjectsQueryRequest, ObjectsQueryResponse, GraphQLError
|
|
from trustgraph.schema import RowSchema, Field, ExtractedObject, Metadata
|
|
|
|
|
|
@pytest.mark.integration
@pytest.mark.skipif(not DOCKER_AVAILABLE, reason="Docker/testcontainers not available")
class TestObjectsGraphQLQueryIntegration:
    """Integration tests with real Cassandra database.

    One Cassandra container is started per class (``cassandra_container``);
    each test receives a fresh ``Processor`` pointed at that container
    (``processor``) and loads the schemas it needs itself.
    """

    @pytest.fixture(scope="class")
    def cassandra_container(self):
        """Start a Cassandra container shared by all tests of this class.

        Yields the running container; it is stopped when the ``with`` block
        exits after the last test of the class.
        """
        if not DOCKER_AVAILABLE:
            pytest.skip("Docker/testcontainers not available")

        with CassandraContainer("cassandra:3.11") as cassandra:
            # NOTE(review): presumably meant as a readiness probe -- confirm
            # that the installed testcontainers version exposes this method.
            cassandra.get_connection_url()
            yield cassandra

    @pytest.fixture
    def processor(self, cassandra_container):
        """Create a processor aimed at the test container.

        Yields the processor and, on teardown, shuts down whatever cluster
        connection the test opened so connections do not pile up across tests.
        """
        host = cassandra_container.get_container_host_ip()
        # NOTE(review): only the host is forwarded to Processor; the mapped
        # CQL port (``get_exposed_port(9042)``) is not. Confirm the processor
        # can reach the container when the host port differs from 9042.

        processor = Processor(
            id="test-graphql-query",
            graph_host=host,
            # Note: testcontainer typically doesn't require auth
            graph_username=None,
            graph_password=None,
            config_type="schema",
        )

        # Override connection parameters for test container
        processor.graph_host = host
        processor.cluster = None
        processor.session = None

        yield processor

        # Presumably connect_cassandra() stores a cassandra-driver Cluster in
        # ``processor.cluster`` -- release it if a test created one.
        cluster = getattr(processor, "cluster", None)
        if cluster is not None:
            cluster.shutdown()

    @pytest.fixture
    def sample_schema_config(self):
        """Sample schema configuration for testing.

        Returns a config dict whose ``"schema"`` entry maps the schema names
        ``customer`` and ``order`` to JSON-encoded schema definitions.
        """
        return {
            "schema": {
                "customer": json.dumps({
                    "name": "customer",
                    "description": "Customer records",
                    "fields": [
                        {
                            "name": "customer_id",
                            "type": "string",
                            "primary_key": True,
                            "required": True,
                            "description": "Customer identifier",
                        },
                        {
                            "name": "name",
                            "type": "string",
                            "required": True,
                            "indexed": True,
                            "description": "Customer name",
                        },
                        {
                            "name": "email",
                            "type": "string",
                            "required": True,
                            "indexed": True,
                            "description": "Customer email",
                        },
                        {
                            "name": "status",
                            "type": "string",
                            "required": False,
                            "indexed": True,
                            "enum": ["active", "inactive", "pending"],
                            "description": "Customer status",
                        },
                        {
                            "name": "created_date",
                            "type": "timestamp",
                            "required": False,
                            "description": "Registration date",
                        },
                    ],
                }),
                "order": json.dumps({
                    "name": "order",
                    "description": "Order records",
                    "fields": [
                        {
                            "name": "order_id",
                            "type": "string",
                            "primary_key": True,
                            "required": True,
                        },
                        {
                            "name": "customer_id",
                            "type": "string",
                            "required": True,
                            "indexed": True,
                            "description": "Related customer",
                        },
                        {
                            "name": "total",
                            "type": "float",
                            "required": True,
                            "description": "Order total amount",
                        },
                        {
                            "name": "status",
                            "type": "string",
                            "indexed": True,
                            "enum": ["pending", "processing", "shipped", "delivered"],
                            "description": "Order status",
                        },
                    ],
                }),
            }
        }

    @staticmethod
    def _insert_customers(processor, keyspace, columns, rows):
        """Ensure the customer table exists in *keyspace* and insert *rows*.

        Simulates what the storage processor would do: the table is created
        through ``processor.ensure_table`` and every tuple in *rows* is written
        with one parameterised INSERT whose column list is *columns*.
        """
        schema_name = "customer"
        processor.ensure_table(keyspace, schema_name, processor.schemas[schema_name])

        safe_keyspace = processor.sanitize_name(keyspace)
        safe_table = processor.sanitize_table(schema_name)
        placeholders = ", ".join(["%s"] * len(columns))
        insert_query = (
            f"INSERT INTO {safe_keyspace}.{safe_table} "
            f"({', '.join(columns)}) VALUES ({placeholders})"
        )

        for row in rows:
            processor.session.execute(insert_query, row)

    @pytest.mark.asyncio
    async def test_schema_configuration_and_generation(self, processor, sample_schema_config):
        """Test schema configuration loading and GraphQL schema generation"""
        # Load schema configuration
        await processor.on_schema_config(sample_schema_config, version=1)

        # Verify schemas were loaded
        assert len(processor.schemas) == 2
        assert "customer" in processor.schemas
        assert "order" in processor.schemas

        # Verify customer schema
        customer_schema = processor.schemas["customer"]
        assert customer_schema.name == "customer"
        assert len(customer_schema.fields) == 5

        # Find primary key field
        pk_field = next((f for f in customer_schema.fields if f.primary), None)
        assert pk_field is not None
        assert pk_field.name == "customer_id"

        # Verify GraphQL schema was generated
        assert processor.graphql_schema is not None
        assert len(processor.graphql_types) == 2
        assert "customer" in processor.graphql_types
        assert "order" in processor.graphql_types

    @pytest.mark.asyncio
    async def test_cassandra_connection_and_table_creation(self, processor, sample_schema_config):
        """Test Cassandra connection and dynamic table creation"""
        # Load schema configuration
        await processor.on_schema_config(sample_schema_config, version=1)

        # Connect to Cassandra
        processor.connect_cassandra()
        assert processor.session is not None

        # Create test keyspace and table
        keyspace = "test_user"
        schema_name = "customer"
        schema = processor.schemas[schema_name]

        # Ensure table creation
        processor.ensure_table(keyspace, schema_name, schema)

        # Verify keyspace and table tracking
        assert keyspace in processor.known_keyspaces
        assert keyspace in processor.known_tables

        # Verify table was created by querying Cassandra system tables
        safe_keyspace = processor.sanitize_name(keyspace)
        safe_table = processor.sanitize_table(schema_name)

        # Check if table exists
        table_query = """
            SELECT table_name FROM system_schema.tables
            WHERE keyspace_name = %s AND table_name = %s
        """
        rows = list(processor.session.execute(table_query, (safe_keyspace, safe_table)))
        assert len(rows) == 1
        assert rows[0].table_name == safe_table

    @pytest.mark.asyncio
    async def test_data_insertion_and_graphql_query(self, processor, sample_schema_config):
        """Test inserting data and querying via GraphQL"""
        # Load schema and connect
        await processor.on_schema_config(sample_schema_config, version=1)
        processor.connect_cassandra()

        # Setup test data
        keyspace = "test_user"
        collection = "integration_test"

        test_customers = [
            (collection, "CUST001", "John Doe", "john@example.com", "active", "2024-01-15"),
            (collection, "CUST002", "Jane Smith", "jane@example.com", "active", "2024-01-16"),
            (collection, "CUST003", "Bob Wilson", "bob@example.com", "inactive", "2024-01-17"),
        ]

        # Insert test data directly (simulating what storage processor would do)
        self._insert_customers(
            processor,
            keyspace,
            ("collection", "customer_id", "name", "email", "status", "created_date"),
            test_customers,
        )

        # Test GraphQL query execution
        graphql_query = '''
        {
            customer_objects(collection: "integration_test") {
                customer_id
                name
                email
                status
            }
        }
        '''

        result = await processor.execute_graphql_query(
            query=graphql_query,
            variables={},
            operation_name=None,
            user=keyspace,
            collection=collection,
        )

        # Verify query results
        assert "data" in result
        assert "customer_objects" in result["data"]

        customers = result["data"]["customer_objects"]
        assert len(customers) == 3

        # Verify customer data
        customer_ids = [c["customer_id"] for c in customers]
        assert "CUST001" in customer_ids
        assert "CUST002" in customer_ids
        assert "CUST003" in customer_ids

        # Find specific customer and verify fields
        john = next(c for c in customers if c["customer_id"] == "CUST001")
        assert john["name"] == "John Doe"
        assert john["email"] == "john@example.com"
        assert john["status"] == "active"

    @pytest.mark.asyncio
    async def test_graphql_query_with_filters(self, processor, sample_schema_config):
        """Test GraphQL queries with filtering on indexed fields"""
        # Setup
        await processor.on_schema_config(sample_schema_config, version=1)
        processor.connect_cassandra()

        keyspace = "test_user"
        collection = "filter_test"

        test_data = [
            (collection, "A001", "Active User 1", "active1@test.com", "active"),
            (collection, "A002", "Active User 2", "active2@test.com", "active"),
            (collection, "I001", "Inactive User", "inactive@test.com", "inactive"),
        ]

        # Insert test data
        self._insert_customers(
            processor,
            keyspace,
            ("collection", "customer_id", "name", "email", "status"),
            test_data,
        )

        # Query with status filter (indexed field)
        filtered_query = '''
        {
            customer_objects(collection: "filter_test", status: "active") {
                customer_id
                name
                status
            }
        }
        '''

        result = await processor.execute_graphql_query(
            query=filtered_query,
            variables={},
            operation_name=None,
            user=keyspace,
            collection=collection,
        )

        # Verify filtered results
        assert "data" in result
        customers = result["data"]["customer_objects"]
        assert len(customers) == 2  # Only active customers

        for customer in customers:
            assert customer["status"] == "active"
            assert customer["customer_id"] in ["A001", "A002"]

    @pytest.mark.asyncio
    async def test_graphql_error_handling(self, processor, sample_schema_config):
        """Test GraphQL error handling for invalid queries"""
        # Setup
        await processor.on_schema_config(sample_schema_config, version=1)

        # Test invalid field query
        invalid_query = '''
        {
            customer_objects {
                customer_id
                nonexistent_field
            }
        }
        '''

        result = await processor.execute_graphql_query(
            query=invalid_query,
            variables={},
            operation_name=None,
            user="test_user",
            collection="test_collection",
        )

        # Verify error response
        assert "errors" in result
        assert len(result["errors"]) > 0

        error = result["errors"][0]
        assert "message" in error
        # GraphQL error should mention the invalid field
        assert "nonexistent_field" in error["message"] or "Cannot query field" in error["message"]

    @pytest.mark.asyncio
    async def test_message_processing_integration(self, processor, sample_schema_config):
        """Test full message processing workflow"""
        # Setup
        await processor.on_schema_config(sample_schema_config, version=1)
        processor.connect_cassandra()

        # Create mock message
        request = ObjectsQueryRequest(
            user="msg_test_user",
            collection="msg_test_collection",
            query='{ customer_objects { customer_id name } }',
            variables={},
            operation_name="",
        )

        mock_msg = MagicMock()
        mock_msg.value.return_value = request
        mock_msg.properties.return_value = {"id": "integration-test-123"}

        # Mock flow for response: calling the flow hands back the producer
        mock_response_producer = AsyncMock()
        mock_flow = MagicMock()
        mock_flow.return_value = mock_response_producer

        # Process message
        await processor.on_message(mock_msg, None, mock_flow)

        # Verify response was sent
        mock_response_producer.send.assert_called_once()

        # Verify response structure
        sent_response = mock_response_producer.send.call_args[0][0]
        assert isinstance(sent_response, ObjectsQueryResponse)

        # Should have no system error (even if no data)
        assert sent_response.error is None

        # Data should be JSON string (even if empty result)
        assert sent_response.data is not None
        assert isinstance(sent_response.data, str)

        # Should be able to parse as JSON
        parsed_data = json.loads(sent_response.data)
        assert isinstance(parsed_data, dict)

    @pytest.mark.asyncio
    async def test_concurrent_queries(self, processor, sample_schema_config):
        """Test handling multiple concurrent GraphQL queries"""
        # Setup
        await processor.on_schema_config(sample_schema_config, version=1)
        processor.connect_cassandra()

        queries = [
            '{ customer_objects { customer_id } }',
            '{ order_objects { order_id } }',
            '{ customer_objects { name email } }',
            '{ order_objects { total status } }',
        ]

        # One coroutine per query, each against its own user/collection
        tasks = [
            processor.execute_graphql_query(
                query=query,
                variables={},
                operation_name=None,
                user=f"concurrent_user_{i}",
                collection=f"concurrent_collection_{i}",
            )
            for i, query in enumerate(queries)
        ]

        # Wait for all queries to complete; exceptions are returned, not raised
        results = await asyncio.gather(*tasks, return_exceptions=True)

        # Verify all queries completed without exceptions
        for i, result in enumerate(results):
            assert not isinstance(result, Exception), f"Query {i} failed: {result}"
            assert "data" in result or "errors" in result

    @pytest.mark.asyncio
    async def test_schema_update_handling(self, processor):
        """Test handling of schema configuration updates"""
        # Load initial schema
        initial_config = {
            "schema": {
                "simple": json.dumps({
                    "name": "simple",
                    "fields": [{"name": "id", "type": "string", "primary_key": True}],
                })
            }
        }

        await processor.on_schema_config(initial_config, version=1)
        assert len(processor.schemas) == 1
        assert "simple" in processor.schemas

        # Update with additional schema
        updated_config = {
            "schema": {
                "simple": json.dumps({
                    "name": "simple",
                    "fields": [
                        {"name": "id", "type": "string", "primary_key": True},
                        {"name": "name", "type": "string"},  # New field
                    ],
                }),
                "complex": json.dumps({
                    "name": "complex",
                    "fields": [
                        {"name": "id", "type": "string", "primary_key": True},
                        {"name": "data", "type": "string"},
                    ],
                }),
            }
        }

        await processor.on_schema_config(updated_config, version=2)

        # Verify updated schemas
        assert len(processor.schemas) == 2
        assert "simple" in processor.schemas
        assert "complex" in processor.schemas

        # Verify simple schema was updated
        simple_schema = processor.schemas["simple"]
        assert len(simple_schema.fields) == 2

        # Verify GraphQL schema was regenerated
        assert len(processor.graphql_types) == 2

    @pytest.mark.asyncio
    async def test_large_result_set_handling(self, processor, sample_schema_config):
        """Test handling of large query result sets"""
        # Setup
        await processor.on_schema_config(sample_schema_config, version=1)
        processor.connect_cassandra()

        keyspace = "large_test_user"
        collection = "large_collection"

        # Insert 50 records, alternating between active and inactive
        rows = [
            (
                collection,
                f"CUST{i:03d}",
                f"Customer {i}",
                f"customer{i}@test.com",
                "active" if i % 2 == 0 else "inactive",
            )
            for i in range(50)
        ]
        self._insert_customers(
            processor,
            keyspace,
            ("collection", "customer_id", "name", "email", "status"),
            rows,
        )

        # Query with limit
        limited_query = '''
        {
            customer_objects(collection: "large_collection", limit: 10) {
                customer_id
                name
            }
        }
        '''

        result = await processor.execute_graphql_query(
            query=limited_query,
            variables={},
            operation_name=None,
            user=keyspace,
            collection=collection,
        )

        # Verify limited results
        assert "data" in result
        customers = result["data"]["customer_objects"]
        assert len(customers) <= 10  # Should be limited
|
|
|
|
|
|
@pytest.mark.integration
@pytest.mark.skipif(not DOCKER_AVAILABLE, reason="Docker/testcontainers not available")
class TestObjectsGraphQLQueryPerformance:
    """Performance-focused integration tests"""

    @pytest.fixture(scope="class")
    def cassandra_container(self):
        """Start a Cassandra container for the tests of this class.

        Fixtures defined as methods of another test class are not visible
        here, so this class needs its own ``cassandra_container``.
        """
        if not DOCKER_AVAILABLE:
            pytest.skip("Docker/testcontainers not available")

        with CassandraContainer("cassandra:3.11") as cassandra:
            # NOTE(review): presumably meant as a readiness probe -- confirm
            # that the installed testcontainers version exposes this method.
            cassandra.get_connection_url()
            yield cassandra

    @pytest.mark.asyncio
    async def test_query_execution_timing(self, cassandra_container):
        """Test that a query against an empty table completes within a second"""
        import time

        # Create a processor pointed at the test container. No timeout is
        # configured here; the one-second bound below is enforced by the test.
        host = cassandra_container.get_container_host_ip()

        processor = Processor(
            id="perf-test-graphql-query",
            graph_host=host,
            config_type="schema",
        )

        # Load minimal schema
        schema_config = {
            "schema": {
                "perf_test": json.dumps({
                    "name": "perf_test",
                    "fields": [{"name": "id", "type": "string", "primary_key": True}],
                })
            }
        }

        try:
            await processor.on_schema_config(schema_config, version=1)

            # Measure with a monotonic clock so wall-clock adjustments cannot
            # distort (or make negative) the measured duration.
            start_time = time.perf_counter()

            result = await processor.execute_graphql_query(
                query='{ perf_test_objects { id } }',
                variables={},
                operation_name=None,
                user="perf_user",
                collection="perf_collection",
            )

            execution_time = time.perf_counter() - start_time

            # Verify reasonable execution time (should be under 1 second for empty result)
            assert execution_time < 1.0

            # Verify result structure
            assert "data" in result or "errors" in result
        finally:
            # Presumably the processor keeps a cassandra-driver Cluster in
            # ``cluster`` once it has connected -- release it if present.
            cluster = getattr(processor, "cluster", None)
            if cluster is not None:
                cluster.shutdown()