Structured query support (#492)

* Tweak the structured query schema

* Structure query service

* Gateway support for nlp-query and structured-query

* API support

* Added CLI

* Update tests

* More tests
This commit is contained in:
cybermaggedon 2025-09-04 16:06:18 +01:00 committed by GitHub
parent 8d4aa0069c
commit a6d9f5e849
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
22 changed files with 2813 additions and 31 deletions

View file

@ -20,7 +20,7 @@ from trustgraph.schema import (
GraphEmbeddings, EntityEmbeddings,
Metadata, Field, RowSchema,
StructuredDataSubmission, ExtractedObject,
NLPToStructuredQueryRequest, NLPToStructuredQueryResponse,
QuestionToStructuredQueryRequest, QuestionToStructuredQueryResponse,
StructuredQueryRequest, StructuredQueryResponse,
StructuredObjectEmbedding
)

View file

@ -12,7 +12,7 @@ from typing import Dict, Any
from trustgraph.schema import (
StructuredDataSubmission, ExtractedObject,
NLPToStructuredQueryRequest, NLPToStructuredQueryResponse,
QuestionToStructuredQueryRequest, QuestionToStructuredQueryResponse,
StructuredQueryRequest, StructuredQueryResponse,
StructuredObjectEmbedding, Field, RowSchema,
Metadata, Error, Value
@ -146,23 +146,21 @@ class TestStructuredQueryServiceContracts:
"""Contract tests for structured query services"""
def test_nlp_to_structured_query_request_contract(self):
"""Test NLPToStructuredQueryRequest schema contract"""
"""Test QuestionToStructuredQueryRequest schema contract"""
# Act
request = NLPToStructuredQueryRequest(
natural_language_query="Show me all customers who registered last month",
max_results=100,
context_hints={"time_range": "last_month", "entity_type": "customer"}
request = QuestionToStructuredQueryRequest(
question="Show me all customers who registered last month",
max_results=100
)
# Assert
assert "customers" in request.natural_language_query
assert "customers" in request.question
assert request.max_results == 100
assert request.context_hints["time_range"] == "last_month"
def test_nlp_to_structured_query_response_contract(self):
"""Test NLPToStructuredQueryResponse schema contract"""
"""Test QuestionToStructuredQueryResponse schema contract"""
# Act
response = NLPToStructuredQueryResponse(
response = QuestionToStructuredQueryResponse(
error=None,
graphql_query="query { customers(filter: {registered: {gte: \"2024-01-01\"}}) { id name email } }",
variables={"start_date": "2024-01-01"},
@ -180,15 +178,11 @@ class TestStructuredQueryServiceContracts:
"""Test StructuredQueryRequest schema contract"""
# Act
request = StructuredQueryRequest(
query="query GetCustomers($limit: Int) { customers(limit: $limit) { id name email } }",
variables={"limit": "10"},
operation_name="GetCustomers"
question="Show me customers with limit 10"
)
# Assert
assert "customers" in request.query
assert request.variables["limit"] == "10"
assert request.operation_name == "GetCustomers"
assert "customers" in request.question
def test_structured_query_response_contract(self):
"""Test StructuredQueryResponse schema contract"""
@ -291,11 +285,10 @@ class TestStructuredDataSerializationContracts:
"""Test NLP query request/response serialization contract"""
# Test request
request_data = {
"natural_language_query": "test query",
"max_results": 10,
"context_hints": {}
"question": "test query",
"max_results": 10
}
assert serialize_deserialize_test(NLPToStructuredQueryRequest, request_data)
assert serialize_deserialize_test(QuestionToStructuredQueryRequest, request_data)
# Test response
response_data = {
@ -305,4 +298,20 @@ class TestStructuredDataSerializationContracts:
"detected_schemas": ["test"],
"confidence": 0.9
}
assert serialize_deserialize_test(NLPToStructuredQueryResponse, response_data)
assert serialize_deserialize_test(QuestionToStructuredQueryResponse, response_data)
def test_structured_query_serialization(self):
"""Test structured query request/response serialization contract"""
# Test request
request_data = {
"question": "Show me all customers"
}
assert serialize_deserialize_test(StructuredQueryRequest, request_data)
# Test response
response_data = {
"error": None,
"data": '{"customers": [{"id": "1", "name": "John"}]}',
"errors": []
}
assert serialize_deserialize_test(StructuredQueryResponse, response_data)

View file

@ -0,0 +1,539 @@
"""
Integration tests for NLP Query Service
These tests verify the end-to-end functionality of the NLP query service,
testing service coordination, prompt service integration, and schema processing.
Following the TEST_STRATEGY.md approach for integration testing.
"""
import pytest
import json
from unittest.mock import AsyncMock, MagicMock, patch
from trustgraph.schema import (
QuestionToStructuredQueryRequest, QuestionToStructuredQueryResponse,
PromptRequest, PromptResponse, Error, RowSchema, Field as SchemaField
)
from trustgraph.retrieval.nlp_query.service import Processor
@pytest.mark.integration
class TestNLPQueryServiceIntegration:
"""Integration tests for NLP query service coordination"""
# NOTE(review): leading indentation appears to have been lost in this
# extraction; the fixtures and tests below are logically members of
# this class — confirm against the original file before editing.
@pytest.fixture
def sample_schemas(self):
"""Sample schemas for testing"""
# Three representative RowSchema definitions (customers, orders,
# products), each with one primary field, used by the tests below.
return {
"customers": RowSchema(
name="customers",
description="Customer data with contact information",
fields=[
SchemaField(name="id", type="string", primary=True),
SchemaField(name="name", type="string"),
SchemaField(name="email", type="string"),
SchemaField(name="state", type="string"),
SchemaField(name="phone", type="string")
]
),
"orders": RowSchema(
name="orders",
description="Customer order transactions",
fields=[
SchemaField(name="order_id", type="string", primary=True),
SchemaField(name="customer_id", type="string"),
SchemaField(name="total", type="float"),
SchemaField(name="status", type="string"),
SchemaField(name="order_date", type="datetime")
]
),
"products": RowSchema(
name="products",
description="Product catalog information",
fields=[
SchemaField(name="product_id", type="string", primary=True),
SchemaField(name="name", type="string"),
SchemaField(name="category", type="string"),
SchemaField(name="price", type="float"),
SchemaField(name="in_stock", type="boolean")
]
)
}
@pytest.fixture
def integration_processor(self, sample_schemas):
"""Create processor with realistic configuration"""
# Processor is constructed with mocked Pulsar plumbing — no broker
# is required; only the template names are real configuration.
proc = Processor(
taskgroup=MagicMock(),
pulsar_client=AsyncMock(),
config_type="schema",
schema_selection_template="schema-selection-v1",
graphql_generation_template="graphql-generation-v1"
)
# Set up schemas
proc.schemas = sample_schemas
# Mock the client method
# proc.client(name) returns a MagicMock; tests attach AsyncMock
# .request side effects to it per scenario.
proc.client = MagicMock()
return proc
@pytest.mark.asyncio
async def test_end_to_end_nlp_query_processing(self, integration_processor):
"""Test complete NLP query processing pipeline"""
# Arrange - Create realistic query request
request = QuestionToStructuredQueryRequest(
question="Show me customers from California who have placed orders over $500",
max_results=50
)
msg = MagicMock()
msg.value.return_value = request
msg.properties.return_value = {"id": "integration-test-001"}
consumer = MagicMock()
flow = MagicMock()
flow_response = AsyncMock()
flow.return_value = flow_response
# Mock Phase 1 - Schema Selection Response
phase1_response = PromptResponse(
text=json.dumps(["customers", "orders"]),
error=None
)
# Mock Phase 2 - GraphQL Generation Response
expected_graphql = """
query GetCaliforniaCustomersWithLargeOrders($min_total: Float!) {
customers(where: {state: {eq: "California"}}) {
id
name
email
state
orders(where: {total: {gt: $min_total}}) {
order_id
total
status
order_date
}
}
}
"""
phase2_response = PromptResponse(
text=json.dumps({
"query": expected_graphql.strip(),
"variables": {"min_total": "500.0"},
"confidence": 0.92
}),
error=None
)
# Set up mock to return different responses for each call
# (the processor issues exactly two prompt requests per query:
# schema selection first, then GraphQL generation).
integration_processor.client.return_value.request = AsyncMock(
side_effect=[phase1_response, phase2_response]
)
# Act - Process the message
await integration_processor.on_message(msg, consumer, flow)
# Assert - Verify the complete pipeline
assert integration_processor.client.return_value.request.call_count == 2
flow_response.send.assert_called_once()
# Verify response structure and content
response_call = flow_response.send.call_args
response = response_call[0][0]
assert isinstance(response, QuestionToStructuredQueryResponse)
assert response.error is None
assert "customers" in response.graphql_query
assert "orders" in response.graphql_query
assert "California" in response.graphql_query
assert response.detected_schemas == ["customers", "orders"]
assert response.confidence == 0.92
assert response.variables["min_total"] == "500.0"
@pytest.mark.asyncio
async def test_complex_multi_table_query_integration(self, integration_processor):
"""Test integration with complex multi-table queries"""
# Arrange
request = QuestionToStructuredQueryRequest(
question="Find all electronic products under $100 that are in stock, along with any recent orders",
max_results=25
)
msg = MagicMock()
msg.value.return_value = request
msg.properties.return_value = {"id": "multi-table-test"}
consumer = MagicMock()
flow = MagicMock()
flow_response = AsyncMock()
flow.return_value = flow_response
# Mock responses
# Phase 1 selects two schemas; phase 2 returns a nested query that
# joins products to their orders.
phase1_response = PromptResponse(
text=json.dumps(["products", "orders"]),
error=None
)
phase2_response = PromptResponse(
text=json.dumps({
"query": "query { products(where: {category: {eq: \"Electronics\"}, price: {lt: 100}, in_stock: {eq: true}}) { product_id name price orders { order_id total } } }",
"variables": {},
"confidence": 0.88
}),
error=None
)
integration_processor.client.return_value.request = AsyncMock(
side_effect=[phase1_response, phase2_response]
)
# Act
await integration_processor.on_message(msg, consumer, flow)
# Assert
response_call = flow_response.send.call_args
response = response_call[0][0]
assert response.detected_schemas == ["products", "orders"]
assert "Electronics" in response.graphql_query
assert "price: {lt: 100}" in response.graphql_query
assert "in_stock: {eq: true}" in response.graphql_query
@pytest.mark.asyncio
async def test_schema_configuration_integration(self, integration_processor):
"""Test integration with dynamic schema configuration"""
# Arrange - New schema configuration
# Schema config values are JSON-encoded strings keyed by schema name,
# as delivered by the config service.
new_schema_config = {
"schema": {
"inventory": json.dumps({
"name": "inventory",
"description": "Product inventory tracking",
"fields": [
{"name": "sku", "type": "string", "primary_key": True},
{"name": "quantity", "type": "integer"},
{"name": "warehouse_location", "type": "string"}
]
})
}
}
# Act - Update configuration
# "v2" is the config version tag passed to the handler.
await integration_processor.on_schema_config(new_schema_config, "v2")
# Arrange - Test query using new schema
request = QuestionToStructuredQueryRequest(
question="Show inventory levels for all products in warehouse A",
max_results=100
)
msg = MagicMock()
msg.value.return_value = request
msg.properties.return_value = {"id": "schema-config-test"}
consumer = MagicMock()
flow = MagicMock()
flow_response = AsyncMock()
flow.return_value = flow_response
# Mock responses that use the new schema
phase1_response = PromptResponse(
text=json.dumps(["inventory"]),
error=None
)
phase2_response = PromptResponse(
text=json.dumps({
"query": "query { inventory(where: {warehouse_location: {eq: \"A\"}}) { sku quantity warehouse_location } }",
"variables": {},
"confidence": 0.85
}),
error=None
)
integration_processor.client.return_value.request = AsyncMock(
side_effect=[phase1_response, phase2_response]
)
# Act
await integration_processor.on_message(msg, consumer, flow)
# Assert
assert "inventory" in integration_processor.schemas
response_call = flow_response.send.call_args
response = response_call[0][0]
assert response.detected_schemas == ["inventory"]
assert "inventory" in response.graphql_query
@pytest.mark.asyncio
async def test_prompt_service_error_recovery_integration(self, integration_processor):
"""Test integration with prompt service error scenarios"""
# Arrange
request = QuestionToStructuredQueryRequest(
question="Show me customer data",
max_results=10
)
msg = MagicMock()
msg.value.return_value = request
msg.properties.return_value = {"id": "error-recovery-test"}
consumer = MagicMock()
flow = MagicMock()
flow_response = AsyncMock()
flow.return_value = flow_response
# Mock Phase 1 error
# return_value (not side_effect) here: the processor should stop at
# phase 1, so only one prompt response is ever needed.
phase1_error_response = PromptResponse(
text="",
error=Error(type="template-not-found", message="Schema selection template not available")
)
integration_processor.client.return_value.request = AsyncMock(
return_value=phase1_error_response
)
# Act
await integration_processor.on_message(msg, consumer, flow)
# Assert - Error is properly handled and propagated
# The processor wraps the upstream error in its own error type.
flow_response.send.assert_called_once()
response_call = flow_response.send.call_args
response = response_call[0][0]
assert isinstance(response, QuestionToStructuredQueryResponse)
assert response.error is not None
assert response.error.type == "nlp-query-error"
assert "Prompt service error" in response.error.message
@pytest.mark.asyncio
async def test_template_parameter_integration(self, sample_schemas):
"""Test integration with different template configurations"""
# Test with custom templates
# Builds its own Processor (rather than using the shared fixture) to
# verify the template names are stored and both prompt phases run.
custom_processor = Processor(
taskgroup=MagicMock(),
pulsar_client=AsyncMock(),
config_type="schema",
schema_selection_template="custom-schema-selector",
graphql_generation_template="custom-graphql-generator"
)
custom_processor.schemas = sample_schemas
custom_processor.client = MagicMock()
request = QuestionToStructuredQueryRequest(
question="Test query",
max_results=5
)
msg = MagicMock()
msg.value.return_value = request
msg.properties.return_value = {"id": "template-test"}
consumer = MagicMock()
flow = MagicMock()
flow_response = AsyncMock()
flow.return_value = flow_response
# Mock responses
phase1_response = PromptResponse(text=json.dumps(["customers"]), error=None)
phase2_response = PromptResponse(
text=json.dumps({
"query": "query { customers { id name } }",
"variables": {},
"confidence": 0.9
}),
error=None
)
custom_processor.client.return_value.request = AsyncMock(
side_effect=[phase1_response, phase2_response]
)
# Act
await custom_processor.on_message(msg, consumer, flow)
# Assert - Verify custom templates are used
assert custom_processor.schema_selection_template == "custom-schema-selector"
assert custom_processor.graphql_generation_template == "custom-graphql-generator"
# Verify the calls were made
assert custom_processor.client.return_value.request.call_count == 2
@pytest.mark.asyncio
async def test_large_schema_set_integration(self, integration_processor):
"""Test integration with large numbers of schemas"""
# Arrange - Add many schemas
# 20 synthetic tables (table_00..table_19), each with a primary id
# plus five string fields, added on top of the fixture schemas.
large_schema_set = {}
for i in range(20):
schema_name = f"table_{i:02d}"
large_schema_set[schema_name] = RowSchema(
name=schema_name,
description=f"Test table {i} with sample data",
fields=[
SchemaField(name="id", type="string", primary=True)
] + [SchemaField(name=f"field_{j}", type="string") for j in range(5)]
)
integration_processor.schemas.update(large_schema_set)
request = QuestionToStructuredQueryRequest(
question="Show me data from table_05 and table_12",
max_results=20
)
msg = MagicMock()
msg.value.return_value = request
msg.properties.return_value = {"id": "large-schema-test"}
consumer = MagicMock()
flow = MagicMock()
flow_response = AsyncMock()
flow.return_value = flow_response
# Mock responses
phase1_response = PromptResponse(
text=json.dumps(["table_05", "table_12"]),
error=None
)
phase2_response = PromptResponse(
text=json.dumps({
"query": "query { table_05 { id field_0 } table_12 { id field_1 } }",
"variables": {},
"confidence": 0.87
}),
error=None
)
integration_processor.client.return_value.request = AsyncMock(
side_effect=[phase1_response, phase2_response]
)
# Act
await integration_processor.on_message(msg, consumer, flow)
# Assert - Should handle large schema sets efficiently
response_call = flow_response.send.call_args
response = response_call[0][0]
assert response.detected_schemas == ["table_05", "table_12"]
assert "table_05" in response.graphql_query
assert "table_12" in response.graphql_query
@pytest.mark.asyncio
async def test_concurrent_request_handling_integration(self, integration_processor):
"""Test integration with concurrent request processing"""
# Arrange - Multiple concurrent requests
requests = []
messages = []
flows = []
for i in range(5):
request = QuestionToStructuredQueryRequest(
question=f"Query {i}: Show me data",
max_results=10
)
msg = MagicMock()
msg.value.return_value = request
msg.properties.return_value = {"id": f"concurrent-test-{i}"}
flow = MagicMock()
flow_response = AsyncMock()
flow.return_value = flow_response
requests.append(request)
messages.append(msg)
flows.append(flow)
# Mock responses for all requests
# NOTE(review): side_effect consumes responses in call order, so this
# interleaving assumes each request's two prompt calls are not
# interleaved with another request's — confirm on_message awaits both
# phases back-to-back.
mock_responses = []
for i in range(10): # 2 calls per request (phase1 + phase2)
if i % 2 == 0: # Phase 1 responses
mock_responses.append(PromptResponse(
text=json.dumps(["customers"]),
error=None
))
else: # Phase 2 responses
mock_responses.append(PromptResponse(
text=json.dumps({
"query": f"query {{ customers {{ id name }} }}",
"variables": {},
"confidence": 0.9
}),
error=None
))
integration_processor.client.return_value.request = AsyncMock(
side_effect=mock_responses
)
# Act - Process all messages concurrently
import asyncio
consumer = MagicMock()
tasks = []
for msg, flow in zip(messages, flows):
task = integration_processor.on_message(msg, consumer, flow)
tasks.append(task)
await asyncio.gather(*tasks)
# Assert - All requests should be processed
assert integration_processor.client.return_value.request.call_count == 10
for flow in flows:
flow.return_value.send.assert_called_once()
@pytest.mark.asyncio
async def test_performance_timing_integration(self, integration_processor):
"""Test performance characteristics of the integration"""
# Arrange
request = QuestionToStructuredQueryRequest(
question="Performance test query",
max_results=100
)
msg = MagicMock()
msg.value.return_value = request
msg.properties.return_value = {"id": "performance-test"}
consumer = MagicMock()
flow = MagicMock()
flow_response = AsyncMock()
flow.return_value = flow_response
# Mock fast responses
phase1_response = PromptResponse(text=json.dumps(["customers"]), error=None)
phase2_response = PromptResponse(
text=json.dumps({
"query": "query { customers { id } }",
"variables": {},
"confidence": 0.9
}),
error=None
)
integration_processor.client.return_value.request = AsyncMock(
side_effect=[phase1_response, phase2_response]
)
# Act
# Wall-clock timing with a generous 1s budget — a smoke check that
# the mocked pipeline has no accidental blocking, not a benchmark.
import time
start_time = time.time()
await integration_processor.on_message(msg, consumer, flow)
end_time = time.time()
execution_time = end_time - start_time
# Assert
assert execution_time < 1.0 # Should complete quickly with mocked services
flow_response.send.assert_called_once()
response_call = flow_response.send.call_args
response = response_call[0][0]
assert response.error is None

View file

@ -0,0 +1,665 @@
"""
Integration tests for Structured Query Service
These tests verify the end-to-end functionality of the structured query service,
testing orchestration between nlp-query and objects-query services.
Following the TEST_STRATEGY.md approach for integration testing.
"""
import pytest
import json
from unittest.mock import AsyncMock, MagicMock
from trustgraph.schema import (
StructuredQueryRequest, StructuredQueryResponse,
QuestionToStructuredQueryRequest, QuestionToStructuredQueryResponse,
ObjectsQueryRequest, ObjectsQueryResponse,
Error, GraphQLError
)
from trustgraph.retrieval.structured_query.service import Processor
@pytest.mark.integration
class TestStructuredQueryServiceIntegration:
"""Integration tests for structured query service orchestration"""
# NOTE(review): indentation appears lost in this extraction; the
# fixture and tests below are logically members of this class.
@pytest.fixture
def integration_processor(self):
"""Create processor with realistic configuration"""
proc = Processor(
taskgroup=MagicMock(),
pulsar_client=AsyncMock()
)
# Mock the client method
# Tests typically set proc.client.side_effect to route
# "nlp-query-request" vs objects-query client lookups.
proc.client = MagicMock()
return proc
@pytest.mark.asyncio
async def test_end_to_end_structured_query_processing(self, integration_processor):
"""Test complete structured query processing pipeline"""
# Arrange - Create realistic query request
request = StructuredQueryRequest(
question="Show me all customers from California who have made purchases over $500"
)
msg = MagicMock()
msg.value.return_value = request
msg.properties.return_value = {"id": "integration-test-001"}
consumer = MagicMock()
flow = MagicMock()
flow_response = AsyncMock()
flow.return_value = flow_response
# Mock NLP Query Service Response
nlp_response = QuestionToStructuredQueryResponse(
error=None,
graphql_query='''
query GetCaliforniaCustomersWithLargePurchases($minAmount: String!, $state: String!) {
customers(where: {state: {eq: $state}}) {
id
name
email
orders(where: {total: {gt: $minAmount}}) {
id
total
date
}
}
}
''',
variables={
"minAmount": "500.0",
"state": "California"
},
detected_schemas=["customers", "orders"],
confidence=0.91
)
# Mock Objects Query Service Response
objects_response = ObjectsQueryResponse(
error=None,
data='{"customers": [{"id": "123", "name": "Alice Johnson", "email": "alice@example.com", "orders": [{"id": "456", "total": 750.0, "date": "2024-01-15"}]}]}',
errors=None,
extensions={"execution_time": "150ms", "query_complexity": "8"}
)
# Set up mock clients to return different responses
# client() is dispatched by service name: the NLP client for
# "nlp-query-request", the objects client for anything else.
mock_nlp_client = AsyncMock()
mock_nlp_client.request.return_value = nlp_response
mock_objects_client = AsyncMock()
mock_objects_client.request.return_value = objects_response
integration_processor.client.side_effect = lambda name: (
mock_nlp_client if name == "nlp-query-request" else mock_objects_client
)
# Act - Process the message
await integration_processor.on_message(msg, consumer, flow)
# Assert - Verify the complete orchestration
# Verify NLP service call
mock_nlp_client.request.assert_called_once()
nlp_call_args = mock_nlp_client.request.call_args[0][0]
assert isinstance(nlp_call_args, QuestionToStructuredQueryRequest)
assert nlp_call_args.question == "Show me all customers from California who have made purchases over $500"
assert nlp_call_args.max_results == 100 # Default max_results
# Verify Objects service call
mock_objects_client.request.assert_called_once()
objects_call_args = mock_objects_client.request.call_args[0][0]
assert isinstance(objects_call_args, ObjectsQueryRequest)
assert "customers" in objects_call_args.query
assert "orders" in objects_call_args.query
assert objects_call_args.variables["minAmount"] == "500.0" # Converted to string
assert objects_call_args.variables["state"] == "California"
assert objects_call_args.user == "default"
assert objects_call_args.collection == "default"
# Verify response
flow_response.send.assert_called_once()
response_call = flow_response.send.call_args
response = response_call[0][0]
assert isinstance(response, StructuredQueryResponse)
assert response.error is None
assert "Alice Johnson" in response.data
assert "750.0" in response.data
assert len(response.errors) == 0
@pytest.mark.asyncio
async def test_nlp_service_integration_failure(self, integration_processor):
"""Test integration when NLP service fails"""
# Arrange
request = StructuredQueryRequest(
question="This is an unparseable query ][{}"
)
msg = MagicMock()
msg.value.return_value = request
msg.properties.return_value = {"id": "nlp-failure-test"}
consumer = MagicMock()
flow = MagicMock()
flow_response = AsyncMock()
flow.return_value = flow_response
# Mock NLP service failure
nlp_error_response = QuestionToStructuredQueryResponse(
error=Error(type="nlp-parsing-error", message="Unable to parse natural language query"),
graphql_query="",
variables={},
detected_schemas=[],
confidence=0.0
)
mock_nlp_client = AsyncMock()
mock_nlp_client.request.return_value = nlp_error_response
# return_value (not side_effect): the objects service must never be
# reached once the NLP phase reports an error.
integration_processor.client.return_value = mock_nlp_client
# Act
await integration_processor.on_message(msg, consumer, flow)
# Assert - Error should be propagated properly
# The orchestrator wraps the NLP error in its own error type while
# preserving the original message text.
flow_response.send.assert_called_once()
response_call = flow_response.send.call_args
response = response_call[0][0]
assert isinstance(response, StructuredQueryResponse)
assert response.error is not None
assert response.error.type == "structured-query-error"
assert "NLP query service error" in response.error.message
assert "Unable to parse natural language query" in response.error.message
@pytest.mark.asyncio
async def test_objects_service_integration_failure(self, integration_processor):
"""Test integration when Objects service fails"""
# Arrange
request = StructuredQueryRequest(
question="Show me data from a table that doesn't exist"
)
msg = MagicMock()
msg.value.return_value = request
msg.properties.return_value = {"id": "objects-failure-test"}
consumer = MagicMock()
flow = MagicMock()
flow_response = AsyncMock()
flow.return_value = flow_response
# Mock successful NLP response
nlp_response = QuestionToStructuredQueryResponse(
error=None,
graphql_query='query { nonexistent_table { id name } }',
variables={},
detected_schemas=["nonexistent_table"],
confidence=0.7
)
# Mock Objects service failure
objects_error_response = ObjectsQueryResponse(
error=Error(type="graphql-schema-error", message="Table 'nonexistent_table' does not exist in schema"),
data=None,
errors=None,
extensions={}
)
mock_nlp_client = AsyncMock()
mock_nlp_client.request.return_value = nlp_response
mock_objects_client = AsyncMock()
mock_objects_client.request.return_value = objects_error_response
integration_processor.client.side_effect = lambda name: (
mock_nlp_client if name == "nlp-query-request" else mock_objects_client
)
# Act
await integration_processor.on_message(msg, consumer, flow)
# Assert - Error should be propagated
# Downstream (objects) errors are also wrapped as
# "structured-query-error", keeping the original message text.
flow_response.send.assert_called_once()
response_call = flow_response.send.call_args
response = response_call[0][0]
assert response.error is not None
assert response.error.type == "structured-query-error"
assert "Objects query service error" in response.error.message
assert "nonexistent_table" in response.error.message
@pytest.mark.asyncio
async def test_graphql_validation_errors_integration(self, integration_processor):
"""Test integration with GraphQL validation errors"""
# Arrange
request = StructuredQueryRequest(
question="Show me customer invalid_field values"
)
msg = MagicMock()
msg.value.return_value = request
msg.properties.return_value = {"id": "validation-error-test"}
consumer = MagicMock()
flow = MagicMock()
flow_response = AsyncMock()
flow.return_value = flow_response
# Mock NLP response with invalid field
nlp_response = QuestionToStructuredQueryResponse(
error=None,
graphql_query='query { customers { id invalid_field } }',
variables={},
detected_schemas=["customers"],
confidence=0.8
)
# Mock Objects response with GraphQL validation errors
# GraphQL-level errors travel in the `errors` list, distinct from the
# system-level `error` field, so `response.error` stays None below.
validation_errors = [
GraphQLError(
message="Cannot query field 'invalid_field' on type 'Customer'",
path=["customers", "0", "invalid_field"],
extensions={"code": "VALIDATION_ERROR"}
),
GraphQLError(
message="Field 'invalid_field' is not defined in the schema",
path=["customers", "invalid_field"],
extensions={"code": "FIELD_NOT_FOUND"}
)
]
objects_response = ObjectsQueryResponse(
error=None,
data=None, # No data when validation fails
errors=validation_errors,
extensions={"validation_errors": "2"}
)
mock_nlp_client = AsyncMock()
mock_nlp_client.request.return_value = nlp_response
mock_objects_client = AsyncMock()
mock_objects_client.request.return_value = objects_response
integration_processor.client.side_effect = lambda name: (
mock_nlp_client if name == "nlp-query-request" else mock_objects_client
)
# Act
await integration_processor.on_message(msg, consumer, flow)
# Assert - GraphQL errors should be included in response
flow_response.send.assert_called_once()
response_call = flow_response.send.call_args
response = response_call[0][0]
assert response.error is None # No system error
assert len(response.errors) == 2 # Two GraphQL errors
assert "Cannot query field 'invalid_field'" in response.errors[0]
assert "Field 'invalid_field' is not defined" in response.errors[1]
assert "customers" in response.errors[0]
@pytest.mark.asyncio
async def test_complex_multi_service_integration(self, integration_processor):
"""Test complex integration scenario with multiple entities and relationships"""
# Arrange
request = StructuredQueryRequest(
question="Find all products under $100 that are in stock, along with their recent orders from customers in New York"
)
msg = MagicMock()
msg.value.return_value = request
msg.properties.return_value = {"id": "complex-integration-test"}
consumer = MagicMock()
flow = MagicMock()
flow_response = AsyncMock()
flow.return_value = flow_response
# Mock complex NLP response
# All variables are string-typed ($maxPrice/$inStock as String!) to
# match the string-valued variables map asserted below.
nlp_response = QuestionToStructuredQueryResponse(
error=None,
graphql_query='''
query GetProductsWithCustomerOrders($maxPrice: String!, $inStock: String!, $state: String!) {
products(where: {price: {lt: $maxPrice}, in_stock: {eq: $inStock}}) {
id
name
price
orders {
id
total
customer {
id
name
state
}
}
}
}
''',
variables={
"maxPrice": "100.0",
"inStock": "true",
"state": "New York"
},
detected_schemas=["products", "orders", "customers"],
confidence=0.85
)
# Mock complex Objects response
# Two products, each with one nested order and customer, serialized
# to JSON for the data payload.
complex_data = {
"products": [
{
"id": "prod_123",
"name": "Widget A",
"price": 89.99,
"orders": [
{
"id": "order_456",
"total": 179.98,
"customer": {
"id": "cust_789",
"name": "Bob Smith",
"state": "New York"
}
}
]
},
{
"id": "prod_124",
"name": "Widget B",
"price": 65.50,
"orders": [
{
"id": "order_457",
"total": 131.00,
"customer": {
"id": "cust_790",
"name": "Carol Jones",
"state": "New York"
}
}
]
}
]
}
objects_response = ObjectsQueryResponse(
error=None,
data=json.dumps(complex_data),
errors=None,
extensions={
"execution_time": "250ms",
"query_complexity": "15",
"data_sources": "products,orders,customers" # Convert array to comma-separated string
}
)
mock_nlp_client = AsyncMock()
mock_nlp_client.request.return_value = nlp_response
mock_objects_client = AsyncMock()
mock_objects_client.request.return_value = objects_response
integration_processor.client.side_effect = lambda name: (
mock_nlp_client if name == "nlp-query-request" else mock_objects_client
)
# Act
await integration_processor.on_message(msg, consumer, flow)
# Assert - Verify complex data integration
# Check NLP service call
nlp_call_args = mock_nlp_client.request.call_args[0][0]
assert len(nlp_call_args.question) > 50 # Complex question
# Check Objects service call with variable conversion
objects_call_args = mock_objects_client.request.call_args[0][0]
assert objects_call_args.variables["maxPrice"] == "100.0"
assert objects_call_args.variables["inStock"] == "true"
assert objects_call_args.variables["state"] == "New York"
# Check response contains complex data
response_call = flow_response.send.call_args
response = response_call[0][0]
assert response.error is None
assert "Widget A" in response.data
assert "Widget B" in response.data
assert "Bob Smith" in response.data
assert "Carol Jones" in response.data
assert "New York" in response.data
@pytest.mark.asyncio
async def test_empty_result_integration(self, integration_processor):
"""Test integration when query returns empty results"""
# Arrange
request = StructuredQueryRequest(
question="Show me customers from Mars"
)
msg = MagicMock()
msg.value.return_value = request
msg.properties.return_value = {"id": "empty-result-test"}
consumer = MagicMock()
flow = MagicMock()
flow_response = AsyncMock()
flow.return_value = flow_response
# Mock NLP response
nlp_response = QuestionToStructuredQueryResponse(
error=None,
graphql_query='query { customers(where: {planet: {eq: "Mars"}}) { id name planet } }',
variables={},
detected_schemas=["customers"],
confidence=0.9
)
# Mock empty Objects response
objects_response = ObjectsQueryResponse(
error=None,
data='{"customers": []}', # Empty result set
errors=None,
extensions={"result_count": "0"}
)
mock_nlp_client = AsyncMock()
mock_nlp_client.request.return_value = nlp_response
mock_objects_client = AsyncMock()
mock_objects_client.request.return_value = objects_response
integration_processor.client.side_effect = lambda name: (
mock_nlp_client if name == "nlp-query-request" else mock_objects_client
)
# Act
await integration_processor.on_message(msg, consumer, flow)
# Assert - Empty results should be handled gracefully
# An empty data set is a success (no error), passed through verbatim.
response_call = flow_response.send.call_args
response = response_call[0][0]
assert response.error is None
assert response.data == '{"customers": []}'
assert len(response.errors) == 0
    @pytest.mark.asyncio
    async def test_concurrent_requests_integration(self, integration_processor):
        """Test integration with concurrent request processing"""
        # Arrange - Multiple concurrent requests, each with its own
        # message/flow pair so per-request responses can be asserted
        requests = []
        messages = []
        flows = []
        for i in range(3):
            request = StructuredQueryRequest(
                question=f"Query {i}: Show me data"
            )
            msg = MagicMock()
            msg.value.return_value = request
            msg.properties.return_value = {"id": f"concurrent-test-{i}"}
            flow = MagicMock()
            flow_response = AsyncMock()
            flow.return_value = flow_response
            requests.append(request)
            messages.append(msg)
            flows.append(flow)
        # Mock responses for all requests (6 total: 3 NLP + 3 Objects)
        # laid out in strict alternation: even index = NLP, odd = Objects
        mock_responses = []
        for i in range(6):
            if i % 2 == 0:  # NLP responses
                mock_responses.append(QuestionToStructuredQueryResponse(
                    error=None,
                    graphql_query=f'query {{ test_{i//2} {{ id }} }}',
                    variables={},
                    detected_schemas=[f"test_{i//2}"],
                    confidence=0.9
                ))
            else:  # Objects responses
                mock_responses.append(ObjectsQueryResponse(
                    error=None,
                    data=f'{{"test_{i//2}": [{{"id": "{i//2}"}}]}}',
                    errors=None,
                    extensions={}
                ))
        # NOTE(review): this counter-based side_effect assumes each
        # on_message makes its NLP call then its Objects call without
        # another task's call interleaving in between — TODO confirm the
        # processor awaits the two hops back-to-back per message.
        call_count = 0
        def mock_client_side_effect(name):
            nonlocal call_count
            client = AsyncMock()
            client.request.return_value = mock_responses[call_count]
            call_count += 1
            return client
        integration_processor.client.side_effect = mock_client_side_effect
        # Act - Process all messages concurrently
        import asyncio
        consumer = MagicMock()
        tasks = []
        for msg, flow in zip(messages, flows):
            task = integration_processor.on_message(msg, consumer, flow)
            tasks.append(task)
        await asyncio.gather(*tasks)
        # Assert - All requests should be processed
        assert call_count == 6  # 2 calls per request (NLP + Objects)
        for flow in flows:
            flow.return_value.send.assert_called_once()
    @pytest.mark.asyncio
    async def test_service_timeout_integration(self, integration_processor):
        """Test integration with service timeout scenarios"""
        # Arrange
        request = StructuredQueryRequest(
            question="This query will timeout"
        )
        msg = MagicMock()
        msg.value.return_value = request
        msg.properties.return_value = {"id": "timeout-test"}
        consumer = MagicMock()
        flow = MagicMock()
        flow_response = AsyncMock()
        flow.return_value = flow_response
        # Mock NLP service timeout: the very first downstream call raises,
        # so the Objects hop is never reached
        mock_nlp_client = AsyncMock()
        mock_nlp_client.request.side_effect = Exception("Service timeout: Request took longer than 30s")
        integration_processor.client.return_value = mock_nlp_client
        # Act
        await integration_processor.on_message(msg, consumer, flow)
        # Assert - Timeout should be handled gracefully: a single error
        # response is sent instead of the exception propagating
        flow_response.send.assert_called_once()
        response_call = flow_response.send.call_args
        response = response_call[0][0]
        assert response.error is not None
        assert response.error.type == "structured-query-error"
        assert "timeout" in response.error.message.lower()
    @pytest.mark.asyncio
    async def test_variable_type_conversion_integration(self, integration_processor):
        """Test integration with complex variable type conversions"""
        # Arrange
        request = StructuredQueryRequest(
            question="Show me orders with totals between 50.5 and 200.75 from the last 30 days"
        )
        msg = MagicMock()
        msg.value.return_value = request
        msg.properties.return_value = {"id": "variable-conversion-test"}
        consumer = MagicMock()
        flow = MagicMock()
        flow_response = AsyncMock()
        flow.return_value = flow_response
        # Mock NLP response with various data types that need string conversion
        nlp_response = QuestionToStructuredQueryResponse(
            error=None,
            graphql_query='query($minTotal: Float!, $maxTotal: Float!, $daysPast: Int!) { orders(filter: {total: {between: [$minTotal, $maxTotal]}, date: {gte: $daysPast}}) { id total date } }',
            variables={
                "minTotal": "50.5",    # Already string
                "maxTotal": "200.75",  # Already string
                "daysPast": "30"       # Already string
            },
            detected_schemas=["orders"],
            confidence=0.88
        )
        # Mock Objects response
        objects_response = ObjectsQueryResponse(
            error=None,
            data='{"orders": [{"id": "123", "total": 125.50, "date": "2024-01-15"}]}',
            errors=None,
            extensions={}
        )
        mock_nlp_client = AsyncMock()
        mock_nlp_client.request.return_value = nlp_response
        mock_objects_client = AsyncMock()
        mock_objects_client.request.return_value = objects_response
        # Route client lookups by service name
        integration_processor.client.side_effect = lambda name: (
            mock_nlp_client if name == "nlp-query-request" else mock_objects_client
        )
        # Act
        await integration_processor.on_message(msg, consumer, flow)
        # Assert - Variables should be properly converted to strings
        objects_call_args = mock_objects_client.request.call_args[0][0]
        # All variables should be strings for Pulsar schema compatibility
        assert isinstance(objects_call_args.variables["minTotal"], str)
        assert isinstance(objects_call_args.variables["maxTotal"], str)
        assert isinstance(objects_call_args.variables["daysPast"], str)
        # Values should be preserved
        assert objects_call_args.variables["minTotal"] == "50.5"
        assert objects_call_args.variables["maxTotal"] == "200.75"
        assert objects_call_args.variables["daysPast"] == "30"
        # Response should contain expected data
        response_call = flow_response.send.call_args
        response = response_call[0][0]
        assert response.error is None
        assert "125.50" in response.data

View file

@ -0,0 +1,356 @@
"""
Unit tests for NLP Query service
Following TEST_STRATEGY.md approach for service testing
"""
import pytest
import json
from unittest.mock import AsyncMock, MagicMock, patch
from typing import Dict, Any
from trustgraph.schema import (
QuestionToStructuredQueryRequest, QuestionToStructuredQueryResponse,
PromptRequest, PromptResponse, Error, RowSchema, Field as SchemaField
)
from trustgraph.retrieval.nlp_query.service import Processor
@pytest.fixture
def mock_prompt_client():
    """Provide an async mock standing in for the prompt service client."""
    prompt_stub = AsyncMock()
    return prompt_stub
@pytest.fixture
def mock_pulsar_client():
    """Provide an async mock standing in for the Pulsar client."""
    pulsar_stub = AsyncMock()
    return pulsar_stub
@pytest.fixture
def sample_schemas():
    """Two representative row schemas (customers, orders) shared by the tests."""
    customers = RowSchema(
        name="customers",
        description="Customer data",
        fields=[
            SchemaField(name="id", type="string", primary=True),
            SchemaField(name="name", type="string"),
            SchemaField(name="email", type="string"),
            SchemaField(name="state", type="string"),
        ],
    )
    orders = RowSchema(
        name="orders",
        description="Order data",
        fields=[
            SchemaField(name="order_id", type="string", primary=True),
            SchemaField(name="customer_id", type="string"),
            SchemaField(name="total", type="float"),
            SchemaField(name="status", type="string"),
        ],
    )
    return {"customers": customers, "orders": orders}
@pytest.fixture
def processor(mock_pulsar_client, sample_schemas):
    """Build a Processor wired to mocks, with schemas pre-loaded."""
    service = Processor(
        taskgroup=MagicMock(),
        pulsar_client=mock_pulsar_client,
        config_type="schema"
    )
    # Pre-load schemas and stub out the service-client factory so no
    # real Pulsar topics are touched.
    service.schemas = sample_schemas
    service.client = MagicMock()
    return service
@pytest.mark.asyncio
class TestNLPQueryProcessor:
    """Test NLP Query service processor.

    Covers the two prompt-driven phases (schema selection, GraphQL
    generation), the full on_message flow, schema-config loading,
    initialisation, and CLI argument parsing.
    """

    async def test_phase1_select_schemas_success(self, processor, mock_prompt_client):
        """Test successful schema selection (Phase 1)"""
        # NOTE(review): the mock_prompt_client fixture is requested but not
        # used in the body; the prompt client is stubbed via processor.client.
        # Arrange: prompt service returns a JSON list of schema names
        question = "Show me customers from California"
        expected_schemas = ["customers"]
        mock_response = PromptResponse(
            text=json.dumps(expected_schemas),
            error=None
        )
        processor.client.return_value.request = AsyncMock(return_value=mock_response)
        # Act
        result = await processor.phase1_select_schemas(question)
        # Assert
        assert result == expected_schemas
        processor.client.assert_called_once_with("prompt-request")

    async def test_phase1_select_schemas_prompt_error(self, processor):
        """Test schema selection with prompt service error"""
        # Arrange: prompt service responds with an Error payload
        question = "Show me customers"
        error = Error(type="prompt-error", message="Template not found")
        mock_response = PromptResponse(text="", error=error)
        processor.client.return_value.request = AsyncMock(return_value=mock_response)
        # Act & Assert: phase 1 surfaces the failure as an exception
        with pytest.raises(Exception, match="Prompt service error"):
            await processor.phase1_select_schemas(question)

    async def test_phase2_generate_graphql_success(self, processor):
        """Test successful GraphQL generation (Phase 2)"""
        # Arrange: prompt service returns query/variables/confidence JSON
        question = "Show me customers from California"
        selected_schemas = ["customers"]
        expected_result = {
            "query": "query { customers(where: {state: {eq: \"California\"}}) { id name email state } }",
            "variables": {},
            "confidence": 0.95
        }
        mock_response = PromptResponse(
            text=json.dumps(expected_result),
            error=None
        )
        processor.client.return_value.request = AsyncMock(return_value=mock_response)
        # Act
        result = await processor.phase2_generate_graphql(question, selected_schemas)
        # Assert: the parsed JSON is returned verbatim
        assert result == expected_result
        processor.client.assert_called_once_with("prompt-request")

    async def test_phase2_generate_graphql_prompt_error(self, processor):
        """Test GraphQL generation with prompt service error"""
        # Arrange
        question = "Show me customers"
        selected_schemas = ["customers"]
        error = Error(type="prompt-error", message="Generation failed")
        mock_response = PromptResponse(text="", error=error)
        processor.client.return_value.request = AsyncMock(return_value=mock_response)
        # Act & Assert
        with pytest.raises(Exception, match="Prompt service error"):
            await processor.phase2_generate_graphql(question, selected_schemas)

    async def test_on_message_full_flow_success(self, processor):
        """Test complete message processing flow"""
        # Arrange
        request = QuestionToStructuredQueryRequest(
            question="Show me customers from California",
            max_results=100
        )
        msg = MagicMock()
        msg.value.return_value = request
        msg.properties.return_value = {"id": "test-123"}
        consumer = MagicMock()
        flow = MagicMock()
        flow_response = AsyncMock()
        flow.return_value = flow_response
        # Mock Phase 1 response (schema selection)
        phase1_response = PromptResponse(
            text=json.dumps(["customers"]),
            error=None
        )
        # Mock Phase 2 response (GraphQL generation)
        phase2_response = PromptResponse(
            text=json.dumps({
                "query": "query { customers(where: {state: {eq: \"California\"}}) { id name email } }",
                "variables": {},
                "confidence": 0.9
            }),
            error=None
        )
        # Set up mock to return different responses for each call
        processor.client.return_value.request = AsyncMock(
            side_effect=[phase1_response, phase2_response]
        )
        # Act
        await processor.on_message(msg, consumer, flow)
        # Assert: both prompt phases ran and one response was sent
        assert processor.client.return_value.request.call_count == 2
        flow_response.send.assert_called_once()
        # Verify response structure
        response_call = flow_response.send.call_args
        response = response_call[0][0]  # First argument is the response object
        assert isinstance(response, QuestionToStructuredQueryResponse)
        assert response.error is None
        assert "customers" in response.graphql_query
        assert response.detected_schemas == ["customers"]
        assert response.confidence == 0.9

    async def test_on_message_phase1_error(self, processor):
        """Test message processing with Phase 1 failure"""
        # Arrange
        request = QuestionToStructuredQueryRequest(
            question="Show me customers",
            max_results=100
        )
        msg = MagicMock()
        msg.value.return_value = request
        msg.properties.return_value = {"id": "test-123"}
        consumer = MagicMock()
        flow = MagicMock()
        flow_response = AsyncMock()
        flow.return_value = flow_response
        # Mock Phase 1 error
        phase1_response = PromptResponse(
            text="",
            error=Error(type="template-error", message="Template not found")
        )
        processor.client.return_value.request = AsyncMock(return_value=phase1_response)
        # Act
        await processor.on_message(msg, consumer, flow)
        # Assert: the failure is converted into an error response, not raised
        flow_response.send.assert_called_once()
        # Verify error response
        response_call = flow_response.send.call_args
        response = response_call[0][0]
        assert isinstance(response, QuestionToStructuredQueryResponse)
        assert response.error is not None
        assert response.error.type == "nlp-query-error"
        assert "Prompt service error" in response.error.message

    async def test_schema_config_loading(self, processor):
        """Test schema configuration loading"""
        # Arrange: config values are JSON-encoded schema definitions
        config = {
            "schema": {
                "test_schema": json.dumps({
                    "name": "test_schema",
                    "description": "Test schema",
                    "fields": [
                        {
                            "name": "id",
                            "type": "string",
                            "primary_key": True,
                            "required": True
                        },
                        {
                            "name": "name",
                            "type": "string",
                            "description": "User name"
                        }
                    ]
                })
            }
        }
        # Act
        await processor.on_schema_config(config, "v1")
        # Assert: JSON "primary_key" maps onto the RowSchema field's
        # "primary" attribute
        assert "test_schema" in processor.schemas
        schema = processor.schemas["test_schema"]
        assert schema.name == "test_schema"
        assert schema.description == "Test schema"
        assert len(schema.fields) == 2
        assert schema.fields[0].name == "id"
        assert schema.fields[0].primary == True
        assert schema.fields[1].name == "name"

    async def test_schema_config_loading_invalid_json(self, processor):
        """Test schema configuration loading with invalid JSON"""
        # Arrange
        config = {
            "schema": {
                "bad_schema": "invalid json{"
            }
        }
        # Act
        await processor.on_schema_config(config, "v1")
        # Assert - bad schema should be ignored (not raised)
        assert "bad_schema" not in processor.schemas

    def test_processor_initialization(self, mock_pulsar_client):
        """Test processor initialization with correct specifications"""
        # Act: custom prompt-template names override the defaults
        processor = Processor(
            taskgroup=MagicMock(),
            pulsar_client=mock_pulsar_client,
            schema_selection_template="custom-schema-select",
            graphql_generation_template="custom-graphql-gen"
        )
        # Assert
        assert processor.schema_selection_template == "custom-schema-select"
        assert processor.graphql_generation_template == "custom-graphql-gen"
        assert processor.config_key == "schema"
        assert processor.schemas == {}

    def test_add_args(self):
        """Test command-line argument parsing"""
        import argparse
        parser = argparse.ArgumentParser()
        Processor.add_args(parser)
        # Test default values
        args = parser.parse_args([])
        assert args.config_type == "schema"
        assert args.schema_selection_template == "schema-selection"
        assert args.graphql_generation_template == "graphql-generation"
        # Test custom values
        args = parser.parse_args([
            "--config-type", "custom",
            "--schema-selection-template", "my-selector",
            "--graphql-generation-template", "my-generator"
        ])
        assert args.config_type == "custom"
        assert args.schema_selection_template == "my-selector"
        assert args.graphql_generation_template == "my-generator"
@pytest.mark.unit
class TestNLPQueryHelperFunctions:
    """Exercise the schema-shaped data used when formatting prompt inputs."""

    def test_schema_info_formatting(self, sample_schemas):
        """Field ordering and primary-key detection on the customers schema."""
        # Formatting is currently inline in the service; these checks pin
        # the fixture shape that formatting relies on.
        schema = sample_schemas["customers"]
        field_names = []
        for field in schema.fields:
            field_names.append(field.name)
        assert field_names == ["id", "name", "email", "state"]
        # Exactly one field is flagged as the primary key
        key_fields = [field.name for field in schema.fields if field.primary]
        assert key_fields == ["id"]

View file

@ -0,0 +1,522 @@
"""
Unit tests for Structured Query Service
Following TEST_STRATEGY.md approach for service testing
"""
import pytest
import json
from unittest.mock import AsyncMock, MagicMock, patch
from trustgraph.schema import (
StructuredQueryRequest, StructuredQueryResponse,
QuestionToStructuredQueryRequest, QuestionToStructuredQueryResponse,
ObjectsQueryRequest, ObjectsQueryResponse,
Error, GraphQLError
)
from trustgraph.retrieval.structured_query.service import Processor
@pytest.fixture
def mock_pulsar_client():
    """Supply a stand-in Pulsar client; no broker connection is made."""
    pulsar_stub = AsyncMock()
    return pulsar_stub
@pytest.fixture
def processor(mock_pulsar_client):
    """Build a Processor with its external dependencies replaced by mocks."""
    service = Processor(
        taskgroup=MagicMock(),
        pulsar_client=mock_pulsar_client
    )
    # Stub the service-client factory so tests control downstream calls.
    service.client = MagicMock()
    return service
@pytest.mark.asyncio
class TestStructuredQueryProcessor:
    """Test Structured Query service processor.

    The processor chains two downstream services: the NLP query service
    (question -> GraphQL) and the Objects query service (GraphQL ->
    data). Tests stub both via processor.client and exercise success,
    each failure mode, and initialisation.
    """

    async def test_successful_end_to_end_query(self, processor):
        """Test successful end-to-end query processing"""
        # Arrange
        request = StructuredQueryRequest(
            question="Show me all customers from New York"
        )
        msg = MagicMock()
        msg.value.return_value = request
        msg.properties.return_value = {"id": "test-123"}
        consumer = MagicMock()
        flow = MagicMock()
        flow_response = AsyncMock()
        flow.return_value = flow_response
        # Mock NLP query service response
        nlp_response = QuestionToStructuredQueryResponse(
            error=None,
            graphql_query='query { customers(where: {state: {eq: "NY"}}) { id name email } }',
            variables={"state": "NY"},
            detected_schemas=["customers"],
            confidence=0.95
        )
        # Mock objects query service response
        objects_response = ObjectsQueryResponse(
            error=None,
            data='{"customers": [{"id": "1", "name": "John", "email": "john@example.com"}]}',
            errors=None,
            extensions={}
        )
        # Set up mock clients, routed by the service name the processor asks for
        mock_nlp_client = AsyncMock()
        mock_nlp_client.request.return_value = nlp_response
        mock_objects_client = AsyncMock()
        mock_objects_client.request.return_value = objects_response
        processor.client.side_effect = lambda name: (
            mock_nlp_client if name == "nlp-query-request" else mock_objects_client
        )
        # Act
        await processor.on_message(msg, consumer, flow)
        # Assert
        # Verify NLP query service was called correctly
        # (max_results=100 appears to be the processor's default — confirm)
        mock_nlp_client.request.assert_called_once()
        nlp_call_args = mock_nlp_client.request.call_args[0][0]
        assert isinstance(nlp_call_args, QuestionToStructuredQueryRequest)
        assert nlp_call_args.question == "Show me all customers from New York"
        assert nlp_call_args.max_results == 100
        # Verify objects query service was called correctly
        mock_objects_client.request.assert_called_once()
        objects_call_args = mock_objects_client.request.call_args[0][0]
        assert isinstance(objects_call_args, ObjectsQueryRequest)
        assert objects_call_args.query == 'query { customers(where: {state: {eq: "NY"}}) { id name email } }'
        assert objects_call_args.variables == {"state": "NY"}
        assert objects_call_args.user == "default"
        assert objects_call_args.collection == "default"
        # Verify response: data passed through, no errors
        flow_response.send.assert_called_once()
        response_call = flow_response.send.call_args
        response = response_call[0][0]
        assert isinstance(response, StructuredQueryResponse)
        assert response.error is None
        assert response.data == '{"customers": [{"id": "1", "name": "John", "email": "john@example.com"}]}'
        assert len(response.errors) == 0

    async def test_nlp_query_service_error(self, processor):
        """Test handling of NLP query service errors"""
        # Arrange
        request = StructuredQueryRequest(
            question="Invalid query"
        )
        msg = MagicMock()
        msg.value.return_value = request
        msg.properties.return_value = {"id": "test-error"}
        consumer = MagicMock()
        flow = MagicMock()
        flow_response = AsyncMock()
        flow.return_value = flow_response
        # Mock NLP query service error response
        nlp_response = QuestionToStructuredQueryResponse(
            error=Error(type="nlp-query-error", message="Failed to parse question"),
            graphql_query="",
            variables={},
            detected_schemas=[],
            confidence=0.0
        )
        mock_nlp_client = AsyncMock()
        mock_nlp_client.request.return_value = nlp_response
        processor.client.return_value = mock_nlp_client
        # Act
        await processor.on_message(msg, consumer, flow)
        # Assert: the upstream error is wrapped in a structured-query-error
        flow_response.send.assert_called_once()
        response_call = flow_response.send.call_args
        response = response_call[0][0]
        assert isinstance(response, StructuredQueryResponse)
        assert response.error is not None
        assert response.error.type == "structured-query-error"
        assert "NLP query service error" in response.error.message

    async def test_empty_graphql_query_error(self, processor):
        """Test handling of empty GraphQL query from NLP service"""
        # Arrange
        request = StructuredQueryRequest(
            question="Ambiguous question"
        )
        msg = MagicMock()
        msg.value.return_value = request
        msg.properties.return_value = {"id": "test-empty"}
        consumer = MagicMock()
        flow = MagicMock()
        flow_response = AsyncMock()
        flow.return_value = flow_response
        # Mock NLP query service response with empty query: no error
        # object, but nothing executable either
        nlp_response = QuestionToStructuredQueryResponse(
            error=None,
            graphql_query="",  # Empty query
            variables={},
            detected_schemas=[],
            confidence=0.1
        )
        mock_nlp_client = AsyncMock()
        mock_nlp_client.request.return_value = nlp_response
        processor.client.return_value = mock_nlp_client
        # Act
        await processor.on_message(msg, consumer, flow)
        # Assert
        flow_response.send.assert_called_once()
        response_call = flow_response.send.call_args
        response = response_call[0][0]
        assert response.error is not None
        assert "empty GraphQL query" in response.error.message

    async def test_objects_query_service_error(self, processor):
        """Test handling of objects query service errors"""
        # Arrange
        request = StructuredQueryRequest(
            question="Show me customers"
        )
        msg = MagicMock()
        msg.value.return_value = request
        msg.properties.return_value = {"id": "test-objects-error"}
        consumer = MagicMock()
        flow = MagicMock()
        flow_response = AsyncMock()
        flow.return_value = flow_response
        # Mock successful NLP response
        nlp_response = QuestionToStructuredQueryResponse(
            error=None,
            graphql_query='query { customers { id name } }',
            variables={},
            detected_schemas=["customers"],
            confidence=0.9
        )
        # Mock objects query service error
        objects_response = ObjectsQueryResponse(
            error=Error(type="graphql-execution-error", message="Table 'customers' not found"),
            data=None,
            errors=None,
            extensions={}
        )
        mock_nlp_client = AsyncMock()
        mock_nlp_client.request.return_value = nlp_response
        mock_objects_client = AsyncMock()
        mock_objects_client.request.return_value = objects_response
        processor.client.side_effect = lambda name: (
            mock_nlp_client if name == "nlp-query-request" else mock_objects_client
        )
        # Act
        await processor.on_message(msg, consumer, flow)
        # Assert: the downstream error message is wrapped and preserved
        flow_response.send.assert_called_once()
        response_call = flow_response.send.call_args
        response = response_call[0][0]
        assert response.error is not None
        assert "Objects query service error" in response.error.message
        assert "Table 'customers' not found" in response.error.message

    async def test_graphql_errors_handling(self, processor):
        """Test handling of GraphQL validation/execution errors"""
        # Arrange
        request = StructuredQueryRequest(
            question="Show invalid field"
        )
        msg = MagicMock()
        msg.value.return_value = request
        msg.properties.return_value = {"id": "test-graphql-errors"}
        consumer = MagicMock()
        flow = MagicMock()
        flow_response = AsyncMock()
        flow.return_value = flow_response
        # Mock successful NLP response
        nlp_response = QuestionToStructuredQueryResponse(
            error=None,
            graphql_query='query { customers { invalid_field } }',
            variables={},
            detected_schemas=["customers"],
            confidence=0.8
        )
        # Mock objects response with GraphQL errors (errors list, not a
        # top-level error object — they are non-fatal per-field failures)
        graphql_errors = [
            GraphQLError(
                message="Cannot query field 'invalid_field' on type 'Customer'",
                path=["customers", "0", "invalid_field"],  # All path elements must be strings
                extensions={}
            )
        ]
        objects_response = ObjectsQueryResponse(
            error=None,
            data=None,
            errors=graphql_errors,
            extensions={}
        )
        mock_nlp_client = AsyncMock()
        mock_nlp_client.request.return_value = nlp_response
        mock_objects_client = AsyncMock()
        mock_objects_client.request.return_value = objects_response
        processor.client.side_effect = lambda name: (
            mock_nlp_client if name == "nlp-query-request" else mock_objects_client
        )
        # Act
        await processor.on_message(msg, consumer, flow)
        # Assert: GraphQL errors are relayed in response.errors, not
        # promoted to response.error
        flow_response.send.assert_called_once()
        response_call = flow_response.send.call_args
        response = response_call[0][0]
        assert response.error is None
        assert len(response.errors) == 1
        assert "Cannot query field 'invalid_field'" in response.errors[0]
        assert "customers" in response.errors[0]

    async def test_complex_query_with_variables(self, processor):
        """Test processing complex queries with variables"""
        # Arrange
        request = StructuredQueryRequest(
            question="Show customers with orders over $100 from last month"
        )
        msg = MagicMock()
        msg.value.return_value = request
        msg.properties.return_value = {"id": "test-complex"}
        consumer = MagicMock()
        flow = MagicMock()
        flow_response = AsyncMock()
        flow.return_value = flow_response
        # Mock NLP response with complex query and variables
        nlp_response = QuestionToStructuredQueryResponse(
            error=None,
            graphql_query='''
                query GetCustomersWithLargeOrders($minTotal: Float!, $startDate: String!) {
                    customers {
                        id
                        name
                        orders(where: {total: {gt: $minTotal}, date: {gte: $startDate}}) {
                            id
                            total
                            date
                        }
                    }
                }
            ''',
            variables={
                "minTotal": "100.0",  # Convert to string for Pulsar schema
                "startDate": "2024-01-01"
            },
            detected_schemas=["customers", "orders"],
            confidence=0.88
        )
        # Mock objects response
        objects_response = ObjectsQueryResponse(
            error=None,
            data='{"customers": [{"id": "1", "name": "Alice", "orders": [{"id": "100", "total": 150.0}]}]}',
            errors=None
        )
        mock_nlp_client = AsyncMock()
        mock_nlp_client.request.return_value = nlp_response
        mock_objects_client = AsyncMock()
        mock_objects_client.request.return_value = objects_response
        processor.client.side_effect = lambda name: (
            mock_nlp_client if name == "nlp-query-request" else mock_objects_client
        )
        # Act
        await processor.on_message(msg, consumer, flow)
        # Assert
        # Verify variables were passed correctly (converted to strings)
        objects_call_args = mock_objects_client.request.call_args[0][0]
        assert objects_call_args.variables["minTotal"] == "100.0"  # Should be converted to string
        assert objects_call_args.variables["startDate"] == "2024-01-01"
        # Verify response
        response_call = flow_response.send.call_args
        response = response_call[0][0]
        assert response.error is None
        assert "Alice" in response.data

    async def test_null_data_handling(self, processor):
        """Test handling of null/empty data responses"""
        # Arrange
        request = StructuredQueryRequest(
            question="Show nonexistent data"
        )
        msg = MagicMock()
        msg.value.return_value = request
        msg.properties.return_value = {"id": "test-null"}
        consumer = MagicMock()
        flow = MagicMock()
        flow_response = AsyncMock()
        flow.return_value = flow_response
        # Mock responses
        nlp_response = QuestionToStructuredQueryResponse(
            error=None,
            graphql_query='query { customers { id } }',
            variables={},
            detected_schemas=["customers"],
            confidence=0.9
        )
        objects_response = ObjectsQueryResponse(
            error=None,
            data=None,  # Null data
            errors=None,
            extensions={}
        )
        mock_nlp_client = AsyncMock()
        mock_nlp_client.request.return_value = nlp_response
        mock_objects_client = AsyncMock()
        mock_objects_client.request.return_value = objects_response
        processor.client.side_effect = lambda name: (
            mock_nlp_client if name == "nlp-query-request" else mock_objects_client
        )
        # Act
        await processor.on_message(msg, consumer, flow)
        # Assert
        response_call = flow_response.send.call_args
        response = response_call[0][0]
        assert response.error is None
        assert response.data == "null"  # Should convert None to "null" string

    async def test_exception_handling(self, processor):
        """Test general exception handling"""
        # Arrange
        request = StructuredQueryRequest(
            question="Test exception"
        )
        msg = MagicMock()
        msg.value.return_value = request
        msg.properties.return_value = {"id": "test-exception"}
        consumer = MagicMock()
        flow = MagicMock()
        flow_response = AsyncMock()
        flow.return_value = flow_response
        # Mock client to raise exception on the first downstream call
        mock_client = AsyncMock()
        mock_client.request.side_effect = Exception("Network timeout")
        processor.client.return_value = mock_client
        # Act
        await processor.on_message(msg, consumer, flow)
        # Assert: exception becomes an error response with null data
        flow_response.send.assert_called_once()
        response_call = flow_response.send.call_args
        response = response_call[0][0]
        assert response.error is not None
        assert response.error.type == "structured-query-error"
        assert "Network timeout" in response.error.message
        assert response.data == "null"
        assert len(response.errors) == 0

    def test_processor_initialization(self, mock_pulsar_client):
        """Test processor initialization with correct specifications"""
        # Act
        processor = Processor(
            taskgroup=MagicMock(),
            pulsar_client=mock_pulsar_client
        )
        # Assert - Test default ID
        assert processor.id == "structured-query"
        # Verify specifications were registered (we can't directly access them,
        # but we know they were registered if initialization succeeded)
        assert processor is not None

    def test_add_args(self):
        """Test command-line argument parsing"""
        import argparse
        parser = argparse.ArgumentParser()
        Processor.add_args(parser)
        # Test that it doesn't crash (no additional args)
        args = parser.parse_args([])
        # No specific assertions since no custom args are added
        assert args is not None
@pytest.mark.unit
class TestStructuredQueryHelperFunctions:
    """Sanity checks on module-level wiring of the structured query service."""

    def test_service_logging_integration(self):
        """The module logger is named after its import path."""
        from trustgraph.retrieval.structured_query.service import logger
        expected_name = "trustgraph.retrieval.structured_query.service"
        assert logger.name == expected_name

    def test_default_values(self):
        """The default service identifier matches the documented value."""
        from trustgraph.retrieval.structured_query.service import default_ident
        expected_ident = "structured-query"
        assert default_ident == expected_ident