"""
Integration tests for Structured Query Service

These tests verify the end-to-end functionality of the structured query
service, testing orchestration between the nlp-query and objects-query
services. Following the TEST_STRATEGY.md approach for integration testing.
"""

import asyncio
import json
from unittest.mock import AsyncMock, MagicMock

import pytest

from trustgraph.schema import (
    StructuredQueryRequest, StructuredQueryResponse,
    QuestionToStructuredQueryRequest, QuestionToStructuredQueryResponse,
    ObjectsQueryRequest, ObjectsQueryResponse,
    Error, GraphQLError
)
from trustgraph.retrieval.structured_query.service import Processor


def _make_message(request, message_id):
    """Return a mock message whose value() is *request* and whose
    properties() carry *message_id* under the "id" key."""
    msg = MagicMock()
    msg.value.return_value = request
    msg.properties.return_value = {"id": message_id}
    return msg


def _mock_client(response):
    """Return an AsyncMock client whose request() resolves to *response*."""
    client = AsyncMock()
    client.request.return_value = response
    return client


def _make_flow(nlp_client=None, objects_client=None, service_lookups=None):
    """Build a mock flow context and its response producer.

    The flow is a callable mock that maps a service name to a client:
    "nlp-query-request" -> nlp_client, "objects-query-request" ->
    objects_client, "response" -> the response producer. Any name without a
    configured client yields a fresh AsyncMock.

    Args:
        nlp_client: mock returned for "nlp-query-request" (optional).
        objects_client: mock returned for "objects-query-request" (optional).
        service_lookups: optional list; the service name is appended to it
            each time a configured query client is looked up, so a test can
            count lookups across several flows.

    Returns:
        (flow, flow_response) tuple; flow.return_value is flow_response.
    """
    flow_response = AsyncMock()

    service_clients = {}
    if nlp_client is not None:
        service_clients["nlp-query-request"] = nlp_client
    if objects_client is not None:
        service_clients["objects-query-request"] = objects_client

    def flow_router(service_name):
        if service_name in service_clients:
            if service_lookups is not None:
                service_lookups.append(service_name)
            return service_clients[service_name]
        if service_name == "response":
            return flow_response
        return AsyncMock()

    flow = MagicMock()
    flow.return_value = flow_response
    flow.side_effect = flow_router
    return flow, flow_response


def _sent_response(flow_response):
    """Assert exactly one response was sent and return it.

    Checking the call first gives a clear assertion failure instead of a
    TypeError from subscripting a None call_args.
    """
    flow_response.send.assert_called_once()
    return flow_response.send.call_args[0][0]


@pytest.mark.integration
class TestStructuredQueryServiceIntegration:
    """Integration tests for structured query service orchestration"""

    @pytest.fixture
    def integration_processor(self):
        """Create processor with realistic configuration"""
        proc = Processor(
            taskgroup=MagicMock(),
            pulsar_client=AsyncMock()
        )

        # Mock the client method
        proc.client = MagicMock()

        return proc

    @pytest.mark.asyncio
    async def test_end_to_end_structured_query_processing(self, integration_processor):
        """Test complete structured query processing pipeline"""
        # Arrange - Create realistic query request
        request = StructuredQueryRequest(
            question="Show me all customers from California who have made purchases over $500",
            user="trustgraph",
            collection="default"
        )
        msg = _make_message(request, "integration-test-001")
        consumer = MagicMock()

        # Mock NLP Query Service Response
        nlp_response = QuestionToStructuredQueryResponse(
            error=None,
            graphql_query='''
            query GetCaliforniaCustomersWithLargePurchases($minAmount: String!, $state: String!) {
                customers(where: {state: {eq: $state}}) {
                    id
                    name
                    email
                    orders(where: {total: {gt: $minAmount}}) {
                        id
                        total
                        date
                    }
                }
            }
            ''',
            variables={
                "minAmount": "500.0",
                "state": "California"
            },
            detected_schemas=["customers", "orders"],
            confidence=0.91
        )

        # Mock Objects Query Service Response
        objects_response = ObjectsQueryResponse(
            error=None,
            data='{"customers": [{"id": "123", "name": "Alice Johnson", "email": "alice@example.com", "orders": [{"id": "456", "total": 750.0, "date": "2024-01-15"}]}]}',
            errors=None,
            extensions={"execution_time": "150ms", "query_complexity": "8"}
        )

        # Route each service name to its own mock client
        mock_nlp_client = _mock_client(nlp_response)
        mock_objects_client = _mock_client(objects_response)
        flow, flow_response = _make_flow(mock_nlp_client, mock_objects_client)

        # Act - Process the message
        await integration_processor.on_message(msg, consumer, flow)

        # Assert - Verify the complete orchestration
        # Verify NLP service call
        mock_nlp_client.request.assert_called_once()
        nlp_call_args = mock_nlp_client.request.call_args[0][0]
        assert isinstance(nlp_call_args, QuestionToStructuredQueryRequest)
        assert nlp_call_args.question == "Show me all customers from California who have made purchases over $500"
        assert nlp_call_args.max_results == 100  # Default max_results

        # Verify Objects service call
        mock_objects_client.request.assert_called_once()
        objects_call_args = mock_objects_client.request.call_args[0][0]
        assert isinstance(objects_call_args, ObjectsQueryRequest)
        assert "customers" in objects_call_args.query
        assert "orders" in objects_call_args.query
        # Variables reach the objects service as the same string values
        assert objects_call_args.variables["minAmount"] == "500.0"
        assert objects_call_args.variables["state"] == "California"
        assert objects_call_args.user == "trustgraph"
        assert objects_call_args.collection == "default"

        # Verify response
        response = _sent_response(flow_response)
        assert isinstance(response, StructuredQueryResponse)
        assert response.error is None
        assert "Alice Johnson" in response.data
        assert "750.0" in response.data
        assert len(response.errors) == 0

    @pytest.mark.asyncio
    async def test_nlp_service_integration_failure(self, integration_processor):
        """Test integration when NLP service fails"""
        # Arrange
        request = StructuredQueryRequest(
            question="This is an unparseable query ][{}"
        )
        msg = _make_message(request, "nlp-failure-test")
        consumer = MagicMock()

        # Mock NLP service failure
        nlp_error_response = QuestionToStructuredQueryResponse(
            error=Error(type="nlp-parsing-error", message="Unable to parse natural language query"),
            graphql_query="",
            variables={},
            detected_schemas=[],
            confidence=0.0
        )

        # Only the NLP service is routed; other names get a fresh AsyncMock
        mock_nlp_client = _mock_client(nlp_error_response)
        flow, flow_response = _make_flow(mock_nlp_client)

        # Act
        await integration_processor.on_message(msg, consumer, flow)

        # Assert - Error should be propagated properly
        response = _sent_response(flow_response)
        assert isinstance(response, StructuredQueryResponse)
        assert response.error is not None
        assert response.error.type == "structured-query-error"
        assert "NLP query service error" in response.error.message
        assert "Unable to parse natural language query" in response.error.message

    @pytest.mark.asyncio
    async def test_objects_service_integration_failure(self, integration_processor):
        """Test integration when Objects service fails"""
        # Arrange
        request = StructuredQueryRequest(
            question="Show me data from a table that doesn't exist"
        )
        msg = _make_message(request, "objects-failure-test")
        consumer = MagicMock()

        # Mock successful NLP response
        nlp_response = QuestionToStructuredQueryResponse(
            error=None,
            graphql_query='query { nonexistent_table { id name } }',
            variables={},
            detected_schemas=["nonexistent_table"],
            confidence=0.7
        )

        # Mock Objects service failure
        objects_error_response = ObjectsQueryResponse(
            error=Error(type="graphql-schema-error", message="Table 'nonexistent_table' does not exist in schema"),
            data=None,
            errors=None,
            extensions={}
        )

        mock_nlp_client = _mock_client(nlp_response)
        mock_objects_client = _mock_client(objects_error_response)
        flow, flow_response = _make_flow(mock_nlp_client, mock_objects_client)

        # Act
        await integration_processor.on_message(msg, consumer, flow)

        # Assert - Error should be propagated
        response = _sent_response(flow_response)
        assert response.error is not None
        assert response.error.type == "structured-query-error"
        assert "Objects query service error" in response.error.message
        assert "nonexistent_table" in response.error.message

    @pytest.mark.asyncio
    async def test_graphql_validation_errors_integration(self, integration_processor):
        """Test integration with GraphQL validation errors"""
        # Arrange
        request = StructuredQueryRequest(
            question="Show me customer invalid_field values"
        )
        msg = _make_message(request, "validation-error-test")
        consumer = MagicMock()

        # Mock NLP response with invalid field
        nlp_response = QuestionToStructuredQueryResponse(
            error=None,
            graphql_query='query { customers { id invalid_field } }',
            variables={},
            detected_schemas=["customers"],
            confidence=0.8
        )

        # Mock Objects response with GraphQL validation errors
        validation_errors = [
            GraphQLError(
                message="Cannot query field 'invalid_field' on type 'Customer'",
                path=["customers", "0", "invalid_field"],
                extensions={"code": "VALIDATION_ERROR"}
            ),
            GraphQLError(
                message="Field 'invalid_field' is not defined in the schema",
                path=["customers", "invalid_field"],
                extensions={"code": "FIELD_NOT_FOUND"}
            )
        ]

        objects_response = ObjectsQueryResponse(
            error=None,
            data=None,  # No data when validation fails
            errors=validation_errors,
            extensions={"validation_errors": "2"}
        )

        mock_nlp_client = _mock_client(nlp_response)
        mock_objects_client = _mock_client(objects_response)
        flow, flow_response = _make_flow(mock_nlp_client, mock_objects_client)

        # Act
        await integration_processor.on_message(msg, consumer, flow)

        # Assert - GraphQL errors should be included in response
        response = _sent_response(flow_response)
        assert response.error is None  # No system error
        assert len(response.errors) == 2  # Two GraphQL errors
        assert "Cannot query field 'invalid_field'" in response.errors[0]
        assert "Field 'invalid_field' is not defined" in response.errors[1]
        assert "customers" in response.errors[0]

    @pytest.mark.asyncio
    async def test_complex_multi_service_integration(self, integration_processor):
        """Test complex integration scenario with multiple entities and relationships"""
        # Arrange
        request = StructuredQueryRequest(
            question="Find all products under $100 that are in stock, along with their recent orders from customers in New York"
        )
        msg = _make_message(request, "complex-integration-test")
        consumer = MagicMock()

        # Mock complex NLP response
        nlp_response = QuestionToStructuredQueryResponse(
            error=None,
            graphql_query='''
            query GetProductsWithCustomerOrders($maxPrice: String!, $inStock: String!, $state: String!) {
                products(where: {price: {lt: $maxPrice}, in_stock: {eq: $inStock}}) {
                    id
                    name
                    price
                    orders {
                        id
                        total
                        customer {
                            id
                            name
                            state
                        }
                    }
                }
            }
            ''',
            variables={
                "maxPrice": "100.0",
                "inStock": "true",
                "state": "New York"
            },
            detected_schemas=["products", "orders", "customers"],
            confidence=0.85
        )

        # Mock complex Objects response
        complex_data = {
            "products": [
                {
                    "id": "prod_123",
                    "name": "Widget A",
                    "price": 89.99,
                    "orders": [
                        {
                            "id": "order_456",
                            "total": 179.98,
                            "customer": {
                                "id": "cust_789",
                                "name": "Bob Smith",
                                "state": "New York"
                            }
                        }
                    ]
                },
                {
                    "id": "prod_124",
                    "name": "Widget B",
                    "price": 65.50,
                    "orders": [
                        {
                            "id": "order_457",
                            "total": 131.00,
                            "customer": {
                                "id": "cust_790",
                                "name": "Carol Jones",
                                "state": "New York"
                            }
                        }
                    ]
                }
            ]
        }

        objects_response = ObjectsQueryResponse(
            error=None,
            data=json.dumps(complex_data),
            errors=None,
            extensions={
                "execution_time": "250ms",
                "query_complexity": "15",
                "data_sources": "products,orders,customers"  # Comma-separated string rather than an array
            }
        )

        mock_nlp_client = _mock_client(nlp_response)
        mock_objects_client = _mock_client(objects_response)
        flow, flow_response = _make_flow(mock_nlp_client, mock_objects_client)

        # Act
        await integration_processor.on_message(msg, consumer, flow)

        # Assert - Verify complex data integration
        # Check NLP service call (assert the call happened before reading its args)
        mock_nlp_client.request.assert_called_once()
        nlp_call_args = mock_nlp_client.request.call_args[0][0]
        assert len(nlp_call_args.question) > 50  # Complex question

        # Check Objects service call receives the string variables
        mock_objects_client.request.assert_called_once()
        objects_call_args = mock_objects_client.request.call_args[0][0]
        assert objects_call_args.variables["maxPrice"] == "100.0"
        assert objects_call_args.variables["inStock"] == "true"
        assert objects_call_args.variables["state"] == "New York"

        # Check response contains complex data
        response = _sent_response(flow_response)
        assert response.error is None
        assert "Widget A" in response.data
        assert "Widget B" in response.data
        assert "Bob Smith" in response.data
        assert "Carol Jones" in response.data
        assert "New York" in response.data

    @pytest.mark.asyncio
    async def test_empty_result_integration(self, integration_processor):
        """Test integration when query returns empty results"""
        # Arrange
        request = StructuredQueryRequest(
            question="Show me customers from Mars"
        )
        msg = _make_message(request, "empty-result-test")
        consumer = MagicMock()

        # Mock NLP response
        nlp_response = QuestionToStructuredQueryResponse(
            error=None,
            graphql_query='query { customers(where: {planet: {eq: "Mars"}}) { id name planet } }',
            variables={},
            detected_schemas=["customers"],
            confidence=0.9
        )

        # Mock empty Objects response
        objects_response = ObjectsQueryResponse(
            error=None,
            data='{"customers": []}',  # Empty result set
            errors=None,
            extensions={"result_count": "0"}
        )

        mock_nlp_client = _mock_client(nlp_response)
        mock_objects_client = _mock_client(objects_response)
        flow, flow_response = _make_flow(mock_nlp_client, mock_objects_client)

        # Act
        await integration_processor.on_message(msg, consumer, flow)

        # Assert - Empty results should be handled gracefully
        response = _sent_response(flow_response)
        assert response.error is None
        assert response.data == '{"customers": []}'
        assert len(response.errors) == 0

    @pytest.mark.asyncio
    async def test_concurrent_requests_integration(self, integration_processor):
        """Test integration with concurrent request processing"""
        # Arrange - one message and one independently routed flow per request
        request_count = 3
        service_lookups = []  # Shared record of nlp/objects client lookups
        messages = []
        flows = []
        flow_responses = []

        for i in range(request_count):
            request = StructuredQueryRequest(
                question=f"Query {i}: Show me data"
            )
            messages.append(_make_message(request, f"concurrent-test-{i}"))

            # Create NLP and Objects responses for this request
            nlp_response = QuestionToStructuredQueryResponse(
                error=None,
                graphql_query=f'query {{ test_{i} {{ id }} }}',
                variables={},
                detected_schemas=[f"test_{i}"],
                confidence=0.9
            )

            objects_response = ObjectsQueryResponse(
                error=None,
                data=f'{{"test_{i}": [{{"id": "{i}"}}]}}',
                errors=None,
                extensions={}
            )

            flow, flow_response = _make_flow(
                _mock_client(nlp_response),
                _mock_client(objects_response),
                service_lookups=service_lookups,
            )
            flows.append(flow)
            flow_responses.append(flow_response)

        # Act - Process all messages concurrently
        consumer = MagicMock()
        await asyncio.gather(*(
            integration_processor.on_message(msg, consumer, flow)
            for msg, flow in zip(messages, flows)
        ))

        # Assert - All requests should be processed
        assert len(service_lookups) == 6  # 2 lookups per request (NLP + Objects)
        for flow_response in flow_responses:
            flow_response.send.assert_called_once()

    @pytest.mark.asyncio
    async def test_service_timeout_integration(self, integration_processor):
        """Test integration with service timeout scenarios"""
        # Arrange
        request = StructuredQueryRequest(
            question="This query will timeout"
        )
        msg = _make_message(request, "timeout-test")
        consumer = MagicMock()

        # Mock NLP service timeout: request() raises instead of returning
        mock_nlp_client = AsyncMock()
        mock_nlp_client.request.side_effect = Exception("Service timeout: Request took longer than 30s")

        flow, flow_response = _make_flow(mock_nlp_client)

        # Act
        await integration_processor.on_message(msg, consumer, flow)

        # Assert - Timeout should be handled gracefully
        response = _sent_response(flow_response)
        assert response.error is not None
        assert response.error.type == "structured-query-error"
        assert "timeout" in response.error.message.lower()

    @pytest.mark.asyncio
    async def test_variable_type_conversion_integration(self, integration_processor):
        """Test that query variables reach the objects service as strings"""
        # Arrange
        request = StructuredQueryRequest(
            question="Show me orders with totals between 50.5 and 200.75 from the last 30 days"
        )
        msg = _make_message(request, "variable-conversion-test")
        consumer = MagicMock()

        # Mock NLP response; numeric-looking variables are supplied as strings
        nlp_response = QuestionToStructuredQueryResponse(
            error=None,
            graphql_query=(
                'query($minTotal: Float!, $maxTotal: Float!, $daysPast: Int!) '
                '{ orders(filter: {total: {between: [$minTotal, $maxTotal]}, date: {gte: $daysPast}}) { id total date } }'
            ),
            variables={
                "minTotal": "50.5",     # Already string
                "maxTotal": "200.75",   # Already string
                "daysPast": "30"        # Already string
            },
            detected_schemas=["orders"],
            confidence=0.88
        )

        # Mock Objects response
        objects_response = ObjectsQueryResponse(
            error=None,
            data='{"orders": [{"id": "123", "total": 125.50, "date": "2024-01-15"}]}',
            errors=None,
            extensions={}
        )

        mock_nlp_client = _mock_client(nlp_response)
        mock_objects_client = _mock_client(objects_response)
        flow, flow_response = _make_flow(mock_nlp_client, mock_objects_client)

        # Act
        await integration_processor.on_message(msg, consumer, flow)

        # Assert - the objects service must have been called before reading its args
        mock_objects_client.request.assert_called_once()
        objects_call_args = mock_objects_client.request.call_args[0][0]

        # All variables should be strings for Pulsar schema compatibility
        assert isinstance(objects_call_args.variables["minTotal"], str)
        assert isinstance(objects_call_args.variables["maxTotal"], str)
        assert isinstance(objects_call_args.variables["daysPast"], str)

        # Values should be preserved
        assert objects_call_args.variables["minTotal"] == "50.5"
        assert objects_call_args.variables["maxTotal"] == "200.75"
        assert objects_call_args.variables["daysPast"] == "30"

        # Response should contain expected data
        response = _sent_response(flow_response)
        assert response.error is None
        assert "125.50" in response.data