Release 1.4 -> master (#524)

Catch up
This commit is contained in:
cybermaggedon 2025-09-20 16:00:37 +01:00 committed by GitHub
parent a8e437fc7f
commit 6c7af8789d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
216 changed files with 31360 additions and 1611 deletions

View file

@@ -0,0 +1,77 @@
"""
Unit test for DocumentRAG service parameter passing fix.
Tests that user and collection parameters from the message are correctly
passed to the DocumentRag.query() method.
"""
import pytest
from unittest.mock import MagicMock, AsyncMock, patch
from trustgraph.retrieval.document_rag.rag import Processor
from trustgraph.schema import DocumentRagQuery, DocumentRagResponse
class TestDocumentRagService:
    """Test DocumentRAG service parameter passing"""

    # Patch the DocumentRag class where the processor imports it, so the
    # processor constructs our mock instead of a real RAG backend.
    @patch('trustgraph.retrieval.document_rag.rag.DocumentRag')
    @pytest.mark.asyncio
    async def test_user_and_collection_parameters_passed_to_query(self, mock_document_rag_class):
        """
        Test that user and collection from message are passed to DocumentRag.query().
        This is a regression test for the bug where user/collection parameters
        were ignored, causing wrong collection names like 'd_trustgraph_default_384'
        instead of 'd_my_user_test_coll_1_384'.
        """
        # Setup processor
        processor = Processor(
            taskgroup=MagicMock(),
            id="test-processor",
            doc_limit=10
        )
        # Setup mock DocumentRag instance — returned when the processor
        # instantiates the patched class.
        mock_rag_instance = AsyncMock()
        mock_document_rag_class.return_value = mock_rag_instance
        mock_rag_instance.query.return_value = "test response"
        # Setup message with custom user/collection
        msg = MagicMock()
        msg.value.return_value = DocumentRagQuery(
            query="test query",
            user="my_user",  # Custom user (not default "trustgraph")
            collection="test_coll_1",  # Custom collection (not default "default")
            doc_limit=5
        )
        msg.properties.return_value = {"id": "test-id"}
        # Setup flow mock
        consumer = MagicMock()
        flow = MagicMock()
        # Mock flow to return AsyncMock for clients and response producer
        mock_producer = AsyncMock()

        def flow_router(service_name):
            # Only the "response" producer matters for the assertions below.
            if service_name == "response":
                return mock_producer
            return AsyncMock()  # embeddings, doc-embeddings, prompt clients
        flow.side_effect = flow_router
        # Execute
        await processor.on_request(msg, consumer, flow)
        # Verify: DocumentRag.query was called with correct parameters
        mock_rag_instance.query.assert_called_once_with(
            "test query",
            user="my_user",  # Must be from message, not hardcoded default
            collection="test_coll_1",  # Must be from message, not hardcoded default
            doc_limit=5
        )
        # Verify response was sent
        mock_producer.send.assert_called_once()
        sent_response = mock_producer.send.call_args[0][0]
        assert isinstance(sent_response, DocumentRagResponse)
        assert sent_response.response == "test response"
        assert sent_response.error is None

View file

@@ -0,0 +1,374 @@
"""
Unit tests for NLP Query service
Following TEST_STRATEGY.md approach for service testing
"""
import pytest
import json
from unittest.mock import AsyncMock, MagicMock, patch
from typing import Dict, Any
from trustgraph.schema import (
QuestionToStructuredQueryRequest, QuestionToStructuredQueryResponse,
PromptRequest, PromptResponse, Error, RowSchema, Field as SchemaField
)
from trustgraph.retrieval.nlp_query.service import Processor
@pytest.fixture
def mock_prompt_client():
    """Mock prompt service client (async interface, no behavior configured)."""
    return AsyncMock()
@pytest.fixture
def mock_pulsar_client():
    """Mock Pulsar client passed to the processor under test."""
    return AsyncMock()
@pytest.fixture
def sample_schemas():
    """Two example RowSchema objects (customers, orders) keyed by name."""
    customer_fields = [
        SchemaField(name="id", type="string", primary=True),
        SchemaField(name="name", type="string"),
        SchemaField(name="email", type="string"),
        SchemaField(name="state", type="string"),
    ]
    order_fields = [
        SchemaField(name="order_id", type="string", primary=True),
        SchemaField(name="customer_id", type="string"),
        SchemaField(name="total", type="float"),
        SchemaField(name="status", type="string"),
    ]
    return {
        "customers": RowSchema(
            name="customers",
            description="Customer data",
            fields=customer_fields,
        ),
        "orders": RowSchema(
            name="orders",
            description="Order data",
            fields=order_fields,
        ),
    }
@pytest.fixture
def processor(mock_pulsar_client, sample_schemas):
    """Processor wired with a mocked Pulsar client, preloaded schemas,
    and a stubbed client attribute."""
    instance = Processor(
        taskgroup=MagicMock(),
        pulsar_client=mock_pulsar_client,
        config_type="schema",
    )
    instance.schemas = sample_schemas
    instance.client = MagicMock()
    return instance
# Class-level asyncio mark: pytest-asyncio runs the async test methods;
# the two plain (sync) methods at the bottom are unaffected by it.
@pytest.mark.asyncio
class TestNLPQueryProcessor:
    """Test NLP Query service processor"""

    async def test_phase1_select_schemas_success(self, processor, mock_prompt_client):
        """Test successful schema selection (Phase 1)"""
        # NOTE(review): the mock_prompt_client fixture is injected but never
        # referenced; the prompt service is mocked via the flow below.
        # Arrange
        question = "Show me customers from California"
        expected_schemas = ["customers"]
        mock_response = PromptResponse(
            text=json.dumps(expected_schemas),
            error=None
        )
        # Mock flow context: only "prompt-request" resolves to our mock service
        flow = MagicMock()
        mock_prompt_service = AsyncMock()
        mock_prompt_service.request = AsyncMock(return_value=mock_response)
        flow.side_effect = lambda service_name: mock_prompt_service if service_name == "prompt-request" else AsyncMock()
        # Act
        result = await processor.phase1_select_schemas(question, flow)
        # Assert
        assert result == expected_schemas
        mock_prompt_service.request.assert_called_once()

    async def test_phase1_select_schemas_prompt_error(self, processor):
        """Test schema selection with prompt service error"""
        # Arrange
        question = "Show me customers"
        error = Error(type="prompt-error", message="Template not found")
        mock_response = PromptResponse(text="", error=error)
        # Mock flow context
        flow = MagicMock()
        mock_prompt_service = AsyncMock()
        mock_prompt_service.request = AsyncMock(return_value=mock_response)
        flow.side_effect = lambda service_name: mock_prompt_service if service_name == "prompt-request" else AsyncMock()
        # Act & Assert
        with pytest.raises(Exception, match="Prompt service error"):
            await processor.phase1_select_schemas(question, flow)

    async def test_phase2_generate_graphql_success(self, processor):
        """Test successful GraphQL generation (Phase 2)"""
        # Arrange
        question = "Show me customers from California"
        selected_schemas = ["customers"]
        expected_result = {
            "query": "query { customers(where: {state: {eq: \"California\"}}) { id name email state } }",
            "variables": {},
            "confidence": 0.95
        }
        mock_response = PromptResponse(
            text=json.dumps(expected_result),
            error=None
        )
        # Mock flow context
        flow = MagicMock()
        mock_prompt_service = AsyncMock()
        mock_prompt_service.request = AsyncMock(return_value=mock_response)
        flow.side_effect = lambda service_name: mock_prompt_service if service_name == "prompt-request" else AsyncMock()
        # Act
        result = await processor.phase2_generate_graphql(question, selected_schemas, flow)
        # Assert
        assert result == expected_result
        mock_prompt_service.request.assert_called_once()

    async def test_phase2_generate_graphql_prompt_error(self, processor):
        """Test GraphQL generation with prompt service error"""
        # Arrange
        question = "Show me customers"
        selected_schemas = ["customers"]
        error = Error(type="prompt-error", message="Generation failed")
        mock_response = PromptResponse(text="", error=error)
        # Mock flow context
        flow = MagicMock()
        mock_prompt_service = AsyncMock()
        mock_prompt_service.request = AsyncMock(return_value=mock_response)
        flow.side_effect = lambda service_name: mock_prompt_service if service_name == "prompt-request" else AsyncMock()
        # Act & Assert
        with pytest.raises(Exception, match="Prompt service error"):
            await processor.phase2_generate_graphql(question, selected_schemas, flow)

    async def test_on_message_full_flow_success(self, processor):
        """Test complete message processing flow"""
        # Arrange
        request = QuestionToStructuredQueryRequest(
            question="Show me customers from California",
            max_results=100
        )
        msg = MagicMock()
        msg.value.return_value = request
        msg.properties.return_value = {"id": "test-123"}
        consumer = MagicMock()
        flow = MagicMock()
        flow_response = AsyncMock()
        flow.return_value = flow_response
        # Mock Phase 1 response
        phase1_response = PromptResponse(
            text=json.dumps(["customers"]),
            error=None
        )
        # Mock Phase 2 response
        phase2_response = PromptResponse(
            text=json.dumps({
                "query": "query { customers(where: {state: {eq: \"California\"}}) { id name email } }",
                "variables": {},
                "confidence": 0.9
            }),
            error=None
        )
        # Mock flow context to return prompt service responses in order
        # (side_effect list: first call -> phase 1, second call -> phase 2)
        mock_prompt_service = AsyncMock()
        mock_prompt_service.request = AsyncMock(
            side_effect=[phase1_response, phase2_response]
        )
        flow.side_effect = lambda service_name: mock_prompt_service if service_name == "prompt-request" else flow_response if service_name == "response" else AsyncMock()
        # Act
        await processor.on_message(msg, consumer, flow)
        # Assert
        assert mock_prompt_service.request.call_count == 2
        flow_response.send.assert_called_once()
        # Verify response structure
        response_call = flow_response.send.call_args
        response = response_call[0][0]  # First argument is the response object
        assert isinstance(response, QuestionToStructuredQueryResponse)
        assert response.error is None
        assert "customers" in response.graphql_query
        assert response.detected_schemas == ["customers"]
        assert response.confidence == 0.9

    async def test_on_message_phase1_error(self, processor):
        """Test message processing with Phase 1 failure"""
        # Arrange
        request = QuestionToStructuredQueryRequest(
            question="Show me customers",
            max_results=100
        )
        msg = MagicMock()
        msg.value.return_value = request
        msg.properties.return_value = {"id": "test-123"}
        consumer = MagicMock()
        flow = MagicMock()
        flow_response = AsyncMock()
        flow.return_value = flow_response
        # Mock Phase 1 error
        phase1_response = PromptResponse(
            text="",
            error=Error(type="template-error", message="Template not found")
        )
        # NOTE(review): this stubs processor.client, yet the success-path test
        # above delivers prompt responses through flow(...) — confirm this stub
        # is what actually triggers the error path rather than the default
        # MagicMock returned from flow().
        processor.client.return_value.request = AsyncMock(return_value=phase1_response)
        # Act
        await processor.on_message(msg, consumer, flow)
        # Assert
        flow_response.send.assert_called_once()
        # Verify error response
        response_call = flow_response.send.call_args
        response = response_call[0][0]
        assert isinstance(response, QuestionToStructuredQueryResponse)
        assert response.error is not None
        assert response.error.type == "nlp-query-error"
        assert "Prompt service error" in response.error.message

    async def test_schema_config_loading(self, processor):
        """Test schema configuration loading"""
        # Arrange: config values are JSON strings, as delivered by config service
        config = {
            "schema": {
                "test_schema": json.dumps({
                    "name": "test_schema",
                    "description": "Test schema",
                    "fields": [
                        {
                            "name": "id",
                            "type": "string",
                            "primary_key": True,
                            "required": True
                        },
                        {
                            "name": "name",
                            "type": "string",
                            "description": "User name"
                        }
                    ]
                })
            }
        }
        # Act
        await processor.on_schema_config(config, "v1")
        # Assert
        assert "test_schema" in processor.schemas
        schema = processor.schemas["test_schema"]
        assert schema.name == "test_schema"
        assert schema.description == "Test schema"
        assert len(schema.fields) == 2
        assert schema.fields[0].name == "id"
        # JSON "primary_key" maps onto the RowSchema field's "primary" flag
        assert schema.fields[0].primary == True
        assert schema.fields[1].name == "name"

    async def test_schema_config_loading_invalid_json(self, processor):
        """Test schema configuration loading with invalid JSON"""
        # Arrange
        config = {
            "schema": {
                "bad_schema": "invalid json{"
            }
        }
        # Act
        await processor.on_schema_config(config, "v1")
        # Assert - bad schema should be ignored
        assert "bad_schema" not in processor.schemas

    def test_processor_initialization(self, mock_pulsar_client):
        """Test processor initialization with correct specifications"""
        # Act
        processor = Processor(
            taskgroup=MagicMock(),
            pulsar_client=mock_pulsar_client,
            schema_selection_template="custom-schema-select",
            graphql_generation_template="custom-graphql-gen"
        )
        # Assert
        assert processor.schema_selection_template == "custom-schema-select"
        assert processor.graphql_generation_template == "custom-graphql-gen"
        assert processor.config_key == "schema"
        assert processor.schemas == {}

    def test_add_args(self):
        """Test command-line argument parsing"""
        import argparse
        parser = argparse.ArgumentParser()
        Processor.add_args(parser)
        # Test default values
        args = parser.parse_args([])
        assert args.config_type == "schema"
        assert args.schema_selection_template == "schema-selection"
        assert args.graphql_generation_template == "graphql-generation"
        # Test custom values
        args = parser.parse_args([
            "--config-type", "custom",
            "--schema-selection-template", "my-selector",
            "--graphql-generation-template", "my-generator"
        ])
        assert args.config_type == "custom"
        assert args.schema_selection_template == "my-selector"
        assert args.graphql_generation_template == "my-generator"
@pytest.mark.unit
class TestNLPQueryHelperFunctions:
    """Test helper functions and data transformations"""

    def test_schema_info_formatting(self, sample_schemas):
        """Test schema info formatting for prompts"""
        # This would test any helper functions for formatting schema data
        # Currently the formatting is inline, but good to test if extracted
        schema = sample_schemas["customers"]
        assert [field.name for field in schema.fields] == [
            "id", "name", "email", "state",
        ]
        # Test primary key detection: only "id" carries the primary flag
        assert [field.name for field in schema.fields if field.primary] == ["id"]

View file

@@ -0,0 +1,3 @@
"""
Unit and contract tests for structured-diag service
"""

View file

@@ -0,0 +1,172 @@
"""
Unit tests for message translation in structured-diag service
"""
import pytest
from trustgraph.messaging.translators.diagnosis import (
StructuredDataDiagnosisRequestTranslator,
StructuredDataDiagnosisResponseTranslator
)
from trustgraph.schema.services.diagnosis import (
StructuredDataDiagnosisRequest,
StructuredDataDiagnosisResponse
)
class TestRequestTranslation:
    """Test request message translation"""

    def test_translate_schema_selection_request(self):
        """Schema-selection request: API dict (hyphenated keys) to Pulsar message."""
        translator = StructuredDataDiagnosisRequestTranslator()
        # API format (with hyphens)
        request = {
            "operation": "schema-selection",
            "sample": "test data sample",
            "options": {"filter": "catalog"},
        }
        message = translator.to_pulsar(request)
        assert message.operation == "schema-selection"
        assert message.sample == "test data sample"
        assert message.options == {"filter": "catalog"}

    def test_translate_request_with_all_fields(self):
        """Every supported request field survives API-to-Pulsar translation,
        including the hyphenated "schema-name" key mapping to schema_name."""
        translator = StructuredDataDiagnosisRequestTranslator()
        request = {
            "operation": "generate-descriptor",
            "sample": "csv data",
            "type": "csv",
            "schema-name": "products",
            "options": {"delimiter": ","},
        }
        message = translator.to_pulsar(request)
        assert message.operation == "generate-descriptor"
        assert message.sample == "csv data"
        assert message.type == "csv"
        assert message.schema_name == "products"
        assert message.options == {"delimiter": ","}
class TestResponseTranslation:
    """Test response message translation"""

    def test_translate_schema_selection_response(self):
        """Test translating schema-selection response from Pulsar to API"""
        translator = StructuredDataDiagnosisResponseTranslator()
        # Create Pulsar response with schema_matches
        pulsar_response = StructuredDataDiagnosisResponse(
            operation="schema-selection",
            schema_matches=["products", "inventory", "catalog"],
            error=None
        )
        # Translate to API format — snake_case fields become hyphenated keys
        api_data = translator.from_pulsar(pulsar_response)
        assert api_data["operation"] == "schema-selection"
        assert api_data["schema-matches"] == ["products", "inventory", "catalog"]
        assert "error" not in api_data  # None errors shouldn't be included

    def test_translate_empty_schema_matches(self):
        """Test translating response with empty schema_matches"""
        translator = StructuredDataDiagnosisResponseTranslator()
        pulsar_response = StructuredDataDiagnosisResponse(
            operation="schema-selection",
            schema_matches=[],
            error=None
        )
        api_data = translator.from_pulsar(pulsar_response)
        # An empty list is preserved — distinct from the absent/None case below
        assert api_data["operation"] == "schema-selection"
        assert api_data["schema-matches"] == []

    def test_translate_response_without_schema_matches(self):
        """Test translating response without schema_matches field"""
        translator = StructuredDataDiagnosisResponseTranslator()
        # Old-style response without schema_matches
        pulsar_response = StructuredDataDiagnosisResponse(
            operation="detect-type",
            detected_type="xml",
            confidence=0.9,
            error=None
        )
        api_data = translator.from_pulsar(pulsar_response)
        assert api_data["operation"] == "detect-type"
        assert api_data["detected-type"] == "xml"
        assert api_data["confidence"] == 0.9
        assert "schema-matches" not in api_data  # None values shouldn't be included

    def test_translate_response_with_error(self):
        """Test translating response with error"""
        translator = StructuredDataDiagnosisResponseTranslator()
        from trustgraph.schema.core.primitives import Error
        pulsar_response = StructuredDataDiagnosisResponse(
            operation="schema-selection",
            error=Error(
                type="PromptServiceError",
                message="Service unavailable"
            )
        )
        # Only asserts the translation does not raise and keeps "operation".
        api_data = translator.from_pulsar(pulsar_response)
        assert api_data["operation"] == "schema-selection"
        # Error objects are typically handled separately by the gateway
        # but the translator shouldn't break on them

    def test_translate_all_response_fields(self):
        """Test translating response with all possible fields"""
        translator = StructuredDataDiagnosisResponseTranslator()
        import json
        descriptor_data = {"mapping": {"field1": "column1"}}
        pulsar_response = StructuredDataDiagnosisResponse(
            operation="diagnose",
            detected_type="csv",
            confidence=0.95,
            descriptor=json.dumps(descriptor_data),
            metadata={"field_count": "5"},
            schema_matches=["schema1", "schema2"],
            error=None
        )
        api_data = translator.from_pulsar(pulsar_response)
        assert api_data["operation"] == "diagnose"
        assert api_data["detected-type"] == "csv"
        assert api_data["confidence"] == 0.95
        assert api_data["descriptor"] == descriptor_data  # Should be parsed from JSON
        assert api_data["metadata"] == {"field_count": "5"}
        assert api_data["schema-matches"] == ["schema1", "schema2"]

    def test_response_completion_flag(self):
        """Test that response includes completion flag"""
        translator = StructuredDataDiagnosisResponseTranslator()
        pulsar_response = StructuredDataDiagnosisResponse(
            operation="schema-selection",
            schema_matches=["products"],
            error=None
        )
        api_data, is_final = translator.from_response_with_completion(pulsar_response)
        assert is_final is True  # Structured-diag responses are always final
        assert api_data["operation"] == "schema-selection"
        assert api_data["schema-matches"] == ["products"]

View file

@@ -0,0 +1,258 @@
"""
Contract tests for structured-diag service schemas
"""
import pytest
import json
from pulsar.schema import JsonSchema
from trustgraph.schema.services.diagnosis import (
StructuredDataDiagnosisRequest,
StructuredDataDiagnosisResponse
)
class TestStructuredDiagnosisSchemaContract:
    """Contract tests for structured diagnosis message schemas"""

    def test_request_schema_basic_fields(self):
        """Test basic request schema fields"""
        request = StructuredDataDiagnosisRequest(
            operation="detect-type",
            sample="test data"
        )
        assert request.operation == "detect-type"
        assert request.sample == "test data"
        assert request.type is None  # Optional, defaults to None
        assert request.schema_name is None  # Optional, defaults to None
        assert request.options is None  # Optional, defaults to None

    def test_request_schema_all_operations(self):
        """Test request schema supports all operations"""
        operations = ["detect-type", "generate-descriptor", "diagnose", "schema-selection"]
        for op in operations:
            request = StructuredDataDiagnosisRequest(
                operation=op,
                sample="test data"
            )
            assert request.operation == op

    def test_request_schema_with_options(self):
        """Test request schema with options"""
        # Option values are strings (string-to-string map in the schema)
        options = {"delimiter": ",", "has_header": "true"}
        request = StructuredDataDiagnosisRequest(
            operation="generate-descriptor",
            sample="test data",
            type="csv",
            schema_name="products",
            options=options
        )
        assert request.options == options
        assert request.type == "csv"
        assert request.schema_name == "products"

    def test_response_schema_basic_fields(self):
        """Test basic response schema fields"""
        response = StructuredDataDiagnosisResponse(
            operation="detect-type",
            detected_type="xml",
            confidence=0.9,
            error=None  # Explicitly set to None
        )
        assert response.operation == "detect-type"
        assert response.detected_type == "xml"
        assert response.confidence == 0.9
        assert response.error is None
        assert response.descriptor is None
        assert response.metadata is None
        assert response.schema_matches is None  # New field, defaults to None

    def test_response_schema_with_error(self):
        """Test response schema with error"""
        from trustgraph.schema.core.primitives import Error
        error = Error(
            type="ServiceError",
            message="Service unavailable"
        )
        response = StructuredDataDiagnosisResponse(
            operation="schema-selection",
            error=error
        )
        assert response.error == error
        assert response.error.type == "ServiceError"
        assert response.error.message == "Service unavailable"

    def test_response_schema_with_schema_matches(self):
        """Test response schema with schema_matches array"""
        matches = ["products", "inventory", "catalog"]
        response = StructuredDataDiagnosisResponse(
            operation="schema-selection",
            schema_matches=matches
        )
        assert response.operation == "schema-selection"
        assert response.schema_matches == matches
        assert len(response.schema_matches) == 3

    def test_response_schema_empty_schema_matches(self):
        """Test response schema with empty schema_matches array"""
        response = StructuredDataDiagnosisResponse(
            operation="schema-selection",
            schema_matches=[]
        )
        # Empty list must survive construction (not collapse to None)
        assert response.schema_matches == []
        assert isinstance(response.schema_matches, list)

    def test_response_schema_with_descriptor(self):
        """Test response schema with descriptor"""
        # Descriptor is carried as a JSON string, not a nested object
        descriptor = {
            "mapping": {
                "field1": "column1",
                "field2": "column2"
            }
        }
        response = StructuredDataDiagnosisResponse(
            operation="generate-descriptor",
            descriptor=json.dumps(descriptor)
        )
        assert response.descriptor == json.dumps(descriptor)
        parsed = json.loads(response.descriptor)
        assert parsed["mapping"]["field1"] == "column1"

    def test_response_schema_with_metadata(self):
        """Test response schema with metadata"""
        # Metadata values are strings; nested structures are JSON-encoded
        metadata = {
            "csv_options": json.dumps({"delimiter": ","}),
            "field_count": "5"
        }
        response = StructuredDataDiagnosisResponse(
            operation="diagnose",
            metadata=metadata
        )
        assert response.metadata == metadata
        assert response.metadata["field_count"] == "5"

    def test_schema_serialization(self):
        """Test that schemas can be serialized and deserialized correctly"""
        # Test request serialization
        request = StructuredDataDiagnosisRequest(
            operation="schema-selection",
            sample="test data",
            options={"key": "value"}
        )
        # Simulate Pulsar JsonSchema serialization (round-trip)
        schema = JsonSchema(StructuredDataDiagnosisRequest)
        serialized = schema.encode(request)
        deserialized = schema.decode(serialized)
        assert deserialized.operation == request.operation
        assert deserialized.sample == request.sample
        assert deserialized.options == request.options

    def test_response_serialization_with_schema_matches(self):
        """Test response serialization with schema_matches array"""
        response = StructuredDataDiagnosisResponse(
            operation="schema-selection",
            schema_matches=["schema1", "schema2"],
            confidence=0.85
        )
        # Simulate Pulsar JsonSchema serialization
        schema = JsonSchema(StructuredDataDiagnosisResponse)
        serialized = schema.encode(response)
        deserialized = schema.decode(serialized)
        assert deserialized.operation == response.operation
        assert deserialized.schema_matches == response.schema_matches
        assert deserialized.confidence == response.confidence

    def test_backwards_compatibility(self):
        """Test that old clients can still use the service without schema_matches"""
        # Old response without schema_matches should still work
        response = StructuredDataDiagnosisResponse(
            operation="detect-type",
            detected_type="json",
            confidence=0.95
        )
        # Verify default value for new field
        assert response.schema_matches is None  # Defaults to None when not set
        # Verify old fields still work
        assert response.detected_type == "json"
        assert response.confidence == 0.95

    def test_schema_selection_operation_contract(self):
        """Test complete contract for schema-selection operation"""
        # Request
        request = StructuredDataDiagnosisRequest(
            operation="schema-selection",
            sample="product_id,name,price\n1,Widget,9.99"
        )
        assert request.operation == "schema-selection"
        assert request.sample != ""
        # Response with matches
        response = StructuredDataDiagnosisResponse(
            operation="schema-selection",
            schema_matches=["products", "inventory"]
        )
        assert response.operation == "schema-selection"
        assert isinstance(response.schema_matches, list)
        assert len(response.schema_matches) == 2
        assert all(isinstance(s, str) for s in response.schema_matches)
        # Response with error
        from trustgraph.schema.core.primitives import Error
        error_response = StructuredDataDiagnosisResponse(
            operation="schema-selection",
            error=Error(type="PromptServiceError", message="Service unavailable")
        )
        assert error_response.error is not None
        assert error_response.schema_matches is None  # Default None when not set

    def test_all_operations_supported(self):
        """Verify all operations are properly supported in the contract"""
        # Map of operation -> which request fields it needs and which response
        # fields it is expected to populate
        supported_operations = {
            "detect-type": {
                "required_request": ["sample"],
                "expected_response": ["detected_type", "confidence"]
            },
            "generate-descriptor": {
                "required_request": ["sample", "type", "schema_name"],
                "expected_response": ["descriptor"]
            },
            "diagnose": {
                "required_request": ["sample"],
                "expected_response": ["detected_type", "confidence", "descriptor"]
            },
            "schema-selection": {
                "required_request": ["sample"],
                "expected_response": ["schema_matches"]
            }
        }
        for operation, contract in supported_operations.items():
            # Test request creation with the operation's required fields
            request_data = {"operation": operation}
            for field in contract["required_request"]:
                request_data[field] = "test_value"
            request = StructuredDataDiagnosisRequest(**request_data)
            assert request.operation == operation
            # Test response creation
            response = StructuredDataDiagnosisResponse(operation=operation)
            assert response.operation == operation

View file

@@ -0,0 +1,361 @@
"""
Unit tests for structured-diag service schema-selection operation
"""
import pytest
import json
from unittest.mock import AsyncMock, MagicMock, patch
from trustgraph.retrieval.structured_diag.service import Processor
from trustgraph.schema.services.diagnosis import StructuredDataDiagnosisRequest, StructuredDataDiagnosisResponse
from trustgraph.schema import RowSchema, Field as SchemaField, Error
@pytest.fixture
def mock_schemas():
    """Three RowSchema test fixtures (products, customers, orders), keyed
    by schema name. All fields are required; each schema has one primary key."""

    def make_field(field_name, field_type, description, **flags):
        # Every field in these fixtures is required; extra flags
        # (primary/indexed) are passed straight through.
        return SchemaField(
            name=field_name,
            type=field_type,
            description=description,
            required=True,
            **flags,
        )

    products = RowSchema(
        name="products",
        description="Product catalog schema",
        fields=[
            make_field("product_id", "string", "Product identifier",
                       primary=True, indexed=True),
            make_field("name", "string", "Product name"),
            make_field("price", "number", "Product price"),
        ],
    )
    customers = RowSchema(
        name="customers",
        description="Customer database schema",
        fields=[
            make_field("customer_id", "string", "Customer identifier",
                       primary=True),
            make_field("name", "string", "Customer name"),
            make_field("email", "string", "Customer email"),
        ],
    )
    orders = RowSchema(
        name="orders",
        description="Order management schema",
        fields=[
            make_field("order_id", "string", "Order identifier", primary=True),
            make_field("customer_id", "string", "Customer identifier"),
            make_field("total", "number", "Order total"),
        ],
    )
    return {"products": products, "customers": customers, "orders": orders}
@pytest.fixture
def service(mock_schemas):
    """Processor under test, preloaded with the mock schema catalogue."""
    proc = Processor(taskgroup=MagicMock(), id="test-processor")
    proc.schemas = mock_schemas
    return proc
@pytest.fixture
def mock_flow():
    """Return a (flow, prompt_request) pair: flow(...) yields an object
    whose .request coroutine is the shared AsyncMock."""
    prompt_request = AsyncMock()
    flow = MagicMock()
    flow.return_value.request = prompt_request
    return flow, prompt_request
@pytest.mark.asyncio
async def test_schema_selection_success(service, mock_flow):
    """Test successful schema selection"""
    flow, prompt_request_flow = mock_flow
    # Mock prompt service response with matching schemas
    mock_response = MagicMock()
    mock_response.error = None
    mock_response.text = '["products", "orders"]'
    mock_response.object = None  # Explicitly set to None
    prompt_request_flow.return_value = mock_response
    # Create request
    request = StructuredDataDiagnosisRequest(
        operation="schema-selection",
        sample="product_id,name,price,quantity\nPROD001,Widget,19.99,5"
    )
    # Execute operation
    response = await service.schema_selection_operation(request, flow)
    # Verify response
    assert response.error is None
    assert response.operation == "schema-selection"
    assert response.schema_matches == ["products", "orders"]
    # Verify prompt service was called correctly
    prompt_request_flow.assert_called_once()
    call_args = prompt_request_flow.call_args[0][0]
    assert call_args.id == "schema-selection"
    # Check that all schemas were passed to prompt
    # (the "schemas" term carries the whole catalogue as a JSON string)
    terms = call_args.terms
    schemas_data = json.loads(terms["schemas"])
    assert len(schemas_data) == 3  # All 3 schemas
    assert any(s["name"] == "products" for s in schemas_data)
    assert any(s["name"] == "customers" for s in schemas_data)
    assert any(s["name"] == "orders" for s in schemas_data)
@pytest.mark.asyncio
async def test_schema_selection_empty_response(service, mock_flow):
    """An empty prompt-service reply yields a PromptServiceError response."""
    flow, prompt_request_flow = mock_flow
    # Both payload fields of the prompt reply are empty strings
    reply = MagicMock()
    reply.error = None
    reply.text = ""
    reply.object = ""
    prompt_request_flow.return_value = reply
    request = StructuredDataDiagnosisRequest(
        operation="schema-selection",
        sample="test data",
    )
    response = await service.schema_selection_operation(request, flow)
    # The service must report the empty payload as an error
    assert response.operation == "schema-selection"
    assert response.error is not None
    assert response.error.type == "PromptServiceError"
    assert "Empty response" in response.error.message
@pytest.mark.asyncio
async def test_schema_selection_prompt_error(service, mock_flow):
    """A prompt-service failure surfaces as a PromptServiceError response."""
    flow, prompt_request_flow = mock_flow
    # The prompt service itself reports an error
    reply = MagicMock()
    reply.error = Error(
        type="ServiceError",
        message="Prompt service unavailable",
    )
    reply.text = None
    prompt_request_flow.return_value = reply
    request = StructuredDataDiagnosisRequest(
        operation="schema-selection",
        sample="test data",
    )
    response = await service.schema_selection_operation(request, flow)
    # The upstream error is wrapped, not passed through verbatim
    assert response.operation == "schema-selection"
    assert response.error is not None
    assert response.error.type == "PromptServiceError"
    assert "Failed to select schemas" in response.error.message
@pytest.mark.asyncio
async def test_schema_selection_invalid_json(service, mock_flow):
    """Unparseable prompt output is reported as a ParseError."""
    flow, prompt_request_flow = mock_flow
    # The prompt reply text is not JSON at all
    reply = MagicMock()
    reply.error = None
    reply.text = "not valid json"
    reply.object = None
    prompt_request_flow.return_value = reply
    request = StructuredDataDiagnosisRequest(
        operation="schema-selection",
        sample="test data",
    )
    response = await service.schema_selection_operation(request, flow)
    assert response.operation == "schema-selection"
    assert response.error is not None
    assert response.error.type == "ParseError"
    assert "Failed to parse schema selection response" in response.error.message
@pytest.mark.asyncio
async def test_schema_selection_non_array_response(service, mock_flow):
    """Valid JSON that is not an array is also reported as a ParseError."""
    flow, prompt_request_flow = mock_flow
    # The prompt replies with a JSON object where an array is expected
    reply = MagicMock()
    reply.error = None
    reply.text = '{"schema": "products"}'
    reply.object = None
    prompt_request_flow.return_value = reply
    request = StructuredDataDiagnosisRequest(
        operation="schema-selection",
        sample="test data",
    )
    response = await service.schema_selection_operation(request, flow)
    assert response.operation == "schema-selection"
    assert response.error is not None
    assert response.error.type == "ParseError"
    assert "Failed to parse schema selection response" in response.error.message
@pytest.mark.asyncio
async def test_schema_selection_with_options(service, mock_flow):
    """Extra request options must be forwarded (JSON-encoded) to the prompt service."""
    flow, prompt_request_flow = mock_flow

    # Prompt service succeeds, matching a single schema.
    ok_payload = MagicMock()
    ok_payload.error = None
    ok_payload.text = '["products"]'
    ok_payload.object = None
    prompt_request_flow.return_value = ok_payload

    request = StructuredDataDiagnosisRequest(
        operation="schema-selection",
        sample="test data",
        options={"filter": "catalog", "confidence": "high"},
    )

    response = await service.schema_selection_operation(request, flow)

    assert response.error is None
    assert response.schema_matches == ["products"]

    # The options should arrive at the prompt service as a JSON-encoded term.
    prompt_terms = prompt_request_flow.call_args[0][0].terms
    forwarded = json.loads(prompt_terms["options"])
    assert forwarded["filter"] == "catalog"
    assert forwarded["confidence"] == "high"
@pytest.mark.asyncio
async def test_schema_selection_exception_handling(service, mock_flow):
    """Unexpected exceptions must be converted into an error response, not raised."""
    flow, prompt_request_flow = mock_flow

    # Any unexpected failure raised while calling the prompt service...
    prompt_request_flow.side_effect = Exception("Unexpected error")

    request = StructuredDataDiagnosisRequest(
        operation="schema-selection",
        sample="test data",
    )

    response = await service.schema_selection_operation(request, flow)

    # ...is reported as a PromptServiceError rather than propagating.
    assert response.error is not None
    assert response.error.type == "PromptServiceError"
    assert "Failed to select schemas" in response.error.message
    assert response.operation == "schema-selection"
@pytest.mark.asyncio
async def test_schema_selection_empty_schemas(service, mock_flow):
    """Selection with no configured schemas still succeeds with zero matches."""
    flow, prompt_request_flow = mock_flow

    # Remove every configured schema from the service.
    service.schemas = {}

    # Response the prompt service would give for an empty schema set.
    empty_payload = MagicMock()
    empty_payload.error = None
    empty_payload.text = '[]'
    empty_payload.object = None
    prompt_request_flow.return_value = empty_payload

    request = StructuredDataDiagnosisRequest(
        operation="schema-selection",
        sample="test data",
    )

    response = await service.schema_selection_operation(request, flow)

    # Should still succeed, just with no schema matches.
    assert response.error is None
    assert response.schema_matches == []

    # An empty schemas array should have been handed to the prompt service.
    prompt_terms = prompt_request_flow.call_args[0][0].terms
    forwarded_schemas = json.loads(prompt_terms["schemas"])
    assert len(forwarded_schemas) == 0

View file

@ -0,0 +1,179 @@
"""
Unit tests for simplified type detection in structured-diag service
"""
import pytest
from trustgraph.retrieval.structured_diag.type_detector import detect_data_type
class TestSimplifiedTypeDetection:
    """Test the simplified type detection logic"""

    def _assert_detects(self, sample, expected_type, expected_confidence):
        # Shared check: detection must report both the expected type and
        # the expected confidence for the given sample.
        data_type, confidence = detect_data_type(sample)
        assert data_type == expected_type
        assert confidence == expected_confidence

    def test_xml_detection_with_declaration(self):
        """Test XML detection with XML declaration"""
        self._assert_detects(
            '<?xml version="1.0"?><root><item>data</item></root>', "xml", 0.9)

    def test_xml_detection_without_declaration(self):
        """Test XML detection without declaration but with closing tags"""
        self._assert_detects('<root><item>data</item></root>', "xml", 0.9)

    def test_xml_detection_truncated(self):
        """Test XML detection with truncated XML (common with 500-byte samples)"""
        sample = '''<?xml version="1.0" encoding="UTF-8"?>
<pieDataset>
<pies>
<pie id="1">
<pieType>Steak &amp; Kidney</pieType>
<region>Yorkshire</region>
<diameterCm>12.5</diameterCm>
<heightCm>4.2'''  # Truncated mid-element
        self._assert_detects(sample, "xml", 0.9)

    def test_json_object_detection(self):
        """Test JSON object detection"""
        self._assert_detects(
            '{"name": "John", "age": 30, "city": "New York"}', "json", 0.9)

    def test_json_array_detection(self):
        """Test JSON array detection"""
        self._assert_detects('[{"id": 1}, {"id": 2}, {"id": 3}]', "json", 0.9)

    def test_json_truncated(self):
        """Test JSON detection with truncated JSON"""
        self._assert_detects(
            '{"products": [{"id": 1, "name": "Widget", "price": 19.99}, {"id": 2, "na',
            "json", 0.9)

    def test_csv_detection(self):
        """Test CSV detection as fallback"""
        sample = '''name,age,city
John,30,New York
Jane,25,Boston
Bob,35,Chicago'''
        self._assert_detects(sample, "csv", 0.8)

    def test_csv_detection_single_line(self):
        """Test CSV detection with single line defaults to CSV"""
        self._assert_detects('column1,column2,column3', "csv", 0.8)

    def test_empty_input(self):
        """Test empty input handling"""
        self._assert_detects("", None, 0.0)

    def test_whitespace_only(self):
        """Test whitespace-only input"""
        self._assert_detects(" \n \t ", None, 0.0)

    def test_html_not_xml(self):
        """Test HTML is detected as XML (has closing tags)"""
        # HTML is detected as XML by the simplified detector.
        self._assert_detects(
            '<html><body><h1>Title</h1></body></html>', "xml", 0.9)

    def test_malformed_xml_still_detected(self):
        """Test malformed XML is still detected as XML"""
        self._assert_detects('<root><item>data</item><unclosed>', "xml", 0.9)

    def test_json_with_whitespace(self):
        """Test JSON detection with leading whitespace"""
        self._assert_detects(' \n {"key": "value"}', "json", 0.9)

    def test_priority_xml_over_csv(self):
        """Test XML takes priority over CSV when both patterns present"""
        self._assert_detects('<?xml version="1.0"?>\n<data>a,b,c</data>', "xml", 0.9)

    def test_priority_json_over_csv(self):
        """Test JSON takes priority over CSV when both patterns present"""
        self._assert_detects('{"data": "a,b,c"}', "json", 0.9)

    def test_text_defaults_to_csv(self):
        """Test plain text defaults to CSV"""
        self._assert_detects(
            'This is just plain text without any structure', "csv", 0.8)
class TestRealWorldSamples:
    """Test with real-world data samples"""

    def test_uk_pies_xml_sample(self):
        """Test with actual UK pies XML sample (first 500 bytes)"""
        sample = '''<?xml version="1.0" encoding="UTF-8"?>
<pieDataset>
<pies>
<pie id="1">
<pieType>Steak &amp; Kidney</pieType>
<region>Yorkshire</region>
<diameterCm>12.5</diameterCm>
<heightCm>4.2</heightCm>
<weightGrams>285</weightGrams>
<crustType>Shortcrust</crustType>
<fillingCategory>Meat</fillingCategory>
<price>3.50</price>
<currency>GBP</currency>
<bakeryType>Traditional</bakeryType>
</pie>
<pie id="2">
<pieType>Chicken &amp; Mushroom</pieType>
<region>Lancashire</regio'''  # Cut at 500 chars
        # Detection operates on the first 500 bytes only, mirroring the
        # sampling behaviour of the diagnosis service.
        kind, score = detect_data_type(sample[:500])
        assert kind == "xml"
        assert score == 0.9

    def test_product_json_sample(self):
        """Test with product catalog JSON sample"""
        sample = '''{"products": [
{"id": "PROD001", "name": "Widget", "price": 19.99, "category": "Tools"},
{"id": "PROD002", "name": "Gadget", "price": 29.99, "category": "Electronics"},
{"id": "PROD003", "name": "Doohickey", "price": 9.99, "category": "Accessories"}
]}'''
        kind, score = detect_data_type(sample)
        assert kind == "json"
        assert score == 0.9

    def test_customer_csv_sample(self):
        """Test with customer CSV sample"""
        sample = '''customer_id,name,email,signup_date,total_orders
CUST001,John Smith,john@example.com,2023-01-15,5
CUST002,Jane Doe,jane@example.com,2023-02-20,3
CUST003,Bob Johnson,bob@example.com,2023-03-10,7'''
        kind, score = detect_data_type(sample)
        assert kind == "csv"
        assert score == 0.8

View file

@ -0,0 +1,588 @@
"""
Unit tests for Structured Query Service
Following TEST_STRATEGY.md approach for service testing
"""
import pytest
import json
from unittest.mock import AsyncMock, MagicMock, patch
from trustgraph.schema import (
StructuredQueryRequest, StructuredQueryResponse,
QuestionToStructuredQueryRequest, QuestionToStructuredQueryResponse,
ObjectsQueryRequest, ObjectsQueryResponse,
Error, GraphQLError
)
from trustgraph.retrieval.structured_query.service import Processor
@pytest.fixture
def mock_pulsar_client():
    """Mock Pulsar client (an AsyncMock stand-in, so no broker is contacted)."""
    return AsyncMock()
@pytest.fixture
def processor(mock_pulsar_client):
    """Create processor with mocked dependencies"""
    proc = Processor(
        taskgroup=MagicMock(),
        pulsar_client=mock_pulsar_client
    )
    # Replace the client accessor with a mock so the processor never reaches
    # out to real services (presumably a service-client lookup — confirm
    # against Processor's implementation).
    proc.client = MagicMock()
    return proc
@pytest.mark.asyncio
class TestStructuredQueryProcessor:
    """Test Structured Query service processor"""

    # Pipeline under test, as exercised by these cases:
    # question -> NLP query service (natural language -> GraphQL)
    #          -> objects query service (GraphQL execution)
    #          -> response producer.
    # Each test wires flow.side_effect with a router that hands back the
    # appropriate mock client per service name. Note: side_effect takes
    # precedence over the earlier flow.return_value assignment.

    async def test_successful_end_to_end_query(self, processor):
        """Test successful end-to-end query processing"""
        # Arrange
        request = StructuredQueryRequest(
            question="Show me all customers from New York",
            user="trustgraph",
            collection="default"
        )
        msg = MagicMock()
        msg.value.return_value = request
        msg.properties.return_value = {"id": "test-123"}
        consumer = MagicMock()  # required by on_message's signature; not asserted on
        flow = MagicMock()
        flow_response = AsyncMock()
        flow.return_value = flow_response
        # Mock NLP query service response
        nlp_response = QuestionToStructuredQueryResponse(
            error=None,
            graphql_query='query { customers(where: {state: {eq: "NY"}}) { id name email } }',
            variables={"state": "NY"},
            detected_schemas=["customers"],
            confidence=0.95
        )
        # Mock objects query service response
        objects_response = ObjectsQueryResponse(
            error=None,
            data='{"customers": [{"id": "1", "name": "John", "email": "john@example.com"}]}',
            errors=None,
            extensions={}
        )
        # Set up mock clients
        mock_nlp_client = AsyncMock()
        mock_nlp_client.request.return_value = nlp_response
        mock_objects_client = AsyncMock()
        mock_objects_client.request.return_value = objects_response
        # Mock flow context to route to appropriate services
        def flow_router(service_name):
            if service_name == "nlp-query-request":
                return mock_nlp_client
            elif service_name == "objects-query-request":
                return mock_objects_client
            elif service_name == "response":
                return flow_response
            else:
                return AsyncMock()
        flow.side_effect = flow_router
        # Act
        await processor.on_message(msg, consumer, flow)
        # Assert
        # Verify NLP query service was called correctly
        mock_nlp_client.request.assert_called_once()
        nlp_call_args = mock_nlp_client.request.call_args[0][0]
        assert isinstance(nlp_call_args, QuestionToStructuredQueryRequest)
        assert nlp_call_args.question == "Show me all customers from New York"
        assert nlp_call_args.max_results == 100
        # Verify objects query service was called correctly
        mock_objects_client.request.assert_called_once()
        objects_call_args = mock_objects_client.request.call_args[0][0]
        assert isinstance(objects_call_args, ObjectsQueryRequest)
        assert objects_call_args.query == 'query { customers(where: {state: {eq: "NY"}}) { id name email } }'
        assert objects_call_args.variables == {"state": "NY"}
        assert objects_call_args.user == "trustgraph"
        assert objects_call_args.collection == "default"
        # Verify response
        flow_response.send.assert_called_once()
        response_call = flow_response.send.call_args
        response = response_call[0][0]
        assert isinstance(response, StructuredQueryResponse)
        assert response.error is None
        assert response.data == '{"customers": [{"id": "1", "name": "John", "email": "john@example.com"}]}'
        assert len(response.errors) == 0

    async def test_nlp_query_service_error(self, processor):
        """Test handling of NLP query service errors"""
        # Arrange
        request = StructuredQueryRequest(
            question="Invalid query"
        )
        msg = MagicMock()
        msg.value.return_value = request
        msg.properties.return_value = {"id": "test-error"}
        consumer = MagicMock()
        flow = MagicMock()
        flow_response = AsyncMock()
        flow.return_value = flow_response
        # Mock NLP query service error response
        nlp_response = QuestionToStructuredQueryResponse(
            error=Error(type="nlp-query-error", message="Failed to parse question"),
            graphql_query="",
            variables={},
            detected_schemas=[],
            confidence=0.0
        )
        mock_nlp_client = AsyncMock()
        mock_nlp_client.request.return_value = nlp_response
        # Mock flow context to route to nlp service
        def flow_router(service_name):
            if service_name == "nlp-query-request":
                return mock_nlp_client
            elif service_name == "response":
                return flow_response
            else:
                return AsyncMock()
        flow.side_effect = flow_router
        # Act
        await processor.on_message(msg, consumer, flow)
        # Assert: upstream NLP error is wrapped in the service's error type
        flow_response.send.assert_called_once()
        response_call = flow_response.send.call_args
        response = response_call[0][0]
        assert isinstance(response, StructuredQueryResponse)
        assert response.error is not None
        assert response.error.type == "structured-query-error"
        assert "NLP query service error" in response.error.message

    async def test_empty_graphql_query_error(self, processor):
        """Test handling of empty GraphQL query from NLP service"""
        # Arrange
        request = StructuredQueryRequest(
            question="Ambiguous question"
        )
        msg = MagicMock()
        msg.value.return_value = request
        msg.properties.return_value = {"id": "test-empty"}
        consumer = MagicMock()
        flow = MagicMock()
        flow_response = AsyncMock()
        flow.return_value = flow_response
        # Mock NLP query service response with empty query
        nlp_response = QuestionToStructuredQueryResponse(
            error=None,
            graphql_query="",  # Empty query
            variables={},
            detected_schemas=[],
            confidence=0.1
        )
        mock_nlp_client = AsyncMock()
        mock_nlp_client.request.return_value = nlp_response
        # Mock flow context to route to nlp service
        def flow_router(service_name):
            if service_name == "nlp-query-request":
                return mock_nlp_client
            elif service_name == "response":
                return flow_response
            else:
                return AsyncMock()
        flow.side_effect = flow_router
        # Act
        await processor.on_message(msg, consumer, flow)
        # Assert: no error flag from NLP, but an empty query is still rejected
        flow_response.send.assert_called_once()
        response_call = flow_response.send.call_args
        response = response_call[0][0]
        assert response.error is not None
        assert "empty GraphQL query" in response.error.message

    async def test_objects_query_service_error(self, processor):
        """Test handling of objects query service errors"""
        # Arrange
        request = StructuredQueryRequest(
            question="Show me customers"
        )
        msg = MagicMock()
        msg.value.return_value = request
        msg.properties.return_value = {"id": "test-objects-error"}
        consumer = MagicMock()
        flow = MagicMock()
        flow_response = AsyncMock()
        flow.return_value = flow_response
        # Mock successful NLP response
        nlp_response = QuestionToStructuredQueryResponse(
            error=None,
            graphql_query='query { customers { id name } }',
            variables={},
            detected_schemas=["customers"],
            confidence=0.9
        )
        # Mock objects query service error
        objects_response = ObjectsQueryResponse(
            error=Error(type="graphql-execution-error", message="Table 'customers' not found"),
            data=None,
            errors=None,
            extensions={}
        )
        mock_nlp_client = AsyncMock()
        mock_nlp_client.request.return_value = nlp_response
        mock_objects_client = AsyncMock()
        mock_objects_client.request.return_value = objects_response
        # Mock flow context to route to appropriate services
        def flow_router(service_name):
            if service_name == "nlp-query-request":
                return mock_nlp_client
            elif service_name == "objects-query-request":
                return mock_objects_client
            elif service_name == "response":
                return flow_response
            else:
                return AsyncMock()
        flow.side_effect = flow_router
        # Act
        await processor.on_message(msg, consumer, flow)
        # Assert: execution-stage failure is wrapped and the upstream
        # message text is preserved for diagnosis
        flow_response.send.assert_called_once()
        response_call = flow_response.send.call_args
        response = response_call[0][0]
        assert response.error is not None
        assert "Objects query service error" in response.error.message
        assert "Table 'customers' not found" in response.error.message

    async def test_graphql_errors_handling(self, processor):
        """Test handling of GraphQL validation/execution errors"""
        # Arrange
        request = StructuredQueryRequest(
            question="Show invalid field"
        )
        msg = MagicMock()
        msg.value.return_value = request
        msg.properties.return_value = {"id": "test-graphql-errors"}
        consumer = MagicMock()
        flow = MagicMock()
        flow_response = AsyncMock()
        flow.return_value = flow_response
        # Mock successful NLP response
        nlp_response = QuestionToStructuredQueryResponse(
            error=None,
            graphql_query='query { customers { invalid_field } }',
            variables={},
            detected_schemas=["customers"],
            confidence=0.8
        )
        # Mock objects response with GraphQL errors
        graphql_errors = [
            GraphQLError(
                message="Cannot query field 'invalid_field' on type 'Customer'",
                path=["customers", "0", "invalid_field"],  # All path elements must be strings
                extensions={}
            )
        ]
        objects_response = ObjectsQueryResponse(
            error=None,
            data=None,
            errors=graphql_errors,
            extensions={}
        )
        mock_nlp_client = AsyncMock()
        mock_nlp_client.request.return_value = nlp_response
        mock_objects_client = AsyncMock()
        mock_objects_client.request.return_value = objects_response
        # Mock flow context to route to appropriate services
        def flow_router(service_name):
            if service_name == "nlp-query-request":
                return mock_nlp_client
            elif service_name == "objects-query-request":
                return mock_objects_client
            elif service_name == "response":
                return flow_response
            else:
                return AsyncMock()
        flow.side_effect = flow_router
        # Act
        await processor.on_message(msg, consumer, flow)
        # Assert: GraphQL-level errors are reported in the errors list,
        # not as a service-level error
        flow_response.send.assert_called_once()
        response_call = flow_response.send.call_args
        response = response_call[0][0]
        assert response.error is None
        assert len(response.errors) == 1
        assert "Cannot query field 'invalid_field'" in response.errors[0]
        assert "customers" in response.errors[0]

    async def test_complex_query_with_variables(self, processor):
        """Test processing complex queries with variables"""
        # Arrange
        request = StructuredQueryRequest(
            question="Show customers with orders over $100 from last month"
        )
        msg = MagicMock()
        msg.value.return_value = request
        msg.properties.return_value = {"id": "test-complex"}
        consumer = MagicMock()
        flow = MagicMock()
        flow_response = AsyncMock()
        flow.return_value = flow_response
        # Mock NLP response with complex query and variables
        nlp_response = QuestionToStructuredQueryResponse(
            error=None,
            graphql_query='''
query GetCustomersWithLargeOrders($minTotal: Float!, $startDate: String!) {
customers {
id
name
orders(where: {total: {gt: $minTotal}, date: {gte: $startDate}}) {
id
total
date
}
}
}
''',
            variables={
                "minTotal": "100.0",  # Convert to string for Pulsar schema
                "startDate": "2024-01-01"
            },
            detected_schemas=["customers", "orders"],
            confidence=0.88
        )
        # Mock objects response
        objects_response = ObjectsQueryResponse(
            error=None,
            data='{"customers": [{"id": "1", "name": "Alice", "orders": [{"id": "100", "total": 150.0}]}]}',
            errors=None
        )
        mock_nlp_client = AsyncMock()
        mock_nlp_client.request.return_value = nlp_response
        mock_objects_client = AsyncMock()
        mock_objects_client.request.return_value = objects_response
        # Mock flow context to route to appropriate services
        def flow_router(service_name):
            if service_name == "nlp-query-request":
                return mock_nlp_client
            elif service_name == "objects-query-request":
                return mock_objects_client
            elif service_name == "response":
                return flow_response
            else:
                return AsyncMock()
        flow.side_effect = flow_router
        # Act
        await processor.on_message(msg, consumer, flow)
        # Assert
        # Verify variables were passed correctly (converted to strings)
        objects_call_args = mock_objects_client.request.call_args[0][0]
        assert objects_call_args.variables["minTotal"] == "100.0"  # Should be converted to string
        assert objects_call_args.variables["startDate"] == "2024-01-01"
        # Verify response
        response_call = flow_response.send.call_args
        response = response_call[0][0]
        assert response.error is None
        assert "Alice" in response.data

    async def test_null_data_handling(self, processor):
        """Test handling of null/empty data responses"""
        # Arrange
        request = StructuredQueryRequest(
            question="Show nonexistent data"
        )
        msg = MagicMock()
        msg.value.return_value = request
        msg.properties.return_value = {"id": "test-null"}
        consumer = MagicMock()
        flow = MagicMock()
        flow_response = AsyncMock()
        flow.return_value = flow_response
        # Mock responses
        nlp_response = QuestionToStructuredQueryResponse(
            error=None,
            graphql_query='query { customers { id } }',
            variables={},
            detected_schemas=["customers"],
            confidence=0.9
        )
        objects_response = ObjectsQueryResponse(
            error=None,
            data=None,  # Null data
            errors=None,
            extensions={}
        )
        mock_nlp_client = AsyncMock()
        mock_nlp_client.request.return_value = nlp_response
        mock_objects_client = AsyncMock()
        mock_objects_client.request.return_value = objects_response
        # Mock flow context to route to appropriate services
        def flow_router(service_name):
            if service_name == "nlp-query-request":
                return mock_nlp_client
            elif service_name == "objects-query-request":
                return mock_objects_client
            elif service_name == "response":
                return flow_response
            else:
                return AsyncMock()
        flow.side_effect = flow_router
        # Act
        await processor.on_message(msg, consumer, flow)
        # Assert
        response_call = flow_response.send.call_args
        response = response_call[0][0]
        assert response.error is None
        assert response.data == "null"  # Should convert None to "null" string

    async def test_exception_handling(self, processor):
        """Test general exception handling"""
        # Arrange
        request = StructuredQueryRequest(
            question="Test exception"
        )
        msg = MagicMock()
        msg.value.return_value = request
        msg.properties.return_value = {"id": "test-exception"}
        consumer = MagicMock()
        flow = MagicMock()
        flow_response = AsyncMock()
        flow.return_value = flow_response
        # Mock flow context to raise exception
        mock_client = AsyncMock()
        mock_client.request.side_effect = Exception("Network timeout")
        def flow_router(service_name):
            if service_name == "nlp-query-request":
                return mock_client
            elif service_name == "response":
                return flow_response
            else:
                return AsyncMock()
        flow.side_effect = flow_router
        # Act
        await processor.on_message(msg, consumer, flow)
        # Assert: the raw exception message is surfaced in the error response
        flow_response.send.assert_called_once()
        response_call = flow_response.send.call_args
        response = response_call[0][0]
        assert response.error is not None
        assert response.error.type == "structured-query-error"
        assert "Network timeout" in response.error.message
        assert response.data == "null"
        assert len(response.errors) == 0

    # Note: the following two tests are synchronous; pytest.mark.asyncio on
    # the class only affects coroutine test functions.

    def test_processor_initialization(self, mock_pulsar_client):
        """Test processor initialization with correct specifications"""
        # Act
        processor = Processor(
            taskgroup=MagicMock(),
            pulsar_client=mock_pulsar_client
        )
        # Assert - Test default ID
        assert processor.id == "structured-query"
        # Verify specifications were registered (we can't directly access them,
        # but we know they were registered if initialization succeeded)
        assert processor is not None

    def test_add_args(self):
        """Test command-line argument parsing"""
        import argparse
        parser = argparse.ArgumentParser()
        Processor.add_args(parser)
        # Test that it doesn't crash (no additional args)
        args = parser.parse_args([])
        # No specific assertions since no custom args are added
        assert args is not None
@pytest.mark.unit
class TestStructuredQueryHelperFunctions:
    """Test helper functions and data transformations"""

    def test_service_logging_integration(self):
        """The module logger must be named after its module path."""
        from trustgraph.retrieval.structured_query.service import logger
        expected_name = "trustgraph.retrieval.structured_query.service"
        assert logger.name == expected_name

    def test_default_values(self):
        """The default processor identity must match the documented value."""
        from trustgraph.retrieval.structured_query.service import default_ident
        expected_ident = "structured-query"
        assert default_ident == expected_ident