mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-04-27 01:16:22 +02:00
parent
a8e437fc7f
commit
6c7af8789d
216 changed files with 31360 additions and 1611 deletions
77
tests/unit/test_retrieval/test_document_rag_service.py
Normal file
77
tests/unit/test_retrieval/test_document_rag_service.py
Normal file
|
|
@ -0,0 +1,77 @@
|
|||
"""
|
||||
Unit test for DocumentRAG service parameter passing fix.
|
||||
Tests that user and collection parameters from the message are correctly
|
||||
passed to the DocumentRag.query() method.
|
||||
"""
|
||||
|
||||
import pytest
|
||||
from unittest.mock import MagicMock, AsyncMock, patch
|
||||
|
||||
from trustgraph.retrieval.document_rag.rag import Processor
|
||||
from trustgraph.schema import DocumentRagQuery, DocumentRagResponse
|
||||
|
||||
|
||||
class TestDocumentRagService:
    """Test DocumentRAG service parameter passing"""

    # NOTE(review): @patch is stacked outside @pytest.mark.asyncio; the mark is
    # carried through patch's functools.wraps, but confirm pytest-asyncio picks
    # it up in this project's configuration.
    @patch('trustgraph.retrieval.document_rag.rag.DocumentRag')
    @pytest.mark.asyncio
    async def test_user_and_collection_parameters_passed_to_query(self, mock_document_rag_class):
        """
        Test that user and collection from message are passed to DocumentRag.query().

        This is a regression test for the bug where user/collection parameters
        were ignored, causing wrong collection names like 'd_trustgraph_default_384'
        instead of 'd_my_user_test_coll_1_384'.
        """
        # Setup processor
        processor = Processor(
            taskgroup=MagicMock(),
            id="test-processor",
            doc_limit=10
        )

        # Setup mock DocumentRag instance; the class is patched at the module
        # the service imports it from, so the processor constructs this mock.
        mock_rag_instance = AsyncMock()
        mock_document_rag_class.return_value = mock_rag_instance
        mock_rag_instance.query.return_value = "test response"

        # Setup message with custom user/collection.  msg mimics the Pulsar
        # message API: value() yields the payload, properties() the headers.
        msg = MagicMock()
        msg.value.return_value = DocumentRagQuery(
            query="test query",
            user="my_user",  # Custom user (not default "trustgraph")
            collection="test_coll_1",  # Custom collection (not default "default")
            doc_limit=5
        )
        msg.properties.return_value = {"id": "test-id"}

        # Setup flow mock
        consumer = MagicMock()
        flow = MagicMock()

        # Mock flow to return AsyncMock for clients and response producer.
        # flow(name) routes "response" to the producer we inspect below and
        # hands back throwaway async mocks for every other service.
        mock_producer = AsyncMock()

        def flow_router(service_name):
            if service_name == "response":
                return mock_producer
            return AsyncMock()  # embeddings, doc-embeddings, prompt clients

        flow.side_effect = flow_router

        # Execute
        await processor.on_request(msg, consumer, flow)

        # Verify: DocumentRag.query was called with correct parameters
        mock_rag_instance.query.assert_called_once_with(
            "test query",
            user="my_user",  # Must be from message, not hardcoded default
            collection="test_coll_1",  # Must be from message, not hardcoded default
            doc_limit=5
        )

        # Verify response was sent
        mock_producer.send.assert_called_once()
        sent_response = mock_producer.send.call_args[0][0]
        assert isinstance(sent_response, DocumentRagResponse)
        assert sent_response.response == "test response"
        assert sent_response.error is None
|
||||
374
tests/unit/test_retrieval/test_nlp_query.py
Normal file
374
tests/unit/test_retrieval/test_nlp_query.py
Normal file
|
|
@ -0,0 +1,374 @@
|
|||
"""
|
||||
Unit tests for NLP Query service
|
||||
Following TEST_STRATEGY.md approach for service testing
|
||||
"""
|
||||
|
||||
import pytest
|
||||
import json
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
from typing import Dict, Any
|
||||
|
||||
from trustgraph.schema import (
|
||||
QuestionToStructuredQueryRequest, QuestionToStructuredQueryResponse,
|
||||
PromptRequest, PromptResponse, Error, RowSchema, Field as SchemaField
|
||||
)
|
||||
from trustgraph.retrieval.nlp_query.service import Processor
|
||||
|
||||
|
||||
@pytest.fixture
def mock_prompt_client():
    """Provide an async mock standing in for the prompt service client."""
    client = AsyncMock()
    return client
|
||||
|
||||
|
||||
@pytest.fixture
def mock_pulsar_client():
    """Provide an async mock standing in for the Pulsar client."""
    client = AsyncMock()
    return client
|
||||
|
||||
|
||||
@pytest.fixture
def sample_schemas():
    """Build the customers/orders RowSchema fixtures shared by the tests."""
    customers = RowSchema(
        name="customers",
        description="Customer data",
        fields=[
            SchemaField(name="id", type="string", primary=True),
            SchemaField(name="name", type="string"),
            SchemaField(name="email", type="string"),
            SchemaField(name="state", type="string"),
        ],
    )

    orders = RowSchema(
        name="orders",
        description="Order data",
        fields=[
            SchemaField(name="order_id", type="string", primary=True),
            SchemaField(name="customer_id", type="string"),
            SchemaField(name="total", type="float"),
            SchemaField(name="status", type="string"),
        ],
    )

    return {"customers": customers, "orders": orders}
|
||||
|
||||
|
||||
@pytest.fixture
def processor(mock_pulsar_client, sample_schemas):
    """Create processor with mocked dependencies"""
    proc = Processor(
        taskgroup=MagicMock(),
        pulsar_client=mock_pulsar_client,
        config_type="schema"
    )

    # Set up schemas directly, bypassing the config-push path so each test
    # starts from a known schema registry.
    proc.schemas = sample_schemas

    # Mock the client method so nothing reaches a real Pulsar connection.
    proc.client = MagicMock()

    return proc
|
||||
|
||||
|
||||
@pytest.mark.asyncio
class TestNLPQueryProcessor:
    """Test NLP Query service processor.

    The service resolves its peers through the ``flow`` callable:
    ``flow("prompt-request")`` yields the prompt client and
    ``flow("response")`` the response producer, so every test stubs those
    routes via ``flow.side_effect``.
    """

    async def test_phase1_select_schemas_success(self, processor):
        """Test successful schema selection (Phase 1)"""
        # Arrange
        question = "Show me customers from California"
        expected_schemas = ["customers"]

        mock_response = PromptResponse(
            text=json.dumps(expected_schemas),
            error=None
        )

        # Mock flow context: route only "prompt-request" to our stub.
        flow = MagicMock()
        mock_prompt_service = AsyncMock()
        mock_prompt_service.request = AsyncMock(return_value=mock_response)
        flow.side_effect = lambda service_name: mock_prompt_service if service_name == "prompt-request" else AsyncMock()

        # Act
        result = await processor.phase1_select_schemas(question, flow)

        # Assert
        assert result == expected_schemas
        mock_prompt_service.request.assert_called_once()

    async def test_phase1_select_schemas_prompt_error(self, processor):
        """Test schema selection with prompt service error"""
        # Arrange
        question = "Show me customers"
        error = Error(type="prompt-error", message="Template not found")
        mock_response = PromptResponse(text="", error=error)

        # Mock flow context
        flow = MagicMock()
        mock_prompt_service = AsyncMock()
        mock_prompt_service.request = AsyncMock(return_value=mock_response)
        flow.side_effect = lambda service_name: mock_prompt_service if service_name == "prompt-request" else AsyncMock()

        # Act & Assert
        with pytest.raises(Exception, match="Prompt service error"):
            await processor.phase1_select_schemas(question, flow)

    async def test_phase2_generate_graphql_success(self, processor):
        """Test successful GraphQL generation (Phase 2)"""
        # Arrange
        question = "Show me customers from California"
        selected_schemas = ["customers"]
        expected_result = {
            "query": "query { customers(where: {state: {eq: \"California\"}}) { id name email state } }",
            "variables": {},
            "confidence": 0.95
        }

        mock_response = PromptResponse(
            text=json.dumps(expected_result),
            error=None
        )

        # Mock flow context
        flow = MagicMock()
        mock_prompt_service = AsyncMock()
        mock_prompt_service.request = AsyncMock(return_value=mock_response)
        flow.side_effect = lambda service_name: mock_prompt_service if service_name == "prompt-request" else AsyncMock()

        # Act
        result = await processor.phase2_generate_graphql(question, selected_schemas, flow)

        # Assert
        assert result == expected_result
        mock_prompt_service.request.assert_called_once()

    async def test_phase2_generate_graphql_prompt_error(self, processor):
        """Test GraphQL generation with prompt service error"""
        # Arrange
        question = "Show me customers"
        selected_schemas = ["customers"]
        error = Error(type="prompt-error", message="Generation failed")
        mock_response = PromptResponse(text="", error=error)

        # Mock flow context
        flow = MagicMock()
        mock_prompt_service = AsyncMock()
        mock_prompt_service.request = AsyncMock(return_value=mock_response)
        flow.side_effect = lambda service_name: mock_prompt_service if service_name == "prompt-request" else AsyncMock()

        # Act & Assert
        with pytest.raises(Exception, match="Prompt service error"):
            await processor.phase2_generate_graphql(question, selected_schemas, flow)

    async def test_on_message_full_flow_success(self, processor):
        """Test complete message processing flow"""
        # Arrange
        request = QuestionToStructuredQueryRequest(
            question="Show me customers from California",
            max_results=100
        )

        msg = MagicMock()
        msg.value.return_value = request
        msg.properties.return_value = {"id": "test-123"}

        consumer = MagicMock()
        flow = MagicMock()
        flow_response = AsyncMock()
        flow.return_value = flow_response

        # Mock Phase 1 response
        phase1_response = PromptResponse(
            text=json.dumps(["customers"]),
            error=None
        )

        # Mock Phase 2 response
        phase2_response = PromptResponse(
            text=json.dumps({
                "query": "query { customers(where: {state: {eq: \"California\"}}) { id name email } }",
                "variables": {},
                "confidence": 0.9
            }),
            error=None
        )

        # Mock flow context to return the two prompt responses in order.
        mock_prompt_service = AsyncMock()
        mock_prompt_service.request = AsyncMock(
            side_effect=[phase1_response, phase2_response]
        )
        flow.side_effect = lambda service_name: mock_prompt_service if service_name == "prompt-request" else flow_response if service_name == "response" else AsyncMock()

        # Act
        await processor.on_message(msg, consumer, flow)

        # Assert
        assert mock_prompt_service.request.call_count == 2
        flow_response.send.assert_called_once()

        # Verify response structure
        response_call = flow_response.send.call_args
        response = response_call[0][0]  # First argument is the response object

        assert isinstance(response, QuestionToStructuredQueryResponse)
        assert response.error is None
        assert "customers" in response.graphql_query
        assert response.detected_schemas == ["customers"]
        assert response.confidence == 0.9

    async def test_on_message_phase1_error(self, processor):
        """Test message processing with Phase 1 failure"""
        # Arrange
        request = QuestionToStructuredQueryRequest(
            question="Show me customers",
            max_results=100
        )

        msg = MagicMock()
        msg.value.return_value = request
        msg.properties.return_value = {"id": "test-123"}

        consumer = MagicMock()
        flow = MagicMock()
        flow_response = AsyncMock()
        flow.return_value = flow_response

        # Mock Phase 1 error
        phase1_response = PromptResponse(
            text="",
            error=Error(type="template-error", message="Template not found")
        )

        # FIX: route the erroring prompt response through the flow context, as
        # every other test does.  The previous setup patched
        # processor.client.return_value.request, which the on_message path
        # never consults (it resolves the prompt client via
        # flow("prompt-request")), so the asserted error did not come from the
        # intended phase-1 failure.
        mock_prompt_service = AsyncMock()
        mock_prompt_service.request = AsyncMock(return_value=phase1_response)
        flow.side_effect = lambda service_name: mock_prompt_service if service_name == "prompt-request" else flow_response if service_name == "response" else AsyncMock()

        # Act
        await processor.on_message(msg, consumer, flow)

        # Assert
        flow_response.send.assert_called_once()

        # Verify error response
        response_call = flow_response.send.call_args
        response = response_call[0][0]

        assert isinstance(response, QuestionToStructuredQueryResponse)
        assert response.error is not None
        assert response.error.type == "nlp-query-error"
        assert "Prompt service error" in response.error.message

    async def test_schema_config_loading(self, processor):
        """Test schema configuration loading"""
        # Arrange: config values are JSON strings keyed by schema name.
        # Note the config uses "primary_key" while the loaded RowSchema field
        # exposes "primary" (mapping asserted below).
        config = {
            "schema": {
                "test_schema": json.dumps({
                    "name": "test_schema",
                    "description": "Test schema",
                    "fields": [
                        {
                            "name": "id",
                            "type": "string",
                            "primary_key": True,
                            "required": True
                        },
                        {
                            "name": "name",
                            "type": "string",
                            "description": "User name"
                        }
                    ]
                })
            }
        }

        # Act
        await processor.on_schema_config(config, "v1")

        # Assert
        assert "test_schema" in processor.schemas
        schema = processor.schemas["test_schema"]
        assert schema.name == "test_schema"
        assert schema.description == "Test schema"
        assert len(schema.fields) == 2
        assert schema.fields[0].name == "id"
        assert schema.fields[0].primary == True
        assert schema.fields[1].name == "name"

    async def test_schema_config_loading_invalid_json(self, processor):
        """Test schema configuration loading with invalid JSON"""
        # Arrange
        config = {
            "schema": {
                "bad_schema": "invalid json{"
            }
        }

        # Act
        await processor.on_schema_config(config, "v1")

        # Assert - bad schema should be ignored
        assert "bad_schema" not in processor.schemas

    def test_processor_initialization(self, mock_pulsar_client):
        """Test processor initialization with correct specifications"""
        # Act
        processor = Processor(
            taskgroup=MagicMock(),
            pulsar_client=mock_pulsar_client,
            schema_selection_template="custom-schema-select",
            graphql_generation_template="custom-graphql-gen"
        )

        # Assert
        assert processor.schema_selection_template == "custom-schema-select"
        assert processor.graphql_generation_template == "custom-graphql-gen"
        assert processor.config_key == "schema"
        assert processor.schemas == {}

    def test_add_args(self):
        """Test command-line argument parsing"""
        import argparse

        parser = argparse.ArgumentParser()
        Processor.add_args(parser)

        # Test default values
        args = parser.parse_args([])
        assert args.config_type == "schema"
        assert args.schema_selection_template == "schema-selection"
        assert args.graphql_generation_template == "graphql-generation"

        # Test custom values
        args = parser.parse_args([
            "--config-type", "custom",
            "--schema-selection-template", "my-selector",
            "--graphql-generation-template", "my-generator"
        ])
        assert args.config_type == "custom"
        assert args.schema_selection_template == "my-selector"
        assert args.graphql_generation_template == "my-generator"
|
||||
|
||||
|
||||
@pytest.mark.unit
class TestNLPQueryHelperFunctions:
    """Test helper functions and data transformations"""

    def test_schema_info_formatting(self, sample_schemas):
        """Test schema info formatting for prompts"""
        # The formatting currently lives inline in the service; this pins the
        # fixture shape any extracted helper would have to handle.
        schema = sample_schemas["customers"]

        field_names = [field.name for field in schema.fields]
        assert field_names == ["id", "name", "email", "state"]

        # Test primary key detection
        key_fields = [field.name for field in schema.fields if field.primary]
        assert key_fields == ["id"]
|
||||
|
|
@ -0,0 +1,3 @@
|
|||
"""
|
||||
Unit and contract tests for structured-diag service
|
||||
"""
|
||||
|
|
@ -0,0 +1,172 @@
|
|||
"""
|
||||
Unit tests for message translation in structured-diag service
|
||||
"""
|
||||
|
||||
import pytest
|
||||
from trustgraph.messaging.translators.diagnosis import (
|
||||
StructuredDataDiagnosisRequestTranslator,
|
||||
StructuredDataDiagnosisResponseTranslator
|
||||
)
|
||||
from trustgraph.schema.services.diagnosis import (
|
||||
StructuredDataDiagnosisRequest,
|
||||
StructuredDataDiagnosisResponse
|
||||
)
|
||||
|
||||
|
||||
class TestRequestTranslation:
    """Test request message translation"""

    def test_translate_schema_selection_request(self):
        """Test translating schema-selection request from API to Pulsar"""
        translator = StructuredDataDiagnosisRequestTranslator()

        # API format (with hyphens)
        api_data = {
            "operation": "schema-selection",
            "sample": "test data sample",
            "options": {"filter": "catalog"}
        }

        # Translate to Pulsar
        pulsar_msg = translator.to_pulsar(api_data)

        assert pulsar_msg.operation == "schema-selection"
        assert pulsar_msg.sample == "test data sample"
        assert pulsar_msg.options == {"filter": "catalog"}

    def test_translate_request_with_all_fields(self):
        """Test translating request with all fields"""
        translator = StructuredDataDiagnosisRequestTranslator()

        api_data = {
            "operation": "generate-descriptor",
            "sample": "csv data",
            "type": "csv",
            "schema-name": "products",
            "options": {"delimiter": ","}
        }

        pulsar_msg = translator.to_pulsar(api_data)

        assert pulsar_msg.operation == "generate-descriptor"
        assert pulsar_msg.sample == "csv data"
        assert pulsar_msg.type == "csv"
        # Hyphenated API key "schema-name" maps to the snake_case Pulsar
        # field "schema_name".
        assert pulsar_msg.schema_name == "products"
        assert pulsar_msg.options == {"delimiter": ","}
|
||||
|
||||
|
||||
class TestResponseTranslation:
    """Test response message translation.

    Exercises the Pulsar -> API direction: snake_case Pulsar fields come
    back as hyphenated API keys, and None-valued fields are omitted.
    """

    def test_translate_schema_selection_response(self):
        """Test translating schema-selection response from Pulsar to API"""
        translator = StructuredDataDiagnosisResponseTranslator()

        # Create Pulsar response with schema_matches
        pulsar_response = StructuredDataDiagnosisResponse(
            operation="schema-selection",
            schema_matches=["products", "inventory", "catalog"],
            error=None
        )

        # Translate to API format
        api_data = translator.from_pulsar(pulsar_response)

        assert api_data["operation"] == "schema-selection"
        assert api_data["schema-matches"] == ["products", "inventory", "catalog"]
        assert "error" not in api_data  # None errors shouldn't be included

    def test_translate_empty_schema_matches(self):
        """Test translating response with empty schema_matches"""
        translator = StructuredDataDiagnosisResponseTranslator()

        pulsar_response = StructuredDataDiagnosisResponse(
            operation="schema-selection",
            schema_matches=[],
            error=None
        )

        api_data = translator.from_pulsar(pulsar_response)

        assert api_data["operation"] == "schema-selection"
        # Empty list is a real value and must survive translation
        # (distinct from the omitted-when-None case below).
        assert api_data["schema-matches"] == []

    def test_translate_response_without_schema_matches(self):
        """Test translating response without schema_matches field"""
        translator = StructuredDataDiagnosisResponseTranslator()

        # Old-style response without schema_matches
        pulsar_response = StructuredDataDiagnosisResponse(
            operation="detect-type",
            detected_type="xml",
            confidence=0.9,
            error=None
        )

        api_data = translator.from_pulsar(pulsar_response)

        assert api_data["operation"] == "detect-type"
        assert api_data["detected-type"] == "xml"
        assert api_data["confidence"] == 0.9
        assert "schema-matches" not in api_data  # None values shouldn't be included

    def test_translate_response_with_error(self):
        """Test translating response with error"""
        translator = StructuredDataDiagnosisResponseTranslator()
        from trustgraph.schema.core.primitives import Error

        pulsar_response = StructuredDataDiagnosisResponse(
            operation="schema-selection",
            error=Error(
                type="PromptServiceError",
                message="Service unavailable"
            )
        )

        api_data = translator.from_pulsar(pulsar_response)

        assert api_data["operation"] == "schema-selection"
        # Error objects are typically handled separately by the gateway
        # but the translator shouldn't break on them

    def test_translate_all_response_fields(self):
        """Test translating response with all possible fields"""
        translator = StructuredDataDiagnosisResponseTranslator()
        import json

        descriptor_data = {"mapping": {"field1": "column1"}}

        pulsar_response = StructuredDataDiagnosisResponse(
            operation="diagnose",
            detected_type="csv",
            confidence=0.95,
            descriptor=json.dumps(descriptor_data),
            metadata={"field_count": "5"},
            schema_matches=["schema1", "schema2"],
            error=None
        )

        api_data = translator.from_pulsar(pulsar_response)

        assert api_data["operation"] == "diagnose"
        assert api_data["detected-type"] == "csv"
        assert api_data["confidence"] == 0.95
        assert api_data["descriptor"] == descriptor_data  # Should be parsed from JSON
        assert api_data["metadata"] == {"field_count": "5"}
        assert api_data["schema-matches"] == ["schema1", "schema2"]

    def test_response_completion_flag(self):
        """Test that response includes completion flag"""
        translator = StructuredDataDiagnosisResponseTranslator()

        pulsar_response = StructuredDataDiagnosisResponse(
            operation="schema-selection",
            schema_matches=["products"],
            error=None
        )

        api_data, is_final = translator.from_response_with_completion(pulsar_response)

        assert is_final is True  # Structured-diag responses are always final
        assert api_data["operation"] == "schema-selection"
        assert api_data["schema-matches"] == ["products"]
|
||||
|
|
@ -0,0 +1,258 @@
|
|||
"""
|
||||
Contract tests for structured-diag service schemas
|
||||
"""
|
||||
|
||||
import pytest
|
||||
import json
|
||||
from pulsar.schema import JsonSchema
|
||||
from trustgraph.schema.services.diagnosis import (
|
||||
StructuredDataDiagnosisRequest,
|
||||
StructuredDataDiagnosisResponse
|
||||
)
|
||||
|
||||
|
||||
class TestStructuredDiagnosisSchemaContract:
    """Contract tests for structured diagnosis message schemas.

    Pins the wire contract of StructuredDataDiagnosisRequest/Response:
    field defaults, the new schema_matches field, and round-tripping
    through Pulsar's JsonSchema encode/decode.
    """

    def test_request_schema_basic_fields(self):
        """Test basic request schema fields"""
        request = StructuredDataDiagnosisRequest(
            operation="detect-type",
            sample="test data"
        )

        assert request.operation == "detect-type"
        assert request.sample == "test data"
        assert request.type is None  # Optional, defaults to None
        assert request.schema_name is None  # Optional, defaults to None
        assert request.options is None  # Optional, defaults to None

    def test_request_schema_all_operations(self):
        """Test request schema supports all operations"""
        operations = ["detect-type", "generate-descriptor", "diagnose", "schema-selection"]

        for op in operations:
            request = StructuredDataDiagnosisRequest(
                operation=op,
                sample="test data"
            )
            assert request.operation == op

    def test_request_schema_with_options(self):
        """Test request schema with options"""
        # Option values are strings even for booleans ("true"), matching the
        # string-map shape of the options field.
        options = {"delimiter": ",", "has_header": "true"}
        request = StructuredDataDiagnosisRequest(
            operation="generate-descriptor",
            sample="test data",
            type="csv",
            schema_name="products",
            options=options
        )

        assert request.options == options
        assert request.type == "csv"
        assert request.schema_name == "products"

    def test_response_schema_basic_fields(self):
        """Test basic response schema fields"""
        response = StructuredDataDiagnosisResponse(
            operation="detect-type",
            detected_type="xml",
            confidence=0.9,
            error=None  # Explicitly set to None
        )

        assert response.operation == "detect-type"
        assert response.detected_type == "xml"
        assert response.confidence == 0.9
        assert response.error is None
        assert response.descriptor is None
        assert response.metadata is None
        assert response.schema_matches is None  # New field, defaults to None

    def test_response_schema_with_error(self):
        """Test response schema with error"""
        from trustgraph.schema.core.primitives import Error

        error = Error(
            type="ServiceError",
            message="Service unavailable"
        )
        response = StructuredDataDiagnosisResponse(
            operation="schema-selection",
            error=error
        )

        assert response.error == error
        assert response.error.type == "ServiceError"
        assert response.error.message == "Service unavailable"

    def test_response_schema_with_schema_matches(self):
        """Test response schema with schema_matches array"""
        matches = ["products", "inventory", "catalog"]
        response = StructuredDataDiagnosisResponse(
            operation="schema-selection",
            schema_matches=matches
        )

        assert response.operation == "schema-selection"
        assert response.schema_matches == matches
        assert len(response.schema_matches) == 3

    def test_response_schema_empty_schema_matches(self):
        """Test response schema with empty schema_matches array"""
        response = StructuredDataDiagnosisResponse(
            operation="schema-selection",
            schema_matches=[]
        )

        # Empty list must be preserved as-is, not collapsed to None
        assert response.schema_matches == []
        assert isinstance(response.schema_matches, list)

    def test_response_schema_with_descriptor(self):
        """Test response schema with descriptor"""
        # The descriptor field carries a JSON *string*, not a nested object
        descriptor = {
            "mapping": {
                "field1": "column1",
                "field2": "column2"
            }
        }
        response = StructuredDataDiagnosisResponse(
            operation="generate-descriptor",
            descriptor=json.dumps(descriptor)
        )

        assert response.descriptor == json.dumps(descriptor)
        parsed = json.loads(response.descriptor)
        assert parsed["mapping"]["field1"] == "column1"

    def test_response_schema_with_metadata(self):
        """Test response schema with metadata"""
        # Metadata values are strings; structured values are JSON-encoded
        metadata = {
            "csv_options": json.dumps({"delimiter": ","}),
            "field_count": "5"
        }
        response = StructuredDataDiagnosisResponse(
            operation="diagnose",
            metadata=metadata
        )

        assert response.metadata == metadata
        assert response.metadata["field_count"] == "5"

    def test_schema_serialization(self):
        """Test that schemas can be serialized and deserialized correctly"""
        # Test request serialization
        request = StructuredDataDiagnosisRequest(
            operation="schema-selection",
            sample="test data",
            options={"key": "value"}
        )

        # Simulate Pulsar JsonSchema serialization
        schema = JsonSchema(StructuredDataDiagnosisRequest)
        serialized = schema.encode(request)
        deserialized = schema.decode(serialized)

        assert deserialized.operation == request.operation
        assert deserialized.sample == request.sample
        assert deserialized.options == request.options

    def test_response_serialization_with_schema_matches(self):
        """Test response serialization with schema_matches array"""
        response = StructuredDataDiagnosisResponse(
            operation="schema-selection",
            schema_matches=["schema1", "schema2"],
            confidence=0.85
        )

        # Simulate Pulsar JsonSchema serialization
        schema = JsonSchema(StructuredDataDiagnosisResponse)
        serialized = schema.encode(response)
        deserialized = schema.decode(serialized)

        assert deserialized.operation == response.operation
        assert deserialized.schema_matches == response.schema_matches
        assert deserialized.confidence == response.confidence

    def test_backwards_compatibility(self):
        """Test that old clients can still use the service without schema_matches"""
        # Old response without schema_matches should still work
        response = StructuredDataDiagnosisResponse(
            operation="detect-type",
            detected_type="json",
            confidence=0.95
        )

        # Verify default value for new field
        assert response.schema_matches is None  # Defaults to None when not set

        # Verify old fields still work
        assert response.detected_type == "json"
        assert response.confidence == 0.95

    def test_schema_selection_operation_contract(self):
        """Test complete contract for schema-selection operation"""
        # Request
        request = StructuredDataDiagnosisRequest(
            operation="schema-selection",
            sample="product_id,name,price\n1,Widget,9.99"
        )

        assert request.operation == "schema-selection"
        assert request.sample != ""

        # Response with matches
        response = StructuredDataDiagnosisResponse(
            operation="schema-selection",
            schema_matches=["products", "inventory"]
        )

        assert response.operation == "schema-selection"
        assert isinstance(response.schema_matches, list)
        assert len(response.schema_matches) == 2
        assert all(isinstance(s, str) for s in response.schema_matches)

        # Response with error
        from trustgraph.schema.core.primitives import Error
        error_response = StructuredDataDiagnosisResponse(
            operation="schema-selection",
            error=Error(type="PromptServiceError", message="Service unavailable")
        )

        assert error_response.error is not None
        assert error_response.schema_matches is None  # Default None when not set

    def test_all_operations_supported(self):
        """Verify all operations are properly supported in the contract"""
        # Table of each operation's required request fields and the response
        # fields it is expected to populate (response side is documentation
        # here; only constructibility is asserted).
        supported_operations = {
            "detect-type": {
                "required_request": ["sample"],
                "expected_response": ["detected_type", "confidence"]
            },
            "generate-descriptor": {
                "required_request": ["sample", "type", "schema_name"],
                "expected_response": ["descriptor"]
            },
            "diagnose": {
                "required_request": ["sample"],
                "expected_response": ["detected_type", "confidence", "descriptor"]
            },
            "schema-selection": {
                "required_request": ["sample"],
                "expected_response": ["schema_matches"]
            }
        }

        for operation, contract in supported_operations.items():
            # Test request creation
            request_data = {"operation": operation}
            for field in contract["required_request"]:
                request_data[field] = "test_value"

            request = StructuredDataDiagnosisRequest(**request_data)
            assert request.operation == operation

            # Test response creation
            response = StructuredDataDiagnosisResponse(operation=operation)
            assert response.operation == operation
|
||||
|
|
@ -0,0 +1,361 @@
|
|||
"""
|
||||
Unit tests for structured-diag service schema-selection operation
|
||||
"""
|
||||
|
||||
import pytest
|
||||
import json
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
from trustgraph.retrieval.structured_diag.service import Processor
|
||||
from trustgraph.schema.services.diagnosis import StructuredDataDiagnosisRequest, StructuredDataDiagnosisResponse
|
||||
from trustgraph.schema import RowSchema, Field as SchemaField, Error
|
||||
|
||||
|
||||
@pytest.fixture
def mock_schemas():
    """Build the trio of mock row schemas used by the selection tests."""

    def make_field(name, type_, description, **flags):
        # All fields in these fixtures are required; extra key flags
        # (primary/indexed) are passed through as-is.
        return SchemaField(
            name=name,
            type=type_,
            description=description,
            required=True,
            **flags,
        )

    return {
        "products": RowSchema(
            name="products",
            description="Product catalog schema",
            fields=[
                make_field("product_id", "string", "Product identifier",
                           primary=True, indexed=True),
                make_field("name", "string", "Product name"),
                make_field("price", "number", "Product price"),
            ],
        ),
        "customers": RowSchema(
            name="customers",
            description="Customer database schema",
            fields=[
                make_field("customer_id", "string", "Customer identifier",
                           primary=True),
                make_field("name", "string", "Customer name"),
                make_field("email", "string", "Customer email"),
            ],
        ),
        "orders": RowSchema(
            name="orders",
            description="Order management schema",
            fields=[
                make_field("order_id", "string", "Order identifier",
                           primary=True),
                make_field("customer_id", "string", "Customer identifier"),
                make_field("total", "number", "Order total"),
            ],
        ),
    }
||||
|
||||
|
||||
@pytest.fixture
def service(mock_schemas):
    """Processor instance preloaded with the mock schema catalogue."""
    instance = Processor(
        taskgroup=MagicMock(),
        id="test-processor",
    )
    instance.schemas = mock_schemas
    return instance
||||
|
||||
|
||||
@pytest.fixture
def mock_flow():
    """Return (flow, prompt client) — the flow routes requests to the mock prompt service."""
    flow = MagicMock()
    prompt_client = AsyncMock()
    # Any flow(...) lookup yields an object whose .request is the prompt mock.
    flow.return_value.request = prompt_client
    return flow, prompt_client
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_schema_selection_success(service, mock_flow):
    """Successful selection passes through the schema names chosen by the prompt service."""
    flow, prompt_request_flow = mock_flow

    # Prompt service answers with a JSON array of matching schema names.
    prompt_reply = MagicMock()
    prompt_reply.error = None
    prompt_reply.text = '["products", "orders"]'
    prompt_reply.object = None  # force the text path
    prompt_request_flow.return_value = prompt_reply

    request = StructuredDataDiagnosisRequest(
        operation="schema-selection",
        sample="product_id,name,price,quantity\nPROD001,Widget,19.99,5",
    )

    response = await service.schema_selection_operation(request, flow)

    # The matches reported by the prompt service are returned verbatim.
    assert response.error is None
    assert response.operation == "schema-selection"
    assert response.schema_matches == ["products", "orders"]

    # Exactly one prompt call, addressed to the schema-selection template.
    prompt_request_flow.assert_called_once()
    prompt_call = prompt_request_flow.call_args[0][0]
    assert prompt_call.id == "schema-selection"

    # Every configured schema must be offered to the prompt.
    offered = json.loads(prompt_call.terms["schemas"])
    assert len(offered) == 3
    for expected in ("products", "customers", "orders"):
        assert any(entry["name"] == expected for entry in offered)
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_schema_selection_empty_response(service, mock_flow):
    """An empty prompt reply is surfaced as a PromptServiceError."""
    flow, prompt_request_flow = mock_flow

    empty_reply = MagicMock()
    empty_reply.error = None
    empty_reply.text = ""
    empty_reply.object = ""  # neither field carries a payload
    prompt_request_flow.return_value = empty_reply

    request = StructuredDataDiagnosisRequest(
        operation="schema-selection",
        sample="test data",
    )

    response = await service.schema_selection_operation(request, flow)

    assert response.error is not None
    assert response.error.type == "PromptServiceError"
    assert "Empty response" in response.error.message
    assert response.operation == "schema-selection"
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_schema_selection_prompt_error(service, mock_flow):
    """An error returned by the prompt service becomes a PromptServiceError."""
    flow, prompt_request_flow = mock_flow

    failing_reply = MagicMock()
    failing_reply.error = Error(
        type="ServiceError",
        message="Prompt service unavailable",
    )
    failing_reply.text = None
    prompt_request_flow.return_value = failing_reply

    request = StructuredDataDiagnosisRequest(
        operation="schema-selection",
        sample="test data",
    )

    response = await service.schema_selection_operation(request, flow)

    assert response.error is not None
    assert response.error.type == "PromptServiceError"
    assert "Failed to select schemas" in response.error.message
    assert response.operation == "schema-selection"
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_schema_selection_invalid_json(service, mock_flow):
    """A non-JSON prompt reply is reported as a ParseError."""
    flow, prompt_request_flow = mock_flow

    bad_reply = MagicMock()
    bad_reply.error = None
    bad_reply.text = "not valid json"
    bad_reply.object = None
    prompt_request_flow.return_value = bad_reply

    request = StructuredDataDiagnosisRequest(
        operation="schema-selection",
        sample="test data",
    )

    response = await service.schema_selection_operation(request, flow)

    assert response.error is not None
    assert response.error.type == "ParseError"
    assert "Failed to parse schema selection response" in response.error.message
    assert response.operation == "schema-selection"
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_schema_selection_non_array_response(service, mock_flow):
    """A JSON object (rather than the expected array) is reported as a ParseError."""
    flow, prompt_request_flow = mock_flow

    bad_reply = MagicMock()
    bad_reply.error = None
    bad_reply.text = '{"schema": "products"}'  # Object instead of array
    bad_reply.object = None
    prompt_request_flow.return_value = bad_reply

    request = StructuredDataDiagnosisRequest(
        operation="schema-selection",
        sample="test data",
    )

    response = await service.schema_selection_operation(request, flow)

    assert response.error is not None
    assert response.error.type == "ParseError"
    assert "Failed to parse schema selection response" in response.error.message
    assert response.operation == "schema-selection"
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_schema_selection_with_options(service, mock_flow):
    """Request options are forwarded to the prompt service unchanged."""
    flow, prompt_request_flow = mock_flow

    reply = MagicMock()
    reply.error = None
    reply.text = '["products"]'
    reply.object = None
    prompt_request_flow.return_value = reply

    request = StructuredDataDiagnosisRequest(
        operation="schema-selection",
        sample="test data",
        options={"filter": "catalog", "confidence": "high"},
    )

    response = await service.schema_selection_operation(request, flow)

    assert response.error is None
    assert response.schema_matches == ["products"]

    # The options must round-trip into the prompt terms.
    prompt_call = prompt_request_flow.call_args[0][0]
    forwarded = json.loads(prompt_call.terms["options"])
    assert forwarded["filter"] == "catalog"
    assert forwarded["confidence"] == "high"
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_schema_selection_exception_handling(service, mock_flow):
    """Unexpected exceptions from the prompt call become error responses."""
    flow, prompt_request_flow = mock_flow

    # The prompt client blows up instead of returning a reply.
    prompt_request_flow.side_effect = Exception("Unexpected error")

    request = StructuredDataDiagnosisRequest(
        operation="schema-selection",
        sample="test data",
    )

    response = await service.schema_selection_operation(request, flow)

    assert response.error is not None
    assert response.error.type == "PromptServiceError"
    assert "Failed to select schemas" in response.error.message
    assert response.operation == "schema-selection"
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_schema_selection_empty_schemas(service, mock_flow):
    """With no schemas configured, selection still succeeds with an empty result."""
    flow, prompt_request_flow = mock_flow

    # Remove every configured schema.
    service.schemas = {}

    reply = MagicMock()
    reply.error = None
    reply.text = '[]'
    reply.object = None
    prompt_request_flow.return_value = reply

    request = StructuredDataDiagnosisRequest(
        operation="schema-selection",
        sample="test data",
    )

    response = await service.schema_selection_operation(request, flow)

    assert response.error is None
    assert response.schema_matches == []

    # The prompt still receives a (now empty) schema catalogue.
    prompt_call = prompt_request_flow.call_args[0][0]
    assert len(json.loads(prompt_call.terms["schemas"])) == 0
||||
|
|
@ -0,0 +1,179 @@
|
|||
"""
|
||||
Unit tests for simplified type detection in structured-diag service
|
||||
"""
|
||||
|
||||
import pytest
|
||||
from trustgraph.retrieval.structured_diag.type_detector import detect_data_type
|
||||
|
||||
|
||||
class TestSimplifiedTypeDetection:
    """Exercise the simplified data-type detection logic."""

    def _expect(self, sample, expected_type, expected_confidence):
        # Shared assertion helper: run detection once, check both outputs.
        detected, confidence = detect_data_type(sample)
        assert detected == expected_type
        assert confidence == expected_confidence

    def test_xml_detection_with_declaration(self):
        """XML with an <?xml?> declaration is detected."""
        self._expect('<?xml version="1.0"?><root><item>data</item></root>', "xml", 0.9)

    def test_xml_detection_without_declaration(self):
        """XML without a declaration but with closing tags is detected."""
        self._expect('<root><item>data</item></root>', "xml", 0.9)

    def test_xml_detection_truncated(self):
        """Truncated XML (common with 500-byte samples) is still detected."""
        sample = '''<?xml version="1.0" encoding="UTF-8"?>
<pieDataset>
<pies>
<pie id="1">
<pieType>Steak & Kidney</pieType>
<region>Yorkshire</region>
<diameterCm>12.5</diameterCm>
<heightCm>4.2'''  # Truncated mid-element
        self._expect(sample, "xml", 0.9)

    def test_json_object_detection(self):
        """A JSON object is detected."""
        self._expect('{"name": "John", "age": 30, "city": "New York"}', "json", 0.9)

    def test_json_array_detection(self):
        """A JSON array is detected."""
        self._expect('[{"id": 1}, {"id": 2}, {"id": 3}]', "json", 0.9)

    def test_json_truncated(self):
        """Truncated JSON is still detected."""
        self._expect(
            '{"products": [{"id": 1, "name": "Widget", "price": 19.99}, {"id": 2, "na',
            "json", 0.9,
        )

    def test_csv_detection(self):
        """Comma-separated multi-line text falls back to CSV."""
        sample = '''name,age,city
John,30,New York
Jane,25,Boston
Bob,35,Chicago'''
        self._expect(sample, "csv", 0.8)

    def test_csv_detection_single_line(self):
        """A single comma-separated line defaults to CSV."""
        self._expect('column1,column2,column3', "csv", 0.8)

    def test_empty_input(self):
        """Empty input yields no type and zero confidence."""
        self._expect("", None, 0.0)

    def test_whitespace_only(self):
        """Whitespace-only input yields no type and zero confidence."""
        self._expect("  \n  \t  ", None, 0.0)

    def test_html_not_xml(self):
        """HTML is detected as XML (it has closing tags)."""
        self._expect('<html><body><h1>Title</h1></body></html>', "xml", 0.9)

    def test_malformed_xml_still_detected(self):
        """Malformed XML is still detected as XML."""
        self._expect('<root><item>data</item><unclosed>', "xml", 0.9)

    def test_json_with_whitespace(self):
        """Leading whitespace does not prevent JSON detection."""
        self._expect('  \n  {"key": "value"}', "json", 0.9)

    def test_priority_xml_over_csv(self):
        """XML wins over CSV when both patterns are present."""
        self._expect('<?xml version="1.0"?>\n<data>a,b,c</data>', "xml", 0.9)

    def test_priority_json_over_csv(self):
        """JSON wins over CSV when both patterns are present."""
        self._expect('{"data": "a,b,c"}', "json", 0.9)

    def test_text_defaults_to_csv(self):
        """Plain unstructured text defaults to CSV."""
        self._expect('This is just plain text without any structure', "csv", 0.8)
|
||||
|
||||
class TestRealWorldSamples:
    """Detection against realistic data samples."""

    def _expect(self, sample, expected_type, expected_confidence):
        # Shared assertion helper: run detection once, check both outputs.
        detected, confidence = detect_data_type(sample)
        assert detected == expected_type
        assert confidence == expected_confidence

    def test_uk_pies_xml_sample(self):
        """First 500 bytes of the UK pies XML export detect as XML."""
        sample = '''<?xml version="1.0" encoding="UTF-8"?>
<pieDataset>
<pies>
<pie id="1">
<pieType>Steak & Kidney</pieType>
<region>Yorkshire</region>
<diameterCm>12.5</diameterCm>
<heightCm>4.2</heightCm>
<weightGrams>285</weightGrams>
<crustType>Shortcrust</crustType>
<fillingCategory>Meat</fillingCategory>
<price>3.50</price>
<currency>GBP</currency>
<bakeryType>Traditional</bakeryType>
</pie>
<pie id="2">
<pieType>Chicken & Mushroom</pieType>
<region>Lancashire</regio'''  # Cut at 500 chars
        self._expect(sample[:500], "xml", 0.9)

    def test_product_json_sample(self):
        """Product catalog JSON sample detects as JSON."""
        sample = '''{"products": [
{"id": "PROD001", "name": "Widget", "price": 19.99, "category": "Tools"},
{"id": "PROD002", "name": "Gadget", "price": 29.99, "category": "Electronics"},
{"id": "PROD003", "name": "Doohickey", "price": 9.99, "category": "Accessories"}
]}'''
        self._expect(sample, "json", 0.9)

    def test_customer_csv_sample(self):
        """Customer CSV sample detects as CSV."""
        sample = '''customer_id,name,email,signup_date,total_orders
CUST001,John Smith,john@example.com,2023-01-15,5
CUST002,Jane Doe,jane@example.com,2023-02-20,3
CUST003,Bob Johnson,bob@example.com,2023-03-10,7'''
        self._expect(sample, "csv", 0.8)
|
||||
588
tests/unit/test_retrieval/test_structured_query.py
Normal file
588
tests/unit/test_retrieval/test_structured_query.py
Normal file
|
|
@ -0,0 +1,588 @@
|
|||
"""
|
||||
Unit tests for Structured Query Service
|
||||
Following TEST_STRATEGY.md approach for service testing
|
||||
"""
|
||||
|
||||
import pytest
|
||||
import json
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
from trustgraph.schema import (
|
||||
StructuredQueryRequest, StructuredQueryResponse,
|
||||
QuestionToStructuredQueryRequest, QuestionToStructuredQueryResponse,
|
||||
ObjectsQueryRequest, ObjectsQueryResponse,
|
||||
Error, GraphQLError
|
||||
)
|
||||
from trustgraph.retrieval.structured_query.service import Processor
|
||||
|
||||
|
||||
@pytest.fixture
def mock_pulsar_client():
    """Stand-in Pulsar client for processor construction."""
    return AsyncMock()
|
||||
|
||||
@pytest.fixture
def processor(mock_pulsar_client):
    """Structured-query processor wired up with mocked dependencies."""
    instance = Processor(
        taskgroup=MagicMock(),
        pulsar_client=mock_pulsar_client,
    )
    # Stub out the client factory so no real connections are attempted.
    instance.client = MagicMock()
    return instance
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
class TestStructuredQueryProcessor:
|
||||
"""Test Structured Query service processor"""
|
||||
|
||||
async def test_successful_end_to_end_query(self, processor):
|
||||
"""Test successful end-to-end query processing"""
|
||||
# Arrange
|
||||
request = StructuredQueryRequest(
|
||||
question="Show me all customers from New York",
|
||||
user="trustgraph",
|
||||
collection="default"
|
||||
)
|
||||
|
||||
msg = MagicMock()
|
||||
msg.value.return_value = request
|
||||
msg.properties.return_value = {"id": "test-123"}
|
||||
|
||||
consumer = MagicMock()
|
||||
flow = MagicMock()
|
||||
flow_response = AsyncMock()
|
||||
flow.return_value = flow_response
|
||||
|
||||
# Mock NLP query service response
|
||||
nlp_response = QuestionToStructuredQueryResponse(
|
||||
error=None,
|
||||
graphql_query='query { customers(where: {state: {eq: "NY"}}) { id name email } }',
|
||||
variables={"state": "NY"},
|
||||
detected_schemas=["customers"],
|
||||
confidence=0.95
|
||||
)
|
||||
|
||||
# Mock objects query service response
|
||||
objects_response = ObjectsQueryResponse(
|
||||
error=None,
|
||||
data='{"customers": [{"id": "1", "name": "John", "email": "john@example.com"}]}',
|
||||
errors=None,
|
||||
extensions={}
|
||||
)
|
||||
|
||||
# Set up mock clients
|
||||
mock_nlp_client = AsyncMock()
|
||||
mock_nlp_client.request.return_value = nlp_response
|
||||
|
||||
mock_objects_client = AsyncMock()
|
||||
mock_objects_client.request.return_value = objects_response
|
||||
|
||||
# Mock flow context to route to appropriate services
|
||||
def flow_router(service_name):
|
||||
if service_name == "nlp-query-request":
|
||||
return mock_nlp_client
|
||||
elif service_name == "objects-query-request":
|
||||
return mock_objects_client
|
||||
elif service_name == "response":
|
||||
return flow_response
|
||||
else:
|
||||
return AsyncMock()
|
||||
flow.side_effect = flow_router
|
||||
|
||||
# Act
|
||||
await processor.on_message(msg, consumer, flow)
|
||||
|
||||
# Assert
|
||||
# Verify NLP query service was called correctly
|
||||
mock_nlp_client.request.assert_called_once()
|
||||
nlp_call_args = mock_nlp_client.request.call_args[0][0]
|
||||
assert isinstance(nlp_call_args, QuestionToStructuredQueryRequest)
|
||||
assert nlp_call_args.question == "Show me all customers from New York"
|
||||
assert nlp_call_args.max_results == 100
|
||||
|
||||
# Verify objects query service was called correctly
|
||||
mock_objects_client.request.assert_called_once()
|
||||
objects_call_args = mock_objects_client.request.call_args[0][0]
|
||||
assert isinstance(objects_call_args, ObjectsQueryRequest)
|
||||
assert objects_call_args.query == 'query { customers(where: {state: {eq: "NY"}}) { id name email } }'
|
||||
assert objects_call_args.variables == {"state": "NY"}
|
||||
assert objects_call_args.user == "trustgraph"
|
||||
assert objects_call_args.collection == "default"
|
||||
|
||||
# Verify response
|
||||
flow_response.send.assert_called_once()
|
||||
response_call = flow_response.send.call_args
|
||||
response = response_call[0][0]
|
||||
|
||||
assert isinstance(response, StructuredQueryResponse)
|
||||
assert response.error is None
|
||||
assert response.data == '{"customers": [{"id": "1", "name": "John", "email": "john@example.com"}]}'
|
||||
assert len(response.errors) == 0
|
||||
|
||||
async def test_nlp_query_service_error(self, processor):
|
||||
"""Test handling of NLP query service errors"""
|
||||
# Arrange
|
||||
request = StructuredQueryRequest(
|
||||
question="Invalid query"
|
||||
)
|
||||
|
||||
msg = MagicMock()
|
||||
msg.value.return_value = request
|
||||
msg.properties.return_value = {"id": "test-error"}
|
||||
|
||||
consumer = MagicMock()
|
||||
flow = MagicMock()
|
||||
flow_response = AsyncMock()
|
||||
flow.return_value = flow_response
|
||||
|
||||
# Mock NLP query service error response
|
||||
nlp_response = QuestionToStructuredQueryResponse(
|
||||
error=Error(type="nlp-query-error", message="Failed to parse question"),
|
||||
graphql_query="",
|
||||
variables={},
|
||||
detected_schemas=[],
|
||||
confidence=0.0
|
||||
)
|
||||
|
||||
mock_nlp_client = AsyncMock()
|
||||
mock_nlp_client.request.return_value = nlp_response
|
||||
|
||||
# Mock flow context to route to nlp service
|
||||
def flow_router(service_name):
|
||||
if service_name == "nlp-query-request":
|
||||
return mock_nlp_client
|
||||
elif service_name == "response":
|
||||
return flow_response
|
||||
else:
|
||||
return AsyncMock()
|
||||
flow.side_effect = flow_router
|
||||
|
||||
# Act
|
||||
await processor.on_message(msg, consumer, flow)
|
||||
|
||||
# Assert
|
||||
flow_response.send.assert_called_once()
|
||||
response_call = flow_response.send.call_args
|
||||
response = response_call[0][0]
|
||||
|
||||
assert isinstance(response, StructuredQueryResponse)
|
||||
assert response.error is not None
|
||||
assert response.error.type == "structured-query-error"
|
||||
assert "NLP query service error" in response.error.message
|
||||
|
||||
async def test_empty_graphql_query_error(self, processor):
|
||||
"""Test handling of empty GraphQL query from NLP service"""
|
||||
# Arrange
|
||||
request = StructuredQueryRequest(
|
||||
question="Ambiguous question"
|
||||
)
|
||||
|
||||
msg = MagicMock()
|
||||
msg.value.return_value = request
|
||||
msg.properties.return_value = {"id": "test-empty"}
|
||||
|
||||
consumer = MagicMock()
|
||||
flow = MagicMock()
|
||||
flow_response = AsyncMock()
|
||||
flow.return_value = flow_response
|
||||
|
||||
# Mock NLP query service response with empty query
|
||||
nlp_response = QuestionToStructuredQueryResponse(
|
||||
error=None,
|
||||
graphql_query="", # Empty query
|
||||
variables={},
|
||||
detected_schemas=[],
|
||||
confidence=0.1
|
||||
)
|
||||
|
||||
mock_nlp_client = AsyncMock()
|
||||
mock_nlp_client.request.return_value = nlp_response
|
||||
|
||||
# Mock flow context to route to nlp service
|
||||
def flow_router(service_name):
|
||||
if service_name == "nlp-query-request":
|
||||
return mock_nlp_client
|
||||
elif service_name == "response":
|
||||
return flow_response
|
||||
else:
|
||||
return AsyncMock()
|
||||
flow.side_effect = flow_router
|
||||
|
||||
# Act
|
||||
await processor.on_message(msg, consumer, flow)
|
||||
|
||||
# Assert
|
||||
flow_response.send.assert_called_once()
|
||||
response_call = flow_response.send.call_args
|
||||
response = response_call[0][0]
|
||||
|
||||
assert response.error is not None
|
||||
assert "empty GraphQL query" in response.error.message
|
||||
|
||||
async def test_objects_query_service_error(self, processor):
|
||||
"""Test handling of objects query service errors"""
|
||||
# Arrange
|
||||
request = StructuredQueryRequest(
|
||||
question="Show me customers"
|
||||
)
|
||||
|
||||
msg = MagicMock()
|
||||
msg.value.return_value = request
|
||||
msg.properties.return_value = {"id": "test-objects-error"}
|
||||
|
||||
consumer = MagicMock()
|
||||
flow = MagicMock()
|
||||
flow_response = AsyncMock()
|
||||
flow.return_value = flow_response
|
||||
|
||||
# Mock successful NLP response
|
||||
nlp_response = QuestionToStructuredQueryResponse(
|
||||
error=None,
|
||||
graphql_query='query { customers { id name } }',
|
||||
variables={},
|
||||
detected_schemas=["customers"],
|
||||
confidence=0.9
|
||||
)
|
||||
|
||||
# Mock objects query service error
|
||||
objects_response = ObjectsQueryResponse(
|
||||
error=Error(type="graphql-execution-error", message="Table 'customers' not found"),
|
||||
data=None,
|
||||
errors=None,
|
||||
extensions={}
|
||||
)
|
||||
|
||||
mock_nlp_client = AsyncMock()
|
||||
mock_nlp_client.request.return_value = nlp_response
|
||||
|
||||
mock_objects_client = AsyncMock()
|
||||
mock_objects_client.request.return_value = objects_response
|
||||
|
||||
# Mock flow context to route to appropriate services
|
||||
def flow_router(service_name):
|
||||
if service_name == "nlp-query-request":
|
||||
return mock_nlp_client
|
||||
elif service_name == "objects-query-request":
|
||||
return mock_objects_client
|
||||
elif service_name == "response":
|
||||
return flow_response
|
||||
else:
|
||||
return AsyncMock()
|
||||
flow.side_effect = flow_router
|
||||
|
||||
# Act
|
||||
await processor.on_message(msg, consumer, flow)
|
||||
|
||||
# Assert
|
||||
flow_response.send.assert_called_once()
|
||||
response_call = flow_response.send.call_args
|
||||
response = response_call[0][0]
|
||||
|
||||
assert response.error is not None
|
||||
assert "Objects query service error" in response.error.message
|
||||
assert "Table 'customers' not found" in response.error.message
|
||||
|
||||
async def test_graphql_errors_handling(self, processor):
|
||||
"""Test handling of GraphQL validation/execution errors"""
|
||||
# Arrange
|
||||
request = StructuredQueryRequest(
|
||||
question="Show invalid field"
|
||||
)
|
||||
|
||||
msg = MagicMock()
|
||||
msg.value.return_value = request
|
||||
msg.properties.return_value = {"id": "test-graphql-errors"}
|
||||
|
||||
consumer = MagicMock()
|
||||
flow = MagicMock()
|
||||
flow_response = AsyncMock()
|
||||
flow.return_value = flow_response
|
||||
|
||||
# Mock successful NLP response
|
||||
nlp_response = QuestionToStructuredQueryResponse(
|
||||
error=None,
|
||||
graphql_query='query { customers { invalid_field } }',
|
||||
variables={},
|
||||
detected_schemas=["customers"],
|
||||
confidence=0.8
|
||||
)
|
||||
|
||||
# Mock objects response with GraphQL errors
|
||||
graphql_errors = [
|
||||
GraphQLError(
|
||||
message="Cannot query field 'invalid_field' on type 'Customer'",
|
||||
path=["customers", "0", "invalid_field"], # All path elements must be strings
|
||||
extensions={}
|
||||
)
|
||||
]
|
||||
|
||||
objects_response = ObjectsQueryResponse(
|
||||
error=None,
|
||||
data=None,
|
||||
errors=graphql_errors,
|
||||
extensions={}
|
||||
)
|
||||
|
||||
mock_nlp_client = AsyncMock()
|
||||
mock_nlp_client.request.return_value = nlp_response
|
||||
|
||||
mock_objects_client = AsyncMock()
|
||||
mock_objects_client.request.return_value = objects_response
|
||||
|
||||
# Mock flow context to route to appropriate services
|
||||
def flow_router(service_name):
|
||||
if service_name == "nlp-query-request":
|
||||
return mock_nlp_client
|
||||
elif service_name == "objects-query-request":
|
||||
return mock_objects_client
|
||||
elif service_name == "response":
|
||||
return flow_response
|
||||
else:
|
||||
return AsyncMock()
|
||||
flow.side_effect = flow_router
|
||||
|
||||
# Act
|
||||
await processor.on_message(msg, consumer, flow)
|
||||
|
||||
# Assert
|
||||
flow_response.send.assert_called_once()
|
||||
response_call = flow_response.send.call_args
|
||||
response = response_call[0][0]
|
||||
|
||||
assert response.error is None
|
||||
assert len(response.errors) == 1
|
||||
assert "Cannot query field 'invalid_field'" in response.errors[0]
|
||||
assert "customers" in response.errors[0]
|
||||
|
||||
async def test_complex_query_with_variables(self, processor):
|
||||
"""Test processing complex queries with variables"""
|
||||
# Arrange
|
||||
request = StructuredQueryRequest(
|
||||
question="Show customers with orders over $100 from last month"
|
||||
)
|
||||
|
||||
msg = MagicMock()
|
||||
msg.value.return_value = request
|
||||
msg.properties.return_value = {"id": "test-complex"}
|
||||
|
||||
consumer = MagicMock()
|
||||
flow = MagicMock()
|
||||
flow_response = AsyncMock()
|
||||
flow.return_value = flow_response
|
||||
|
||||
# Mock NLP response with complex query and variables
|
||||
nlp_response = QuestionToStructuredQueryResponse(
|
||||
error=None,
|
||||
graphql_query='''
|
||||
query GetCustomersWithLargeOrders($minTotal: Float!, $startDate: String!) {
|
||||
customers {
|
||||
id
|
||||
name
|
||||
orders(where: {total: {gt: $minTotal}, date: {gte: $startDate}}) {
|
||||
id
|
||||
total
|
||||
date
|
||||
}
|
||||
}
|
||||
}
|
||||
''',
|
||||
variables={
|
||||
"minTotal": "100.0", # Convert to string for Pulsar schema
|
||||
"startDate": "2024-01-01"
|
||||
},
|
||||
detected_schemas=["customers", "orders"],
|
||||
confidence=0.88
|
||||
)
|
||||
|
||||
# Mock objects response
|
||||
objects_response = ObjectsQueryResponse(
|
||||
error=None,
|
||||
data='{"customers": [{"id": "1", "name": "Alice", "orders": [{"id": "100", "total": 150.0}]}]}',
|
||||
errors=None
|
||||
)
|
||||
|
||||
mock_nlp_client = AsyncMock()
|
||||
mock_nlp_client.request.return_value = nlp_response
|
||||
|
||||
mock_objects_client = AsyncMock()
|
||||
mock_objects_client.request.return_value = objects_response
|
||||
|
||||
# Mock flow context to route to appropriate services
|
||||
def flow_router(service_name):
|
||||
if service_name == "nlp-query-request":
|
||||
return mock_nlp_client
|
||||
elif service_name == "objects-query-request":
|
||||
return mock_objects_client
|
||||
elif service_name == "response":
|
||||
return flow_response
|
||||
else:
|
||||
return AsyncMock()
|
||||
flow.side_effect = flow_router
|
||||
|
||||
# Act
|
||||
await processor.on_message(msg, consumer, flow)
|
||||
|
||||
# Assert
|
||||
# Verify variables were passed correctly (converted to strings)
|
||||
objects_call_args = mock_objects_client.request.call_args[0][0]
|
||||
assert objects_call_args.variables["minTotal"] == "100.0" # Should be converted to string
|
||||
assert objects_call_args.variables["startDate"] == "2024-01-01"
|
||||
|
||||
# Verify response
|
||||
response_call = flow_response.send.call_args
|
||||
response = response_call[0][0]
|
||||
assert response.error is None
|
||||
assert "Alice" in response.data
|
||||
|
||||
async def test_null_data_handling(self, processor):
|
||||
"""Test handling of null/empty data responses"""
|
||||
# Arrange
|
||||
request = StructuredQueryRequest(
|
||||
question="Show nonexistent data"
|
||||
)
|
||||
|
||||
msg = MagicMock()
|
||||
msg.value.return_value = request
|
||||
msg.properties.return_value = {"id": "test-null"}
|
||||
|
||||
consumer = MagicMock()
|
||||
flow = MagicMock()
|
||||
flow_response = AsyncMock()
|
||||
flow.return_value = flow_response
|
||||
|
||||
# Mock responses
|
||||
nlp_response = QuestionToStructuredQueryResponse(
|
||||
error=None,
|
||||
graphql_query='query { customers { id } }',
|
||||
variables={},
|
||||
detected_schemas=["customers"],
|
||||
confidence=0.9
|
||||
)
|
||||
|
||||
objects_response = ObjectsQueryResponse(
|
||||
error=None,
|
||||
data=None, # Null data
|
||||
errors=None,
|
||||
extensions={}
|
||||
)
|
||||
|
||||
mock_nlp_client = AsyncMock()
|
||||
mock_nlp_client.request.return_value = nlp_response
|
||||
|
||||
mock_objects_client = AsyncMock()
|
||||
mock_objects_client.request.return_value = objects_response
|
||||
|
||||
# Mock flow context to route to appropriate services
|
||||
def flow_router(service_name):
|
||||
if service_name == "nlp-query-request":
|
||||
return mock_nlp_client
|
||||
elif service_name == "objects-query-request":
|
||||
return mock_objects_client
|
||||
elif service_name == "response":
|
||||
return flow_response
|
||||
else:
|
||||
return AsyncMock()
|
||||
flow.side_effect = flow_router
|
||||
|
||||
# Act
|
||||
await processor.on_message(msg, consumer, flow)
|
||||
|
||||
# Assert
|
||||
response_call = flow_response.send.call_args
|
||||
response = response_call[0][0]
|
||||
|
||||
assert response.error is None
|
||||
assert response.data == "null" # Should convert None to "null" string
|
||||
|
||||
async def test_exception_handling(self, processor):
|
||||
"""Test general exception handling"""
|
||||
# Arrange
|
||||
request = StructuredQueryRequest(
|
||||
question="Test exception"
|
||||
)
|
||||
|
||||
msg = MagicMock()
|
||||
msg.value.return_value = request
|
||||
msg.properties.return_value = {"id": "test-exception"}
|
||||
|
||||
consumer = MagicMock()
|
||||
flow = MagicMock()
|
||||
flow_response = AsyncMock()
|
||||
flow.return_value = flow_response
|
||||
|
||||
# Mock flow context to raise exception
|
||||
mock_client = AsyncMock()
|
||||
mock_client.request.side_effect = Exception("Network timeout")
|
||||
|
||||
def flow_router(service_name):
|
||||
if service_name == "nlp-query-request":
|
||||
return mock_client
|
||||
elif service_name == "response":
|
||||
return flow_response
|
||||
else:
|
||||
return AsyncMock()
|
||||
flow.side_effect = flow_router
|
||||
|
||||
# Act
|
||||
await processor.on_message(msg, consumer, flow)
|
||||
|
||||
# Assert
|
||||
flow_response.send.assert_called_once()
|
||||
response_call = flow_response.send.call_args
|
||||
response = response_call[0][0]
|
||||
|
||||
assert response.error is not None
|
||||
assert response.error.type == "structured-query-error"
|
||||
assert "Network timeout" in response.error.message
|
||||
assert response.data == "null"
|
||||
assert len(response.errors) == 0
|
||||
|
||||
def test_processor_initialization(self, mock_pulsar_client):
|
||||
"""Test processor initialization with correct specifications"""
|
||||
# Act
|
||||
processor = Processor(
|
||||
taskgroup=MagicMock(),
|
||||
pulsar_client=mock_pulsar_client
|
||||
)
|
||||
|
||||
# Assert - Test default ID
|
||||
assert processor.id == "structured-query"
|
||||
|
||||
# Verify specifications were registered (we can't directly access them,
|
||||
# but we know they were registered if initialization succeeded)
|
||||
assert processor is not None
|
||||
|
||||
def test_add_args(self):
|
||||
"""Test command-line argument parsing"""
|
||||
import argparse
|
||||
|
||||
parser = argparse.ArgumentParser()
|
||||
Processor.add_args(parser)
|
||||
|
||||
# Test that it doesn't crash (no additional args)
|
||||
args = parser.parse_args([])
|
||||
# No specific assertions since no custom args are added
|
||||
assert args is not None
|
||||
|
||||
|
||||
@pytest.mark.unit
class TestStructuredQueryHelperFunctions:
    """Test helper functions and data transformations"""

    def test_service_logging_integration(self):
        """Test that logging is properly configured"""
        # The service logger must be namespaced after its module path.
        from trustgraph.retrieval.structured_query.service import logger as svc_logger

        assert svc_logger.name == "trustgraph.retrieval.structured_query.service"

    def test_default_values(self):
        """Test default configuration values"""
        # The module-level default identifier for the processor.
        from trustgraph.retrieval.structured_query.service import default_ident as ident

        assert ident == "structured-query"
|
||||
Loading…
Add table
Add a link
Reference in a new issue