Release 1.4 -> master (#524)

Catch up
This commit is contained in:
cybermaggedon 2025-09-20 16:00:37 +01:00 committed by GitHub
parent a8e437fc7f
commit 6c7af8789d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
216 changed files with 31360 additions and 1611 deletions

View file

@@ -0,0 +1,77 @@
"""
Unit test for DocumentRAG service parameter passing fix.
Tests that user and collection parameters from the message are correctly
passed to the DocumentRag.query() method.
"""
import pytest
from unittest.mock import MagicMock, AsyncMock, patch
from trustgraph.retrieval.document_rag.rag import Processor
from trustgraph.schema import DocumentRagQuery, DocumentRagResponse
class TestDocumentRagService:
    """Test DocumentRAG service parameter passing"""

    # Patch the DocumentRag class where the processor imports it, so the
    # processor constructs our mock instead of a real RAG backend.
    @patch('trustgraph.retrieval.document_rag.rag.DocumentRag')
    @pytest.mark.asyncio
    async def test_user_and_collection_parameters_passed_to_query(self, mock_document_rag_class):
        """
        Test that user and collection from message are passed to DocumentRag.query().
        This is a regression test for the bug where user/collection parameters
        were ignored, causing wrong collection names like 'd_trustgraph_default_384'
        instead of 'd_my_user_test_coll_1_384'.
        """
        # Setup processor
        processor = Processor(
            taskgroup=MagicMock(),
            id="test-processor",
            doc_limit=10
        )
        # Setup mock DocumentRag instance — returned when the processor
        # instantiates the patched class.
        mock_rag_instance = AsyncMock()
        mock_document_rag_class.return_value = mock_rag_instance
        mock_rag_instance.query.return_value = "test response"
        # Setup message with custom user/collection
        msg = MagicMock()
        msg.value.return_value = DocumentRagQuery(
            query="test query",
            user="my_user",  # Custom user (not default "trustgraph")
            collection="test_coll_1",  # Custom collection (not default "default")
            doc_limit=5
        )
        msg.properties.return_value = {"id": "test-id"}
        # Setup flow mock
        consumer = MagicMock()
        flow = MagicMock()
        # Mock flow to return AsyncMock for clients and response producer
        mock_producer = AsyncMock()

        def flow_router(service_name):
            # Only the "response" producer matters for the assertions below.
            if service_name == "response":
                return mock_producer
            return AsyncMock()  # embeddings, doc-embeddings, prompt clients
        flow.side_effect = flow_router
        # Execute
        await processor.on_request(msg, consumer, flow)
        # Verify: DocumentRag.query was called with correct parameters
        mock_rag_instance.query.assert_called_once_with(
            "test query",
            user="my_user",  # Must be from message, not hardcoded default
            collection="test_coll_1",  # Must be from message, not hardcoded default
            doc_limit=5
        )
        # Verify response was sent
        mock_producer.send.assert_called_once()
        sent_response = mock_producer.send.call_args[0][0]
        assert isinstance(sent_response, DocumentRagResponse)
        assert sent_response.response == "test response"
        assert sent_response.error is None

View file

@@ -0,0 +1,374 @@
"""
Unit tests for NLP Query service
Following TEST_STRATEGY.md approach for service testing
"""
import pytest
import json
from unittest.mock import AsyncMock, MagicMock, patch
from typing import Dict, Any
from trustgraph.schema import (
QuestionToStructuredQueryRequest, QuestionToStructuredQueryResponse,
PromptRequest, PromptResponse, Error, RowSchema, Field as SchemaField
)
from trustgraph.retrieval.nlp_query.service import Processor
@pytest.fixture
def mock_prompt_client():
    """Mock prompt service client (async interface, no behavior configured)."""
    return AsyncMock()
@pytest.fixture
def mock_pulsar_client():
    """Mock Pulsar client passed to the processor under test."""
    return AsyncMock()
@pytest.fixture
def sample_schemas():
    """Two example RowSchema objects (customers, orders) keyed by name."""
    customer_fields = [
        SchemaField(name="id", type="string", primary=True),
        SchemaField(name="name", type="string"),
        SchemaField(name="email", type="string"),
        SchemaField(name="state", type="string"),
    ]
    order_fields = [
        SchemaField(name="order_id", type="string", primary=True),
        SchemaField(name="customer_id", type="string"),
        SchemaField(name="total", type="float"),
        SchemaField(name="status", type="string"),
    ]
    return {
        "customers": RowSchema(
            name="customers",
            description="Customer data",
            fields=customer_fields,
        ),
        "orders": RowSchema(
            name="orders",
            description="Order data",
            fields=order_fields,
        ),
    }
@pytest.fixture
def processor(mock_pulsar_client, sample_schemas):
    """Processor wired with a mocked Pulsar client, preloaded schemas,
    and a stubbed client attribute."""
    instance = Processor(
        taskgroup=MagicMock(),
        pulsar_client=mock_pulsar_client,
        config_type="schema",
    )
    instance.schemas = sample_schemas
    instance.client = MagicMock()
    return instance
# Class-level asyncio mark: pytest-asyncio runs the async test methods;
# the two plain (sync) methods at the bottom are unaffected by it.
@pytest.mark.asyncio
class TestNLPQueryProcessor:
    """Test NLP Query service processor"""

    async def test_phase1_select_schemas_success(self, processor, mock_prompt_client):
        """Test successful schema selection (Phase 1)"""
        # NOTE(review): the mock_prompt_client fixture is injected but never
        # referenced; the prompt service is mocked via the flow below.
        # Arrange
        question = "Show me customers from California"
        expected_schemas = ["customers"]
        mock_response = PromptResponse(
            text=json.dumps(expected_schemas),
            error=None
        )
        # Mock flow context: only "prompt-request" resolves to our mock service
        flow = MagicMock()
        mock_prompt_service = AsyncMock()
        mock_prompt_service.request = AsyncMock(return_value=mock_response)
        flow.side_effect = lambda service_name: mock_prompt_service if service_name == "prompt-request" else AsyncMock()
        # Act
        result = await processor.phase1_select_schemas(question, flow)
        # Assert
        assert result == expected_schemas
        mock_prompt_service.request.assert_called_once()

    async def test_phase1_select_schemas_prompt_error(self, processor):
        """Test schema selection with prompt service error"""
        # Arrange
        question = "Show me customers"
        error = Error(type="prompt-error", message="Template not found")
        mock_response = PromptResponse(text="", error=error)
        # Mock flow context
        flow = MagicMock()
        mock_prompt_service = AsyncMock()
        mock_prompt_service.request = AsyncMock(return_value=mock_response)
        flow.side_effect = lambda service_name: mock_prompt_service if service_name == "prompt-request" else AsyncMock()
        # Act & Assert
        with pytest.raises(Exception, match="Prompt service error"):
            await processor.phase1_select_schemas(question, flow)

    async def test_phase2_generate_graphql_success(self, processor):
        """Test successful GraphQL generation (Phase 2)"""
        # Arrange
        question = "Show me customers from California"
        selected_schemas = ["customers"]
        expected_result = {
            "query": "query { customers(where: {state: {eq: \"California\"}}) { id name email state } }",
            "variables": {},
            "confidence": 0.95
        }
        mock_response = PromptResponse(
            text=json.dumps(expected_result),
            error=None
        )
        # Mock flow context
        flow = MagicMock()
        mock_prompt_service = AsyncMock()
        mock_prompt_service.request = AsyncMock(return_value=mock_response)
        flow.side_effect = lambda service_name: mock_prompt_service if service_name == "prompt-request" else AsyncMock()
        # Act
        result = await processor.phase2_generate_graphql(question, selected_schemas, flow)
        # Assert
        assert result == expected_result
        mock_prompt_service.request.assert_called_once()

    async def test_phase2_generate_graphql_prompt_error(self, processor):
        """Test GraphQL generation with prompt service error"""
        # Arrange
        question = "Show me customers"
        selected_schemas = ["customers"]
        error = Error(type="prompt-error", message="Generation failed")
        mock_response = PromptResponse(text="", error=error)
        # Mock flow context
        flow = MagicMock()
        mock_prompt_service = AsyncMock()
        mock_prompt_service.request = AsyncMock(return_value=mock_response)
        flow.side_effect = lambda service_name: mock_prompt_service if service_name == "prompt-request" else AsyncMock()
        # Act & Assert
        with pytest.raises(Exception, match="Prompt service error"):
            await processor.phase2_generate_graphql(question, selected_schemas, flow)

    async def test_on_message_full_flow_success(self, processor):
        """Test complete message processing flow"""
        # Arrange
        request = QuestionToStructuredQueryRequest(
            question="Show me customers from California",
            max_results=100
        )
        msg = MagicMock()
        msg.value.return_value = request
        msg.properties.return_value = {"id": "test-123"}
        consumer = MagicMock()
        flow = MagicMock()
        flow_response = AsyncMock()
        flow.return_value = flow_response
        # Mock Phase 1 response
        phase1_response = PromptResponse(
            text=json.dumps(["customers"]),
            error=None
        )
        # Mock Phase 2 response
        phase2_response = PromptResponse(
            text=json.dumps({
                "query": "query { customers(where: {state: {eq: \"California\"}}) { id name email } }",
                "variables": {},
                "confidence": 0.9
            }),
            error=None
        )
        # Mock flow context to return prompt service responses in order
        # (side_effect list: first call -> phase 1, second call -> phase 2)
        mock_prompt_service = AsyncMock()
        mock_prompt_service.request = AsyncMock(
            side_effect=[phase1_response, phase2_response]
        )
        flow.side_effect = lambda service_name: mock_prompt_service if service_name == "prompt-request" else flow_response if service_name == "response" else AsyncMock()
        # Act
        await processor.on_message(msg, consumer, flow)
        # Assert
        assert mock_prompt_service.request.call_count == 2
        flow_response.send.assert_called_once()
        # Verify response structure
        response_call = flow_response.send.call_args
        response = response_call[0][0]  # First argument is the response object
        assert isinstance(response, QuestionToStructuredQueryResponse)
        assert response.error is None
        assert "customers" in response.graphql_query
        assert response.detected_schemas == ["customers"]
        assert response.confidence == 0.9

    async def test_on_message_phase1_error(self, processor):
        """Test message processing with Phase 1 failure"""
        # Arrange
        request = QuestionToStructuredQueryRequest(
            question="Show me customers",
            max_results=100
        )
        msg = MagicMock()
        msg.value.return_value = request
        msg.properties.return_value = {"id": "test-123"}
        consumer = MagicMock()
        flow = MagicMock()
        flow_response = AsyncMock()
        flow.return_value = flow_response
        # Mock Phase 1 error
        phase1_response = PromptResponse(
            text="",
            error=Error(type="template-error", message="Template not found")
        )
        # NOTE(review): this stubs processor.client, yet the success-path test
        # above delivers prompt responses through flow(...) — confirm this stub
        # is what actually triggers the error path rather than the default
        # MagicMock returned from flow().
        processor.client.return_value.request = AsyncMock(return_value=phase1_response)
        # Act
        await processor.on_message(msg, consumer, flow)
        # Assert
        flow_response.send.assert_called_once()
        # Verify error response
        response_call = flow_response.send.call_args
        response = response_call[0][0]
        assert isinstance(response, QuestionToStructuredQueryResponse)
        assert response.error is not None
        assert response.error.type == "nlp-query-error"
        assert "Prompt service error" in response.error.message

    async def test_schema_config_loading(self, processor):
        """Test schema configuration loading"""
        # Arrange: config values are JSON strings, as delivered by config service
        config = {
            "schema": {
                "test_schema": json.dumps({
                    "name": "test_schema",
                    "description": "Test schema",
                    "fields": [
                        {
                            "name": "id",
                            "type": "string",
                            "primary_key": True,
                            "required": True
                        },
                        {
                            "name": "name",
                            "type": "string",
                            "description": "User name"
                        }
                    ]
                })
            }
        }
        # Act
        await processor.on_schema_config(config, "v1")
        # Assert
        assert "test_schema" in processor.schemas
        schema = processor.schemas["test_schema"]
        assert schema.name == "test_schema"
        assert schema.description == "Test schema"
        assert len(schema.fields) == 2
        assert schema.fields[0].name == "id"
        # JSON "primary_key" maps onto the RowSchema field's "primary" flag
        assert schema.fields[0].primary == True
        assert schema.fields[1].name == "name"

    async def test_schema_config_loading_invalid_json(self, processor):
        """Test schema configuration loading with invalid JSON"""
        # Arrange
        config = {
            "schema": {
                "bad_schema": "invalid json{"
            }
        }
        # Act
        await processor.on_schema_config(config, "v1")
        # Assert - bad schema should be ignored
        assert "bad_schema" not in processor.schemas

    def test_processor_initialization(self, mock_pulsar_client):
        """Test processor initialization with correct specifications"""
        # Act
        processor = Processor(
            taskgroup=MagicMock(),
            pulsar_client=mock_pulsar_client,
            schema_selection_template="custom-schema-select",
            graphql_generation_template="custom-graphql-gen"
        )
        # Assert
        assert processor.schema_selection_template == "custom-schema-select"
        assert processor.graphql_generation_template == "custom-graphql-gen"
        assert processor.config_key == "schema"
        assert processor.schemas == {}

    def test_add_args(self):
        """Test command-line argument parsing"""
        import argparse
        parser = argparse.ArgumentParser()
        Processor.add_args(parser)
        # Test default values
        args = parser.parse_args([])
        assert args.config_type == "schema"
        assert args.schema_selection_template == "schema-selection"
        assert args.graphql_generation_template == "graphql-generation"
        # Test custom values
        args = parser.parse_args([
            "--config-type", "custom",
            "--schema-selection-template", "my-selector",
            "--graphql-generation-template", "my-generator"
        ])
        assert args.config_type == "custom"
        assert args.schema_selection_template == "my-selector"
        assert args.graphql_generation_template == "my-generator"
@pytest.mark.unit
class TestNLPQueryHelperFunctions:
    """Test helper functions and data transformations"""

    def test_schema_info_formatting(self, sample_schemas):
        """Test schema info formatting for prompts"""
        # This would test any helper functions for formatting schema data
        # Currently the formatting is inline, but good to test if extracted
        schema = sample_schemas["customers"]
        assert [field.name for field in schema.fields] == [
            "id", "name", "email", "state",
        ]
        # Test primary key detection: only "id" carries the primary flag
        assert [field.name for field in schema.fields if field.primary] == ["id"]

View file

@@ -0,0 +1,3 @@
"""
Unit and contract tests for structured-diag service
"""

View file

@@ -0,0 +1,172 @@
"""
Unit tests for message translation in structured-diag service
"""
import pytest
from trustgraph.messaging.translators.diagnosis import (
StructuredDataDiagnosisRequestTranslator,
StructuredDataDiagnosisResponseTranslator
)
from trustgraph.schema.services.diagnosis import (
StructuredDataDiagnosisRequest,
StructuredDataDiagnosisResponse
)
class TestRequestTranslation:
    """Test request message translation"""

    def test_translate_schema_selection_request(self):
        """Schema-selection request: API dict (hyphenated keys) to Pulsar message."""
        translator = StructuredDataDiagnosisRequestTranslator()
        # API format (with hyphens)
        request = {
            "operation": "schema-selection",
            "sample": "test data sample",
            "options": {"filter": "catalog"},
        }
        message = translator.to_pulsar(request)
        assert message.operation == "schema-selection"
        assert message.sample == "test data sample"
        assert message.options == {"filter": "catalog"}

    def test_translate_request_with_all_fields(self):
        """Every supported request field survives API-to-Pulsar translation,
        including the hyphenated "schema-name" key mapping to schema_name."""
        translator = StructuredDataDiagnosisRequestTranslator()
        request = {
            "operation": "generate-descriptor",
            "sample": "csv data",
            "type": "csv",
            "schema-name": "products",
            "options": {"delimiter": ","},
        }
        message = translator.to_pulsar(request)
        assert message.operation == "generate-descriptor"
        assert message.sample == "csv data"
        assert message.type == "csv"
        assert message.schema_name == "products"
        assert message.options == {"delimiter": ","}
class TestResponseTranslation:
    """Test response message translation"""

    def test_translate_schema_selection_response(self):
        """Test translating schema-selection response from Pulsar to API"""
        translator = StructuredDataDiagnosisResponseTranslator()
        # Create Pulsar response with schema_matches
        pulsar_response = StructuredDataDiagnosisResponse(
            operation="schema-selection",
            schema_matches=["products", "inventory", "catalog"],
            error=None
        )
        # Translate to API format — snake_case fields become hyphenated keys
        api_data = translator.from_pulsar(pulsar_response)
        assert api_data["operation"] == "schema-selection"
        assert api_data["schema-matches"] == ["products", "inventory", "catalog"]
        assert "error" not in api_data  # None errors shouldn't be included

    def test_translate_empty_schema_matches(self):
        """Test translating response with empty schema_matches"""
        translator = StructuredDataDiagnosisResponseTranslator()
        pulsar_response = StructuredDataDiagnosisResponse(
            operation="schema-selection",
            schema_matches=[],
            error=None
        )
        api_data = translator.from_pulsar(pulsar_response)
        # An empty list is preserved — distinct from the absent/None case below
        assert api_data["operation"] == "schema-selection"
        assert api_data["schema-matches"] == []

    def test_translate_response_without_schema_matches(self):
        """Test translating response without schema_matches field"""
        translator = StructuredDataDiagnosisResponseTranslator()
        # Old-style response without schema_matches
        pulsar_response = StructuredDataDiagnosisResponse(
            operation="detect-type",
            detected_type="xml",
            confidence=0.9,
            error=None
        )
        api_data = translator.from_pulsar(pulsar_response)
        assert api_data["operation"] == "detect-type"
        assert api_data["detected-type"] == "xml"
        assert api_data["confidence"] == 0.9
        assert "schema-matches" not in api_data  # None values shouldn't be included

    def test_translate_response_with_error(self):
        """Test translating response with error"""
        translator = StructuredDataDiagnosisResponseTranslator()
        from trustgraph.schema.core.primitives import Error
        pulsar_response = StructuredDataDiagnosisResponse(
            operation="schema-selection",
            error=Error(
                type="PromptServiceError",
                message="Service unavailable"
            )
        )
        # Only asserts the translation does not raise and keeps "operation".
        api_data = translator.from_pulsar(pulsar_response)
        assert api_data["operation"] == "schema-selection"
        # Error objects are typically handled separately by the gateway
        # but the translator shouldn't break on them

    def test_translate_all_response_fields(self):
        """Test translating response with all possible fields"""
        translator = StructuredDataDiagnosisResponseTranslator()
        import json
        descriptor_data = {"mapping": {"field1": "column1"}}
        pulsar_response = StructuredDataDiagnosisResponse(
            operation="diagnose",
            detected_type="csv",
            confidence=0.95,
            descriptor=json.dumps(descriptor_data),
            metadata={"field_count": "5"},
            schema_matches=["schema1", "schema2"],
            error=None
        )
        api_data = translator.from_pulsar(pulsar_response)
        assert api_data["operation"] == "diagnose"
        assert api_data["detected-type"] == "csv"
        assert api_data["confidence"] == 0.95
        assert api_data["descriptor"] == descriptor_data  # Should be parsed from JSON
        assert api_data["metadata"] == {"field_count": "5"}
        assert api_data["schema-matches"] == ["schema1", "schema2"]

    def test_response_completion_flag(self):
        """Test that response includes completion flag"""
        translator = StructuredDataDiagnosisResponseTranslator()
        pulsar_response = StructuredDataDiagnosisResponse(
            operation="schema-selection",
            schema_matches=["products"],
            error=None
        )
        api_data, is_final = translator.from_response_with_completion(pulsar_response)
        assert is_final is True  # Structured-diag responses are always final
        assert api_data["operation"] == "schema-selection"
        assert api_data["schema-matches"] == ["products"]

View file

@@ -0,0 +1,258 @@
"""
Contract tests for structured-diag service schemas
"""
import pytest
import json
from pulsar.schema import JsonSchema
from trustgraph.schema.services.diagnosis import (
StructuredDataDiagnosisRequest,
StructuredDataDiagnosisResponse
)
class TestStructuredDiagnosisSchemaContract:
    """Contract tests for structured diagnosis message schemas"""

    def test_request_schema_basic_fields(self):
        """Test basic request schema fields"""
        request = StructuredDataDiagnosisRequest(
            operation="detect-type",
            sample="test data"
        )
        assert request.operation == "detect-type"
        assert request.sample == "test data"
        assert request.type is None  # Optional, defaults to None
        assert request.schema_name is None  # Optional, defaults to None
        assert request.options is None  # Optional, defaults to None

    def test_request_schema_all_operations(self):
        """Test request schema supports all operations"""
        operations = ["detect-type", "generate-descriptor", "diagnose", "schema-selection"]
        for op in operations:
            request = StructuredDataDiagnosisRequest(
                operation=op,
                sample="test data"
            )
            assert request.operation == op

    def test_request_schema_with_options(self):
        """Test request schema with options"""
        # Option values are strings (string-to-string map in the schema)
        options = {"delimiter": ",", "has_header": "true"}
        request = StructuredDataDiagnosisRequest(
            operation="generate-descriptor",
            sample="test data",
            type="csv",
            schema_name="products",
            options=options
        )
        assert request.options == options
        assert request.type == "csv"
        assert request.schema_name == "products"

    def test_response_schema_basic_fields(self):
        """Test basic response schema fields"""
        response = StructuredDataDiagnosisResponse(
            operation="detect-type",
            detected_type="xml",
            confidence=0.9,
            error=None  # Explicitly set to None
        )
        assert response.operation == "detect-type"
        assert response.detected_type == "xml"
        assert response.confidence == 0.9
        assert response.error is None
        assert response.descriptor is None
        assert response.metadata is None
        assert response.schema_matches is None  # New field, defaults to None

    def test_response_schema_with_error(self):
        """Test response schema with error"""
        from trustgraph.schema.core.primitives import Error
        error = Error(
            type="ServiceError",
            message="Service unavailable"
        )
        response = StructuredDataDiagnosisResponse(
            operation="schema-selection",
            error=error
        )
        assert response.error == error
        assert response.error.type == "ServiceError"
        assert response.error.message == "Service unavailable"

    def test_response_schema_with_schema_matches(self):
        """Test response schema with schema_matches array"""
        matches = ["products", "inventory", "catalog"]
        response = StructuredDataDiagnosisResponse(
            operation="schema-selection",
            schema_matches=matches
        )
        assert response.operation == "schema-selection"
        assert response.schema_matches == matches
        assert len(response.schema_matches) == 3

    def test_response_schema_empty_schema_matches(self):
        """Test response schema with empty schema_matches array"""
        response = StructuredDataDiagnosisResponse(
            operation="schema-selection",
            schema_matches=[]
        )
        # Empty list must survive construction (not collapse to None)
        assert response.schema_matches == []
        assert isinstance(response.schema_matches, list)

    def test_response_schema_with_descriptor(self):
        """Test response schema with descriptor"""
        # Descriptor is carried as a JSON string, not a nested object
        descriptor = {
            "mapping": {
                "field1": "column1",
                "field2": "column2"
            }
        }
        response = StructuredDataDiagnosisResponse(
            operation="generate-descriptor",
            descriptor=json.dumps(descriptor)
        )
        assert response.descriptor == json.dumps(descriptor)
        parsed = json.loads(response.descriptor)
        assert parsed["mapping"]["field1"] == "column1"

    def test_response_schema_with_metadata(self):
        """Test response schema with metadata"""
        # Metadata values are strings; nested structures are JSON-encoded
        metadata = {
            "csv_options": json.dumps({"delimiter": ","}),
            "field_count": "5"
        }
        response = StructuredDataDiagnosisResponse(
            operation="diagnose",
            metadata=metadata
        )
        assert response.metadata == metadata
        assert response.metadata["field_count"] == "5"

    def test_schema_serialization(self):
        """Test that schemas can be serialized and deserialized correctly"""
        # Test request serialization
        request = StructuredDataDiagnosisRequest(
            operation="schema-selection",
            sample="test data",
            options={"key": "value"}
        )
        # Simulate Pulsar JsonSchema serialization (round-trip)
        schema = JsonSchema(StructuredDataDiagnosisRequest)
        serialized = schema.encode(request)
        deserialized = schema.decode(serialized)
        assert deserialized.operation == request.operation
        assert deserialized.sample == request.sample
        assert deserialized.options == request.options

    def test_response_serialization_with_schema_matches(self):
        """Test response serialization with schema_matches array"""
        response = StructuredDataDiagnosisResponse(
            operation="schema-selection",
            schema_matches=["schema1", "schema2"],
            confidence=0.85
        )
        # Simulate Pulsar JsonSchema serialization
        schema = JsonSchema(StructuredDataDiagnosisResponse)
        serialized = schema.encode(response)
        deserialized = schema.decode(serialized)
        assert deserialized.operation == response.operation
        assert deserialized.schema_matches == response.schema_matches
        assert deserialized.confidence == response.confidence

    def test_backwards_compatibility(self):
        """Test that old clients can still use the service without schema_matches"""
        # Old response without schema_matches should still work
        response = StructuredDataDiagnosisResponse(
            operation="detect-type",
            detected_type="json",
            confidence=0.95
        )
        # Verify default value for new field
        assert response.schema_matches is None  # Defaults to None when not set
        # Verify old fields still work
        assert response.detected_type == "json"
        assert response.confidence == 0.95

    def test_schema_selection_operation_contract(self):
        """Test complete contract for schema-selection operation"""
        # Request
        request = StructuredDataDiagnosisRequest(
            operation="schema-selection",
            sample="product_id,name,price\n1,Widget,9.99"
        )
        assert request.operation == "schema-selection"
        assert request.sample != ""
        # Response with matches
        response = StructuredDataDiagnosisResponse(
            operation="schema-selection",
            schema_matches=["products", "inventory"]
        )
        assert response.operation == "schema-selection"
        assert isinstance(response.schema_matches, list)
        assert len(response.schema_matches) == 2
        assert all(isinstance(s, str) for s in response.schema_matches)
        # Response with error
        from trustgraph.schema.core.primitives import Error
        error_response = StructuredDataDiagnosisResponse(
            operation="schema-selection",
            error=Error(type="PromptServiceError", message="Service unavailable")
        )
        assert error_response.error is not None
        assert error_response.schema_matches is None  # Default None when not set

    def test_all_operations_supported(self):
        """Verify all operations are properly supported in the contract"""
        # Map of operation -> which request fields it needs and which response
        # fields it is expected to populate
        supported_operations = {
            "detect-type": {
                "required_request": ["sample"],
                "expected_response": ["detected_type", "confidence"]
            },
            "generate-descriptor": {
                "required_request": ["sample", "type", "schema_name"],
                "expected_response": ["descriptor"]
            },
            "diagnose": {
                "required_request": ["sample"],
                "expected_response": ["detected_type", "confidence", "descriptor"]
            },
            "schema-selection": {
                "required_request": ["sample"],
                "expected_response": ["schema_matches"]
            }
        }
        for operation, contract in supported_operations.items():
            # Test request creation with the operation's required fields
            request_data = {"operation": operation}
            for field in contract["required_request"]:
                request_data[field] = "test_value"
            request = StructuredDataDiagnosisRequest(**request_data)
            assert request.operation == operation
            # Test response creation
            response = StructuredDataDiagnosisResponse(operation=operation)
            assert response.operation == operation

View file

@@ -0,0 +1,361 @@
"""
Unit tests for structured-diag service schema-selection operation
"""
import pytest
import json
from unittest.mock import AsyncMock, MagicMock, patch
from trustgraph.retrieval.structured_diag.service import Processor
from trustgraph.schema.services.diagnosis import StructuredDataDiagnosisRequest, StructuredDataDiagnosisResponse
from trustgraph.schema import RowSchema, Field as SchemaField, Error
@pytest.fixture
def mock_schemas():
    """Three RowSchema test fixtures (products, customers, orders), keyed
    by schema name. All fields are required; each schema has one primary key."""

    def make_field(field_name, field_type, description, **flags):
        # Every field in these fixtures is required; extra flags
        # (primary/indexed) are passed straight through.
        return SchemaField(
            name=field_name,
            type=field_type,
            description=description,
            required=True,
            **flags,
        )

    products = RowSchema(
        name="products",
        description="Product catalog schema",
        fields=[
            make_field("product_id", "string", "Product identifier",
                       primary=True, indexed=True),
            make_field("name", "string", "Product name"),
            make_field("price", "number", "Product price"),
        ],
    )
    customers = RowSchema(
        name="customers",
        description="Customer database schema",
        fields=[
            make_field("customer_id", "string", "Customer identifier",
                       primary=True),
            make_field("name", "string", "Customer name"),
            make_field("email", "string", "Customer email"),
        ],
    )
    orders = RowSchema(
        name="orders",
        description="Order management schema",
        fields=[
            make_field("order_id", "string", "Order identifier", primary=True),
            make_field("customer_id", "string", "Customer identifier"),
            make_field("total", "number", "Order total"),
        ],
    )
    return {"products": products, "customers": customers, "orders": orders}
@pytest.fixture
def service(mock_schemas):
    """Processor under test, preloaded with the mock schema catalogue."""
    proc = Processor(taskgroup=MagicMock(), id="test-processor")
    proc.schemas = mock_schemas
    return proc
@pytest.fixture
def mock_flow():
    """Return a (flow, prompt_request) pair: flow(...) yields an object
    whose .request coroutine is the shared AsyncMock."""
    prompt_request = AsyncMock()
    flow = MagicMock()
    flow.return_value.request = prompt_request
    return flow, prompt_request
@pytest.mark.asyncio
async def test_schema_selection_success(service, mock_flow):
    """Test successful schema selection"""
    flow, prompt_request_flow = mock_flow
    # Mock prompt service response with matching schemas
    mock_response = MagicMock()
    mock_response.error = None
    mock_response.text = '["products", "orders"]'
    mock_response.object = None  # Explicitly set to None
    prompt_request_flow.return_value = mock_response
    # Create request
    request = StructuredDataDiagnosisRequest(
        operation="schema-selection",
        sample="product_id,name,price,quantity\nPROD001,Widget,19.99,5"
    )
    # Execute operation
    response = await service.schema_selection_operation(request, flow)
    # Verify response
    assert response.error is None
    assert response.operation == "schema-selection"
    assert response.schema_matches == ["products", "orders"]
    # Verify prompt service was called correctly
    prompt_request_flow.assert_called_once()
    call_args = prompt_request_flow.call_args[0][0]
    assert call_args.id == "schema-selection"
    # Check that all schemas were passed to prompt
    # (the "schemas" term carries the whole catalogue as a JSON string)
    terms = call_args.terms
    schemas_data = json.loads(terms["schemas"])
    assert len(schemas_data) == 3  # All 3 schemas
    assert any(s["name"] == "products" for s in schemas_data)
    assert any(s["name"] == "customers" for s in schemas_data)
    assert any(s["name"] == "orders" for s in schemas_data)
@pytest.mark.asyncio
async def test_schema_selection_empty_response(service, mock_flow):
    """An empty prompt-service reply yields a PromptServiceError response."""
    flow, prompt_request_flow = mock_flow
    # Both payload fields of the prompt reply are empty strings
    reply = MagicMock()
    reply.error = None
    reply.text = ""
    reply.object = ""
    prompt_request_flow.return_value = reply
    request = StructuredDataDiagnosisRequest(
        operation="schema-selection",
        sample="test data",
    )
    response = await service.schema_selection_operation(request, flow)
    # The service must report the empty payload as an error
    assert response.operation == "schema-selection"
    assert response.error is not None
    assert response.error.type == "PromptServiceError"
    assert "Empty response" in response.error.message
@pytest.mark.asyncio
async def test_schema_selection_prompt_error(service, mock_flow):
    """A prompt-service failure surfaces as a PromptServiceError response."""
    flow, prompt_request_flow = mock_flow
    # The prompt service itself reports an error
    reply = MagicMock()
    reply.error = Error(
        type="ServiceError",
        message="Prompt service unavailable",
    )
    reply.text = None
    prompt_request_flow.return_value = reply
    request = StructuredDataDiagnosisRequest(
        operation="schema-selection",
        sample="test data",
    )
    response = await service.schema_selection_operation(request, flow)
    # The upstream error is wrapped, not passed through verbatim
    assert response.operation == "schema-selection"
    assert response.error is not None
    assert response.error.type == "PromptServiceError"
    assert "Failed to select schemas" in response.error.message
@pytest.mark.asyncio
async def test_schema_selection_invalid_json(service, mock_flow):
    """Unparseable prompt output is reported as a ParseError."""
    flow, prompt_request_flow = mock_flow
    # The prompt reply text is not JSON at all
    reply = MagicMock()
    reply.error = None
    reply.text = "not valid json"
    reply.object = None
    prompt_request_flow.return_value = reply
    request = StructuredDataDiagnosisRequest(
        operation="schema-selection",
        sample="test data",
    )
    response = await service.schema_selection_operation(request, flow)
    assert response.operation == "schema-selection"
    assert response.error is not None
    assert response.error.type == "ParseError"
    assert "Failed to parse schema selection response" in response.error.message
@pytest.mark.asyncio
async def test_schema_selection_non_array_response(service, mock_flow):
    """Valid JSON that is not an array is also reported as a ParseError."""
    flow, prompt_request_flow = mock_flow
    # The prompt replies with a JSON object where an array is expected
    reply = MagicMock()
    reply.error = None
    reply.text = '{"schema": "products"}'
    reply.object = None
    prompt_request_flow.return_value = reply
    request = StructuredDataDiagnosisRequest(
        operation="schema-selection",
        sample="test data",
    )
    response = await service.schema_selection_operation(request, flow)
    assert response.operation == "schema-selection"
    assert response.error is not None
    assert response.error.type == "ParseError"
    assert "Failed to parse schema selection response" in response.error.message
@pytest.mark.asyncio
async def test_schema_selection_with_options(service, mock_flow):
    """Extra request options must be forwarded (JSON-encoded) to the prompt service."""
    flow, prompt_request_flow = mock_flow

    # Prompt service succeeds, matching a single schema.
    ok_payload = MagicMock()
    ok_payload.error = None
    ok_payload.text = '["products"]'
    ok_payload.object = None
    prompt_request_flow.return_value = ok_payload

    request = StructuredDataDiagnosisRequest(
        operation="schema-selection",
        sample="test data",
        options={"filter": "catalog", "confidence": "high"},
    )

    response = await service.schema_selection_operation(request, flow)

    assert response.error is None
    assert response.schema_matches == ["products"]

    # The options should arrive at the prompt service as a JSON-encoded term.
    prompt_terms = prompt_request_flow.call_args[0][0].terms
    forwarded = json.loads(prompt_terms["options"])
    assert forwarded["filter"] == "catalog"
    assert forwarded["confidence"] == "high"
@pytest.mark.asyncio
async def test_schema_selection_exception_handling(service, mock_flow):
    """Unexpected exceptions must be converted into an error response, not raised."""
    flow, prompt_request_flow = mock_flow

    # Any unexpected failure raised while calling the prompt service...
    prompt_request_flow.side_effect = Exception("Unexpected error")

    request = StructuredDataDiagnosisRequest(
        operation="schema-selection",
        sample="test data",
    )

    response = await service.schema_selection_operation(request, flow)

    # ...is reported as a PromptServiceError rather than propagating.
    assert response.error is not None
    assert response.error.type == "PromptServiceError"
    assert "Failed to select schemas" in response.error.message
    assert response.operation == "schema-selection"
@pytest.mark.asyncio
async def test_schema_selection_empty_schemas(service, mock_flow):
    """Selection with no configured schemas still succeeds with zero matches."""
    flow, prompt_request_flow = mock_flow

    # Remove every configured schema from the service.
    service.schemas = {}

    # Response the prompt service would give for an empty schema set.
    empty_payload = MagicMock()
    empty_payload.error = None
    empty_payload.text = '[]'
    empty_payload.object = None
    prompt_request_flow.return_value = empty_payload

    request = StructuredDataDiagnosisRequest(
        operation="schema-selection",
        sample="test data",
    )

    response = await service.schema_selection_operation(request, flow)

    # Should still succeed, just with no schema matches.
    assert response.error is None
    assert response.schema_matches == []

    # An empty schemas array should have been handed to the prompt service.
    prompt_terms = prompt_request_flow.call_args[0][0].terms
    forwarded_schemas = json.loads(prompt_terms["schemas"])
    assert len(forwarded_schemas) == 0

View file

@ -0,0 +1,179 @@
"""
Unit tests for simplified type detection in structured-diag service
"""
import pytest
from trustgraph.retrieval.structured_diag.type_detector import detect_data_type
class TestSimplifiedTypeDetection:
    """Test the simplified type detection logic"""

    def _assert_detects(self, sample, expected_type, expected_confidence):
        # Shared check: detection must report both the expected type and
        # the expected confidence for the given sample.
        data_type, confidence = detect_data_type(sample)
        assert data_type == expected_type
        assert confidence == expected_confidence

    def test_xml_detection_with_declaration(self):
        """Test XML detection with XML declaration"""
        self._assert_detects(
            '<?xml version="1.0"?><root><item>data</item></root>', "xml", 0.9)

    def test_xml_detection_without_declaration(self):
        """Test XML detection without declaration but with closing tags"""
        self._assert_detects('<root><item>data</item></root>', "xml", 0.9)

    def test_xml_detection_truncated(self):
        """Test XML detection with truncated XML (common with 500-byte samples)"""
        sample = '''<?xml version="1.0" encoding="UTF-8"?>
<pieDataset>
<pies>
<pie id="1">
<pieType>Steak &amp; Kidney</pieType>
<region>Yorkshire</region>
<diameterCm>12.5</diameterCm>
<heightCm>4.2'''  # Truncated mid-element
        self._assert_detects(sample, "xml", 0.9)

    def test_json_object_detection(self):
        """Test JSON object detection"""
        self._assert_detects(
            '{"name": "John", "age": 30, "city": "New York"}', "json", 0.9)

    def test_json_array_detection(self):
        """Test JSON array detection"""
        self._assert_detects('[{"id": 1}, {"id": 2}, {"id": 3}]', "json", 0.9)

    def test_json_truncated(self):
        """Test JSON detection with truncated JSON"""
        self._assert_detects(
            '{"products": [{"id": 1, "name": "Widget", "price": 19.99}, {"id": 2, "na',
            "json", 0.9)

    def test_csv_detection(self):
        """Test CSV detection as fallback"""
        sample = '''name,age,city
John,30,New York
Jane,25,Boston
Bob,35,Chicago'''
        self._assert_detects(sample, "csv", 0.8)

    def test_csv_detection_single_line(self):
        """Test CSV detection with single line defaults to CSV"""
        self._assert_detects('column1,column2,column3', "csv", 0.8)

    def test_empty_input(self):
        """Test empty input handling"""
        self._assert_detects("", None, 0.0)

    def test_whitespace_only(self):
        """Test whitespace-only input"""
        self._assert_detects(" \n \t ", None, 0.0)

    def test_html_not_xml(self):
        """Test HTML is detected as XML (has closing tags)"""
        # HTML is detected as XML by the simplified detector.
        self._assert_detects(
            '<html><body><h1>Title</h1></body></html>', "xml", 0.9)

    def test_malformed_xml_still_detected(self):
        """Test malformed XML is still detected as XML"""
        self._assert_detects('<root><item>data</item><unclosed>', "xml", 0.9)

    def test_json_with_whitespace(self):
        """Test JSON detection with leading whitespace"""
        self._assert_detects(' \n {"key": "value"}', "json", 0.9)

    def test_priority_xml_over_csv(self):
        """Test XML takes priority over CSV when both patterns present"""
        self._assert_detects('<?xml version="1.0"?>\n<data>a,b,c</data>', "xml", 0.9)

    def test_priority_json_over_csv(self):
        """Test JSON takes priority over CSV when both patterns present"""
        self._assert_detects('{"data": "a,b,c"}', "json", 0.9)

    def test_text_defaults_to_csv(self):
        """Test plain text defaults to CSV"""
        self._assert_detects(
            'This is just plain text without any structure', "csv", 0.8)
class TestRealWorldSamples:
    """Test with real-world data samples"""

    def test_uk_pies_xml_sample(self):
        """Test with actual UK pies XML sample (first 500 bytes)"""
        sample = '''<?xml version="1.0" encoding="UTF-8"?>
<pieDataset>
<pies>
<pie id="1">
<pieType>Steak &amp; Kidney</pieType>
<region>Yorkshire</region>
<diameterCm>12.5</diameterCm>
<heightCm>4.2</heightCm>
<weightGrams>285</weightGrams>
<crustType>Shortcrust</crustType>
<fillingCategory>Meat</fillingCategory>
<price>3.50</price>
<currency>GBP</currency>
<bakeryType>Traditional</bakeryType>
</pie>
<pie id="2">
<pieType>Chicken &amp; Mushroom</pieType>
<region>Lancashire</regio'''  # Cut at 500 chars
        # Detection operates on the first 500 bytes only, mirroring the
        # sampling behaviour of the diagnosis service.
        kind, score = detect_data_type(sample[:500])
        assert kind == "xml"
        assert score == 0.9

    def test_product_json_sample(self):
        """Test with product catalog JSON sample"""
        sample = '''{"products": [
{"id": "PROD001", "name": "Widget", "price": 19.99, "category": "Tools"},
{"id": "PROD002", "name": "Gadget", "price": 29.99, "category": "Electronics"},
{"id": "PROD003", "name": "Doohickey", "price": 9.99, "category": "Accessories"}
]}'''
        kind, score = detect_data_type(sample)
        assert kind == "json"
        assert score == 0.9

    def test_customer_csv_sample(self):
        """Test with customer CSV sample"""
        sample = '''customer_id,name,email,signup_date,total_orders
CUST001,John Smith,john@example.com,2023-01-15,5
CUST002,Jane Doe,jane@example.com,2023-02-20,3
CUST003,Bob Johnson,bob@example.com,2023-03-10,7'''
        kind, score = detect_data_type(sample)
        assert kind == "csv"
        assert score == 0.8

View file

@ -0,0 +1,588 @@
"""
Unit tests for Structured Query Service
Following TEST_STRATEGY.md approach for service testing
"""
import pytest
import json
from unittest.mock import AsyncMock, MagicMock, patch
from trustgraph.schema import (
StructuredQueryRequest, StructuredQueryResponse,
QuestionToStructuredQueryRequest, QuestionToStructuredQueryResponse,
ObjectsQueryRequest, ObjectsQueryResponse,
Error, GraphQLError
)
from trustgraph.retrieval.structured_query.service import Processor
@pytest.fixture
def mock_pulsar_client():
    """Mock Pulsar client (an AsyncMock stand-in, so no broker is contacted)."""
    return AsyncMock()
@pytest.fixture
def processor(mock_pulsar_client):
    """Create processor with mocked dependencies"""
    proc = Processor(
        taskgroup=MagicMock(),
        pulsar_client=mock_pulsar_client
    )
    # Replace the client accessor with a mock so the processor never reaches
    # out to real services (presumably a service-client lookup — confirm
    # against Processor's implementation).
    proc.client = MagicMock()
    return proc
@pytest.mark.asyncio
class TestStructuredQueryProcessor:
    """Test Structured Query service processor"""

    # Pipeline under test, as exercised by these cases:
    # question -> NLP query service (natural language -> GraphQL)
    #          -> objects query service (GraphQL execution)
    #          -> response producer.
    # Each test wires flow.side_effect with a router that hands back the
    # appropriate mock client per service name. Note: side_effect takes
    # precedence over the earlier flow.return_value assignment.

    async def test_successful_end_to_end_query(self, processor):
        """Test successful end-to-end query processing"""
        # Arrange
        request = StructuredQueryRequest(
            question="Show me all customers from New York",
            user="trustgraph",
            collection="default"
        )
        msg = MagicMock()
        msg.value.return_value = request
        msg.properties.return_value = {"id": "test-123"}
        consumer = MagicMock()  # required by on_message's signature; not asserted on
        flow = MagicMock()
        flow_response = AsyncMock()
        flow.return_value = flow_response
        # Mock NLP query service response
        nlp_response = QuestionToStructuredQueryResponse(
            error=None,
            graphql_query='query { customers(where: {state: {eq: "NY"}}) { id name email } }',
            variables={"state": "NY"},
            detected_schemas=["customers"],
            confidence=0.95
        )
        # Mock objects query service response
        objects_response = ObjectsQueryResponse(
            error=None,
            data='{"customers": [{"id": "1", "name": "John", "email": "john@example.com"}]}',
            errors=None,
            extensions={}
        )
        # Set up mock clients
        mock_nlp_client = AsyncMock()
        mock_nlp_client.request.return_value = nlp_response
        mock_objects_client = AsyncMock()
        mock_objects_client.request.return_value = objects_response
        # Mock flow context to route to appropriate services
        def flow_router(service_name):
            if service_name == "nlp-query-request":
                return mock_nlp_client
            elif service_name == "objects-query-request":
                return mock_objects_client
            elif service_name == "response":
                return flow_response
            else:
                return AsyncMock()
        flow.side_effect = flow_router
        # Act
        await processor.on_message(msg, consumer, flow)
        # Assert
        # Verify NLP query service was called correctly
        mock_nlp_client.request.assert_called_once()
        nlp_call_args = mock_nlp_client.request.call_args[0][0]
        assert isinstance(nlp_call_args, QuestionToStructuredQueryRequest)
        assert nlp_call_args.question == "Show me all customers from New York"
        assert nlp_call_args.max_results == 100
        # Verify objects query service was called correctly
        mock_objects_client.request.assert_called_once()
        objects_call_args = mock_objects_client.request.call_args[0][0]
        assert isinstance(objects_call_args, ObjectsQueryRequest)
        assert objects_call_args.query == 'query { customers(where: {state: {eq: "NY"}}) { id name email } }'
        assert objects_call_args.variables == {"state": "NY"}
        assert objects_call_args.user == "trustgraph"
        assert objects_call_args.collection == "default"
        # Verify response
        flow_response.send.assert_called_once()
        response_call = flow_response.send.call_args
        response = response_call[0][0]
        assert isinstance(response, StructuredQueryResponse)
        assert response.error is None
        assert response.data == '{"customers": [{"id": "1", "name": "John", "email": "john@example.com"}]}'
        assert len(response.errors) == 0

    async def test_nlp_query_service_error(self, processor):
        """Test handling of NLP query service errors"""
        # Arrange
        request = StructuredQueryRequest(
            question="Invalid query"
        )
        msg = MagicMock()
        msg.value.return_value = request
        msg.properties.return_value = {"id": "test-error"}
        consumer = MagicMock()
        flow = MagicMock()
        flow_response = AsyncMock()
        flow.return_value = flow_response
        # Mock NLP query service error response
        nlp_response = QuestionToStructuredQueryResponse(
            error=Error(type="nlp-query-error", message="Failed to parse question"),
            graphql_query="",
            variables={},
            detected_schemas=[],
            confidence=0.0
        )
        mock_nlp_client = AsyncMock()
        mock_nlp_client.request.return_value = nlp_response
        # Mock flow context to route to nlp service
        def flow_router(service_name):
            if service_name == "nlp-query-request":
                return mock_nlp_client
            elif service_name == "response":
                return flow_response
            else:
                return AsyncMock()
        flow.side_effect = flow_router
        # Act
        await processor.on_message(msg, consumer, flow)
        # Assert: upstream NLP error is wrapped in the service's error type
        flow_response.send.assert_called_once()
        response_call = flow_response.send.call_args
        response = response_call[0][0]
        assert isinstance(response, StructuredQueryResponse)
        assert response.error is not None
        assert response.error.type == "structured-query-error"
        assert "NLP query service error" in response.error.message

    async def test_empty_graphql_query_error(self, processor):
        """Test handling of empty GraphQL query from NLP service"""
        # Arrange
        request = StructuredQueryRequest(
            question="Ambiguous question"
        )
        msg = MagicMock()
        msg.value.return_value = request
        msg.properties.return_value = {"id": "test-empty"}
        consumer = MagicMock()
        flow = MagicMock()
        flow_response = AsyncMock()
        flow.return_value = flow_response
        # Mock NLP query service response with empty query
        nlp_response = QuestionToStructuredQueryResponse(
            error=None,
            graphql_query="",  # Empty query
            variables={},
            detected_schemas=[],
            confidence=0.1
        )
        mock_nlp_client = AsyncMock()
        mock_nlp_client.request.return_value = nlp_response
        # Mock flow context to route to nlp service
        def flow_router(service_name):
            if service_name == "nlp-query-request":
                return mock_nlp_client
            elif service_name == "response":
                return flow_response
            else:
                return AsyncMock()
        flow.side_effect = flow_router
        # Act
        await processor.on_message(msg, consumer, flow)
        # Assert: no error flag from NLP, but an empty query is still rejected
        flow_response.send.assert_called_once()
        response_call = flow_response.send.call_args
        response = response_call[0][0]
        assert response.error is not None
        assert "empty GraphQL query" in response.error.message

    async def test_objects_query_service_error(self, processor):
        """Test handling of objects query service errors"""
        # Arrange
        request = StructuredQueryRequest(
            question="Show me customers"
        )
        msg = MagicMock()
        msg.value.return_value = request
        msg.properties.return_value = {"id": "test-objects-error"}
        consumer = MagicMock()
        flow = MagicMock()
        flow_response = AsyncMock()
        flow.return_value = flow_response
        # Mock successful NLP response
        nlp_response = QuestionToStructuredQueryResponse(
            error=None,
            graphql_query='query { customers { id name } }',
            variables={},
            detected_schemas=["customers"],
            confidence=0.9
        )
        # Mock objects query service error
        objects_response = ObjectsQueryResponse(
            error=Error(type="graphql-execution-error", message="Table 'customers' not found"),
            data=None,
            errors=None,
            extensions={}
        )
        mock_nlp_client = AsyncMock()
        mock_nlp_client.request.return_value = nlp_response
        mock_objects_client = AsyncMock()
        mock_objects_client.request.return_value = objects_response
        # Mock flow context to route to appropriate services
        def flow_router(service_name):
            if service_name == "nlp-query-request":
                return mock_nlp_client
            elif service_name == "objects-query-request":
                return mock_objects_client
            elif service_name == "response":
                return flow_response
            else:
                return AsyncMock()
        flow.side_effect = flow_router
        # Act
        await processor.on_message(msg, consumer, flow)
        # Assert: execution-stage failure is wrapped and the upstream
        # message text is preserved for diagnosis
        flow_response.send.assert_called_once()
        response_call = flow_response.send.call_args
        response = response_call[0][0]
        assert response.error is not None
        assert "Objects query service error" in response.error.message
        assert "Table 'customers' not found" in response.error.message

    async def test_graphql_errors_handling(self, processor):
        """Test handling of GraphQL validation/execution errors"""
        # Arrange
        request = StructuredQueryRequest(
            question="Show invalid field"
        )
        msg = MagicMock()
        msg.value.return_value = request
        msg.properties.return_value = {"id": "test-graphql-errors"}
        consumer = MagicMock()
        flow = MagicMock()
        flow_response = AsyncMock()
        flow.return_value = flow_response
        # Mock successful NLP response
        nlp_response = QuestionToStructuredQueryResponse(
            error=None,
            graphql_query='query { customers { invalid_field } }',
            variables={},
            detected_schemas=["customers"],
            confidence=0.8
        )
        # Mock objects response with GraphQL errors
        graphql_errors = [
            GraphQLError(
                message="Cannot query field 'invalid_field' on type 'Customer'",
                path=["customers", "0", "invalid_field"],  # All path elements must be strings
                extensions={}
            )
        ]
        objects_response = ObjectsQueryResponse(
            error=None,
            data=None,
            errors=graphql_errors,
            extensions={}
        )
        mock_nlp_client = AsyncMock()
        mock_nlp_client.request.return_value = nlp_response
        mock_objects_client = AsyncMock()
        mock_objects_client.request.return_value = objects_response
        # Mock flow context to route to appropriate services
        def flow_router(service_name):
            if service_name == "nlp-query-request":
                return mock_nlp_client
            elif service_name == "objects-query-request":
                return mock_objects_client
            elif service_name == "response":
                return flow_response
            else:
                return AsyncMock()
        flow.side_effect = flow_router
        # Act
        await processor.on_message(msg, consumer, flow)
        # Assert: GraphQL-level errors are reported in the errors list,
        # not as a service-level error
        flow_response.send.assert_called_once()
        response_call = flow_response.send.call_args
        response = response_call[0][0]
        assert response.error is None
        assert len(response.errors) == 1
        assert "Cannot query field 'invalid_field'" in response.errors[0]
        assert "customers" in response.errors[0]

    async def test_complex_query_with_variables(self, processor):
        """Test processing complex queries with variables"""
        # Arrange
        request = StructuredQueryRequest(
            question="Show customers with orders over $100 from last month"
        )
        msg = MagicMock()
        msg.value.return_value = request
        msg.properties.return_value = {"id": "test-complex"}
        consumer = MagicMock()
        flow = MagicMock()
        flow_response = AsyncMock()
        flow.return_value = flow_response
        # Mock NLP response with complex query and variables
        nlp_response = QuestionToStructuredQueryResponse(
            error=None,
            graphql_query='''
query GetCustomersWithLargeOrders($minTotal: Float!, $startDate: String!) {
customers {
id
name
orders(where: {total: {gt: $minTotal}, date: {gte: $startDate}}) {
id
total
date
}
}
}
''',
            variables={
                "minTotal": "100.0",  # Convert to string for Pulsar schema
                "startDate": "2024-01-01"
            },
            detected_schemas=["customers", "orders"],
            confidence=0.88
        )
        # Mock objects response
        objects_response = ObjectsQueryResponse(
            error=None,
            data='{"customers": [{"id": "1", "name": "Alice", "orders": [{"id": "100", "total": 150.0}]}]}',
            errors=None
        )
        mock_nlp_client = AsyncMock()
        mock_nlp_client.request.return_value = nlp_response
        mock_objects_client = AsyncMock()
        mock_objects_client.request.return_value = objects_response
        # Mock flow context to route to appropriate services
        def flow_router(service_name):
            if service_name == "nlp-query-request":
                return mock_nlp_client
            elif service_name == "objects-query-request":
                return mock_objects_client
            elif service_name == "response":
                return flow_response
            else:
                return AsyncMock()
        flow.side_effect = flow_router
        # Act
        await processor.on_message(msg, consumer, flow)
        # Assert
        # Verify variables were passed correctly (converted to strings)
        objects_call_args = mock_objects_client.request.call_args[0][0]
        assert objects_call_args.variables["minTotal"] == "100.0"  # Should be converted to string
        assert objects_call_args.variables["startDate"] == "2024-01-01"
        # Verify response
        response_call = flow_response.send.call_args
        response = response_call[0][0]
        assert response.error is None
        assert "Alice" in response.data

    async def test_null_data_handling(self, processor):
        """Test handling of null/empty data responses"""
        # Arrange
        request = StructuredQueryRequest(
            question="Show nonexistent data"
        )
        msg = MagicMock()
        msg.value.return_value = request
        msg.properties.return_value = {"id": "test-null"}
        consumer = MagicMock()
        flow = MagicMock()
        flow_response = AsyncMock()
        flow.return_value = flow_response
        # Mock responses
        nlp_response = QuestionToStructuredQueryResponse(
            error=None,
            graphql_query='query { customers { id } }',
            variables={},
            detected_schemas=["customers"],
            confidence=0.9
        )
        objects_response = ObjectsQueryResponse(
            error=None,
            data=None,  # Null data
            errors=None,
            extensions={}
        )
        mock_nlp_client = AsyncMock()
        mock_nlp_client.request.return_value = nlp_response
        mock_objects_client = AsyncMock()
        mock_objects_client.request.return_value = objects_response
        # Mock flow context to route to appropriate services
        def flow_router(service_name):
            if service_name == "nlp-query-request":
                return mock_nlp_client
            elif service_name == "objects-query-request":
                return mock_objects_client
            elif service_name == "response":
                return flow_response
            else:
                return AsyncMock()
        flow.side_effect = flow_router
        # Act
        await processor.on_message(msg, consumer, flow)
        # Assert
        response_call = flow_response.send.call_args
        response = response_call[0][0]
        assert response.error is None
        assert response.data == "null"  # Should convert None to "null" string

    async def test_exception_handling(self, processor):
        """Test general exception handling"""
        # Arrange
        request = StructuredQueryRequest(
            question="Test exception"
        )
        msg = MagicMock()
        msg.value.return_value = request
        msg.properties.return_value = {"id": "test-exception"}
        consumer = MagicMock()
        flow = MagicMock()
        flow_response = AsyncMock()
        flow.return_value = flow_response
        # Mock flow context to raise exception
        mock_client = AsyncMock()
        mock_client.request.side_effect = Exception("Network timeout")
        def flow_router(service_name):
            if service_name == "nlp-query-request":
                return mock_client
            elif service_name == "response":
                return flow_response
            else:
                return AsyncMock()
        flow.side_effect = flow_router
        # Act
        await processor.on_message(msg, consumer, flow)
        # Assert: the raw exception message is surfaced in the error response
        flow_response.send.assert_called_once()
        response_call = flow_response.send.call_args
        response = response_call[0][0]
        assert response.error is not None
        assert response.error.type == "structured-query-error"
        assert "Network timeout" in response.error.message
        assert response.data == "null"
        assert len(response.errors) == 0

    # Note: the following two tests are synchronous; pytest.mark.asyncio on
    # the class only affects coroutine test functions.

    def test_processor_initialization(self, mock_pulsar_client):
        """Test processor initialization with correct specifications"""
        # Act
        processor = Processor(
            taskgroup=MagicMock(),
            pulsar_client=mock_pulsar_client
        )
        # Assert - Test default ID
        assert processor.id == "structured-query"
        # Verify specifications were registered (we can't directly access them,
        # but we know they were registered if initialization succeeded)
        assert processor is not None

    def test_add_args(self):
        """Test command-line argument parsing"""
        import argparse
        parser = argparse.ArgumentParser()
        Processor.add_args(parser)
        # Test that it doesn't crash (no additional args)
        args = parser.parse_args([])
        # No specific assertions since no custom args are added
        assert args is not None
@pytest.mark.unit
class TestStructuredQueryHelperFunctions:
    """Test helper functions and data transformations"""

    def test_service_logging_integration(self):
        """The module logger must be named after its module path."""
        from trustgraph.retrieval.structured_query.service import logger
        expected_name = "trustgraph.retrieval.structured_query.service"
        assert logger.name == expected_name

    def test_default_values(self):
        """The default processor identity must match the documented value."""
        from trustgraph.retrieval.structured_query.service import default_ident
        expected_ident = "structured-query"
        assert default_ident == expected_ident