diff --git a/tests/unit/test_retrieval/test_structured_diag/__init__.py b/tests/unit/test_retrieval/test_structured_diag/__init__.py
new file mode 100644
index 00000000..a900cbbb
--- /dev/null
+++ b/tests/unit/test_retrieval/test_structured_diag/__init__.py
@@ -0,0 +1,3 @@
+"""
+Unit and contract tests for structured-diag service
+"""
\ No newline at end of file
diff --git a/tests/unit/test_retrieval/test_structured_diag/test_message_translation.py b/tests/unit/test_retrieval/test_structured_diag/test_message_translation.py
new file mode 100644
index 00000000..7a113250
--- /dev/null
+++ b/tests/unit/test_retrieval/test_structured_diag/test_message_translation.py
@@ -0,0 +1,172 @@
+"""
+Unit tests for message translation in structured-diag service
+"""
+
+import pytest
+from trustgraph.messaging.translators.diagnosis import (
+    StructuredDataDiagnosisRequestTranslator,
+    StructuredDataDiagnosisResponseTranslator
+)
+from trustgraph.schema.services.diagnosis import (
+    StructuredDataDiagnosisRequest,
+    StructuredDataDiagnosisResponse
+)
+
+
+class TestRequestTranslation:
+    """API-to-Pulsar translation of structured-diag requests."""
+
+    def test_translate_schema_selection_request(self):
+        """A schema-selection payload maps onto the Pulsar request fields."""
+        request_translator = StructuredDataDiagnosisRequestTranslator()
+
+        # Gateway-side payload, keyed the way the API spells its fields
+        payload = dict(
+            operation="schema-selection",
+            sample="test data sample",
+            options={"filter": "catalog"},
+        )
+
+        # Convert the payload into the Pulsar message object
+        message = request_translator.to_pulsar(payload)
+
+        assert message.operation == "schema-selection"
+        assert message.sample == "test data sample"
+        assert message.options == {"filter": "catalog"}
+
+    def test_translate_request_with_all_fields(self):
+        """Every request field, including the hyphenated key, is carried over."""
+        request_translator = StructuredDataDiagnosisRequestTranslator()
+
+        payload = {
+            "operation": "generate-descriptor",
+            "sample": "csv data",
+            "type": "csv",
+            "schema-name": "products",
+            "options": {"delimiter": ","},
+        }
+
+        message = request_translator.to_pulsar(payload)
+
+        assert message.operation == "generate-descriptor"
+        assert message.sample == "csv data"
+        assert message.type == "csv"
+        assert message.schema_name == "products"
+        assert message.options == {"delimiter": ","}
+
+
+class TestResponseTranslation:
+    """Pulsar-to-API translation of structured-diag responses."""
+
+    def test_translate_schema_selection_response(self):
+        """schema_matches is exposed under the hyphenated API key."""
+        response_translator = StructuredDataDiagnosisResponseTranslator()
+
+        # Pulsar-side message carrying three matches and no error
+        message = StructuredDataDiagnosisResponse(
+            operation="schema-selection",
+            schema_matches=["products", "inventory", "catalog"],
+            error=None,
+        )
+
+        # Convert to the dict handed back to API callers
+        translated = response_translator.from_pulsar(message)
+
+        assert translated["operation"] == "schema-selection"
+        assert translated["schema-matches"] == ["products", "inventory", "catalog"]
+        assert "error" not in translated  # a None error must be left out entirely
+
+    def test_translate_empty_schema_matches(self):
+        """An empty match list is kept as [] in the translated dict."""
+        response_translator = StructuredDataDiagnosisResponseTranslator()
+
+        message = StructuredDataDiagnosisResponse(
+            operation="schema-selection",
+            schema_matches=[],
+            error=None,
+        )
+
+        translated = response_translator.from_pulsar(message)
+
+        assert translated["operation"] == "schema-selection"
+        assert translated["schema-matches"] == []
+
+    def test_translate_response_without_schema_matches(self):
+        """A response that never sets schema_matches omits the API key."""
+        response_translator = StructuredDataDiagnosisResponseTranslator()
+
+        # detect-type style message: schema_matches is simply not supplied
+        message = StructuredDataDiagnosisResponse(
+            operation="detect-type",
+            detected_type="xml",
+            confidence=0.9,
+            error=None,
+        )
+
+        translated = response_translator.from_pulsar(message)
+
+        assert translated["operation"] == "detect-type"
+        assert translated["detected-type"] == "xml"
+        assert translated["confidence"] == 0.9
+        assert "schema-matches" not in translated  # unset field must be left out
+
+    def test_translate_response_with_error(self):
+        """Translating a response that carries an Error still yields a dict."""
+        from trustgraph.schema.core.primitives import Error
+        response_translator = StructuredDataDiagnosisResponseTranslator()
+
+        message = StructuredDataDiagnosisResponse(
+            operation="schema-selection",
+            error=Error(
+                type="PromptServiceError",
+                message="Service unavailable",
+            ),
+        )
+
+        translated = response_translator.from_pulsar(message)
+
+        assert translated["operation"] == "schema-selection"
+        # Nothing is asserted about the error payload itself; this test only
+        # checks that translation does not fail when an Error is present
+
+    def test_translate_all_response_fields(self):
+        """Every response field set at once appears in the translated dict."""
+        import json
+        response_translator = StructuredDataDiagnosisResponseTranslator()
+
+        descriptor_data = {"mapping": {"field1": "column1"}}
+
+        message = StructuredDataDiagnosisResponse(
+            operation="diagnose",
+            detected_type="csv",
+            confidence=0.95,
+            descriptor=json.dumps(descriptor_data),
+            metadata={"field_count": "5"},
+            schema_matches=["schema1", "schema2"],
+            error=None,
+        )
+
+        translated = response_translator.from_pulsar(message)
+
+        assert translated["operation"] == "diagnose"
+        assert translated["detected-type"] == "csv"
+        assert translated["confidence"] == 0.95
+        assert translated["descriptor"] == descriptor_data  # JSON string comes back decoded
+        assert translated["metadata"] == {"field_count": "5"}
+        assert translated["schema-matches"] == ["schema1", "schema2"]
+
+    def test_response_completion_flag(self):
+        """from_response_with_completion returns the dict plus a True flag."""
+        response_translator = StructuredDataDiagnosisResponseTranslator()
+
+        message = StructuredDataDiagnosisResponse(
+            operation="schema-selection",
+            schema_matches=["products"],
+            error=None,
+        )
+
+        translated, is_final = response_translator.from_response_with_completion(message)
+
+        assert is_final is True  # this response is expected to be marked final
+        assert translated["operation"] == "schema-selection"
+        assert translated["schema-matches"] == ["products"]
\ No newline at end of file
diff --git a/tests/unit/test_retrieval/test_structured_diag/test_schema_contracts.py b/tests/unit/test_retrieval/test_structured_diag/test_schema_contracts.py
new file mode 100644
index 00000000..99f66dc7
--- /dev/null
+++ b/tests/unit/test_retrieval/test_structured_diag/test_schema_contracts.py
@@ -0,0 +1,258 @@
+"""
+Contract tests for structured-diag service schemas
+"""
+
+import pytest
+import json
+from pulsar.schema import JsonSchema
+from trustgraph.schema.services.diagnosis import (
+    StructuredDataDiagnosisRequest,
+    StructuredDataDiagnosisResponse
+)
+
+
+class TestStructuredDiagnosisSchemaContract:
+    """Contract tests pinning the request/response schema fields and defaults."""
+
+    def test_request_schema_basic_fields(self):
+        """A request built from operation and sample leaves the rest unset."""
+        minimal = StructuredDataDiagnosisRequest(
+            operation="detect-type",
+            sample="test data",
+        )
+
+        assert minimal.operation == "detect-type"
+        assert minimal.sample == "test data"
+        assert minimal.type is None  # not supplied, reads back as None
+        assert minimal.schema_name is None  # not supplied, reads back as None
+        assert minimal.options is None  # not supplied, reads back as None
+
+    def test_request_schema_all_operations(self):
+        """Each of the four operation names is accepted and read back as-is."""
+        operation_names = (
+            "detect-type", "generate-descriptor",
+            "diagnose", "schema-selection",
+        )
+
+        for name in operation_names:
+            built = StructuredDataDiagnosisRequest(operation=name, sample="test data")
+            assert built.operation == name
+
+    def test_request_schema_with_options(self):
+        """type, schema_name and an options map are all stored on the request."""
+        csv_options = {"delimiter": ",", "has_header": "true"}
+        full_request = StructuredDataDiagnosisRequest(
+            operation="generate-descriptor",
+            sample="test data",
+            type="csv",
+            schema_name="products",
+            options=csv_options,
+        )
+
+        assert full_request.options == csv_options
+        assert full_request.type == "csv"
+        assert full_request.schema_name == "products"
+
+    def test_response_schema_basic_fields(self):
+        """Fields that are set read back; fields left out read back as None."""
+        detect_reply = StructuredDataDiagnosisResponse(
+            operation="detect-type",
+            detected_type="xml",
+            confidence=0.9,
+            error=None,  # passed explicitly rather than relying on the default
+        )
+
+        assert detect_reply.operation == "detect-type"
+        assert detect_reply.detected_type == "xml"
+        assert detect_reply.confidence == 0.9
+        assert detect_reply.error is None
+        assert detect_reply.descriptor is None
+        assert detect_reply.metadata is None
+        assert detect_reply.schema_matches is None  # unset here, so None
+
+    def test_response_schema_with_error(self):
+        """An Error attached to a response is stored with its type and message."""
+        from trustgraph.schema.core.primitives import Error
+
+        failure = Error(
+            type="ServiceError",
+            message="Service unavailable",
+        )
+        failed_reply = StructuredDataDiagnosisResponse(
+            operation="schema-selection",
+            error=failure,
+        )
+
+        assert failed_reply.error == failure
+        assert failed_reply.error.type == "ServiceError"
+        assert failed_reply.error.message == "Service unavailable"
+
+    def test_response_schema_with_schema_matches(self):
+        """schema_matches stores a list of schema names unchanged."""
+        names = ["products", "inventory", "catalog"]
+        selection_reply = StructuredDataDiagnosisResponse(
+            operation="schema-selection",
+            schema_matches=names,
+        )
+
+        assert selection_reply.operation == "schema-selection"
+        assert selection_reply.schema_matches == names
+        assert len(selection_reply.schema_matches) == 3
+
+    def test_response_schema_empty_schema_matches(self):
+        """An empty schema_matches list stays an empty list."""
+        selection_reply = StructuredDataDiagnosisResponse(
+            operation="schema-selection",
+            schema_matches=[],
+        )
+
+        assert selection_reply.schema_matches == []
+        assert isinstance(selection_reply.schema_matches, list)
+
+    def test_response_schema_with_descriptor(self):
+        """descriptor holds a JSON string that decodes back to the mapping."""
+        mapping_doc = {
+            "mapping": {
+                "field1": "column1",
+                "field2": "column2",
+            },
+        }
+        descriptor_reply = StructuredDataDiagnosisResponse(
+            operation="generate-descriptor",
+            descriptor=json.dumps(mapping_doc),
+        )
+
+        assert descriptor_reply.descriptor == json.dumps(mapping_doc)
+        decoded = json.loads(descriptor_reply.descriptor)
+        assert decoded["mapping"]["field1"] == "column1"
+
+    def test_response_schema_with_metadata(self):
+        """metadata stores a string-to-string map unchanged."""
+        extra_info = {
+            "csv_options": json.dumps({"delimiter": ","}),
+            "field_count": "5",
+        }
+        diagnose_reply = StructuredDataDiagnosisResponse(
+            operation="diagnose",
+            metadata=extra_info,
+        )
+
+        assert diagnose_reply.metadata == extra_info
+        assert diagnose_reply.metadata["field_count"] == "5"
+
+    def test_schema_serialization(self):
+        """A request survives a JsonSchema encode/decode round trip."""
+        # Request with an options map, to exercise the map field as well
+        outgoing = StructuredDataDiagnosisRequest(
+            operation="schema-selection",
+            sample="test data",
+            options={"key": "value"},
+        )
+
+        # Encode and decode with the Pulsar JsonSchema wrapper
+        codec = JsonSchema(StructuredDataDiagnosisRequest)
+        wire_bytes = codec.encode(outgoing)
+        incoming = codec.decode(wire_bytes)
+
+        assert incoming.operation == outgoing.operation
+        assert incoming.sample == outgoing.sample
+        assert incoming.options == outgoing.options
+
+    def test_response_serialization_with_schema_matches(self):
+        """A response with schema_matches survives a JsonSchema round trip."""
+        outgoing = StructuredDataDiagnosisResponse(
+            operation="schema-selection",
+            schema_matches=["schema1", "schema2"],
+            confidence=0.85,
+        )
+
+        # Encode and decode with the Pulsar JsonSchema wrapper
+        codec = JsonSchema(StructuredDataDiagnosisResponse)
+        wire_bytes = codec.encode(outgoing)
+        incoming = codec.decode(wire_bytes)
+
+        assert incoming.operation == outgoing.operation
+        assert incoming.schema_matches == outgoing.schema_matches
+        assert incoming.confidence == outgoing.confidence
+
+    def test_backwards_compatibility(self):
+        """A response built without schema_matches still constructs and reads."""
+        # Built the way a caller unaware of schema_matches would build it
+        legacy_reply = StructuredDataDiagnosisResponse(
+            operation="detect-type",
+            detected_type="json",
+            confidence=0.95,
+        )
+
+        # The field that was not supplied reads back as None
+        assert legacy_reply.schema_matches is None
+
+        # The fields that were supplied are unaffected
+        assert legacy_reply.detected_type == "json"
+        assert legacy_reply.confidence == 0.95
+
+    def test_schema_selection_operation_contract(self):
+        """Request, match response and error response for schema-selection."""
+        # Request: an operation name plus a non-empty sample
+        selection_request = StructuredDataDiagnosisRequest(
+            operation="schema-selection",
+            sample="product_id,name,price\n1,Widget,9.99",
+        )
+
+        assert selection_request.operation == "schema-selection"
+        assert selection_request.sample != ""
+
+        # Successful response: a list of schema-name strings
+        match_reply = StructuredDataDiagnosisResponse(
+            operation="schema-selection",
+            schema_matches=["products", "inventory"],
+        )
+
+        assert match_reply.operation == "schema-selection"
+        assert isinstance(match_reply.schema_matches, list)
+        assert len(match_reply.schema_matches) == 2
+        assert all(isinstance(name, str) for name in match_reply.schema_matches)
+
+        # Failed response: an Error and no schema_matches
+        from trustgraph.schema.core.primitives import Error
+        error_reply = StructuredDataDiagnosisResponse(
+            operation="schema-selection",
+            error=Error(type="PromptServiceError", message="Service unavailable"),
+        )
+
+        assert error_reply.error is not None
+        assert error_reply.schema_matches is None  # never supplied, so None
+
+    def test_all_operations_supported(self):
+        """Check each operation's request fields and response attributes."""
+        supported_operations = {
+            "detect-type": {
+                "required_request": ["sample"],
+                "expected_response": ["detected_type", "confidence"]
+            },
+            "generate-descriptor": {
+                "required_request": ["sample", "type", "schema_name"],
+                "expected_response": ["descriptor"]
+            },
+            "diagnose": {
+                "required_request": ["sample"],
+                "expected_response": ["detected_type", "confidence", "descriptor"]
+            },
+            "schema-selection": {
+                "required_request": ["sample"],
+                "expected_response": ["schema_matches"]
+            }
+        }
+
+        for operation, contract in supported_operations.items():
+            # Each required request field must be accepted and read back
+            request_data = dict.fromkeys(contract["required_request"], "test_value")
+            request = StructuredDataDiagnosisRequest(operation=operation, **request_data)
+            assert request.operation == operation
+            assert all(getattr(request, name) == "test_value" for name in request_data)
+
+            # Each expected response field must exist as a schema attribute
+            response = StructuredDataDiagnosisResponse(operation=operation)
+            assert response.operation == operation
+            missing = [f for f in contract["expected_response"] if not hasattr(response, f)]
+            assert not missing, f"{operation} response lacks fields: {missing}"
\ No newline at end of file
diff --git a/tests/unit/test_retrieval/test_structured_diag/test_schema_selection.py b/tests/unit/test_retrieval/test_structured_diag/test_schema_selection.py
new file mode 100644
index 00000000..8ce1b97e
--- /dev/null
+++ b/tests/unit/test_retrieval/test_structured_diag/test_schema_selection.py
@@ -0,0 +1,361 @@
+"""
+Unit tests for structured-diag service schema-selection operation
+"""
+
+import pytest
+import json
+from unittest.mock import AsyncMock, MagicMock, patch
+from trustgraph.retrieval.structured_diag.service import Processor
+from trustgraph.schema.services.diagnosis import StructuredDataDiagnosisRequest, StructuredDataDiagnosisResponse
+from trustgraph.schema import RowSchema, Field as SchemaField, Error
+
+
+@pytest.fixture
+def mock_schemas():
+    """Return three RowSchema objects keyed by schema name."""
+    # Each schema is built on its own and keyed by name in the return below
+    products = RowSchema(
+        name="products",
+        description="Product catalog schema",
+        fields=[
+            SchemaField(
+                name="product_id",
+                description="Product identifier",
+                type="string",
+                required=True,
+                primary=True,
+                indexed=True,
+            ),
+            SchemaField(
+                name="name",
+                description="Product name",
+                type="string",
+                required=True,
+            ),
+            SchemaField(
+                name="price",
+                description="Product price",
+                type="number",
+                required=True,
+            ),
+        ],
+    )
+    customers = RowSchema(
+        name="customers",
+        description="Customer database schema",
+        fields=[
+            SchemaField(
+                name="customer_id",
+                description="Customer identifier",
+                type="string",
+                required=True,
+                primary=True,
+            ),
+            SchemaField(
+                name="name",
+                description="Customer name",
+                type="string",
+                required=True,
+            ),
+            SchemaField(
+                name="email",
+                description="Customer email",
+                type="string",
+                required=True,
+            ),
+        ],
+    )
+    orders = RowSchema(
+        name="orders",
+        description="Order management schema",
+        fields=[
+            SchemaField(
+                name="order_id",
+                description="Order identifier",
+                type="string",
+                required=True,
+                primary=True,
+            ),
+            SchemaField(
+                name="customer_id",
+                description="Customer identifier",
+                type="string",
+                required=True,
+            ),
+            SchemaField(
+                name="total",
+                description="Order total",
+                type="number",
+                required=True,
+            ),
+        ],
+    )
+
+    return {"products": products, "customers": customers, "orders": orders}
+
+
+@pytest.fixture
+def service(mock_schemas):
+    """Build a Processor whose schemas attribute is the mock_schemas fixture."""
+    # taskgroup is a MagicMock placeholder; id is a fixed test identifier
+    init_kwargs = {"taskgroup": MagicMock(), "id": "test-processor"}
+    processor = Processor(**init_kwargs)
+
+    processor.schemas = mock_schemas
+    return processor
+
+
+@pytest.fixture
+def mock_flow():
+    """Return (flow, request): flow(...).request is the awaitable request mock."""
+    request_mock = AsyncMock()
+    flow_mock = MagicMock()
+    flow_mock.return_value.request = request_mock
+    return flow_mock, request_mock
+
+
+@pytest.mark.asyncio
+async def test_schema_selection_success(service, mock_flow):
+    """A JSON array reply from the prompt becomes the response's schema_matches."""
+    flow, prompt_request = mock_flow
+
+    # Prompt reply: JSON array text naming two schemas, no error, no object
+    reply = MagicMock(
+        error=None,
+        text='["products", "orders"]',
+        object=None,
+    )
+    prompt_request.return_value = reply
+
+    # Request carrying a CSV-like sample
+    diag_request = StructuredDataDiagnosisRequest(
+        operation="schema-selection",
+        sample="product_id,name,price,quantity\nPROD001,Widget,19.99,5"
+    )
+
+    # Run the operation under test
+    result = await service.schema_selection_operation(diag_request, flow)
+
+    # The reply text is surfaced as the list of matches
+    assert result.error is None
+    assert result.operation == "schema-selection"
+    assert result.schema_matches == ["products", "orders"]
+
+    # Exactly one prompt call, addressed to the schema-selection prompt
+    prompt_request.assert_called_once()
+    sent = prompt_request.call_args[0][0]
+    assert sent.id == "schema-selection"
+
+    # The prompt terms carry all three configured schemas as JSON
+    schemas_data = json.loads(sent.terms["schemas"])
+    assert len(schemas_data) == 3  # All 3 schemas
+    assert any(s["name"] == "products" for s in schemas_data)
+    assert any(s["name"] == "customers" for s in schemas_data)
+    assert any(s["name"] == "orders" for s in schemas_data)
+
+
+@pytest.mark.asyncio
+async def test_schema_selection_empty_response(service, mock_flow):
+    """A prompt reply with empty text and object yields a PromptServiceError."""
+    flow, prompt_request = mock_flow
+
+    # Prompt reply with no error but nothing in either payload field
+    reply = MagicMock(
+        error=None,
+        text="",
+        object="",
+    )
+    prompt_request.return_value = reply
+
+    diag_request = StructuredDataDiagnosisRequest(
+        operation="schema-selection",
+        sample="test data"
+    )
+
+    # Run the operation under test
+    result = await service.schema_selection_operation(diag_request, flow)
+
+    # The empty reply is reported as an error on the response
+    assert result.error is not None
+    assert result.error.type == "PromptServiceError"
+    assert "Empty response" in result.error.message
+    assert result.operation == "schema-selection"
+
+
+@pytest.mark.asyncio
+async def test_schema_selection_prompt_error(service, mock_flow):
+    """An Error on the prompt reply is reported as a PromptServiceError."""
+    flow, prompt_request = mock_flow
+
+    # Prompt reply that carries an Error and no text; the object attribute
+    # is deliberately left unconfigured on the mock
+    upstream_error = Error(
+        type="ServiceError",
+        message="Prompt service unavailable",
+    )
+    reply = MagicMock(error=upstream_error, text=None)
+    prompt_request.return_value = reply
+
+    # Minimal schema-selection request
+    diag_request = StructuredDataDiagnosisRequest(
+        operation="schema-selection",
+        sample="test data"
+    )
+
+    # Run the operation under test
+    result = await service.schema_selection_operation(diag_request, flow)
+
+    # The upstream failure is surfaced on the response
+    assert result.error is not None
+    assert result.error.type == "PromptServiceError"
+    assert "Failed to select schemas" in result.error.message
+    assert result.operation == "schema-selection"
+
+
+@pytest.mark.asyncio
+async def test_schema_selection_invalid_json(service, mock_flow):
+    """Reply text that is not JSON yields a ParseError on the response."""
+    flow, prompt_request = mock_flow
+
+    # Prompt reply whose text cannot be decoded as JSON
+    reply = MagicMock(
+        error=None,
+        text="not valid json",
+        object=None,
+    )
+    prompt_request.return_value = reply
+
+    diag_request = StructuredDataDiagnosisRequest(
+        operation="schema-selection",
+        sample="test data"
+    )
+
+    # Run the operation under test
+    result = await service.schema_selection_operation(diag_request, flow)
+
+    # The undecodable text is reported as a parse failure
+    assert result.error is not None
+    assert result.error.type == "ParseError"
+    assert "Failed to parse schema selection response" in result.error.message
+    assert result.operation == "schema-selection"
+
+
+@pytest.mark.asyncio
+async def test_schema_selection_non_array_response(service, mock_flow):
+    """Valid JSON that is not an array yields a ParseError on the response."""
+    flow, prompt_request = mock_flow
+
+    # Prompt reply that decodes to a JSON object rather than an array
+    reply = MagicMock(
+        error=None,
+        text='{"schema": "products"}',
+        object=None,
+    )
+    prompt_request.return_value = reply
+
+    diag_request = StructuredDataDiagnosisRequest(
+        operation="schema-selection",
+        sample="test data"
+    )
+
+    # Run the operation under test
+    result = await service.schema_selection_operation(diag_request, flow)
+
+    # The wrong JSON shape is reported as a parse failure
+    assert result.error is not None
+    assert result.error.type == "ParseError"
+    assert "Failed to parse schema selection response" in result.error.message
+    assert result.operation == "schema-selection"
+
+
+@pytest.mark.asyncio
+async def test_schema_selection_with_options(service, mock_flow):
+    """Request options are forwarded to the prompt as a JSON-encoded term."""
+    flow, prompt_request = mock_flow
+
+    # Prompt reply naming a single schema
+    reply = MagicMock(
+        error=None,
+        text='["products"]',
+        object=None,
+    )
+    prompt_request.return_value = reply
+
+    # Request that carries two option entries
+    diag_request = StructuredDataDiagnosisRequest(
+        operation="schema-selection",
+        sample="test data",
+        options={"filter": "catalog", "confidence": "high"}
+    )
+
+    # Run the operation under test
+    result = await service.schema_selection_operation(diag_request, flow)
+
+    # The single match comes back with no error
+    assert result.error is None
+    assert result.schema_matches == ["products"]
+
+    # Both option entries reached the prompt terms
+    sent = prompt_request.call_args[0][0]
+    options = json.loads(sent.terms["options"])
+    assert options["filter"] == "catalog"
+    assert options["confidence"] == "high"
+
+
+@pytest.mark.asyncio
+async def test_schema_selection_exception_handling(service, mock_flow):
+    """An exception raised by the prompt call becomes a PromptServiceError."""
+    flow, prompt_request = mock_flow
+
+    # Awaiting the prompt request raises instead of returning a reply
+    prompt_request.side_effect = Exception("Unexpected error")
+
+    # Minimal schema-selection request
+    diag_request = StructuredDataDiagnosisRequest(
+        operation="schema-selection",
+        sample="test data"
+    )
+
+    # Run the operation under test; it must not propagate the exception
+    result = await service.schema_selection_operation(diag_request, flow)
+
+    # The failure is reported on the response instead
+    assert result.error is not None
+    assert result.error.type == "PromptServiceError"
+    assert "Failed to select schemas" in result.error.message
+    assert result.operation == "schema-selection"
+
+
+@pytest.mark.asyncio
+async def test_schema_selection_empty_schemas(service, mock_flow):
+    """With no schemas configured the prompt is still called, with an empty list."""
+    flow, prompt_request = mock_flow
+
+    # Remove every configured schema from the processor
+    service.schemas = {}
+
+    # Prompt reply with an empty JSON array; the assertions below rely on it
+    reply = MagicMock(
+        error=None,
+        text='[]',
+        object=None,
+    )
+    prompt_request.return_value = reply
+
+    # Minimal schema-selection request
+    diag_request = StructuredDataDiagnosisRequest(
+        operation="schema-selection",
+        sample="test data"
+    )
+
+    # Run the operation under test
+    result = await service.schema_selection_operation(diag_request, flow)
+
+    # No error, and the empty array from the prompt is returned as-is
+    assert result.error is None
+    assert result.schema_matches == []
+
+    # The prompt terms carried an empty schemas array
+    sent = prompt_request.call_args[0][0]
+    schemas_data = json.loads(sent.terms["schemas"])
+    assert len(schemas_data) == 0
\ No newline at end of file
diff --git a/tests/unit/test_retrieval/test_structured_diag/test_type_detection.py b/tests/unit/test_retrieval/test_structured_diag/test_type_detection.py
new file mode 100644
index 00000000..60eae2ef
--- /dev/null
+++ b/tests/unit/test_retrieval/test_structured_diag/test_type_detection.py
@@ -0,0 +1,179 @@
+"""
+Unit tests for simplified type detection in structured-diag service
+"""
+
+import pytest
+from trustgraph.retrieval.structured_diag.type_detector import detect_data_type
+
+
+class TestSimplifiedTypeDetection:
+    """Test the simplified, prefix-based type detection logic"""
+
+    def test_xml_detection_with_declaration(self):
+        """Test XML detection with XML declaration"""
+        sample = '<?xml version="1.0"?><root><item>data</item></root>'
+        data_type, confidence = detect_data_type(sample)
+        assert data_type == "xml"
+        assert confidence == 0.9
+
+    def test_xml_detection_without_declaration(self):
+        """Test XML detection without declaration but with closing tags"""
+        sample = '<root><item>data</item></root>'
+        data_type, confidence = detect_data_type(sample)
+        assert data_type == "xml"
+        assert confidence == 0.9
+
+    def test_xml_detection_truncated(self):
+        """Test XML detection with truncated XML (common with 500-byte samples)"""
+        sample = '''<?xml version="1.0" encoding="UTF-8"?>
+<pieDataset>
+  <pies>
+    <pie id="1">
+      <pieType>Steak &amp; Kidney</pieType>
+      <region>Yorkshire</region>
+      <diameterCm>12.5</diameterCm>
+      <heightCm>4.2'''  # Truncated mid-element
+        data_type, confidence = detect_data_type(sample)
+        assert data_type == "xml"
+        assert confidence == 0.9
+
+    def test_json_object_detection(self):
+        """Test JSON object detection"""
+        sample = '{"name": "John", "age": 30, "city": "New York"}'
+        data_type, confidence = detect_data_type(sample)
+        assert data_type == "json"
+        assert confidence == 0.9
+
+    def test_json_array_detection(self):
+        """Test JSON array detection"""
+        sample = '[{"id": 1}, {"id": 2}, {"id": 3}]'
+        data_type, confidence = detect_data_type(sample)
+        assert data_type == "json"
+        assert confidence == 0.9
+
+    def test_json_truncated(self):
+        """Test JSON detection with truncated JSON"""
+        sample = '{"products": [{"id": 1, "name": "Widget", "price": 19.99}, {"id": 2, "na'
+        data_type, confidence = detect_data_type(sample)
+        assert data_type == "json"
+        assert confidence == 0.9
+
+    def test_csv_detection(self):
+        """Test CSV detection as fallback"""
+        sample = '''name,age,city
+John,30,New York
+Jane,25,Boston
+Bob,35,Chicago'''
+        data_type, confidence = detect_data_type(sample)
+        assert data_type == "csv"
+        assert confidence == 0.8
+
+    def test_csv_detection_single_line(self):
+        """Test CSV detection with single line defaults to CSV"""
+        sample = 'column1,column2,column3'
+        data_type, confidence = detect_data_type(sample)
+        assert data_type == "csv"
+        assert confidence == 0.8
+
+    def test_empty_input(self):
+        """Test empty input handling"""
+        data_type, confidence = detect_data_type("")
+        assert data_type is None
+        assert confidence == 0.0
+
+    def test_whitespace_only(self):
+        """Test whitespace-only input"""
+        data_type, confidence = detect_data_type("   \n  \t  ")
+        assert data_type is None
+        assert confidence == 0.0
+
+    def test_html_detected_as_xml(self):
+        """Test HTML is detected as XML because it has closing tags"""
+        sample = '<html><body><h1>Title</h1></body></html>'
+        data_type, confidence = detect_data_type(sample)
+        assert data_type == "xml"  # HTML is detected as XML
+        assert confidence == 0.9
+
+    def test_malformed_xml_still_detected(self):
+        """Test malformed XML is still detected as XML"""
+        sample = '<root><item>data</item><unclosed>'
+        data_type, confidence = detect_data_type(sample)
+        assert data_type == "xml"
+        assert confidence == 0.9
+
+    def test_json_with_whitespace(self):
+        """Test JSON detection with leading whitespace"""
+        sample = '   \n  {"key": "value"}'
+        data_type, confidence = detect_data_type(sample)
+        assert data_type == "json"
+        assert confidence == 0.9
+
+    def test_priority_xml_over_csv(self):
+        """Test XML takes priority over CSV when both patterns present"""
+        sample = '<?xml version="1.0"?>\n<data>a,b,c</data>'
+        data_type, confidence = detect_data_type(sample)
+        assert data_type == "xml"
+        assert confidence == 0.9
+
+    def test_priority_json_over_csv(self):
+        """Test JSON takes priority over CSV when both patterns present"""
+        sample = '{"data": "a,b,c"}'
+        data_type, confidence = detect_data_type(sample)
+        assert data_type == "json"
+        assert confidence == 0.9
+
+    def test_text_defaults_to_csv(self):
+        """Test plain text defaults to CSV"""
+        sample = 'This is just plain text without any structure'
+        data_type, confidence = detect_data_type(sample)
+        assert data_type == "csv"
+        assert confidence == 0.8
+
+
+class TestRealWorldSamples:
+    """Test type detection with realistic XML, JSON and CSV samples"""
+
+    def test_uk_pies_xml_sample(self):
+        """Test with actual UK pies XML sample (first 500 bytes)"""
+        sample = '''<?xml version="1.0" encoding="UTF-8"?>
+<pieDataset>
+  <pies>
+    <pie id="1">
+      <pieType>Steak &amp; Kidney</pieType>
+      <region>Yorkshire</region>
+      <diameterCm>12.5</diameterCm>
+      <heightCm>4.2</heightCm>
+      <weightGrams>285</weightGrams>
+      <crustType>Shortcrust</crustType>
+      <fillingCategory>Meat</fillingCategory>
+      <price>3.50</price>
+      <currency>GBP</currency>
+      <bakeryType>Traditional</bakeryType>
+    </pie>
+    <pie id="2">
+      <pieType>Chicken &amp; Mushroom</pieType>
+      <region>Lancashire</regio'''  # Truncated mid-tag; sliced to 500 chars below
+        data_type, confidence = detect_data_type(sample[:500])
+        assert data_type == "xml"
+        assert confidence == 0.9
+
+    def test_product_json_sample(self):
+        """Test with product catalog JSON sample"""
+        sample = '''{"products": [
+  {"id": "PROD001", "name": "Widget", "price": 19.99, "category": "Tools"},
+  {"id": "PROD002", "name": "Gadget", "price": 29.99, "category": "Electronics"},
+  {"id": "PROD003", "name": "Doohickey", "price": 9.99, "category": "Accessories"}
+]}'''
+        data_type, confidence = detect_data_type(sample)
+        assert data_type == "json"
+        assert confidence == 0.9
+
+    def test_customer_csv_sample(self):
+        """Test with customer CSV sample"""
+        sample = '''customer_id,name,email,signup_date,total_orders
+CUST001,John Smith,john@example.com,2023-01-15,5
+CUST002,Jane Doe,jane@example.com,2023-02-20,3
+CUST003,Bob Johnson,bob@example.com,2023-03-10,7'''
+        data_type, confidence = detect_data_type(sample)
+        assert data_type == "csv"
+        assert confidence == 0.8
\ No newline at end of file
diff --git a/trustgraph-base/trustgraph/api/flow.py b/trustgraph-base/trustgraph/api/flow.py
index 74b7a117..d1d5f95e 100644
--- a/trustgraph-base/trustgraph/api/flow.py
+++ b/trustgraph-base/trustgraph/api/flow.py
@@ -492,12 +492,148 @@ class FlowInstance:
             "service/structured-query",
             input
         )
-        
+
         # Check for system-level error
         if "error" in response and response["error"]:
             error_type = response["error"].get("type", "unknown")
             error_message = response["error"].get("message", "Unknown error")
             raise ProtocolException(f"{error_type}: {error_message}")
-        
+
         return response
 
+    def detect_type(self, sample):
+        """
+        Detect the data type of a structured data sample.
+
+        Args:
+            sample: Data sample to analyze (string content)
+
+        Returns:
+            The "detected-type" value of the response (not the full response dict)
+        """
+
+        input = {
+            "operation": "detect-type",
+            "sample": sample
+        }
+
+        response = self.request(
+            "service/structured-diag",
+            input
+        )
+
+        # A service-reported error is raised as ProtocolException
+        if "error" in response and response["error"]:
+            error_type = response["error"].get("type", "unknown")
+            error_message = response["error"].get("message", "Unknown error")
+            raise ProtocolException(f"{error_type}: {error_message}")
+
+        return response["detected-type"]
+
+    def generate_descriptor(self, sample, data_type, schema_name, options=None):
+        """
+        Generate a descriptor for structured data mapping to a specific schema.
+
+        Args:
+            sample: Data sample to analyze (string content)
+            data_type: Data type (csv, json, xml)
+            schema_name: Target schema name for descriptor generation
+            options: Optional parameters (e.g., delimiter for CSV)
+
+        Returns:
+            The "descriptor" value of the response (not the full response dict)
+        """
+
+        input = {
+            "operation": "generate-descriptor",
+            "sample": sample,
+            "type": data_type,
+            "schema-name": schema_name
+        }
+
+        # Options are only sent when provided and non-empty
+        if options:
+            input["options"] = options
+
+        response = self.request(
+            "service/structured-diag",
+            input
+        )
+
+        # A service-reported error is raised as ProtocolException
+        if "error" in response and response["error"]:
+            error_type = response["error"].get("type", "unknown")
+            error_message = response["error"].get("message", "Unknown error")
+            raise ProtocolException(f"{error_type}: {error_message}")
+
+        return response["descriptor"]
+
+    def diagnose_data(self, sample, schema_name=None, options=None):
+        """
+        Perform combined data diagnosis: detect type and generate descriptor.
+
+        Args:
+            sample: Data sample to analyze (string content)
+            schema_name: Optional target schema name for descriptor generation
+            options: Optional parameters (e.g., delimiter for CSV)
+
+        Returns:
+            The full response dict, exactly as returned by the service
+        """
+
+        input = {
+            "operation": "diagnose",
+            "sample": sample
+        }
+
+        # Optional fields are only sent when provided and non-empty
+        if schema_name:
+            input["schema-name"] = schema_name
+
+        if options:
+            input["options"] = options
+
+        response = self.request(
+            "service/structured-diag",
+            input
+        )
+
+        # A service-reported error is raised as ProtocolException
+        if "error" in response and response["error"]:
+            error_type = response["error"].get("type", "unknown")
+            error_message = response["error"].get("message", "Unknown error")
+            raise ProtocolException(f"{error_type}: {error_message}")
+
+        return response
+
+    def schema_selection(self, sample, options=None):
+        """
+        Select matching schemas for a data sample using prompt analysis.
+
+        Args:
+            sample: Data sample to analyze (string content)
+            options: Optional parameters
+
+        Returns:
+            The "schema-matches" value of the response (not the full response dict)
+        """
+
+        input = {
+            "operation": "schema-selection",
+            "sample": sample
+        }
+
+        # Options are only sent when provided and non-empty
+        if options:
+            input["options"] = options
+
+        response = self.request(
+            "service/structured-diag",
+            input
+        )
+
+        # A service-reported error is raised as ProtocolException
+        if "error" in response and response["error"]:
+            error_type = response["error"].get("type", "unknown")
+            error_message = response["error"].get("message", "Unknown error")
+            raise ProtocolException(f"{error_type}: {error_message}")
+
+        return response["schema-matches"]
+
diff --git a/trustgraph-base/trustgraph/messaging/translators/diagnosis.py b/trustgraph-base/trustgraph/messaging/translators/diagnosis.py
index f7b0150d..92bad16f 100644
--- a/trustgraph-base/trustgraph/messaging/translators/diagnosis.py
+++ b/trustgraph-base/trustgraph/messaging/translators/diagnosis.py
@@ -57,6 +57,8 @@ class StructuredDataDiagnosisResponseTranslator(MessageTranslator):
                 result["descriptor"] = obj.descriptor
         if obj.metadata:
             result["metadata"] = obj.metadata
+        if obj.schema_matches is not None:
+            result["schema-matches"] = obj.schema_matches
 
         return result
 
diff --git a/trustgraph-base/trustgraph/schema/services/diagnosis.py b/trustgraph-base/trustgraph/schema/services/diagnosis.py
index 2bd6caf0..1bd6d3ed 100644
--- a/trustgraph-base/trustgraph/schema/services/diagnosis.py
+++ b/trustgraph-base/trustgraph/schema/services/diagnosis.py
@@ -1,4 +1,4 @@
-from pulsar.schema import Record, String, Map, Double
+from pulsar.schema import Record, String, Map, Double, Array
 from ..core.primitives import Error
 
 ############################################################################
@@ -6,7 +6,7 @@ from ..core.primitives import Error
 # Structured data diagnosis services
 
 class StructuredDataDiagnosisRequest(Record):
-    operation = String()  # "detect-type", "generate-descriptor", or "diagnose"
+    operation = String()  # "detect-type", "generate-descriptor", "diagnose", or "schema-selection"
     sample = String()     # Data sample to analyze (text content)
     type = String()       # Data type (csv, json, xml) - optional, required for generate-descriptor
     schema_name = String() # Target schema name for descriptor generation - optional
@@ -27,4 +27,7 @@ class StructuredDataDiagnosisResponse(Record):
     # JSON encoded additional metadata (e.g., field count, sample records)
     metadata = Map(String())
 
+    # Array of matching schema IDs (for schema-selection operation) - optional
+    schema_matches = Array(String())
+
 ############################################################################
\ No newline at end of file
diff --git a/trustgraph-flow/trustgraph/retrieval/structured_diag/service.py b/trustgraph-flow/trustgraph/retrieval/structured_diag/service.py
index 75af6dc3..d69c8f17 100644
--- a/trustgraph-flow/trustgraph/retrieval/structured_diag/service.py
+++ b/trustgraph-flow/trustgraph/retrieval/structured_diag/service.py
@@ -21,6 +21,7 @@ default_ident = "structured-diag"
 default_csv_prompt = "diagnose-csv"
 default_json_prompt = "diagnose-json"
 default_xml_prompt = "diagnose-xml"
+default_schema_selection_prompt = "schema-selection"
 
 
 class Processor(FlowProcessor):
@@ -36,6 +37,7 @@ class Processor(FlowProcessor):
         self.csv_prompt = params.get("csv_prompt", default_csv_prompt)
         self.json_prompt = params.get("json_prompt", default_json_prompt)
         self.xml_prompt = params.get("xml_prompt", default_xml_prompt)
+        self.schema_selection_prompt = params.get("schema_selection_prompt", default_schema_selection_prompt)
 
         super(Processor, self).__init__(
             **params | {
@@ -143,10 +145,12 @@ class Processor(FlowProcessor):
                 response = await self.generate_descriptor_operation(request, flow)
             elif request.operation == "diagnose":
                 response = await self.diagnose_operation(request, flow)
+            elif request.operation == "schema-selection":
+                response = await self.schema_selection_operation(request, flow)
             else:
                 error = Error(
                     type="InvalidOperation",
-                    message=f"Unknown operation: {request.operation}. Supported: detect-type, generate-descriptor, diagnose"
+                    message=f"Unknown operation: {request.operation}. Supported: detect-type, generate-descriptor, diagnose, schema-selection"
                 )
                 response = StructuredDataDiagnosisResponse(
                     error=error,
@@ -155,7 +159,7 @@ class Processor(FlowProcessor):
 
             # Send response
             await flow("response").send(
-                id, response, properties={"id": id}
+                response, properties={"id": id}
             )
 
         except Exception as e:
@@ -172,7 +176,7 @@ class Processor(FlowProcessor):
             )
 
             await flow("response").send(
-                id, response, properties={"id": id}
+                response, properties={"id": id}
             )
 
     async def detect_type_operation(self, request: StructuredDataDiagnosisRequest, flow) -> StructuredDataDiagnosisResponse:
@@ -307,6 +311,102 @@ class Processor(FlowProcessor):
             metadata=metadata
         )
 
+    async def schema_selection_operation(self, request: StructuredDataDiagnosisRequest, flow) -> StructuredDataDiagnosisResponse:
+        """
+        Handle schema-selection operation.
+
+        Sends all configured schemas, the sample and the options to the prompt
+        service and parses the reply as a JSON array of schema IDs.
+
+        Args:
+            request: Diagnosis request; sample, options and operation are read
+            flow: Flow callable; flow("prompt-request") is used to call the prompt
+
+        Returns:
+            Response with schema_matches set, or with a PromptServiceError /
+            ParseError error if the prompt call or reply parsing fails
+        """
+        logger.info("Processing schema-selection operation")
+
+        def failure(error_type, message):
+            # Every failure path returns the same response shape
+            error = Error(type=error_type, message=message)
+            return StructuredDataDiagnosisResponse(error=error, operation=request.operation)
+
+        # Prepare all schemas for the prompt - match the original config format
+        all_schemas = [
+            {
+                "name": row_schema.name,
+                "description": row_schema.description,
+                "fields": [
+                    {
+                        "name": f.name,
+                        "type": f.type,
+                        "description": f.description,
+                        "required": f.required,
+                        "primary_key": f.primary,
+                        "indexed": f.indexed,
+                        "enum": f.enum_values if f.enum_values else [],
+                        "size": getattr(f, "size", 0)
+                    }
+                    for f in row_schema.fields
+                ]
+            }
+            for row_schema in self.schemas.values()
+        ]
+
+        # Create prompt variables - schemas array contains ALL schemas
+        variables = {
+            "question": request.sample,  # The prompt template expects 'question'
+            "schemas": all_schemas,
+            "options": request.options or {}
+        }
+
+        # Call prompt service with configurable template
+        terms = {k: json.dumps(v) for k, v in variables.items()}
+        prompt_request = PromptRequest(id=self.schema_selection_prompt, terms=terms)
+
+        try:
+            logger.info(f"Calling prompt service for schema selection with template: {self.schema_selection_prompt}")
+            response = await flow("prompt-request").request(prompt_request)
+
+            if response.error:
+                logger.error(f"Prompt service error: {response.error.message}")
+                return failure("PromptServiceError", "Failed to select schemas using prompt service")
+
+            # Check both text and object fields for response
+            if response.object and response.object.strip():
+                response_data = response.object.strip()
+                logger.debug(f"Using response from 'object' field: {response_data}")
+            elif response.text and response.text.strip():
+                response_data = response.text.strip()
+                logger.debug(f"Using response from 'text' field: {response_data}")
+            else:
+                logger.error("Empty response from prompt service (checked both text and object fields)")
+                return failure("PromptServiceError", "Empty response from prompt service")
+
+            # Parse the response as JSON array of schema IDs
+            try:
+                schema_matches = json.loads(response_data)
+                if not isinstance(schema_matches, list):
+                    raise ValueError("Response must be an array")
+                # The response schema declares an array of strings; reject other entries
+                if not all(isinstance(match, str) for match in schema_matches):
+                    raise ValueError("Response array must contain only strings")
+            except ValueError as e:  # json.JSONDecodeError is a ValueError
+                logger.error(f"Failed to parse schema matches response: {e}")
+                return failure("ParseError", "Failed to parse schema selection response as JSON array")
+
+            return StructuredDataDiagnosisResponse(
+                error=None,
+                operation=request.operation,
+                schema_matches=schema_matches
+            )
+
+        except Exception as e:
+            logger.error(f"Error calling prompt service: {e}", exc_info=True)
+            return failure("PromptServiceError", "Failed to select schemas using prompt service")
+
     async def generate_descriptor_with_prompt(
         self, sample: str, data_type: str, target_schema: RowSchema,
         options: Dict[str, str], flow
diff --git a/trustgraph-flow/trustgraph/retrieval/structured_diag/type_detector.py b/trustgraph-flow/trustgraph/retrieval/structured_diag/type_detector.py
index ccd6bf8b..a291d5cc 100644
--- a/trustgraph-flow/trustgraph/retrieval/structured_diag/type_detector.py
+++ b/trustgraph-flow/trustgraph/retrieval/structured_diag/type_detector.py
@@ -31,28 +31,13 @@ def detect_data_type(sample: str) -> Tuple[Optional[str], float]:
 
     sample = sample.strip()
 
-    # Try each format and calculate confidence scores
-    json_confidence = _check_json_format(sample)
-    xml_confidence = _check_xml_format(sample)
-    csv_confidence = _check_csv_format(sample)
-
-    logger.debug(f"Format confidence scores - JSON: {json_confidence}, XML: {xml_confidence}, CSV: {csv_confidence}")
-
-    # Find the format with highest confidence
-    scores = {
-        "json": json_confidence,
-        "xml": xml_confidence,
-        "csv": csv_confidence
-    }
-
-    best_format = max(scores, key=scores.get)
-    best_confidence = scores[best_format]
-
-    # Only return a result if confidence is above threshold
-    if best_confidence < 0.3:
-        return None, best_confidence
-
-    return best_format, best_confidence
+    # Simple pattern matching
+    if sample.startswith('<?xml') or (sample.startswith('<') and '</' in sample):
+        return 'xml', 0.9
+    elif sample.startswith(('{', '[')):
+        return 'json', 0.9
+    else:
+        return 'csv', 0.8
 
 
 def _check_json_format(sample: str) -> float:
@@ -83,33 +68,20 @@ def _check_json_format(sample: str) -> float:
 
 def _check_xml_format(sample: str) -> float:
     """Check if sample is valid XML format"""
-    try:
-        # Quick heuristic checks first
-        if not sample.startswith('<'):
-            return 0.0
-
-        if not ('>' in sample and '</' in sample):
-            return 0.1  # Might be incomplete XML
-
-        # Try to parse as XML
-        root = ET.fromstring(sample)
-
-        # Higher confidence for XML with multiple child elements
-        child_count = len(list(root))
-        if child_count > 10:
-            return 0.95
-        elif child_count > 5:
-            return 0.9
-        elif child_count > 0:
-            return 0.8
+    # Anything starting with '<' qualifies; this also covers an '<?xml' declaration
+    if sample.startswith('<'):
+        # Must have closing tags for valid XML
+        if '</' in sample and '>' in sample:
+            try:
+                # Quick parse test
+                ET.fromstring(sample)
+                return 0.9  # Valid XML
+            except ET.ParseError:
+                return 0.3  # Looks like XML but malformed
         else:
-            return 0.6
+            return 0.1  # Incomplete XML
 
-    except ET.ParseError:
-        # Check for common XML characteristics even if not well-formed
-        xml_indicators = ['</', '<?xml', 'xmlns:', '<![CDATA[']
-        score = sum(0.1 for indicator in xml_indicators if indicator in sample)
-        return min(score, 0.3)  # Max 0.3 for malformed XML
+    return 0.0  # Not XML
 
 
 def _check_csv_format(sample: str) -> float: