mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-04-26 00:46:22 +02:00
Introduces `workspace` as the isolation boundary for config, flows,
library, and knowledge data. Removes `user` as a schema-level field
throughout the code, API specs, and tests; workspace provides the
same separation more cleanly at the trusted flow.workspace layer
rather than through client-supplied message fields.
Design
------
- IAM tech spec (docs/tech-specs/iam.md) documents current state,
proposed auth/access model, and migration direction.
- Data ownership model (docs/tech-specs/data-ownership-model.md)
captures the workspace/collection/flow hierarchy.
Schema + messaging
------------------
- Drop `user` field from AgentRequest/Step, GraphRagQuery,
DocumentRagQuery, Triples/Graph/Document/Row EmbeddingsRequest,
Sparql/Rows/Structured QueryRequest, ToolServiceRequest.
- Keep collection/workspace routing via flow.workspace at the
service layer.
- Translators updated to not serialise/deserialise user.
API specs
---------
- OpenAPI schemas and path examples cleaned of user fields.
- Websocket async-api messages updated.
- Removed the unused parameters/User.yaml.
Services + base
---------------
- Librarian, collection manager, knowledge, config: all operations
scoped by workspace. Config client API takes workspace as first
positional arg.
- `flow.workspace` is set at flow start time by the infrastructure;
it is no longer passed through from clients.
- Tool service drops user-personalisation passthrough.
CLI + SDK
---------
- tg-init-workspace and workspace-aware import/export.
- All tg-* commands drop user args; accept --workspace.
- Python API/SDK (flow, socket_client, async_*, explainability,
library) drop user kwargs from every method signature.
MCP server
----------
- All tool endpoints drop user parameters; socket_manager no longer
keyed per user.
Flow service
------------
- Closure-based topic cleanup on flow stop: only delete topics
whose blueprint template was parameterised AND no remaining
live flow (across all workspaces) still resolves to that topic.
Three scopes fall out naturally from template analysis:
* {id} -> per-flow, deleted on stop
* {blueprint} -> per-blueprint, kept while any flow of the
same blueprint exists
* {workspace} -> per-workspace, kept while any flow in the
workspace exists
* literal -> global, never deleted (e.g. tg.request.librarian)
Fixes a bug where stopping a flow silently destroyed the global
librarian exchange, wedging all library operations until manual
restart.
RabbitMQ backend
----------------
- heartbeat=60, blocked_connection_timeout=300. Catches silently
dead connections (broker restart, orphaned channels, network
partitions) within ~2 heartbeat windows, so the consumer
reconnects and re-binds its queue rather than sitting forever
on a zombie connection.
Tests
-----
- Full test refresh: unit, integration, contract, provenance.
- Dropped user-field assertions and constructor kwargs across
~100 test files.
- Renamed user-collection isolation tests to workspace-collection.
585 lines
No EOL
20 KiB
Python
585 lines
No EOL
20 KiB
Python
"""
|
|
Unit tests for Structured Query Service
|
|
Following TEST_STRATEGY.md approach for service testing
|
|
"""
|
|
|
|
import pytest
|
|
import json
|
|
from unittest.mock import AsyncMock, MagicMock, patch
|
|
|
|
from trustgraph.schema import (
|
|
StructuredQueryRequest, StructuredQueryResponse,
|
|
QuestionToStructuredQueryRequest, QuestionToStructuredQueryResponse,
|
|
RowsQueryRequest, RowsQueryResponse,
|
|
Error, GraphQLError
|
|
)
|
|
from trustgraph.retrieval.structured_query.service import Processor
|
|
|
|
|
|
@pytest.fixture
def mock_pulsar_client():
    """Provide an ``AsyncMock`` that stands in for the Pulsar client."""
    pulsar_stub = AsyncMock()
    return pulsar_stub
|
|
|
|
|
|
@pytest.fixture
def processor(mock_pulsar_client):
    """Build a ``Processor`` whose collaborators are all mocks.

    The task group is a ``MagicMock`` and the Pulsar client comes from the
    ``mock_pulsar_client`` fixture. After construction the processor's
    ``client`` attribute is replaced with a ``MagicMock`` as well.
    """
    dependencies = {
        "taskgroup": MagicMock(),
        "pulsar_client": mock_pulsar_client,
    }
    service = Processor(**dependencies)

    # Swap in a mock for the processor's ``client`` attribute.
    service.client = MagicMock()

    return service
|
|
|
|
|
|
class TestStructuredQueryProcessor:
    """Tests for the Structured Query service ``Processor``.

    Each message-handling test follows the same shape: build a request,
    wrap it in a mock message, route the flow's named services to mock
    clients, call ``processor.on_message`` and inspect what was sent on
    the ``"response"`` service.  The shared plumbing lives in the private
    static helpers below so every test only spells out what is specific
    to its scenario.

    NOTE(review): the async tests carry no explicit asyncio marker;
    presumably the project runs pytest-asyncio in auto mode -- confirm.
    """

    # ------------------------------------------------------------------
    # Shared helpers (underscore-prefixed, so pytest does not collect them)
    # ------------------------------------------------------------------

    @staticmethod
    def _make_message(request, message_id):
        """Return a mock message whose value is *request* and whose
        properties are ``{"id": message_id}``."""
        msg = MagicMock()
        msg.value.return_value = request
        msg.properties.return_value = {"id": message_id}
        return msg

    @staticmethod
    def _client_returning(response):
        """Return an ``AsyncMock`` client whose ``request`` resolves to
        *response*."""
        client = AsyncMock()
        client.request.return_value = response
        return client

    @staticmethod
    def _make_flow(nlp_client=None, rows_client=None):
        """Build a mock flow that routes service names to mock clients.

        Args:
            nlp_client: client returned for ``"nlp-query-request"``;
                when ``None`` that name is treated as unknown.
            rows_client: client returned for ``"rows-query-request"``;
                when ``None`` that name is treated as unknown.

        Returns:
            ``(flow, flow_response)`` where ``flow_response`` is the
            ``AsyncMock`` handed back for the ``"response"`` service.
            Any unknown service name yields a fresh ``AsyncMock``.
        """
        flow_response = AsyncMock()
        routes = {"response": flow_response}
        if nlp_client is not None:
            routes["nlp-query-request"] = nlp_client
        if rows_client is not None:
            routes["rows-query-request"] = rows_client

        def flow_router(service_name):
            if service_name in routes:
                return routes[service_name]
            return AsyncMock()

        flow = MagicMock()
        # The router always returns a value, so no return_value is needed.
        flow.side_effect = flow_router
        return flow, flow_response

    @staticmethod
    def _sent_response(flow_response):
        """Assert exactly one response was sent and return it.

        Checking the call count first gives a clear assertion failure
        instead of a ``TypeError`` from indexing ``call_args`` when
        nothing was sent.
        """
        flow_response.send.assert_called_once()
        return flow_response.send.call_args[0][0]

    # ------------------------------------------------------------------
    # Message-handling tests
    # ------------------------------------------------------------------

    async def test_successful_end_to_end_query(self, processor):
        """Test successful end-to-end query processing"""
        # Arrange
        graphql_query = (
            'query { customers(where: {state: {eq: "NY"}}) { id name email } }'
        )
        rows_json = (
            '{"customers": [{"id": "1", "name": "John", '
            '"email": "john@example.com"}]}'
        )

        request = StructuredQueryRequest(
            question="Show me all customers from New York",
            collection="default",
        )
        msg = self._make_message(request, "test-123")

        # Mock NLP query service response
        nlp_client = self._client_returning(
            QuestionToStructuredQueryResponse(
                error=None,
                graphql_query=graphql_query,
                variables={"state": "NY"},
                detected_schemas=["customers"],
                confidence=0.95,
            )
        )

        # Mock rows query service response
        rows_client = self._client_returning(
            RowsQueryResponse(
                error=None,
                data=rows_json,
                errors=None,
                extensions={},
            )
        )

        flow, flow_response = self._make_flow(nlp_client, rows_client)

        # Act
        await processor.on_message(msg, MagicMock(), flow)

        # Assert
        # Verify NLP query service was called correctly
        nlp_client.request.assert_called_once()
        nlp_request = nlp_client.request.call_args[0][0]
        assert isinstance(nlp_request, QuestionToStructuredQueryRequest)
        assert nlp_request.question == "Show me all customers from New York"
        assert nlp_request.max_results == 100

        # Verify rows query service was called correctly
        rows_client.request.assert_called_once()
        rows_request = rows_client.request.call_args[0][0]
        assert isinstance(rows_request, RowsQueryRequest)
        assert rows_request.query == graphql_query
        assert rows_request.variables == {"state": "NY"}
        assert rows_request.collection == "default"

        # Verify response
        response = self._sent_response(flow_response)
        assert isinstance(response, StructuredQueryResponse)
        assert response.error is None
        assert response.data == rows_json
        assert len(response.errors) == 0

    async def test_nlp_query_service_error(self, processor):
        """Test handling of NLP query service errors"""
        # Arrange
        request = StructuredQueryRequest(question="Invalid query")
        msg = self._make_message(request, "test-error")

        # Mock NLP query service error response
        nlp_client = self._client_returning(
            QuestionToStructuredQueryResponse(
                error=Error(
                    type="nlp-query-error",
                    message="Failed to parse question",
                ),
                graphql_query="",
                variables={},
                detected_schemas=[],
                confidence=0.0,
            )
        )

        flow, flow_response = self._make_flow(nlp_client)

        # Act
        await processor.on_message(msg, MagicMock(), flow)

        # Assert
        response = self._sent_response(flow_response)
        assert isinstance(response, StructuredQueryResponse)
        assert response.error is not None
        assert response.error.type == "structured-query-error"
        assert "NLP query service error" in response.error.message

    async def test_empty_graphql_query_error(self, processor):
        """Test handling of empty GraphQL query from NLP service"""
        # Arrange
        request = StructuredQueryRequest(question="Ambiguous question")
        msg = self._make_message(request, "test-empty")

        # Mock NLP query service response with empty query
        nlp_client = self._client_returning(
            QuestionToStructuredQueryResponse(
                error=None,
                graphql_query="",  # Empty query
                variables={},
                detected_schemas=[],
                confidence=0.1,
            )
        )

        flow, flow_response = self._make_flow(nlp_client)

        # Act
        await processor.on_message(msg, MagicMock(), flow)

        # Assert
        response = self._sent_response(flow_response)
        assert response.error is not None
        assert "empty GraphQL query" in response.error.message

    async def test_rows_query_service_error(self, processor):
        """Test handling of rows query service errors"""
        # Arrange
        request = StructuredQueryRequest(question="Show me customers")
        msg = self._make_message(request, "test-objects-error")

        # Mock successful NLP response
        nlp_client = self._client_returning(
            QuestionToStructuredQueryResponse(
                error=None,
                graphql_query='query { customers { id name } }',
                variables={},
                detected_schemas=["customers"],
                confidence=0.9,
            )
        )

        # Mock rows query service error
        rows_client = self._client_returning(
            RowsQueryResponse(
                error=Error(
                    type="graphql-execution-error",
                    message="Table 'customers' not found",
                ),
                data=None,
                errors=None,
                extensions={},
            )
        )

        flow, flow_response = self._make_flow(nlp_client, rows_client)

        # Act
        await processor.on_message(msg, MagicMock(), flow)

        # Assert
        response = self._sent_response(flow_response)
        assert response.error is not None
        assert "Rows query service error" in response.error.message
        assert "Table 'customers' not found" in response.error.message

    async def test_graphql_errors_handling(self, processor):
        """Test handling of GraphQL validation/execution errors"""
        # Arrange
        request = StructuredQueryRequest(question="Show invalid field")
        msg = self._make_message(request, "test-graphql-errors")

        # Mock successful NLP response
        nlp_client = self._client_returning(
            QuestionToStructuredQueryResponse(
                error=None,
                graphql_query='query { customers { invalid_field } }',
                variables={},
                detected_schemas=["customers"],
                confidence=0.8,
            )
        )

        # Mock rows response with GraphQL errors
        graphql_errors = [
            GraphQLError(
                message="Cannot query field 'invalid_field' on type 'Customer'",
                # All path elements must be strings
                path=["customers", "0", "invalid_field"],
                extensions={},
            )
        ]
        rows_client = self._client_returning(
            RowsQueryResponse(
                error=None,
                data=None,
                errors=graphql_errors,
                extensions={},
            )
        )

        flow, flow_response = self._make_flow(nlp_client, rows_client)

        # Act
        await processor.on_message(msg, MagicMock(), flow)

        # Assert
        response = self._sent_response(flow_response)
        assert response.error is None
        assert len(response.errors) == 1
        assert "Cannot query field 'invalid_field'" in response.errors[0]
        assert "customers" in response.errors[0]

    async def test_complex_query_with_variables(self, processor):
        """Test processing complex queries with variables"""
        # Arrange
        request = StructuredQueryRequest(
            question="Show customers with orders over $100 from last month"
        )
        msg = self._make_message(request, "test-complex")

        # Mock NLP response with complex query and variables
        nlp_client = self._client_returning(
            QuestionToStructuredQueryResponse(
                error=None,
                graphql_query='''
                query GetCustomersWithLargeOrders($minTotal: Float!, $startDate: String!) {
                    customers {
                        id
                        name
                        orders(where: {total: {gt: $minTotal}, date: {gte: $startDate}}) {
                            id
                            total
                            date
                        }
                    }
                }
                ''',
                variables={
                    "minTotal": "100.0",  # Convert to string for Pulsar schema
                    "startDate": "2024-01-01",
                },
                detected_schemas=["customers", "orders"],
                confidence=0.88,
            )
        )

        # Mock rows response
        rows_client = self._client_returning(
            RowsQueryResponse(
                error=None,
                data='{"customers": [{"id": "1", "name": "Alice", "orders": [{"id": "100", "total": 150.0}]}]}',
                errors=None,
            )
        )

        flow, flow_response = self._make_flow(nlp_client, rows_client)

        # Act
        await processor.on_message(msg, MagicMock(), flow)

        # Assert
        # Verify variables were passed through unchanged (as strings)
        rows_request = rows_client.request.call_args[0][0]
        assert rows_request.variables["minTotal"] == "100.0"
        assert rows_request.variables["startDate"] == "2024-01-01"

        # Verify response
        response = self._sent_response(flow_response)
        assert response.error is None
        assert "Alice" in response.data

    async def test_null_data_handling(self, processor):
        """Test handling of null/empty data responses"""
        # Arrange
        request = StructuredQueryRequest(question="Show nonexistent data")
        msg = self._make_message(request, "test-null")

        # Mock responses
        nlp_client = self._client_returning(
            QuestionToStructuredQueryResponse(
                error=None,
                graphql_query='query { customers { id } }',
                variables={},
                detected_schemas=["customers"],
                confidence=0.9,
            )
        )
        rows_client = self._client_returning(
            RowsQueryResponse(
                error=None,
                data=None,  # Null data
                errors=None,
                extensions={},
            )
        )

        flow, flow_response = self._make_flow(nlp_client, rows_client)

        # Act
        await processor.on_message(msg, MagicMock(), flow)

        # Assert
        response = self._sent_response(flow_response)
        assert response.error is None
        assert response.data == "null"  # Should convert None to "null" string

    async def test_exception_handling(self, processor):
        """Test general exception handling"""
        # Arrange
        request = StructuredQueryRequest(question="Test exception")
        msg = self._make_message(request, "test-exception")

        # The NLP client raises instead of returning a response
        failing_client = AsyncMock()
        failing_client.request.side_effect = Exception("Network timeout")

        flow, flow_response = self._make_flow(failing_client)

        # Act
        await processor.on_message(msg, MagicMock(), flow)

        # Assert
        response = self._sent_response(flow_response)
        assert response.error is not None
        assert response.error.type == "structured-query-error"
        assert "Network timeout" in response.error.message
        assert response.data == "null"
        assert len(response.errors) == 0

    # ------------------------------------------------------------------
    # Construction / CLI tests
    # ------------------------------------------------------------------

    def test_processor_initialization(self, mock_pulsar_client):
        """Test processor initialization with correct specifications"""
        # Act
        processor = Processor(
            taskgroup=MagicMock(),
            pulsar_client=mock_pulsar_client,
        )

        # Assert - Test default ID
        assert processor.id == "structured-query"

        # Verify specifications were registered (we can't directly access them,
        # but we know they were registered if initialization succeeded)
        assert processor is not None

    def test_add_args(self):
        """Test command-line argument parsing"""
        import argparse

        parser = argparse.ArgumentParser()
        Processor.add_args(parser)

        # Test that it doesn't crash (no additional args)
        args = parser.parse_args([])

        # No specific assertions since no custom args are added
        assert args is not None
|
|
|
|
|
|
@pytest.mark.unit
class TestStructuredQueryHelperFunctions:
    """Checks on module-level names exported by the structured query service."""

    def test_service_logging_integration(self):
        """The service module's logger is named after the module's dotted path."""
        from trustgraph.retrieval.structured_query.service import (
            logger as service_logger,
        )

        expected_name = "trustgraph.retrieval.structured_query.service"
        assert service_logger.name == expected_name

    def test_default_values(self):
        """The service module's ``default_ident`` is ``"structured-query"``."""
        from trustgraph.retrieval.structured_query.service import (
            default_ident as ident,
        )

        expected_ident = "structured-query"
        assert ident == expected_ident