mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-04-25 00:16:23 +02:00
Introduces `workspace` as the isolation boundary for config, flows,
library, and knowledge data. Removes `user` as a schema-level field
throughout the code, API specs, and tests; workspace provides the
same separation more cleanly at the trusted flow.workspace layer
rather than through client-supplied message fields.
Design
------
- IAM tech spec (docs/tech-specs/iam.md) documents current state,
proposed auth/access model, and migration direction.
- Data ownership model (docs/tech-specs/data-ownership-model.md)
captures the workspace/collection/flow hierarchy.
Schema + messaging
------------------
- Drop `user` field from AgentRequest/Step, GraphRagQuery,
DocumentRagQuery, Triples/Graph/Document/Row EmbeddingsRequest,
Sparql/Rows/Structured QueryRequest, ToolServiceRequest.
- Keep collection/workspace routing via flow.workspace at the
service layer.
- Translators updated to not serialise/deserialise user.
API specs
---------
- OpenAPI schemas and path examples cleaned of user fields.
- Websocket async-api messages updated.
- Removed the unused parameters/User.yaml.
Services + base
---------------
- Librarian, collection manager, knowledge, config: all operations
scoped by workspace. Config client API takes workspace as first
positional arg.
- `flow.workspace` is set at flow start time by the infrastructure;
it is no longer passed through from clients.
- Tool service drops user-personalisation passthrough.
CLI + SDK
---------
- tg-init-workspace and workspace-aware import/export.
- All tg-* commands drop user args; accept --workspace.
- Python API/SDK (flow, socket_client, async_*, explainability,
library) drop user kwargs from every method signature.
MCP server
----------
- All tool endpoints drop user parameters; socket_manager no longer
keyed per user.
Flow service
------------
- Closure-based topic cleanup on flow stop: only delete topics
whose blueprint template was parameterised AND no remaining
live flow (across all workspaces) still resolves to that topic.
Three scopes fall out naturally from template analysis:
* {id} -> per-flow, deleted on stop
* {blueprint} -> per-blueprint, kept while any flow of the
same blueprint exists
* {workspace} -> per-workspace, kept while any flow in the
workspace exists
* literal -> global, never deleted (e.g. tg.request.librarian)
Fixes a bug where stopping a flow silently destroyed the global
librarian exchange, wedging all library operations until manual
restart.
RabbitMQ backend
----------------
- heartbeat=60, blocked_connection_timeout=300. Catches silently
dead connections (broker restart, orphaned channels, network
partitions) within ~2 heartbeat windows, so the consumer
reconnects and re-binds its queue rather than sitting forever
on a zombie connection.
Tests
-----
- Full test refresh: unit, integration, contract, provenance.
- Dropped user-field assertions and constructor kwargs across
~100 test files.
- Renamed user-collection isolation tests to workspace-collection.
457 lines
No EOL
20 KiB
Python
457 lines
No EOL
20 KiB
Python
"""
End-to-end integration tests for Cassandra configuration.

Tests complete configuration flow from environment variables
through processors to Cassandra connections.
"""
|
|
|
|
import os
|
|
import pytest
|
|
from unittest.mock import Mock, patch, MagicMock, call
|
|
from argparse import ArgumentParser
|
|
|
|
# Import processors that use Cassandra configuration
|
|
from trustgraph.storage.triples.cassandra.write import Processor as TriplesWriter
|
|
from trustgraph.storage.rows.cassandra.write import Processor as RowsWriter
|
|
from trustgraph.query.triples.cassandra.service import Processor as TriplesQuery
|
|
from trustgraph.storage.knowledge.store import Processor as KgStore
|
|
|
|
|
|
class TestEndToEndConfigurationFlow:
    """Follow CASSANDRA_* environment variables through each processor."""

    @pytest.mark.asyncio
    @patch('trustgraph.direct.cassandra_kg.Cluster')
    async def test_triples_writer_env_to_connection(self, mock_cluster):
        """Triples writer hands the env-configured hosts and auth to Cluster."""
        # Stub the driver: connecting to the fake cluster yields a mock session.
        cluster = MagicMock()
        cluster.connect.return_value = MagicMock()
        mock_cluster.return_value = cluster

        environment = {
            'CASSANDRA_HOST': 'integration-host1,integration-host2,integration-host3',
            'CASSANDRA_USERNAME': 'integration-user',
            'CASSANDRA_PASSWORD': 'integration-pass',
        }

        with patch.dict(os.environ, environment, clear=True):
            writer = TriplesWriter(taskgroup=MagicMock())

            # Storing a message is what triggers TrustGraph creation.
            message = MagicMock()
            message.metadata.collection = 'test_collection'
            message.triples = []

            # Pretend the collection already exists.
            exists_target = 'trustgraph.direct.cassandra_kg.KnowledgeGraph.collection_exists'
            with patch(exists_target, return_value=True):
                await writer.store_triples('test_user', message)

            # Exactly one Cluster, built from the environment's host list.
            mock_cluster.assert_called_once()
            positional, keyword = mock_cluster.call_args
            assert positional[0] == ['integration-host1', 'integration-host2', 'integration-host3']
            # Credentials were supplied, so an auth provider must be passed.
            assert 'auth_provider' in keyword

    @patch('trustgraph.storage.rows.cassandra.write.Cluster')
    @patch('trustgraph.storage.rows.cassandra.write.PlainTextAuthProvider')
    def test_objects_writer_env_to_cluster_connection(self, mock_auth_provider, mock_cluster):
        """Rows writer builds its auth provider and Cluster from the environment."""
        auth = MagicMock()
        mock_auth_provider.return_value = auth

        cluster = MagicMock()
        cluster.connect.return_value = MagicMock()
        mock_cluster.return_value = cluster

        environment = {
            'CASSANDRA_HOST': 'obj-host1,obj-host2',
            'CASSANDRA_USERNAME': 'obj-user',
            'CASSANDRA_PASSWORD': 'obj-pass',
        }

        with patch.dict(os.environ, environment, clear=True):
            writer = RowsWriter(taskgroup=MagicMock())

            # Trigger the Cassandra connection explicitly.
            writer.connect_cassandra()

            # The auth provider receives the env credentials.
            mock_auth_provider.assert_called_once_with(
                username='obj-user',
                password='obj-pass'
            )

            # The cluster receives the env hosts and that auth provider.
            mock_cluster.assert_called_once()
            _, keyword = mock_cluster.call_args
            assert keyword['contact_points'] == ['obj-host1', 'obj-host2']
            assert keyword['auth_provider'] == auth

    @pytest.mark.asyncio
    @patch('trustgraph.storage.knowledge.store.KnowledgeTableStore')
    async def test_kg_store_env_to_table_store(self, mock_table_store):
        """Knowledge store passes env configuration to KnowledgeTableStore."""
        mock_table_store.return_value = MagicMock()

        environment = {
            'CASSANDRA_HOST': 'kg-host1,kg-host2,kg-host3,kg-host4',
            'CASSANDRA_USERNAME': 'kg-user',
            'CASSANDRA_PASSWORD': 'kg-pass',
        }

        with patch.dict(os.environ, environment, clear=True):
            # Constructing the processor is what creates the table store.
            KgStore(taskgroup=MagicMock())

            mock_table_store.assert_called_once_with(
                cassandra_host=['kg-host1', 'kg-host2', 'kg-host3', 'kg-host4'],
                cassandra_username='kg-user',
                cassandra_password='kg-pass',
                keyspace='knowledge'
            )


class TestConfigurationPriorityEndToEnd:
    """Exercise the precedence chain: explicit parameters, environment, defaults."""

    @pytest.mark.asyncio
    @patch('trustgraph.direct.cassandra_kg.Cluster')
    async def test_cli_override_env_end_to_end(self, mock_cluster):
        """Explicit (CLI) parameters win over environment variables."""
        cluster = MagicMock()
        cluster.connect.return_value = MagicMock()
        mock_cluster.return_value = cluster

        environment = {
            'CASSANDRA_HOST': 'env-host',
            'CASSANDRA_USERNAME': 'env-user',
            'CASSANDRA_PASSWORD': 'env-pass',
        }

        with patch.dict(os.environ, environment, clear=True):
            # Every setting is also given explicitly; these should take priority.
            writer = TriplesWriter(
                taskgroup=MagicMock(),
                cassandra_host='cli-host1,cli-host2',
                cassandra_username='cli-user',
                cassandra_password='cli-pass'
            )

            # Storing a message is what triggers TrustGraph creation.
            message = MagicMock()
            message.metadata.collection = 'test_collection'
            message.triples = []

            # Pretend the collection already exists.
            exists_target = 'trustgraph.direct.cassandra_kg.KnowledgeGraph.collection_exists'
            with patch(exists_target, return_value=True):
                await writer.store_triples('test_user', message)

            # Hosts must come from the explicit parameters, not the environment.
            mock_cluster.assert_called_once()
            positional, keyword = mock_cluster.call_args
            assert positional[0] == ['cli-host1', 'cli-host2']
            # Credentials were supplied, so an auth provider must be passed.
            assert 'auth_provider' in keyword

    @pytest.mark.asyncio
    @patch('trustgraph.storage.knowledge.store.KnowledgeTableStore')
    async def test_partial_cli_with_env_fallback_end_to_end(self, mock_table_store):
        """Settings not given explicitly fall back to the environment."""
        mock_table_store.return_value = MagicMock()

        environment = {
            'CASSANDRA_HOST': 'fallback-host1,fallback-host2',
            'CASSANDRA_USERNAME': 'fallback-user',
            'CASSANDRA_PASSWORD': 'fallback-pass',
        }

        with patch.dict(os.environ, environment, clear=True):
            # Only the host is explicit; username and password are left to the env.
            KgStore(
                taskgroup=MagicMock(),
                cassandra_host='partial-host'
            )

            # Expect the mixed result: explicit host, environment credentials.
            mock_table_store.assert_called_once_with(
                cassandra_host=['partial-host'],
                cassandra_username='fallback-user',
                cassandra_password='fallback-pass',
                keyspace='knowledge'
            )

    @pytest.mark.asyncio
    @patch('trustgraph.direct.cassandra_kg.Cluster')
    async def test_no_config_defaults_end_to_end(self, mock_cluster):
        """With neither parameters nor environment, the defaults apply."""
        cluster = MagicMock()
        cluster.connect.return_value = MagicMock()
        mock_cluster.return_value = cluster

        with patch.dict(os.environ, {}, clear=True):
            query_service = TriplesQuery(taskgroup=MagicMock())

            # An unconstrained query (no s/p/o) used to trigger TrustGraph creation.
            query = MagicMock()
            query.collection = 'default_collection'
            query.s = None
            query.p = None
            query.o = None
            query.limit = 100

            # Install a stand-in graph whose get_all returns nothing.
            graph = MagicMock()
            graph.get_all.return_value = []
            query_service.tg = graph

            await query_service.query_triples('default_user', query)

            # Default host and no authentication are expected.
            mock_cluster.assert_called_once()
            positional, keyword = mock_cluster.call_args
            assert positional[0] == ['cassandra']
            assert 'auth_provider' not in keyword


class TestNoBackwardCompatibilityEndToEnd:
    """Test that backward compatibility with old parameter names is removed.

    Every test runs with an emptied environment so that CASSANDRA_* variables
    set on the host machine cannot leak into the configuration under test.
    """

    @pytest.mark.asyncio
    @patch('trustgraph.direct.cassandra_kg.Cluster')
    async def test_old_graph_params_no_longer_work_end_to_end(self, mock_cluster):
        """Test that old graph_* parameters no longer work end-to-end."""
        mock_cluster_instance = MagicMock()
        mock_cluster_instance.connect.return_value = MagicMock()
        mock_cluster.return_value = mock_cluster_instance

        # The assertions below expect pure defaults, so the environment must
        # not contribute any CASSANDRA_* values.
        with patch.dict(os.environ, {}, clear=True):
            # Use old parameter names (should be ignored)
            processor = TriplesWriter(
                taskgroup=MagicMock(),
                graph_host='legacy-host',
                graph_username='legacy-user',
                graph_password='legacy-pass'
            )

            # Trigger TrustGraph creation
            mock_message = MagicMock()
            mock_message.metadata.collection = 'legacy_collection'
            mock_message.triples = []

            # Mock collection_exists to return True
            with patch('trustgraph.direct.cassandra_kg.KnowledgeGraph.collection_exists', return_value=True):
                await processor.store_triples('legacy_user', mock_message)

            # Should use defaults since old parameters are not recognized
            mock_cluster.assert_called_once()
            call_args = mock_cluster.call_args
            assert call_args.args[0] == ['cassandra']  # Default, not legacy-host
            assert 'auth_provider' not in call_args.kwargs  # No auth since no valid credentials

    @patch('trustgraph.storage.knowledge.store.KnowledgeTableStore')
    def test_old_cassandra_user_param_no_longer_works_end_to_end(self, mock_table_store):
        """Test that old cassandra_user parameter no longer works."""
        mock_table_store.return_value = MagicMock()

        # cassandra_username is expected to be None below; an ambient
        # CASSANDRA_USERNAME would otherwise be picked up as a fallback.
        with patch.dict(os.environ, {}, clear=True):
            # Use old cassandra_user parameter (should be ignored)
            KgStore(
                taskgroup=MagicMock(),
                cassandra_host='legacy-kg-host',
                cassandra_user='legacy-kg-user',  # Old parameter name - not supported
                cassandra_password='legacy-kg-pass'
            )

            # cassandra_user should be ignored, only cassandra_username works
            mock_table_store.assert_called_once_with(
                cassandra_host=['legacy-kg-host'],
                cassandra_username=None,  # Should be None since cassandra_user is not recognized
                cassandra_password='legacy-kg-pass',
                keyspace='knowledge'
            )

    @pytest.mark.asyncio
    @patch('trustgraph.direct.cassandra_kg.Cluster')
    async def test_new_params_override_old_params_end_to_end(self, mock_cluster):
        """Test that new parameters override old ones when both are present end-to-end."""
        mock_cluster_instance = MagicMock()
        mock_cluster_instance.connect.return_value = MagicMock()
        mock_cluster.return_value = mock_cluster_instance

        # All new-style values are explicit here; the environment is cleared
        # only to keep this test isolated like the others in this class.
        with patch.dict(os.environ, {}, clear=True):
            # Provide both old and new parameters
            processor = TriplesWriter(
                taskgroup=MagicMock(),
                cassandra_host='new-host',
                graph_host='old-host',  # Should be ignored
                cassandra_username='new-user',
                graph_username='old-user',  # Should be ignored
                cassandra_password='new-pass',
                graph_password='old-pass'  # Should be ignored
            )

            # Trigger TrustGraph creation
            mock_message = MagicMock()
            mock_message.metadata.collection = 'precedence_collection'
            mock_message.triples = []

            # Mock collection_exists to return True
            with patch('trustgraph.direct.cassandra_kg.KnowledgeGraph.collection_exists', return_value=True):
                await processor.store_triples('precedence_user', mock_message)

            # Should use new parameters, not old ones
            mock_cluster.assert_called_once()
            call_args = mock_cluster.call_args
            assert call_args.args[0] == ['new-host']  # New parameter wins
            assert 'auth_provider' in call_args.kwargs  # Should have auth since credentials provided


class TestMultipleHostsHandling:
    """Test multiple Cassandra hosts handling end-to-end."""

    @patch('trustgraph.storage.rows.cassandra.write.Cluster')
    def test_multiple_hosts_passed_to_cluster(self, mock_cluster):
        """Test that multiple hosts are correctly passed to Cassandra cluster."""
        env_vars = {
            'CASSANDRA_HOST': 'host1,host2,host3,host4,host5'
        }

        mock_cluster_instance = MagicMock()
        mock_cluster_instance.connect.return_value = MagicMock()
        mock_cluster.return_value = mock_cluster_instance

        with patch.dict(os.environ, env_vars, clear=True):
            processor = RowsWriter(taskgroup=MagicMock())
            processor.connect_cassandra()

            # Verify all hosts were passed to Cluster
            mock_cluster.assert_called_once()
            call_args = mock_cluster.call_args
            assert call_args.kwargs['contact_points'] == ['host1', 'host2', 'host3', 'host4', 'host5']

    @pytest.mark.asyncio
    @patch('trustgraph.direct.cassandra_kg.Cluster')
    async def test_single_host_converted_to_list(self, mock_cluster):
        """Test that single host is converted to list for TrustGraph."""
        mock_cluster_instance = MagicMock()
        mock_cluster_instance.connect.return_value = MagicMock()
        mock_cluster.return_value = mock_cluster_instance

        # The "no auth" assertion below only holds if no credentials leak in
        # from the host environment, so run with an emptied environment.
        with patch.dict(os.environ, {}, clear=True):
            processor = TriplesWriter(taskgroup=MagicMock(), cassandra_host='single-host')

            # Trigger TrustGraph creation
            mock_message = MagicMock()
            mock_message.metadata.collection = 'single_collection'
            mock_message.triples = []

            # Mock collection_exists to return True
            with patch('trustgraph.direct.cassandra_kg.KnowledgeGraph.collection_exists', return_value=True):
                await processor.store_triples('single_user', mock_message)

            # Single host should be converted to list
            mock_cluster.assert_called_once()
            call_args = mock_cluster.call_args
            assert call_args.args[0] == ['single-host']  # Converted to list
            assert 'auth_provider' not in call_args.kwargs  # No auth since no credentials provided

    def test_whitespace_handling_in_host_list(self):
        """Test that whitespace in host lists is handled correctly."""
        from trustgraph.base.cassandra_config import resolve_cassandra_config

        # Only the first element of the returned 4-tuple (the host list) is
        # checked; the host is passed explicitly in every case.

        # Spaces around the separating commas
        hosts1, _, _, _ = resolve_cassandra_config(host='host1, host2 , host3')
        assert hosts1 == ['host1', 'host2', 'host3']

        # Trailing comma must not produce an empty host entry
        hosts2, _, _, _ = resolve_cassandra_config(host='host1,host2,host3,')
        assert hosts2 == ['host1', 'host2', 'host3']

        # Leading and trailing whitespace around the whole string
        hosts3, _, _, _ = resolve_cassandra_config(host=' host1 , host2 ')
        assert hosts3 == ['host1', 'host2']


class TestAuthenticationFlow:
    """Test authentication configuration flow end-to-end.

    Each test controls os.environ explicitly so that credentials present on
    the host machine cannot switch authentication on or off unexpectedly.
    """

    @patch('trustgraph.storage.rows.cassandra.write.Cluster')
    @patch('trustgraph.storage.rows.cassandra.write.PlainTextAuthProvider')
    def test_authentication_enabled_when_both_credentials_provided(self, mock_auth_provider, mock_cluster):
        """Test that authentication is enabled when both username and password are provided."""
        env_vars = {
            'CASSANDRA_HOST': 'auth-host',
            'CASSANDRA_USERNAME': 'auth-user',
            'CASSANDRA_PASSWORD': 'auth-secret'
        }

        mock_auth_instance = MagicMock()
        mock_auth_provider.return_value = mock_auth_instance
        mock_cluster.return_value = MagicMock()

        with patch.dict(os.environ, env_vars, clear=True):
            processor = RowsWriter(taskgroup=MagicMock())
            processor.connect_cassandra()

            # Auth provider should be created
            mock_auth_provider.assert_called_once_with(
                username='auth-user',
                password='auth-secret'
            )

            # Cluster should be created with auth provider. Assert the call
            # first so a missing call fails clearly instead of on None.
            mock_cluster.assert_called_once()
            call_args = mock_cluster.call_args
            assert 'auth_provider' in call_args.kwargs
            assert call_args.kwargs['auth_provider'] == mock_auth_instance

    @patch('trustgraph.storage.rows.cassandra.write.Cluster')
    @patch('trustgraph.storage.rows.cassandra.write.PlainTextAuthProvider')
    def test_no_authentication_when_credentials_missing(self, mock_auth_provider, mock_cluster):
        """Test that authentication is not used when credentials are missing."""
        env_vars = {
            'CASSANDRA_HOST': 'no-auth-host'
            # No username/password
        }

        mock_cluster.return_value = MagicMock()

        with patch.dict(os.environ, env_vars, clear=True):
            processor = RowsWriter(taskgroup=MagicMock())
            processor.connect_cassandra()

            # Auth provider should not be created
            mock_auth_provider.assert_not_called()

            # Cluster should be created without auth provider
            mock_cluster.assert_called_once()
            call_args = mock_cluster.call_args
            assert 'auth_provider' not in call_args.kwargs

    @patch('trustgraph.storage.rows.cassandra.write.Cluster')
    @patch('trustgraph.storage.rows.cassandra.write.PlainTextAuthProvider')
    def test_no_authentication_when_only_username_provided(self, mock_auth_provider, mock_cluster):
        """Test that authentication is not used when only username is provided."""
        mock_cluster.return_value = MagicMock()

        # Without clearing the environment, an ambient CASSANDRA_PASSWORD
        # could complete the credential pair and enable authentication.
        with patch.dict(os.environ, {}, clear=True):
            processor = RowsWriter(
                taskgroup=MagicMock(),
                cassandra_host='partial-auth-host',
                cassandra_username='partial-user'
                # No password
            )

            processor.connect_cassandra()

            # Auth provider should not be created (needs both username AND password)
            mock_auth_provider.assert_not_called()

            # Cluster should be created without auth provider
            mock_cluster.assert_called_once()
            call_args = mock_cluster.call_args
            assert 'auth_provider' not in call_args.kwargs