Recent fixes -> release/v2.4 (#891)

* Fix publisher resource leak in librarian submit_document (#883)

Wrap pub.start()/pub.send() in try/finally to guarantee pub.stop() is
always called, even when start() or send() raises. Remove the
unnecessary asyncio.sleep(1) kludge.

* Make Cassandra replication factor configurable (issue #787) (#887)

Add CASSANDRA_REPLICATION_FACTOR environment variable and
--cassandra-replication-factor CLI argument to cassandra_config.py.

Update all four table store constructors (ConfigTableStore,
KnowledgeTableStore, LibraryTableStore, IamTableStore) to accept
an optional replication_factor parameter and use it in keyspace
creation CQL queries.

Thread the replication factor through all service constructors:
Configuration, KnowledgeManager, Librarian, IamService, and
knowledge store Processor.

* Update tests

---------

Co-authored-by: gittihub-jpg <rico@springer-mail.net>
This commit is contained in:
cybermaggedon 2026-05-08 19:48:12 +01:00 committed by GitHub
parent fe542b3d33
commit fd8d5b2c42
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
21 changed files with 105 additions and 72 deletions

View file

@ -110,7 +110,8 @@ class TestEndToEndConfigurationFlow:
cassandra_host=['kg-host1', 'kg-host2', 'kg-host3', 'kg-host4'], cassandra_host=['kg-host1', 'kg-host2', 'kg-host3', 'kg-host4'],
cassandra_username='kg-user', cassandra_username='kg-user',
cassandra_password='kg-pass', cassandra_password='kg-pass',
keyspace='knowledge' keyspace='knowledge',
replication_factor=1,
) )
@ -182,7 +183,8 @@ class TestConfigurationPriorityEndToEnd:
cassandra_host=['partial-host'], # From parameter cassandra_host=['partial-host'], # From parameter
cassandra_username='fallback-user', # From environment cassandra_username='fallback-user', # From environment
cassandra_password='fallback-pass', # From environment cassandra_password='fallback-pass', # From environment
keyspace='knowledge' keyspace='knowledge',
replication_factor=1,
) )
@pytest.mark.asyncio @pytest.mark.asyncio
@ -273,7 +275,8 @@ class TestNoBackwardCompatibilityEndToEnd:
cassandra_host=['legacy-kg-host'], cassandra_host=['legacy-kg-host'],
cassandra_username=None, # Should be None since cassandra_user is not recognized cassandra_username=None, # Should be None since cassandra_user is not recognized
cassandra_password='legacy-kg-pass', cassandra_password='legacy-kg-pass',
keyspace='knowledge' keyspace='knowledge',
replication_factor=1,
) )
@pytest.mark.asyncio @pytest.mark.asyncio
@ -367,13 +370,13 @@ class TestMultipleHostsHandling:
from trustgraph.base.cassandra_config import resolve_cassandra_config from trustgraph.base.cassandra_config import resolve_cassandra_config
# Test various whitespace scenarios # Test various whitespace scenarios
hosts1, _, _, _ = resolve_cassandra_config(host='host1, host2 , host3') hosts1, _, _, _, _ = resolve_cassandra_config(host='host1, host2 , host3')
assert hosts1 == ['host1', 'host2', 'host3'] assert hosts1 == ['host1', 'host2', 'host3']
hosts2, _, _, _ = resolve_cassandra_config(host='host1,host2,host3,') hosts2, _, _, _, _ = resolve_cassandra_config(host='host1,host2,host3,')
assert hosts2 == ['host1', 'host2', 'host3'] assert hosts2 == ['host1', 'host2', 'host3']
hosts3, _, _, _ = resolve_cassandra_config(host=' host1 , host2 ') hosts3, _, _, _, _ = resolve_cassandra_config(host=' host1 , host2 ')
assert hosts3 == ['host1', 'host2'] assert hosts3 == ['host1', 'host2']

View file

@ -145,7 +145,7 @@ class TestResolveCassandraConfig:
def test_default_configuration(self): def test_default_configuration(self):
"""Test resolution with no parameters or environment variables.""" """Test resolution with no parameters or environment variables."""
with patch.dict(os.environ, {}, clear=True): with patch.dict(os.environ, {}, clear=True):
hosts, username, password, keyspace = resolve_cassandra_config() hosts, username, password, keyspace, _ = resolve_cassandra_config()
assert hosts == ['cassandra'] assert hosts == ['cassandra']
assert username is None assert username is None
@ -160,7 +160,7 @@ class TestResolveCassandraConfig:
} }
with patch.dict(os.environ, env_vars, clear=True): with patch.dict(os.environ, env_vars, clear=True):
hosts, username, password, keyspace = resolve_cassandra_config() hosts, username, password, keyspace, _ = resolve_cassandra_config()
assert hosts == ['env1', 'env2', 'env3'] assert hosts == ['env1', 'env2', 'env3']
assert username == 'env-user' assert username == 'env-user'
@ -175,7 +175,7 @@ class TestResolveCassandraConfig:
} }
with patch.dict(os.environ, env_vars, clear=True): with patch.dict(os.environ, env_vars, clear=True):
hosts, username, password, keyspace = resolve_cassandra_config( hosts, username, password, keyspace, _ = resolve_cassandra_config(
host='explicit-host', host='explicit-host',
username='explicit-user', username='explicit-user',
password='explicit-pass' password='explicit-pass'
@ -188,19 +188,19 @@ class TestResolveCassandraConfig:
def test_host_list_parsing(self): def test_host_list_parsing(self):
"""Test different host list formats.""" """Test different host list formats."""
# Single host # Single host
hosts, _, _, _ = resolve_cassandra_config(host='single-host') hosts, _, _, _, _ = resolve_cassandra_config(host='single-host')
assert hosts == ['single-host'] assert hosts == ['single-host']
# Multiple hosts with spaces # Multiple hosts with spaces
hosts, _, _, _ = resolve_cassandra_config(host='host1, host2 ,host3') hosts, _, _, _, _ = resolve_cassandra_config(host='host1, host2 ,host3')
assert hosts == ['host1', 'host2', 'host3'] assert hosts == ['host1', 'host2', 'host3']
# Empty elements filtered out # Empty elements filtered out
hosts, _, _, _ = resolve_cassandra_config(host='host1,,host2,') hosts, _, _, _, _ = resolve_cassandra_config(host='host1,,host2,')
assert hosts == ['host1', 'host2'] assert hosts == ['host1', 'host2']
# Already a list # Already a list
hosts, _, _, _ = resolve_cassandra_config(host=['list-host1', 'list-host2']) hosts, _, _, _, _ = resolve_cassandra_config(host=['list-host1', 'list-host2'])
assert hosts == ['list-host1', 'list-host2'] assert hosts == ['list-host1', 'list-host2']
def test_args_object_resolution(self): def test_args_object_resolution(self):
@ -212,7 +212,7 @@ class TestResolveCassandraConfig:
cassandra_password = 'args-pass' cassandra_password = 'args-pass'
args = MockArgs() args = MockArgs()
hosts, username, password, keyspace = resolve_cassandra_config(args) hosts, username, password, keyspace, _ = resolve_cassandra_config(args)
assert hosts == ['args-host1', 'args-host2'] assert hosts == ['args-host1', 'args-host2']
assert username == 'args-user' assert username == 'args-user'
@ -233,7 +233,7 @@ class TestResolveCassandraConfig:
with patch.dict(os.environ, env_vars, clear=True): with patch.dict(os.environ, env_vars, clear=True):
args = PartialArgs() args = PartialArgs()
hosts, username, password, keyspace = resolve_cassandra_config(args) hosts, username, password, keyspace, _ = resolve_cassandra_config(args)
assert hosts == ['args-host'] # From args assert hosts == ['args-host'] # From args
assert username == 'env-user' # From env assert username == 'env-user' # From env
@ -251,7 +251,7 @@ class TestGetCassandraConfigFromParams:
'cassandra_password': 'new-pass' 'cassandra_password': 'new-pass'
} }
hosts, username, password, keyspace = get_cassandra_config_from_params(params) hosts, username, password, keyspace, _ = get_cassandra_config_from_params(params)
assert hosts == ['new-host1', 'new-host2'] assert hosts == ['new-host1', 'new-host2']
assert username == 'new-user' assert username == 'new-user'
@ -265,7 +265,7 @@ class TestGetCassandraConfigFromParams:
'graph_password': 'old-pass' 'graph_password': 'old-pass'
} }
hosts, username, password, keyspace = get_cassandra_config_from_params(params) hosts, username, password, keyspace, _ = get_cassandra_config_from_params(params)
# Should use defaults since graph_* params are not recognized # Should use defaults since graph_* params are not recognized
assert hosts == ['cassandra'] # Default assert hosts == ['cassandra'] # Default
@ -280,7 +280,7 @@ class TestGetCassandraConfigFromParams:
'cassandra_password': 'compat-pass' 'cassandra_password': 'compat-pass'
} }
hosts, username, password, keyspace = get_cassandra_config_from_params(params) hosts, username, password, keyspace, _ = get_cassandra_config_from_params(params)
assert hosts == ['compat-host'] assert hosts == ['compat-host']
assert username is None # cassandra_user is not recognized assert username is None # cassandra_user is not recognized
@ -298,7 +298,7 @@ class TestGetCassandraConfigFromParams:
'graph_password': 'old-pass' 'graph_password': 'old-pass'
} }
hosts, username, password, keyspace = get_cassandra_config_from_params(params) hosts, username, password, keyspace, _ = get_cassandra_config_from_params(params)
assert hosts == ['new-host'] # Only cassandra_* params work assert hosts == ['new-host'] # Only cassandra_* params work
assert username == 'new-user' # Only cassandra_* params work assert username == 'new-user' # Only cassandra_* params work
@ -314,7 +314,7 @@ class TestGetCassandraConfigFromParams:
with patch.dict(os.environ, env_vars, clear=True): with patch.dict(os.environ, env_vars, clear=True):
params = {} params = {}
hosts, username, password, keyspace = get_cassandra_config_from_params(params) hosts, username, password, keyspace, _ = get_cassandra_config_from_params(params)
assert hosts == ['fallback-host1', 'fallback-host2'] assert hosts == ['fallback-host1', 'fallback-host2']
assert username == 'fallback-user' assert username == 'fallback-user'
@ -334,7 +334,7 @@ class TestConfigurationPriority:
with patch.dict(os.environ, env_vars, clear=True): with patch.dict(os.environ, env_vars, clear=True):
# CLI args should override everything # CLI args should override everything
hosts, username, password, keyspace = resolve_cassandra_config( hosts, username, password, keyspace, _ = resolve_cassandra_config(
host='cli-host', host='cli-host',
username='cli-user', username='cli-user',
password='cli-pass' password='cli-pass'
@ -354,7 +354,7 @@ class TestConfigurationPriority:
with patch.dict(os.environ, env_vars, clear=True): with patch.dict(os.environ, env_vars, clear=True):
# Only provide host via CLI # Only provide host via CLI
hosts, username, password, keyspace = resolve_cassandra_config( hosts, username, password, keyspace, _ = resolve_cassandra_config(
host='cli-host' host='cli-host'
# username and password not provided # username and password not provided
) )
@ -366,7 +366,7 @@ class TestConfigurationPriority:
def test_no_config_defaults(self): def test_no_config_defaults(self):
"""Test that defaults are used when no configuration is provided.""" """Test that defaults are used when no configuration is provided."""
with patch.dict(os.environ, {}, clear=True): with patch.dict(os.environ, {}, clear=True):
hosts, username, password, keyspace = resolve_cassandra_config() hosts, username, password, keyspace, _ = resolve_cassandra_config()
assert hosts == ['cassandra'] # Default assert hosts == ['cassandra'] # Default
assert username is None # Default assert username is None # Default
@ -378,17 +378,17 @@ class TestEdgeCases:
def test_empty_host_string(self): def test_empty_host_string(self):
"""Test handling of empty host string falls back to default.""" """Test handling of empty host string falls back to default."""
hosts, _, _, _ = resolve_cassandra_config(host='') hosts, _, _, _, _ = resolve_cassandra_config(host='')
assert hosts == ['cassandra'] # Falls back to default assert hosts == ['cassandra'] # Falls back to default
def test_whitespace_only_host(self): def test_whitespace_only_host(self):
"""Test handling of whitespace-only host string.""" """Test handling of whitespace-only host string."""
hosts, _, _, _ = resolve_cassandra_config(host=' ') hosts, _, _, _, _ = resolve_cassandra_config(host=' ')
assert hosts == [] # Empty after stripping whitespace assert hosts == [] # Empty after stripping whitespace
def test_none_values_preserved(self): def test_none_values_preserved(self):
"""Test that None values are preserved correctly.""" """Test that None values are preserved correctly."""
hosts, username, password, keyspace = resolve_cassandra_config( hosts, username, password, keyspace, _ = resolve_cassandra_config(
host=None, host=None,
username=None, username=None,
password=None password=None
@ -401,7 +401,7 @@ class TestEdgeCases:
def test_mixed_none_and_values(self): def test_mixed_none_and_values(self):
"""Test mixing None and actual values.""" """Test mixing None and actual values."""
hosts, username, password, keyspace = resolve_cassandra_config( hosts, username, password, keyspace, _ = resolve_cassandra_config(
host='mixed-host', host='mixed-host',
username=None, username=None,
password='mixed-pass' password='mixed-pass'

View file

@ -218,7 +218,8 @@ class TestKgStoreConfiguration:
cassandra_host=['kg-env-host1', 'kg-env-host2', 'kg-env-host3'], cassandra_host=['kg-env-host1', 'kg-env-host2', 'kg-env-host3'],
cassandra_username='kg-env-user', cassandra_username='kg-env-user',
cassandra_password='kg-env-pass', cassandra_password='kg-env-pass',
keyspace='knowledge' keyspace='knowledge',
replication_factor=1,
) )
@patch('trustgraph.storage.knowledge.store.KnowledgeTableStore') @patch('trustgraph.storage.knowledge.store.KnowledgeTableStore')
@ -239,7 +240,8 @@ class TestKgStoreConfiguration:
cassandra_host=['explicit-host'], cassandra_host=['explicit-host'],
cassandra_username='explicit-user', cassandra_username='explicit-user',
cassandra_password='explicit-pass', cassandra_password='explicit-pass',
keyspace='knowledge' keyspace='knowledge',
replication_factor=1,
) )
@patch('trustgraph.storage.knowledge.store.KnowledgeTableStore') @patch('trustgraph.storage.knowledge.store.KnowledgeTableStore')
@ -260,7 +262,8 @@ class TestKgStoreConfiguration:
cassandra_host=['compat-host'], cassandra_host=['compat-host'],
cassandra_username=None, # Should be None since cassandra_user is ignored cassandra_username=None, # Should be None since cassandra_user is ignored
cassandra_password='compat-pass', cassandra_password='compat-pass',
keyspace='knowledge' keyspace='knowledge',
replication_factor=1,
) )
@patch('trustgraph.storage.knowledge.store.KnowledgeTableStore') @patch('trustgraph.storage.knowledge.store.KnowledgeTableStore')
@ -277,7 +280,8 @@ class TestKgStoreConfiguration:
cassandra_host=['cassandra'], cassandra_host=['cassandra'],
cassandra_username=None, cassandra_username=None,
cassandra_password=None, cassandra_password=None,
keyspace='knowledge' keyspace='knowledge',
replication_factor=1,
) )
@ -425,5 +429,6 @@ class TestConfigurationPriorityIntegration:
cassandra_host=['param-host'], # From parameter cassandra_host=['param-host'], # From parameter
cassandra_username='env-user', # From environment cassandra_username='env-user', # From environment
cassandra_password='env-pass', # From environment cassandra_password='env-pass', # From environment
keyspace='knowledge' keyspace='knowledge',
replication_factor=1,
) )

View file

@ -21,7 +21,8 @@ def get_cassandra_defaults() -> dict:
'host': os.getenv('CASSANDRA_HOST', 'cassandra'), 'host': os.getenv('CASSANDRA_HOST', 'cassandra'),
'username': os.getenv('CASSANDRA_USERNAME'), 'username': os.getenv('CASSANDRA_USERNAME'),
'password': os.getenv('CASSANDRA_PASSWORD'), 'password': os.getenv('CASSANDRA_PASSWORD'),
'keyspace': os.getenv('CASSANDRA_KEYSPACE') 'keyspace': os.getenv('CASSANDRA_KEYSPACE'),
'replication_factor': int(os.getenv('CASSANDRA_REPLICATION_FACTOR', '1'))
} }
@ -85,6 +86,17 @@ def add_cassandra_args(parser: argparse.ArgumentParser) -> None:
help=keyspace_help help=keyspace_help
) )
replication_factor_help = f"Cassandra keyspace replication factor (default: {defaults['replication_factor']})"
if 'CASSANDRA_REPLICATION_FACTOR' in os.environ:
replication_factor_help += " [from CASSANDRA_REPLICATION_FACTOR]"
parser.add_argument(
'--cassandra-replication-factor',
type=int,
default=defaults['replication_factor'],
help=replication_factor_help
)
def resolve_cassandra_config( def resolve_cassandra_config(
args: Optional[Any] = None, args: Optional[Any] = None,
@ -92,7 +104,7 @@ def resolve_cassandra_config(
username: Optional[str] = None, username: Optional[str] = None,
password: Optional[str] = None, password: Optional[str] = None,
default_keyspace: Optional[str] = None default_keyspace: Optional[str] = None
) -> Tuple[List[str], Optional[str], Optional[str], Optional[str]]: ) -> Tuple[List[str], Optional[str], Optional[str], Optional[str], int]:
""" """
Resolve Cassandra configuration from various sources. Resolve Cassandra configuration from various sources.
@ -100,22 +112,24 @@ def resolve_cassandra_config(
Converts host string to list format for Cassandra driver. Converts host string to list format for Cassandra driver.
Args: Args:
args: Optional argparse namespace with cassandra_host, cassandra_username, cassandra_password, cassandra_keyspace args: Optional argparse namespace with cassandra_host, cassandra_username, cassandra_password, cassandra_keyspace, cassandra_replication_factor
host: Optional explicit host parameter (overrides args) host: Optional explicit host parameter (overrides args)
username: Optional explicit username parameter (overrides args) username: Optional explicit username parameter (overrides args)
password: Optional explicit password parameter (overrides args) password: Optional explicit password parameter (overrides args)
default_keyspace: Optional default keyspace if not specified elsewhere default_keyspace: Optional default keyspace if not specified elsewhere
Returns: Returns:
tuple: (hosts_list, username, password, keyspace) tuple: (hosts_list, username, password, keyspace, replication_factor)
""" """
# If args provided, extract values # If args provided, extract values
keyspace = None keyspace = None
replication_factor = 1
if args is not None: if args is not None:
host = host or getattr(args, 'cassandra_host', None) host = host or getattr(args, 'cassandra_host', None)
username = username or getattr(args, 'cassandra_username', None) username = username or getattr(args, 'cassandra_username', None)
password = password or getattr(args, 'cassandra_password', None) password = password or getattr(args, 'cassandra_password', None)
keyspace = getattr(args, 'cassandra_keyspace', None) keyspace = getattr(args, 'cassandra_keyspace', None)
replication_factor = getattr(args, 'cassandra_replication_factor', 1)
# Apply defaults if still None # Apply defaults if still None
defaults = get_cassandra_defaults() defaults = get_cassandra_defaults()
@ -123,6 +137,7 @@ def resolve_cassandra_config(
username = username or defaults['username'] username = username or defaults['username']
password = password or defaults['password'] password = password or defaults['password']
keyspace = keyspace or defaults['keyspace'] or default_keyspace keyspace = keyspace or defaults['keyspace'] or default_keyspace
replication_factor = replication_factor or defaults['replication_factor']
# Convert host string to list # Convert host string to list
if isinstance(host, str): if isinstance(host, str):
@ -130,13 +145,13 @@ def resolve_cassandra_config(
else: else:
hosts = host hosts = host
return hosts, username, password, keyspace return hosts, username, password, keyspace, replication_factor
def get_cassandra_config_from_params( def get_cassandra_config_from_params(
params: dict, params: dict,
default_keyspace: Optional[str] = None default_keyspace: Optional[str] = None
) -> Tuple[List[str], Optional[str], Optional[str], Optional[str]]: ) -> Tuple[List[str], Optional[str], Optional[str], Optional[str], int]:
""" """
Extract and resolve Cassandra configuration from a parameters dictionary. Extract and resolve Cassandra configuration from a parameters dictionary.
@ -145,14 +160,12 @@ def get_cassandra_config_from_params(
default_keyspace: Optional default keyspace if not specified in params default_keyspace: Optional default keyspace if not specified in params
Returns: Returns:
tuple: (hosts_list, username, password, keyspace) tuple: (hosts_list, username, password, keyspace, replication_factor)
""" """
# Get Cassandra parameters
host = params.get('cassandra_host') host = params.get('cassandra_host')
username = params.get('cassandra_username') username = params.get('cassandra_username')
password = params.get('cassandra_password') password = params.get('cassandra_password')
# Use resolve function to handle defaults and list conversion
return resolve_cassandra_config( return resolve_cassandra_config(
host=host, host=host,
username=username, username=username,

View file

@ -15,13 +15,14 @@ TEMPLATE_WORKSPACE = "__template__"
class Configuration: class Configuration:
def __init__(self, push, host, username, password, keyspace): def __init__(self, push, host, username, password, keyspace,
replication_factor=1):
# External function to respond to update # External function to respond to update
self.push = push self.push = push
self.table_store = ConfigTableStore( self.table_store = ConfigTableStore(
host, username, password, keyspace host, username, password, keyspace, replication_factor
) )
async def inc_version(self): async def inc_version(self):

View file

@ -79,7 +79,7 @@ class Processor(AsyncProcessor):
cassandra_password = params.get("cassandra_password") cassandra_password = params.get("cassandra_password")
# Resolve configuration with environment variable fallback # Resolve configuration with environment variable fallback
hosts, username, password, keyspace = resolve_cassandra_config( hosts, username, password, keyspace, replication_factor = resolve_cassandra_config(
host=cassandra_host, host=cassandra_host,
username=cassandra_username, username=cassandra_username,
password=cassandra_password, password=cassandra_password,
@ -147,6 +147,7 @@ class Processor(AsyncProcessor):
username = self.cassandra_username, username = self.cassandra_username,
password = self.cassandra_password, password = self.cassandra_password,
keyspace = keyspace, keyspace = keyspace,
replication_factor = replication_factor,
push = self.push push = self.push
) )

View file

@ -17,11 +17,12 @@ class KnowledgeManager:
def __init__( def __init__(
self, cassandra_host, cassandra_username, cassandra_password, self, cassandra_host, cassandra_username, cassandra_password,
keyspace, flow_config, keyspace, flow_config, replication_factor=1,
): ):
self.table_store = KnowledgeTableStore( self.table_store = KnowledgeTableStore(
cassandra_host, cassandra_username, cassandra_password, keyspace cassandra_host, cassandra_username, cassandra_password, keyspace,
replication_factor
) )
self.loader_queue = asyncio.Queue(maxsize=20) self.loader_queue = asyncio.Queue(maxsize=20)

View file

@ -56,7 +56,7 @@ class Processor(WorkspaceProcessor):
cassandra_username = params.get("cassandra_username") cassandra_username = params.get("cassandra_username")
cassandra_password = params.get("cassandra_password") cassandra_password = params.get("cassandra_password")
hosts, username, password, keyspace = resolve_cassandra_config( hosts, username, password, keyspace, replication_factor = resolve_cassandra_config(
host=cassandra_host, host=cassandra_host,
username=cassandra_username, username=cassandra_username,
password=cassandra_password, password=cassandra_password,
@ -83,6 +83,7 @@ class Processor(WorkspaceProcessor):
cassandra_password = self.cassandra_password, cassandra_password = self.cassandra_password,
keyspace = keyspace, keyspace = keyspace,
flow_config = self, flow_config = self,
replication_factor = replication_factor,
) )
self.register_config_handler(self.on_knowledge_config, types=["flow"]) self.register_config_handler(self.on_knowledge_config, types=["flow"])

View file

@ -246,9 +246,11 @@ class IamService:
def __init__(self, host, username, password, keyspace, def __init__(self, host, username, password, keyspace,
bootstrap_mode, bootstrap_token=None, bootstrap_mode, bootstrap_token=None,
on_workspace_created=None, on_workspace_deleted=None): on_workspace_created=None, on_workspace_deleted=None,
replication_factor=1):
self.table_store = IamTableStore( self.table_store = IamTableStore(
host, username, password, keyspace, host, username, password, keyspace,
replication_factor,
) )
# bootstrap_mode: "token" or "bootstrap". In "token" mode the # bootstrap_mode: "token" or "bootstrap". In "token" mode the
# service auto-seeds on first start using the provided # service auto-seeds on first start using the provided

View file

@ -96,7 +96,7 @@ class Processor(AsyncProcessor):
cassandra_username = params.get("cassandra_username") cassandra_username = params.get("cassandra_username")
cassandra_password = params.get("cassandra_password") cassandra_password = params.get("cassandra_password")
hosts, username, password, keyspace = resolve_cassandra_config( hosts, username, password, keyspace, replication_factor = resolve_cassandra_config(
host=cassandra_host, host=cassandra_host,
username=cassandra_username, username=cassandra_username,
password=cassandra_password, password=cassandra_password,
@ -149,6 +149,7 @@ class Processor(AsyncProcessor):
username=self.cassandra_username, username=self.cassandra_username,
password=self.cassandra_password, password=self.cassandra_password,
keyspace=keyspace, keyspace=keyspace,
replication_factor=replication_factor,
bootstrap_mode=self.bootstrap_mode, bootstrap_mode=self.bootstrap_mode,
bootstrap_token=self.bootstrap_token, bootstrap_token=self.bootstrap_token,
on_workspace_created=self._ensure_workspace_registered, on_workspace_created=self._ensure_workspace_registered,

View file

@ -28,6 +28,7 @@ class Librarian:
bucket_name, keyspace, load_document, bucket_name, keyspace, load_document,
object_store_use_ssl=False, object_store_region=None, object_store_use_ssl=False, object_store_region=None,
min_chunk_size=1, # Default: no minimum (for Garage) min_chunk_size=1, # Default: no minimum (for Garage)
replication_factor=1,
): ):
self.blob_store = BlobStore( self.blob_store = BlobStore(
@ -36,7 +37,8 @@ class Librarian:
) )
self.table_store = LibraryTableStore( self.table_store = LibraryTableStore(
cassandra_host, cassandra_username, cassandra_password, keyspace cassandra_host, cassandra_username, cassandra_password, keyspace,
replication_factor
) )
self.load_document = load_document self.load_document = load_document

View file

@ -117,7 +117,7 @@ class Processor(WorkspaceProcessor):
cassandra_password = params.get("cassandra_password") cassandra_password = params.get("cassandra_password")
# Resolve configuration with environment variable fallback # Resolve configuration with environment variable fallback
hosts, username, password, keyspace = resolve_cassandra_config( hosts, username, password, keyspace, replication_factor = resolve_cassandra_config(
host=cassandra_host, host=cassandra_host,
username=cassandra_username, username=cassandra_username,
password=cassandra_password, password=cassandra_password,
@ -179,6 +179,7 @@ class Processor(WorkspaceProcessor):
object_store_secret_key = object_store_secret_key, object_store_secret_key = object_store_secret_key,
bucket_name = bucket_name, bucket_name = bucket_name,
keyspace = keyspace, keyspace = keyspace,
replication_factor = replication_factor,
load_document = self.load_document, load_document = self.load_document,
object_store_use_ssl = object_store_use_ssl, object_store_use_ssl = object_store_use_ssl,
object_store_region = object_store_region, object_store_region = object_store_region,
@ -450,13 +451,10 @@ class Processor(WorkspaceProcessor):
self.pubsub, q, schema=schema self.pubsub, q, schema=schema
) )
try:
await pub.start() await pub.start()
# FIXME: Time wait kludge?
await asyncio.sleep(1)
await pub.send(None, doc) await pub.send(None, doc)
finally:
await pub.stop() await pub.stop()
logger.debug("Document submitted") logger.debug("Document submitted")

View file

@ -47,7 +47,7 @@ class Processor(FlowProcessor):
cassandra_password = params.get("cassandra_password") cassandra_password = params.get("cassandra_password")
# Resolve configuration with environment variable fallback # Resolve configuration with environment variable fallback
hosts, username, password, keyspace = resolve_cassandra_config( hosts, username, password, keyspace, _ = resolve_cassandra_config(
host=cassandra_host, host=cassandra_host,
username=cassandra_username, username=cassandra_username,
password=cassandra_password password=cassandra_password

View file

@ -160,7 +160,7 @@ class Processor(TriplesQueryService):
cassandra_password = params.get("cassandra_password") cassandra_password = params.get("cassandra_password")
# Resolve configuration with environment variable fallback # Resolve configuration with environment variable fallback
hosts, username, password, keyspace = resolve_cassandra_config( hosts, username, password, keyspace, _ = resolve_cassandra_config(
host=cassandra_host, host=cassandra_host,
username=cassandra_username, username=cassandra_username,
password=cassandra_password password=cassandra_password

View file

@ -23,7 +23,7 @@ class Processor(FlowProcessor):
id = params.get("id") id = params.get("id")
# Use helper to resolve configuration # Use helper to resolve configuration
hosts, username, password, keyspace = resolve_cassandra_config( hosts, username, password, keyspace, replication_factor = resolve_cassandra_config(
host=params.get("cassandra_host"), host=params.get("cassandra_host"),
username=params.get("cassandra_username"), username=params.get("cassandra_username"),
password=params.get("cassandra_password"), password=params.get("cassandra_password"),
@ -59,6 +59,7 @@ class Processor(FlowProcessor):
cassandra_username = username, cassandra_username = username,
cassandra_password = password, cassandra_password = password,
keyspace = keyspace, keyspace = keyspace,
replication_factor = replication_factor,
) )
async def on_triples(self, msg, consumer, flow): async def on_triples(self, msg, consumer, flow):

View file

@ -47,7 +47,7 @@ class Processor(CollectionConfigHandler, FlowProcessor):
cassandra_password = params.get("cassandra_password") cassandra_password = params.get("cassandra_password")
# Resolve configuration with environment variable fallback # Resolve configuration with environment variable fallback
hosts, username, password, keyspace = resolve_cassandra_config( hosts, username, password, keyspace, _ = resolve_cassandra_config(
host=cassandra_host, host=cassandra_host,
username=cassandra_username, username=cassandra_username,
password=cassandra_password password=cassandra_password

View file

@ -125,7 +125,7 @@ class Processor(CollectionConfigHandler, TriplesStoreService):
cassandra_password = params.get("cassandra_password") cassandra_password = params.get("cassandra_password")
# Resolve configuration with environment variable fallback # Resolve configuration with environment variable fallback
hosts, username, password, keyspace = resolve_cassandra_config( hosts, username, password, keyspace, _ = resolve_cassandra_config(
host=cassandra_host, host=cassandra_host,
username=cassandra_username, username=cassandra_username,
password=cassandra_password password=cassandra_password

View file

@ -20,9 +20,11 @@ class ConfigTableStore:
def __init__( def __init__(
self, self,
cassandra_host, cassandra_username, cassandra_password, keyspace, cassandra_host, cassandra_username, cassandra_password, keyspace,
replication_factor=1,
): ):
self.keyspace = keyspace self.keyspace = keyspace
self.replication_factor = replication_factor
logger.info("Connecting to Cassandra...") logger.info("Connecting to Cassandra...")
@ -57,12 +59,11 @@ class ConfigTableStore:
logger.debug("Keyspace...") logger.debug("Keyspace...")
# FIXME: Replication factor should be configurable
self.cassandra.execute(f""" self.cassandra.execute(f"""
create keyspace if not exists {self.keyspace} create keyspace if not exists {self.keyspace}
with replication = {{ with replication = {{
'class' : 'SimpleStrategy', 'class' : 'SimpleStrategy',
'replication_factor' : 1 'replication_factor' : {self.replication_factor}
}}; }};
"""); """);

View file

@ -28,8 +28,10 @@ class IamTableStore:
self, self,
cassandra_host, cassandra_username, cassandra_password, cassandra_host, cassandra_username, cassandra_password,
keyspace, keyspace,
replication_factor=1,
): ):
self.keyspace = keyspace self.keyspace = keyspace
self.replication_factor = replication_factor
logger.info("IAM: connecting to Cassandra...") logger.info("IAM: connecting to Cassandra...")
@ -57,12 +59,11 @@ class IamTableStore:
self._prepare_statements() self._prepare_statements()
def _ensure_schema(self): def _ensure_schema(self):
# FIXME: Replication factor should be configurable.
self.cassandra.execute(f""" self.cassandra.execute(f"""
create keyspace if not exists {self.keyspace} create keyspace if not exists {self.keyspace}
with replication = {{ with replication = {{
'class' : 'SimpleStrategy', 'class' : 'SimpleStrategy',
'replication_factor' : 1 'replication_factor' : {self.replication_factor}
}}; }};
""") """)
self.cassandra.set_keyspace(self.keyspace) self.cassandra.set_keyspace(self.keyspace)

View file

@ -36,9 +36,11 @@ class KnowledgeTableStore:
def __init__( def __init__(
self, self,
cassandra_host, cassandra_username, cassandra_password, keyspace, cassandra_host, cassandra_username, cassandra_password, keyspace,
replication_factor=1,
): ):
self.keyspace = keyspace self.keyspace = keyspace
self.replication_factor = replication_factor
logger.info("Connecting to Cassandra...") logger.info("Connecting to Cassandra...")
@ -73,12 +75,11 @@ class KnowledgeTableStore:
logger.debug("Keyspace...") logger.debug("Keyspace...")
# FIXME: Replication factor should be configurable
self.cassandra.execute(f""" self.cassandra.execute(f"""
create keyspace if not exists {self.keyspace} create keyspace if not exists {self.keyspace}
with replication = {{ with replication = {{
'class' : 'SimpleStrategy', 'class' : 'SimpleStrategy',
'replication_factor' : 1 'replication_factor' : {self.replication_factor}
}}; }};
"""); """);

View file

@ -40,9 +40,11 @@ class LibraryTableStore:
def __init__( def __init__(
self, self,
cassandra_host, cassandra_username, cassandra_password, keyspace, cassandra_host, cassandra_username, cassandra_password, keyspace,
replication_factor=1,
): ):
self.keyspace = keyspace self.keyspace = keyspace
self.replication_factor = replication_factor
logger.info("Connecting to Cassandra...") logger.info("Connecting to Cassandra...")
@ -77,12 +79,11 @@ class LibraryTableStore:
logger.debug("Keyspace...") logger.debug("Keyspace...")
# FIXME: Replication factor should be configurable
self.cassandra.execute(f""" self.cassandra.execute(f"""
create keyspace if not exists {self.keyspace} create keyspace if not exists {self.keyspace}
with replication = {{ with replication = {{
'class' : 'SimpleStrategy', 'class' : 'SimpleStrategy',
'replication_factor' : 1 'replication_factor' : {self.replication_factor}
}}; }};
"""); """);