Recent fixes -> release/v2.4 (#891)

* Fix publisher resource leak in librarian submit_document (#883)

Wrap pub.start()/pub.send() in try/finally to guarantee pub.stop() is
always called, even if either call raises. Remove the unnecessary
asyncio.sleep(1) kludge.

* Make Cassandra replication factor configurable (issue #787) (#887)

Add CASSANDRA_REPLICATION_FACTOR environment variable and
--cassandra-replication-factor CLI argument to cassandra_config.py.

Update all four table store constructors (ConfigTableStore,
KnowledgeTableStore, LibraryTableStore, IamTableStore) to accept
an optional replication_factor parameter (default 1) and use it in
the keyspace creation CQL queries.

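Thread the replication factor through all service constructors:
Configuration, KnowledgeManager, Librarian, IamService, and the
knowledge store Processor. The other callers of
resolve_cassandra_config touched by this change unpack the new fifth
return value and discard it.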

* Update tests

---------

Co-authored-by: gittihub-jpg <rico@springer-mail.net>
This commit is contained in:
cybermaggedon 2026-05-08 19:48:12 +01:00 committed by GitHub
parent fe542b3d33
commit fd8d5b2c42
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
21 changed files with 105 additions and 72 deletions

View file

@ -15,13 +15,14 @@ TEMPLATE_WORKSPACE = "__template__"
class Configuration:
def __init__(self, push, host, username, password, keyspace):
def __init__(self, push, host, username, password, keyspace,
replication_factor=1):
# External function to respond to update
self.push = push
self.table_store = ConfigTableStore(
host, username, password, keyspace
host, username, password, keyspace, replication_factor
)
async def inc_version(self):

View file

@ -79,7 +79,7 @@ class Processor(AsyncProcessor):
cassandra_password = params.get("cassandra_password")
# Resolve configuration with environment variable fallback
hosts, username, password, keyspace = resolve_cassandra_config(
hosts, username, password, keyspace, replication_factor = resolve_cassandra_config(
host=cassandra_host,
username=cassandra_username,
password=cassandra_password,
@ -147,6 +147,7 @@ class Processor(AsyncProcessor):
username = self.cassandra_username,
password = self.cassandra_password,
keyspace = keyspace,
replication_factor = replication_factor,
push = self.push
)

View file

@ -17,11 +17,12 @@ class KnowledgeManager:
def __init__(
self, cassandra_host, cassandra_username, cassandra_password,
keyspace, flow_config,
keyspace, flow_config, replication_factor=1,
):
self.table_store = KnowledgeTableStore(
cassandra_host, cassandra_username, cassandra_password, keyspace
cassandra_host, cassandra_username, cassandra_password, keyspace,
replication_factor
)
self.loader_queue = asyncio.Queue(maxsize=20)

View file

@ -56,7 +56,7 @@ class Processor(WorkspaceProcessor):
cassandra_username = params.get("cassandra_username")
cassandra_password = params.get("cassandra_password")
hosts, username, password, keyspace = resolve_cassandra_config(
hosts, username, password, keyspace, replication_factor = resolve_cassandra_config(
host=cassandra_host,
username=cassandra_username,
password=cassandra_password,
@ -83,6 +83,7 @@ class Processor(WorkspaceProcessor):
cassandra_password = self.cassandra_password,
keyspace = keyspace,
flow_config = self,
replication_factor = replication_factor,
)
self.register_config_handler(self.on_knowledge_config, types=["flow"])

View file

@ -246,9 +246,11 @@ class IamService:
def __init__(self, host, username, password, keyspace,
bootstrap_mode, bootstrap_token=None,
on_workspace_created=None, on_workspace_deleted=None):
on_workspace_created=None, on_workspace_deleted=None,
replication_factor=1):
self.table_store = IamTableStore(
host, username, password, keyspace,
replication_factor,
)
# bootstrap_mode: "token" or "bootstrap". In "token" mode the
# service auto-seeds on first start using the provided

View file

@ -96,7 +96,7 @@ class Processor(AsyncProcessor):
cassandra_username = params.get("cassandra_username")
cassandra_password = params.get("cassandra_password")
hosts, username, password, keyspace = resolve_cassandra_config(
hosts, username, password, keyspace, replication_factor = resolve_cassandra_config(
host=cassandra_host,
username=cassandra_username,
password=cassandra_password,
@ -149,6 +149,7 @@ class Processor(AsyncProcessor):
username=self.cassandra_username,
password=self.cassandra_password,
keyspace=keyspace,
replication_factor=replication_factor,
bootstrap_mode=self.bootstrap_mode,
bootstrap_token=self.bootstrap_token,
on_workspace_created=self._ensure_workspace_registered,

View file

@ -28,6 +28,7 @@ class Librarian:
bucket_name, keyspace, load_document,
object_store_use_ssl=False, object_store_region=None,
min_chunk_size=1, # Default: no minimum (for Garage)
replication_factor=1,
):
self.blob_store = BlobStore(
@ -36,7 +37,8 @@ class Librarian:
)
self.table_store = LibraryTableStore(
cassandra_host, cassandra_username, cassandra_password, keyspace
cassandra_host, cassandra_username, cassandra_password, keyspace,
replication_factor
)
self.load_document = load_document

View file

@ -117,7 +117,7 @@ class Processor(WorkspaceProcessor):
cassandra_password = params.get("cassandra_password")
# Resolve configuration with environment variable fallback
hosts, username, password, keyspace = resolve_cassandra_config(
hosts, username, password, keyspace, replication_factor = resolve_cassandra_config(
host=cassandra_host,
username=cassandra_username,
password=cassandra_password,
@ -179,6 +179,7 @@ class Processor(WorkspaceProcessor):
object_store_secret_key = object_store_secret_key,
bucket_name = bucket_name,
keyspace = keyspace,
replication_factor = replication_factor,
load_document = self.load_document,
object_store_use_ssl = object_store_use_ssl,
object_store_region = object_store_region,
@ -450,14 +451,11 @@ class Processor(WorkspaceProcessor):
self.pubsub, q, schema=schema
)
await pub.start()
# FIXME: Time wait kludge?
await asyncio.sleep(1)
await pub.send(None, doc)
await pub.stop()
try:
await pub.start()
await pub.send(None, doc)
finally:
await pub.stop()
logger.debug("Document submitted")

View file

@ -47,7 +47,7 @@ class Processor(FlowProcessor):
cassandra_password = params.get("cassandra_password")
# Resolve configuration with environment variable fallback
hosts, username, password, keyspace = resolve_cassandra_config(
hosts, username, password, keyspace, _ = resolve_cassandra_config(
host=cassandra_host,
username=cassandra_username,
password=cassandra_password

View file

@ -160,7 +160,7 @@ class Processor(TriplesQueryService):
cassandra_password = params.get("cassandra_password")
# Resolve configuration with environment variable fallback
hosts, username, password, keyspace = resolve_cassandra_config(
hosts, username, password, keyspace, _ = resolve_cassandra_config(
host=cassandra_host,
username=cassandra_username,
password=cassandra_password

View file

@ -23,7 +23,7 @@ class Processor(FlowProcessor):
id = params.get("id")
# Use helper to resolve configuration
hosts, username, password, keyspace = resolve_cassandra_config(
hosts, username, password, keyspace, replication_factor = resolve_cassandra_config(
host=params.get("cassandra_host"),
username=params.get("cassandra_username"),
password=params.get("cassandra_password"),
@ -59,6 +59,7 @@ class Processor(FlowProcessor):
cassandra_username = username,
cassandra_password = password,
keyspace = keyspace,
replication_factor = replication_factor,
)
async def on_triples(self, msg, consumer, flow):

View file

@ -47,7 +47,7 @@ class Processor(CollectionConfigHandler, FlowProcessor):
cassandra_password = params.get("cassandra_password")
# Resolve configuration with environment variable fallback
hosts, username, password, keyspace = resolve_cassandra_config(
hosts, username, password, keyspace, _ = resolve_cassandra_config(
host=cassandra_host,
username=cassandra_username,
password=cassandra_password

View file

@ -125,7 +125,7 @@ class Processor(CollectionConfigHandler, TriplesStoreService):
cassandra_password = params.get("cassandra_password")
# Resolve configuration with environment variable fallback
hosts, username, password, keyspace = resolve_cassandra_config(
hosts, username, password, keyspace, _ = resolve_cassandra_config(
host=cassandra_host,
username=cassandra_username,
password=cassandra_password

View file

@ -20,9 +20,11 @@ class ConfigTableStore:
def __init__(
self,
cassandra_host, cassandra_username, cassandra_password, keyspace,
replication_factor=1,
):
self.keyspace = keyspace
self.replication_factor = replication_factor
logger.info("Connecting to Cassandra...")
@ -57,12 +59,11 @@ class ConfigTableStore:
logger.debug("Keyspace...")
# FIXME: Replication factor should be configurable
self.cassandra.execute(f"""
create keyspace if not exists {self.keyspace}
with replication = {{
'class' : 'SimpleStrategy',
'replication_factor' : 1
'replication_factor' : {self.replication_factor}
}};
""");

View file

@ -28,8 +28,10 @@ class IamTableStore:
self,
cassandra_host, cassandra_username, cassandra_password,
keyspace,
replication_factor=1,
):
self.keyspace = keyspace
self.replication_factor = replication_factor
logger.info("IAM: connecting to Cassandra...")
@ -57,12 +59,11 @@ class IamTableStore:
self._prepare_statements()
def _ensure_schema(self):
# FIXME: Replication factor should be configurable.
self.cassandra.execute(f"""
create keyspace if not exists {self.keyspace}
with replication = {{
'class' : 'SimpleStrategy',
'replication_factor' : 1
'replication_factor' : {self.replication_factor}
}};
""")
self.cassandra.set_keyspace(self.keyspace)

View file

@ -36,9 +36,11 @@ class KnowledgeTableStore:
def __init__(
self,
cassandra_host, cassandra_username, cassandra_password, keyspace,
replication_factor=1,
):
self.keyspace = keyspace
self.replication_factor = replication_factor
logger.info("Connecting to Cassandra...")
@ -73,12 +75,11 @@ class KnowledgeTableStore:
logger.debug("Keyspace...")
# FIXME: Replication factor should be configurable
self.cassandra.execute(f"""
create keyspace if not exists {self.keyspace}
with replication = {{
'class' : 'SimpleStrategy',
'replication_factor' : 1
'replication_factor' : {self.replication_factor}
}};
""");

View file

@ -40,9 +40,11 @@ class LibraryTableStore:
def __init__(
self,
cassandra_host, cassandra_username, cassandra_password, keyspace,
replication_factor=1,
):
self.keyspace = keyspace
self.replication_factor = replication_factor
logger.info("Connecting to Cassandra...")
@ -77,12 +79,11 @@ class LibraryTableStore:
logger.debug("Keyspace...")
# FIXME: Replication factor should be configurable
self.cassandra.execute(f"""
create keyspace if not exists {self.keyspace}
with replication = {{
'class' : 'SimpleStrategy',
'replication_factor' : 1
'replication_factor' : {self.replication_factor}
}};
""");