mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-05-10 07:42:38 +02:00
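Add CASSANDRA_REPLICATION_FACTOR environment variable (default: 1) and --cassandra-replication-factor CLI argument to cassandra_config.py. resolve_cassandra_config() now returns a 5-tuple (hosts, username, password, keyspace, replication_factor) instead of a 4-tuple, so callers that unpack its result must be updated. Update all four table store constructors (ConfigTableStore, KnowledgeTableStore, LibraryTableStore, IamTableStore) to accept an optional replication_factor parameter (default: 1) and use it in keyspace creation CQL queries in place of the previously hard-coded value of 1. Thread the replication factor through all service constructors: Configuration, KnowledgeManager, Librarian, IamService, and knowledge store Processor.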
This commit is contained in:
parent
f9d6606423
commit
e23d4a5b58
10 changed files with 46 additions and 20 deletions
|
|
@ -21,7 +21,8 @@ def get_cassandra_defaults() -> dict:
|
||||||
'host': os.getenv('CASSANDRA_HOST', 'cassandra'),
|
'host': os.getenv('CASSANDRA_HOST', 'cassandra'),
|
||||||
'username': os.getenv('CASSANDRA_USERNAME'),
|
'username': os.getenv('CASSANDRA_USERNAME'),
|
||||||
'password': os.getenv('CASSANDRA_PASSWORD'),
|
'password': os.getenv('CASSANDRA_PASSWORD'),
|
||||||
'keyspace': os.getenv('CASSANDRA_KEYSPACE')
|
'keyspace': os.getenv('CASSANDRA_KEYSPACE'),
|
||||||
|
'replication_factor': int(os.getenv('CASSANDRA_REPLICATION_FACTOR', '1'))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -85,6 +86,17 @@ def add_cassandra_args(parser: argparse.ArgumentParser) -> None:
|
||||||
help=keyspace_help
|
help=keyspace_help
|
||||||
)
|
)
|
||||||
|
|
||||||
|
replication_factor_help = f"Cassandra keyspace replication factor (default: {defaults['replication_factor']})"
|
||||||
|
if 'CASSANDRA_REPLICATION_FACTOR' in os.environ:
|
||||||
|
replication_factor_help += " [from CASSANDRA_REPLICATION_FACTOR]"
|
||||||
|
|
||||||
|
parser.add_argument(
|
||||||
|
'--cassandra-replication-factor',
|
||||||
|
type=int,
|
||||||
|
default=defaults['replication_factor'],
|
||||||
|
help=replication_factor_help
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def resolve_cassandra_config(
|
def resolve_cassandra_config(
|
||||||
args: Optional[Any] = None,
|
args: Optional[Any] = None,
|
||||||
|
|
@ -92,7 +104,7 @@ def resolve_cassandra_config(
|
||||||
username: Optional[str] = None,
|
username: Optional[str] = None,
|
||||||
password: Optional[str] = None,
|
password: Optional[str] = None,
|
||||||
default_keyspace: Optional[str] = None
|
default_keyspace: Optional[str] = None
|
||||||
) -> Tuple[List[str], Optional[str], Optional[str], Optional[str]]:
|
) -> Tuple[List[str], Optional[str], Optional[str], Optional[str], int]:
|
||||||
"""
|
"""
|
||||||
Resolve Cassandra configuration from various sources.
|
Resolve Cassandra configuration from various sources.
|
||||||
|
|
||||||
|
|
@ -100,22 +112,24 @@ def resolve_cassandra_config(
|
||||||
Converts host string to list format for Cassandra driver.
|
Converts host string to list format for Cassandra driver.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
args: Optional argparse namespace with cassandra_host, cassandra_username, cassandra_password, cassandra_keyspace
|
args: Optional argparse namespace with cassandra_host, cassandra_username, cassandra_password, cassandra_keyspace, cassandra_replication_factor
|
||||||
host: Optional explicit host parameter (overrides args)
|
host: Optional explicit host parameter (overrides args)
|
||||||
username: Optional explicit username parameter (overrides args)
|
username: Optional explicit username parameter (overrides args)
|
||||||
password: Optional explicit password parameter (overrides args)
|
password: Optional explicit password parameter (overrides args)
|
||||||
default_keyspace: Optional default keyspace if not specified elsewhere
|
default_keyspace: Optional default keyspace if not specified elsewhere
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
tuple: (hosts_list, username, password, keyspace)
|
tuple: (hosts_list, username, password, keyspace, replication_factor)
|
||||||
"""
|
"""
|
||||||
# If args provided, extract values
|
# If args provided, extract values
|
||||||
keyspace = None
|
keyspace = None
|
||||||
|
replication_factor = 1
|
||||||
if args is not None:
|
if args is not None:
|
||||||
host = host or getattr(args, 'cassandra_host', None)
|
host = host or getattr(args, 'cassandra_host', None)
|
||||||
username = username or getattr(args, 'cassandra_username', None)
|
username = username or getattr(args, 'cassandra_username', None)
|
||||||
password = password or getattr(args, 'cassandra_password', None)
|
password = password or getattr(args, 'cassandra_password', None)
|
||||||
keyspace = getattr(args, 'cassandra_keyspace', None)
|
keyspace = getattr(args, 'cassandra_keyspace', None)
|
||||||
|
replication_factor = getattr(args, 'cassandra_replication_factor', 1)
|
||||||
|
|
||||||
# Apply defaults if still None
|
# Apply defaults if still None
|
||||||
defaults = get_cassandra_defaults()
|
defaults = get_cassandra_defaults()
|
||||||
|
|
@ -123,6 +137,7 @@ def resolve_cassandra_config(
|
||||||
username = username or defaults['username']
|
username = username or defaults['username']
|
||||||
password = password or defaults['password']
|
password = password or defaults['password']
|
||||||
keyspace = keyspace or defaults['keyspace'] or default_keyspace
|
keyspace = keyspace or defaults['keyspace'] or default_keyspace
|
||||||
|
replication_factor = replication_factor or defaults['replication_factor']
|
||||||
|
|
||||||
# Convert host string to list
|
# Convert host string to list
|
||||||
if isinstance(host, str):
|
if isinstance(host, str):
|
||||||
|
|
@ -130,7 +145,7 @@ def resolve_cassandra_config(
|
||||||
else:
|
else:
|
||||||
hosts = host
|
hosts = host
|
||||||
|
|
||||||
return hosts, username, password, keyspace
|
return hosts, username, password, keyspace, replication_factor
|
||||||
|
|
||||||
|
|
||||||
def get_cassandra_config_from_params(
|
def get_cassandra_config_from_params(
|
||||||
|
|
|
||||||
|
|
@ -11,13 +11,14 @@ logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
class Configuration:
|
class Configuration:
|
||||||
|
|
||||||
def __init__(self, push, host, username, password, keyspace):
|
def __init__(self, push, host, username, password, keyspace,
|
||||||
|
replication_factor=1):
|
||||||
|
|
||||||
# External function to respond to update
|
# External function to respond to update
|
||||||
self.push = push
|
self.push = push
|
||||||
|
|
||||||
self.table_store = ConfigTableStore(
|
self.table_store = ConfigTableStore(
|
||||||
host, username, password, keyspace
|
host, username, password, keyspace, replication_factor
|
||||||
)
|
)
|
||||||
|
|
||||||
async def inc_version(self):
|
async def inc_version(self):
|
||||||
|
|
|
||||||
|
|
@ -17,11 +17,12 @@ class KnowledgeManager:
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self, cassandra_host, cassandra_username, cassandra_password,
|
self, cassandra_host, cassandra_username, cassandra_password,
|
||||||
keyspace, flow_config,
|
keyspace, flow_config, replication_factor=1,
|
||||||
):
|
):
|
||||||
|
|
||||||
self.table_store = KnowledgeTableStore(
|
self.table_store = KnowledgeTableStore(
|
||||||
cassandra_host, cassandra_username, cassandra_password, keyspace
|
cassandra_host, cassandra_username, cassandra_password, keyspace,
|
||||||
|
replication_factor
|
||||||
)
|
)
|
||||||
|
|
||||||
self.loader_queue = asyncio.Queue(maxsize=20)
|
self.loader_queue = asyncio.Queue(maxsize=20)
|
||||||
|
|
|
||||||
|
|
@ -245,9 +245,11 @@ def _sign_jwt(kid, private_pem, claims):
|
||||||
class IamService:
|
class IamService:
|
||||||
|
|
||||||
def __init__(self, host, username, password, keyspace,
|
def __init__(self, host, username, password, keyspace,
|
||||||
bootstrap_mode, bootstrap_token=None):
|
bootstrap_mode, bootstrap_token=None,
|
||||||
|
replication_factor=1):
|
||||||
self.table_store = IamTableStore(
|
self.table_store = IamTableStore(
|
||||||
host, username, password, keyspace,
|
host, username, password, keyspace,
|
||||||
|
replication_factor,
|
||||||
)
|
)
|
||||||
# bootstrap_mode: "token" or "bootstrap". In "token" mode the
|
# bootstrap_mode: "token" or "bootstrap". In "token" mode the
|
||||||
# service auto-seeds on first start using the provided
|
# service auto-seeds on first start using the provided
|
||||||
|
|
|
||||||
|
|
@ -28,6 +28,7 @@ class Librarian:
|
||||||
bucket_name, keyspace, load_document,
|
bucket_name, keyspace, load_document,
|
||||||
object_store_use_ssl=False, object_store_region=None,
|
object_store_use_ssl=False, object_store_region=None,
|
||||||
min_chunk_size=1, # Default: no minimum (for Garage)
|
min_chunk_size=1, # Default: no minimum (for Garage)
|
||||||
|
replication_factor=1,
|
||||||
):
|
):
|
||||||
|
|
||||||
self.blob_store = BlobStore(
|
self.blob_store = BlobStore(
|
||||||
|
|
@ -36,7 +37,8 @@ class Librarian:
|
||||||
)
|
)
|
||||||
|
|
||||||
self.table_store = LibraryTableStore(
|
self.table_store = LibraryTableStore(
|
||||||
cassandra_host, cassandra_username, cassandra_password, keyspace
|
cassandra_host, cassandra_username, cassandra_password, keyspace,
|
||||||
|
replication_factor
|
||||||
)
|
)
|
||||||
|
|
||||||
self.load_document = load_document
|
self.load_document = load_document
|
||||||
|
|
|
||||||
|
|
@ -23,7 +23,7 @@ class Processor(FlowProcessor):
|
||||||
id = params.get("id")
|
id = params.get("id")
|
||||||
|
|
||||||
# Use helper to resolve configuration
|
# Use helper to resolve configuration
|
||||||
hosts, username, password, keyspace = resolve_cassandra_config(
|
hosts, username, password, keyspace, replication_factor = resolve_cassandra_config(
|
||||||
host=params.get("cassandra_host"),
|
host=params.get("cassandra_host"),
|
||||||
username=params.get("cassandra_username"),
|
username=params.get("cassandra_username"),
|
||||||
password=params.get("cassandra_password"),
|
password=params.get("cassandra_password"),
|
||||||
|
|
@ -59,6 +59,7 @@ class Processor(FlowProcessor):
|
||||||
cassandra_username = username,
|
cassandra_username = username,
|
||||||
cassandra_password = password,
|
cassandra_password = password,
|
||||||
keyspace = keyspace,
|
keyspace = keyspace,
|
||||||
|
replication_factor = replication_factor,
|
||||||
)
|
)
|
||||||
|
|
||||||
async def on_triples(self, msg, consumer, flow):
|
async def on_triples(self, msg, consumer, flow):
|
||||||
|
|
|
||||||
|
|
@ -20,9 +20,11 @@ class ConfigTableStore:
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
cassandra_host, cassandra_username, cassandra_password, keyspace,
|
cassandra_host, cassandra_username, cassandra_password, keyspace,
|
||||||
|
replication_factor=1,
|
||||||
):
|
):
|
||||||
|
|
||||||
self.keyspace = keyspace
|
self.keyspace = keyspace
|
||||||
|
self.replication_factor = replication_factor
|
||||||
|
|
||||||
logger.info("Connecting to Cassandra...")
|
logger.info("Connecting to Cassandra...")
|
||||||
|
|
||||||
|
|
@ -57,12 +59,11 @@ class ConfigTableStore:
|
||||||
|
|
||||||
logger.debug("Keyspace...")
|
logger.debug("Keyspace...")
|
||||||
|
|
||||||
# FIXME: Replication factor should be configurable
|
|
||||||
self.cassandra.execute(f"""
|
self.cassandra.execute(f"""
|
||||||
create keyspace if not exists {self.keyspace}
|
create keyspace if not exists {self.keyspace}
|
||||||
with replication = {{
|
with replication = {{
|
||||||
'class' : 'SimpleStrategy',
|
'class' : 'SimpleStrategy',
|
||||||
'replication_factor' : 1
|
'replication_factor' : {self.replication_factor}
|
||||||
}};
|
}};
|
||||||
""");
|
""");
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -28,8 +28,10 @@ class IamTableStore:
|
||||||
self,
|
self,
|
||||||
cassandra_host, cassandra_username, cassandra_password,
|
cassandra_host, cassandra_username, cassandra_password,
|
||||||
keyspace,
|
keyspace,
|
||||||
|
replication_factor=1,
|
||||||
):
|
):
|
||||||
self.keyspace = keyspace
|
self.keyspace = keyspace
|
||||||
|
self.replication_factor = replication_factor
|
||||||
|
|
||||||
logger.info("IAM: connecting to Cassandra...")
|
logger.info("IAM: connecting to Cassandra...")
|
||||||
|
|
||||||
|
|
@ -57,12 +59,11 @@ class IamTableStore:
|
||||||
self._prepare_statements()
|
self._prepare_statements()
|
||||||
|
|
||||||
def _ensure_schema(self):
|
def _ensure_schema(self):
|
||||||
# FIXME: Replication factor should be configurable.
|
|
||||||
self.cassandra.execute(f"""
|
self.cassandra.execute(f"""
|
||||||
create keyspace if not exists {self.keyspace}
|
create keyspace if not exists {self.keyspace}
|
||||||
with replication = {{
|
with replication = {{
|
||||||
'class' : 'SimpleStrategy',
|
'class' : 'SimpleStrategy',
|
||||||
'replication_factor' : 1
|
'replication_factor' : {self.replication_factor}
|
||||||
}};
|
}};
|
||||||
""")
|
""")
|
||||||
self.cassandra.set_keyspace(self.keyspace)
|
self.cassandra.set_keyspace(self.keyspace)
|
||||||
|
|
|
||||||
|
|
@ -36,9 +36,11 @@ class KnowledgeTableStore:
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
cassandra_host, cassandra_username, cassandra_password, keyspace,
|
cassandra_host, cassandra_username, cassandra_password, keyspace,
|
||||||
|
replication_factor=1,
|
||||||
):
|
):
|
||||||
|
|
||||||
self.keyspace = keyspace
|
self.keyspace = keyspace
|
||||||
|
self.replication_factor = replication_factor
|
||||||
|
|
||||||
logger.info("Connecting to Cassandra...")
|
logger.info("Connecting to Cassandra...")
|
||||||
|
|
||||||
|
|
@ -73,12 +75,11 @@ class KnowledgeTableStore:
|
||||||
|
|
||||||
logger.debug("Keyspace...")
|
logger.debug("Keyspace...")
|
||||||
|
|
||||||
# FIXME: Replication factor should be configurable
|
|
||||||
self.cassandra.execute(f"""
|
self.cassandra.execute(f"""
|
||||||
create keyspace if not exists {self.keyspace}
|
create keyspace if not exists {self.keyspace}
|
||||||
with replication = {{
|
with replication = {{
|
||||||
'class' : 'SimpleStrategy',
|
'class' : 'SimpleStrategy',
|
||||||
'replication_factor' : 1
|
'replication_factor' : {self.replication_factor}
|
||||||
}};
|
}};
|
||||||
""");
|
""");
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -40,9 +40,11 @@ class LibraryTableStore:
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
cassandra_host, cassandra_username, cassandra_password, keyspace,
|
cassandra_host, cassandra_username, cassandra_password, keyspace,
|
||||||
|
replication_factor=1,
|
||||||
):
|
):
|
||||||
|
|
||||||
self.keyspace = keyspace
|
self.keyspace = keyspace
|
||||||
|
self.replication_factor = replication_factor
|
||||||
|
|
||||||
logger.info("Connecting to Cassandra...")
|
logger.info("Connecting to Cassandra...")
|
||||||
|
|
||||||
|
|
@ -77,12 +79,11 @@ class LibraryTableStore:
|
||||||
|
|
||||||
logger.debug("Keyspace...")
|
logger.debug("Keyspace...")
|
||||||
|
|
||||||
# FIXME: Replication factor should be configurable
|
|
||||||
self.cassandra.execute(f"""
|
self.cassandra.execute(f"""
|
||||||
create keyspace if not exists {self.keyspace}
|
create keyspace if not exists {self.keyspace}
|
||||||
with replication = {{
|
with replication = {{
|
||||||
'class' : 'SimpleStrategy',
|
'class' : 'SimpleStrategy',
|
||||||
'replication_factor' : 1
|
'replication_factor' : {self.replication_factor}
|
||||||
}};
|
}};
|
||||||
""");
|
""");
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue