Feature/consolidate cassandra config (#483)

* Cassandra consolidation of parameters

* New Cassandra configuration helper

* Implemented Cassandra config refactor

* New tests
This commit is contained in:
cybermaggedon 2025-09-03 23:41:22 +01:00 committed by GitHub
parent e74eb5d1ff
commit ccaec88a72
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
15 changed files with 1968 additions and 170 deletions

View file

@ -10,32 +10,40 @@ from .... direct.cassandra import TrustGraph
from .... schema import TriplesQueryRequest, TriplesQueryResponse, Error
from .... schema import Value, Triple
from .... base import TriplesQueryService
from .... base.cassandra_config import add_cassandra_args, resolve_cassandra_config
# Module logger
logger = logging.getLogger(__name__)
default_ident = "triples-query"
default_graph_host='localhost'
class Processor(TriplesQueryService):
def __init__(self, **params):
graph_host = params.get("graph_host", default_graph_host)
graph_username = params.get("graph_username", None)
graph_password = params.get("graph_password", None)
# Use new parameter names, fall back to old for compatibility
cassandra_host = params.get("cassandra_host", params.get("graph_host"))
cassandra_username = params.get("cassandra_username", params.get("graph_username"))
cassandra_password = params.get("cassandra_password", params.get("graph_password"))
# Resolve configuration with environment variable fallback
hosts, username, password = resolve_cassandra_config(
host=cassandra_host,
username=cassandra_username,
password=cassandra_password
)
super(Processor, self).__init__(
**params | {
"graph_host": graph_host,
"graph_username": graph_username,
"cassandra_host": ','.join(hosts),
"cassandra_username": username,
}
)
self.graph_host = [graph_host]
self.username = graph_username
self.password = graph_password
self.graph_host = hosts
self.username = username
self.password = password
self.table = None
def create_value(self, ent):
@ -147,24 +155,7 @@ class Processor(TriplesQueryService):
def add_args(parser):
TriplesQueryService.add_args(parser)
parser.add_argument(
'-g', '--graph-host',
default="localhost",
help=f'Graph host (default: localhost)'
)
parser.add_argument(
'--graph-username',
default=None,
help=f'Cassandra username'
)
parser.add_argument(
'--graph-password',
default=None,
help=f'Cassandra password'
)
add_cassandra_args(parser)
def run():

View file

@ -8,12 +8,12 @@ import urllib.parse
from ... schema import Triples, GraphEmbeddings
from ... base import FlowProcessor, ConsumerSpec
from ... base.cassandra_config import add_cassandra_args, resolve_cassandra_config
from ... tables.knowledge import KnowledgeTableStore
default_ident = "kg-store"
default_cassandra_host = "cassandra"
keyspace = "knowledge"
class Processor(FlowProcessor):
@ -22,15 +22,18 @@ class Processor(FlowProcessor):
id = params.get("id")
cassandra_host = params.get("cassandra_host", default_cassandra_host)
cassandra_user = params.get("cassandra_user")
cassandra_password = params.get("cassandra_password")
# Use helper to resolve configuration
hosts, username, password = resolve_cassandra_config(
host=params.get("cassandra_host"),
username=params.get("cassandra_user", params.get("cassandra_username")),
password=params.get("cassandra_password")
)
super(Processor, self).__init__(
**params | {
"id": id,
"cassandra_host": cassandra_host,
"cassandra_user": cassandra_user,
"cassandra_host": ','.join(hosts),
"cassandra_username": username,
}
)
@ -51,9 +54,9 @@ class Processor(FlowProcessor):
)
self.table_store = KnowledgeTableStore(
cassandra_host = cassandra_host.split(","),
cassandra_user = cassandra_user,
cassandra_password = cassandra_password,
cassandra_host = hosts,
cassandra_user = username,
cassandra_password = password,
keyspace = keyspace,
)
@ -71,6 +74,7 @@ class Processor(FlowProcessor):
def add_args(parser):
FlowProcessor.add_args(parser)
add_cassandra_args(parser)
def run():

View file

@ -14,12 +14,12 @@ from cassandra import ConsistencyLevel
from .... schema import ExtractedObject
from .... schema import RowSchema, Field
from .... base import FlowProcessor, ConsumerSpec
from .... base.cassandra_config import add_cassandra_args, resolve_cassandra_config
# Module logger
logger = logging.getLogger(__name__)
default_ident = "objects-write"
default_graph_host = 'localhost'
class Processor(FlowProcessor):
@ -27,10 +27,22 @@ class Processor(FlowProcessor):
id = params.get("id", default_ident)
# Cassandra connection parameters
self.graph_host = params.get("graph_host", default_graph_host)
self.graph_username = params.get("graph_username", None)
self.graph_password = params.get("graph_password", None)
# Use new parameter names, fall back to old for compatibility
cassandra_host = params.get("cassandra_host", params.get("graph_host"))
cassandra_username = params.get("cassandra_username", params.get("graph_username"))
cassandra_password = params.get("cassandra_password", params.get("graph_password"))
# Resolve configuration with environment variable fallback
hosts, username, password = resolve_cassandra_config(
host=cassandra_host,
username=cassandra_username,
password=cassandra_password
)
# Store resolved configuration
self.graph_host = hosts # Store as list
self.graph_username = username
self.graph_password = password
# Config key for schemas
self.config_key = params.get("config_type", "schema")
@ -76,11 +88,11 @@ class Processor(FlowProcessor):
password=self.graph_password
)
self.cluster = Cluster(
contact_points=[self.graph_host],
contact_points=self.graph_host,
auth_provider=auth_provider
)
else:
self.cluster = Cluster(contact_points=[self.graph_host])
self.cluster = Cluster(contact_points=self.graph_host)
self.session = self.cluster.connect()
logger.info(f"Connected to Cassandra cluster at {self.graph_host}")
@ -381,24 +393,7 @@ class Processor(FlowProcessor):
"""Add command-line arguments"""
FlowProcessor.add_args(parser)
parser.add_argument(
'-g', '--graph-host',
default=default_graph_host,
help=f'Cassandra host (default: {default_graph_host})'
)
parser.add_argument(
'--graph-username',
default=None,
help='Cassandra username'
)
parser.add_argument(
'--graph-password',
default=None,
help='Cassandra password'
)
add_cassandra_args(parser)
parser.add_argument(
'--config-type',

View file

@ -3,6 +3,8 @@
Graph writer. Input is graph edge. Writes edges to Cassandra graph.
"""
raise RuntimeError("This code is no longer in use")
import pulsar
import base64
import os
@ -14,9 +16,9 @@ from cassandra.auth import PlainTextAuthProvider
from ssl import SSLContext, PROTOCOL_TLSv1_2
from .... schema import Rows
from .... schema import rows_store_queue
from .... log_level import LogLevel
from .... base import Consumer
from .... base.cassandra_config import add_cassandra_args, resolve_cassandra_config
# Module logger
logger = logging.getLogger(__name__)
@ -24,9 +26,8 @@ logger = logging.getLogger(__name__)
module = "rows-write"
ssl_context = SSLContext(PROTOCOL_TLSv1_2)
default_input_queue = rows_store_queue
default_input_queue = "rows-store" # Default queue name
default_subscriber = module
default_graph_host='localhost'
class Processor(Consumer):
@ -34,26 +35,35 @@ class Processor(Consumer):
input_queue = params.get("input_queue", default_input_queue)
subscriber = params.get("subscriber", default_subscriber)
graph_host = params.get("graph_host", default_graph_host)
graph_username = params.get("graph_username", None)
graph_password = params.get("graph_password", None)
# Use new parameter names, fall back to old for compatibility
cassandra_host = params.get("cassandra_host", params.get("graph_host"))
cassandra_username = params.get("cassandra_username", params.get("graph_username"))
cassandra_password = params.get("cassandra_password", params.get("graph_password"))
# Resolve configuration with environment variable fallback
hosts, username, password = resolve_cassandra_config(
host=cassandra_host,
username=cassandra_username,
password=cassandra_password
)
super(Processor, self).__init__(
**params | {
"input_queue": input_queue,
"subscriber": subscriber,
"input_schema": Rows,
"graph_host": graph_host,
"graph_username": graph_username,
"graph_password": graph_password,
"cassandra_host": ','.join(hosts),
"cassandra_username": username,
"cassandra_password": password,
}
)
if graph_username and graph_password:
auth_provider = PlainTextAuthProvider(username=graph_username, password=graph_password)
self.cluster = Cluster(graph_host.split(","), auth_provider=auth_provider, ssl_context=ssl_context)
if username and password:
auth_provider = PlainTextAuthProvider(username=username, password=password)
self.cluster = Cluster(hosts, auth_provider=auth_provider, ssl_context=ssl_context)
else:
self.cluster = Cluster(graph_host.split(","))
self.cluster = Cluster(hosts)
self.session = self.cluster.connect()
self.tables = set()
@ -128,24 +138,7 @@ class Processor(Consumer):
Consumer.add_args(
parser, default_input_queue, default_subscriber,
)
parser.add_argument(
'-g', '--graph-host',
default="localhost",
help=f'Graph host (default: localhost)'
)
parser.add_argument(
'--graph-username',
default=None,
help=f'Cassandra username'
)
parser.add_argument(
'--graph-password',
default=None,
help=f'Cassandra password'
)
add_cassandra_args(parser)
def run():

View file

@ -12,13 +12,13 @@ import logging
from .... direct.cassandra import TrustGraph
from .... base import TriplesStoreService
from .... base.cassandra_config import add_cassandra_args, resolve_cassandra_config
# Module logger
logger = logging.getLogger(__name__)
default_ident = "triples-write"
default_graph_host='localhost'
class Processor(TriplesStoreService):
@ -26,20 +26,28 @@ class Processor(TriplesStoreService):
id = params.get("id", default_ident)
graph_host = params.get("graph_host", default_graph_host)
graph_username = params.get("graph_username", None)
graph_password = params.get("graph_password", None)
# Use new parameter names, fall back to old for compatibility
cassandra_host = params.get("cassandra_host", params.get("graph_host"))
cassandra_username = params.get("cassandra_username", params.get("graph_username"))
cassandra_password = params.get("cassandra_password", params.get("graph_password"))
# Resolve configuration with environment variable fallback
hosts, username, password = resolve_cassandra_config(
host=cassandra_host,
username=cassandra_username,
password=cassandra_password
)
super(Processor, self).__init__(
**params | {
"graph_host": graph_host,
"graph_username": graph_username
"cassandra_host": ','.join(hosts),
"cassandra_username": username
}
)
self.graph_host = [graph_host]
self.username = graph_username
self.password = graph_password
self.graph_host = hosts
self.username = username
self.password = password
self.table = None
async def store_triples(self, message):
@ -82,24 +90,7 @@ class Processor(TriplesStoreService):
def add_args(parser):
TriplesStoreService.add_args(parser)
parser.add_argument(
'-g', '--graph-host',
default="localhost",
help=f'Graph host (default: localhost)'
)
parser.add_argument(
'--graph-username',
default=None,
help=f'Cassandra username'
)
parser.add_argument(
'--graph-password',
default=None,
help=f'Cassandra password'
)
add_cassandra_args(parser)
def run():

View file

@ -24,6 +24,10 @@ class ConfigTableStore:
logger.info("Connecting to Cassandra...")
# Ensure cassandra_host is a list
if isinstance(cassandra_host, str):
cassandra_host = [h.strip() for h in cassandra_host.split(',')]
if cassandra_user and cassandra_password:
ssl_context = SSLContext(PROTOCOL_TLSv1_2)
auth_provider = PlainTextAuthProvider(

View file

@ -24,6 +24,10 @@ class KnowledgeTableStore:
logger.info("Connecting to Cassandra...")
# Ensure cassandra_host is a list
if isinstance(cassandra_host, str):
cassandra_host = [h.strip() for h in cassandra_host.split(',')]
if cassandra_user and cassandra_password:
ssl_context = SSLContext(PROTOCOL_TLSv1_2)
auth_provider = PlainTextAuthProvider(

View file

@ -28,6 +28,10 @@ class LibraryTableStore:
logger.info("Connecting to Cassandra...")
# Ensure cassandra_host is a list
if isinstance(cassandra_host, str):
cassandra_host = [h.strip() for h in cassandra_host.split(',')]
if cassandra_user and cassandra_password:
ssl_context = SSLContext(PROTOCOL_TLSv1_2)
auth_provider = PlainTextAuthProvider(