Basic multitenant support (#583)

* Tech spec

* Address multi-tenant queue option problems in CLI

* Modified collection service to use config

* Changed storage management to use the config service definition
This commit is contained in:
cybermaggedon 2025-12-05 21:45:30 +00:00 committed by GitHub
parent 789d9713a0
commit 7d07f802a8
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
28 changed files with 1416 additions and 1731 deletions

View file

@ -11,12 +11,10 @@ import time
import logging
from .... direct.cassandra_kg import KnowledgeGraph
from .... base import TriplesStoreService
from .... base import TriplesStoreService, CollectionConfigHandler
from .... base import AsyncProcessor, Consumer, Producer
from .... base import ConsumerMetrics, ProducerMetrics
from .... base.cassandra_config import add_cassandra_args, resolve_cassandra_config
from .... schema import StorageManagementRequest, StorageManagementResponse, Error
from .... schema import triples_storage_management_topic, storage_management_response_topic
# Module logger
logger = logging.getLogger(__name__)
@ -24,10 +22,10 @@ logger = logging.getLogger(__name__)
default_ident = "triples-write"
class Processor(TriplesStoreService):
class Processor(CollectionConfigHandler, TriplesStoreService):
def __init__(self, **params):
id = params.get("id", default_ident)
# Get Cassandra parameters
@ -36,7 +34,7 @@ class Processor(TriplesStoreService):
cassandra_password = params.get("cassandra_password")
# Resolve configuration with environment variable fallback
hosts, username, password = resolve_cassandra_config(
hosts, username, password, keyspace = resolve_cassandra_config(
host=cassandra_host,
username=cassandra_username,
password=cassandra_password
@ -48,39 +46,15 @@ class Processor(TriplesStoreService):
"cassandra_username": username
}
)
self.cassandra_host = hosts
self.cassandra_username = username
self.cassandra_password = password
self.table = None
self.tg = None
# Set up metrics for storage management
storage_request_metrics = ConsumerMetrics(
processor=self.id, flow=None, name="storage-request"
)
storage_response_metrics = ProducerMetrics(
processor=self.id, flow=None, name="storage-response"
)
# Set up consumer for storage management requests
self.storage_request_consumer = Consumer(
taskgroup=self.taskgroup,
client=self.pulsar_client,
flow=None,
topic=triples_storage_management_topic,
subscriber=f"{id}-storage",
schema=StorageManagementRequest,
handler=self.on_storage_management,
metrics=storage_request_metrics,
)
# Set up producer for storage management responses
self.storage_response_producer = Producer(
client=self.pulsar_client,
topic=storage_management_response_topic,
schema=StorageManagementResponse,
metrics=storage_response_metrics,
)
# Register for config push notifications
self.register_config_handler(self.on_collection_config)
async def store_triples(self, message):
@ -109,15 +83,6 @@ class Processor(TriplesStoreService):
self.table = user
# Validate collection exists before accepting writes
if not self.tg.collection_exists(message.metadata.collection):
error_msg = (
f"Collection {message.metadata.collection} does not exist. "
f"Create it first with tg-set-collection."
)
logger.error(error_msg)
raise ValueError(error_msg)
for t in message.triples:
self.tg.insert(
message.metadata.collection,
@ -126,133 +91,77 @@ class Processor(TriplesStoreService):
t.o.value
)
async def start(self):
    """Bring up the base service, then the storage-management messaging.

    The parent class's start routine runs first; afterwards the storage
    request consumer and the storage response producer are started, in
    that order.
    """
    await super().start()

    request_consumer = self.storage_request_consumer
    await request_consumer.start()

    response_producer = self.storage_response_producer
    await response_producer.start()
async def on_storage_management(self, message, consumer, flow):
    """Dispatch one storage management request to its handler.

    The request is read from ``message.value()``.  The operations
    ``create-collection`` and ``delete-collection`` are forwarded to the
    matching ``handle_*`` coroutine; any other operation is answered with
    an ``invalid_operation`` error response.  An exception raised while
    dispatching is logged and reported as a ``processing_error`` response.

    ``consumer`` and ``flow`` are accepted but not used in this method.
    """
    request = message.value()
    logger.info(f"Storage management request: {request.operation} for {request.user}/{request.collection}")

    try:
        if request.operation == "create-collection":
            await self.handle_create_collection(request)
            return

        if request.operation == "delete-collection":
            await self.handle_delete_collection(request)
            return

        # Neither known operation matched: answer with an error response.
        rejection = StorageManagementResponse(
            error=Error(
                type="invalid_operation",
                message=f"Unknown operation: {request.operation}"
            )
        )
        await self.storage_response_producer.send(rejection)

    except Exception as e:
        logger.error(f"Error processing storage management request: {e}", exc_info=True)
        failure = StorageManagementResponse(
            error=Error(
                type="processing_error",
                message=str(e)
            )
        )
        await self.storage_response_producer.send(failure)
# NOTE(review): this span is taken from a diff view whose +/- markers and
# indentation were stripped, so two versions of one method are interleaved.
# Lines that read `request.user` / `request.collection` or send a
# StorageManagementResponse go with handle_create_collection(request); the
# neighbouring lines that use the `user` / `collection` parameters go with
# create_collection(user, collection, metadata). The latter appears to log
# and re-raise on failure rather than send a response -- confirm against the
# real file. `metadata` is not referenced in the visible lines.
async def handle_create_collection(self, request):
"""Create a collection in Cassandra triple store"""
async def create_collection(self, user: str, collection: str, metadata: dict):
"""Create a collection in Cassandra triple store via config push"""
try:
# Create or reuse connection for this user's keyspace
# self.table holds the user whose keyspace self.tg was last opened for;
# a different user (or no connection yet) forces a new KnowledgeGraph.
if self.table is None or self.table != request.user:
if self.table is None or self.table != user:
# NOTE(review): self.tg is cleared here but self.table is only updated
# after a successful connect. If KnowledgeGraph() raises, self.table still
# names the previous user while self.tg is None, so a later call for that
# previous user would skip reconnecting and use a None handle. Consider
# resetting self.table together with self.tg.
self.tg = None
try:
# Credentials are passed only when both username and password are set.
if self.cassandra_username and self.cassandra_password:
self.tg = KnowledgeGraph(
hosts=self.cassandra_host,
keyspace=request.user,
keyspace=user,
username=self.cassandra_username,
password=self.cassandra_password
)
else:
self.tg = KnowledgeGraph(
hosts=self.cassandra_host,
keyspace=request.user,
keyspace=user,
)
except Exception as e:
logger.error(f"Failed to connect to Cassandra for user {request.user}: {e}")
logger.error(f"Failed to connect to Cassandra for user {user}: {e}")
raise
# Remember which user's keyspace the connection now targets.
self.table = request.user
self.table = user
# Create collection using the built-in method
logger.info(f"Creating collection {request.collection} for user {request.user}")
logger.info(f"Creating collection {collection} for user {user}")
# An existing collection is only logged; creation runs in the else branch.
if self.tg.collection_exists(request.collection):
logger.info(f"Collection {request.collection} already exists")
if self.tg.collection_exists(collection):
logger.info(f"Collection {collection} already exists")
else:
self.tg.create_collection(request.collection)
logger.info(f"Created collection {request.collection}")
# Send success response
response = StorageManagementResponse(error=None)
await self.storage_response_producer.send(response)
self.tg.create_collection(collection)
logger.info(f"Created collection {collection}")
except Exception as e:
logger.error(f"Failed to create collection: {e}", exc_info=True)
response = StorageManagementResponse(
error=Error(
type="creation_error",
message=str(e)
)
)
await self.storage_response_producer.send(response)
logger.error(f"Failed to create collection {user}/{collection}: {e}", exc_info=True)
raise
# NOTE(review): this span is taken from a diff view whose +/- markers and
# indentation were stripped, so two versions of one method are interleaved.
# Lines that read `request.user` / `request.collection` or send a
# StorageManagementResponse go with handle_delete_collection(request); the
# neighbouring lines that use the `user` / `collection` parameters go with
# delete_collection(user, collection). Both versions end by re-raising the
# caught exception, as far as the visible lines show -- confirm against the
# real file.
async def handle_delete_collection(self, request):
async def delete_collection(self, user: str, collection: str):
"""Delete all data for a specific collection from the unified triples table"""
try:
# Create or reuse connection for this user's keyspace
# self.table holds the user whose keyspace self.tg was last opened for;
# a different user (or no connection yet) forces a new KnowledgeGraph.
if self.table is None or self.table != request.user:
if self.table is None or self.table != user:
# NOTE(review): self.tg is cleared here but self.table is only updated
# after a successful connect. If KnowledgeGraph() raises, self.table still
# names the previous user while self.tg is None, so a later call for that
# previous user would skip reconnecting and use a None handle. Consider
# resetting self.table together with self.tg.
self.tg = None
try:
# Credentials are passed only when both username and password are set.
if self.cassandra_username and self.cassandra_password:
self.tg = KnowledgeGraph(
hosts=self.cassandra_host,
keyspace=request.user,
keyspace=user,
username=self.cassandra_username,
password=self.cassandra_password
)
else:
self.tg = KnowledgeGraph(
hosts=self.cassandra_host,
keyspace=request.user,
keyspace=user,
)
except Exception as e:
logger.error(f"Failed to connect to Cassandra for user {request.user}: {e}")
logger.error(f"Failed to connect to Cassandra for user {user}: {e}")
raise
# Remember which user's keyspace the connection now targets.
self.table = request.user
self.table = user
# Delete all triples for this collection using the built-in method
try:
self.tg.delete_collection(request.collection)
logger.info(f"Deleted all triples for collection {request.collection} from keyspace {request.user}")
except Exception as e:
logger.error(f"Failed to delete collection data: {e}")
raise
# Send success response
response = StorageManagementResponse(
error=None # No error means success
)
await self.storage_response_producer.send(response)
logger.info(f"Successfully deleted collection {request.user}/{request.collection}")
self.tg.delete_collection(collection)
logger.info(f"Deleted all triples for collection {collection} from keyspace {user}")
except Exception as e:
logger.error(f"Failed to delete collection: {e}")
logger.error(f"Failed to delete collection {user}/{collection}: {e}", exc_info=True)
raise
@staticmethod