Basic multitenant support (#583)

* Tech spec

* Address multi-tenant queue option problems in CLI

* Modified collection service to use config

* Changed storage management to use the config service definition
This commit is contained in:
cybermaggedon 2025-12-05 21:45:30 +00:00 committed by GitHub
parent 789d9713a0
commit 7d07f802a8
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
28 changed files with 1416 additions and 1731 deletions

View file

@ -33,4 +33,5 @@ from . tool_service import ToolService
from . tool_client import ToolClientSpec
from . agent_client import AgentClientSpec
from . structured_query_client import StructuredQueryClientSpec
from . collection_config_handler import CollectionConfigHandler

View file

@ -258,9 +258,9 @@ class AsyncProcessor:
PulsarClient.add_args(parser)
parser.add_argument(
'--config-queue',
'--config-push-queue',
default=default_config_queue,
help=f'Config push queue {default_config_queue}',
help=f'Config push queue (default: {default_config_queue})',
)
parser.add_argument(

View file

@ -13,14 +13,15 @@ from typing import Optional, Tuple, List, Any
def get_cassandra_defaults() -> dict:
    """
    Get default Cassandra configuration values from environment variables or fallback defaults.

    Only the host has a hard-coded fallback ('cassandra'). Username, password
    and keyspace are None when their environment variable is not set.

    Returns:
        dict: Dictionary with 'host', 'username', 'password', and 'keyspace' keys
    """
    return {
        'host': os.getenv('CASSANDRA_HOST', 'cassandra'),
        'username': os.getenv('CASSANDRA_USERNAME'),
        'password': os.getenv('CASSANDRA_PASSWORD'),
        'keyspace': os.getenv('CASSANDRA_KEYSPACE'),
    }
@ -53,82 +54,108 @@ def add_cassandra_args(parser: argparse.ArgumentParser) -> None:
password_help += " (default: <set>)"
if 'CASSANDRA_PASSWORD' in os.environ:
password_help += " [from CASSANDRA_PASSWORD]"
keyspace_help = "Cassandra keyspace (default: service-specific)"
if defaults['keyspace']:
keyspace_help = f"Cassandra keyspace (default: {defaults['keyspace']})"
if 'CASSANDRA_KEYSPACE' in os.environ:
keyspace_help += " [from CASSANDRA_KEYSPACE]"
parser.add_argument(
'--cassandra-host',
default=defaults['host'],
help=host_help
)
parser.add_argument(
'--cassandra-username',
default=defaults['username'],
help=username_help
)
parser.add_argument(
'--cassandra-password',
default=defaults['password'],
help=password_help
)
parser.add_argument(
'--cassandra-keyspace',
default=defaults['keyspace'],
help=keyspace_help
)
def resolve_cassandra_config(
    args: Optional[Any] = None,
    host: Optional[str] = None,
    username: Optional[str] = None,
    password: Optional[str] = None,
    default_keyspace: Optional[str] = None,
    keyspace: Optional[str] = None,
) -> Tuple[List[str], Optional[str], Optional[str], Optional[str]]:
    """
    Resolve Cassandra configuration from various sources.

    Can accept either argparse args object or explicit parameters.
    Converts host string to list format for Cassandra driver.

    Each setting is resolved in this order: explicit parameter, then the
    matching attribute on ``args``, then the value from
    get_cassandra_defaults(). For the keyspace only, ``default_keyspace``
    is the final fallback.

    Args:
        args: Optional argparse namespace with cassandra_host, cassandra_username, cassandra_password, cassandra_keyspace
        host: Optional explicit host parameter (overrides args)
        username: Optional explicit username parameter (overrides args)
        password: Optional explicit password parameter (overrides args)
        default_keyspace: Optional default keyspace if not specified elsewhere
        keyspace: Optional explicit keyspace parameter (overrides args)

    Returns:
        tuple: (hosts_list, username, password, keyspace)
    """
    # If args provided, extract values; explicit parameters win because
    # `or` only falls through to args when the parameter is empty/None.
    if args is not None:
        host = host or getattr(args, 'cassandra_host', None)
        username = username or getattr(args, 'cassandra_username', None)
        password = password or getattr(args, 'cassandra_password', None)
        keyspace = keyspace or getattr(args, 'cassandra_keyspace', None)

    # Apply defaults if still None
    defaults = get_cassandra_defaults()
    host = host or defaults['host']
    username = username or defaults['username']
    password = password or defaults['password']
    keyspace = keyspace or defaults['keyspace'] or default_keyspace

    # Convert a comma-separated host string to a list, dropping blank
    # entries; a non-string host is passed through unchanged.
    if isinstance(host, str):
        hosts = [h.strip() for h in host.split(',') if h.strip()]
    else:
        hosts = host

    return hosts, username, password, keyspace
def get_cassandra_config_from_params(
    params: dict,
    default_keyspace: Optional[str] = None
) -> Tuple[List[str], Optional[str], Optional[str], Optional[str]]:
    """
    Extract and resolve Cassandra configuration from a parameters dictionary.

    Args:
        params: Dictionary of parameters that may contain Cassandra configuration
            ('cassandra_host', 'cassandra_username', 'cassandra_password',
            'cassandra_keyspace')
        default_keyspace: Optional default keyspace if not specified in params

    Returns:
        tuple: (hosts_list, username, password, keyspace)
    """
    # Use resolve function to handle defaults and list conversion
    hosts, username, password, keyspace = resolve_cassandra_config(
        host=params.get('cassandra_host'),
        username=params.get('cassandra_username'),
        password=params.get('cassandra_password'),
        default_keyspace=default_keyspace,
    )

    # A keyspace given in params takes precedence over the resolved
    # fallback; otherwise the 'cassandra_keyspace' entry would be ignored.
    keyspace = params.get('cassandra_keyspace') or keyspace

    return hosts, username, password, keyspace

View file

@ -0,0 +1,127 @@
"""
Handler for storage services to process collection configuration from config push
"""
import json
import logging
from typing import Dict, Set
logger = logging.getLogger(__name__)
class CollectionConfigHandler:
    """
    Handles collection configuration from config push messages for storage services.

    Storage services should:
    1. Inherit from this class along with their service base class
    2. Call register_config_handler(self.on_collection_config) in __init__
    3. Implement create_collection(user, collection, metadata) method
    4. Implement delete_collection(user, collection) method
    """

    def __init__(self, **kwargs):
        # Track known collections: {(user, collection): metadata_dict}
        self.known_collections: Dict[tuple, dict] = {}

        # Pass remaining kwargs up the inheritance chain
        super().__init__(**kwargs)

    async def on_collection_config(self, config: dict, version: int):
        """
        Handle config push messages and extract collection information

        Entries under config["collection"] are keyed "user:collection" with a
        JSON-encoded metadata value. Unknown collections are created, known
        ones have their stored metadata refreshed, and known collections that
        are absent from the push are deleted.

        A push with no "collection" key is ignored entirely (nothing is
        deleted), whereas an empty "collection" section deletes every known
        collection.

        Args:
            config: Configuration dictionary from ConfigPush message
            version: Configuration version number
        """
        logger.info("Processing collection configuration (version %s)", version)

        # Extract collections from config
        if "collection" not in config:
            logger.debug("No collection configuration in config push")
            return

        collection_config = config["collection"]

        # Track which collections we've seen in this config
        current_collections: Set[tuple] = set()

        # Process each collection in the config
        for key, value_json in collection_config.items():
            try:
                # Parse user:collection key. Both parts must be non-empty,
                # so ":c" and "u:" are rejected just like a key with no ":".
                user, sep, collection = key.partition(":")
                if not (sep and user and collection):
                    logger.warning(
                        "Invalid collection key format (expected user:collection): %s",
                        key,
                    )
                    continue

                # Recorded before the metadata is parsed so that a known
                # collection with a bad payload is not treated as deleted.
                collection_key = (user, collection)
                current_collections.add(collection_key)

                # Parse metadata
                metadata = json.loads(value_json)

                if collection_key not in self.known_collections:
                    logger.info("New collection detected: %s/%s", user, collection)
                    await self.create_collection(user, collection, metadata)
                    # Only remembered once creation succeeded, so a failed
                    # create is attempted again on the next push.
                    self.known_collections[collection_key] = metadata
                elif self.known_collections[collection_key] != metadata:
                    # Most storage services don't need to do anything for
                    # metadata updates; they just need to know the
                    # collection exists.
                    logger.info("Collection metadata updated: %s/%s", user, collection)
                    self.known_collections[collection_key] = metadata

            except Exception as e:
                # One bad entry must not stop the remaining entries.
                logger.exception(
                    "Error processing collection config for key %s: %s", key, e
                )

        # Find collections that were deleted (in known but not in current)
        deleted_collections = set(self.known_collections) - current_collections
        for user, collection in deleted_collections:
            logger.info("Collection deleted: %s/%s", user, collection)
            try:
                await self.delete_collection(user, collection)
            except Exception as e:
                # Left in known_collections so deletion is attempted again
                # on the next push.
                logger.exception(
                    "Error deleting collection %s/%s: %s", user, collection, e
                )
            else:
                self.known_collections.pop((user, collection), None)

        logger.debug(
            "Collection config processing complete. Known collections: %s",
            len(self.known_collections),
        )

    async def create_collection(self, user: str, collection: str, metadata: dict):
        """
        Create a collection in the storage backend.
        Subclasses must implement this method.

        Args:
            user: User ID
            collection: Collection ID
            metadata: Collection metadata dictionary

        Raises:
            NotImplementedError: Always, unless overridden by a subclass
        """
        raise NotImplementedError("Storage service must implement create_collection method")

    async def delete_collection(self, user: str, collection: str):
        """
        Delete a collection from the storage backend.
        Subclasses must implement this method.

        Args:
            user: User ID
            collection: Collection ID

        Raises:
            NotImplementedError: Always, unless overridden by a subclass
        """
        raise NotImplementedError("Storage service must implement delete_collection method")

    def collection_exists(self, user: str, collection: str) -> bool:
        """
        Check if a collection is known to exist

        Args:
            user: User ID
            collection: Collection ID

        Returns:
            True if collection exists, False otherwise
        """
        return (user, collection) in self.known_collections