mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-07-01 17:39:39 +02:00
Basic multitenant support (#583)
* Tech spec
* Address multi-tenant queue option problems in CLI
* Modified collection service to use config
* Changed storage management to use the config service definition
This commit is contained in:
parent
789d9713a0
commit
7d07f802a8
28 changed files with 1416 additions and 1731 deletions
|
|
@ -1,142 +1,130 @@
|
|||
"""
|
||||
Collection management for the librarian
|
||||
Collection management for the librarian - uses config service for storage
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import json
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
from typing import Dict, Any, List, Optional
|
||||
|
||||
from .. schema import CollectionManagementRequest, CollectionManagementResponse, Error
|
||||
from .. schema import CollectionMetadata
|
||||
from .. schema import StorageManagementRequest, StorageManagementResponse
|
||||
from .. schema import ConfigRequest, ConfigResponse
|
||||
from .. exceptions import RequestError
|
||||
from .. tables.library import LibraryTableStore
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class CollectionManager:
|
||||
"""Manages collection metadata and coordinates collection operations across storage types"""
|
||||
"""Manages collection metadata via config service"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
cassandra_host,
|
||||
cassandra_username,
|
||||
cassandra_password,
|
||||
keyspace,
|
||||
vector_storage_producer=None,
|
||||
object_storage_producer=None,
|
||||
triples_storage_producer=None,
|
||||
storage_response_consumer=None
|
||||
config_request_producer,
|
||||
config_response_consumer,
|
||||
taskgroup
|
||||
):
|
||||
"""
|
||||
Initialize the CollectionManager
|
||||
|
||||
Args:
|
||||
cassandra_host: Cassandra host(s)
|
||||
cassandra_username: Cassandra username
|
||||
cassandra_password: Cassandra password
|
||||
keyspace: Cassandra keyspace for library data
|
||||
vector_storage_producer: Producer for vector storage management
|
||||
object_storage_producer: Producer for object storage management
|
||||
triples_storage_producer: Producer for triples storage management
|
||||
storage_response_consumer: Consumer for storage management responses
|
||||
config_request_producer: Producer for config service requests
|
||||
config_response_consumer: Consumer for config service responses
|
||||
taskgroup: Task group for async operations
|
||||
"""
|
||||
self.table_store = LibraryTableStore(
|
||||
cassandra_host, cassandra_username, cassandra_password, keyspace
|
||||
)
|
||||
self.config_request_producer = config_request_producer
|
||||
self.config_response_consumer = config_response_consumer
|
||||
self.taskgroup = taskgroup
|
||||
|
||||
# Storage management producers
|
||||
self.vector_storage_producer = vector_storage_producer
|
||||
self.object_storage_producer = object_storage_producer
|
||||
self.triples_storage_producer = triples_storage_producer
|
||||
self.storage_response_consumer = storage_response_consumer
|
||||
# Track pending config requests
|
||||
self.pending_config_requests = {}
|
||||
|
||||
# Track pending deletion operations
|
||||
self.pending_deletions = {}
|
||||
logger.info("Collection manager initialized with config service backend")
|
||||
|
||||
logger.info("Collection manager initialized")
|
||||
async def send_config_request(
    self, request: "ConfigRequest", timeout: Optional[float] = None
) -> "ConfigResponse":
    """
    Send a config request and wait for the matching response

    An asyncio.Event is registered in self.pending_config_requests under
    request.id before the request is sent. on_config_response stores the
    reply under request.id + "_response" and sets that event.

    Args:
        request: Config service request; request.id correlates the reply
        timeout: Optional maximum number of seconds to wait for the reply.
            None (the default) waits indefinitely.

    Returns:
        ConfigResponse from config service

    Raises:
        asyncio.TimeoutError: If timeout is given and elapses before a
            reply arrives
    """
    event = asyncio.Event()
    response_key = request.id + "_response"
    self.pending_config_requests[request.id] = event

    try:
        await self.config_request_producer.send(request)
        await asyncio.wait_for(event.wait(), timeout)
        return self.pending_config_requests[response_key]
    finally:
        # Drop both bookkeeping entries on every exit path (success, send
        # failure, timeout, cancellation) so the dict cannot grow by one
        # stale Event per request.
        self.pending_config_requests.pop(request.id, None)
        self.pending_config_requests.pop(response_key, None)
|
||||
|
||||
async def on_config_response(self, message, consumer, flow):
    """
    Route a config service reply to the coroutine waiting on it

    Replies whose id has no entry in self.pending_config_requests are
    ignored.

    Args:
        message: Pulsar message; message.value() yields the response
        consumer: Consumer instance (not used here)
        flow: Flow context (not used here)
    """
    reply = message.value()
    pending = self.pending_config_requests

    if reply.id not in pending:
        return

    # Park the reply under the derived key first, then set the event
    # registered under the reply id.
    pending[reply.id + "_response"] = reply
    pending[reply.id].set()
|
||||
|
||||
async def ensure_collection_exists(self, user: str, collection: str):
|
||||
"""
|
||||
Ensure a collection exists, creating it if necessary with broadcast to storage
|
||||
Ensure a collection exists, creating it if necessary
|
||||
|
||||
Args:
|
||||
user: User ID
|
||||
collection: Collection ID
|
||||
"""
|
||||
try:
|
||||
# Check if collection already exists
|
||||
existing = await self.table_store.get_collection(user, collection)
|
||||
if existing:
|
||||
# Check if collection exists via config service
|
||||
request = ConfigRequest(
|
||||
id=str(uuid.uuid4()),
|
||||
operation='get',
|
||||
type='collection',
|
||||
keys=[f'{user}:{collection}']
|
||||
)
|
||||
|
||||
response = await self.send_config_request(request)
|
||||
|
||||
# If collection exists, we're done
|
||||
if response.values and len(response.values) > 0:
|
||||
logger.debug(f"Collection {user}/{collection} already exists")
|
||||
return
|
||||
|
||||
# Create new collection with default metadata
|
||||
logger.info(f"Auto-creating collection {user}/{collection} from document submission")
|
||||
await self.table_store.create_collection(
|
||||
logger.info(f"Auto-creating collection {user}/{collection}")
|
||||
|
||||
metadata = CollectionMetadata(
|
||||
user=user,
|
||||
collection=collection,
|
||||
name=collection, # Default name to collection ID
|
||||
description="",
|
||||
tags=set()
|
||||
tags=[]
|
||||
)
|
||||
|
||||
# Broadcast collection creation to all storage backends
|
||||
creation_key = (user, collection)
|
||||
logger.info(f"Broadcasting create-collection for {creation_key}")
|
||||
|
||||
self.pending_deletions[creation_key] = {
|
||||
"responses_pending": 4, # doc-embeddings, graph-embeddings, object, triples
|
||||
"responses_received": [],
|
||||
"all_successful": True,
|
||||
"error_messages": [],
|
||||
"deletion_complete": asyncio.Event()
|
||||
}
|
||||
|
||||
storage_request = StorageManagementRequest(
|
||||
operation="create-collection",
|
||||
user=user,
|
||||
collection=collection
|
||||
request = ConfigRequest(
|
||||
id=str(uuid.uuid4()),
|
||||
operation='put',
|
||||
type='collection',
|
||||
key=f'{user}:{collection}',
|
||||
value=json.dumps(metadata.to_dict())
|
||||
)
|
||||
|
||||
# Send creation requests to all storage types
|
||||
if self.vector_storage_producer:
|
||||
await self.vector_storage_producer.send(storage_request)
|
||||
if self.object_storage_producer:
|
||||
await self.object_storage_producer.send(storage_request)
|
||||
if self.triples_storage_producer:
|
||||
await self.triples_storage_producer.send(storage_request)
|
||||
response = await self.send_config_request(request)
|
||||
|
||||
# Wait for all storage creations to complete (with timeout)
|
||||
creation_info = self.pending_deletions[creation_key]
|
||||
try:
|
||||
await asyncio.wait_for(
|
||||
creation_info["deletion_complete"].wait(),
|
||||
timeout=30.0 # 30 second timeout
|
||||
)
|
||||
except asyncio.TimeoutError:
|
||||
logger.error(f"Timeout waiting for storage creation responses for {creation_key}")
|
||||
creation_info["all_successful"] = False
|
||||
creation_info["error_messages"].append("Timeout waiting for storage creation")
|
||||
if response.error:
|
||||
raise RuntimeError(f"Config update failed: {response.error.message}")
|
||||
|
||||
# Check if all creations succeeded
|
||||
if not creation_info["all_successful"]:
|
||||
error_msg = f"Storage creation failed: {'; '.join(creation_info['error_messages'])}"
|
||||
logger.error(error_msg)
|
||||
|
||||
# Clean up metadata on failure
|
||||
await self.table_store.delete_collection(user, collection)
|
||||
|
||||
# Clean up tracking
|
||||
del self.pending_deletions[creation_key]
|
||||
|
||||
raise RuntimeError(error_msg)
|
||||
|
||||
# Clean up tracking
|
||||
del self.pending_deletions[creation_key]
|
||||
logger.info(f"Collection {creation_key} auto-created successfully in all storage backends")
|
||||
logger.info(f"Collection {user}/{collection} auto-created in config service")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error ensuring collection exists: {e}")
|
||||
|
|
@ -144,7 +132,7 @@ class CollectionManager:
|
|||
|
||||
async def list_collections(self, request: CollectionManagementRequest) -> CollectionManagementResponse:
|
||||
"""
|
||||
List collections for a user with optional tag filtering
|
||||
List collections for a user from config service
|
||||
|
||||
Args:
|
||||
request: Collection management request
|
||||
|
|
@ -153,25 +141,43 @@ class CollectionManager:
|
|||
CollectionManagementResponse with list of collections
|
||||
"""
|
||||
try:
|
||||
tag_filter = list(request.tag_filter) if request.tag_filter else None
|
||||
collections = await self.table_store.list_collections(request.user, tag_filter)
|
||||
# Get all collections from config service
|
||||
config_request = ConfigRequest(
|
||||
id=str(uuid.uuid4()),
|
||||
operation='getvalues',
|
||||
type='collection'
|
||||
)
|
||||
|
||||
collection_metadata = [
|
||||
CollectionMetadata(
|
||||
user=coll["user"],
|
||||
collection=coll["collection"],
|
||||
name=coll["name"],
|
||||
description=coll["description"],
|
||||
tags=coll["tags"],
|
||||
created_at=coll["created_at"],
|
||||
updated_at=coll["updated_at"]
|
||||
)
|
||||
for coll in collections
|
||||
]
|
||||
response = await self.send_config_request(config_request)
|
||||
|
||||
if response.error:
|
||||
raise RuntimeError(f"Config query failed: {response.error.message}")
|
||||
|
||||
# Parse collections and filter by user
|
||||
collections = []
|
||||
for key, value_json in response.values.items():
|
||||
if ":" in key:
|
||||
coll_user, coll_name = key.split(":", 1)
|
||||
if coll_user == request.user:
|
||||
metadata_dict = json.loads(value_json)
|
||||
metadata = CollectionMetadata(**metadata_dict)
|
||||
collections.append(metadata)
|
||||
|
||||
# Apply tag filtering if specified
|
||||
if request.tag_filter:
|
||||
tag_filter_set = set(request.tag_filter)
|
||||
collections = [
|
||||
c for c in collections
|
||||
if any(tag in tag_filter_set for tag in c.tags)
|
||||
]
|
||||
|
||||
# Apply limit if specified
|
||||
if request.limit and request.limit > 0:
|
||||
collections = collections[:request.limit]
|
||||
|
||||
return CollectionManagementResponse(
|
||||
error=None,
|
||||
collections=collection_metadata,
|
||||
collections=collections,
|
||||
timestamp=datetime.now().isoformat()
|
||||
)
|
||||
|
||||
|
|
@ -181,7 +187,7 @@ class CollectionManager:
|
|||
|
||||
async def update_collection(self, request: CollectionManagementRequest) -> CollectionManagementResponse:
|
||||
"""
|
||||
Update collection metadata (creates if doesn't exist)
|
||||
Update collection metadata via config service (creates if doesn't exist)
|
||||
|
||||
Args:
|
||||
request: Collection management request
|
||||
|
|
@ -190,120 +196,41 @@ class CollectionManager:
|
|||
CollectionManagementResponse with updated collection
|
||||
"""
|
||||
try:
|
||||
# Check if collection exists, create if it doesn't
|
||||
existing = await self.table_store.get_collection(request.user, request.collection)
|
||||
if not existing:
|
||||
# Create new collection with provided metadata
|
||||
logger.info(f"Creating new collection {request.user}/{request.collection}")
|
||||
# Create metadata from request
|
||||
name = request.name if request.name else request.collection
|
||||
description = request.description if request.description else ""
|
||||
tags = list(request.tags) if request.tags else []
|
||||
|
||||
name = request.name if request.name else request.collection
|
||||
description = request.description if request.description else ""
|
||||
tags = set(request.tags) if request.tags else set()
|
||||
metadata = CollectionMetadata(
|
||||
user=request.user,
|
||||
collection=request.collection,
|
||||
name=name,
|
||||
description=description,
|
||||
tags=tags
|
||||
)
|
||||
|
||||
await self.table_store.create_collection(
|
||||
user=request.user,
|
||||
collection=request.collection,
|
||||
name=name,
|
||||
description=description,
|
||||
tags=tags
|
||||
)
|
||||
# Send put request to config service
|
||||
config_request = ConfigRequest(
|
||||
id=str(uuid.uuid4()),
|
||||
operation='put',
|
||||
type='collection',
|
||||
key=f'{request.user}:{request.collection}',
|
||||
value=json.dumps(metadata.to_dict())
|
||||
)
|
||||
|
||||
# Broadcast collection creation to all storage backends
|
||||
creation_key = (request.user, request.collection)
|
||||
logger.info(f"Broadcasting create-collection for {creation_key}")
|
||||
response = await self.send_config_request(config_request)
|
||||
|
||||
self.pending_deletions[creation_key] = {
|
||||
"responses_pending": 4, # doc-embeddings, graph-embeddings, object, triples
|
||||
"responses_received": [],
|
||||
"all_successful": True,
|
||||
"error_messages": [],
|
||||
"deletion_complete": asyncio.Event()
|
||||
}
|
||||
if response.error:
|
||||
raise RuntimeError(f"Config update failed: {response.error.message}")
|
||||
|
||||
storage_request = StorageManagementRequest(
|
||||
operation="create-collection",
|
||||
user=request.user,
|
||||
collection=request.collection
|
||||
)
|
||||
logger.info(f"Collection {request.user}/{request.collection} updated in config service")
|
||||
|
||||
# Send creation requests to all storage types
|
||||
if self.vector_storage_producer:
|
||||
await self.vector_storage_producer.send(storage_request)
|
||||
if self.object_storage_producer:
|
||||
await self.object_storage_producer.send(storage_request)
|
||||
if self.triples_storage_producer:
|
||||
await self.triples_storage_producer.send(storage_request)
|
||||
|
||||
# Wait for all storage creations to complete (with timeout)
|
||||
creation_info = self.pending_deletions[creation_key]
|
||||
try:
|
||||
await asyncio.wait_for(
|
||||
creation_info["deletion_complete"].wait(),
|
||||
timeout=30.0 # 30 second timeout
|
||||
)
|
||||
except asyncio.TimeoutError:
|
||||
logger.error(f"Timeout waiting for storage creation responses for {creation_key}")
|
||||
creation_info["all_successful"] = False
|
||||
creation_info["error_messages"].append("Timeout waiting for storage creation")
|
||||
|
||||
# Check if all creations succeeded
|
||||
if not creation_info["all_successful"]:
|
||||
error_msg = f"Storage creation failed: {'; '.join(creation_info['error_messages'])}"
|
||||
logger.error(error_msg)
|
||||
|
||||
# Clean up metadata on failure
|
||||
await self.table_store.delete_collection(request.user, request.collection)
|
||||
|
||||
# Clean up tracking
|
||||
del self.pending_deletions[creation_key]
|
||||
|
||||
return CollectionManagementResponse(
|
||||
error=Error(
|
||||
type="storage_creation_error",
|
||||
message=error_msg
|
||||
),
|
||||
timestamp=datetime.now().isoformat()
|
||||
)
|
||||
|
||||
# Clean up tracking
|
||||
del self.pending_deletions[creation_key]
|
||||
logger.info(f"Collection {creation_key} created successfully in all storage backends")
|
||||
|
||||
# Get the newly created collection for response
|
||||
created_collection = await self.table_store.get_collection(request.user, request.collection)
|
||||
|
||||
collection_metadata = CollectionMetadata(
|
||||
user=created_collection["user"],
|
||||
collection=created_collection["collection"],
|
||||
name=created_collection["name"],
|
||||
description=created_collection["description"],
|
||||
tags=created_collection["tags"],
|
||||
created_at=created_collection["created_at"],
|
||||
updated_at=created_collection["updated_at"]
|
||||
)
|
||||
else:
|
||||
# Collection exists, update it
|
||||
name = request.name if request.name else None
|
||||
description = request.description if request.description else None
|
||||
tags = list(request.tags) if request.tags else None
|
||||
|
||||
updated_collection = await self.table_store.update_collection(
|
||||
request.user, request.collection, name, description, tags
|
||||
)
|
||||
|
||||
collection_metadata = CollectionMetadata(
|
||||
user=updated_collection["user"],
|
||||
collection=updated_collection["collection"],
|
||||
name=updated_collection["name"],
|
||||
description=updated_collection["description"],
|
||||
tags=updated_collection["tags"],
|
||||
created_at="", # Not returned by update
|
||||
updated_at=updated_collection["updated_at"]
|
||||
)
|
||||
# Config service will trigger config push automatically
|
||||
# Storage services will receive update and create/update collections
|
||||
|
||||
return CollectionManagementResponse(
|
||||
error=None,
|
||||
collections=[collection_metadata],
|
||||
collections=[metadata],
|
||||
timestamp=datetime.now().isoformat()
|
||||
)
|
||||
|
||||
|
|
@ -313,7 +240,7 @@ class CollectionManager:
|
|||
|
||||
async def delete_collection(self, request: CollectionManagementRequest) -> CollectionManagementResponse:
|
||||
"""
|
||||
Delete collection with cascade to all storage types
|
||||
Delete collection via config service
|
||||
|
||||
Args:
|
||||
request: Collection management request
|
||||
|
|
@ -322,68 +249,25 @@ class CollectionManager:
|
|||
CollectionManagementResponse indicating success or failure
|
||||
"""
|
||||
try:
|
||||
deletion_key = (request.user, request.collection)
|
||||
logger.info(f"Deleting collection {request.user}/{request.collection}")
|
||||
|
||||
logger.info(f"Starting cascade deletion for {request.user}/{request.collection}")
|
||||
|
||||
# Track this deletion request
|
||||
self.pending_deletions[deletion_key] = {
|
||||
"responses_pending": 4, # doc-embeddings, graph-embeddings, object, triples
|
||||
"responses_received": [],
|
||||
"all_successful": True,
|
||||
"error_messages": [],
|
||||
"deletion_complete": asyncio.Event()
|
||||
}
|
||||
|
||||
# Create storage management request
|
||||
storage_request = StorageManagementRequest(
|
||||
operation="delete-collection",
|
||||
user=request.user,
|
||||
collection=request.collection
|
||||
# Send delete request to config service
|
||||
config_request = ConfigRequest(
|
||||
id=str(uuid.uuid4()),
|
||||
operation='delete',
|
||||
type='collection',
|
||||
key=f'{request.user}:{request.collection}'
|
||||
)
|
||||
|
||||
# Send deletion requests to all storage types
|
||||
if self.vector_storage_producer:
|
||||
await self.vector_storage_producer.send(storage_request)
|
||||
if self.object_storage_producer:
|
||||
await self.object_storage_producer.send(storage_request)
|
||||
if self.triples_storage_producer:
|
||||
await self.triples_storage_producer.send(storage_request)
|
||||
response = await self.send_config_request(config_request)
|
||||
|
||||
# Wait for all storage deletions to complete (with timeout)
|
||||
deletion_info = self.pending_deletions[deletion_key]
|
||||
try:
|
||||
await asyncio.wait_for(
|
||||
deletion_info["deletion_complete"].wait(),
|
||||
timeout=30.0 # 30 second timeout
|
||||
)
|
||||
except asyncio.TimeoutError:
|
||||
logger.error(f"Timeout waiting for storage deletion responses for {deletion_key}")
|
||||
deletion_info["all_successful"] = False
|
||||
deletion_info["error_messages"].append("Timeout waiting for storage deletion")
|
||||
if response.error:
|
||||
raise RuntimeError(f"Config delete failed: {response.error.message}")
|
||||
|
||||
# Check if all deletions succeeded
|
||||
if not deletion_info["all_successful"]:
|
||||
error_msg = f"Storage deletion failed: {'; '.join(deletion_info['error_messages'])}"
|
||||
logger.error(error_msg)
|
||||
logger.info(f"Collection {request.user}/{request.collection} deleted from config service")
|
||||
|
||||
# Clean up tracking
|
||||
del self.pending_deletions[deletion_key]
|
||||
|
||||
return CollectionManagementResponse(
|
||||
error=Error(
|
||||
type="storage_deletion_error",
|
||||
message=error_msg
|
||||
),
|
||||
timestamp=datetime.now().isoformat()
|
||||
)
|
||||
|
||||
# All storage deletions succeeded, now delete metadata
|
||||
logger.info(f"Storage deletions complete, removing metadata for {deletion_key}")
|
||||
await self.table_store.delete_collection(request.user, request.collection)
|
||||
|
||||
# Clean up tracking
|
||||
del self.pending_deletions[deletion_key]
|
||||
# Config service will trigger config push automatically
|
||||
# Storage services will receive update and delete collections
|
||||
|
||||
return CollectionManagementResponse(
|
||||
error=None,
|
||||
|
|
@ -392,39 +276,4 @@ class CollectionManager:
|
|||
|
||||
except Exception as e:
|
||||
logger.error(f"Error deleting collection: {e}")
|
||||
# Clean up tracking on error
|
||||
if deletion_key in self.pending_deletions:
|
||||
del self.pending_deletions[deletion_key]
|
||||
raise RequestError(f"Failed to delete collection: {str(e)}")
|
||||
|
||||
async def on_storage_response(self, response: StorageManagementResponse):
    """
    Record a storage management response against a pending operation

    The response is credited to the first entry in self.pending_deletions
    that is still waiting on responses; no other entry is touched. When
    that entry's pending count reaches zero its "deletion_complete" event
    is set.

    Args:
        response: Storage management response
    """
    logger.debug(f"Received storage response: error={response.error}")

    # The response is not matched to a specific operation here: take the
    # first tracked entry (from a snapshot of the dict) that still expects
    # responses.
    match = next(
        (
            (key, tracker)
            for key, tracker in list(self.pending_deletions.items())
            if tracker["responses_pending"] > 0
        ),
        None,
    )
    if match is None:
        return

    deletion_key, info = match

    # Book the response against the chosen entry
    info["responses_received"].append(response)
    info["responses_pending"] -= 1

    failed = bool(response.error) and bool(response.error.message)
    if not failed:
        logger.debug(f"Storage operation succeeded for {deletion_key}")
    else:
        info["all_successful"] = False
        info["error_messages"].append(response.error.message)
        logger.warning(f"Storage operation failed for {deletion_key}: {response.error.message}")

    # Last outstanding response: wake whoever waits on this entry
    if info["responses_pending"] == 0:
        logger.info(f"All storage responses received for {deletion_key}")
        info["deletion_complete"].set()
|
||||
|
|
@ -18,9 +18,8 @@ from .. schema import LibrarianRequest, LibrarianResponse, Error
|
|||
from .. schema import librarian_request_queue, librarian_response_queue
|
||||
from .. schema import CollectionManagementRequest, CollectionManagementResponse
|
||||
from .. schema import collection_request_queue, collection_response_queue
|
||||
from .. schema import StorageManagementRequest, StorageManagementResponse
|
||||
from .. schema import vector_storage_management_topic, object_storage_management_topic
|
||||
from .. schema import triples_storage_management_topic, storage_management_response_topic
|
||||
from .. schema import ConfigRequest, ConfigResponse
|
||||
from .. schema import config_request_queue, config_response_queue
|
||||
|
||||
from .. schema import Document, Metadata
|
||||
from .. schema import TextDocument, Metadata
|
||||
|
|
@ -39,6 +38,8 @@ default_librarian_request_queue = librarian_request_queue
|
|||
default_librarian_response_queue = librarian_response_queue
|
||||
default_collection_request_queue = collection_request_queue
|
||||
default_collection_response_queue = collection_response_queue
|
||||
default_config_request_queue = config_request_queue
|
||||
default_config_response_queue = config_response_queue
|
||||
|
||||
default_minio_host = "minio:9000"
|
||||
default_minio_access_key = "minioadmin"
|
||||
|
|
@ -47,9 +48,6 @@ default_cassandra_host = "cassandra"
|
|||
|
||||
bucket_name = "library"
|
||||
|
||||
# FIXME: How to ensure this doesn't conflict with other usage?
|
||||
keyspace = "librarian"
|
||||
|
||||
class Processor(AsyncProcessor):
|
||||
|
||||
def __init__(self, **params):
|
||||
|
|
@ -74,6 +72,14 @@ class Processor(AsyncProcessor):
|
|||
"collection_response_queue", default_collection_response_queue
|
||||
)
|
||||
|
||||
config_request_queue = params.get(
|
||||
"config_request_queue", default_config_request_queue
|
||||
)
|
||||
|
||||
config_response_queue = params.get(
|
||||
"config_response_queue", default_config_response_queue
|
||||
)
|
||||
|
||||
minio_host = params.get("minio_host", default_minio_host)
|
||||
minio_access_key = params.get(
|
||||
"minio_access_key",
|
||||
|
|
@ -87,14 +93,15 @@ class Processor(AsyncProcessor):
|
|||
cassandra_host = params.get("cassandra_host")
|
||||
cassandra_username = params.get("cassandra_username")
|
||||
cassandra_password = params.get("cassandra_password")
|
||||
|
||||
|
||||
# Resolve configuration with environment variable fallback
|
||||
hosts, username, password = resolve_cassandra_config(
|
||||
hosts, username, password, keyspace = resolve_cassandra_config(
|
||||
host=cassandra_host,
|
||||
username=cassandra_username,
|
||||
password=cassandra_password
|
||||
password=cassandra_password,
|
||||
default_keyspace="librarian"
|
||||
)
|
||||
|
||||
|
||||
# Store resolved configuration
|
||||
self.cassandra_host = hosts
|
||||
self.cassandra_username = username
|
||||
|
|
@ -170,34 +177,31 @@ class Processor(AsyncProcessor):
|
|||
metrics = collection_response_metrics,
|
||||
)
|
||||
|
||||
# Storage management producers for collection deletion
|
||||
self.vector_storage_producer = Producer(
|
||||
client = self.pulsar_client,
|
||||
topic = vector_storage_management_topic,
|
||||
schema = StorageManagementRequest,
|
||||
# Config service client for collection management
|
||||
config_request_metrics = ProducerMetrics(
|
||||
processor = id, flow = None, name = "config-request"
|
||||
)
|
||||
|
||||
self.object_storage_producer = Producer(
|
||||
self.config_request_producer = Producer(
|
||||
client = self.pulsar_client,
|
||||
topic = object_storage_management_topic,
|
||||
schema = StorageManagementRequest,
|
||||
topic = config_request_queue,
|
||||
schema = ConfigRequest,
|
||||
metrics = config_request_metrics,
|
||||
)
|
||||
|
||||
self.triples_storage_producer = Producer(
|
||||
client = self.pulsar_client,
|
||||
topic = triples_storage_management_topic,
|
||||
schema = StorageManagementRequest,
|
||||
config_response_metrics = ConsumerMetrics(
|
||||
processor = id, flow = None, name = "config-response"
|
||||
)
|
||||
|
||||
self.storage_response_consumer = Consumer(
|
||||
self.config_response_consumer = Consumer(
|
||||
taskgroup = self.taskgroup,
|
||||
client = self.pulsar_client,
|
||||
flow = None,
|
||||
topic = storage_management_response_topic,
|
||||
subscriber = id,
|
||||
schema = StorageManagementResponse,
|
||||
handler = self.on_storage_response,
|
||||
metrics = storage_response_metrics,
|
||||
topic = config_response_queue,
|
||||
subscriber = f"{id}-config",
|
||||
schema = ConfigResponse,
|
||||
handler = self.on_config_response,
|
||||
metrics = config_response_metrics,
|
||||
)
|
||||
|
||||
self.librarian = Librarian(
|
||||
|
|
@ -213,14 +217,9 @@ class Processor(AsyncProcessor):
|
|||
)
|
||||
|
||||
self.collection_manager = CollectionManager(
|
||||
cassandra_host = self.cassandra_host,
|
||||
cassandra_username = self.cassandra_username,
|
||||
cassandra_password = self.cassandra_password,
|
||||
keyspace = keyspace,
|
||||
vector_storage_producer = self.vector_storage_producer,
|
||||
object_storage_producer = self.object_storage_producer,
|
||||
triples_storage_producer = self.triples_storage_producer,
|
||||
storage_response_consumer = self.storage_response_consumer,
|
||||
config_request_producer = self.config_request_producer,
|
||||
config_response_consumer = self.config_response_consumer,
|
||||
taskgroup = self.taskgroup,
|
||||
)
|
||||
|
||||
self.register_config_handler(self.on_librarian_config)
|
||||
|
|
@ -236,10 +235,12 @@ class Processor(AsyncProcessor):
|
|||
await self.librarian_response_producer.start()
|
||||
await self.collection_request_consumer.start()
|
||||
await self.collection_response_producer.start()
|
||||
await self.vector_storage_producer.start()
|
||||
await self.object_storage_producer.start()
|
||||
await self.triples_storage_producer.start()
|
||||
await self.storage_response_consumer.start()
|
||||
await self.config_request_producer.start()
|
||||
await self.config_response_consumer.start()
|
||||
|
||||
async def on_config_response(self, message, consumer, flow):
    """Relay a config service reply to the collection manager's handler"""
    manager = self.collection_manager
    await manager.on_config_response(message, consumer, flow)
|
||||
|
||||
async def on_librarian_config(self, config, version):
|
||||
|
||||
|
|
@ -464,14 +465,6 @@ class Processor(AsyncProcessor):
|
|||
|
||||
logger.debug("Collection request processing complete")
|
||||
|
||||
async def on_storage_response(self, msg, consumer, flow):
    """
    Pass a storage management response on to the collection manager

    Args:
        msg: Incoming message; msg.value() yields the response payload
        consumer: Consumer instance (not used here)
        flow: Flow context (not used here)
    """
    payload = msg.value()
    logger.debug("Received storage management response")
    manager = self.collection_manager
    await manager.on_storage_response(payload)
|
||||
|
||||
@staticmethod
|
||||
def add_args(parser):
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue