Address legacy issues in storage management (#595)

* Removed legacy storage management cruft. Tidied tech specs.

* Fix deletion of last collection

* Storage processor now ignores queued data that belongs to a deleted collection

* Updated tests
This commit is contained in:
cybermaggedon 2026-01-05 13:45:14 +00:00 committed by GitHub
parent 25563bae3c
commit ae13190093
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
12 changed files with 188 additions and 264 deletions

View file

@@ -35,12 +35,8 @@ class CollectionConfigHandler:
"""
logger.info(f"Processing collection configuration (version {version})")
# Extract collections from config
if "collection" not in config:
logger.debug("No collection configuration in config push")
return
collection_config = config["collection"]
# Extract collections from config (treat missing key as empty)
collection_config = config.get("collection", {})
# Track which collections we've seen in this config
current_collections: Set[tuple] = set()
@@ -81,10 +77,15 @@ class CollectionConfigHandler:
for user, collection in deleted_collections:
logger.info(f"Collection deleted: {user}/{collection}")
try:
await self.delete_collection(user, collection)
# Remove from known_collections FIRST to immediately reject new writes
# This eliminates race condition with worker threads
del self.known_collections[(user, collection)]
# Physical deletion happens after - worker threads already rejecting writes
await self.delete_collection(user, collection)
except Exception as e:
logger.error(f"Error deleting collection {user}/{collection}: {e}", exc_info=True)
# If physical deletion failed, should we re-add to known_collections?
# For now, keep it removed - collection is logically deleted per config
logger.debug(f"Collection config processing complete. Known collections: {len(self.known_collections)}")

View file

@@ -1,45 +1,8 @@
from dataclasses import dataclass
from ..core.primitives import Error
from ..core.topic import topic
############################################################################
# Storage management operations
@dataclass
class StorageManagementRequest:
"""Request for storage management operations sent to store processors"""
operation: str = "" # e.g., "delete-collection"
user: str = ""
collection: str = ""
@dataclass
class StorageManagementResponse:
"""Response from storage processors for management operations"""
error: Error | None = None # Only populated if there's an error, if null success
############################################################################
# Storage management topics
# Topics for sending collection management requests to different storage types
vector_storage_management_topic = topic(
'vector-storage-management', qos='q0', namespace='request'
)
object_storage_management_topic = topic(
'object-storage-management', qos='q0', namespace='request'
)
triples_storage_management_topic = topic(
'triples-storage-management', qos='q0', namespace='request'
)
# Topic for receiving responses from storage processors
storage_management_response_topic = topic(
'storage-management', qos='q0', namespace='response'
)
############################################################################
# This file previously contained legacy storage management queue definitions
# (StorageManagementRequest, StorageManagementResponse, and related topics).
#
# These have been removed as collection management now uses a config-based
# approach via CollectionConfigHandler instead of request/response queues.
#
# This file is kept for potential future storage-related schema definitions.