Collection delete pt. 3 (#542)

* Fixing collection deletion

* Fixing collection management param error

* Always test for collections

* Add Cassandra collection table

* Updated tech spec for explicit creation/deletion

* Remove implicit collection creation

* Fix up collection tracking in all processors
This commit is contained in:
cybermaggedon 2025-09-30 16:02:33 +01:00 committed by GitHub
parent dc79b10552
commit 52b133fc86
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
31 changed files with 1761 additions and 843 deletions

View file

@ -115,38 +115,36 @@ class Processor(DocumentEmbeddingsStoreService):
"Gave up waiting for index creation"
)
async def start(self):
    """Bring up the base processor, then the storage-management endpoints.

    The parent class's ``start()`` is awaited first; afterwards the
    storage request consumer and the storage response producer are
    started, in that order.
    """
    await super().start()

    # Storage-management request side first, then the response side.
    request_consumer = self.storage_request_consumer
    await request_consumer.start()

    response_producer = self.storage_response_producer
    await response_producer.start()
async def store_document_embeddings(self, message):
index_name = (
"d-" + message.metadata.user + "-" + message.metadata.collection
)
# Validate collection exists before accepting writes
if not self.pinecone.has_index(index_name):
error_msg = (
f"Collection {message.metadata.collection} does not exist. "
f"Create it first with tg-set-collection."
)
logger.error(error_msg)
raise ValueError(error_msg)
for emb in message.chunks:
if emb.chunk is None or emb.chunk == b"": continue
chunk = emb.chunk.decode("utf-8")
if chunk == "": continue
for vec in emb.vectors:
dim = len(vec)
index_name = (
"d-" + message.metadata.user + "-" + message.metadata.collection
)
if index_name != self.last_index_name:
if not self.pinecone.has_index(index_name):
try:
self.create_index(index_name, dim)
except Exception as e:
logger.error("Pinecone index creation failed")
raise e
logger.info(f"Index {index_name} created")
self.last_index_name = index_name
index = self.pinecone.Index(index_name)
# Generate unique ID for each vector
@ -192,18 +190,21 @@ class Processor(DocumentEmbeddingsStoreService):
help=f'Pinecone region, (default: {default_region}'
)
async def on_storage_management(self, message):
async def on_storage_management(self, message, consumer, flow):
"""Handle storage management requests"""
logger.info(f"Storage management request: {message.operation} for {message.user}/{message.collection}")
request = message.value()
logger.info(f"Storage management request: {request.operation} for {request.user}/{request.collection}")
try:
if message.operation == "delete-collection":
await self.handle_delete_collection(message)
if request.operation == "create-collection":
await self.handle_create_collection(request)
elif request.operation == "delete-collection":
await self.handle_delete_collection(request)
else:
response = StorageManagementResponse(
error=Error(
type="invalid_operation",
message=f"Unknown operation: {message.operation}"
message=f"Unknown operation: {request.operation}"
)
)
await self.storage_response_producer.send(response)
@ -218,10 +219,36 @@ class Processor(DocumentEmbeddingsStoreService):
)
await self.storage_response_producer.send(response)
async def handle_delete_collection(self, message):
async def handle_create_collection(self, request):
    """Ensure the Pinecone index for a document-embeddings collection exists.

    The index name is ``d-<user>-<collection>``, built from ``request.user``
    and ``request.collection``. If the index is already present nothing is
    created. In both cases a ``StorageManagementResponse`` with
    ``error=None`` is sent on the storage response producer.

    Any exception raised along the way is logged (with traceback) and
    reported back as a ``StorageManagementResponse`` carrying an ``Error``
    of type ``"creation_error"``; it is not re-raised from here.
    """
    try:
        target_index = f"d-{request.user}-{request.collection}"
        already_present = self.pinecone.has_index(target_index)

        if not already_present:
            # The vector dimension is not known at creation time, so a
            # default is used; the index will need to be recreated if the
            # dimension doesn't match.
            self.create_index(target_index, dim=384)
            logger.info(f"Created Pinecone index: {target_index}")
        else:
            logger.info(f"Pinecone index {target_index} already exists")

        # No error on the response means success.
        await self.storage_response_producer.send(
            StorageManagementResponse(error=None)
        )

    except Exception as exc:
        logger.error(f"Failed to create collection: {exc}", exc_info=True)
        failure = StorageManagementResponse(
            error=Error(
                type="creation_error",
                message=str(exc),
            )
        )
        await self.storage_response_producer.send(failure)
async def handle_delete_collection(self, request):
"""Delete the collection for document embeddings"""
try:
index_name = f"d-{message.user}-{message.collection}"
index_name = f"d-{request.user}-{request.collection}"
if self.pinecone.has_index(index_name):
self.pinecone.delete_index(index_name)
@ -234,7 +261,7 @@ class Processor(DocumentEmbeddingsStoreService):
error=None # No error means success
)
await self.storage_response_producer.send(response)
logger.info(f"Successfully deleted collection {message.user}/{message.collection}")
logger.info(f"Successfully deleted collection {request.user}/{request.collection}")
except Exception as e:
logger.error(f"Failed to delete collection: {e}")