Address legacy issues in storage management (#595)

* Removed legacy storage-management cruft. Tidied tech specs.

* Fix deletion of the last collection

* Storage processor ignores queued data that belongs to a deleted collection

* Updated tests
This commit is contained in:
cybermaggedon 2026-01-05 13:45:14 +00:00 committed by GitHub
parent 25563bae3c
commit ae13190093
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
12 changed files with 188 additions and 264 deletions

View file

@ -117,7 +117,7 @@ class TestObjectsCassandraIntegration:
assert "customer_records" in processor.schemas
# Step 1.5: Create the collection first (simulate tg-set-collection)
await processor.create_collection("test_user", "import_2024")
await processor.create_collection("test_user", "import_2024", {})
# Step 2: Process an ExtractedObject
test_obj = ExtractedObject(
@ -213,8 +213,8 @@ class TestObjectsCassandraIntegration:
assert len(processor.schemas) == 2
# Create collections first
await processor.create_collection("shop", "catalog")
await processor.create_collection("shop", "sales")
await processor.create_collection("shop", "catalog", {})
await processor.create_collection("shop", "sales", {})
# Process objects for different schemas
product_obj = ExtractedObject(
@ -263,7 +263,7 @@ class TestObjectsCassandraIntegration:
)
# Create collection first
await processor.create_collection("test", "test")
await processor.create_collection("test", "test", {})
# Create object missing required field
test_obj = ExtractedObject(
@ -302,7 +302,7 @@ class TestObjectsCassandraIntegration:
)
# Create collection first
await processor.create_collection("logger", "app_events")
await processor.create_collection("logger", "app_events", {})
# Process object
test_obj = ExtractedObject(
@ -407,7 +407,7 @@ class TestObjectsCassandraIntegration:
# Create all collections first
for coll in collections:
await processor.create_collection("analytics", coll)
await processor.create_collection("analytics", coll, {})
for coll in collections:
obj = ExtractedObject(
@ -486,7 +486,7 @@ class TestObjectsCassandraIntegration:
)
# Create collection first
await processor.create_collection("test_user", "batch_import")
await processor.create_collection("test_user", "batch_import", {})
msg = MagicMock()
msg.value.return_value = batch_obj
@ -532,7 +532,7 @@ class TestObjectsCassandraIntegration:
)
# Create collection first
await processor.create_collection("test", "empty")
await processor.create_collection("test", "empty", {})
# Process empty batch object
empty_obj = ExtractedObject(
@ -573,7 +573,7 @@ class TestObjectsCassandraIntegration:
)
# Create collection first
await processor.create_collection("test", "mixed")
await processor.create_collection("test", "mixed", {})
# Single object (backward compatibility)
single_obj = ExtractedObject(

View file

@ -78,7 +78,10 @@ class TestQdrantDocEmbeddingsStorage(IsolatedAsyncioTestCase):
}
processor = Processor(**config)
# Add collection to known_collections (simulates config push)
processor.known_collections[('test_user', 'test_collection')] = {}
# Create mock message with chunks and vectors
mock_message = MagicMock()
mock_message.metadata.user = 'test_user'
@ -129,7 +132,10 @@ class TestQdrantDocEmbeddingsStorage(IsolatedAsyncioTestCase):
}
processor = Processor(**config)
# Add collection to known_collections (simulates config push)
processor.known_collections[('multi_user', 'multi_collection')] = {}
# Create mock message with multiple chunks
mock_message = MagicMock()
mock_message.metadata.user = 'multi_user'
@ -186,7 +192,10 @@ class TestQdrantDocEmbeddingsStorage(IsolatedAsyncioTestCase):
}
processor = Processor(**config)
# Add collection to known_collections (simulates config push)
processor.known_collections[('vector_user', 'vector_collection')] = {}
# Create mock message with chunk having multiple vectors
mock_message = MagicMock()
mock_message.metadata.user = 'vector_user'
@ -280,6 +289,9 @@ class TestQdrantDocEmbeddingsStorage(IsolatedAsyncioTestCase):
processor = Processor(**config)
# Add collection to known_collections (simulates config push)
processor.known_collections[('new_user', 'new_collection')] = {}
# Create mock message
mock_message = MagicMock()
mock_message.metadata.user = 'new_user'
@ -329,6 +341,9 @@ class TestQdrantDocEmbeddingsStorage(IsolatedAsyncioTestCase):
processor = Processor(**config)
# Add collection to known_collections (simulates config push)
processor.known_collections[('error_user', 'error_collection')] = {}
# Create mock message
mock_message = MagicMock()
mock_message.metadata.user = 'error_user'
@ -364,6 +379,9 @@ class TestQdrantDocEmbeddingsStorage(IsolatedAsyncioTestCase):
processor = Processor(**config)
# Add collection to known_collections (simulates config push)
processor.known_collections[('cache_user', 'cache_collection')] = {}
# Create first mock message
mock_message1 = MagicMock()
mock_message1.metadata.user = 'cache_user'
@ -425,6 +443,9 @@ class TestQdrantDocEmbeddingsStorage(IsolatedAsyncioTestCase):
processor = Processor(**config)
# Add collection to known_collections (simulates config push)
processor.known_collections[('dim_user', 'dim_collection')] = {}
# Create mock message with different dimension vectors
mock_message = MagicMock()
mock_message.metadata.user = 'dim_user'
@ -494,7 +515,10 @@ class TestQdrantDocEmbeddingsStorage(IsolatedAsyncioTestCase):
}
processor = Processor(**config)
# Add collection to known_collections (simulates config push)
processor.known_collections[('utf8_user', 'utf8_collection')] = {}
# Create mock message with UTF-8 encoded text
mock_message = MagicMock()
mock_message.metadata.user = 'utf8_user'
@ -533,7 +557,10 @@ class TestQdrantDocEmbeddingsStorage(IsolatedAsyncioTestCase):
}
processor = Processor(**config)
# Add collection to known_collections (simulates config push)
processor.known_collections[('decode_user', 'decode_collection')] = {}
# Create mock message with decode error
mock_message = MagicMock()
mock_message.metadata.user = 'decode_user'

View file

@ -57,7 +57,10 @@ class TestQdrantGraphEmbeddingsStorage(IsolatedAsyncioTestCase):
}
processor = Processor(**config)
# Add collection to known_collections (simulates config push)
processor.known_collections[('test_user', 'test_collection')] = {}
# Create mock message with entities and vectors
mock_message = MagicMock()
mock_message.metadata.user = 'test_user'
@ -107,7 +110,10 @@ class TestQdrantGraphEmbeddingsStorage(IsolatedAsyncioTestCase):
}
processor = Processor(**config)
# Add collection to known_collections (simulates config push)
processor.known_collections[('multi_user', 'multi_collection')] = {}
# Create mock message with multiple entities
mock_message = MagicMock()
mock_message.metadata.user = 'multi_user'
@ -163,7 +169,10 @@ class TestQdrantGraphEmbeddingsStorage(IsolatedAsyncioTestCase):
}
processor = Processor(**config)
# Add collection to known_collections (simulates config push)
processor.known_collections[('vector_user', 'vector_collection')] = {}
# Create mock message with entity having multiple vectors
mock_message = MagicMock()
mock_message.metadata.user = 'vector_user'