Make all Cassandra and Qdrant I/O async-safe with proper concurrency controls (#916)

Cassandra triples services were using synchronous EntityCentricKnowledgeGraph
methods from async contexts, and connection state was managed with
threading.local, which is wrong for asyncio coroutines sharing a single
thread. Qdrant services had no async wrapping at all, blocking the event
loop on every network call. Rows services had unprotected shared state
mutations across concurrent coroutines.

- Add async methods to EntityCentricKnowledgeGraph (async_insert,
  async_get_s/p/o/sp/po/os/spo/all, async_collection_exists,
  async_create_collection, async_delete_collection) using the existing
  cassandra_async.async_execute bridge
- Rewrite triples write + query services: replace threading.local with
  asyncio.Lock + dict cache for per-workspace connections, use async
  ECKG methods for all data operations, keep asyncio.to_thread only for
  one-time blocking ECKG construction
- Wrap all Qdrant calls in asyncio.to_thread across all 6 services
  (doc/graph/row embeddings write + query), add asyncio.Lock + set cache
  for collection existence checks
- Add asyncio.Lock to rows write + query services to protect shared
  state (schemas, sessions, config caches) from concurrent mutation
- Update all affected tests to match new async patterns
This commit is contained in:
cybermaggedon 2026-05-14 16:00:54 +01:00 committed by GitHub
parent bb1109963c
commit a2dde9cafb
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
22 changed files with 736 additions and 621 deletions

View file

@ -184,7 +184,7 @@ class TestObjectsGraphQLQueryIntegration:
await processor.on_schema_config("default", sample_schema_config, version=1)
# Connect to Cassandra
processor.connect_cassandra()
await processor.connect_cassandra()
assert processor.session is not None
# Create test keyspace and table
@ -219,7 +219,7 @@ class TestObjectsGraphQLQueryIntegration:
"""Test inserting data and querying via GraphQL"""
# Load schema and connect
await processor.on_schema_config("default", sample_schema_config, version=1)
processor.connect_cassandra()
await processor.connect_cassandra()
# Setup test data
keyspace = "test_user"
@ -293,7 +293,7 @@ class TestObjectsGraphQLQueryIntegration:
"""Test GraphQL queries with filtering on indexed fields"""
# Setup (reuse previous setup)
await processor.on_schema_config("default", sample_schema_config, version=1)
processor.connect_cassandra()
await processor.connect_cassandra()
keyspace = "test_user"
collection = "filter_test"
@ -387,7 +387,7 @@ class TestObjectsGraphQLQueryIntegration:
"""Test full message processing workflow"""
# Setup
await processor.on_schema_config("default", sample_schema_config, version=1)
processor.connect_cassandra()
await processor.connect_cassandra()
# Create mock message
request = RowsQueryRequest(
@ -433,7 +433,7 @@ class TestObjectsGraphQLQueryIntegration:
"""Test handling multiple concurrent GraphQL queries"""
# Setup
await processor.on_schema_config("default", sample_schema_config, version=1)
processor.connect_cassandra()
await processor.connect_cassandra()
# Create multiple query tasks
queries = [
@ -519,7 +519,7 @@ class TestObjectsGraphQLQueryIntegration:
"""Test handling of large query result sets"""
# Setup
await processor.on_schema_config("default", sample_schema_config, version=1)
processor.connect_cassandra()
await processor.connect_cassandra()
keyspace = "large_test_user"
collection = "large_collection"