Make all Cassandra and Qdrant I/O async-safe with proper concurrency controls (#916)

Cassandra triples services were using synchronous EntityCentricKnowledgeGraph
methods from async contexts, and connection state was managed with
threading.local which is wrong for asyncio coroutines sharing a single
thread. Qdrant services had no async wrapping at all, blocking the event
loop on every network call. Rows services had unprotected shared state
mutations across concurrent coroutines.

- Add async methods to EntityCentricKnowledgeGraph (async_insert,
  async_get_s/p/o/sp/po/os/spo/all, async_collection_exists,
  async_create_collection, async_delete_collection) using the existing
  cassandra_async.async_execute bridge
- Rewrite triples write + query services: replace threading.local with
  asyncio.Lock + dict cache for per-workspace connections, use async
  ECKG methods for all data operations, keep asyncio.to_thread only for
  one-time blocking ECKG construction
- Wrap all Qdrant calls in asyncio.to_thread across all 6 services
  (doc/graph/row embeddings write + query), add asyncio.Lock + set cache
  for collection existence checks
- Add asyncio.Lock to rows write + query services to protect shared
  state (schemas, sessions, config caches) from concurrent mutation
- Update all affected tests to match new async patterns
This commit is contained in:
cybermaggedon 2026-05-14 16:00:54 +01:00 committed by GitHub
parent bb1109963c
commit a2dde9cafb
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
22 changed files with 736 additions and 621 deletions

View file

@ -113,12 +113,15 @@ class TestDocEmbeddingsNullProtection:
@pytest.mark.asyncio
async def test_valid_embedding_upserted(self):
import asyncio
from trustgraph.storage.doc_embeddings.qdrant.write import Processor
proc = Processor.__new__(Processor)
proc.qdrant = MagicMock()
proc.qdrant.collection_exists.return_value = True
proc.collection_exists = MagicMock(return_value=True)
proc._cache_lock = asyncio.Lock()
proc._known_collections = set()
msg = MagicMock()
msg.metadata.collection = "col1"
@ -134,12 +137,15 @@ class TestDocEmbeddingsNullProtection:
@pytest.mark.asyncio
async def test_dimension_in_collection_name(self):
"""Collection name should include vector dimension."""
import asyncio
from trustgraph.storage.doc_embeddings.qdrant.write import Processor
proc = Processor.__new__(Processor)
proc.qdrant = MagicMock()
proc.qdrant.collection_exists.return_value = True
proc.collection_exists = MagicMock(return_value=True)
proc._cache_lock = asyncio.Lock()
proc._known_collections = set()
msg = MagicMock()
msg.metadata.collection = "docs"
@ -220,12 +226,15 @@ class TestGraphEmbeddingsNullProtection:
@pytest.mark.asyncio
async def test_valid_entity_and_vector_upserted(self):
import asyncio
from trustgraph.storage.graph_embeddings.qdrant.write import Processor
proc = Processor.__new__(Processor)
proc.qdrant = MagicMock()
proc.qdrant.collection_exists.return_value = True
proc.collection_exists = MagicMock(return_value=True)
proc._cache_lock = asyncio.Lock()
proc._known_collections = set()
msg = MagicMock()
msg.metadata.collection = "col1"
@ -241,12 +250,15 @@ class TestGraphEmbeddingsNullProtection:
@pytest.mark.asyncio
async def test_lazy_collection_creation_on_new_dimension(self):
import asyncio
from trustgraph.storage.graph_embeddings.qdrant.write import Processor
proc = Processor.__new__(Processor)
proc.qdrant = MagicMock()
proc.qdrant.collection_exists.return_value = False
proc.collection_exists = MagicMock(return_value=True)
proc._cache_lock = asyncio.Lock()
proc._known_collections = set()
msg = MagicMock()
msg.metadata.collection = "graphs"