Graceful shutdown of importers/exporters

This commit is contained in:
Cyber MacGeddon 2025-08-28 09:54:01 +01:00
parent eef4c808e7
commit 976529de69
10 changed files with 412 additions and 171 deletions

View file

@ -26,46 +26,66 @@ class DocumentEmbeddingsExport:
self.subscriber = subscriber
async def destroy(self):
# Step 1: Signal stop to prevent new messages
self.running.stop()
await self.ws.close()
# Step 2: Wait briefly for in-flight messages
await asyncio.sleep(0.5)
# Step 3: Unsubscribe and stop subscriber (triggers queue drain)
if hasattr(self, 'subs'):
await self.subs.unsubscribe_all(self.id)
await self.subs.stop()
# Step 4: Close websocket last
if self.ws and not self.ws.closed:
await self.ws.close()
async def receive(self, msg):
# Ignore incoming info from websocket
pass
async def run(self):
subs = Subscriber(
client = self.pulsar_client, topic = self.queue,
consumer_name = self.consumer, subscription = self.subscriber,
schema = DocumentEmbeddings
"""Enhanced run with better error handling"""
self.subs = Subscriber(
client = self.pulsar_client,
topic = self.queue,
consumer_name = self.consumer,
subscription = self.subscriber,
schema = DocumentEmbeddings,
backpressure_strategy = "block" # Configurable
)
await subs.start()
id = str(uuid.uuid4())
q = await subs.subscribe_all(id)
await self.subs.start()
self.id = str(uuid.uuid4())
q = await self.subs.subscribe_all(self.id)
consecutive_errors = 0
max_consecutive_errors = 5
while self.running.get():
try:
resp = await asyncio.wait_for(q.get(), timeout=0.5)
await self.ws.send_json(serialize_document_embeddings(resp))
except TimeoutError:
consecutive_errors = 0 # Reset on success
except asyncio.TimeoutError:
continue
except queue.Empty:
continue
except Exception as e:
logger.error(f"Exception: {str(e)}", exc_info=True)
break
await subs.unsubscribe_all(id)
await subs.stop()
await self.ws.close()
self.running.stop()
logger.error(f"Exception sending to websocket: {str(e)}")
consecutive_errors += 1
if consecutive_errors >= max_consecutive_errors:
logger.error("Too many consecutive errors, shutting down")
break
# Brief pause before retry
await asyncio.sleep(0.1)
# Graceful cleanup handled in destroy()

View file

@ -1,6 +1,7 @@
import asyncio
import uuid
import logging
from aiohttp import WSMsgType
from ... schema import Metadata
@ -8,6 +9,9 @@ from ... schema import DocumentEmbeddings, ChunkEmbeddings
from ... base import Publisher
from ... messaging.translators.document_loading import DocumentEmbeddingsTranslator
# Module logger
logger = logging.getLogger(__name__)
class DocumentEmbeddingsImport:
def __init__(
@ -26,13 +30,17 @@ class DocumentEmbeddingsImport:
await self.publisher.start()
async def destroy(self):
# Step 1: Stop accepting new messages
self.running.stop()
# Step 2: Wait for publisher to drain its queue
logger.info("Draining publisher queue...")
await self.publisher.stop()
# Step 3: Close websocket only after queue is drained
if self.ws:
await self.ws.close()
await self.publisher.stop()
async def receive(self, msg):
data = msg.json()

View file

@ -26,46 +26,66 @@ class EntityContextsExport:
self.subscriber = subscriber
async def destroy(self):
# Step 1: Signal stop to prevent new messages
self.running.stop()
await self.ws.close()
# Step 2: Wait briefly for in-flight messages
await asyncio.sleep(0.5)
# Step 3: Unsubscribe and stop subscriber (triggers queue drain)
if hasattr(self, 'subs'):
await self.subs.unsubscribe_all(self.id)
await self.subs.stop()
# Step 4: Close websocket last
if self.ws and not self.ws.closed:
await self.ws.close()
async def receive(self, msg):
# Ignore incoming info from websocket
pass
async def run(self):
subs = Subscriber(
client = self.pulsar_client, topic = self.queue,
consumer_name = self.consumer, subscription = self.subscriber,
schema = EntityContexts
"""Enhanced run with better error handling"""
self.subs = Subscriber(
client = self.pulsar_client,
topic = self.queue,
consumer_name = self.consumer,
subscription = self.subscriber,
schema = EntityContexts,
backpressure_strategy = "block" # Configurable
)
await subs.start()
id = str(uuid.uuid4())
q = await subs.subscribe_all(id)
await self.subs.start()
self.id = str(uuid.uuid4())
q = await self.subs.subscribe_all(self.id)
consecutive_errors = 0
max_consecutive_errors = 5
while self.running.get():
try:
resp = await asyncio.wait_for(q.get(), timeout=0.5)
await self.ws.send_json(serialize_entity_contexts(resp))
except TimeoutError:
consecutive_errors = 0 # Reset on success
except asyncio.TimeoutError:
continue
except queue.Empty:
continue
except Exception as e:
logger.error(f"Exception: {str(e)}", exc_info=True)
break
await subs.unsubscribe_all(id)
await subs.stop()
await self.ws.close()
self.running.stop()
logger.error(f"Exception sending to websocket: {str(e)}")
consecutive_errors += 1
if consecutive_errors >= max_consecutive_errors:
logger.error("Too many consecutive errors, shutting down")
break
# Brief pause before retry
await asyncio.sleep(0.1)
# Graceful cleanup handled in destroy()

View file

@ -1,6 +1,7 @@
import asyncio
import uuid
import logging
from aiohttp import WSMsgType
from ... schema import Metadata
@ -9,6 +10,9 @@ from ... base import Publisher
from . serialize import to_subgraph, to_value
# Module logger
logger = logging.getLogger(__name__)
class EntityContextsImport:
def __init__(
@ -26,13 +30,17 @@ class EntityContextsImport:
await self.publisher.start()
async def destroy(self):
# Step 1: Stop accepting new messages
self.running.stop()
# Step 2: Wait for publisher to drain its queue
logger.info("Draining publisher queue...")
await self.publisher.stop()
# Step 3: Close websocket only after queue is drained
if self.ws:
await self.ws.close()
await self.publisher.stop()
async def receive(self, msg):
data = msg.json()

View file

@ -26,46 +26,66 @@ class GraphEmbeddingsExport:
self.subscriber = subscriber
async def destroy(self):
# Step 1: Signal stop to prevent new messages
self.running.stop()
await self.ws.close()
# Step 2: Wait briefly for in-flight messages
await asyncio.sleep(0.5)
# Step 3: Unsubscribe and stop subscriber (triggers queue drain)
if hasattr(self, 'subs'):
await self.subs.unsubscribe_all(self.id)
await self.subs.stop()
# Step 4: Close websocket last
if self.ws and not self.ws.closed:
await self.ws.close()
async def receive(self, msg):
# Ignore incoming info from websocket
pass
async def run(self):
subs = Subscriber(
client = self.pulsar_client, topic = self.queue,
consumer_name = self.consumer, subscription = self.subscriber,
schema = GraphEmbeddings
"""Enhanced run with better error handling"""
self.subs = Subscriber(
client = self.pulsar_client,
topic = self.queue,
consumer_name = self.consumer,
subscription = self.subscriber,
schema = GraphEmbeddings,
backpressure_strategy = "block" # Configurable
)
await subs.start()
id = str(uuid.uuid4())
q = await subs.subscribe_all(id)
await self.subs.start()
self.id = str(uuid.uuid4())
q = await self.subs.subscribe_all(self.id)
consecutive_errors = 0
max_consecutive_errors = 5
while self.running.get():
try:
resp = await asyncio.wait_for(q.get(), timeout=0.5)
await self.ws.send_json(serialize_graph_embeddings(resp))
except TimeoutError:
consecutive_errors = 0 # Reset on success
except asyncio.TimeoutError:
continue
except queue.Empty:
continue
except Exception as e:
logger.error(f"Exception: {str(e)}", exc_info=True)
break
await subs.unsubscribe_all(id)
await subs.stop()
await self.ws.close()
self.running.stop()
logger.error(f"Exception sending to websocket: {str(e)}")
consecutive_errors += 1
if consecutive_errors >= max_consecutive_errors:
logger.error("Too many consecutive errors, shutting down")
break
# Brief pause before retry
await asyncio.sleep(0.1)
# Graceful cleanup handled in destroy()

View file

@ -1,6 +1,7 @@
import asyncio
import uuid
import logging
from aiohttp import WSMsgType
from ... schema import Metadata
@ -9,6 +10,9 @@ from ... base import Publisher
from . serialize import to_subgraph, to_value
# Module logger
logger = logging.getLogger(__name__)
class GraphEmbeddingsImport:
def __init__(
@ -26,13 +30,17 @@ class GraphEmbeddingsImport:
await self.publisher.start()
async def destroy(self):
# Step 1: Stop accepting new messages
self.running.stop()
# Step 2: Wait for publisher to drain its queue
logger.info("Draining publisher queue...")
await self.publisher.stop()
# Step 3: Close websocket only after queue is drained
if self.ws:
await self.ws.close()
await self.publisher.stop()
async def receive(self, msg):
data = msg.json()

View file

@ -26,46 +26,66 @@ class TriplesExport:
self.subscriber = subscriber
async def destroy(self):
# Step 1: Signal stop to prevent new messages
self.running.stop()
await self.ws.close()
# Step 2: Wait briefly for in-flight messages
await asyncio.sleep(0.5)
# Step 3: Unsubscribe and stop subscriber (triggers queue drain)
if hasattr(self, 'subs'):
await self.subs.unsubscribe_all(self.id)
await self.subs.stop()
# Step 4: Close websocket last
if self.ws and not self.ws.closed:
await self.ws.close()
async def receive(self, msg):
# Ignore incoming info from websocket
pass
async def run(self):
subs = Subscriber(
client = self.pulsar_client, topic = self.queue,
consumer_name = self.consumer, subscription = self.subscriber,
schema = Triples
"""Enhanced run with better error handling"""
self.subs = Subscriber(
client = self.pulsar_client,
topic = self.queue,
consumer_name = self.consumer,
subscription = self.subscriber,
schema = Triples,
backpressure_strategy = "block" # Configurable
)
await subs.start()
id = str(uuid.uuid4())
q = await subs.subscribe_all(id)
await self.subs.start()
self.id = str(uuid.uuid4())
q = await self.subs.subscribe_all(self.id)
consecutive_errors = 0
max_consecutive_errors = 5
while self.running.get():
try:
resp = await asyncio.wait_for(q.get(), timeout=0.5)
await self.ws.send_json(serialize_triples(resp))
except TimeoutError:
consecutive_errors = 0 # Reset on success
except asyncio.TimeoutError:
continue
except queue.Empty:
continue
except Exception as e:
logger.error(f"Exception: {str(e)}", exc_info=True)
break
await subs.unsubscribe_all(id)
await subs.stop()
await self.ws.close()
self.running.stop()
logger.error(f"Exception sending to websocket: {str(e)}")
consecutive_errors += 1
if consecutive_errors >= max_consecutive_errors:
logger.error("Too many consecutive errors, shutting down")
break
# Brief pause before retry
await asyncio.sleep(0.1)
# Graceful cleanup handled in destroy()

View file

@ -1,6 +1,7 @@
import asyncio
import uuid
import logging
from aiohttp import WSMsgType
from ... schema import Metadata
@ -9,6 +10,9 @@ from ... base import Publisher
from . serialize import to_subgraph
# Module logger
logger = logging.getLogger(__name__)
class TriplesImport:
def __init__(
@ -26,13 +30,17 @@ class TriplesImport:
await self.publisher.start()
async def destroy(self):
# Step 1: Stop accepting new messages
self.running.stop()
# Step 2: Wait for publisher to drain its queue
logger.info("Draining publisher queue...")
await self.publisher.stop()
# Step 3: Close websocket only after queue is drained
if self.ws:
await self.ws.close()
await self.publisher.stop()
async def receive(self, msg):
data = msg.json()