mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-04-25 16:36:21 +02:00
Config push notify pattern: replace stateful pub/sub with signal + fetch (#760)
Replace the config push mechanism that broadcast the full config blob on a 'state' class pub/sub queue with a lightweight notify signal containing only the version number and affected config types. Processors fetch the full config via request/response from the config service when notified.

This eliminates the need for the pub/sub 'state' queue class and stateful pub/sub services entirely. The config push queue moves from 'state' to 'flow' class — a simple transient signal rather than a retained message. This solves the RabbitMQ late-subscriber problem where restarting processes never received the current config because their fresh queue had no historical messages.

Key changes:
- ConfigPush schema: config dict replaced with types list
- Subscribe-then-fetch startup with retry: processors subscribe to the notify queue, fetch config via request/response, then process buffered notifies with version comparison to avoid race conditions
- register_config_handler() accepts an optional types parameter so handlers only fire when their config types change
- Short-lived config request/response clients to avoid subscriber contention on non-persistent response topics
- Config service passes affected types through put/delete/flow operations
- Gateway ConfigReceiver rewritten with the same notify pattern and retry loop

Tests updated. New tests:
- register_config_handler: without types, with types, multiple types, multiple handlers
- on_config_notify: old/same version skipped, irrelevant types skipped (version still updated), relevant type triggers fetch, handler without types always called, mixed handler filtering, empty types invokes all, fetch failure handled gracefully
- fetch_config: returns config+version, raises on error response, stops client even on exception
- fetch_and_apply_config: applies to all handlers on startup, retries on failure
This commit is contained in:
parent
d4723566cb
commit
4acd853023
37 changed files with 1449 additions and 406 deletions
|
|
@ -1,7 +1,8 @@
|
|||
|
||||
# Base class for processors. Implements:
|
||||
# - Pulsar client, subscribe and consume basic
|
||||
# - Pub/sub client, subscribe and consume basic
|
||||
# - the async startup logic
|
||||
# - Config notify handling with subscribe-then-fetch pattern
|
||||
# - Initialising metrics
|
||||
|
||||
import asyncio
|
||||
|
|
@ -12,12 +13,17 @@ import logging
|
|||
import os
|
||||
from prometheus_client import start_http_server, Info
|
||||
|
||||
from .. schema import ConfigPush, config_push_queue
|
||||
from .. schema import ConfigPush, ConfigRequest, ConfigResponse
|
||||
from .. schema import config_push_queue, config_request_queue
|
||||
from .. schema import config_response_queue
|
||||
from .. log_level import LogLevel
|
||||
from . pubsub import get_pubsub, add_pubsub_args
|
||||
from . producer import Producer
|
||||
from . consumer import Consumer
|
||||
from . metrics import ProcessorMetrics, ConsumerMetrics
|
||||
from . subscriber import Subscriber
|
||||
from . request_response_spec import RequestResponse
|
||||
from . metrics import ProcessorMetrics, ConsumerMetrics, ProducerMetrics
|
||||
from . metrics import SubscriberMetrics
|
||||
from . logging import add_logging_args, setup_logging
|
||||
|
||||
default_config_queue = config_push_queue
|
||||
|
|
@ -57,9 +63,13 @@ class AsyncProcessor:
|
|||
"config_push_queue", default_config_queue
|
||||
)
|
||||
|
||||
# This records registered configuration handlers
|
||||
# This records registered configuration handlers, each entry is:
|
||||
# { "handler": async_fn, "types": set_or_none }
|
||||
self.config_handlers = []
|
||||
|
||||
# Track the current config version for dedup
|
||||
self.config_version = 0
|
||||
|
||||
# Create a random ID for this subscription to the configuration
|
||||
# service
|
||||
config_subscriber_id = str(uuid.uuid4())
|
||||
|
|
@ -68,8 +78,7 @@ class AsyncProcessor:
|
|||
processor = self.id, flow = None, name = "config",
|
||||
)
|
||||
|
||||
# Subscribe to config queue — exclusive so every processor
|
||||
# gets its own copy of config pushes (broadcast pattern)
|
||||
# Subscribe to config notify queue
|
||||
self.config_sub_task = Consumer(
|
||||
|
||||
taskgroup = self.taskgroup,
|
||||
|
|
@ -80,21 +89,93 @@ class AsyncProcessor:
|
|||
topic = self.config_push_queue,
|
||||
schema = ConfigPush,
|
||||
|
||||
handler = self.on_config_change,
|
||||
handler = self.on_config_notify,
|
||||
|
||||
metrics = config_consumer_metrics,
|
||||
|
||||
start_of_messages = True,
|
||||
start_of_messages = False,
|
||||
consumer_type = 'exclusive',
|
||||
)
|
||||
|
||||
self.running = True
|
||||
|
||||
# This is called to start dynamic behaviour. An over-ride point for
|
||||
# extra functionality
|
||||
def _create_config_client(self):
    """Build a new request/response client for the config service.

    Every call generates a fresh UUID that is embedded in the
    subscription name, so each returned client has its own subscription.
    The client is intended to be short-lived.

    Returns:
        A RequestResponse wired to the config request and response
        queues, with producer/subscriber metrics labelled for this
        processor.
    """

    # Random suffix that makes this client's subscription name unique
    unique_suffix = str(uuid.uuid4())

    # Labels common to the request-side and response-side metrics
    labels = {"processor": self.id, "flow": None}

    request_metrics = ProducerMetrics(name="config-request", **labels)
    response_metrics = SubscriberMetrics(name="config-response", **labels)

    return RequestResponse(
        backend=self.pubsub_backend,
        subscription=f"{self.id}--config--{unique_suffix}",
        consumer_name=self.id,
        request_topic=config_request_queue,
        request_schema=ConfigRequest,
        request_metrics=request_metrics,
        response_topic=config_response_queue,
        response_schema=ConfigResponse,
        response_metrics=response_metrics,
    )
|
||||
|
||||
async def fetch_config(self):
    """Fetch the full config from the config service.

    A new client is created for this single request and is always
    stopped afterwards, whether the request succeeded or not.

    Returns:
        A (config, version) tuple taken from the response.

    Raises:
        RuntimeError: if the response carries an error.
    """

    client = self._create_config_client()

    try:
        await client.start()

        reply = await client.request(
            ConfigRequest(operation="config"),
            timeout=10,
        )

        failure = reply.error
        if not failure:
            return reply.config, reply.version

        raise RuntimeError(f"Config error: {failure.message}")

    finally:
        # Runs on success, on an error response and on any exception
        await client.stop()
|
||||
|
||||
# This is called to start dynamic behaviour.
|
||||
# Implements the subscribe-then-fetch pattern to avoid race conditions.
|
||||
async def start(self):
    """Start config handling using subscribe-then-fetch ordering.

    The notify consumer is started before the current config is
    fetched, so a notification sent while the fetch is in flight is not
    missed.
    """

    # Step 1: begin receiving config notifications
    notify_consumer = self.config_sub_task
    await notify_consumer.start()

    # Step 2: fetch the current config via request/response and apply it
    await self.fetch_and_apply_config()

    # Step 3: nothing to do here. Notifications received in the
    # meantime go through on_config_notify, which compares their version
    # with the one just fetched.
|
||||
|
||||
async def fetch_and_apply_config(self, retry_delay=2):
    """Fetch the full config and apply it to every registered handler.

    Used at startup, so all handlers are invoked regardless of the
    config types they registered for.  The fetch is retried until it
    succeeds, because the config service may not be ready yet.  An
    exception raised by a handler is treated the same way and triggers
    another fetch-and-apply attempt.

    Args:
        retry_delay: seconds to wait between attempts (default 2).

    Returns:
        None.  Also returns without applying anything if self.running
        becomes false before an attempt succeeds.
    """

    while self.running:

        try:
            config, version = await self.fetch_config()

            logger.info(f"Fetched config version {version}")

            # Record the version before running handlers so that a
            # notify for this same version, arriving while handlers
            # run, is recognised as already seen by on_config_notify.
            self.config_version = version

            # Apply to all handlers (startup = invoke all)
            for entry in self.config_handlers:
                await entry["handler"](config, version)

            return

        except Exception as e:
            logger.warning(
                f"Config fetch failed: {e}, retrying in {retry_delay}s..."
            )
            await asyncio.sleep(retry_delay)
|
||||
|
||||
# This is called to stop all threads. An over-ride point for extra
|
||||
# functionality
|
||||
def stop(self):
|
||||
|
|
@ -110,20 +191,66 @@ class AsyncProcessor:
|
|||
def pulsar_host(self): return self._pulsar_host
|
||||
|
||||
# Register a new event handler for configuration change
|
||||
def register_config_handler(self, handler):
|
||||
self.config_handlers.append(handler)
|
||||
def register_config_handler(self, handler, types=None):
    """Register a handler to be called when configuration changes.

    Args:
        handler: async callable invoked as handler(config, version).
        types: optional config type name, or iterable of type names,
            that this handler cares about.  None or an empty value
            means the handler is interested in every config type.

    The handler is appended to self.config_handlers as a dict of the
    form {"handler": handler, "types": set_or_none}.
    """

    # A bare string would be split into a set of single characters by
    # set(); treat it as one type name instead.
    if isinstance(types, str):
        types = [types] if types else None

    self.config_handlers.append({
        "handler": handler,
        "types": set(types) if types else None,
    })
|
||||
|
||||
# Called when a new configuration message push occurs
|
||||
async def on_config_change(self, message, consumer, flow):
|
||||
# Called when a config notify message arrives
|
||||
async def on_config_notify(self, message, consumer, flow):
    """Handle a config notify message.

    The notify carries only a version number and the list of affected
    config types.  If the version is new and at least one registered
    handler is interested, the full config is fetched and passed to the
    interested handlers.

    Args:
        message: received message; message.value() must expose
            `version` and `types`.
        consumer: unused here.
        flow: unused here.

    Failures are logged rather than raised.  A failed fetch leaves
    self.config_version unchanged.  A failing handler does not prevent
    the remaining handlers from running.
    """

    payload = message.value()
    notify_version = payload.version

    # A missing types list is treated like an empty one, which means
    # "all handlers are affected".
    notify_types = set(payload.types or ())

    def wants(entry):
        """Return True if this handler entry should see the notify."""
        handler_types = entry["types"]
        return (
            handler_types is None
            or not notify_types
            or bool(notify_types & handler_types)
        )

    # Skip if we already have this version or newer
    if notify_version <= self.config_version:
        logger.debug(
            f"Ignoring config notify v{notify_version}, "
            f"already at v{self.config_version}"
        )
        return

    # Check if any handler cares about the affected types.  The version
    # is still recorded so the same notify is not reconsidered.
    if notify_types and not any(
        wants(entry) for entry in self.config_handlers
    ):
        logger.debug(
            f"Ignoring config notify v{notify_version}, "
            f"no handlers for types {notify_types}"
        )
        self.config_version = notify_version
        return

    logger.info(
        f"Config notify v{notify_version} types={list(notify_types)}, "
        f"fetching config..."
    )

    # Fetch full config using short-lived client
    try:
        config, version = await self.fetch_config()
    except Exception as e:
        logger.error(
            f"Failed to fetch config on notify: {e}", exc_info=True
        )
        return

    # Recorded before handlers run so a repeated notify for this
    # version is recognised as already seen.
    self.config_version = version

    # Invoke handlers that care about the affected types.  Each handler
    # is isolated so one failure is reported as such and does not stop
    # the others.
    for entry in self.config_handlers:
        if not wants(entry):
            continue
        try:
            await entry["handler"](config, version)
        except Exception as e:
            logger.error(
                f"Config handler failed on notify: {e}", exc_info=True
            )
|
||||
|
||||
# This is the 'main' body of the handler. It is a point to override
|
||||
# if needed. By default does nothing. Processors are implemented
|
||||
|
|
@ -181,7 +308,7 @@ class AsyncProcessor:
|
|||
prog=ident,
|
||||
description=doc
|
||||
)
|
||||
|
||||
|
||||
parser.add_argument(
|
||||
'--id',
|
||||
default=ident,
|
||||
|
|
@ -271,4 +398,3 @@ class AsyncProcessor:
|
|||
default=8000,
|
||||
help=f'Pulsar host (default: 8000)',
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -26,7 +26,9 @@ class FlowProcessor(AsyncProcessor):
|
|||
super(FlowProcessor, self).__init__(**params)
|
||||
|
||||
# Register configuration handler
|
||||
self.register_config_handler(self.on_configure_flows)
|
||||
self.register_config_handler(
|
||||
self.on_configure_flows, types=["active-flow"]
|
||||
)
|
||||
|
||||
# Initialise flow information state
|
||||
self.flows = {}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue