Config push notify pattern: replace stateful pub/sub with signal + fetch (#760)

Replace the config push mechanism that broadcast the full config
blob on a 'state' class pub/sub queue with a lightweight notify
signal containing only the version number and affected config
types. Processors fetch the full config via request/response from
the config service when notified.

This eliminates the need for the pub/sub 'state' queue class and
stateful pub/sub services entirely. The config push queue moves
from 'state' to 'flow' class — a simple transient signal rather
than a retained message. This solves the RabbitMQ
late-subscriber problem where restarting processes never received
the current config because their fresh queue had no historical
messages.

Key changes:
- ConfigPush schema: config dict replaced with types list
- Subscribe-then-fetch startup with retry: processors subscribe
  to notify queue, fetch config via request/response, then
  process buffered notifies with version comparison to avoid race
  conditions
- register_config_handler() accepts optional types parameter so
  handlers only fire when their config types change
- Short-lived config request/response clients to avoid subscriber
  contention on non-persistent response topics
- Config service passes affected types through put/delete/flow
  operations
- Gateway ConfigReceiver rewritten with same notify pattern and
  retry loop

Tests updated

New tests:
- register_config_handler: without types, with types, multiple
  types, multiple handlers
- on_config_notify: old/same version skipped, irrelevant types
  skipped (version still updated), relevant type triggers fetch,
  handler without types always called, mixed handler filtering,
  empty types invokes all, fetch failure handled gracefully
- fetch_config: returns config+version, raises on error response,
  stops client even on exception
- fetch_and_apply_config: applies to all handlers on startup,
  retries on failure
This commit is contained in:
cybermaggedon 2026-04-06 16:57:27 +01:00 committed by GitHub
parent d4723566cb
commit 4acd853023
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
37 changed files with 1449 additions and 406 deletions

View file

@ -24,7 +24,7 @@ class Service(ToolService):
**params
)
self.register_config_handler(self.on_mcp_config)
self.register_config_handler(self.on_mcp_config, types=["mcp-tool"])
self.mcp_services = {}

View file

@ -148,18 +148,7 @@ class Configuration:
async def handle_delete(self, v):
# for k in v.keys:
# if k.type not in self or k.key not in self[k.type]:
# return ConfigResponse(
# version = None,
# values = None,
# directory = None,
# config = None,
# error = Error(
# type = "key-error",
# message = f"Key error"
# )
# )
types = list(set(k.type for k in v.keys))
for k in v.keys:
@ -167,20 +156,22 @@ class Configuration:
await self.inc_version()
await self.push()
await self.push(types=types)
return ConfigResponse(
)
async def handle_put(self, v):
types = list(set(k.type for k in v.values))
for k in v.values:
await self.table_store.put_config(k.type, k.key, k.value)
await self.inc_version()
await self.push()
await self.push(types=types)
return ConfigResponse(
)

View file

@ -126,12 +126,12 @@ class FlowConfig:
await self.config.inc_version()
await self.config.push()
await self.config.push(types=["flow-blueprint"])
return FlowResponse(
error = None,
)
async def handle_delete_blueprint(self, msg):
logger.debug(f"Flow config message: {msg}")
@ -140,7 +140,7 @@ class FlowConfig:
await self.config.inc_version()
await self.config.push()
await self.config.push(types=["flow-blueprint"])
return FlowResponse(
error = None,
@ -270,7 +270,7 @@ class FlowConfig:
await self.config.inc_version()
await self.config.push()
await self.config.push(types=["active-flow", "flow"])
return FlowResponse(
error = None,
@ -332,12 +332,12 @@ class FlowConfig:
await self.config.inc_version()
await self.config.push()
await self.config.push(types=["active-flow", "flow"])
return FlowResponse(
error = None,
)
async def handle(self, msg):
logger.debug(f"Handling flow message: {msg.operation}")

View file

@ -167,25 +167,22 @@ class Processor(AsyncProcessor):
async def start(self):
await self.push()
await self.push() # Startup poke: empty types = everything
await self.config_request_consumer.start()
await self.flow_request_consumer.start()
async def push(self):
config = await self.config.get_config()
async def push(self, types=None):
version = await self.config.get_version()
resp = ConfigPush(
version = version,
config = config,
types = types or [],
)
await self.config_push_producer.send(resp)
# Race condition, should make sure version & config sync
logger.info(f"Pushed configuration version {await self.config.get_version()}")
logger.info(f"Pushed config poke version {version}, types={resp.types}")
async def on_config_request(self, msg, consumer, flow):

View file

@ -108,7 +108,7 @@ class Processor(AsyncProcessor):
flow_config = self,
)
self.register_config_handler(self.on_knowledge_config)
self.register_config_handler(self.on_knowledge_config, types=["kg-core"])
self.flows = {}

View file

@ -66,8 +66,8 @@ class Processor(CollectionConfigHandler, FlowProcessor):
)
# Register config handlers
self.register_config_handler(self.on_schema_config)
self.register_config_handler(self.on_collection_config)
self.register_config_handler(self.on_schema_config, types=["schema"])
self.register_config_handler(self.on_collection_config, types=["collection"])
# Schema storage: name -> RowSchema
self.schemas: Dict[str, RowSchema] = {}

View file

@ -43,7 +43,7 @@ class Processor(FlowProcessor):
self.template_id = template_id
self.config_key = config_key
self.register_config_handler(self.on_prompt_config)
self.register_config_handler(self.on_prompt_config, types=["prompt"])
self.register_specification(
ConsumerSpec(

View file

@ -107,7 +107,7 @@ class Processor(FlowProcessor):
)
# Register config handler for ontology updates
self.register_config_handler(self.on_ontology_config)
self.register_config_handler(self.on_ontology_config, types=["ontology"])
# Shared components (not flow-specific)
self.ontology_loader = OntologyLoader()

View file

@ -82,7 +82,7 @@ class Processor(FlowProcessor):
)
# Register config handler for schema updates
self.register_config_handler(self.on_schema_config)
self.register_config_handler(self.on_schema_config, types=["schema"])
# Schema storage: name -> RowSchema
self.schemas: Dict[str, RowSchema] = {}

View file

@ -1,36 +1,27 @@
"""
API gateway. Offers HTTP services which are translated to interaction on the
Pulsar bus.
API gateway config receiver. Subscribes to config notify messages and
fetches the full config via request/response to manage flow lifecycle.
"""
module = "api-gateway"
# FIXME: Subscribes to Pulsar unnecessarily, should only do it when there
# are active listeners
# FIXME: Connection errors in publishers / subscribers cause those threads
# to fail and are not failed or retried
import asyncio
import argparse
from aiohttp import web
import logging
import os
import base64
import uuid
# Module logger
logger = logging.getLogger(__name__)
import logging
import json
from prometheus_client import start_http_server
from ... schema import ConfigPush, config_push_queue
from ... base import Consumer
from ... schema import ConfigPush, ConfigRequest, ConfigResponse
from ... schema import config_push_queue, config_request_queue
from ... schema import config_response_queue
from ... base import Consumer, Producer
from ... base.subscriber import Subscriber
from ... base.request_response_spec import RequestResponse
from ... base.metrics import ProducerMetrics, SubscriberMetrics
logger = logging.getLogger("config.receiver")
logger.setLevel(logging.INFO)
class ConfigReceiver:
def __init__(self, backend):
@ -41,34 +32,107 @@ class ConfigReceiver:
self.flows = {}
self.config_version = 0
def add_handler(self, h):
self.flow_handlers.append(h)
async def on_config(self, msg, proc, flow):
async def on_config_notify(self, msg, proc, flow):
try:
v = msg.value()
notify_version = v.version
notify_types = set(v.types)
logger.info(f"Config version: {v.version}")
# Skip if we already have this version or newer
if notify_version <= self.config_version:
logger.debug(
f"Ignoring config notify v{notify_version}, "
f"already at v{self.config_version}"
)
return
flows = v.config.get("flow", {})
# Gateway cares about flow config
if notify_types and "flow" not in notify_types and "active-flow" not in notify_types:
logger.debug(
f"Ignoring config notify v{notify_version}, "
f"no flow types in {notify_types}"
)
self.config_version = notify_version
return
wanted = list(flows.keys())
current = list(self.flows.keys())
logger.info(
f"Config notify v{notify_version}, fetching config..."
)
for k in wanted:
if k not in current:
self.flows[k] = json.loads(flows[k])
await self.start_flow(k, self.flows[k])
for k in current:
if k not in wanted:
await self.stop_flow(k, self.flows[k])
del self.flows[k]
await self.fetch_and_apply()
except Exception as e:
logger.error(f"Config processing exception: {e}", exc_info=True)
logger.error(
f"Config notify processing exception: {e}", exc_info=True
)
async def fetch_and_apply(self, retry=False, *, timeout=10, retry_delay=2):
    """Fetch the full config from the config service and reconcile flows.

    Sends ``ConfigRequest(operation="config")`` through
    ``self.config_client``, stores the returned version in
    ``self.config_version`` and then starts/stops flows so that
    ``self.flows`` matches the ``"flow"`` section of the response.

    Args:
        retry: If true, keep retrying (sleeping ``retry_delay`` seconds
            between attempts) until one fetch-and-apply pass completes.
            If false, log the failure and return after a single attempt.
        timeout: Timeout passed to the config request. Defaults to the
            previously hard-coded value of 10.
        retry_delay: Seconds to sleep between attempts when ``retry`` is
            true. Defaults to the previously hard-coded value of 2.

    Returns:
        None. Error responses and exceptions derived from ``Exception``
        are logged rather than propagated.
    """
    while True:
        try:
            logger.info("Fetching config from config service...")
            resp = await self.config_client.request(
                ConfigRequest(operation="config"),
                timeout=timeout,
            )
            logger.info("Config response received")

            if resp.error:
                if not retry:
                    logger.error(
                        f"Config fetch error: {resp.error.message}"
                    )
                    return
                logger.warning(
                    f"Config fetch error: {resp.error.message}, "
                    f"retrying in {retry_delay}s..."
                )
                await asyncio.sleep(retry_delay)
                continue

            # NOTE: the version is recorded before the flows are
            # reconciled, so it is already advanced if reconciliation
            # raises part-way through.
            self.config_version = resp.version
            await self._apply_flows(resp.config.get("flow", {}))
            return

        except Exception as e:
            if not retry:
                logger.error(
                    f"Config fetch exception: {e}", exc_info=True
                )
                return
            logger.warning(
                f"Config fetch failed: {e}, retrying in {retry_delay}s..."
            )
            await asyncio.sleep(retry_delay)

async def _apply_flows(self, flows):
    """Start flows that are new in *flows* and stop flows that are gone.

    Args:
        flows: Mapping of flow id to a JSON-encoded flow definition, as
            taken from the ``"flow"`` section of the config response.
    """
    # Snapshot the ids known before this pass so flows added below are
    # not considered again by the removal loop.
    previous = list(self.flows)
    known = set(previous)

    for flow_id, definition in flows.items():
        if flow_id not in known:
            self.flows[flow_id] = json.loads(definition)
            await self.start_flow(flow_id, self.flows[flow_id])

    for flow_id in previous:
        if flow_id not in flows:
            await self.stop_flow(flow_id, self.flows[flow_id])
            del self.flows[flow_id]
async def start_flow(self, id, flow):
@ -79,7 +143,9 @@ class ConfigReceiver:
try:
await handler.start_flow(id, flow)
except Exception as e:
logger.error(f"Config processing exception: {e}", exc_info=True)
logger.error(
f"Config processing exception: {e}", exc_info=True
)
async def stop_flow(self, id, flow):
@ -90,32 +156,80 @@ class ConfigReceiver:
try:
await handler.stop_flow(id, flow)
except Exception as e:
logger.error(f"Config processing exception: {e}", exc_info=True)
logger.error(
f"Config processing exception: {e}", exc_info=True
)
async def config_loader(self):
async with asyncio.TaskGroup() as tg:
while True:
id = str(uuid.uuid4())
try:
self.config_cons = Consumer(
taskgroup = tg,
flow = None,
backend = self.backend,
subscriber = f"gateway-{id}",
topic = config_push_queue,
schema = ConfigPush,
handler = self.on_config,
start_of_messages = True,
)
async with asyncio.TaskGroup() as tg:
await self.config_cons.start()
id = str(uuid.uuid4())
logger.debug("Waiting for config updates...")
# Config request/response client
config_req_metrics = ProducerMetrics(
processor="api-gateway", flow=None,
name="config-request",
)
config_resp_metrics = SubscriberMetrics(
processor="api-gateway", flow=None,
name="config-response",
)
logger.info("Config consumer finished")
self.config_client = RequestResponse(
backend=self.backend,
subscription=f"api-gateway--config--{id}",
consumer_name="api-gateway",
request_topic=config_request_queue,
request_schema=ConfigRequest,
request_metrics=config_req_metrics,
response_topic=config_response_queue,
response_schema=ConfigResponse,
response_metrics=config_resp_metrics,
)
logger.info("Starting config request/response client...")
await self.config_client.start()
logger.info("Config request/response client started")
# Subscribe to notify queue
self.config_cons = Consumer(
taskgroup=tg,
flow=None,
backend=self.backend,
subscriber=f"gateway-{id}",
topic=config_push_queue,
schema=ConfigPush,
handler=self.on_config_notify,
start_of_messages=False,
)
logger.info("Starting config notify consumer...")
await self.config_cons.start()
logger.info("Config notify consumer started")
# Fetch current config (subscribe-then-fetch pattern)
# Retry until config service is available
await self.fetch_and_apply(retry=True)
logger.info(
"Config loader initialised, waiting for notifys..."
)
logger.warning("Config consumer exited, restarting...")
except Exception as e:
logger.error(
f"Config loader exception: {e}, restarting in 4s...",
exc_info=True
)
await asyncio.sleep(4)
async def start(self):
asyncio.create_task(self.config_loader())
asyncio.create_task(self.config_loader())

View file

@ -246,7 +246,7 @@ class Processor(AsyncProcessor):
taskgroup = self.taskgroup,
)
self.register_config_handler(self.on_librarian_config)
self.register_config_handler(self.on_librarian_config, types=["librarian"])
self.flows = {}

View file

@ -40,7 +40,7 @@ class Processor(FlowProcessor):
}
)
self.register_config_handler(self.on_cost_config)
self.register_config_handler(self.on_cost_config, types=["token-costs"])
self.register_specification(
ConsumerSpec(

View file

@ -65,7 +65,7 @@ class Processor(FlowProcessor):
)
)
self.register_config_handler(self.on_prompt_config)
self.register_config_handler(self.on_prompt_config, types=["prompt"])
# Null configuration, should reload quickly
self.manager = PromptManager()

View file

@ -84,7 +84,7 @@ class Processor(FlowProcessor):
)
# Register config handler for schema updates
self.register_config_handler(self.on_schema_config)
self.register_config_handler(self.on_schema_config, types=["schema"])
# Schema storage: name -> RowSchema
self.schemas: Dict[str, RowSchema] = {}

View file

@ -64,7 +64,7 @@ class Processor(FlowProcessor):
)
# Register config handler for schema updates
self.register_config_handler(self.on_schema_config)
self.register_config_handler(self.on_schema_config, types=["schema"])
# Schema storage: name -> RowSchema
self.schemas: Dict[str, RowSchema] = {}

View file

@ -70,7 +70,7 @@ class Processor(FlowProcessor):
)
# Register config handler for schema updates
self.register_config_handler(self.on_schema_config)
self.register_config_handler(self.on_schema_config, types=["schema"])
# Schema storage: name -> RowSchema
self.schemas: Dict[str, RowSchema] = {}

View file

@ -31,7 +31,7 @@ class Processor(CollectionConfigHandler, DocumentEmbeddingsStoreService):
self.vecstore = DocVectors(store_uri)
# Register for config push notifications
self.register_config_handler(self.on_collection_config)
self.register_config_handler(self.on_collection_config, types=["collection"])
async def store_document_embeddings(self, message):

View file

@ -58,7 +58,7 @@ class Processor(CollectionConfigHandler, DocumentEmbeddingsStoreService):
self.last_index_name = None
# Register for config push notifications
self.register_config_handler(self.on_collection_config)
self.register_config_handler(self.on_collection_config, types=["collection"])
def create_index(self, index_name, dim):

View file

@ -37,7 +37,7 @@ class Processor(CollectionConfigHandler, DocumentEmbeddingsStoreService):
self.qdrant = QdrantClient(url=store_uri, api_key=api_key)
# Register for config push notifications
self.register_config_handler(self.on_collection_config)
self.register_config_handler(self.on_collection_config, types=["collection"])
async def store_document_embeddings(self, message):

View file

@ -45,7 +45,7 @@ class Processor(CollectionConfigHandler, GraphEmbeddingsStoreService):
self.vecstore = EntityVectors(store_uri)
# Register for config push notifications
self.register_config_handler(self.on_collection_config)
self.register_config_handler(self.on_collection_config, types=["collection"])
async def store_graph_embeddings(self, message):

View file

@ -72,7 +72,7 @@ class Processor(CollectionConfigHandler, GraphEmbeddingsStoreService):
self.last_index_name = None
# Register for config push notifications
self.register_config_handler(self.on_collection_config)
self.register_config_handler(self.on_collection_config, types=["collection"])
def create_index(self, index_name, dim):

View file

@ -52,7 +52,7 @@ class Processor(CollectionConfigHandler, GraphEmbeddingsStoreService):
self.qdrant = QdrantClient(url=store_uri, api_key=api_key)
# Register for config push notifications
self.register_config_handler(self.on_collection_config)
self.register_config_handler(self.on_collection_config, types=["collection"])
async def store_graph_embeddings(self, message):

View file

@ -61,7 +61,7 @@ class Processor(CollectionConfigHandler, FlowProcessor):
)
# Register config handler for collection management
self.register_config_handler(self.on_collection_config)
self.register_config_handler(self.on_collection_config, types=["collection"])
# Cache of created Qdrant collections
self.created_collections: Set[str] = set()

View file

@ -75,8 +75,8 @@ class Processor(CollectionConfigHandler, FlowProcessor):
)
# Register config handlers
self.register_config_handler(self.on_schema_config)
self.register_config_handler(self.on_collection_config)
self.register_config_handler(self.on_schema_config, types=["schema"])
self.register_config_handler(self.on_collection_config, types=["collection"])
# Cache of known keyspaces and whether tables exist
self.known_keyspaces: Set[str] = set()

View file

@ -144,7 +144,7 @@ class Processor(CollectionConfigHandler, TriplesStoreService):
self.tg = None
# Register for config push notifications
self.register_config_handler(self.on_collection_config)
self.register_config_handler(self.on_collection_config, types=["collection"])
async def store_triples(self, message):

View file

@ -57,7 +57,7 @@ class Processor(CollectionConfigHandler, TriplesStoreService):
self.io = FalkorDB.from_url(graph_url).select_graph(database)
# Register for config push notifications
self.register_config_handler(self.on_collection_config)
self.register_config_handler(self.on_collection_config, types=["collection"])
def create_node(self, uri, user, collection):

View file

@ -66,7 +66,7 @@ class Processor(CollectionConfigHandler, TriplesStoreService):
self.create_indexes(session)
# Register for config push notifications
self.register_config_handler(self.on_collection_config)
self.register_config_handler(self.on_collection_config, types=["collection"])
def create_indexes(self, session):

View file

@ -66,7 +66,7 @@ class Processor(CollectionConfigHandler, TriplesStoreService):
self.create_indexes(session)
# Register for config push notifications
self.register_config_handler(self.on_collection_config)
self.register_config_handler(self.on_collection_config, types=["collection"])
def create_indexes(self, session):