mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-04-25 00:16:23 +02:00
Adds a RabbitMQ backend as an alternative to Pulsar, selectable via PUBSUB_BACKEND=rabbitmq. Both backends implement the same PubSubBackend protocol — no application code changes needed to switch. RabbitMQ topology: - Single topic exchange per topicspace (e.g. 'tg') - Routing key derived from queue class and topic name - Shared consumers: named queue bound to exchange (competing, round-robin) - Exclusive consumers: anonymous auto-delete queue (broadcast, each gets every message). Used by Subscriber and config push consumer. - Thread-local producer connections (pika is not thread-safe) - Push-based consumption via basic_consume with process_data_events for heartbeat processing Consumer model changes: - Consumer class creates one backend consumer per concurrent task (required for pika thread safety, harmless for Pulsar) - Consumer class accepts consumer_type parameter - Subscriber passes consumer_type='exclusive' for broadcast semantics - Config push consumer uses consumer_type='exclusive' so every processor instance receives config updates - handle_one_from_queue receives consumer as parameter for correct per-connection ack/nack LibrarianClient: - New shared client class replacing duplicated librarian request-response code across 6+ services (chunking, decoders, RAG, etc.) 
- Uses stream-document instead of get-document-content for fetching document content in 1MB chunks (avoids broker message size limits) - Standalone object (self.librarian = LibrarianClient(...)) not a mixin - get-document-content marked deprecated in schema and OpenAPI spec Serialisation: - Extracted dataclass_to_dict/dict_to_dataclass to shared serialization.py (used by both Pulsar and RabbitMQ backends) Librarian queues: - Changed from flow class (persistent) back to request/response class now that stream-document eliminates large single messages - API upload chunk size reduced from 5MB to 3MB to stay under broker limits after base64 encoding Factory and CLI: - get_pubsub() handles 'rabbitmq' backend with RabbitMQ connection params - add_pubsub_args() includes RabbitMQ options (host, port, credentials) - add_pubsub_args(standalone=True) defaults to localhost for CLI tools - init_trustgraph skips Pulsar admin setup for non-Pulsar backends - tg-dump-queues and tg-monitor-prompts use backend abstraction - BaseClient and ConfigClient accept generic pubsub config
274 lines
8.1 KiB
Python
274 lines
8.1 KiB
Python
|
|
# Base class for processors. Implements:
|
|
# - pub/sub backend client (Pulsar or RabbitMQ via get_pubsub), basic subscribe and consume
|
|
# - the async startup logic
|
|
# - Initialising metrics
|
|
|
|
import asyncio
|
|
import argparse
|
|
import time
|
|
import uuid
|
|
import logging
|
|
import os
|
|
from prometheus_client import start_http_server, Info
|
|
|
|
from .. schema import ConfigPush, config_push_queue
|
|
from .. log_level import LogLevel
|
|
from . pubsub import get_pubsub, add_pubsub_args
|
|
from . producer import Producer
|
|
from . consumer import Consumer
|
|
from . metrics import ProcessorMetrics, ConsumerMetrics
|
|
from . logging import add_logging_args, setup_logging
|
|
|
|
default_config_queue = config_push_queue
|
|
|
|
# Module logger
|
|
logger = logging.getLogger(__name__)
|
|
|
|
# Async processor
|
|
class AsyncProcessor:
    """
    Base class for async processors.  Provides:

    - a pub/sub backend created via the get_pubsub() factory
    - a broadcast ('exclusive') subscription to the configuration push
      queue, with handler registration for configuration changes
    - Prometheus metrics initialisation
    - the startup / retry fabric (launch / launch_async)
    """

    def __init__(self, **params):
        """
        Construct the processor.  All configuration arrives as keyword
        parameters — typically the parsed CLI arguments plus a
        'taskgroup' injected by launch_async.

        Raises RuntimeError if no 'taskgroup' parameter is supplied.
        """

        # Store the identity
        self.id = params.get("id")

        # Create pub/sub backend via factory; backend selection is
        # driven by the supplied params
        self.pubsub_backend = get_pubsub(**params)

        # Store pulsar_host for backward compatibility with callers that
        # still read the pulsar_host property
        self._pulsar_host = params.get("pulsar_host", "pulsar://pulsar:6650")

        # Initialise metrics, recording all parameters except the id,
        # which is already the metric label
        ProcessorMetrics(processor = self.id).info({
            k: str(params[k])
            for k in params
            if k != "id"
        })

        # The processor runs all activity in a taskgroup; it's mandatory
        # that this is provided
        self.taskgroup = params.get("taskgroup")
        if self.taskgroup is None:
            raise RuntimeError("Essential taskgroup missing")

        # Get the configuration topic
        self.config_push_queue = params.get(
            "config_push_queue", default_config_queue
        )

        # Registered configuration handlers, invoked in order on each
        # config push
        self.config_handlers = []

        # Create a random ID for this subscription to the configuration
        # service, so every instance gets its own subscription
        config_subscriber_id = str(uuid.uuid4())

        config_consumer_metrics = ConsumerMetrics(
            processor = self.id, flow = None, name = "config",
        )

        # Subscribe to config queue — exclusive so every processor
        # gets its own copy of config pushes (broadcast pattern)
        self.config_sub_task = Consumer(
            taskgroup = self.taskgroup,
            backend = self.pubsub_backend,
            subscriber = config_subscriber_id,
            flow = None,
            topic = self.config_push_queue,
            schema = ConfigPush,
            handler = self.on_config_change,
            metrics = config_consumer_metrics,
            start_of_messages = True,
            consumer_type = 'exclusive',
        )

        self.running = True

    # This is called to start dynamic behaviour. An over-ride point for
    # extra functionality
    async def start(self):
        """Start dynamic behaviour: begin consuming config pushes."""
        await self.config_sub_task.start()

    # This is called to stop all threads. An over-ride point for extra
    # functionality
    def stop(self):
        """Stop the processor: close the backend and end the run() loop."""
        self.pubsub_backend.close()
        self.running = False

    # Returns the pub/sub backend (new interface)
    @property
    def pubsub(self):
        return self.pubsub_backend

    # Returns the pulsar host (backward compatibility)
    @property
    def pulsar_host(self):
        return self._pulsar_host

    # Register a new event handler for configuration change
    def register_config_handler(self, handler):
        """Register an async callable(config, version) invoked on config push."""
        self.config_handlers.append(handler)

    # Called when a new configuration message push occurs
    async def on_config_change(self, message, consumer, flow):
        """Decode a config push and fan it out to all registered handlers."""

        # Get configuration data and version number; decode the message
        # payload once rather than once per field
        value = message.value()
        config = value.config
        version = value.version

        # Invoke message handlers
        logger.info(f"Config change event: version={version}")
        for ch in self.config_handlers:
            await ch(config, version)

    # This is the 'main' body of the handler. It is a point to override
    # if needed. By default does nothing. Processors are implemented
    # by adding consumer/producer functionality so maybe nothing is needed
    # in the run() body
    async def run(self):
        """Default main loop: idle until stop() clears the running flag."""
        while self.running:
            await asyncio.sleep(2)

    # Startup fabric. This runs in 'async' mode, creates a taskgroup and
    # runs the processor.
    @classmethod
    async def launch_async(cls, args):
        """Create the processor inside a TaskGroup and run it to completion."""

        try:

            # When an unhandled exception occurs in the taskgroup it
            # cancels all tasks in the group; the exception must be
            # caught here so it can be logged and re-raised for the
            # retry loop in launch().
            async with asyncio.TaskGroup() as tg:

                # Create a processor instance, and include the taskgroup
                # as a parameter. The processor identity is used as
                # - subscriber name
                # - an identifier for flow configuration
                p = cls(**args | { "taskgroup": tg })

                # Start the processor
                await p.start()

                # Run the processor. The taskgroup causes everything to
                # wait until all tasks have stopped.
                tg.create_task(p.run())

        except Exception:
            # Debug aid; bare raise preserves the original traceback
            logger.error("Exception, closing taskgroup", exc_info=True)
            raise

    @classmethod
    def setup_logging(cls, args):
        """Configure logging for the entire application"""
        setup_logging(args)

    # Startup fabric. launch calls launch_async in async mode.
    @classmethod
    def launch(cls, ident, doc):
        """
        CLI entry point: parse arguments, configure logging and metrics,
        then run launch_async in a retry-forever loop.  Returns only on
        keyboard interrupt.
        """

        # Start assembling CLI arguments
        parser = argparse.ArgumentParser(
            prog=ident,
            description=doc
        )

        parser.add_argument(
            '--id',
            default=ident,
            help=f'Configuration identity (default: {ident})',
        )

        # Invoke the class-specific add_args, which manages adding all the
        # command-line arguments
        cls.add_args(parser)

        # Parse arguments
        args = vars(parser.parse_args())

        # Setup logging before anything else
        cls.setup_logging(args)

        # Debug
        logger.debug(f"Arguments: {args}")

        # Start the Prometheus metrics service if needed
        if args["metrics"]:
            start_http_server(args["metrics_port"])

        # Loop forever, restarting the processor after any failure
        while True:

            logger.info("Starting...")

            try:
                # Launch the processor in an asyncio handler
                asyncio.run(cls.launch_async(args))

            except KeyboardInterrupt:
                logger.info("Keyboard interrupt.")
                return

            # Exceptions from a taskgroup come in as an exception group
            except ExceptionGroup as e:
                logger.error("Exception group:")
                for se in e.exceptions:
                    logger.error(f" Type: {type(se)}")
                    logger.error(f" Exception: {se}", exc_info=se)

            except Exception as e:
                logger.error(f"Type: {type(e)}")
                logger.error(f"Exception: {e}", exc_info=True)

            # Retry occurs here
            logger.warning("Will retry...")
            time.sleep(4)
            logger.info("Retrying...")

    # The command-line arguments are built using a stack of add_args
    # invocations
    @staticmethod
    def add_args(parser):
        """Add the base CLI arguments shared by all processors."""

        add_pubsub_args(parser)
        add_logging_args(parser)

        parser.add_argument(
            '--config-push-queue',
            default=default_config_queue,
            help=f'Config push queue (default: {default_config_queue})',
        )

        parser.add_argument(
            '--metrics',
            action=argparse.BooleanOptionalAction,
            default=True,
            help='Metrics enabled (default: true)',
        )

        parser.add_argument(
            '-P', '--metrics-port',
            type=int,
            default=8000,
            help='Metrics port (default: 8000)',
        )
|
|
|