mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-04-26 00:46:22 +02:00
RabbitMQ pub/sub backend with topic exchange architecture (#752)
Adds a RabbitMQ backend as an alternative to Pulsar, selectable via PUBSUB_BACKEND=rabbitmq. Both backends implement the same PubSubBackend protocol — no application code changes needed to switch.

RabbitMQ topology:
- Single topic exchange per topicspace (e.g. 'tg')
- Routing key derived from queue class and topic name
- Shared consumers: named queue bound to exchange (competing, round-robin)
- Exclusive consumers: anonymous auto-delete queue (broadcast, each gets every message). Used by Subscriber and config push consumer.
- Thread-local producer connections (pika is not thread-safe)
- Push-based consumption via basic_consume with process_data_events for heartbeat processing

Consumer model changes:
- Consumer class creates one backend consumer per concurrent task (required for pika thread safety, harmless for Pulsar)
- Consumer class accepts consumer_type parameter
- Subscriber passes consumer_type='exclusive' for broadcast semantics
- Config push consumer uses consumer_type='exclusive' so every processor instance receives config updates
- handle_one_from_queue receives consumer as parameter for correct per-connection ack/nack

LibrarianClient:
- New shared client class replacing duplicated librarian request-response code across 6+ services (chunking, decoders, RAG, etc.)
- Uses stream-document instead of get-document-content for fetching document content in 1MB chunks (avoids broker message size limits)
- Standalone object (self.librarian = LibrarianClient(...)) not a mixin
- get-document-content marked deprecated in schema and OpenAPI spec

Serialisation:
- Extracted dataclass_to_dict/dict_to_dataclass to shared serialization.py (used by both Pulsar and RabbitMQ backends)

Librarian queues:
- Changed from flow class (persistent) back to request/response class now that stream-document eliminates large single messages
- API upload chunk size reduced from 5MB to 3MB to stay under broker limits after base64 encoding

Factory and CLI:
- get_pubsub() handles 'rabbitmq' backend with RabbitMQ connection params
- add_pubsub_args() includes RabbitMQ options (host, port, credentials)
- add_pubsub_args(standalone=True) defaults to localhost for CLI tools
- init_trustgraph skips Pulsar admin setup for non-Pulsar backends
- tg-dump-queues and tg-monitor-prompts use backend abstraction
- BaseClient and ConfigClient accept generic pubsub config
This commit is contained in:
parent
4fb0b4d8e8
commit
24f0190ce7
36 changed files with 1277 additions and 1313 deletions
|
|
@ -354,10 +354,8 @@ IMPORTANT:
|
|||
output_file=args.output,
|
||||
subscriber_name=args.subscriber,
|
||||
append_mode=args.append,
|
||||
pubsub_backend=args.pubsub_backend,
|
||||
pulsar_host=args.pulsar_host,
|
||||
pulsar_api_key=args.pulsar_api_key,
|
||||
pulsar_listener=args.pulsar_listener,
|
||||
**{k: v for k, v in vars(args).items()
|
||||
if k not in ('queues', 'output', 'subscriber', 'append')},
|
||||
))
|
||||
except KeyboardInterrupt:
|
||||
# Already handled in async_main
|
||||
|
|
|
|||
|
|
@ -1,5 +1,8 @@
|
|||
"""
|
||||
Initialises Pulsar with Trustgraph tenant / namespaces & policy.
|
||||
Initialises TrustGraph pub/sub infrastructure and pushes initial config.
|
||||
|
||||
For Pulsar: creates tenant, namespaces, and retention policies.
|
||||
For RabbitMQ: queues are auto-declared, so only config push is needed.
|
||||
"""
|
||||
|
||||
import requests
|
||||
|
|
@ -8,10 +11,11 @@ import argparse
|
|||
import json
|
||||
|
||||
from trustgraph.clients.config_client import ConfigClient
|
||||
from trustgraph.base.pubsub import add_pubsub_args
|
||||
|
||||
default_pulsar_admin_url = "http://pulsar:8080"
|
||||
default_pulsar_host = "pulsar://pulsar:6650"
|
||||
subscriber = "tg-init-pulsar"
|
||||
subscriber = "tg-init-pubsub"
|
||||
|
||||
|
||||
def get_clusters(url):
|
||||
|
||||
|
|
@ -65,12 +69,11 @@ def ensure_namespace(url, tenant, namespace, config):
|
|||
|
||||
print(f"Namespace {tenant}/{namespace} created.", flush=True)
|
||||
|
||||
def ensure_config(config, pulsar_host, pulsar_api_key):
|
||||
def ensure_config(config, **pubsub_config):
|
||||
|
||||
cli = ConfigClient(
|
||||
subscriber=subscriber,
|
||||
pulsar_host=pulsar_host,
|
||||
pulsar_api_key=pulsar_api_key,
|
||||
**pubsub_config,
|
||||
)
|
||||
|
||||
while True:
|
||||
|
|
@ -115,11 +118,9 @@ def ensure_config(config, pulsar_host, pulsar_api_key):
|
|||
time.sleep(2)
|
||||
print("Retrying...", flush=True)
|
||||
continue
|
||||
|
||||
def init(
|
||||
pulsar_admin_url, pulsar_host, pulsar_api_key, tenant,
|
||||
config, config_file,
|
||||
):
|
||||
|
||||
def init_pulsar(pulsar_admin_url, tenant):
|
||||
"""Pulsar-specific setup: create tenant, namespaces, retention policies."""
|
||||
|
||||
clusters = get_clusters(pulsar_admin_url)
|
||||
|
||||
|
|
@ -145,17 +146,21 @@ def init(
|
|||
}
|
||||
})
|
||||
|
||||
if config is not None:
|
||||
|
||||
def push_config(config_json, config_file, **pubsub_config):
|
||||
"""Push initial config if provided."""
|
||||
|
||||
if config_json is not None:
|
||||
|
||||
try:
|
||||
print("Decoding config...", flush=True)
|
||||
dec = json.loads(config)
|
||||
dec = json.loads(config_json)
|
||||
print("Decoded.", flush=True)
|
||||
except Exception as e:
|
||||
print("Exception:", e, flush=True)
|
||||
raise e
|
||||
|
||||
ensure_config(dec, pulsar_host, pulsar_api_key)
|
||||
ensure_config(dec, **pubsub_config)
|
||||
|
||||
elif config_file is not None:
|
||||
|
||||
|
|
@ -167,11 +172,12 @@ def init(
|
|||
print("Exception:", e, flush=True)
|
||||
raise e
|
||||
|
||||
ensure_config(dec, pulsar_host, pulsar_api_key)
|
||||
ensure_config(dec, **pubsub_config)
|
||||
|
||||
else:
|
||||
print("No config to update.", flush=True)
|
||||
|
||||
|
||||
def main():
|
||||
|
||||
parser = argparse.ArgumentParser(
|
||||
|
|
@ -180,22 +186,11 @@ def main():
|
|||
)
|
||||
|
||||
parser.add_argument(
|
||||
'-p', '--pulsar-admin-url',
|
||||
'--pulsar-admin-url',
|
||||
default=default_pulsar_admin_url,
|
||||
help=f'Pulsar admin URL (default: {default_pulsar_admin_url})',
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'--pulsar-host',
|
||||
default=default_pulsar_host,
|
||||
help=f'Pulsar host (default: {default_pulsar_host})',
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'--pulsar-api-key',
|
||||
help=f'Pulsar API key',
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'-c', '--config',
|
||||
help=f'Initial configuration to load',
|
||||
|
|
@ -212,18 +207,43 @@ def main():
|
|||
help=f'Tenant (default: tg)',
|
||||
)
|
||||
|
||||
add_pubsub_args(parser)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
backend_type = args.pubsub_backend
|
||||
|
||||
# Extract pubsub config from args
|
||||
pubsub_config = {
|
||||
k: v for k, v in vars(args).items()
|
||||
if k not in ('pulsar_admin_url', 'config', 'config_file', 'tenant')
|
||||
}
|
||||
|
||||
while True:
|
||||
|
||||
try:
|
||||
|
||||
print(flush=True)
|
||||
print(
|
||||
f"Initialising with Pulsar {args.pulsar_admin_url}...",
|
||||
flush=True
|
||||
# Pulsar-specific setup (tenants, namespaces)
|
||||
if backend_type == 'pulsar':
|
||||
print(flush=True)
|
||||
print(
|
||||
f"Initialising Pulsar at {args.pulsar_admin_url}...",
|
||||
flush=True,
|
||||
)
|
||||
init_pulsar(args.pulsar_admin_url, args.tenant)
|
||||
else:
|
||||
print(flush=True)
|
||||
print(
|
||||
f"Using {backend_type} backend (no admin setup needed).",
|
||||
flush=True,
|
||||
)
|
||||
|
||||
# Push config (works with any backend)
|
||||
push_config(
|
||||
args.config, args.config_file,
|
||||
**pubsub_config,
|
||||
)
|
||||
init(**vars(args))
|
||||
|
||||
print("Initialisation complete.", flush=True)
|
||||
break
|
||||
|
||||
|
|
@ -236,4 +256,4 @@ def main():
|
|||
print("Will retry...", flush=True)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
main()
|
||||
|
|
|
|||
|
|
@ -316,10 +316,8 @@ def main():
|
|||
queue_type=args.queue_type,
|
||||
max_lines=args.max_lines,
|
||||
max_width=args.max_width,
|
||||
pulsar_host=args.pulsar_host,
|
||||
pulsar_api_key=args.pulsar_api_key,
|
||||
pulsar_listener=args.pulsar_listener,
|
||||
pubsub_backend=args.pubsub_backend,
|
||||
**{k: v for k, v in vars(args).items()
|
||||
if k not in ('flow', 'queue_type', 'max_lines', 'max_width')},
|
||||
))
|
||||
except KeyboardInterrupt:
|
||||
pass
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue