Pub/sub abstraction: decouple from Pulsar (#751)

Remove Pulsar-specific concepts from application code so that
the pub/sub backend is swappable via configuration.

Rename translators:
- to_pulsar/from_pulsar → decode/encode across all translator
  classes, dispatch handlers, and tests (55+ files)
- from_response_with_completion → encode_with_completion
- Remove pulsar.schema.Record from translator base class

Queue naming (CLASS:TOPICSPACE:TOPIC):
- Replace topic() helper with queue() using new format:
  flow:tg:name, request:tg:name, response:tg:name, state:tg:name
- Queue class implies persistence/TTL (no QoS in names)
- Update Pulsar backend map_topic() to parse new format
- Librarian queues use flow class (persistent, for chunking)
- Config push uses state class (persistent, last-value)
- Remove 15 dead topic imports from schema files
- Update init_trustgraph.py namespace: config → state

Confine Pulsar to pulsar_backend.py:
- Delete legacy PulsarClient class from pubsub.py
- Move add_args to add_pubsub_args() with standalone flag
  for CLI tools (defaults to localhost)
- PulsarBackendConsumer.receive() catches _pulsar.Timeout,
  raises standard TimeoutError
- Remove Pulsar imports from: async_processor, flow_processor,
  log_level, all 11 client files, 4 storage writers, gateway
  service, gateway config receiver
- Remove log_level/LoggerLevel from client API
- Rewrite tg-monitor-prompts to use backend abstraction
- Update tg-dump-queues to use add_pubsub_args

Also: add pubsub-abstraction.md tech spec covering the problem statement,
design goals, as-is requirements, candidate broker assessment,
approach, and implementation order.
This commit is contained in:
cybermaggedon 2026-04-01 20:16:53 +01:00 committed by GitHub
parent dbf8daa74a
commit 4fb0b4d8e8
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
106 changed files with 1269 additions and 788 deletions

View file

@ -8,8 +8,6 @@ message flows, diagnosing stuck services, and understanding system behavior.
Uses TrustGraph's Subscriber abstraction for future-proof pub/sub compatibility.
"""
import pulsar
from pulsar.schema import BytesSchema
import sys
import json
import asyncio
@ -17,7 +15,7 @@ from datetime import datetime
import argparse
from trustgraph.base.subscriber import Subscriber
from trustgraph.base.pubsub import get_pubsub
from trustgraph.base.pubsub import get_pubsub, add_pubsub_args
def decode_json_strings(obj):
"""Recursively decode JSON-encoded string values within a dict/list."""
@ -172,15 +170,13 @@ async def log_writer(central_queue, file_handle, shutdown_event, console_output=
break
async def async_main(queues, output_file, pulsar_host, listener_name, subscriber_name, append_mode):
async def async_main(queues, output_file, subscriber_name, append_mode, **pubsub_config):
"""
Main async function to monitor multiple queues concurrently.
Args:
queues: List of queue names to monitor
output_file: Path to output file
pulsar_host: Pulsar connection URL
listener_name: Pulsar listener name
subscriber_name: Base name for subscribers
append_mode: Whether to append to existing file
"""
@ -194,9 +190,9 @@ async def async_main(queues, output_file, pulsar_host, listener_name, subscriber
# Create backend connection
try:
backend = get_pubsub(pulsar_host=pulsar_host, pulsar_listener=listener_name, pubsub_backend='pulsar')
backend = get_pubsub(**pubsub_config)
except Exception as e:
print(f"Error connecting to backend at {pulsar_host}: {e}", file=sys.stderr)
print(f"Error connecting to backend: {e}", file=sys.stderr)
sys.exit(1)
# Create Subscribers and central queue
@ -291,25 +287,20 @@ def main():
description='Monitor and dump messages from multiple Pulsar queues',
epilog="""
Examples:
# Monitor agent and prompt queues
tg-dump-queues non-persistent://tg/request/agent:default \\
non-persistent://tg/request/prompt:default
# Monitor agent and prompt flow queues
tg-dump-queues flow:tg:agent-request:default \\
flow:tg:prompt-request:default
# Monitor with custom output file
tg-dump-queues non-persistent://tg/request/agent:default \\
tg-dump-queues flow:tg:agent-request:default \\
--output debug.log
# Append to existing log file
tg-dump-queues non-persistent://tg/request/agent:default \\
tg-dump-queues flow:tg:agent-request:default \\
--output queue.log --append
Common queue patterns:
- Agent requests: non-persistent://tg/request/agent:default
- Agent responses: non-persistent://tg/response/agent:default
- Prompt requests: non-persistent://tg/request/prompt:default
- Prompt responses: non-persistent://tg/response/prompt:default
- LLM requests: non-persistent://tg/request/text-completion:default
- LLM responses: non-persistent://tg/response/text-completion:default
# Raw Pulsar URIs also accepted
tg-dump-queues persistent://tg/flow/agent-request:default
IMPORTANT:
This tool subscribes to queues without a schema (schema-less mode). To avoid
@ -340,17 +331,7 @@ IMPORTANT:
help='Append to output file instead of overwriting'
)
parser.add_argument(
'--pulsar-host',
default='pulsar://localhost:6650',
help='Pulsar host URL (default: pulsar://localhost:6650)'
)
parser.add_argument(
'--listener-name',
default='localhost',
help='Pulsar listener name (default: localhost)'
)
add_pubsub_args(parser, standalone=True)
parser.add_argument(
'--subscriber',
@ -371,10 +352,12 @@ IMPORTANT:
asyncio.run(async_main(
queues=queues,
output_file=args.output,
pulsar_host=args.pulsar_host,
listener_name=args.listener_name,
subscriber_name=args.subscriber,
append_mode=args.append
append_mode=args.append,
pubsub_backend=args.pubsub_backend,
pulsar_host=args.pulsar_host,
pulsar_api_key=args.pulsar_api_key,
pulsar_listener=args.pulsar_listener,
))
except KeyboardInterrupt:
# Already handled in async_main

View file

@ -137,7 +137,7 @@ def init(
}
})
ensure_namespace(pulsar_admin_url, tenant, "config", {
ensure_namespace(pulsar_admin_url, tenant, "state", {
"retention_policies": {
"retentionSizeInMB": 10,
"retentionTimeInMinutes": -1,

View file

@ -1,7 +1,7 @@
"""
Monitor prompt request/response queues and log activity with timing.
Subscribes to prompt request and response Pulsar queues, correlates
Subscribes to prompt request and response queues, correlates
them by message ID, and logs a summary of each request/response with
elapsed time. Streaming responses are accumulated and shown once at
completion.
@ -19,8 +19,7 @@ import argparse
from datetime import datetime
from collections import OrderedDict
import pulsar
from pulsar.schema import BytesSchema
from trustgraph.base.pubsub import get_pubsub, add_pubsub_args
default_flow = "default"
@ -85,7 +84,7 @@ def format_terms(terms, max_lines, max_width):
def parse_raw_message(msg):
"""Parse a raw Pulsar message into (correlation_id, body_dict)."""
"""Parse a raw message into (correlation_id, body_dict)."""
try:
props = msg.properties()
corr_id = props.get("id", "")
@ -94,53 +93,46 @@ def parse_raw_message(msg):
try:
value = msg.value()
if isinstance(value, bytes):
value = value.decode("utf-8")
body = json.loads(value) if isinstance(value, str) else {}
if isinstance(value, dict):
body = value
elif isinstance(value, bytes):
body = json.loads(value.decode("utf-8"))
elif isinstance(value, str):
body = json.loads(value)
else:
body = {}
except Exception:
body = {}
return corr_id, body
def receive_with_timeout(consumer, timeout_ms=500):
"""Receive a message with timeout, returning None on timeout."""
try:
return consumer.receive(timeout_millis=timeout_ms)
except Exception:
return None
async def monitor(flow, queue_type, max_lines, max_width, **config):
async def monitor(flow, queue_type, max_lines, max_width,
pulsar_host, listener_name):
request_queue = f"non-persistent://tg/request/{queue_type}:{flow}"
response_queue = f"non-persistent://tg/response/{queue_type}:{flow}"
request_queue = f"request:tg:{queue_type}:{flow}"
response_queue = f"response:tg:{queue_type}:{flow}"
print(f"Monitoring prompt queues:")
print(f" Request: {request_queue}")
print(f" Response: {response_queue}")
print(f"Press Ctrl+C to stop\n")
client = pulsar.Client(
pulsar_host,
listener_name=listener_name,
backend = get_pubsub(**config)
req_consumer = backend.create_consumer(
topic=request_queue,
subscription="prompt-monitor-req",
schema=None,
consumer_type='shared',
initial_position='latest',
)
req_consumer = client.subscribe(
request_queue,
subscription_name="prompt-monitor-req",
consumer_type=pulsar.ConsumerType.Shared,
schema=BytesSchema(),
initial_position=pulsar.InitialPosition.Latest,
)
resp_consumer = client.subscribe(
response_queue,
subscription_name="prompt-monitor-resp",
consumer_type=pulsar.ConsumerType.Shared,
schema=BytesSchema(),
initial_position=pulsar.InitialPosition.Latest,
resp_consumer = backend.create_consumer(
topic=response_queue,
subscription="prompt-monitor-resp",
schema=None,
consumer_type='shared',
initial_position='latest',
)
# Track in-flight requests: corr_id -> (timestamp, template_id)
@ -156,8 +148,8 @@ async def monitor(flow, queue_type, max_lines, max_width,
got_message = False
# Poll request queue
msg = receive_with_timeout(req_consumer, 100)
if msg:
try:
msg = req_consumer.receive(timeout_millis=100)
got_message = True
timestamp = datetime.now()
corr_id, body = parse_raw_message(msg)
@ -182,10 +174,12 @@ async def monitor(flow, queue_type, max_lines, max_width,
print(format_terms(terms, max_lines, max_width))
req_consumer.acknowledge(msg)
except TimeoutError:
pass
# Poll response queue
msg = receive_with_timeout(resp_consumer, 100)
if msg:
try:
msg = resp_consumer.receive(timeout_millis=100)
got_message = True
timestamp = datetime.now()
corr_id, body = parse_raw_message(msg)
@ -265,6 +259,8 @@ async def monitor(flow, queue_type, max_lines, max_width,
print(f" {truncated}")
resp_consumer.acknowledge(msg)
except TimeoutError:
pass
if not got_message:
await asyncio.sleep(0.05)
@ -274,7 +270,7 @@ async def monitor(flow, queue_type, max_lines, max_width,
finally:
req_consumer.close()
resp_consumer.close()
client.close()
backend.close()
def main():
@ -310,17 +306,7 @@ def main():
help=f"Max width per line (default: {default_max_width})",
)
parser.add_argument(
"--pulsar-host",
default="pulsar://localhost:6650",
help="Pulsar host URL (default: pulsar://localhost:6650)",
)
parser.add_argument(
"--listener-name",
default="localhost",
help="Pulsar listener name (default: localhost)",
)
add_pubsub_args(parser, standalone=True)
args = parser.parse_args()
@ -331,7 +317,9 @@ def main():
max_lines=args.max_lines,
max_width=args.max_width,
pulsar_host=args.pulsar_host,
listener_name=args.listener_name,
pulsar_api_key=args.pulsar_api_key,
pulsar_listener=args.pulsar_listener,
pubsub_backend=args.pubsub_backend,
))
except KeyboardInterrupt:
pass