Pub/sub abstraction: decouple from Pulsar (#751)

Remove Pulsar-specific concepts from application code so that
the pub/sub backend is swappable via configuration.

Rename translators:
- to_pulsar/from_pulsar → decode/encode across all translator
  classes, dispatch handlers, and tests (55+ files)
- from_response_with_completion → encode_with_completion
- Remove pulsar.schema.Record from translator base class

Queue naming (CLASS:TOPICSPACE:TOPIC):
- Replace topic() helper with queue() using new format:
  flow:tg:name, request:tg:name, response:tg:name, state:tg:name
- Queue class implies persistence/TTL (no QoS in names)
- Update Pulsar backend map_topic() to parse new format
- Librarian queues use flow class (persistent, for chunking)
- Config push uses state class (persistent, last-value)
- Remove 15 dead topic imports from schema files
- Update init_trustgraph.py namespace: config → state

Confine Pulsar to pulsar_backend.py:
- Delete legacy PulsarClient class from pubsub.py
- Move add_args to add_pubsub_args() with standalone flag
  for CLI tools (defaults to localhost)
- PulsarBackendConsumer.receive() catches _pulsar.Timeout,
  raises standard TimeoutError
- Remove Pulsar imports from: async_processor, flow_processor,
  log_level, all 11 client files, 4 storage writers, gateway
  service, gateway config receiver
- Remove log_level/LoggerLevel from client API
- Rewrite tg-monitor-prompts to use backend abstraction
- Update tg-dump-queues to use add_pubsub_args

Also: add pubsub-abstraction.md tech spec covering the problem statement,
design goals, as-is requirements, candidate broker assessment,
approach, and implementation order.
This commit is contained in:
cybermaggedon 2026-04-01 20:16:53 +01:00 committed by GitHub
parent dbf8daa74a
commit 4fb0b4d8e8
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
106 changed files with 1269 additions and 788 deletions

View file

@ -8,8 +8,6 @@ message flows, diagnosing stuck services, and understanding system behavior.
Uses TrustGraph's Subscriber abstraction for future-proof pub/sub compatibility.
"""
import pulsar
from pulsar.schema import BytesSchema
import sys
import json
import asyncio
@ -17,7 +15,7 @@ from datetime import datetime
import argparse
from trustgraph.base.subscriber import Subscriber
from trustgraph.base.pubsub import get_pubsub
from trustgraph.base.pubsub import get_pubsub, add_pubsub_args
def decode_json_strings(obj):
"""Recursively decode JSON-encoded string values within a dict/list."""
@ -172,15 +170,13 @@ async def log_writer(central_queue, file_handle, shutdown_event, console_output=
break
async def async_main(queues, output_file, pulsar_host, listener_name, subscriber_name, append_mode):
async def async_main(queues, output_file, subscriber_name, append_mode, **pubsub_config):
"""
Main async function to monitor multiple queues concurrently.
Args:
queues: List of queue names to monitor
output_file: Path to output file
pulsar_host: Pulsar connection URL
listener_name: Pulsar listener name
subscriber_name: Base name for subscribers
append_mode: Whether to append to existing file
"""
@ -194,9 +190,9 @@ async def async_main(queues, output_file, pulsar_host, listener_name, subscriber
# Create backend connection
try:
backend = get_pubsub(pulsar_host=pulsar_host, pulsar_listener=listener_name, pubsub_backend='pulsar')
backend = get_pubsub(**pubsub_config)
except Exception as e:
print(f"Error connecting to backend at {pulsar_host}: {e}", file=sys.stderr)
print(f"Error connecting to backend: {e}", file=sys.stderr)
sys.exit(1)
# Create Subscribers and central queue
@ -291,25 +287,20 @@ def main():
description='Monitor and dump messages from multiple Pulsar queues',
epilog="""
Examples:
# Monitor agent and prompt queues
tg-dump-queues non-persistent://tg/request/agent:default \\
non-persistent://tg/request/prompt:default
# Monitor agent and prompt flow queues
tg-dump-queues flow:tg:agent-request:default \\
flow:tg:prompt-request:default
# Monitor with custom output file
tg-dump-queues non-persistent://tg/request/agent:default \\
tg-dump-queues flow:tg:agent-request:default \\
--output debug.log
# Append to existing log file
tg-dump-queues non-persistent://tg/request/agent:default \\
tg-dump-queues flow:tg:agent-request:default \\
--output queue.log --append
Common queue patterns:
- Agent requests: non-persistent://tg/request/agent:default
- Agent responses: non-persistent://tg/response/agent:default
- Prompt requests: non-persistent://tg/request/prompt:default
- Prompt responses: non-persistent://tg/response/prompt:default
- LLM requests: non-persistent://tg/request/text-completion:default
- LLM responses: non-persistent://tg/response/text-completion:default
# Raw Pulsar URIs also accepted
tg-dump-queues persistent://tg/flow/agent-request:default
IMPORTANT:
This tool subscribes to queues without a schema (schema-less mode). To avoid
@ -340,17 +331,7 @@ IMPORTANT:
help='Append to output file instead of overwriting'
)
parser.add_argument(
'--pulsar-host',
default='pulsar://localhost:6650',
help='Pulsar host URL (default: pulsar://localhost:6650)'
)
parser.add_argument(
'--listener-name',
default='localhost',
help='Pulsar listener name (default: localhost)'
)
add_pubsub_args(parser, standalone=True)
parser.add_argument(
'--subscriber',
@ -371,10 +352,12 @@ IMPORTANT:
asyncio.run(async_main(
queues=queues,
output_file=args.output,
pulsar_host=args.pulsar_host,
listener_name=args.listener_name,
subscriber_name=args.subscriber,
append_mode=args.append
append_mode=args.append,
pubsub_backend=args.pubsub_backend,
pulsar_host=args.pulsar_host,
pulsar_api_key=args.pulsar_api_key,
pulsar_listener=args.pulsar_listener,
))
except KeyboardInterrupt:
# Already handled in async_main

View file

@ -137,7 +137,7 @@ def init(
}
})
ensure_namespace(pulsar_admin_url, tenant, "config", {
ensure_namespace(pulsar_admin_url, tenant, "state", {
"retention_policies": {
"retentionSizeInMB": 10,
"retentionTimeInMinutes": -1,

View file

@ -1,7 +1,7 @@
"""
Monitor prompt request/response queues and log activity with timing.
Subscribes to prompt request and response Pulsar queues, correlates
Subscribes to prompt request and response queues, correlates
them by message ID, and logs a summary of each request/response with
elapsed time. Streaming responses are accumulated and shown once at
completion.
@ -19,8 +19,7 @@ import argparse
from datetime import datetime
from collections import OrderedDict
import pulsar
from pulsar.schema import BytesSchema
from trustgraph.base.pubsub import get_pubsub, add_pubsub_args
default_flow = "default"
@ -85,7 +84,7 @@ def format_terms(terms, max_lines, max_width):
def parse_raw_message(msg):
"""Parse a raw Pulsar message into (correlation_id, body_dict)."""
"""Parse a raw message into (correlation_id, body_dict)."""
try:
props = msg.properties()
corr_id = props.get("id", "")
@ -94,53 +93,46 @@ def parse_raw_message(msg):
try:
value = msg.value()
if isinstance(value, bytes):
value = value.decode("utf-8")
body = json.loads(value) if isinstance(value, str) else {}
if isinstance(value, dict):
body = value
elif isinstance(value, bytes):
body = json.loads(value.decode("utf-8"))
elif isinstance(value, str):
body = json.loads(value)
else:
body = {}
except Exception:
body = {}
return corr_id, body
def receive_with_timeout(consumer, timeout_ms=500):
"""Receive a message with timeout, returning None on timeout."""
try:
return consumer.receive(timeout_millis=timeout_ms)
except Exception:
return None
async def monitor(flow, queue_type, max_lines, max_width, **config):
async def monitor(flow, queue_type, max_lines, max_width,
pulsar_host, listener_name):
request_queue = f"non-persistent://tg/request/{queue_type}:{flow}"
response_queue = f"non-persistent://tg/response/{queue_type}:{flow}"
request_queue = f"request:tg:{queue_type}:{flow}"
response_queue = f"response:tg:{queue_type}:{flow}"
print(f"Monitoring prompt queues:")
print(f" Request: {request_queue}")
print(f" Response: {response_queue}")
print(f"Press Ctrl+C to stop\n")
client = pulsar.Client(
pulsar_host,
listener_name=listener_name,
backend = get_pubsub(**config)
req_consumer = backend.create_consumer(
topic=request_queue,
subscription="prompt-monitor-req",
schema=None,
consumer_type='shared',
initial_position='latest',
)
req_consumer = client.subscribe(
request_queue,
subscription_name="prompt-monitor-req",
consumer_type=pulsar.ConsumerType.Shared,
schema=BytesSchema(),
initial_position=pulsar.InitialPosition.Latest,
)
resp_consumer = client.subscribe(
response_queue,
subscription_name="prompt-monitor-resp",
consumer_type=pulsar.ConsumerType.Shared,
schema=BytesSchema(),
initial_position=pulsar.InitialPosition.Latest,
resp_consumer = backend.create_consumer(
topic=response_queue,
subscription="prompt-monitor-resp",
schema=None,
consumer_type='shared',
initial_position='latest',
)
# Track in-flight requests: corr_id -> (timestamp, template_id)
@ -156,8 +148,8 @@ async def monitor(flow, queue_type, max_lines, max_width,
got_message = False
# Poll request queue
msg = receive_with_timeout(req_consumer, 100)
if msg:
try:
msg = req_consumer.receive(timeout_millis=100)
got_message = True
timestamp = datetime.now()
corr_id, body = parse_raw_message(msg)
@ -182,10 +174,12 @@ async def monitor(flow, queue_type, max_lines, max_width,
print(format_terms(terms, max_lines, max_width))
req_consumer.acknowledge(msg)
except TimeoutError:
pass
# Poll response queue
msg = receive_with_timeout(resp_consumer, 100)
if msg:
try:
msg = resp_consumer.receive(timeout_millis=100)
got_message = True
timestamp = datetime.now()
corr_id, body = parse_raw_message(msg)
@ -265,6 +259,8 @@ async def monitor(flow, queue_type, max_lines, max_width,
print(f" {truncated}")
resp_consumer.acknowledge(msg)
except TimeoutError:
pass
if not got_message:
await asyncio.sleep(0.05)
@ -274,7 +270,7 @@ async def monitor(flow, queue_type, max_lines, max_width,
finally:
req_consumer.close()
resp_consumer.close()
client.close()
backend.close()
def main():
@ -310,17 +306,7 @@ def main():
help=f"Max width per line (default: {default_max_width})",
)
parser.add_argument(
"--pulsar-host",
default="pulsar://localhost:6650",
help="Pulsar host URL (default: pulsar://localhost:6650)",
)
parser.add_argument(
"--listener-name",
default="localhost",
help="Pulsar listener name (default: localhost)",
)
add_pubsub_args(parser, standalone=True)
args = parser.parse_args()
@ -331,7 +317,9 @@ def main():
max_lines=args.max_lines,
max_width=args.max_width,
pulsar_host=args.pulsar_host,
listener_name=args.listener_name,
pulsar_api_key=args.pulsar_api_key,
pulsar_listener=args.pulsar_listener,
pubsub_backend=args.pubsub_backend,
))
except KeyboardInterrupt:
pass