trustgraph/trustgraph-cli/trustgraph/cli/monitor_prompts.py
cybermaggedon feeb92b33f
Refactor: Derive consumer behaviour from queue class (#772)
Derive consumer behaviour from queue class, remove
consumer_type parameter

The queue class prefix (flow, request, response, notify) now
fully determines consumer behaviour in both RabbitMQ and Pulsar
backends.  Added 'notify' class for ephemeral broadcast (config
push notifications).  Response and notify classes always create
per-subscriber auto-delete queues, eliminating orphaned queues
that accumulated on service restarts.

Change init-trustgraph to set up the 'notify' namespace in
Pulsar instead of old hangover 'state'.

Fixes 'stuck backlog' on RabbitMQ config notification queue.
2026-04-09 09:55:41 +01:00

328 lines
10 KiB
Python

"""
Monitor prompt request/response queues and log activity with timing.
Subscribes to prompt request and response queues, correlates
them by message ID, and logs a summary of each request/response with
elapsed time. Streaming responses are accumulated and shown once at
completion.
Examples:
tg-monitor-prompts
tg-monitor-prompts --flow default --max-lines 5
tg-monitor-prompts --queue-type prompt-rag
"""
import json
import asyncio
import sys
import argparse
from datetime import datetime
from collections import OrderedDict
from trustgraph.base.pubsub import get_pubsub, add_pubsub_args
default_flow = "default"
default_queue_type = "prompt-rag"
default_max_lines = 3
default_max_width = 80
def truncate_text(text, max_lines, max_width):
"""Truncate text to max_lines lines, each at most max_width chars."""
if not text:
return "(empty)"
lines = text.splitlines()
result = []
for line in lines[:max_lines]:
if len(line) > max_width:
result.append(line[:max_width - 3] + "...")
else:
result.append(line)
if len(lines) > max_lines:
result.append(f" ... ({len(lines) - max_lines} more lines)")
return "\n".join(result)
def summarise_value(value, max_width):
"""Summarise a term value — show type and size for large values."""
# Try to parse JSON
try:
parsed = json.loads(value)
except (json.JSONDecodeError, TypeError):
parsed = value
if isinstance(parsed, list):
return f"[{len(parsed)} items]"
elif isinstance(parsed, dict):
return f"{{{len(parsed)} keys}}"
elif isinstance(parsed, str):
if len(parsed) > max_width:
return parsed[:max_width - 3] + "..."
return parsed
else:
s = str(parsed)
if len(s) > max_width:
return s[:max_width - 3] + "..."
return s
def format_terms(terms, max_lines, max_width):
"""Format prompt terms for display — concise summary."""
if not terms:
return ""
parts = []
for key, value in terms.items():
summary = summarise_value(value, max_width - len(key) - 4)
parts.append(f" {key}: {summary}")
return "\n".join(parts)
def parse_raw_message(msg):
"""Parse a raw message into (correlation_id, body_dict)."""
try:
props = msg.properties()
corr_id = props.get("id", "")
except Exception:
corr_id = ""
try:
value = msg.value()
if isinstance(value, dict):
body = value
elif isinstance(value, bytes):
body = json.loads(value.decode("utf-8"))
elif isinstance(value, str):
body = json.loads(value)
else:
body = {}
except Exception:
body = {}
return corr_id, body
async def monitor(flow, queue_type, max_lines, max_width, **config):
request_queue = f"request:tg:{queue_type}:{flow}"
response_queue = f"response:tg:{queue_type}:{flow}"
print(f"Monitoring prompt queues:")
print(f" Request: {request_queue}")
print(f" Response: {response_queue}")
print(f"Press Ctrl+C to stop\n")
backend = get_pubsub(**config)
req_consumer = backend.create_consumer(
topic=request_queue,
subscription="prompt-monitor-req",
schema=None,
initial_position='latest',
)
resp_consumer = backend.create_consumer(
topic=response_queue,
subscription="prompt-monitor-resp",
schema=None,
initial_position='latest',
)
# Track in-flight requests: corr_id -> (timestamp, template_id)
in_flight = OrderedDict()
# Accumulate streaming responses: corr_id -> list of text chunks
streaming_chunks = {}
print("Listening...\n")
try:
while True:
got_message = False
# Poll request queue
try:
msg = req_consumer.receive(timeout_millis=100)
got_message = True
timestamp = datetime.now()
corr_id, body = parse_raw_message(msg)
time_str = timestamp.strftime("%H:%M:%S.%f")[:-3]
template_id = body.get("id", "(unknown)")
terms = body.get("terms", {})
streaming = body.get("streaming", False)
in_flight[corr_id] = (timestamp, template_id)
# Limit size
while len(in_flight) > 1000:
in_flight.popitem(last=False)
stream_flag = " [streaming]" if streaming else ""
id_display = corr_id[:8] if corr_id else "--------"
print(f"[{time_str}] REQ {id_display} "
f"template={template_id}{stream_flag}")
if terms:
print(format_terms(terms, max_lines, max_width))
req_consumer.acknowledge(msg)
except TimeoutError:
pass
# Poll response queue
try:
msg = resp_consumer.receive(timeout_millis=100)
got_message = True
timestamp = datetime.now()
corr_id, body = parse_raw_message(msg)
time_str = timestamp.strftime("%H:%M:%S.%f")[:-3]
id_display = corr_id[:8] if corr_id else "--------"
error = body.get("error")
text = body.get("text", "")
obj = body.get("object", "")
eos = body.get("end_of_stream", False)
if error:
# Error — show immediately
elapsed_str = ""
if corr_id in in_flight:
req_timestamp, _ = in_flight.pop(corr_id)
elapsed = (timestamp - req_timestamp).total_seconds()
elapsed_str = f" ({elapsed:.3f}s)"
streaming_chunks.pop(corr_id, None)
err_msg = error
if isinstance(error, dict):
err_msg = error.get("message", str(error))
print(f"[{time_str}] ERR {id_display} "
f"{err_msg}{elapsed_str}")
elif eos:
# End of stream — show accumulated text + timing
elapsed_str = ""
if corr_id in in_flight:
req_timestamp, _ = in_flight.pop(corr_id)
elapsed = (timestamp - req_timestamp).total_seconds()
elapsed_str = f" ({elapsed:.3f}s)"
accumulated = streaming_chunks.pop(corr_id, [])
if text:
accumulated.append(text)
full_text = "".join(accumulated)
if full_text:
truncated = truncate_text(
full_text, max_lines, max_width
)
print(f"[{time_str}] RESP {id_display}"
f"{elapsed_str}")
print(f" {truncated}")
else:
print(f"[{time_str}] RESP {id_display}"
f"{elapsed_str} (empty)")
elif text or obj:
# Streaming chunk or non-streaming response
if corr_id in streaming_chunks or (
corr_id in in_flight
):
# Accumulate streaming chunk
if corr_id not in streaming_chunks:
streaming_chunks[corr_id] = []
streaming_chunks[corr_id].append(text or obj)
else:
# Non-streaming single response
elapsed_str = ""
if corr_id in in_flight:
req_timestamp, _ = in_flight.pop(corr_id)
elapsed = (
timestamp - req_timestamp
).total_seconds()
elapsed_str = f" ({elapsed:.3f}s)"
content = text or obj
label = "" if text else " (object)"
truncated = truncate_text(
content, max_lines, max_width
)
print(f"[{time_str}] RESP {id_display}"
f"{label}{elapsed_str}")
print(f" {truncated}")
resp_consumer.acknowledge(msg)
except TimeoutError:
pass
if not got_message:
await asyncio.sleep(0.05)
except KeyboardInterrupt:
print("\nStopping...")
finally:
req_consumer.close()
resp_consumer.close()
backend.close()
def main():
parser = argparse.ArgumentParser(
prog="tg-monitor-prompts",
description=__doc__,
formatter_class=argparse.RawDescriptionHelpFormatter,
)
parser.add_argument(
"-f", "--flow",
default=default_flow,
help=f"Flow ID (default: {default_flow})",
)
parser.add_argument(
"-q", "--queue-type",
default=default_queue_type,
help=f"Queue type: prompt or prompt-rag (default: {default_queue_type})",
)
parser.add_argument(
"-l", "--max-lines",
type=int,
default=default_max_lines,
help=f"Max lines of text per term/response (default: {default_max_lines})",
)
parser.add_argument(
"-w", "--max-width",
type=int,
default=default_max_width,
help=f"Max width per line (default: {default_max_width})",
)
add_pubsub_args(parser, standalone=True)
args = parser.parse_args()
try:
asyncio.run(monitor(
flow=args.flow,
queue_type=args.queue_type,
max_lines=args.max_lines,
max_width=args.max_width,
**{k: v for k, v in vars(args).items()
if k not in ('flow', 'queue_type', 'max_lines', 'max_width')},
))
except KeyboardInterrupt:
pass
except Exception as e:
print(f"Fatal error: {e}", file=sys.stderr)
sys.exit(1)
if __name__ == "__main__":
main()