Streaming LLM part 2 (#567)

* Updates for agent API with streaming support * Added tg-dump-queues tool to dump Pulsar queues to a log * Updated tg-invoke-agent, incremental output * Queue dumper CLI - might be useful for debug * Updating for tests
2026-06-13 00:35:14 +02:00 · 2025-11-26 15:16:17 +00:00 · 2025-11-26 15:16:17 +00:00 · b1cc724f7d
commit b1cc724f7d
parent 310a2deb06
8 changed files with 609 additions and 51 deletions
--- a/trustgraph-cli/trustgraph/cli/dump_queues.py
+++ b/trustgraph-cli/trustgraph/cli/dump_queues.py
@ -0,0 +1,362 @@
+"""
+Multi-queue Pulsar message dumper for debugging TrustGraph message flows.
+
+This utility monitors multiple Pulsar queues simultaneously and logs all messages
+to a file with timestamps and pretty-printed formatting. Useful for debugging
+message flows, diagnosing stuck services, and understanding system behavior.
+
+Uses TrustGraph's Subscriber abstraction for future-proof pub/sub compatibility.
+"""
+
+import pulsar
+from pulsar.schema import BytesSchema
+import sys
+import json
+import asyncio
+from datetime import datetime
+import argparse
+
+from trustgraph.base.subscriber import Subscriber
+
+def format_message(queue_name, msg):
+    """Format a message with timestamp and queue name."""
+    timestamp = datetime.now().isoformat()
+
+    # Try to parse as JSON and pretty-print
+    try:
+        # Handle both Message objects and raw bytes
+        if hasattr(msg, 'value'):
+            # Message object with .value() method
+            value = msg.value()
+        else:
+            # Raw bytes from schema-less subscription
+            value = msg
+
+        # If it's bytes, decode it
+        if isinstance(value, bytes):
+            value = value.decode('utf-8')
+
+        # If it's a string, try to parse as JSON
+        if isinstance(value, str):
+            try:
+                parsed = json.loads(value)
+                body = json.dumps(parsed, indent=2)
+            except (json.JSONDecodeError, TypeError):
+                body = value
+        else:
+            # Try to convert to dict for pretty printing
+            try:
+                # Pulsar schema objects have __dict__ or similar
+                if hasattr(value, '__dict__'):
+                    parsed = {k: v for k, v in value.__dict__.items()
+                             if not k.startswith('_')}
+                else:
+                    parsed = str(value)
+                body = json.dumps(parsed, indent=2, default=str)
+            except (TypeError, AttributeError):
+                body = str(value)
+
+    except Exception as e:
+        body = f"<Error formatting message: {e}>\n{str(msg)}"
+
+    # Format the output
+    header = f"\n{'='*80}\n[{timestamp}] Queue: {queue_name}\n{'='*80}\n"
+    return header + body + "\n"
+
+
+async def monitor_queue(subscriber, queue_name, central_queue, monitor_id, shutdown_event):
+    """
+    Monitor a single queue via Subscriber and forward messages to central queue.
+
+    Args:
+        subscriber: Subscriber instance for this queue
+        queue_name: Name of the queue (for logging)
+        central_queue: asyncio.Queue to forward messages to
+        monitor_id: Unique ID for this monitor's subscription
+        shutdown_event: asyncio.Event to signal shutdown
+    """
+    msg_queue = None
+    try:
+        # Subscribe to all messages from this Subscriber
+        msg_queue = await subscriber.subscribe_all(monitor_id)
+
+        while not shutdown_event.is_set():
+            try:
+                # Read from Subscriber's internal queue with timeout
+                msg = await asyncio.wait_for(msg_queue.get(), timeout=0.5)
+                timestamp = datetime.now()
+                formatted = format_message(queue_name, msg)
+
+                # Forward to central queue for writing
+                await central_queue.put((timestamp, queue_name, formatted))
+            except asyncio.TimeoutError:
+                # No message, check shutdown flag again
+                continue
+
+    except Exception as e:
+        if not shutdown_event.is_set():
+            error_msg = f"\n{'='*80}\n[{datetime.now().isoformat()}] ERROR in monitor for {queue_name}\n{'='*80}\n{e}\n"
+            await central_queue.put((datetime.now(), queue_name, error_msg))
+    finally:
+        # Clean unsubscribe
+        if msg_queue is not None:
+            try:
+                await subscriber.unsubscribe_all(monitor_id)
+            except Exception:
+                pass
+
+
+async def log_writer(central_queue, file_handle, shutdown_event, console_output=True):
+    """
+    Write messages from central queue to file.
+
+    Args:
+        central_queue: asyncio.Queue containing (timestamp, queue_name, formatted_msg) tuples
+        file_handle: Open file handle to write to
+        shutdown_event: asyncio.Event to signal shutdown
+        console_output: Whether to print abbreviated messages to console
+    """
+    try:
+        while not shutdown_event.is_set():
+            try:
+                # Wait for messages with timeout to check shutdown flag
+                timestamp, queue_name, formatted_msg = await asyncio.wait_for(
+                    central_queue.get(), timeout=0.5
+                )
+
+                # Write to file
+                file_handle.write(formatted_msg)
+                file_handle.flush()
+
+                # Print abbreviated message to console
+                if console_output:
+                    time_str = timestamp.strftime('%H:%M:%S')
+                    print(f"[{time_str}] {queue_name}: Message received")
+            except asyncio.TimeoutError:
+                # No message, check shutdown flag again
+                continue
+
+    finally:
+        # Flush remaining messages after shutdown
+        while not central_queue.empty():
+            try:
+                timestamp, queue_name, formatted_msg = central_queue.get_nowait()
+                file_handle.write(formatted_msg)
+                file_handle.flush()
+            except asyncio.QueueEmpty:
+                break
+
+
+async def async_main(queues, output_file, pulsar_host, listener_name, subscriber_name, append_mode):
+    """
+    Main async function to monitor multiple queues concurrently.
+
+    Args:
+        queues: List of queue names to monitor
+        output_file: Path to output file
+        pulsar_host: Pulsar connection URL
+        listener_name: Pulsar listener name
+        subscriber_name: Base name for subscribers
+        append_mode: Whether to append to existing file
+    """
+    print(f"TrustGraph Queue Dumper")
+    print(f"Monitoring {len(queues)} queue(s):")
+    for q in queues:
+        print(f"  - {q}")
+    print(f"Output file: {output_file}")
+    print(f"Mode: {'append' if append_mode else 'overwrite'}")
+    print(f"Press Ctrl+C to stop\n")
+
+    # Connect to Pulsar
+    try:
+        client = pulsar.Client(pulsar_host, listener_name=listener_name)
+    except Exception as e:
+        print(f"Error connecting to Pulsar at {pulsar_host}: {e}", file=sys.stderr)
+        sys.exit(1)
+
+    # Create Subscribers and central queue
+    central_queue = asyncio.Queue()
+    subscribers = []
+
+    for queue_name in queues:
+        try:
+            sub = Subscriber(
+                client=client,
+                topic=queue_name,
+                subscription=subscriber_name,
+                consumer_name=f"{subscriber_name}-{queue_name}",
+                schema=None,  # No schema - accept any message type
+            )
+            await sub.start()
+            subscribers.append((queue_name, sub))
+            print(f"✓ Subscribed to: {queue_name}")
+        except Exception as e:
+            print(f"✗ Error subscribing to {queue_name}: {e}", file=sys.stderr)
+
+    if not subscribers:
+        print("\nNo subscribers created. Exiting.", file=sys.stderr)
+        client.close()
+        sys.exit(1)
+
+    print(f"\nListening for messages...\n")
+
+    # Open output file
+    mode = 'a' if append_mode else 'w'
+    try:
+        with open(output_file, mode) as f:
+            f.write(f"\n{'#'*80}\n")
+            f.write(f"# Session started: {datetime.now().isoformat()}\n")
+            f.write(f"# Monitoring queues: {', '.join(queues)}\n")
+            f.write(f"{'#'*80}\n")
+            f.flush()
+
+            # Create shutdown event for clean coordination
+            shutdown_event = asyncio.Event()
+
+            # Start monitoring tasks
+            tasks = []
+            try:
+                # Create one monitor task per subscriber
+                for queue_name, sub in subscribers:
+                    task = asyncio.create_task(
+                        monitor_queue(sub, queue_name, central_queue, "logger", shutdown_event)
+                    )
+                    tasks.append(task)
+
+                # Create single writer task
+                writer_task = asyncio.create_task(
+                    log_writer(central_queue, f, shutdown_event)
+                )
+                tasks.append(writer_task)
+
+                # Wait for all tasks (they check shutdown_event)
+                await asyncio.gather(*tasks)
+
+            except KeyboardInterrupt:
+                print("\n\nStopping...")
+            finally:
+                # Signal shutdown to all tasks
+                shutdown_event.set()
+
+                # Wait for tasks to finish cleanly (with timeout)
+                try:
+                    await asyncio.wait_for(asyncio.gather(*tasks, return_exceptions=True), timeout=2.0)
+                except asyncio.TimeoutError:
+                    print("Warning: Shutdown timeout", file=sys.stderr)
+
+                # Write session end marker
+                f.write(f"\n{'#'*80}\n")
+                f.write(f"# Session ended: {datetime.now().isoformat()}\n")
+                f.write(f"{'#'*80}\n")
+
+    except IOError as e:
+        print(f"Error writing to {output_file}: {e}", file=sys.stderr)
+        sys.exit(1)
+    finally:
+        # Clean shutdown of Subscribers
+        for _, sub in subscribers:
+            await sub.stop()
+        client.close()
+
+    print(f"\nMessages logged to: {output_file}")
+
+def main():
+    parser = argparse.ArgumentParser(
+        prog='tg-dump-queues',
+        description='Monitor and dump messages from multiple Pulsar queues',
+        epilog="""
+Examples:
+  # Monitor agent and prompt queues
+  tg-dump-queues non-persistent://tg/request/agent:default \\
+                 non-persistent://tg/request/prompt:default
+
+  # Monitor with custom output file
+  tg-dump-queues non-persistent://tg/request/agent:default \\
+                 --output debug.log
+
+  # Append to existing log file
+  tg-dump-queues non-persistent://tg/request/agent:default \\
+                 --output queue.log --append
+
+Common queue patterns:
+  - Agent requests:   non-persistent://tg/request/agent:default
+  - Agent responses:  non-persistent://tg/response/agent:default
+  - Prompt requests:  non-persistent://tg/request/prompt:default
+  - Prompt responses: non-persistent://tg/response/prompt:default
+  - LLM requests:     non-persistent://tg/request/text-completion:default
+  - LLM responses:    non-persistent://tg/response/text-completion:default
+
+IMPORTANT:
+  This tool subscribes to queues without a schema (schema-less mode). To avoid
+  schema conflicts, ensure that TrustGraph services and flows are already started
+  before running this tool. If this tool subscribes first, the real services may
+  encounter schema mismatch errors when they try to connect.
+
+  Best practice: Start services → Set up flows → Run tg-dump-queues
+        """,
+        formatter_class=argparse.RawDescriptionHelpFormatter
+    )
+
+    parser.add_argument(
+        'queues',
+        nargs='+',
+        help='Pulsar queue names to monitor'
+    )
+
+    parser.add_argument(
+        '--output', '-o',
+        default='queue.log',
+        help='Output file (default: queue.log)'
+    )
+
+    parser.add_argument(
+        '--append', '-a',
+        action='store_true',
+        help='Append to output file instead of overwriting'
+    )
+
+    parser.add_argument(
+        '--pulsar-host',
+        default='pulsar://localhost:6650',
+        help='Pulsar host URL (default: pulsar://localhost:6650)'
+    )
+
+    parser.add_argument(
+        '--listener-name',
+        default='localhost',
+        help='Pulsar listener name (default: localhost)'
+    )
+
+    parser.add_argument(
+        '--subscriber',
+        default='debug',
+        help='Subscriber name for queue subscription (default: debug)'
+    )
+
+    args = parser.parse_args()
+
+    # Filter out any accidentally included flags
+    queues = [q for q in args.queues if not q.startswith('--')]
+
+    if not queues:
+        parser.error("No queues specified")
+
+    # Run async main
+    try:
+        asyncio.run(async_main(
+            queues=queues,
+            output_file=args.output,
+            pulsar_host=args.pulsar_host,
+            listener_name=args.listener_name,
+            subscriber_name=args.subscriber,
+            append_mode=args.append
+        ))
+    except KeyboardInterrupt:
+        # Already handled in async_main
+        pass
+    except Exception as e:
+        print(f"Fatal error: {e}", file=sys.stderr)
+        sys.exit(1)
+
+if __name__ == '__main__':
+    main()
--- a/trustgraph-cli/trustgraph/cli/invoke_agent.py
+++ b/trustgraph-cli/trustgraph/cli/invoke_agent.py
@ -14,6 +14,78 @@ default_url = os.getenv("TRUSTGRAPH_URL", 'ws://localhost:8088/')
 default_user = 'trustgraph'
 default_collection = 'default'

+class Outputter:
+    def __init__(self, width=75, prefix="> "):
+        self.width = width
+        self.prefix = prefix
+        self.column = 0
+        self.word_buffer = ""
+        self.just_wrapped = False
+
+    def __enter__(self):
+        # Print prefix at start of first line
+        print(self.prefix, end="", flush=True)
+        self.column = len(self.prefix)
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        # Flush remaining word buffer
+        if self.word_buffer:
+            print(self.word_buffer, end="", flush=True)
+            self.column += len(self.word_buffer)
+            self.word_buffer = ""
+
+        # Add final newline if not at line start
+        if self.column > 0:
+            print(flush=True)
+            self.column = 0
+
+    def output(self, text):
+        for char in text:
+            # Handle whitespace (space/tab)
+            if char in (' ', '\t'):
+                # Flush word buffer if present
+                if self.word_buffer:
+                    # Check if word + space would exceed width
+                    if self.column + len(self.word_buffer) + 1 > self.width:
+                        # Wrap: newline + prefix
+                        print(flush=True)
+                        print(self.prefix, end="", flush=True)
+                        self.column = len(self.prefix)
+                        self.just_wrapped = True
+
+                    # Output word buffer
+                    print(self.word_buffer, end="", flush=True)
+                    self.column += len(self.word_buffer)
+                    self.word_buffer = ""
+
+                # Output the space
+                print(char, end="", flush=True)
+                self.column += 1
+                self.just_wrapped = False
+
+            # Handle newline
+            elif char == '\n':
+                if self.just_wrapped:
+                    # Skip this newline (already wrapped)
+                    self.just_wrapped = False
+                else:
+                    # Flush word buffer if any
+                    if self.word_buffer:
+                        print(self.word_buffer, end="", flush=True)
+                        self.word_buffer = ""
+
+                    # Output newline + prefix
+                    print(flush=True)
+                    print(self.prefix, end="", flush=True)
+                    self.column = len(self.prefix)
+                    self.just_wrapped = False
+
+            # Regular character - add to word buffer
+            else:
+                self.word_buffer += char
+                self.just_wrapped = False
+
 def wrap(text, width=75):
    if text is None: text = "n/a"
    out = textwrap.wrap(
@ -41,6 +113,10 @@ async def question(
        output(wrap(question), "\U00002753 ")
        print()

+    # Track last chunk type and current outputter for streaming
+    last_chunk_type = None
+    current_outputter = None
+
    def think(x):
        if verbose:
            output(wrap(x), "\U0001f914 ")
@ -97,14 +173,30 @@ async def question(
                chunk_type = response["chunk_type"]
                content = response.get("content", "")

-                if chunk_type == "thought":
-                    think(content)
-                elif chunk_type == "observation":
-                    observe(content)
+                # Check if we're switching to a new message type
+                if last_chunk_type != chunk_type:
+                    # Close previous outputter if exists
+                    if current_outputter:
+                        current_outputter.__exit__(None, None, None)
+                        current_outputter = None
+                        print()  # Blank line between message types
+
+                    # Create new outputter for new message type
+                    if chunk_type == "thought" and verbose:
+                        current_outputter = Outputter(width=78, prefix="\U0001f914  ")
+                        current_outputter.__enter__()
+                    elif chunk_type == "observation" and verbose:
+                        current_outputter = Outputter(width=78, prefix="\U0001f4a1  ")
+                        current_outputter.__enter__()
+                    # For answer, don't use Outputter - just print as-is
+
+                    last_chunk_type = chunk_type
+
+                # Output the chunk
+                if current_outputter:
+                    current_outputter.output(content)
                elif chunk_type == "answer":
-                    print(content)
-                elif chunk_type == "error":
-                    raise RuntimeError(content)
+                    print(content, end="", flush=True)
            else:
                # Handle legacy format (backward compatibility)
                if "thought" in response:
@ -119,7 +211,15 @@ async def question(
                if "error" in response:
                    raise RuntimeError(response["error"])

-            if obj["complete"]: break
+            if obj["complete"]:
+                # Close any remaining outputter
+                if current_outputter:
+                    current_outputter.__exit__(None, None, None)
+                    current_outputter = None
+                # Add final newline if we were outputting answer
+                elif last_chunk_type == "answer":
+                    print()
+                break

        await ws.close()

@ -212,4 +312,4 @@ def main():
        print("Exception:", e, flush=True)

 if __name__ == "__main__":
-    main()
+    main()