Streaming LLM part 2 (#567)

* Updates for agent API with streaming support * Added tg-dump-queues tool to dump Pulsar queues to a log * Updated tg-invoke-agent, incremental output * Queue dumper CLI - might be useful for debug * Updating for tests
2026-04-26 00:46:22 +02:00 · 2025-11-26 15:16:17 +00:00 · 2025-11-26 15:16:17 +00:00 · b1cc724f7d
commit b1cc724f7d
parent 310a2deb06
8 changed files with 609 additions and 51 deletions
--- a/trustgraph-cli/trustgraph/cli/invoke_agent.py
+++ b/trustgraph-cli/trustgraph/cli/invoke_agent.py
@ -14,6 +14,78 @@ default_url = os.getenv("TRUSTGRAPH_URL", 'ws://localhost:8088/')
 default_user = 'trustgraph'
 default_collection = 'default'

+class Outputter:
+    def __init__(self, width=75, prefix="> "):
+        self.width = width
+        self.prefix = prefix
+        self.column = 0
+        self.word_buffer = ""
+        self.just_wrapped = False
+
+    def __enter__(self):
+        # Print prefix at start of first line
+        print(self.prefix, end="", flush=True)
+        self.column = len(self.prefix)
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        # Flush remaining word buffer
+        if self.word_buffer:
+            print(self.word_buffer, end="", flush=True)
+            self.column += len(self.word_buffer)
+            self.word_buffer = ""
+
+        # Add final newline if not at line start
+        if self.column > 0:
+            print(flush=True)
+            self.column = 0
+
+    def output(self, text):
+        for char in text:
+            # Handle whitespace (space/tab)
+            if char in (' ', '\t'):
+                # Flush word buffer if present
+                if self.word_buffer:
+                    # Check if word + space would exceed width
+                    if self.column + len(self.word_buffer) + 1 > self.width:
+                        # Wrap: newline + prefix
+                        print(flush=True)
+                        print(self.prefix, end="", flush=True)
+                        self.column = len(self.prefix)
+                        self.just_wrapped = True
+
+                    # Output word buffer
+                    print(self.word_buffer, end="", flush=True)
+                    self.column += len(self.word_buffer)
+                    self.word_buffer = ""
+
+                # Output the space
+                print(char, end="", flush=True)
+                self.column += 1
+                self.just_wrapped = False
+
+            # Handle newline
+            elif char == '\n':
+                if self.just_wrapped:
+                    # Skip this newline (already wrapped)
+                    self.just_wrapped = False
+                else:
+                    # Flush word buffer if any
+                    if self.word_buffer:
+                        print(self.word_buffer, end="", flush=True)
+                        self.word_buffer = ""
+
+                    # Output newline + prefix
+                    print(flush=True)
+                    print(self.prefix, end="", flush=True)
+                    self.column = len(self.prefix)
+                    self.just_wrapped = False
+
+            # Regular character - add to word buffer
+            else:
+                self.word_buffer += char
+                self.just_wrapped = False
+
 def wrap(text, width=75):
    if text is None: text = "n/a"
    out = textwrap.wrap(
@ -41,6 +113,10 @@ async def question(
        output(wrap(question), "\U00002753 ")
        print()

+    # Track last chunk type and current outputter for streaming
+    last_chunk_type = None
+    current_outputter = None
+
    def think(x):
        if verbose:
            output(wrap(x), "\U0001f914 ")
@ -97,14 +173,30 @@ async def question(
                chunk_type = response["chunk_type"]
                content = response.get("content", "")

-                if chunk_type == "thought":
-                    think(content)
-                elif chunk_type == "observation":
-                    observe(content)
+                # Check if we're switching to a new message type
+                if last_chunk_type != chunk_type:
+                    # Close previous outputter if exists
+                    if current_outputter:
+                        current_outputter.__exit__(None, None, None)
+                        current_outputter = None
+                        print()  # Blank line between message types
+
+                    # Create new outputter for new message type
+                    if chunk_type == "thought" and verbose:
+                        current_outputter = Outputter(width=78, prefix="\U0001f914  ")
+                        current_outputter.__enter__()
+                    elif chunk_type == "observation" and verbose:
+                        current_outputter = Outputter(width=78, prefix="\U0001f4a1  ")
+                        current_outputter.__enter__()
+                    # For answer, don't use Outputter - just print as-is
+
+                    last_chunk_type = chunk_type
+
+                # Output the chunk
+                if current_outputter:
+                    current_outputter.output(content)
                elif chunk_type == "answer":
-                    print(content)
-                elif chunk_type == "error":
-                    raise RuntimeError(content)
+                    print(content, end="", flush=True)
            else:
                # Handle legacy format (backward compatibility)
                if "thought" in response:
@ -119,7 +211,15 @@ async def question(
                if "error" in response:
                    raise RuntimeError(response["error"])

-            if obj["complete"]: break
+            if obj["complete"]:
+                # Close any remaining outputter
+                if current_outputter:
+                    current_outputter.__exit__(None, None, None)
+                    current_outputter = None
+                # Add final newline if we were outputting answer
+                elif last_chunk_type == "answer":
+                    print()
+                break

        await ws.close()

@ -212,4 +312,4 @@ def main():
        print("Exception:", e, flush=True)

 if __name__ == "__main__":
-    main()
+    main()