Streaming LLM part 2 (#567)

* Updates for agent API with streaming support

* Added tg-dump-queues tool to dump Pulsar queues to a log

* Updated tg-invoke-agent, incremental output

* Queue dumper CLI - may be useful for debugging

* Updating for tests
This commit is contained in:
cybermaggedon 2025-11-26 15:16:17 +00:00 committed by GitHub
parent 310a2deb06
commit b1cc724f7d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 609 additions and 51 deletions

View file

@ -14,6 +14,78 @@ default_url = os.getenv("TRUSTGRAPH_URL", 'ws://localhost:8088/')
default_user = 'trustgraph'
default_collection = 'default'
class Outputter:
    """Print streamed text incrementally, word-wrapped behind a line prefix.

    Text arrives in arbitrary chunks via :meth:`output`.  Characters are
    collected into a word buffer and only written once the word is complete
    (at whitespace, at a newline, or when the context exits), so a word is
    never split across lines.  Every output line starts with ``prefix``.

    Intended to be used as a context manager: entering prints the prefix
    for the first line, exiting flushes any pending word and terminates the
    line.

    Args:
        width: Maximum line length in characters, prefix included.  Length
            is measured with ``len()``, not terminal display width.
        prefix: String printed at the start of every line.
    """

    def __init__(self, width=75, prefix="> "):
        self.width = width
        self.prefix = prefix
        self.column = 0          # characters already written on this line
        self.word_buffer = ""    # current word, not yet written
        # Kept so code reading this attribute keeps working; wrapping no
        # longer depends on it and it stays False.
        self.just_wrapped = False

    def __enter__(self):
        """Print the prefix for the first line and return ``self``."""
        print(self.prefix, end="", flush=True)
        self.column = len(self.prefix)
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Flush the pending word and end the current line.

        Returns ``None``, so an exception raised inside the ``with`` body
        is not suppressed.
        """
        # Apply the same width check here as at whitespace so the final
        # word cannot overflow the line.
        self._flush_word()

        # Add final newline if not at line start
        if self.column > 0:
            print(flush=True)
            self.column = 0

    def _start_line(self):
        """End the current line and begin a new one with the prefix."""
        print(flush=True)
        print(self.prefix, end="", flush=True)
        self.column = len(self.prefix)

    def _flush_word(self, trailing=0):
        """Write the buffered word, wrapping first if it would not fit.

        Args:
            trailing: Number of extra characters (e.g. a following space)
                that must also fit on the line after the word.
        """
        word = self.word_buffer
        if not word:
            return

        overflows = self.column + len(word) + trailing > self.width
        # Only wrap when something besides the prefix is already on the
        # line; otherwise an over-long word would leave an empty
        # prefix-only line and still overflow on the next one.
        if overflows and self.column > len(self.prefix):
            self._start_line()

        print(word, end="", flush=True)
        self.column += len(word)
        self.word_buffer = ""

    def output(self, text):
        """Consume a chunk of streamed text and print completed words.

        Spaces and tabs end the current word and are echoed as-is; a
        newline ends the word and starts a new prefixed line; any other
        character is appended to the pending word.

        Args:
            text: The next chunk of text; may end in the middle of a word.
        """
        for char in text:
            # Handle whitespace (space/tab)
            if char in (' ', '\t'):
                self._flush_word(trailing=1)
                print(char, end="", flush=True)
                self.column += 1

            # Handle newline
            elif char == '\n':
                self._flush_word()
                self._start_line()

            # Regular character - add to word buffer
            else:
                self.word_buffer += char
def wrap(text, width=75):
if text is None: text = "n/a"
out = textwrap.wrap(
@ -41,6 +113,10 @@ async def question(
output(wrap(question), "\U00002753 ")
print()
# Track last chunk type and current outputter for streaming
last_chunk_type = None
current_outputter = None
def think(x):
if verbose:
output(wrap(x), "\U0001f914 ")
@ -97,14 +173,30 @@ async def question(
chunk_type = response["chunk_type"]
content = response.get("content", "")
if chunk_type == "thought":
think(content)
elif chunk_type == "observation":
observe(content)
# Check if we're switching to a new message type
if last_chunk_type != chunk_type:
# Close previous outputter if exists
if current_outputter:
current_outputter.__exit__(None, None, None)
current_outputter = None
print() # Blank line between message types
# Create new outputter for new message type
if chunk_type == "thought" and verbose:
current_outputter = Outputter(width=78, prefix="\U0001f914 ")
current_outputter.__enter__()
elif chunk_type == "observation" and verbose:
current_outputter = Outputter(width=78, prefix="\U0001f4a1 ")
current_outputter.__enter__()
# For answer, don't use Outputter - just print as-is
last_chunk_type = chunk_type
# Output the chunk
if current_outputter:
current_outputter.output(content)
elif chunk_type == "answer":
print(content)
elif chunk_type == "error":
raise RuntimeError(content)
print(content, end="", flush=True)
else:
# Handle legacy format (backward compatibility)
if "thought" in response:
@ -119,7 +211,15 @@ async def question(
if "error" in response:
raise RuntimeError(response["error"])
if obj["complete"]: break
if obj["complete"]:
# Close any remaining outputter
if current_outputter:
current_outputter.__exit__(None, None, None)
current_outputter = None
# Add final newline if we were outputting answer
elif last_chunk_type == "answer":
print()
break
await ws.close()
@ -212,4 +312,4 @@ def main():
print("Exception:", e, flush=True)
if __name__ == "__main__":
main()
main()