mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-04-26 00:46:22 +02:00
Streaming LLM part 2 (#567)
* Updates for agent API with streaming support * Added tg-dump-queues tool to dump Pulsar queues to a log * Updated tg-invoke-agent, incremental output * Queue dumper CLI - might be useful for debug * Updating for tests
This commit is contained in:
parent
310a2deb06
commit
b1cc724f7d
8 changed files with 609 additions and 51 deletions
|
|
@ -14,6 +14,78 @@ default_url = os.getenv("TRUSTGRAPH_URL", 'ws://localhost:8088/')
|
|||
default_user = 'trustgraph'
|
||||
default_collection = 'default'
|
||||
|
||||
class Outputter:
|
||||
def __init__(self, width=75, prefix="> "):
|
||||
self.width = width
|
||||
self.prefix = prefix
|
||||
self.column = 0
|
||||
self.word_buffer = ""
|
||||
self.just_wrapped = False
|
||||
|
||||
def __enter__(self):
|
||||
# Print prefix at start of first line
|
||||
print(self.prefix, end="", flush=True)
|
||||
self.column = len(self.prefix)
|
||||
return self
|
||||
|
||||
def __exit__(self, exc_type, exc_val, exc_tb):
|
||||
# Flush remaining word buffer
|
||||
if self.word_buffer:
|
||||
print(self.word_buffer, end="", flush=True)
|
||||
self.column += len(self.word_buffer)
|
||||
self.word_buffer = ""
|
||||
|
||||
# Add final newline if not at line start
|
||||
if self.column > 0:
|
||||
print(flush=True)
|
||||
self.column = 0
|
||||
|
||||
def output(self, text):
|
||||
for char in text:
|
||||
# Handle whitespace (space/tab)
|
||||
if char in (' ', '\t'):
|
||||
# Flush word buffer if present
|
||||
if self.word_buffer:
|
||||
# Check if word + space would exceed width
|
||||
if self.column + len(self.word_buffer) + 1 > self.width:
|
||||
# Wrap: newline + prefix
|
||||
print(flush=True)
|
||||
print(self.prefix, end="", flush=True)
|
||||
self.column = len(self.prefix)
|
||||
self.just_wrapped = True
|
||||
|
||||
# Output word buffer
|
||||
print(self.word_buffer, end="", flush=True)
|
||||
self.column += len(self.word_buffer)
|
||||
self.word_buffer = ""
|
||||
|
||||
# Output the space
|
||||
print(char, end="", flush=True)
|
||||
self.column += 1
|
||||
self.just_wrapped = False
|
||||
|
||||
# Handle newline
|
||||
elif char == '\n':
|
||||
if self.just_wrapped:
|
||||
# Skip this newline (already wrapped)
|
||||
self.just_wrapped = False
|
||||
else:
|
||||
# Flush word buffer if any
|
||||
if self.word_buffer:
|
||||
print(self.word_buffer, end="", flush=True)
|
||||
self.word_buffer = ""
|
||||
|
||||
# Output newline + prefix
|
||||
print(flush=True)
|
||||
print(self.prefix, end="", flush=True)
|
||||
self.column = len(self.prefix)
|
||||
self.just_wrapped = False
|
||||
|
||||
# Regular character - add to word buffer
|
||||
else:
|
||||
self.word_buffer += char
|
||||
self.just_wrapped = False
|
||||
|
||||
def wrap(text, width=75):
|
||||
if text is None: text = "n/a"
|
||||
out = textwrap.wrap(
|
||||
|
|
@ -41,6 +113,10 @@ async def question(
|
|||
output(wrap(question), "\U00002753 ")
|
||||
print()
|
||||
|
||||
# Track last chunk type and current outputter for streaming
|
||||
last_chunk_type = None
|
||||
current_outputter = None
|
||||
|
||||
def think(x):
|
||||
if verbose:
|
||||
output(wrap(x), "\U0001f914 ")
|
||||
|
|
@ -97,14 +173,30 @@ async def question(
|
|||
chunk_type = response["chunk_type"]
|
||||
content = response.get("content", "")
|
||||
|
||||
if chunk_type == "thought":
|
||||
think(content)
|
||||
elif chunk_type == "observation":
|
||||
observe(content)
|
||||
# Check if we're switching to a new message type
|
||||
if last_chunk_type != chunk_type:
|
||||
# Close previous outputter if exists
|
||||
if current_outputter:
|
||||
current_outputter.__exit__(None, None, None)
|
||||
current_outputter = None
|
||||
print() # Blank line between message types
|
||||
|
||||
# Create new outputter for new message type
|
||||
if chunk_type == "thought" and verbose:
|
||||
current_outputter = Outputter(width=78, prefix="\U0001f914 ")
|
||||
current_outputter.__enter__()
|
||||
elif chunk_type == "observation" and verbose:
|
||||
current_outputter = Outputter(width=78, prefix="\U0001f4a1 ")
|
||||
current_outputter.__enter__()
|
||||
# For answer, don't use Outputter - just print as-is
|
||||
|
||||
last_chunk_type = chunk_type
|
||||
|
||||
# Output the chunk
|
||||
if current_outputter:
|
||||
current_outputter.output(content)
|
||||
elif chunk_type == "answer":
|
||||
print(content)
|
||||
elif chunk_type == "error":
|
||||
raise RuntimeError(content)
|
||||
print(content, end="", flush=True)
|
||||
else:
|
||||
# Handle legacy format (backward compatibility)
|
||||
if "thought" in response:
|
||||
|
|
@ -119,7 +211,15 @@ async def question(
|
|||
if "error" in response:
|
||||
raise RuntimeError(response["error"])
|
||||
|
||||
if obj["complete"]: break
|
||||
if obj["complete"]:
|
||||
# Close any remaining outputter
|
||||
if current_outputter:
|
||||
current_outputter.__exit__(None, None, None)
|
||||
current_outputter = None
|
||||
# Add final newline if we were outputting answer
|
||||
elif last_chunk_type == "answer":
|
||||
print()
|
||||
break
|
||||
|
||||
await ws.close()
|
||||
|
||||
|
|
@ -212,4 +312,4 @@ def main():
|
|||
print("Exception:", e, flush=True)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
main()
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue