Streaming LLM part 2 (#567)

* Updates for agent API with streaming support

* Added tg-dump-queues tool to dump Pulsar queues to a log

* Updated tg-invoke-agent, incremental output

* Queue dumper CLI - might be useful for debug

* Updating for tests
This commit is contained in:
cybermaggedon 2025-11-26 15:16:17 +00:00 committed by GitHub
parent 310a2deb06
commit b1cc724f7d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 609 additions and 51 deletions

View file

@ -118,7 +118,11 @@ class StreamingReActParser:
self.line_buffer = re.sub(r'\n```$', '', self.line_buffer)
# Process based on current state
# Track previous state to detect if we're making progress
while self.line_buffer and self.state != ParserState.COMPLETE:
prev_buffer_len = len(self.line_buffer)
prev_state = self.state
if self.state == ParserState.INITIAL:
self._process_initial()
elif self.state == ParserState.THOUGHT:
@ -130,14 +134,19 @@ class StreamingReActParser:
elif self.state == ParserState.FINAL_ANSWER:
self._process_final_answer()
# If no progress was made (buffer unchanged AND state unchanged), break
# to avoid infinite loop. We'll process more when the next chunk arrives.
if len(self.line_buffer) == prev_buffer_len and self.state == prev_state:
break
def _process_initial(self) -> None:
"""Process INITIAL state - looking for 'Thought:' delimiter"""
idx = self.line_buffer.find(self.THOUGHT_DELIMITER)
if idx >= 0:
# Found thought delimiter
# Discard any content before it
self.line_buffer = self.line_buffer[idx + len(self.THOUGHT_DELIMITER):]
# Discard any content before it and strip leading whitespace after delimiter
self.line_buffer = self.line_buffer[idx + len(self.THOUGHT_DELIMITER):].lstrip()
self.state = ParserState.THOUGHT
elif len(self.line_buffer) >= self.MAX_DELIMITER_BUFFER:
# Buffer getting too large, probably junk before thought
@ -171,7 +180,7 @@ class StreamingReActParser:
if self.on_thought_chunk:
self.on_thought_chunk(thought_chunk)
self.line_buffer = self.line_buffer[next_delimiter_idx + delimiter_len:]
self.line_buffer = self.line_buffer[next_delimiter_idx + delimiter_len:].lstrip()
self.state = next_state
else:
# No delimiter found yet
@ -194,7 +203,7 @@ class StreamingReActParser:
if args_idx >= 0 and (newline_idx < 0 or args_idx < newline_idx):
# Args delimiter found first
self.action_buffer = self.line_buffer[:args_idx].strip().strip('"')
self.line_buffer = self.line_buffer[args_idx + len(self.ARGS_DELIMITER):]
self.line_buffer = self.line_buffer[args_idx + len(self.ARGS_DELIMITER):].lstrip()
self.state = ParserState.ARGS
elif newline_idx >= 0:
# Newline found, action name complete
@ -204,7 +213,7 @@ class StreamingReActParser:
# Actually, check if next line has Args:
if self.line_buffer.lstrip().startswith(self.ARGS_DELIMITER):
args_start = self.line_buffer.find(self.ARGS_DELIMITER)
self.line_buffer = self.line_buffer[args_start + len(self.ARGS_DELIMITER):]
self.line_buffer = self.line_buffer[args_start + len(self.ARGS_DELIMITER):].lstrip()
self.state = ParserState.ARGS
else:
# Not enough content yet, keep buffering