mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-04-28 18:06:21 +02:00
Streaming LLM part 2 (#567)
* Updates for the agent API with streaming support
* Added the tg-dump-queues tool to dump Pulsar queues to a log
* Updated tg-invoke-agent to produce incremental output
* Queue dumper CLI - may be useful for debugging
* Updated tests
This commit is contained in:
parent
310a2deb06
commit
b1cc724f7d
8 changed files with 609 additions and 51 deletions
|
|
@ -220,32 +220,72 @@ class AgentManager:
|
|||
|
||||
logger.info(f"prompt: {variables}")
|
||||
|
||||
logger.info(f"DEBUG: streaming={streaming}, think={think is not None}")
|
||||
|
||||
# Streaming path - use StreamingReActParser
|
||||
if streaming and think:
|
||||
logger.info("DEBUG: Entering streaming path")
|
||||
from .streaming_parser import StreamingReActParser
|
||||
|
||||
# Create parser with streaming callbacks
|
||||
# Thought chunks go to think(), answer chunks go to answer()
|
||||
logger.info("DEBUG: Creating StreamingReActParser")
|
||||
|
||||
# Collect chunks to send via async callbacks
|
||||
thought_chunks = []
|
||||
answer_chunks = []
|
||||
|
||||
# Create parser with synchronous callbacks that just collect chunks
|
||||
parser = StreamingReActParser(
|
||||
on_thought_chunk=lambda chunk: asyncio.create_task(think(chunk)),
|
||||
on_answer_chunk=lambda chunk: asyncio.create_task(answer(chunk) if answer else think(chunk)),
|
||||
on_thought_chunk=lambda chunk: thought_chunks.append(chunk),
|
||||
on_answer_chunk=lambda chunk: answer_chunks.append(chunk),
|
||||
)
|
||||
logger.info("DEBUG: StreamingReActParser created")
|
||||
|
||||
# Create async chunk callback that feeds parser
|
||||
# Create async chunk callback that feeds parser and sends collected chunks
|
||||
async def on_chunk(text):
|
||||
parser.feed(text)
|
||||
logger.info(f"DEBUG: on_chunk called with {len(text)} chars")
|
||||
|
||||
# Track what we had before
|
||||
prev_thought_count = len(thought_chunks)
|
||||
prev_answer_count = len(answer_chunks)
|
||||
|
||||
# Feed the parser (synchronous)
|
||||
logger.info(f"DEBUG: About to call parser.feed")
|
||||
parser.feed(text)
|
||||
logger.info(f"DEBUG: parser.feed returned")
|
||||
|
||||
# Send any new thought chunks
|
||||
for i in range(prev_thought_count, len(thought_chunks)):
|
||||
logger.info(f"DEBUG: Sending thought chunk {i}")
|
||||
await think(thought_chunks[i])
|
||||
|
||||
# Send any new answer chunks
|
||||
for i in range(prev_answer_count, len(answer_chunks)):
|
||||
logger.info(f"DEBUG: Sending answer chunk {i}")
|
||||
if answer:
|
||||
await answer(answer_chunks[i])
|
||||
else:
|
||||
await think(answer_chunks[i])
|
||||
|
||||
logger.info("DEBUG: Getting prompt-request client from context")
|
||||
client = context("prompt-request")
|
||||
logger.info(f"DEBUG: Got client: {client}")
|
||||
|
||||
logger.info("DEBUG: About to call agent_react with streaming=True")
|
||||
# Get streaming response
|
||||
response_text = await context("prompt-request").agent_react(
|
||||
response_text = await client.agent_react(
|
||||
variables=variables,
|
||||
streaming=True,
|
||||
chunk_callback=on_chunk
|
||||
)
|
||||
logger.info(f"DEBUG: agent_react returned, got {len(response_text) if response_text else 0} chars")
|
||||
|
||||
# Finalize parser
|
||||
logger.info("DEBUG: Finalizing parser")
|
||||
parser.finalize()
|
||||
logger.info("DEBUG: Parser finalized")
|
||||
|
||||
# Get result
|
||||
logger.info("DEBUG: Getting result from parser")
|
||||
result = parser.get_result()
|
||||
if result is None:
|
||||
raise RuntimeError("Parser failed to produce a result")
|
||||
|
|
@ -254,11 +294,18 @@ class AgentManager:
|
|||
return result
|
||||
|
||||
else:
|
||||
logger.info("DEBUG: Entering NON-streaming path")
|
||||
# Non-streaming path - get complete text and parse
|
||||
response_text = await context("prompt-request").agent_react(
|
||||
logger.info("DEBUG: Getting prompt-request client from context")
|
||||
client = context("prompt-request")
|
||||
logger.info(f"DEBUG: Got client: {client}")
|
||||
|
||||
logger.info("DEBUG: About to call agent_react with streaming=False")
|
||||
response_text = await client.agent_react(
|
||||
variables=variables,
|
||||
streaming=False
|
||||
)
|
||||
logger.info(f"DEBUG: agent_react returned, got response")
|
||||
|
||||
logger.debug(f"Response text:\n{response_text}")
|
||||
|
||||
|
|
|
|||
|
|
@ -118,7 +118,11 @@ class StreamingReActParser:
|
|||
self.line_buffer = re.sub(r'\n```$', '', self.line_buffer)
|
||||
|
||||
# Process based on current state
|
||||
# Track previous state to detect if we're making progress
|
||||
while self.line_buffer and self.state != ParserState.COMPLETE:
|
||||
prev_buffer_len = len(self.line_buffer)
|
||||
prev_state = self.state
|
||||
|
||||
if self.state == ParserState.INITIAL:
|
||||
self._process_initial()
|
||||
elif self.state == ParserState.THOUGHT:
|
||||
|
|
@ -130,14 +134,19 @@ class StreamingReActParser:
|
|||
elif self.state == ParserState.FINAL_ANSWER:
|
||||
self._process_final_answer()
|
||||
|
||||
# If no progress was made (buffer unchanged AND state unchanged), break
|
||||
# to avoid infinite loop. We'll process more when the next chunk arrives.
|
||||
if len(self.line_buffer) == prev_buffer_len and self.state == prev_state:
|
||||
break
|
||||
|
||||
def _process_initial(self) -> None:
|
||||
"""Process INITIAL state - looking for 'Thought:' delimiter"""
|
||||
idx = self.line_buffer.find(self.THOUGHT_DELIMITER)
|
||||
|
||||
if idx >= 0:
|
||||
# Found thought delimiter
|
||||
# Discard any content before it
|
||||
self.line_buffer = self.line_buffer[idx + len(self.THOUGHT_DELIMITER):]
|
||||
# Discard any content before it and strip leading whitespace after delimiter
|
||||
self.line_buffer = self.line_buffer[idx + len(self.THOUGHT_DELIMITER):].lstrip()
|
||||
self.state = ParserState.THOUGHT
|
||||
elif len(self.line_buffer) >= self.MAX_DELIMITER_BUFFER:
|
||||
# Buffer getting too large, probably junk before thought
|
||||
|
|
@ -171,7 +180,7 @@ class StreamingReActParser:
|
|||
if self.on_thought_chunk:
|
||||
self.on_thought_chunk(thought_chunk)
|
||||
|
||||
self.line_buffer = self.line_buffer[next_delimiter_idx + delimiter_len:]
|
||||
self.line_buffer = self.line_buffer[next_delimiter_idx + delimiter_len:].lstrip()
|
||||
self.state = next_state
|
||||
else:
|
||||
# No delimiter found yet
|
||||
|
|
@ -194,7 +203,7 @@ class StreamingReActParser:
|
|||
if args_idx >= 0 and (newline_idx < 0 or args_idx < newline_idx):
|
||||
# Args delimiter found first
|
||||
self.action_buffer = self.line_buffer[:args_idx].strip().strip('"')
|
||||
self.line_buffer = self.line_buffer[args_idx + len(self.ARGS_DELIMITER):]
|
||||
self.line_buffer = self.line_buffer[args_idx + len(self.ARGS_DELIMITER):].lstrip()
|
||||
self.state = ParserState.ARGS
|
||||
elif newline_idx >= 0:
|
||||
# Newline found, action name complete
|
||||
|
|
@ -204,7 +213,7 @@ class StreamingReActParser:
|
|||
# Actually, check if next line has Args:
|
||||
if self.line_buffer.lstrip().startswith(self.ARGS_DELIMITER):
|
||||
args_start = self.line_buffer.find(self.ARGS_DELIMITER)
|
||||
self.line_buffer = self.line_buffer[args_start + len(self.ARGS_DELIMITER):]
|
||||
self.line_buffer = self.line_buffer[args_start + len(self.ARGS_DELIMITER):].lstrip()
|
||||
self.state = ParserState.ARGS
|
||||
else:
|
||||
# Not enough content yet, keep buffering
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue