trustgraph/tests/integration/test_agent_streaming_integration.py
cybermaggedon d2751553a3
Add agent explainability instrumentation and unify envelope field naming (#795)
Addresses recommendations from the UX developer's agent experience report.
Adds provenance predicates, DAG structure changes, error resilience, and
a published OWL ontology.

Explainability additions:

- Tool candidates: tg:toolCandidate on Analysis events lists the tools
  visible to the LLM for each iteration (names only, descriptions in config)
- Termination reason: tg:terminationReason on Conclusion/Synthesis events
  (final-answer, plan-complete, subagents-complete)
- Step counter: tg:stepNumber on iteration events
- Pattern decision: new tg:PatternDecision entity in the DAG between
  session and first iteration, carrying tg:pattern and tg:taskType
- Latency: tg:llmDurationMs on Analysis events, tg:toolDurationMs on
  Observation events
- Token counts on events: tg:inToken/tg:outToken/tg:llmModel on
  Grounding, Focus, Synthesis, and Analysis events
- Tool/parse errors: tg:toolError on Observation events with tg:Error
  mixin type. Parse failures return as error observations instead of
  crashing the agent, giving it a chance to retry.

Envelope unification:

- Rename chunk_type to message_type across AgentResponse schema,
  translator, SDK types, socket clients, CLI, and all tests.
  Agent and RAG services now both use message_type on the wire.

Ontology:

- specs/ontology/trustgraph.ttl — OWL vocabulary covering all 26 classes,
  7 object properties, and 36+ datatype properties including new predicates.

DAG structure tests:

- tests/unit/test_provenance/test_dag_structure.py verifies the
  wasDerivedFrom chain for GraphRAG, DocumentRAG, and all three agent
  patterns (react, plan, supervisor) including the pattern-decision link.
2026-04-13 16:16:42 +01:00

399 lines
14 KiB
Python

"""
Integration tests for Agent Manager Streaming Functionality
These tests verify the streaming behavior of the Agent service, testing
chunk-by-chunk delivery of thoughts, actions, observations, and final answers.
"""
import pytest
from unittest.mock import AsyncMock, MagicMock
from trustgraph.agent.react.agent_manager import AgentManager
from trustgraph.agent.react.tools import KnowledgeQueryImpl
from trustgraph.agent.react.types import Tool, Argument
from trustgraph.base import PromptResult
from tests.utils.streaming_assertions import (
assert_agent_streaming_chunks,
assert_streaming_chunks_valid,
assert_callback_invoked,
assert_message_types_valid,
)
@pytest.mark.integration
class TestAgentStreaming:
    """Integration tests for Agent streaming functionality.

    Verifies chunk-by-chunk delivery of thoughts, actions, observations,
    and final answers from the react AgentManager, including callback
    invocation, end-of-message flags, error propagation, and equivalence
    between streaming and non-streaming modes.
    """

    @pytest.fixture
    def mock_prompt_client_streaming(self):
        """Mock prompt client with streaming support.

        Returns an AsyncMock whose agent_react side effect emits
        realistic line-by-line chunks when streaming is requested, and
        the same full text either way so both modes are equivalent.
        """
        client = AsyncMock()

        async def agent_react_streaming(
            variables, timeout=600, streaming=False, chunk_callback=None
        ):
            # Both modes return the same text for equivalence
            full_text = """Thought: I need to search for information about machine learning.
Action: knowledge_query
Args: {
"question": "What is machine learning?"
}"""
            if streaming and chunk_callback:
                # Send realistic line-by-line chunks.
                # This tests that the parser properly handles "Args:" starting
                # a new chunk (which previously caused a bug where
                # action_buffer was overwritten).
                chunks = [
                    "Thought: I need to search for information about machine learning.\n",
                    "Action: knowledge_query\n",
                    "Args: {\n",  # This used to trigger bug - Args: at start of chunk
                    ' "question": "What is machine learning?"\n',
                    "}"
                ]
                for i, chunk in enumerate(chunks):
                    is_final = (i == len(chunks) - 1)
                    await chunk_callback(chunk, is_final)
                return PromptResult(response_type="text", text=full_text)
            else:
                # Non-streaming response - same text
                return PromptResult(response_type="text", text=full_text)

        client.agent_react.side_effect = agent_react_streaming
        return client

    @pytest.fixture
    def mock_flow_context(self, mock_prompt_client_streaming):
        """Mock flow context with streaming prompt client.

        Calling the context with a service name routes to the matching
        mock client (prompt, graph RAG, or a fresh AsyncMock fallback).
        """
        context = MagicMock()

        # Mock graph RAG client
        graph_rag_client = AsyncMock()
        graph_rag_client.rag.return_value = "Machine learning is a subset of AI."

        def context_router(service_name):
            # Dispatch service lookups to the appropriate mock client
            if service_name == "prompt-request":
                return mock_prompt_client_streaming
            elif service_name == "graph-rag-request":
                return graph_rag_client
            else:
                return AsyncMock()

        context.side_effect = context_router
        return context

    @pytest.fixture
    def sample_tools(self):
        """Sample tool configuration with a single knowledge-query tool."""
        return {
            "knowledge_query": Tool(
                name="knowledge_query",
                description="Query the knowledge graph",
                arguments=[
                    Argument(
                        name="question",
                        type="string",
                        description="The question to ask"
                    )
                ],
                implementation=KnowledgeQueryImpl,
                config={}
            )
        }

    @pytest.fixture
    def agent_manager(self, sample_tools):
        """Create AgentManager instance with streaming support."""
        return AgentManager(
            tools=sample_tools,
            additional_context="You are a helpful AI assistant."
        )

    @pytest.mark.asyncio
    async def test_agent_streaming_thought_chunks(
        self, agent_manager, mock_flow_context
    ):
        """Test that thought chunks are streamed correctly"""
        # Arrange
        thought_chunks = []

        async def think(chunk, is_final=False):
            thought_chunks.append(chunk)

        # Act
        await agent_manager.react(
            question="What is machine learning?",
            history=[],
            think=think,
            observe=AsyncMock(),
            context=mock_flow_context,
            streaming=True
        )

        # Assert
        assert len(thought_chunks) > 0
        assert_streaming_chunks_valid(thought_chunks, min_chunks=1)

        # Verify thought content makes sense
        full_thought = "".join(thought_chunks)
        assert "search" in full_thought.lower() or "information" in full_thought.lower()

    @pytest.mark.asyncio
    async def test_agent_streaming_observation_chunks(
        self, agent_manager, mock_flow_context
    ):
        """Test that observation chunks are streamed correctly"""
        # Arrange
        observation_chunks = []

        async def observe(chunk, is_final=False):
            observation_chunks.append(chunk)

        # Act
        await agent_manager.react(
            question="What is machine learning?",
            history=[],
            think=AsyncMock(),
            observe=observe,
            context=mock_flow_context,
            streaming=True
        )

        # Assert
        # Note: Observations come from tool execution, which may or may not be
        # streamed depending on the tool implementation.
        # For now, verify callback was set up
        assert observe is not None

    @pytest.mark.asyncio
    async def test_agent_streaming_vs_non_streaming(
        self, agent_manager, mock_flow_context
    ):
        """Test that streaming and non-streaming produce equivalent results"""
        # Arrange
        question = "What is machine learning?"
        history = []

        # Act - Non-streaming
        non_streaming_result = await agent_manager.react(
            question=question,
            history=history,
            think=AsyncMock(),
            observe=AsyncMock(),
            context=mock_flow_context,
            streaming=False
        )

        # Act - Streaming
        thought_chunks = []
        observation_chunks = []

        async def think(chunk, is_final=False):
            thought_chunks.append(chunk)

        async def observe(chunk, is_final=False):
            observation_chunks.append(chunk)

        streaming_result = await agent_manager.react(
            question=question,
            history=history,
            think=think,
            observe=observe,
            context=mock_flow_context,
            streaming=True
        )

        # Assert - Results should be equivalent (or both valid)
        assert non_streaming_result is not None
        assert streaming_result is not None

    @pytest.mark.asyncio
    async def test_agent_streaming_callback_invocation(
        self, agent_manager, mock_flow_context
    ):
        """Test that callbacks are invoked with correct parameters"""
        # Arrange
        think = AsyncMock()
        observe = AsyncMock()

        # Act
        await agent_manager.react(
            question="What is machine learning?",
            history=[],
            think=think,
            observe=observe,
            context=mock_flow_context,
            streaming=True
        )

        # Assert - Think callback should be invoked
        assert think.call_count > 0

        # Verify all callback invocations had string arguments
        for call in think.call_args_list:
            assert len(call.args) > 0
            assert isinstance(call.args[0], str)

    @pytest.mark.asyncio
    async def test_agent_streaming_without_callbacks(
        self, agent_manager, mock_flow_context
    ):
        """Test streaming parameter without callbacks (should work gracefully)"""
        # Arrange & Act
        result = await agent_manager.react(
            question="What is machine learning?",
            history=[],
            think=AsyncMock(),
            observe=AsyncMock(),
            context=mock_flow_context,
            streaming=True  # Streaming enabled with mock callbacks
        )

        # Assert - Should complete without error
        assert result is not None

    @pytest.mark.asyncio
    async def test_agent_streaming_with_conversation_history(
        self, agent_manager, mock_flow_context
    ):
        """Test streaming with existing conversation history"""
        # Arrange
        # History should be a list of Action objects
        from trustgraph.agent.react.types import Action
        history = [
            Action(
                thought="I need to search for information about machine learning",
                name="knowledge_query",
                arguments={"question": "What is machine learning?"},
                observation="Machine learning is a subset of AI that enables computers to learn from data."
            )
        ]
        think = AsyncMock()

        # Act
        result = await agent_manager.react(
            question="Tell me more about neural networks",
            history=history,
            think=think,
            observe=AsyncMock(),
            context=mock_flow_context,
            streaming=True
        )

        # Assert
        assert result is not None
        assert think.call_count > 0

    @pytest.mark.asyncio
    async def test_agent_streaming_error_propagation(
        self, agent_manager, mock_flow_context
    ):
        """Test that errors during streaming are properly propagated"""
        # Arrange
        mock_prompt_client = mock_flow_context("prompt-request")
        mock_prompt_client.agent_react.side_effect = Exception("Prompt service error")
        think = AsyncMock()
        observe = AsyncMock()

        # Act & Assert
        with pytest.raises(Exception) as exc_info:
            await agent_manager.react(
                question="test question",
                history=[],
                think=think,
                observe=observe,
                context=mock_flow_context,
                streaming=True
            )
        assert "Prompt service error" in str(exc_info.value)

    @pytest.mark.asyncio
    async def test_agent_streaming_multi_step_reasoning(
        self, agent_manager, mock_flow_context, mock_prompt_client_streaming
    ):
        """Test streaming through multi-step reasoning process"""
        # Arrange - Mock a multi-step response
        step_responses = [
            """Thought: I need to search for basic information.
Action: knowledge_query
Args: {"question": "What is AI?"}""",
            """Thought: Now I can answer the question.
Final Answer: AI is the simulation of human intelligence in machines."""
        ]
        call_count = 0

        async def multi_step_agent_react(
            variables, timeout=600, streaming=False, chunk_callback=None
        ):
            nonlocal call_count
            # Clamp so extra iterations keep replaying the final response
            response = step_responses[min(call_count, len(step_responses) - 1)]
            call_count += 1
            if streaming and chunk_callback:
                chunks = response.split()
                for i, chunk in enumerate(chunks):
                    is_final = (i == len(chunks) - 1)
                    await chunk_callback(chunk + " ", is_final)
                return PromptResult(response_type="text", text=response)
            return PromptResult(response_type="text", text=response)

        mock_prompt_client_streaming.agent_react.side_effect = multi_step_agent_react
        think = AsyncMock()
        observe = AsyncMock()

        # Act
        result = await agent_manager.react(
            question="What is artificial intelligence?",
            history=[],
            think=think,
            observe=observe,
            context=mock_flow_context,
            streaming=True
        )

        # Assert
        assert result is not None
        assert think.call_count > 0

    @pytest.mark.asyncio
    async def test_agent_streaming_preserves_tool_config(
        self, agent_manager, mock_flow_context
    ):
        """Test that streaming preserves tool configuration and context"""
        # Arrange
        think = AsyncMock()
        observe = AsyncMock()

        # Act
        await agent_manager.react(
            question="What is machine learning?",
            history=[],
            think=think,
            observe=observe,
            context=mock_flow_context,
            streaming=True
        )

        # Assert - Verify prompt client was called with streaming
        mock_prompt_client = mock_flow_context("prompt-request")
        call_args = mock_prompt_client.agent_react.call_args
        assert call_args.kwargs['streaming'] is True
        assert call_args.kwargs['chunk_callback'] is not None

    @pytest.mark.asyncio
    async def test_agent_streaming_end_of_message_flags(
        self, agent_manager, mock_flow_context
    ):
        """Test that end_of_message flags are correctly set for thought chunks"""
        # Arrange
        thought_calls = []

        async def think(chunk, is_final=False):
            thought_calls.append({
                'chunk': chunk,
                'is_final': is_final
            })

        # Act
        await agent_manager.react(
            question="What is machine learning?",
            history=[],
            think=think,
            observe=AsyncMock(),
            context=mock_flow_context,
            streaming=True
        )

        # Assert
        assert len(thought_calls) > 0, "Expected thought chunks to be sent"

        # All chunks except the last should have is_final=False
        for i, call in enumerate(thought_calls[:-1]):
            assert call['is_final'] is False, \
                f"Thought chunk {i} should have is_final=False, got {call['is_final']}"

        # Last chunk should have is_final=True
        last_call = thought_calls[-1]
        assert last_call['is_final'] is True, \
            f"Last thought chunk should have is_final=True, got {last_call['is_final']}"