Increase agent ReACT tool coverage (#460)

* Extra multi-step tool invocation

* Multi-step reasoning test
This commit is contained in:
cybermaggedon 2025-08-21 00:36:45 +01:00 committed by GitHub
parent 1adcbc3a3a
commit 54948e567f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -305,6 +305,343 @@ Answer: The capital of France is Paris."""
assert reasoning_plan[1]["action"] == "find_population"
assert all("step" in step for step in reasoning_plan)
def test_multi_iteration_react_execution(self):
"""Test complete multi-iteration ReACT cycle with sequential tool invocations
This test simulates a complex query that requires:
1. Tool #1: Search for initial information
2. Tool #2: Analyze/refine based on Tool #1's output
3. Tool #3: Generate final answer using accumulated context
Each iteration includes Think -> Act -> Observe phases with
observations feeding into subsequent thinking phases.
"""
# Arrange
question = "Find the GDP of the capital of Japan and compare it to Tokyo's population"
# Mock tools that build on each other's outputs
tool_invocation_log = []
def mock_geo_search(query):
"""Tool 1: Geographic information search"""
tool_invocation_log.append(("geo_search", query))
if "capital" in query.lower() and "japan" in query.lower():
return {"city": "Tokyo", "country": "Japan", "is_capital": True}
return {"error": "Location not found"}
def mock_economic_data(query, context=None):
"""Tool 2: Economic data retrieval (uses context from Tool 1)"""
tool_invocation_log.append(("economic_data", query, context))
if context and context.get("city") == "Tokyo":
return {"city": "Tokyo", "gdp_trillion_yen": 115.7, "year": 2023}
return {"error": "Economic data not available"}
def mock_demographic_data(query, context=None):
"""Tool 3: Demographic data and comparison (uses context from Tools 1 & 2)"""
tool_invocation_log.append(("demographic_data", query, context))
if context and context.get("city") == "Tokyo":
population_millions = 14.0
gdp_from_context = context.get("gdp_trillion_yen", 0)
return {
"city": "Tokyo",
"population_millions": population_millions,
"gdp_trillion_yen": gdp_from_context,
"gdp_per_capita_million_yen": round(gdp_from_context / population_millions, 2) if population_millions > 0 else 0
}
return {"error": "Demographic data not available"}
# Execute multi-iteration ReACT cycle
def execute_multi_iteration_react(question, tools):
"""Execute a complete multi-iteration ReACT cycle"""
iterations = []
context = {}
# Iteration 1: Initial geographic search
iteration_1 = {
"iteration": 1,
"think": "I need to first identify the capital of Japan to get its GDP",
"act": {"tool": "geo_search", "query": "capital of Japan"},
"observe": None
}
result_1 = tools["geo_search"](iteration_1["act"]["query"])
iteration_1["observe"] = f"Found that {result_1['city']} is the capital of {result_1['country']}"
context.update(result_1)
iterations.append(iteration_1)
# Iteration 2: Get economic data using context from iteration 1
iteration_2 = {
"iteration": 2,
"think": f"Now I know {context['city']} is the capital. I need to get its GDP data",
"act": {"tool": "economic_data", "query": f"GDP of {context['city']}"},
"observe": None
}
result_2 = tools["economic_data"](iteration_2["act"]["query"], context)
iteration_2["observe"] = f"Retrieved GDP data: {result_2['gdp_trillion_yen']} trillion yen for {result_2['year']}"
context.update(result_2)
iterations.append(iteration_2)
# Iteration 3: Get demographic data and compare using accumulated context
iteration_3 = {
"iteration": 3,
"think": f"I have the GDP ({context['gdp_trillion_yen']} trillion yen). Now I need population data to compare",
"act": {"tool": "demographic_data", "query": f"population of {context['city']}"},
"observe": None
}
result_3 = tools["demographic_data"](iteration_3["act"]["query"], context)
iteration_3["observe"] = f"Population is {result_3['population_millions']} million. GDP per capita is {result_3['gdp_per_capita_million_yen']} million yen"
context.update(result_3)
iterations.append(iteration_3)
# Final answer synthesis
final_answer = {
"think": "I now have all the information needed to answer the question",
"answer": f"Tokyo, the capital of Japan, has a GDP of {context['gdp_trillion_yen']} trillion yen and a population of {context['population_millions']} million people, resulting in a GDP per capita of {context['gdp_per_capita_million_yen']} million yen."
}
return {
"iterations": iterations,
"final_answer": final_answer,
"context": context,
"tool_invocations": len(tool_invocation_log)
}
tools = {
"geo_search": mock_geo_search,
"economic_data": mock_economic_data,
"demographic_data": mock_demographic_data
}
# Act
result = execute_multi_iteration_react(question, tools)
# Assert - Verify complete multi-iteration execution
assert len(result["iterations"]) == 3, "Should have exactly 3 iterations"
# Verify each iteration has complete Think-Act-Observe cycle
for i, iteration in enumerate(result["iterations"], 1):
assert iteration["iteration"] == i
assert "think" in iteration and len(iteration["think"]) > 0
assert "act" in iteration and "tool" in iteration["act"]
assert "observe" in iteration and iteration["observe"] is not None
# Verify sequential tool invocations
assert tool_invocation_log[0][0] == "geo_search"
assert tool_invocation_log[1][0] == "economic_data"
assert tool_invocation_log[2][0] == "demographic_data"
# Verify context accumulation across iterations
assert "Tokyo" in tool_invocation_log[1][1], "Iteration 2 should use data from iteration 1"
assert tool_invocation_log[2][2].get("gdp_trillion_yen") == 115.7, "Iteration 3 should have accumulated GDP data"
# Verify observations feed into subsequent thinking
assert "Tokyo" in result["iterations"][1]["think"], "Iteration 2 thinking should reference observation from iteration 1"
assert "115.7" in result["iterations"][2]["think"], "Iteration 3 thinking should reference GDP from iteration 2"
# Verify final answer synthesis
assert "Tokyo" in result["final_answer"]["answer"]
assert "115.7" in result["final_answer"]["answer"]
assert "14.0" in result["final_answer"]["answer"]
assert "8.26" in result["final_answer"]["answer"], "Should include calculated GDP per capita"
# Verify all 3 tools were invoked in sequence
assert result["tool_invocations"] == 3
def test_multi_iteration_with_dynamic_tool_selection(self):
"""Test multi-iteration ReACT with mocked LLM reasoning dynamically selecting tools
This test simulates how an LLM would dynamically choose tools based on:
1. The original question
2. Previous observations
3. Accumulated context
The mocked LLM reasoning adapts its tool selection based on what it has learned
in previous iterations, mimicking real agent behavior.
"""
# Arrange
question = "What are the main exports of the largest city in Brazil by population?"
# Track reasoning and tool selection
reasoning_log = []
tool_invocation_log = []
def mock_llm_reasoning(question, history, available_tools):
"""Mock LLM that reasons about tool selection based on context"""
# Analyze what we know from history
context = {}
for step in history:
if "observation" in step:
# Extract information from observations
obs = step["observation"]
if "São Paulo" in obs:
context["city"] = "São Paulo"
if "largest city" in obs:
context["is_largest"] = True
if "million" in obs and "population" in obs:
context["has_population"] = True
if "exports" in obs:
context["has_exports"] = True
# Decide next action based on what we know
if not context.get("city"):
# Step 1: Need to find the largest city
reasoning = "I need to find the largest city in Brazil by population"
tool = "geo_search"
args = {"query": "largest city Brazil population"}
elif not context.get("has_population"):
# Step 2: Confirm population data
reasoning = f"I found {context['city']}. Now I need to verify it's the largest by checking population"
tool = "demographic_data"
args = {"query": f"population {context['city']} Brazil"}
elif not context.get("has_exports"):
# Step 3: Get export information
reasoning = f"Confirmed {context['city']} is the largest. Now I need export information"
tool = "economic_data"
args = {"query": f"main exports {context['city']} Brazil"}
else:
# Final: Have all information
reasoning = "I have all the information needed to answer"
tool = "final_answer"
args = None
reasoning_log.append({"reasoning": reasoning, "tool": tool, "context": context.copy()})
return reasoning, tool, args
def mock_geo_search(query):
"""Mock geographic search tool"""
tool_invocation_log.append(("geo_search", query))
if "largest city brazil" in query.lower():
return {
"result": "São Paulo is the largest city in Brazil",
"details": {"city": "São Paulo", "country": "Brazil", "rank": 1}
}
return {"error": "No results found"}
def mock_demographic_data(query):
"""Mock demographic data tool"""
tool_invocation_log.append(("demographic_data", query))
if "são paulo" in query.lower():
return {
"result": "São Paulo has a population of 12.4 million in the city proper, 22.8 million in the metro area",
"details": {"city_population": 12.4, "metro_population": 22.8, "unit": "million"}
}
return {"error": "No demographic data found"}
def mock_economic_data(query):
"""Mock economic data tool"""
tool_invocation_log.append(("economic_data", query))
if "são paulo" in query.lower() and "export" in query.lower():
return {
"result": "São Paulo's main exports include aircraft, vehicles, machinery, coffee, and soybeans",
"details": {
"top_exports": ["aircraft", "vehicles", "machinery", "coffee", "soybeans"],
"export_value_billions_usd": 65.2
}
}
return {"error": "No economic data found"}
# Execute multi-iteration ReACT with dynamic tool selection
def execute_dynamic_react(question, tools, llm_reasoner):
"""Execute ReACT with dynamic LLM-based tool selection"""
iterations = []
history = []
available_tools = list(tools.keys())
max_iterations = 4
for i in range(max_iterations):
# LLM reasons about next action
reasoning, tool_name, args = llm_reasoner(question, history, available_tools)
if tool_name == "final_answer":
# Agent has decided it has enough information
final_answer = {
"reasoning": reasoning,
"answer": "São Paulo, Brazil's largest city with 12.4 million people, " +
"has main exports including aircraft, vehicles, machinery, coffee, and soybeans."
}
break
# Execute selected tool
iteration = {
"iteration": i + 1,
"think": reasoning,
"act": {"tool": tool_name, "args": args},
"observe": None
}
# Get tool result
if tool_name in tools:
result = tools[tool_name](args["query"])
iteration["observe"] = result.get("result", "No information found")
else:
iteration["observe"] = f"Tool {tool_name} not available"
iterations.append(iteration)
# Add to history for next iteration
history.append({
"thought": reasoning,
"action": tool_name,
"args": args,
"observation": iteration["observe"]
})
return {
"iterations": iterations,
"final_answer": final_answer if 'final_answer' in locals() else None,
"reasoning_log": reasoning_log,
"tool_invocations": len(tool_invocation_log)
}
tools = {
"geo_search": mock_geo_search,
"demographic_data": mock_demographic_data,
"economic_data": mock_economic_data
}
# Act
result = execute_dynamic_react(question, tools, mock_llm_reasoning)
# Assert - Verify dynamic multi-iteration execution
assert len(result["iterations"]) == 3, "Should have 3 iterations before final answer"
# Verify reasoning adapts based on observations
assert len(reasoning_log) == 4, "Should have 4 reasoning steps (3 tools + final)"
# Verify first iteration searches for largest city
assert reasoning_log[0]["tool"] == "geo_search"
assert "largest city" in reasoning_log[0]["reasoning"].lower()
assert not reasoning_log[0]["context"].get("city")
# Verify second iteration uses city name from first observation
assert reasoning_log[1]["tool"] == "demographic_data"
assert "São Paulo" in reasoning_log[1]["reasoning"]
assert reasoning_log[1]["context"]["city"] == "São Paulo"
# Verify third iteration builds on previous knowledge
assert reasoning_log[2]["tool"] == "economic_data"
assert "export" in reasoning_log[2]["reasoning"].lower()
assert reasoning_log[2]["context"]["has_population"] is True
# Verify final reasoning has all information
assert reasoning_log[3]["tool"] == "final_answer"
assert reasoning_log[3]["context"]["has_exports"] is True
# Verify tool invocation sequence
assert tool_invocation_log[0][0] == "geo_search"
assert tool_invocation_log[1][0] == "demographic_data"
assert tool_invocation_log[2][0] == "economic_data"
# Verify observations influence subsequent tool selection
assert "São Paulo" in result["iterations"][1]["act"]["args"]["query"]
assert "São Paulo" in result["iterations"][2]["act"]["args"]["query"]
# Verify final answer synthesizes all gathered information
assert result["final_answer"] is not None
assert "São Paulo" in result["final_answer"]["answer"]
assert "12.4 million" in result["final_answer"]["answer"]
assert "aircraft" in result["final_answer"]["answer"]
assert "vehicles" in result["final_answer"]["answer"]
def test_error_handling_in_react_cycle(self):
"""Test error handling during ReAct execution"""
# Arrange