mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-04-25 00:16:23 +02:00
Increase agent ReACT tool coverage (#460)
* Extra multi-step tool invocation * Multi-step reasoning test
This commit is contained in:
parent
1adcbc3a3a
commit
54948e567f
1 changed file with 337 additions and 0 deletions
|
|
@ -305,6 +305,343 @@ Answer: The capital of France is Paris."""
|
|||
assert reasoning_plan[1]["action"] == "find_population"
|
||||
assert all("step" in step for step in reasoning_plan)
|
||||
|
||||
def test_multi_iteration_react_execution(self):
|
||||
"""Test complete multi-iteration ReACT cycle with sequential tool invocations
|
||||
|
||||
This test simulates a complex query that requires:
|
||||
1. Tool #1: Search for initial information
|
||||
2. Tool #2: Analyze/refine based on Tool #1's output
|
||||
3. Tool #3: Generate final answer using accumulated context
|
||||
|
||||
Each iteration includes Think -> Act -> Observe phases with
|
||||
observations feeding into subsequent thinking phases.
|
||||
"""
|
||||
# Arrange
|
||||
question = "Find the GDP of the capital of Japan and compare it to Tokyo's population"
|
||||
|
||||
# Mock tools that build on each other's outputs
|
||||
tool_invocation_log = []
|
||||
|
||||
def mock_geo_search(query):
|
||||
"""Tool 1: Geographic information search"""
|
||||
tool_invocation_log.append(("geo_search", query))
|
||||
if "capital" in query.lower() and "japan" in query.lower():
|
||||
return {"city": "Tokyo", "country": "Japan", "is_capital": True}
|
||||
return {"error": "Location not found"}
|
||||
|
||||
def mock_economic_data(query, context=None):
|
||||
"""Tool 2: Economic data retrieval (uses context from Tool 1)"""
|
||||
tool_invocation_log.append(("economic_data", query, context))
|
||||
if context and context.get("city") == "Tokyo":
|
||||
return {"city": "Tokyo", "gdp_trillion_yen": 115.7, "year": 2023}
|
||||
return {"error": "Economic data not available"}
|
||||
|
||||
def mock_demographic_data(query, context=None):
|
||||
"""Tool 3: Demographic data and comparison (uses context from Tools 1 & 2)"""
|
||||
tool_invocation_log.append(("demographic_data", query, context))
|
||||
if context and context.get("city") == "Tokyo":
|
||||
population_millions = 14.0
|
||||
gdp_from_context = context.get("gdp_trillion_yen", 0)
|
||||
return {
|
||||
"city": "Tokyo",
|
||||
"population_millions": population_millions,
|
||||
"gdp_trillion_yen": gdp_from_context,
|
||||
"gdp_per_capita_million_yen": round(gdp_from_context / population_millions, 2) if population_millions > 0 else 0
|
||||
}
|
||||
return {"error": "Demographic data not available"}
|
||||
|
||||
# Execute multi-iteration ReACT cycle
|
||||
def execute_multi_iteration_react(question, tools):
|
||||
"""Execute a complete multi-iteration ReACT cycle"""
|
||||
iterations = []
|
||||
context = {}
|
||||
|
||||
# Iteration 1: Initial geographic search
|
||||
iteration_1 = {
|
||||
"iteration": 1,
|
||||
"think": "I need to first identify the capital of Japan to get its GDP",
|
||||
"act": {"tool": "geo_search", "query": "capital of Japan"},
|
||||
"observe": None
|
||||
}
|
||||
result_1 = tools["geo_search"](iteration_1["act"]["query"])
|
||||
iteration_1["observe"] = f"Found that {result_1['city']} is the capital of {result_1['country']}"
|
||||
context.update(result_1)
|
||||
iterations.append(iteration_1)
|
||||
|
||||
# Iteration 2: Get economic data using context from iteration 1
|
||||
iteration_2 = {
|
||||
"iteration": 2,
|
||||
"think": f"Now I know {context['city']} is the capital. I need to get its GDP data",
|
||||
"act": {"tool": "economic_data", "query": f"GDP of {context['city']}"},
|
||||
"observe": None
|
||||
}
|
||||
result_2 = tools["economic_data"](iteration_2["act"]["query"], context)
|
||||
iteration_2["observe"] = f"Retrieved GDP data: {result_2['gdp_trillion_yen']} trillion yen for {result_2['year']}"
|
||||
context.update(result_2)
|
||||
iterations.append(iteration_2)
|
||||
|
||||
# Iteration 3: Get demographic data and compare using accumulated context
|
||||
iteration_3 = {
|
||||
"iteration": 3,
|
||||
"think": f"I have the GDP ({context['gdp_trillion_yen']} trillion yen). Now I need population data to compare",
|
||||
"act": {"tool": "demographic_data", "query": f"population of {context['city']}"},
|
||||
"observe": None
|
||||
}
|
||||
result_3 = tools["demographic_data"](iteration_3["act"]["query"], context)
|
||||
iteration_3["observe"] = f"Population is {result_3['population_millions']} million. GDP per capita is {result_3['gdp_per_capita_million_yen']} million yen"
|
||||
context.update(result_3)
|
||||
iterations.append(iteration_3)
|
||||
|
||||
# Final answer synthesis
|
||||
final_answer = {
|
||||
"think": "I now have all the information needed to answer the question",
|
||||
"answer": f"Tokyo, the capital of Japan, has a GDP of {context['gdp_trillion_yen']} trillion yen and a population of {context['population_millions']} million people, resulting in a GDP per capita of {context['gdp_per_capita_million_yen']} million yen."
|
||||
}
|
||||
|
||||
return {
|
||||
"iterations": iterations,
|
||||
"final_answer": final_answer,
|
||||
"context": context,
|
||||
"tool_invocations": len(tool_invocation_log)
|
||||
}
|
||||
|
||||
tools = {
|
||||
"geo_search": mock_geo_search,
|
||||
"economic_data": mock_economic_data,
|
||||
"demographic_data": mock_demographic_data
|
||||
}
|
||||
|
||||
# Act
|
||||
result = execute_multi_iteration_react(question, tools)
|
||||
|
||||
# Assert - Verify complete multi-iteration execution
|
||||
assert len(result["iterations"]) == 3, "Should have exactly 3 iterations"
|
||||
|
||||
# Verify each iteration has complete Think-Act-Observe cycle
|
||||
for i, iteration in enumerate(result["iterations"], 1):
|
||||
assert iteration["iteration"] == i
|
||||
assert "think" in iteration and len(iteration["think"]) > 0
|
||||
assert "act" in iteration and "tool" in iteration["act"]
|
||||
assert "observe" in iteration and iteration["observe"] is not None
|
||||
|
||||
# Verify sequential tool invocations
|
||||
assert tool_invocation_log[0][0] == "geo_search"
|
||||
assert tool_invocation_log[1][0] == "economic_data"
|
||||
assert tool_invocation_log[2][0] == "demographic_data"
|
||||
|
||||
# Verify context accumulation across iterations
|
||||
assert "Tokyo" in tool_invocation_log[1][1], "Iteration 2 should use data from iteration 1"
|
||||
assert tool_invocation_log[2][2].get("gdp_trillion_yen") == 115.7, "Iteration 3 should have accumulated GDP data"
|
||||
|
||||
# Verify observations feed into subsequent thinking
|
||||
assert "Tokyo" in result["iterations"][1]["think"], "Iteration 2 thinking should reference observation from iteration 1"
|
||||
assert "115.7" in result["iterations"][2]["think"], "Iteration 3 thinking should reference GDP from iteration 2"
|
||||
|
||||
# Verify final answer synthesis
|
||||
assert "Tokyo" in result["final_answer"]["answer"]
|
||||
assert "115.7" in result["final_answer"]["answer"]
|
||||
assert "14.0" in result["final_answer"]["answer"]
|
||||
assert "8.26" in result["final_answer"]["answer"], "Should include calculated GDP per capita"
|
||||
|
||||
# Verify all 3 tools were invoked in sequence
|
||||
assert result["tool_invocations"] == 3
|
||||
|
||||
def test_multi_iteration_with_dynamic_tool_selection(self):
|
||||
"""Test multi-iteration ReACT with mocked LLM reasoning dynamically selecting tools
|
||||
|
||||
This test simulates how an LLM would dynamically choose tools based on:
|
||||
1. The original question
|
||||
2. Previous observations
|
||||
3. Accumulated context
|
||||
|
||||
The mocked LLM reasoning adapts its tool selection based on what it has learned
|
||||
in previous iterations, mimicking real agent behavior.
|
||||
"""
|
||||
# Arrange
|
||||
question = "What are the main exports of the largest city in Brazil by population?"
|
||||
|
||||
# Track reasoning and tool selection
|
||||
reasoning_log = []
|
||||
tool_invocation_log = []
|
||||
|
||||
def mock_llm_reasoning(question, history, available_tools):
|
||||
"""Mock LLM that reasons about tool selection based on context"""
|
||||
# Analyze what we know from history
|
||||
context = {}
|
||||
for step in history:
|
||||
if "observation" in step:
|
||||
# Extract information from observations
|
||||
obs = step["observation"]
|
||||
if "São Paulo" in obs:
|
||||
context["city"] = "São Paulo"
|
||||
if "largest city" in obs:
|
||||
context["is_largest"] = True
|
||||
if "million" in obs and "population" in obs:
|
||||
context["has_population"] = True
|
||||
if "exports" in obs:
|
||||
context["has_exports"] = True
|
||||
|
||||
# Decide next action based on what we know
|
||||
if not context.get("city"):
|
||||
# Step 1: Need to find the largest city
|
||||
reasoning = "I need to find the largest city in Brazil by population"
|
||||
tool = "geo_search"
|
||||
args = {"query": "largest city Brazil population"}
|
||||
elif not context.get("has_population"):
|
||||
# Step 2: Confirm population data
|
||||
reasoning = f"I found {context['city']}. Now I need to verify it's the largest by checking population"
|
||||
tool = "demographic_data"
|
||||
args = {"query": f"population {context['city']} Brazil"}
|
||||
elif not context.get("has_exports"):
|
||||
# Step 3: Get export information
|
||||
reasoning = f"Confirmed {context['city']} is the largest. Now I need export information"
|
||||
tool = "economic_data"
|
||||
args = {"query": f"main exports {context['city']} Brazil"}
|
||||
else:
|
||||
# Final: Have all information
|
||||
reasoning = "I have all the information needed to answer"
|
||||
tool = "final_answer"
|
||||
args = None
|
||||
|
||||
reasoning_log.append({"reasoning": reasoning, "tool": tool, "context": context.copy()})
|
||||
return reasoning, tool, args
|
||||
|
||||
def mock_geo_search(query):
|
||||
"""Mock geographic search tool"""
|
||||
tool_invocation_log.append(("geo_search", query))
|
||||
if "largest city brazil" in query.lower():
|
||||
return {
|
||||
"result": "São Paulo is the largest city in Brazil",
|
||||
"details": {"city": "São Paulo", "country": "Brazil", "rank": 1}
|
||||
}
|
||||
return {"error": "No results found"}
|
||||
|
||||
def mock_demographic_data(query):
|
||||
"""Mock demographic data tool"""
|
||||
tool_invocation_log.append(("demographic_data", query))
|
||||
if "são paulo" in query.lower():
|
||||
return {
|
||||
"result": "São Paulo has a population of 12.4 million in the city proper, 22.8 million in the metro area",
|
||||
"details": {"city_population": 12.4, "metro_population": 22.8, "unit": "million"}
|
||||
}
|
||||
return {"error": "No demographic data found"}
|
||||
|
||||
def mock_economic_data(query):
|
||||
"""Mock economic data tool"""
|
||||
tool_invocation_log.append(("economic_data", query))
|
||||
if "são paulo" in query.lower() and "export" in query.lower():
|
||||
return {
|
||||
"result": "São Paulo's main exports include aircraft, vehicles, machinery, coffee, and soybeans",
|
||||
"details": {
|
||||
"top_exports": ["aircraft", "vehicles", "machinery", "coffee", "soybeans"],
|
||||
"export_value_billions_usd": 65.2
|
||||
}
|
||||
}
|
||||
return {"error": "No economic data found"}
|
||||
|
||||
# Execute multi-iteration ReACT with dynamic tool selection
|
||||
def execute_dynamic_react(question, tools, llm_reasoner):
|
||||
"""Execute ReACT with dynamic LLM-based tool selection"""
|
||||
iterations = []
|
||||
history = []
|
||||
available_tools = list(tools.keys())
|
||||
|
||||
max_iterations = 4
|
||||
for i in range(max_iterations):
|
||||
# LLM reasons about next action
|
||||
reasoning, tool_name, args = llm_reasoner(question, history, available_tools)
|
||||
|
||||
if tool_name == "final_answer":
|
||||
# Agent has decided it has enough information
|
||||
final_answer = {
|
||||
"reasoning": reasoning,
|
||||
"answer": "São Paulo, Brazil's largest city with 12.4 million people, " +
|
||||
"has main exports including aircraft, vehicles, machinery, coffee, and soybeans."
|
||||
}
|
||||
break
|
||||
|
||||
# Execute selected tool
|
||||
iteration = {
|
||||
"iteration": i + 1,
|
||||
"think": reasoning,
|
||||
"act": {"tool": tool_name, "args": args},
|
||||
"observe": None
|
||||
}
|
||||
|
||||
# Get tool result
|
||||
if tool_name in tools:
|
||||
result = tools[tool_name](args["query"])
|
||||
iteration["observe"] = result.get("result", "No information found")
|
||||
else:
|
||||
iteration["observe"] = f"Tool {tool_name} not available"
|
||||
|
||||
iterations.append(iteration)
|
||||
|
||||
# Add to history for next iteration
|
||||
history.append({
|
||||
"thought": reasoning,
|
||||
"action": tool_name,
|
||||
"args": args,
|
||||
"observation": iteration["observe"]
|
||||
})
|
||||
|
||||
return {
|
||||
"iterations": iterations,
|
||||
"final_answer": final_answer if 'final_answer' in locals() else None,
|
||||
"reasoning_log": reasoning_log,
|
||||
"tool_invocations": len(tool_invocation_log)
|
||||
}
|
||||
|
||||
tools = {
|
||||
"geo_search": mock_geo_search,
|
||||
"demographic_data": mock_demographic_data,
|
||||
"economic_data": mock_economic_data
|
||||
}
|
||||
|
||||
# Act
|
||||
result = execute_dynamic_react(question, tools, mock_llm_reasoning)
|
||||
|
||||
# Assert - Verify dynamic multi-iteration execution
|
||||
assert len(result["iterations"]) == 3, "Should have 3 iterations before final answer"
|
||||
|
||||
# Verify reasoning adapts based on observations
|
||||
assert len(reasoning_log) == 4, "Should have 4 reasoning steps (3 tools + final)"
|
||||
|
||||
# Verify first iteration searches for largest city
|
||||
assert reasoning_log[0]["tool"] == "geo_search"
|
||||
assert "largest city" in reasoning_log[0]["reasoning"].lower()
|
||||
assert not reasoning_log[0]["context"].get("city")
|
||||
|
||||
# Verify second iteration uses city name from first observation
|
||||
assert reasoning_log[1]["tool"] == "demographic_data"
|
||||
assert "São Paulo" in reasoning_log[1]["reasoning"]
|
||||
assert reasoning_log[1]["context"]["city"] == "São Paulo"
|
||||
|
||||
# Verify third iteration builds on previous knowledge
|
||||
assert reasoning_log[2]["tool"] == "economic_data"
|
||||
assert "export" in reasoning_log[2]["reasoning"].lower()
|
||||
assert reasoning_log[2]["context"]["has_population"] is True
|
||||
|
||||
# Verify final reasoning has all information
|
||||
assert reasoning_log[3]["tool"] == "final_answer"
|
||||
assert reasoning_log[3]["context"]["has_exports"] is True
|
||||
|
||||
# Verify tool invocation sequence
|
||||
assert tool_invocation_log[0][0] == "geo_search"
|
||||
assert tool_invocation_log[1][0] == "demographic_data"
|
||||
assert tool_invocation_log[2][0] == "economic_data"
|
||||
|
||||
# Verify observations influence subsequent tool selection
|
||||
assert "São Paulo" in result["iterations"][1]["act"]["args"]["query"]
|
||||
assert "São Paulo" in result["iterations"][2]["act"]["args"]["query"]
|
||||
|
||||
# Verify final answer synthesizes all gathered information
|
||||
assert result["final_answer"] is not None
|
||||
assert "São Paulo" in result["final_answer"]["answer"]
|
||||
assert "12.4 million" in result["final_answer"]["answer"]
|
||||
assert "aircraft" in result["final_answer"]["answer"]
|
||||
assert "vehicles" in result["final_answer"]["answer"]
|
||||
|
||||
def test_error_handling_in_react_cycle(self):
|
||||
"""Test error handling during ReAct execution"""
|
||||
# Arrange
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue