Increase agent ReACT tool coverage (#460)

* Extra multi-step tool invocation
* Multi-step reasoning test
2026-04-26 17:06:22 +02:00 · 2025-08-21 00:36:45 +01:00 · 2025-08-21 00:36:45 +01:00 · 54948e567f
commit 54948e567f
parent 1adcbc3a3a
1 changed file with 337 additions and 0 deletions
--- a/tests/unit/test_agent/test_react_processor.py
+++ b/tests/unit/test_agent/test_react_processor.py
@@ -305,6 +305,343 @@ Answer: The capital of France is Paris."""
        assert reasoning_plan[1]["action"] == "find_population"
        assert all("step" in step for step in reasoning_plan)

+    def test_multi_iteration_react_execution(self):
+        """Test a three-step ReACT cycle in which each tool call builds on the last
+
+        The scenario chains three mocked tools:
+        1. geo_search resolves the capital of Japan
+        2. economic_data looks up the GDP of the city found in step 1
+        3. demographic_data adds population and derives GDP per capita
+
+        Every step records a Think -> Act -> Observe entry, and each tool
+        result is merged into a shared context read by the following step.
+        """
+        # Arrange
+        question = "Find the GDP of the capital of Japan and compare it to Tokyo's population"
+
+        # Each mock appends a tuple here so call order and arguments can be checked
+        tool_invocation_log = []
+
+        def mock_geo_search(query):
+            """Tool 1: Geographic lookup that only recognises the capital of Japan"""
+            tool_invocation_log.append(("geo_search", query))
+            lowered = query.lower()
+            if "capital" not in lowered or "japan" not in lowered:
+                return {"error": "Location not found"}
+            return {"city": "Tokyo", "country": "Japan", "is_capital": True}
+
+        def mock_economic_data(query, context=None):
+            """Tool 2: GDP lookup keyed on the city carried in context"""
+            tool_invocation_log.append(("economic_data", query, context))
+            if not context or context.get("city") != "Tokyo":
+                return {"error": "Economic data not available"}
+            return {"city": "Tokyo", "gdp_trillion_yen": 115.7, "year": 2023}
+
+        def mock_demographic_data(query, context=None):
+            """Tool 3: Population lookup plus GDP per capita derived from context"""
+            tool_invocation_log.append(("demographic_data", query, context))
+            if not context or context.get("city") != "Tokyo":
+                return {"error": "Demographic data not available"}
+            population_millions = 14.0
+            gdp_from_context = context.get("gdp_trillion_yen", 0)
+            if population_millions > 0:
+                per_capita = round(gdp_from_context / population_millions, 2)
+            else:
+                per_capita = 0
+            return {
+                "city": "Tokyo",
+                "population_millions": population_millions,
+                "gdp_trillion_yen": gdp_from_context,
+                "gdp_per_capita_million_yen": per_capita,
+            }
+
+        def execute_multi_iteration_react(question, tools):
+            """Run the three scripted iterations and synthesize the final answer"""
+            iterations = []
+            context = {}
+
+            def run_step(number, tool_name, query, thought, describe, *tool_args):
+                """Act and Observe: call the tool, log the step, merge the result"""
+                outcome = tools[tool_name](query, *tool_args)
+                iterations.append({
+                    "iteration": number,
+                    "think": thought,
+                    "act": {"tool": tool_name, "query": query},
+                    "observe": describe(outcome),
+                })
+                context.update(outcome)
+
+            # Iteration 1: locate the capital; no prior context exists yet
+            run_step(
+                1, "geo_search", "capital of Japan",
+                "I need to first identify the capital of Japan to get its GDP",
+                lambda found: f"Found that {found['city']} is the capital of {found['country']}",
+            )
+
+            # Iteration 2: GDP lookup driven by the city learned in iteration 1
+            run_step(
+                2, "economic_data", f"GDP of {context['city']}",
+                f"Now I know {context['city']} is the capital. I need to get its GDP data",
+                lambda econ: f"Retrieved GDP data: {econ['gdp_trillion_yen']} trillion yen for {econ['year']}",
+                context,
+            )
+
+            # Iteration 3: population lookup using everything gathered so far
+            run_step(
+                3, "demographic_data", f"population of {context['city']}",
+                f"I have the GDP ({context['gdp_trillion_yen']} trillion yen). Now I need population data to compare",
+                lambda demo: f"Population is {demo['population_millions']} million. GDP per capita is {demo['gdp_per_capita_million_yen']} million yen",
+                context,
+            )
+
+            # Final answer synthesis from the accumulated context
+            answer_text = f"Tokyo, the capital of Japan, has a GDP of {context['gdp_trillion_yen']} trillion yen and a population of {context['population_millions']} million people, resulting in a GDP per capita of {context['gdp_per_capita_million_yen']} million yen."
+            return {
+                "iterations": iterations,
+                "final_answer": {"think": "I now have all the information needed to answer the question", "answer": answer_text},
+                "context": context,
+                "tool_invocations": len(tool_invocation_log),
+            }
+
+        tools = dict(
+            geo_search=mock_geo_search,
+            economic_data=mock_economic_data,
+            demographic_data=mock_demographic_data,
+        )
+
+        # Act
+        result = execute_multi_iteration_react(question, tools)
+
+        # Assert - Verify complete multi-iteration execution
+        recorded = result["iterations"]
+        assert len(recorded) == 3, "Should have exactly 3 iterations"
+
+        # Each iteration must carry a complete Think-Act-Observe record
+        for expected_number, step in enumerate(recorded, start=1):
+            assert step["iteration"] == expected_number
+            assert "think" in step and len(step["think"]) > 0
+            assert "act" in step and "tool" in step["act"]
+            assert "observe" in step and step["observe"] is not None
+
+        # The tools must have fired in the scripted order
+        expected_order = ("geo_search", "economic_data", "demographic_data")
+        for position, expected_tool in enumerate(expected_order):
+            assert tool_invocation_log[position][0] == expected_tool
+
+        # Context accumulated across iterations reaches the later tool calls
+        assert "Tokyo" in tool_invocation_log[1][1], "Iteration 2 should use data from iteration 1"
+        assert tool_invocation_log[2][2].get("gdp_trillion_yen") == 115.7, "Iteration 3 should have accumulated GDP data"
+
+        # Earlier observations show up in later thinking
+        assert "Tokyo" in recorded[1]["think"], "Iteration 2 thinking should reference observation from iteration 1"
+        assert "115.7" in recorded[2]["think"], "Iteration 3 thinking should reference GDP from iteration 2"
+
+        # The final answer quotes every gathered figure
+        answer = result["final_answer"]["answer"]
+        assert "Tokyo" in answer
+        assert "115.7" in answer
+        assert "14.0" in answer
+        assert "8.26" in answer, "Should include calculated GDP per capita"
+
+        # Exactly three tool calls were logged
+        assert result["tool_invocations"] == 3
+
+    def test_multi_iteration_with_dynamic_tool_selection(self):
+        """Test multi-iteration ReACT with mocked LLM reasoning dynamically selecting tools
+
+        This test simulates how an LLM would dynamically choose tools based on:
+        1. The original question
+        2. Previous observations
+        3. Accumulated context
+
+        The mocked LLM reasoning rebuilds what it knows from the observation history
+        on every call and picks the next tool from that, mimicking real agent behavior.
+        """
+        # Arrange
+        question = "What are the main exports of the largest city in Brazil by population?"
+
+        # Filled in by the mocks so the assertions can replay every decision and call
+        reasoning_log = []
+        tool_invocation_log = []
+
+        def mock_llm_reasoning(question, history, available_tools):
+            """Mock LLM that picks the next tool from history alone (other args are unused)"""
+            # Rebuild the known facts from scratch on every call
+            context = {}
+            for step in history:
+                if "observation" in step:
+                    # Keyword matching stands in for an LLM reading the observation
+                    obs = step["observation"]
+                    if "São Paulo" in obs:
+                        context["city"] = "São Paulo"
+                    if "largest city" in obs:
+                        context["is_largest"] = True
+                    if "million" in obs and "population" in obs:
+                        context["has_population"] = True
+                    if "exports" in obs:
+                        context["has_exports"] = True
+
+            # Decide next action based on the first fact that is still missing
+            if not context.get("city"):
+                # Step 1: Need to find the largest city
+                reasoning = "I need to find the largest city in Brazil by population"
+                tool = "geo_search"
+                args = {"query": "largest city Brazil population"}
+            elif not context.get("has_population"):
+                # Step 2: Confirm population data
+                reasoning = f"I found {context['city']}. Now I need to verify it's the largest by checking population"
+                tool = "demographic_data"
+                args = {"query": f"population {context['city']} Brazil"}
+            elif not context.get("has_exports"):
+                # Step 3: Get export information
+                reasoning = f"Confirmed {context['city']} is the largest. Now I need export information"
+                tool = "economic_data"
+                args = {"query": f"main exports {context['city']} Brazil"}
+            else:
+                # Final: Have all information; "final_answer" is a sentinel, not a tool
+                reasoning = "I have all the information needed to answer"
+                tool = "final_answer"
+                args = None
+
+            reasoning_log.append({"reasoning": reasoning, "tool": tool, "context": context.copy()})
+            return reasoning, tool, args
+
+        def mock_geo_search(query):
+            """Mock geographic search tool"""
+            tool_invocation_log.append(("geo_search", query))
+            if "largest city brazil" in query.lower():
+                return {
+                    "result": "São Paulo is the largest city in Brazil",
+                    "details": {"city": "São Paulo", "country": "Brazil", "rank": 1}
+                }
+            return {"error": "No results found"}
+
+        def mock_demographic_data(query):
+            """Mock demographic data tool"""
+            tool_invocation_log.append(("demographic_data", query))
+            if "são paulo" in query.lower():
+                return {
+                    "result": "São Paulo has a population of 12.4 million in the city proper, 22.8 million in the metro area",
+                    "details": {"city_population": 12.4, "metro_population": 22.8, "unit": "million"}
+                }
+            return {"error": "No demographic data found"}
+
+        def mock_economic_data(query):
+            """Mock economic data tool"""
+            tool_invocation_log.append(("economic_data", query))
+            if "são paulo" in query.lower() and "export" in query.lower():
+                return {
+                    "result": "São Paulo's main exports include aircraft, vehicles, machinery, coffee, and soybeans",
+                    "details": {
+                        "top_exports": ["aircraft", "vehicles", "machinery", "coffee", "soybeans"],
+                        "export_value_billions_usd": 65.2
+                    }
+                }
+            return {"error": "No economic data found"}
+
+        # Execute multi-iteration ReACT with dynamic tool selection
+        def execute_dynamic_react(question, tools, llm_reasoner):
+            """Execute ReACT with dynamic LLM-based tool selection"""
+            iterations = []
+            history = []
+            available_tools = list(tools.keys())
+
+            max_iterations = 4
+            final_answer = None  # stays None if the iteration budget runs out first
+            for i in range(max_iterations):
+                # LLM reasons about next action
+                reasoning, tool_name, args = llm_reasoner(question, history, available_tools)
+
+                if tool_name == "final_answer":
+                    # Agent has decided it has enough information (answer text is canned)
+                    final_answer = {
+                        "reasoning": reasoning,
+                        "answer": "São Paulo, Brazil's largest city with 12.4 million people, "
+                                  "has main exports including aircraft, vehicles, machinery, coffee, and soybeans."
+                    }
+                    break
+
+                # Execute selected tool
+                iteration = {
+                    "iteration": i + 1,
+                    "think": reasoning,
+                    "act": {"tool": tool_name, "args": args},
+                    "observe": None
+                }
+                # An unknown tool name becomes an observation rather than an exception
+                if tool_name in tools:
+                    result = tools[tool_name](args["query"])
+                    iteration["observe"] = result.get("result", "No information found")
+                else:
+                    iteration["observe"] = f"Tool {tool_name} not available"
+
+                iterations.append(iteration)
+
+                # Add to history for next iteration
+                history.append({
+                    "thought": reasoning,
+                    "action": tool_name,
+                    "args": args,
+                    "observation": iteration["observe"]
+                })
+
+            return {
+                "iterations": iterations,
+                "final_answer": final_answer,
+                "reasoning_log": reasoning_log,
+                "tool_invocations": len(tool_invocation_log)
+            }
+
+        tools = {
+            "geo_search": mock_geo_search,
+            "demographic_data": mock_demographic_data,
+            "economic_data": mock_economic_data
+        }
+
+        # Act
+        result = execute_dynamic_react(question, tools, mock_llm_reasoning)
+
+        # Assert - Verify dynamic multi-iteration execution
+        assert len(result["iterations"]) == 3, "Should have 3 iterations before final answer"
+
+        # Verify reasoning adapts based on observations
+        assert len(reasoning_log) == 4, "Should have 4 reasoning steps (3 tools + final)"
+
+        # Verify first iteration searches for largest city
+        assert reasoning_log[0]["tool"] == "geo_search"
+        assert "largest city" in reasoning_log[0]["reasoning"].lower()
+        assert not reasoning_log[0]["context"].get("city")
+
+        # Verify second iteration uses city name from first observation
+        assert reasoning_log[1]["tool"] == "demographic_data"
+        assert "São Paulo" in reasoning_log[1]["reasoning"]
+        assert reasoning_log[1]["context"]["city"] == "São Paulo"
+
+        # Verify third iteration builds on previous knowledge
+        assert reasoning_log[2]["tool"] == "economic_data"
+        assert "export" in reasoning_log[2]["reasoning"].lower()
+        assert reasoning_log[2]["context"]["has_population"] is True
+
+        # Verify final reasoning has all information
+        assert reasoning_log[3]["tool"] == "final_answer"
+        assert reasoning_log[3]["context"]["has_exports"] is True
+
+        # Verify tool invocation sequence
+        assert tool_invocation_log[0][0] == "geo_search"
+        assert tool_invocation_log[1][0] == "demographic_data"
+        assert tool_invocation_log[2][0] == "economic_data"
+
+        # Verify observations influence subsequent tool selection
+        assert "São Paulo" in result["iterations"][1]["act"]["args"]["query"]
+        assert "São Paulo" in result["iterations"][2]["act"]["args"]["query"]
+
+        # Verify the loop ended on a final answer and that answer carries the expected facts
+        assert result["final_answer"] is not None
+        assert "São Paulo" in result["final_answer"]["answer"]
+        assert "12.4 million" in result["final_answer"]["answer"]
+        assert "aircraft" in result["final_answer"]["answer"]
+        assert "vehicles" in result["final_answer"]["answer"]
+
    def test_error_handling_in_react_cycle(self):
        """Test error handling during ReAct execution"""
        # Arrange