Merge branch 'release/v1.2'

2026-06-12 00:05:13 +02:00 · 2025-08-26 19:18:01 +01:00 · 2025-08-26 19:18:01 +01:00 · 0bff629f87
commit 0bff629f87
parent f78bd64b8d 6e9e2a11b1
28 changed files with 3881 additions and 111 deletions
--- a/tests/unit/test_agent/test_react_processor.py
+++ b/tests/unit/test_agent/test_react_processor.py
@ -305,6 +305,639 @@ Answer: The capital of France is Paris."""
        assert reasoning_plan[1]["action"] == "find_population"
        assert all("step" in step for step in reasoning_plan)

+    def test_multi_iteration_react_execution(self):
+        """Test complete multi-iteration ReACT cycle with sequential tool invocations
+        
+        This test simulates a complex query that requires:
+        1. Tool #1: Search for initial information
+        2. Tool #2: Analyze/refine based on Tool #1's output  
+        3. Tool #3: Generate final answer using accumulated context
+        
+        Each iteration includes Think -> Act -> Observe phases with
+        observations feeding into subsequent thinking phases.
+        """
+        # Arrange
+        question = "Find the GDP of the capital of Japan and compare it to Tokyo's population"
+        
+        # Mock tools that build on each other's outputs
+        tool_invocation_log = []
+        
+        def mock_geo_search(query):
+            """Tool 1: Geographic information search"""
+            tool_invocation_log.append(("geo_search", query))
+            if "capital" in query.lower() and "japan" in query.lower():
+                return {"city": "Tokyo", "country": "Japan", "is_capital": True}
+            return {"error": "Location not found"}
+        
+        def mock_economic_data(query, context=None):
+            """Tool 2: Economic data retrieval (uses context from Tool 1)"""
+            tool_invocation_log.append(("economic_data", query, context))
+            if context and context.get("city") == "Tokyo":
+                return {"city": "Tokyo", "gdp_trillion_yen": 115.7, "year": 2023}
+            return {"error": "Economic data not available"}
+        
+        def mock_demographic_data(query, context=None):
+            """Tool 3: Demographic data and comparison (uses context from Tools 1 & 2)"""
+            tool_invocation_log.append(("demographic_data", query, context))
+            if context and context.get("city") == "Tokyo":
+                population_millions = 14.0
+                gdp_from_context = context.get("gdp_trillion_yen", 0)
+                return {
+                    "city": "Tokyo",
+                    "population_millions": population_millions,
+                    "gdp_trillion_yen": gdp_from_context,
+                    "gdp_per_capita_million_yen": round(gdp_from_context / population_millions, 2) if population_millions > 0 else 0
+                }
+            return {"error": "Demographic data not available"}
+        
+        # Execute multi-iteration ReACT cycle
+        def execute_multi_iteration_react(question, tools):
+            """Execute a complete multi-iteration ReACT cycle"""
+            iterations = []
+            context = {}
+            
+            # Iteration 1: Initial geographic search
+            iteration_1 = {
+                "iteration": 1,
+                "think": "I need to first identify the capital of Japan to get its GDP",
+                "act": {"tool": "geo_search", "query": "capital of Japan"},
+                "observe": None
+            }
+            result_1 = tools["geo_search"](iteration_1["act"]["query"])
+            iteration_1["observe"] = f"Found that {result_1['city']} is the capital of {result_1['country']}"
+            context.update(result_1)
+            iterations.append(iteration_1)
+            
+            # Iteration 2: Get economic data using context from iteration 1
+            iteration_2 = {
+                "iteration": 2,
+                "think": f"Now I know {context['city']} is the capital. I need to get its GDP data",
+                "act": {"tool": "economic_data", "query": f"GDP of {context['city']}"},
+                "observe": None
+            }
+            result_2 = tools["economic_data"](iteration_2["act"]["query"], context)
+            iteration_2["observe"] = f"Retrieved GDP data: {result_2['gdp_trillion_yen']} trillion yen for {result_2['year']}"
+            context.update(result_2)
+            iterations.append(iteration_2)
+            
+            # Iteration 3: Get demographic data and compare using accumulated context
+            iteration_3 = {
+                "iteration": 3,
+                "think": f"I have the GDP ({context['gdp_trillion_yen']} trillion yen). Now I need population data to compare",
+                "act": {"tool": "demographic_data", "query": f"population of {context['city']}"},
+                "observe": None
+            }
+            result_3 = tools["demographic_data"](iteration_3["act"]["query"], context)
+            iteration_3["observe"] = f"Population is {result_3['population_millions']} million. GDP per capita is {result_3['gdp_per_capita_million_yen']} million yen"
+            context.update(result_3)
+            iterations.append(iteration_3)
+            
+            # Final answer synthesis
+            final_answer = {
+                "think": "I now have all the information needed to answer the question",
+                "answer": f"Tokyo, the capital of Japan, has a GDP of {context['gdp_trillion_yen']} trillion yen and a population of {context['population_millions']} million people, resulting in a GDP per capita of {context['gdp_per_capita_million_yen']} million yen."
+            }
+            
+            return {
+                "iterations": iterations,
+                "final_answer": final_answer,
+                "context": context,
+                "tool_invocations": len(tool_invocation_log)
+            }
+        
+        tools = {
+            "geo_search": mock_geo_search,
+            "economic_data": mock_economic_data,
+            "demographic_data": mock_demographic_data
+        }
+        
+        # Act
+        result = execute_multi_iteration_react(question, tools)
+        
+        # Assert - Verify complete multi-iteration execution
+        assert len(result["iterations"]) == 3, "Should have exactly 3 iterations"
+        
+        # Verify each iteration has complete Think-Act-Observe cycle
+        for i, iteration in enumerate(result["iterations"], 1):
+            assert iteration["iteration"] == i
+            assert "think" in iteration and len(iteration["think"]) > 0
+            assert "act" in iteration and "tool" in iteration["act"]
+            assert "observe" in iteration and iteration["observe"] is not None
+        
+        # Verify sequential tool invocations
+        assert tool_invocation_log[0][0] == "geo_search"
+        assert tool_invocation_log[1][0] == "economic_data"
+        assert tool_invocation_log[2][0] == "demographic_data"
+        
+        # Verify context accumulation across iterations
+        assert "Tokyo" in tool_invocation_log[1][1], "Iteration 2 should use data from iteration 1"
+        assert tool_invocation_log[2][2].get("gdp_trillion_yen") == 115.7, "Iteration 3 should have accumulated GDP data"
+        
+        # Verify observations feed into subsequent thinking
+        assert "Tokyo" in result["iterations"][1]["think"], "Iteration 2 thinking should reference observation from iteration 1"
+        assert "115.7" in result["iterations"][2]["think"], "Iteration 3 thinking should reference GDP from iteration 2"
+        
+        # Verify final answer synthesis
+        assert "Tokyo" in result["final_answer"]["answer"]
+        assert "115.7" in result["final_answer"]["answer"]
+        assert "14.0" in result["final_answer"]["answer"]
+        assert "8.26" in result["final_answer"]["answer"], "Should include calculated GDP per capita"
+        
+        # Verify all 3 tools were invoked in sequence
+        assert result["tool_invocations"] == 3
+
+    def test_multi_iteration_with_dynamic_tool_selection(self):
+        """Test multi-iteration ReACT with mocked LLM reasoning dynamically selecting tools
+        
+        This test simulates how an LLM would dynamically choose tools based on:
+        1. The original question
+        2. Previous observations
+        3. Accumulated context
+        
+        The mocked LLM reasoning adapts its tool selection based on what it has learned
+        in previous iterations, mimicking real agent behavior.
+        """
+        # Arrange
+        question = "What are the main exports of the largest city in Brazil by population?"
+        
+        # Track reasoning and tool selection
+        reasoning_log = []
+        tool_invocation_log = []
+        
+        def mock_llm_reasoning(question, history, available_tools):
+            """Mock LLM that reasons about tool selection based on context"""
+            # Analyze what we know from history
+            context = {}
+            for step in history:
+                if "observation" in step:
+                    # Extract information from observations
+                    obs = step["observation"]
+                    if "São Paulo" in obs:
+                        context["city"] = "São Paulo"
+                    if "largest city" in obs:
+                        context["is_largest"] = True
+                    if "million" in obs and "population" in obs:
+                        context["has_population"] = True
+                    if "exports" in obs:
+                        context["has_exports"] = True
+            
+            # Decide next action based on what we know
+            if not context.get("city"):
+                # Step 1: Need to find the largest city
+                reasoning = "I need to find the largest city in Brazil by population"
+                tool = "geo_search"
+                args = {"query": "largest city Brazil population"}
+            elif not context.get("has_population"):
+                # Step 2: Confirm population data
+                reasoning = f"I found {context['city']}. Now I need to verify it's the largest by checking population"
+                tool = "demographic_data"
+                args = {"query": f"population {context['city']} Brazil"}
+            elif not context.get("has_exports"):
+                # Step 3: Get export information
+                reasoning = f"Confirmed {context['city']} is the largest. Now I need export information"
+                tool = "economic_data"
+                args = {"query": f"main exports {context['city']} Brazil"}
+            else:
+                # Final: Have all information
+                reasoning = "I have all the information needed to answer"
+                tool = "final_answer"
+                args = None
+            
+            reasoning_log.append({"reasoning": reasoning, "tool": tool, "context": context.copy()})
+            return reasoning, tool, args
+        
+        def mock_geo_search(query):
+            """Mock geographic search tool"""
+            tool_invocation_log.append(("geo_search", query))
+            if "largest city brazil" in query.lower():
+                return {
+                    "result": "São Paulo is the largest city in Brazil",
+                    "details": {"city": "São Paulo", "country": "Brazil", "rank": 1}
+                }
+            return {"error": "No results found"}
+        
+        def mock_demographic_data(query):
+            """Mock demographic data tool"""
+            tool_invocation_log.append(("demographic_data", query))
+            if "são paulo" in query.lower():
+                return {
+                    "result": "São Paulo has a population of 12.4 million in the city proper, 22.8 million in the metro area",
+                    "details": {"city_population": 12.4, "metro_population": 22.8, "unit": "million"}
+                }
+            return {"error": "No demographic data found"}
+        
+        def mock_economic_data(query):
+            """Mock economic data tool"""
+            tool_invocation_log.append(("economic_data", query))
+            if "são paulo" in query.lower() and "export" in query.lower():
+                return {
+                    "result": "São Paulo's main exports include aircraft, vehicles, machinery, coffee, and soybeans",
+                    "details": {
+                        "top_exports": ["aircraft", "vehicles", "machinery", "coffee", "soybeans"],
+                        "export_value_billions_usd": 65.2
+                    }
+                }
+            return {"error": "No economic data found"}
+        
+        # Execute multi-iteration ReACT with dynamic tool selection
+        def execute_dynamic_react(question, tools, llm_reasoner):
+            """Execute ReACT with dynamic LLM-based tool selection"""
+            iterations = []
+            history = []
+            available_tools = list(tools.keys())
+            
+            max_iterations = 4
+            for i in range(max_iterations):
+                # LLM reasons about next action
+                reasoning, tool_name, args = llm_reasoner(question, history, available_tools)
+                
+                if tool_name == "final_answer":
+                    # Agent has decided it has enough information
+                    final_answer = {
+                        "reasoning": reasoning,
+                        "answer": "São Paulo, Brazil's largest city with 12.4 million people, " +
+                                "has main exports including aircraft, vehicles, machinery, coffee, and soybeans."
+                    }
+                    break
+                
+                # Execute selected tool
+                iteration = {
+                    "iteration": i + 1,
+                    "think": reasoning,
+                    "act": {"tool": tool_name, "args": args},
+                    "observe": None
+                }
+                
+                # Get tool result
+                if tool_name in tools:
+                    result = tools[tool_name](args["query"])
+                    iteration["observe"] = result.get("result", "No information found")
+                else:
+                    iteration["observe"] = f"Tool {tool_name} not available"
+                
+                iterations.append(iteration)
+                
+                # Add to history for next iteration
+                history.append({
+                    "thought": reasoning,
+                    "action": tool_name,
+                    "args": args,
+                    "observation": iteration["observe"]
+                })
+            
+            return {
+                "iterations": iterations,
+                "final_answer": final_answer if 'final_answer' in locals() else None,
+                "reasoning_log": reasoning_log,
+                "tool_invocations": len(tool_invocation_log)
+            }
+        
+        tools = {
+            "geo_search": mock_geo_search,
+            "demographic_data": mock_demographic_data,
+            "economic_data": mock_economic_data
+        }
+        
+        # Act
+        result = execute_dynamic_react(question, tools, mock_llm_reasoning)
+        
+        # Assert - Verify dynamic multi-iteration execution
+        assert len(result["iterations"]) == 3, "Should have 3 iterations before final answer"
+        
+        # Verify reasoning adapts based on observations
+        assert len(reasoning_log) == 4, "Should have 4 reasoning steps (3 tools + final)"
+        
+        # Verify first iteration searches for largest city
+        assert reasoning_log[0]["tool"] == "geo_search"
+        assert "largest city" in reasoning_log[0]["reasoning"].lower()
+        assert not reasoning_log[0]["context"].get("city")
+        
+        # Verify second iteration uses city name from first observation
+        assert reasoning_log[1]["tool"] == "demographic_data"
+        assert "São Paulo" in reasoning_log[1]["reasoning"]
+        assert reasoning_log[1]["context"]["city"] == "São Paulo"
+        
+        # Verify third iteration builds on previous knowledge
+        assert reasoning_log[2]["tool"] == "economic_data"
+        assert "export" in reasoning_log[2]["reasoning"].lower()
+        assert reasoning_log[2]["context"]["has_population"] is True
+        
+        # Verify final reasoning has all information
+        assert reasoning_log[3]["tool"] == "final_answer"
+        assert reasoning_log[3]["context"]["has_exports"] is True
+        
+        # Verify tool invocation sequence
+        assert tool_invocation_log[0][0] == "geo_search"
+        assert tool_invocation_log[1][0] == "demographic_data"
+        assert tool_invocation_log[2][0] == "economic_data"
+        
+        # Verify observations influence subsequent tool selection
+        assert "São Paulo" in result["iterations"][1]["act"]["args"]["query"]
+        assert "São Paulo" in result["iterations"][2]["act"]["args"]["query"]
+        
+        # Verify final answer synthesizes all gathered information
+        assert result["final_answer"] is not None
+        assert "São Paulo" in result["final_answer"]["answer"]
+        assert "12.4 million" in result["final_answer"]["answer"]
+        assert "aircraft" in result["final_answer"]["answer"]
+        assert "vehicles" in result["final_answer"]["answer"]
+
+    def test_action_name_with_quotes_handling(self):
+        """Test that action names with quotes are properly stripped
+        
+        This test verifies the fix for when LLMs output action names wrapped
+        in quotes, e.g., Action: "get_bank_balance" instead of Action: get_bank_balance
+        """
+        # Arrange
+        def parse_react_output(text):
+            """Parse ReAct format output into structured steps"""
+            steps = []
+            lines = text.strip().split('\n')
+            
+            thought = None
+            action = None
+            args = None
+            
+            for line in lines:
+                line = line.strip()
+                if line.startswith('Think:') or line.startswith('Thought:'):
+                    thought = line.split(':', 1)[1].strip()
+                elif line.startswith('Action:'):
+                    action = line[7:].strip()
+                    # Strip quotes from action name - this is the fix being tested
+                    while action and action[0] == '"':
+                        action = action[1:]
+                    while action and action[-1] == '"':
+                        action = action[:-1]
+                elif line.startswith('Args:'):
+                    # Simple args parsing for test
+                    args_text = line[5:].strip()
+                    if args_text:
+                        import json
+                        try:
+                            args = json.loads(args_text)
+                        except:
+                            args = {"raw": args_text}
+            
+            return {
+                "thought": thought,
+                "action": action,
+                "args": args
+            }
+        
+        # Test cases with various quote patterns
+        test_cases = [
+            # Normal case without quotes
+            (
+                'Thought: I need to check the bank balance\nAction: get_bank_balance\nArgs: {"account": "12345"}',
+                "get_bank_balance"
+            ),
+            # Single quotes around action name
+            (
+                'Thought: I need to check the bank balance\nAction: "get_bank_balance"\nArgs: {"account": "12345"}',
+                "get_bank_balance"
+            ),
+            # Multiple quotes (nested)
+            (
+                'Thought: I need to check the bank balance\nAction: ""get_bank_balance""\nArgs: {"account": "12345"}',
+                "get_bank_balance"
+            ),
+            # Action with underscores and quotes
+            (
+                'Thought: I need to search\nAction: "search_knowledge_base"\nArgs: {"query": "test"}',
+                "search_knowledge_base"
+            ),
+            # Action with hyphens and quotes
+            (
+                'Thought: I need to search\nAction: "search-knowledge-base"\nArgs: {"query": "test"}',
+                "search-knowledge-base"
+            ),
+            # Edge case: just quotes (should result in empty string)
+            (
+                'Thought: Error case\nAction: ""\nArgs: {}',
+                ""
+            ),
+            # Mixed quotes at start and end
+            (
+                'Thought: Processing\nAction: """complex_tool"""\nArgs: {}',
+                "complex_tool"
+            ),
+        ]
+        
+        # Act & Assert
+        for llm_output, expected_action in test_cases:
+            result = parse_react_output(llm_output)
+            assert result["action"] == expected_action, \
+                f"Failed to parse action correctly from: {llm_output}\nExpected: {expected_action}, Got: {result['action']}"
+        
+        # Test with actual tool matching
+        tools = {
+            "get_bank_balance": {"description": "Get bank balance"},
+            "search_knowledge_base": {"description": "Search knowledge"},
+            "complex_tool": {"description": "Complex operations"}
+        }
+        
+        # Simulate tool lookup with quoted action names
+        quoted_actions = [
+            '"get_bank_balance"',
+            '""search_knowledge_base""',
+            'complex_tool',  # without quotes
+            '"complex_tool"'
+        ]
+        
+        for quoted_action in quoted_actions:
+            # Strip quotes as the fix does
+            clean_action = quoted_action
+            while clean_action and clean_action[0] == '"':
+                clean_action = clean_action[1:]
+            while clean_action and clean_action[-1] == '"':
+                clean_action = clean_action[:-1]
+            
+            # Verify the cleaned action exists in tools (except empty string case)
+            if clean_action:
+                assert clean_action in tools, \
+                    f"Cleaned action '{clean_action}' from '{quoted_action}' should be in tools"
+
+    def test_mcp_tool_arguments_support(self):
+        """Test that MCP tools can be configured with arguments and expose them correctly
+        
+        This test verifies the MCP tool arguments feature where:
+        1. MCP tool configurations can specify arguments
+        2. Configuration parsing extracts arguments correctly
+        3. Arguments are structured properly for tool use
+        """
+        # Define a simple Argument class for testing (mimics the real one)
+        class TestArgument:
+            def __init__(self, name, type, description):
+                self.name = name
+                self.type = type 
+                self.description = description
+        
+        # Define a mock McpToolImpl that mimics the new functionality
+        class MockMcpToolImpl:
+            def __init__(self, context, mcp_tool_id, arguments=None):
+                self.context = context
+                self.mcp_tool_id = mcp_tool_id
+                self.arguments = arguments or []
+            
+            def get_arguments(self):
+                return self.arguments
+        
+        # Test 1: MCP tool with arguments
+        test_arguments = [
+            TestArgument(
+                name="account_id",
+                type="string",
+                description="Bank account identifier"
+            ),
+            TestArgument(
+                name="date",
+                type="string",
+                description="Date for balance query (optional, format: YYYY-MM-DD)"
+            )
+        ]
+        
+        context_mock = lambda service_name: None
+        mcp_tool_with_args = MockMcpToolImpl(
+            context=context_mock,
+            mcp_tool_id="get_bank_balance",
+            arguments=test_arguments
+        )
+        
+        returned_args = mcp_tool_with_args.get_arguments()
+        
+        # Verify arguments are stored and returned correctly
+        assert len(returned_args) == 2
+        assert returned_args[0].name == "account_id"
+        assert returned_args[0].type == "string"
+        assert returned_args[0].description == "Bank account identifier"
+        assert returned_args[1].name == "date"
+        assert returned_args[1].type == "string"
+        assert "optional" in returned_args[1].description.lower()
+        
+        # Test 2: MCP tool without arguments (backward compatibility)
+        mcp_tool_no_args = MockMcpToolImpl(
+            context=context_mock,
+            mcp_tool_id="simple_tool"
+        )
+        
+        returned_args_empty = mcp_tool_no_args.get_arguments()
+        assert len(returned_args_empty) == 0
+        assert returned_args_empty == []
+        
+        # Test 3: MCP tool with empty arguments list
+        mcp_tool_empty_args = MockMcpToolImpl(
+            context=context_mock,
+            mcp_tool_id="another_tool",
+            arguments=[]
+        )
+        
+        returned_args_explicit_empty = mcp_tool_empty_args.get_arguments()
+        assert len(returned_args_explicit_empty) == 0
+        assert returned_args_explicit_empty == []
+        
+        # Test 4: Configuration parsing simulation
+        def simulate_config_parsing(config_data):
+            """Simulate how service.py parses MCP tool configuration"""
+            config_args = config_data.get("arguments", [])
+            arguments = [
+                TestArgument(
+                    name=arg.get("name"),
+                    type=arg.get("type"),
+                    description=arg.get("description")
+                )
+                for arg in config_args
+            ]
+            return arguments
+        
+        # Test configuration with arguments
+        config_with_args = {
+            "type": "mcp-tool",
+            "name": "get_bank_balance",
+            "description": "Get bank account balance",
+            "mcp-tool": "get_bank_balance",
+            "arguments": [
+                {
+                    "name": "account_id",
+                    "type": "string",
+                    "description": "Bank account identifier"
+                },
+                {
+                    "name": "date",
+                    "type": "string",
+                    "description": "Date for balance query (optional)"
+                }
+            ]
+        }
+        
+        parsed_args = simulate_config_parsing(config_with_args)
+        assert len(parsed_args) == 2
+        assert parsed_args[0].name == "account_id"
+        assert parsed_args[1].name == "date"
+        
+        # Test configuration without arguments
+        config_without_args = {
+            "type": "mcp-tool",
+            "name": "simple_tool",
+            "description": "Simple MCP tool",
+            "mcp-tool": "simple_tool"
+        }
+        
+        parsed_args_empty = simulate_config_parsing(config_without_args)
+        assert len(parsed_args_empty) == 0
+        
+        # Test 5: Argument structure validation
+        def validate_argument_structure(arg):
+            """Validate that an argument has required fields"""
+            required_fields = ['name', 'type', 'description']
+            return all(hasattr(arg, field) and getattr(arg, field) for field in required_fields)
+        
+        # Validate all parsed arguments have proper structure
+        for arg in parsed_args:
+            assert validate_argument_structure(arg), f"Argument {arg.name} missing required fields"
+        
+        # Test 6: Prompt template integration simulation
+        def simulate_prompt_template_rendering(tools):
+            """Simulate how agent prompts include tool arguments"""
+            tool_descriptions = []
+            
+            for tool in tools:
+                tool_desc = f"- **{tool.name}**: {tool.description}"
+                
+                # Add argument details if present
+                for arg in tool.arguments:
+                    tool_desc += f"\n  - Required: `\"{arg.name}\"` ({arg.type}): {arg.description}"
+                    
+                tool_descriptions.append(tool_desc)
+            
+            return "\n".join(tool_descriptions)
+        
+        # Create mock tools with our MCP tool
+        class MockTool:
+            def __init__(self, name, description, arguments):
+                self.name = name
+                self.description = description
+                self.arguments = arguments
+        
+        mock_tools = [
+            MockTool("search", "Search the web", []),  # Tool without arguments
+            MockTool("get_bank_balance", "Get bank account balance", parsed_args)  # MCP tool with arguments
+        ]
+        
+        prompt_section = simulate_prompt_template_rendering(mock_tools)
+        
+        # Verify the prompt includes MCP tool arguments
+        assert "get_bank_balance" in prompt_section
+        assert "account_id" in prompt_section
+        assert "Bank account identifier" in prompt_section
+        assert "date" in prompt_section
+        assert "(string)" in prompt_section
+        assert "Required:" in prompt_section
+        
+        # Verify tools without arguments still work
+        assert "search" in prompt_section
+        assert "Search the web" in prompt_section
+
    def test_error_handling_in_react_cycle(self):
        """Test error handling during ReAct execution"""
        # Arrange
@ -474,4 +1107,4 @@ Answer: The capital of France is Paris."""
                assert "error" in result
            else:
                assert result["tool_name"] == expected_tool
-                assert result["parameters"] == expected_params
+                assert result["parameters"] == expected_params