trustgraph/tests/unit/test_agent/test_react_processor.py

"""
Unit tests for ReAct processor logic

Tests the core business logic for the ReAct (Reasoning and Acting) pattern
without relying on external LLM services, focusing on the Think-Act-Observe
cycle and tool coordination.
"""

import pytest
from unittest.mock import Mock, AsyncMock, patch
import re


class TestReActProcessorLogic:
    """Test cases for ReAct processor business logic"""

    def test_react_cycle_parsing(self):
        """Test parsing of ReAct cycle components from LLM output"""
        # Arrange
        llm_output = """Think: I need to find information about the capital of France.
Act: knowledge_search: capital of France
Observe: The search returned that Paris is the capital of France.
Think: I now have enough information to answer.
Answer: The capital of France is Paris."""
        
        def parse_react_output(text):
            """Parse ReAct format output into structured steps"""
            steps = []
            lines = text.strip().split('\n')
            
            for line in lines:
                line = line.strip()
                if line.startswith('Think:'):
                    steps.append({
                        'type': 'think',
                        'content': line[6:].strip()
                    })
                elif line.startswith('Act:'):
                    act_content = line[4:].strip()
                    # Parse "tool_name: parameters" format
                    if ':' in act_content:
                        tool_name, params = act_content.split(':', 1)
                        steps.append({
                            'type': 'act',
                            'tool_name': tool_name.strip(),
                            'parameters': params.strip()
                        })
                    else:
                        steps.append({
                            'type': 'act',
                            'content': act_content
                        })
                elif line.startswith('Observe:'):
                    steps.append({
                        'type': 'observe',
                        'content': line[8:].strip()
                    })
                elif line.startswith('Answer:'):
                    steps.append({
                        'type': 'answer',
                        'content': line[7:].strip()
                    })
            
            return steps
        
        # Act
        steps = parse_react_output(llm_output)
        
        # Assert
        assert len(steps) == 5
        assert steps[0]['type'] == 'think'
        assert steps[1]['type'] == 'act'
        assert steps[1]['tool_name'] == 'knowledge_search'
        assert steps[1]['parameters'] == 'capital of France'
        assert steps[2]['type'] == 'observe'
        assert steps[3]['type'] == 'think'
        assert steps[4]['type'] == 'answer'

    def test_tool_selection_logic(self):
        """Test tool selection based on question type and context"""
        # Arrange
        test_cases = [
            ("What is 2 + 2?", "calculator"),
            ("Who is the president of France?", "knowledge_search"),
            ("Tell me about the relationship between Paris and France", "graph_rag"),
            ("What time is it?", "knowledge_search")  # Default to general search
        ]
        
        available_tools = {
            "calculator": {"description": "Perform mathematical calculations"},
            "knowledge_search": {"description": "Search knowledge base for facts"},
            "graph_rag": {"description": "Query knowledge graph for relationships"}
        }
        
        def select_tool(question, tools):
            """Select appropriate tool based on question content"""
            question_lower = question.lower()
            
            # Math keywords
            if any(word in question_lower for word in ['+', '-', '*', '/', 'calculate', 'math']):
                return "calculator"
            
            # Relationship/graph keywords
            if any(word in question_lower for word in ['relationship', 'between', 'connected', 'related']):
                return "graph_rag"
            
            # General knowledge keywords or default case
            if any(word in question_lower for word in ['who', 'what', 'where', 'when', 'why', 'how', 'time']):
                return "knowledge_search"
            
            return None
        
        # Act & Assert
        for question, expected_tool in test_cases:
            selected_tool = select_tool(question, available_tools)
            assert selected_tool == expected_tool, f"Question '{question}' should select {expected_tool}"

    def test_tool_execution_logic(self):
        """Test tool execution and result processing"""
        # Arrange
        def mock_knowledge_search(query):
            if "capital" in query.lower() and "france" in query.lower():
                return "Paris is the capital of France."
            return "Information not found."
        
        def mock_calculator(expression):
            try:
                # Simple expression evaluation
                if '+' in expression:
                    parts = expression.split('+')
                    return str(sum(int(p.strip()) for p in parts))
                return str(eval(expression))
            except:
                return "Error: Invalid expression"
        
        tools = {
            "knowledge_search": mock_knowledge_search,
            "calculator": mock_calculator
        }
        
        def execute_tool(tool_name, parameters, available_tools):
            """Execute tool with given parameters"""
            if tool_name not in available_tools:
                return {"error": f"Tool {tool_name} not available"}
            
            try:
                tool_function = available_tools[tool_name]
                result = tool_function(parameters)
                return {"success": True, "result": result}
            except Exception as e:
                return {"error": str(e)}
        
        # Act & Assert
        test_cases = [
            ("knowledge_search", "capital of France", "Paris is the capital of France."),
            ("calculator", "2 + 2", "4"),
            ("calculator", "invalid expression", "Error: Invalid expression"),
            ("nonexistent_tool", "anything", None)  # Error case
        ]
        
        for tool_name, params, expected in test_cases:
            result = execute_tool(tool_name, params, tools)
            
            if expected is None:
                assert "error" in result
            else:
                assert result.get("result") == expected

    def test_conversation_context_integration(self):
        """Test integration of conversation history into ReAct reasoning"""
        # Arrange
        conversation_history = [
            {"role": "user", "content": "What is 2 + 2?"},
            {"role": "assistant", "content": "2 + 2 = 4"},
            {"role": "user", "content": "What about 3 + 3?"}
        ]
        
        def build_context_prompt(question, history, max_turns=3):
            """Build context prompt from conversation history"""
            context_parts = []
            
            # Include recent conversation turns
            recent_history = history[-(max_turns*2):] if history else []
            
            for turn in recent_history:
                role = turn["role"]
                content = turn["content"]
                context_parts.append(f"{role}: {content}")
            
            current_question = f"user: {question}"
            context_parts.append(current_question)
            
            return "\n".join(context_parts)
        
        # Act
        context_prompt = build_context_prompt("What about 3 + 3?", conversation_history)
        
        # Assert
        assert "2 + 2" in context_prompt
        assert "2 + 2 = 4" in context_prompt
        assert "3 + 3" in context_prompt
        assert context_prompt.count("user:") == 3
        assert context_prompt.count("assistant:") == 1

    def test_react_cycle_validation(self):
        """Test validation of complete ReAct cycles"""
        # Arrange
        complete_cycle = [
            {"type": "think", "content": "I need to solve this math problem"},
            {"type": "act", "tool_name": "calculator", "parameters": "2 + 2"},
            {"type": "observe", "content": "The calculator returned 4"},
            {"type": "think", "content": "I can now provide the answer"},
            {"type": "answer", "content": "2 + 2 = 4"}
        ]
        
        incomplete_cycle = [
            {"type": "think", "content": "I need to solve this"},
            {"type": "act", "tool_name": "calculator", "parameters": "2 + 2"}
            # Missing observe and answer steps
        ]
        
        def validate_react_cycle(steps):
            """Validate that ReAct cycle is complete"""
            step_types = [step.get("type") for step in steps]
            
            # Must have at least one think, act, observe, and answer
            required_types = ["think", "act", "observe", "answer"]
            
            validation_results = {
                "is_complete": all(req_type in step_types for req_type in required_types),
                "has_reasoning": "think" in step_types,
                "has_action": "act" in step_types,
                "has_observation": "observe" in step_types,
                "has_answer": "answer" in step_types,
                "step_count": len(steps)
            }
            
            return validation_results
        
        # Act & Assert
        complete_validation = validate_react_cycle(complete_cycle)
        assert complete_validation["is_complete"] is True
        assert complete_validation["has_reasoning"] is True
        assert complete_validation["has_action"] is True
        assert complete_validation["has_observation"] is True
        assert complete_validation["has_answer"] is True
        
        incomplete_validation = validate_react_cycle(incomplete_cycle)
        assert incomplete_validation["is_complete"] is False
        assert incomplete_validation["has_reasoning"] is True
        assert incomplete_validation["has_action"] is True
        assert incomplete_validation["has_observation"] is False
        assert incomplete_validation["has_answer"] is False

    def test_multi_step_reasoning_logic(self):
        """Test multi-step reasoning chains"""
        # Arrange
        complex_question = "What is the population of the capital of France?"
        
        def plan_reasoning_steps(question):
            """Plan the reasoning steps needed for complex questions"""
            steps = []
            
            question_lower = question.lower()
            
            # Check if question requires multiple pieces of information
            if "capital of" in question_lower and ("population" in question_lower or "how many" in question_lower):
                steps.append({
                    "step": 1,
                    "action": "find_capital",
                    "description": "First find the capital city"
                })
                steps.append({
                    "step": 2,
                    "action": "find_population",
                    "description": "Then find the population of that city"
                })
            elif "capital of" in question_lower:
                steps.append({
                    "step": 1,
                    "action": "find_capital",
                    "description": "Find the capital city"
                })
            elif "population" in question_lower:
                steps.append({
                    "step": 1,
                    "action": "find_population",
                    "description": "Find the population"
                })
            else:
                steps.append({
                    "step": 1,
                    "action": "general_search",
                    "description": "Search for relevant information"
                })
            
            return steps
        
        # Act
        reasoning_plan = plan_reasoning_steps(complex_question)
        
        # Assert
        assert len(reasoning_plan) == 2
        assert reasoning_plan[0]["action"] == "find_capital"
        assert reasoning_plan[1]["action"] == "find_population"
        assert all("step" in step for step in reasoning_plan)

    def test_multi_iteration_react_execution(self):
        """Test complete multi-iteration ReACT cycle with sequential tool invocations
        
        This test simulates a complex query that requires:
        1. Tool #1: Search for initial information
        2. Tool #2: Analyze/refine based on Tool #1's output  
        3. Tool #3: Generate final answer using accumulated context
        
        Each iteration includes Think -> Act -> Observe phases with
        observations feeding into subsequent thinking phases.
        """
        # Arrange
        question = "Find the GDP of the capital of Japan and compare it to Tokyo's population"
        
        # Mock tools that build on each other's outputs
        tool_invocation_log = []
        
        def mock_geo_search(query):
            """Tool 1: Geographic information search"""
            tool_invocation_log.append(("geo_search", query))
            if "capital" in query.lower() and "japan" in query.lower():
                return {"city": "Tokyo", "country": "Japan", "is_capital": True}
            return {"error": "Location not found"}
        
        def mock_economic_data(query, context=None):
            """Tool 2: Economic data retrieval (uses context from Tool 1)"""
            tool_invocation_log.append(("economic_data", query, context))
            if context and context.get("city") == "Tokyo":
                return {"city": "Tokyo", "gdp_trillion_yen": 115.7, "year": 2023}
            return {"error": "Economic data not available"}
        
        def mock_demographic_data(query, context=None):
            """Tool 3: Demographic data and comparison (uses context from Tools 1 & 2)"""
            tool_invocation_log.append(("demographic_data", query, context))
            if context and context.get("city") == "Tokyo":
                population_millions = 14.0
                gdp_from_context = context.get("gdp_trillion_yen", 0)
                return {
                    "city": "Tokyo",
                    "population_millions": population_millions,
                    "gdp_trillion_yen": gdp_from_context,
                    "gdp_per_capita_million_yen": round(gdp_from_context / population_millions, 2) if population_millions > 0 else 0
                }
            return {"error": "Demographic data not available"}
        
        # Execute multi-iteration ReACT cycle
        def execute_multi_iteration_react(question, tools):
            """Execute a complete multi-iteration ReACT cycle"""
            iterations = []
            context = {}
            
            # Iteration 1: Initial geographic search
            iteration_1 = {
                "iteration": 1,
                "think": "I need to first identify the capital of Japan to get its GDP",
                "act": {"tool": "geo_search", "query": "capital of Japan"},
                "observe": None
            }
            result_1 = tools["geo_search"](iteration_1["act"]["query"])
            iteration_1["observe"] = f"Found that {result_1['city']} is the capital of {result_1['country']}"
            context.update(result_1)
            iterations.append(iteration_1)
            
            # Iteration 2: Get economic data using context from iteration 1
            iteration_2 = {
                "iteration": 2,
                "think": f"Now I know {context['city']} is the capital. I need to get its GDP data",
                "act": {"tool": "economic_data", "query": f"GDP of {context['city']}"},
                "observe": None
            }
            result_2 = tools["economic_data"](iteration_2["act"]["query"], context)
            iteration_2["observe"] = f"Retrieved GDP data: {result_2['gdp_trillion_yen']} trillion yen for {result_2['year']}"
            context.update(result_2)
            iterations.append(iteration_2)
            
            # Iteration 3: Get demographic data and compare using accumulated context
            iteration_3 = {
                "iteration": 3,
                "think": f"I have the GDP ({context['gdp_trillion_yen']} trillion yen). Now I need population data to compare",
                "act": {"tool": "demographic_data", "query": f"population of {context['city']}"},
                "observe": None
            }
            result_3 = tools["demographic_data"](iteration_3["act"]["query"], context)
            iteration_3["observe"] = f"Population is {result_3['population_millions']} million. GDP per capita is {result_3['gdp_per_capita_million_yen']} million yen"
            context.update(result_3)
            iterations.append(iteration_3)
            
            # Final answer synthesis
            final_answer = {
                "think": "I now have all the information needed to answer the question",
                "answer": f"Tokyo, the capital of Japan, has a GDP of {context['gdp_trillion_yen']} trillion yen and a population of {context['population_millions']} million people, resulting in a GDP per capita of {context['gdp_per_capita_million_yen']} million yen."
            }
            
            return {
                "iterations": iterations,
                "final_answer": final_answer,
                "context": context,
                "tool_invocations": len(tool_invocation_log)
            }
        
        tools = {
            "geo_search": mock_geo_search,
            "economic_data": mock_economic_data,
            "demographic_data": mock_demographic_data
        }
        
        # Act
        result = execute_multi_iteration_react(question, tools)
        
        # Assert - Verify complete multi-iteration execution
        assert len(result["iterations"]) == 3, "Should have exactly 3 iterations"
        
        # Verify each iteration has complete Think-Act-Observe cycle
        for i, iteration in enumerate(result["iterations"], 1):
            assert iteration["iteration"] == i
            assert "think" in iteration and len(iteration["think"]) > 0
            assert "act" in iteration and "tool" in iteration["act"]
            assert "observe" in iteration and iteration["observe"] is not None
        
        # Verify sequential tool invocations
        assert tool_invocation_log[0][0] == "geo_search"
        assert tool_invocation_log[1][0] == "economic_data"
        assert tool_invocation_log[2][0] == "demographic_data"
        
        # Verify context accumulation across iterations
        assert "Tokyo" in tool_invocation_log[1][1], "Iteration 2 should use data from iteration 1"
        assert tool_invocation_log[2][2].get("gdp_trillion_yen") == 115.7, "Iteration 3 should have accumulated GDP data"
        
        # Verify observations feed into subsequent thinking
        assert "Tokyo" in result["iterations"][1]["think"], "Iteration 2 thinking should reference observation from iteration 1"
        assert "115.7" in result["iterations"][2]["think"], "Iteration 3 thinking should reference GDP from iteration 2"
        
        # Verify final answer synthesis
        assert "Tokyo" in result["final_answer"]["answer"]
        assert "115.7" in result["final_answer"]["answer"]
        assert "14.0" in result["final_answer"]["answer"]
        assert "8.26" in result["final_answer"]["answer"], "Should include calculated GDP per capita"
        
        # Verify all 3 tools were invoked in sequence
        assert result["tool_invocations"] == 3

    def test_multi_iteration_with_dynamic_tool_selection(self):
        """Test multi-iteration ReACT with mocked LLM reasoning dynamically selecting tools
        
        This test simulates how an LLM would dynamically choose tools based on:
        1. The original question
        2. Previous observations
        3. Accumulated context
        
        The mocked LLM reasoning adapts its tool selection based on what it has learned
        in previous iterations, mimicking real agent behavior.
        """
        # Arrange
        question = "What are the main exports of the largest city in Brazil by population?"
        
        # Track reasoning and tool selection
        reasoning_log = []
        tool_invocation_log = []
        
        def mock_llm_reasoning(question, history, available_tools):
            """Mock LLM that reasons about tool selection based on context"""
            # Analyze what we know from history
            context = {}
            for step in history:
                if "observation" in step:
                    # Extract information from observations
                    obs = step["observation"]
                    if "São Paulo" in obs:
                        context["city"] = "São Paulo"
                    if "largest city" in obs:
                        context["is_largest"] = True
                    if "million" in obs and "population" in obs:
                        context["has_population"] = True
                    if "exports" in obs:
                        context["has_exports"] = True
            
            # Decide next action based on what we know
            if not context.get("city"):
                # Step 1: Need to find the largest city
                reasoning = "I need to find the largest city in Brazil by population"
                tool = "geo_search"
                args = {"query": "largest city Brazil population"}
            elif not context.get("has_population"):
                # Step 2: Confirm population data
                reasoning = f"I found {context['city']}. Now I need to verify it's the largest by checking population"
                tool = "demographic_data"
                args = {"query": f"population {context['city']} Brazil"}
            elif not context.get("has_exports"):
                # Step 3: Get export information
                reasoning = f"Confirmed {context['city']} is the largest. Now I need export information"
                tool = "economic_data"
                args = {"query": f"main exports {context['city']} Brazil"}
            else:
                # Final: Have all information
                reasoning = "I have all the information needed to answer"
                tool = "final_answer"
                args = None
            
            reasoning_log.append({"reasoning": reasoning, "tool": tool, "context": context.copy()})
            return reasoning, tool, args
        
        def mock_geo_search(query):
            """Mock geographic search tool"""
            tool_invocation_log.append(("geo_search", query))
            if "largest city brazil" in query.lower():
                return {
                    "result": "São Paulo is the largest city in Brazil",
                    "details": {"city": "São Paulo", "country": "Brazil", "rank": 1}
                }
            return {"error": "No results found"}
        
        def mock_demographic_data(query):
            """Mock demographic data tool"""
            tool_invocation_log.append(("demographic_data", query))
            if "são paulo" in query.lower():
                return {
                    "result": "São Paulo has a population of 12.4 million in the city proper, 22.8 million in the metro area",
                    "details": {"city_population": 12.4, "metro_population": 22.8, "unit": "million"}
                }
            return {"error": "No demographic data found"}
        
        def mock_economic_data(query):
            """Mock economic data tool"""
            tool_invocation_log.append(("economic_data", query))
            if "são paulo" in query.lower() and "export" in query.lower():
                return {
                    "result": "São Paulo's main exports include aircraft, vehicles, machinery, coffee, and soybeans",
                    "details": {
                        "top_exports": ["aircraft", "vehicles", "machinery", "coffee", "soybeans"],
                        "export_value_billions_usd": 65.2
                    }
                }
            return {"error": "No economic data found"}
        
        # Execute multi-iteration ReACT with dynamic tool selection
        def execute_dynamic_react(question, tools, llm_reasoner):
            """Execute ReACT with dynamic LLM-based tool selection"""
            iterations = []
            history = []
            available_tools = list(tools.keys())
            
            max_iterations = 4
            for i in range(max_iterations):
                # LLM reasons about next action
                reasoning, tool_name, args = llm_reasoner(question, history, available_tools)
                
                if tool_name == "final_answer":
                    # Agent has decided it has enough information
                    final_answer = {
                        "reasoning": reasoning,
                        "answer": "São Paulo, Brazil's largest city with 12.4 million people, " +
                                "has main exports including aircraft, vehicles, machinery, coffee, and soybeans."
                    }
                    break
                
                # Execute selected tool
                iteration = {
                    "iteration": i + 1,
                    "think": reasoning,
                    "act": {"tool": tool_name, "args": args},
                    "observe": None
                }
                
                # Get tool result
                if tool_name in tools:
                    result = tools[tool_name](args["query"])
                    iteration["observe"] = result.get("result", "No information found")
                else:
                    iteration["observe"] = f"Tool {tool_name} not available"
                
                iterations.append(iteration)
                
                # Add to history for next iteration
                history.append({
                    "thought": reasoning,
                    "action": tool_name,
                    "args": args,
                    "observation": iteration["observe"]
                })
            
            return {
                "iterations": iterations,
                "final_answer": final_answer if 'final_answer' in locals() else None,
                "reasoning_log": reasoning_log,
                "tool_invocations": len(tool_invocation_log)
            }
        
        tools = {
            "geo_search": mock_geo_search,
            "demographic_data": mock_demographic_data,
            "economic_data": mock_economic_data
        }
        
        # Act
        result = execute_dynamic_react(question, tools, mock_llm_reasoning)
        
        # Assert - Verify dynamic multi-iteration execution
        assert len(result["iterations"]) == 3, "Should have 3 iterations before final answer"
        
        # Verify reasoning adapts based on observations
        assert len(reasoning_log) == 4, "Should have 4 reasoning steps (3 tools + final)"
        
        # Verify first iteration searches for largest city
        assert reasoning_log[0]["tool"] == "geo_search"
        assert "largest city" in reasoning_log[0]["reasoning"].lower()
        assert not reasoning_log[0]["context"].get("city")
        
        # Verify second iteration uses city name from first observation
        assert reasoning_log[1]["tool"] == "demographic_data"
        assert "São Paulo" in reasoning_log[1]["reasoning"]
        assert reasoning_log[1]["context"]["city"] == "São Paulo"
        
        # Verify third iteration builds on previous knowledge
        assert reasoning_log[2]["tool"] == "economic_data"
        assert "export" in reasoning_log[2]["reasoning"].lower()
        assert reasoning_log[2]["context"]["has_population"] is True
        
        # Verify final reasoning has all information
        assert reasoning_log[3]["tool"] == "final_answer"
        assert reasoning_log[3]["context"]["has_exports"] is True
        
        # Verify tool invocation sequence
        assert tool_invocation_log[0][0] == "geo_search"
        assert tool_invocation_log[1][0] == "demographic_data"
        assert tool_invocation_log[2][0] == "economic_data"
        
        # Verify observations influence subsequent tool selection
        assert "São Paulo" in result["iterations"][1]["act"]["args"]["query"]
        assert "São Paulo" in result["iterations"][2]["act"]["args"]["query"]
        
        # Verify final answer synthesizes all gathered information
        assert result["final_answer"] is not None
        assert "São Paulo" in result["final_answer"]["answer"]
        assert "12.4 million" in result["final_answer"]["answer"]
        assert "aircraft" in result["final_answer"]["answer"]
        assert "vehicles" in result["final_answer"]["answer"]

    def test_action_name_with_quotes_handling(self):
        """Test that action names with quotes are properly stripped
        
        This test verifies the fix for when LLMs output action names wrapped
        in quotes, e.g., Action: "get_bank_balance" instead of Action: get_bank_balance
        """
        # Arrange
        def parse_react_output(text):
            """Parse ReAct format output into structured steps"""
            steps = []
            lines = text.strip().split('\n')
            
            thought = None
            action = None
            args = None
            
            for line in lines:
                line = line.strip()
                if line.startswith('Think:') or line.startswith('Thought:'):
                    thought = line.split(':', 1)[1].strip()
                elif line.startswith('Action:'):
                    action = line[7:].strip()
                    # Strip quotes from action name - this is the fix being tested
                    while action and action[0] == '"':
                        action = action[1:]
                    while action and action[-1] == '"':
                        action = action[:-1]
                elif line.startswith('Args:'):
                    # Simple args parsing for test
                    args_text = line[5:].strip()
                    if args_text:
                        import json
                        try:
                            args = json.loads(args_text)
                        except:
                            args = {"raw": args_text}
            
            return {
                "thought": thought,
                "action": action,
                "args": args
            }
        
        # Test cases with various quote patterns
        test_cases = [
            # Normal case without quotes
            (
                'Thought: I need to check the bank balance\nAction: get_bank_balance\nArgs: {"account": "12345"}',
                "get_bank_balance"
            ),
            # Single quotes around action name
            (
                'Thought: I need to check the bank balance\nAction: "get_bank_balance"\nArgs: {"account": "12345"}',
                "get_bank_balance"
            ),
            # Multiple quotes (nested)
            (
                'Thought: I need to check the bank balance\nAction: ""get_bank_balance""\nArgs: {"account": "12345"}',
                "get_bank_balance"
            ),
            # Action with underscores and quotes
            (
                'Thought: I need to search\nAction: "search_knowledge_base"\nArgs: {"query": "test"}',
                "search_knowledge_base"
            ),
            # Action with hyphens and quotes
            (
                'Thought: I need to search\nAction: "search-knowledge-base"\nArgs: {"query": "test"}',
                "search-knowledge-base"
            ),
            # Edge case: just quotes (should result in empty string)
            (
                'Thought: Error case\nAction: ""\nArgs: {}',
                ""
            ),
            # Mixed quotes at start and end
            (
                'Thought: Processing\nAction: """complex_tool"""\nArgs: {}',
                "complex_tool"
            ),
        ]
        
        # Act & Assert
        for llm_output, expected_action in test_cases:
            result = parse_react_output(llm_output)
            assert result["action"] == expected_action, \
                f"Failed to parse action correctly from: {llm_output}\nExpected: {expected_action}, Got: {result['action']}"
        
        # Test with actual tool matching
        tools = {
            "get_bank_balance": {"description": "Get bank balance"},
            "search_knowledge_base": {"description": "Search knowledge"},
            "complex_tool": {"description": "Complex operations"}
        }
        
        # Simulate tool lookup with quoted action names
        quoted_actions = [
            '"get_bank_balance"',
            '""search_knowledge_base""',
            'complex_tool',  # without quotes
            '"complex_tool"'
        ]
        
        for quoted_action in quoted_actions:
            # Strip quotes as the fix does
            clean_action = quoted_action
            while clean_action and clean_action[0] == '"':
                clean_action = clean_action[1:]
            while clean_action and clean_action[-1] == '"':
                clean_action = clean_action[:-1]
            
            # Verify the cleaned action exists in tools (except empty string case)
            if clean_action:
                assert clean_action in tools, \
                    f"Cleaned action '{clean_action}' from '{quoted_action}' should be in tools"

    def test_mcp_tool_arguments_support(self):
        """Test that MCP tools can be configured with arguments and expose them correctly
        
        This test verifies the MCP tool arguments feature where:
        1. MCP tool configurations can specify arguments
        2. Configuration parsing extracts arguments correctly
        3. Arguments are structured properly for tool use
        """
        # Define a simple Argument class for testing (mimics the real one)
        class TestArgument:
            def __init__(self, name, type, description):
                self.name = name
                self.type = type 
                self.description = description
        
        # Define a mock McpToolImpl that mimics the new functionality
        class MockMcpToolImpl:
            def __init__(self, context, mcp_tool_id, arguments=None):
                self.context = context
                self.mcp_tool_id = mcp_tool_id
                self.arguments = arguments or []
            
            def get_arguments(self):
                return self.arguments
        
        # Test 1: MCP tool with arguments
        test_arguments = [
            TestArgument(
                name="account_id",
                type="string",
                description="Bank account identifier"
            ),
            TestArgument(
                name="date",
                type="string",
                description="Date for balance query (optional, format: YYYY-MM-DD)"
            )
        ]
        
        context_mock = lambda service_name: None
        mcp_tool_with_args = MockMcpToolImpl(
            context=context_mock,
            mcp_tool_id="get_bank_balance",
            arguments=test_arguments
        )
        
        returned_args = mcp_tool_with_args.get_arguments()
        
        # Verify arguments are stored and returned correctly
        assert len(returned_args) == 2
        assert returned_args[0].name == "account_id"
        assert returned_args[0].type == "string"
        assert returned_args[0].description == "Bank account identifier"
        assert returned_args[1].name == "date"
        assert returned_args[1].type == "string"
        assert "optional" in returned_args[1].description.lower()
        
        # Test 2: MCP tool without arguments (backward compatibility)
        mcp_tool_no_args = MockMcpToolImpl(
            context=context_mock,
            mcp_tool_id="simple_tool"
        )
        
        returned_args_empty = mcp_tool_no_args.get_arguments()
        assert len(returned_args_empty) == 0
        assert returned_args_empty == []
        
        # Test 3: MCP tool with empty arguments list
        mcp_tool_empty_args = MockMcpToolImpl(
            context=context_mock,
            mcp_tool_id="another_tool",
            arguments=[]
        )
        
        returned_args_explicit_empty = mcp_tool_empty_args.get_arguments()
        assert len(returned_args_explicit_empty) == 0
        assert returned_args_explicit_empty == []
        
        # Test 4: Configuration parsing simulation
        def simulate_config_parsing(config_data):
            """Simulate how service.py parses MCP tool configuration"""
            config_args = config_data.get("arguments", [])
            arguments = [
                TestArgument(
                    name=arg.get("name"),
                    type=arg.get("type"),
                    description=arg.get("description")
                )
                for arg in config_args
            ]
            return arguments
        
        # Test configuration with arguments
        config_with_args = {
            "type": "mcp-tool",
            "name": "get_bank_balance",
            "description": "Get bank account balance",
            "mcp-tool": "get_bank_balance",
            "arguments": [
                {
                    "name": "account_id",
                    "type": "string",
                    "description": "Bank account identifier"
                },
                {
                    "name": "date",
                    "type": "string",
                    "description": "Date for balance query (optional)"
                }
            ]
        }
        
        parsed_args = simulate_config_parsing(config_with_args)
        assert len(parsed_args) == 2
        assert parsed_args[0].name == "account_id"
        assert parsed_args[1].name == "date"
        
        # Test configuration without arguments
        config_without_args = {
            "type": "mcp-tool",
            "name": "simple_tool",
            "description": "Simple MCP tool",
            "mcp-tool": "simple_tool"
        }
        
        parsed_args_empty = simulate_config_parsing(config_without_args)
        assert len(parsed_args_empty) == 0
        
        # Test 5: Argument structure validation
        def validate_argument_structure(arg):
            """Validate that an argument has required fields"""
            required_fields = ['name', 'type', 'description']
            return all(hasattr(arg, field) and getattr(arg, field) for field in required_fields)
        
        # Validate all parsed arguments have proper structure
        for arg in parsed_args:
            assert validate_argument_structure(arg), f"Argument {arg.name} missing required fields"
        
        # Test 6: Prompt template integration simulation
        def simulate_prompt_template_rendering(tools):
            """Simulate how agent prompts include tool arguments"""
            tool_descriptions = []
            
            for tool in tools:
                tool_desc = f"- **{tool.name}**: {tool.description}"
                
                # Add argument details if present
                for arg in tool.arguments:
                    tool_desc += f"\n  - Required: `\"{arg.name}\"` ({arg.type}): {arg.description}"
                    
                tool_descriptions.append(tool_desc)
            
            return "\n".join(tool_descriptions)
        
        # Create mock tools with our MCP tool
        class MockTool:
            def __init__(self, name, description, arguments):
                self.name = name
                self.description = description
                self.arguments = arguments
        
        mock_tools = [
            MockTool("search", "Search the web", []),  # Tool without arguments
            MockTool("get_bank_balance", "Get bank account balance", parsed_args)  # MCP tool with arguments
        ]
        
        prompt_section = simulate_prompt_template_rendering(mock_tools)
        
        # Verify the prompt includes MCP tool arguments
        assert "get_bank_balance" in prompt_section
        assert "account_id" in prompt_section
        assert "Bank account identifier" in prompt_section
        assert "date" in prompt_section
        assert "(string)" in prompt_section
        assert "Required:" in prompt_section
        
        # Verify tools without arguments still work
        assert "search" in prompt_section
        assert "Search the web" in prompt_section

    def test_error_handling_in_react_cycle(self):
        """Test error handling during ReAct execution"""
        # Arrange
        def execute_react_step_with_errors(step_type, content, tools=None):
            """Execute ReAct step with potential error handling"""
            try:
                if step_type == "think":
                    # Thinking step - validate reasoning
                    if not content or len(content.strip()) < 5:
                        return {"error": "Reasoning too brief"}
                    return {"success": True, "content": content}
                
                elif step_type == "act":
                    # Action step - validate tool exists and execute
                    if not tools or not content:
                        return {"error": "No tools available or no action specified"}
                    
                    # Parse tool and parameters
                    if ":" in content:
                        tool_name, params = content.split(":", 1)
                        tool_name = tool_name.strip()
                        params = params.strip()
                        
                        if tool_name not in tools:
                            return {"error": f"Tool {tool_name} not available"}
                        
                        # Execute tool
                        result = tools[tool_name](params)
                        return {"success": True, "tool_result": result}
                    else:
                        return {"error": "Invalid action format"}
                
                elif step_type == "observe":
                    # Observation step - validate observation
                    if not content:
                        return {"error": "No observation provided"}
                    return {"success": True, "content": content}
                
                else:
                    return {"error": f"Unknown step type: {step_type}"}
                    
            except Exception as e:
                return {"error": f"Execution error: {str(e)}"}
        
        # Test cases
        mock_tools = {
            "calculator": lambda x: str(eval(x)) if x.replace('+', '').replace('-', '').replace('*', '').replace('/', '').replace(' ', '').isdigit() else "Error"
        }
        
        test_cases = [
            ("think", "I need to calculate", {"success": True}),
            ("think", "", {"error": True}),  # Empty reasoning
            ("act", "calculator: 2 + 2", {"success": True}),
            ("act", "nonexistent: something", {"error": True}),  # Tool doesn't exist
            ("act", "invalid format", {"error": True}),  # Invalid format
            ("observe", "The result is 4", {"success": True}),
            ("observe", "", {"error": True}),  # Empty observation
            ("invalid_step", "content", {"error": True})  # Invalid step type
        ]
        
        # Act & Assert
        for step_type, content, expected in test_cases:
            result = execute_react_step_with_errors(step_type, content, mock_tools)
            
            if expected.get("error"):
                assert "error" in result, f"Expected error for step {step_type}: {content}"
            else:
                assert "success" in result, f"Expected success for step {step_type}: {content}"

    def test_response_synthesis_logic(self):
        """Test synthesis of final response from ReAct steps"""
        # Arrange
        react_steps = [
            {"type": "think", "content": "I need to find the capital of France"},
            {"type": "act", "tool_name": "knowledge_search", "tool_result": "Paris is the capital of France"},
            {"type": "observe", "content": "The search confirmed Paris is the capital"},
            {"type": "think", "content": "I have the information needed to answer"}
        ]
        
        def synthesize_response(steps, original_question):
            """Synthesize final response from ReAct steps"""
            # Extract key information from steps
            tool_results = []
            observations = []
            reasoning = []
            
            for step in steps:
                if step["type"] == "think":
                    reasoning.append(step["content"])
                elif step["type"] == "act" and "tool_result" in step:
                    tool_results.append(step["tool_result"])
                elif step["type"] == "observe":
                    observations.append(step["content"])
            
            # Build response based on available information
            if tool_results:
                # Use tool results as primary information source
                primary_info = tool_results[0]
                
                # Extract specific answer from tool result
                if "capital" in original_question.lower() and "Paris" in primary_info:
                    return "The capital of France is Paris."
                elif "+" in original_question and any(char.isdigit() for char in primary_info):
                    return f"The answer is {primary_info}."
                else:
                    return primary_info
            else:
                # Fallback to reasoning if no tool results
                return "I need more information to answer this question."
        
        # Act
        response = synthesize_response(react_steps, "What is the capital of France?")
        
        # Assert
        assert "Paris" in response
        assert "capital of france" in response.lower()
        assert len(response) > 10  # Should be a complete sentence

    def test_tool_parameter_extraction(self):
        """Test extraction and validation of tool parameters"""
        # Arrange
        def extract_tool_parameters(action_content, tool_schema):
            """Extract and validate parameters for tool execution"""
            # Parse action content for tool name and parameters
            if ":" not in action_content:
                return {"error": "Invalid action format - missing tool parameters"}
            
            tool_name, params_str = action_content.split(":", 1)
            tool_name = tool_name.strip()
            params_str = params_str.strip()
            
            if tool_name not in tool_schema:
                return {"error": f"Unknown tool: {tool_name}"}
            
            schema = tool_schema[tool_name]
            required_params = schema.get("required_parameters", [])
            
            # Simple parameter extraction (for more complex tools, this would be more sophisticated)
            if len(required_params) == 1 and required_params[0] == "query":
                # Single query parameter
                return {"tool_name": tool_name, "parameters": {"query": params_str}}
            elif len(required_params) == 1 and required_params[0] == "expression":
                # Single expression parameter  
                return {"tool_name": tool_name, "parameters": {"expression": params_str}}
            else:
                # Multiple parameters would need more complex parsing
                return {"tool_name": tool_name, "parameters": {"input": params_str}}
        
        tool_schema = {
            "knowledge_search": {"required_parameters": ["query"]},
            "calculator": {"required_parameters": ["expression"]},
            "graph_rag": {"required_parameters": ["query"]}
        }
        
        test_cases = [
            ("knowledge_search: capital of France", "knowledge_search", {"query": "capital of France"}),
            ("calculator: 2 + 2", "calculator", {"expression": "2 + 2"}),
            ("invalid format", None, None),  # No colon
            ("unknown_tool: something", None, None)  # Unknown tool
        ]
        
        # Act & Assert
        for action_content, expected_tool, expected_params in test_cases:
            result = extract_tool_parameters(action_content, tool_schema)
            
            if expected_tool is None:
                assert "error" in result
            else:
                assert result["tool_name"] == expected_tool
                assert result["parameters"] == expected_params