Remove immature code, improve naming, and add a response (rsp) cache for unit tests

2026-05-15 11:02:36 +02:00 · 2024-01-30 21:04:33 +08:00 · 2024-01-30 21:04:33 +08:00 · 4a7929d880
commit 4a7929d880
parent 31a9410e6d
18 changed files with 275 additions and 704 deletions
--- a/metagpt/actions/ml_da_action.py
+++ b/metagpt/actions/ml_da_action.py
@ -1,28 +1,64 @@
 import json
+from typing import List, Tuple

 from metagpt.actions import Action
-from metagpt.prompts.ml_engineer import PRINT_DATA_COLUMNS, UPDATE_DATA_COLUMNS
-from metagpt.schema import Plan
+from metagpt.actions.write_analysis_code import WriteCodeWithTools
+from metagpt.prompts.ml_action import (
+    GENERATE_CODE_PROMPT,
+    ML_TOOL_USAGE_PROMPT,
+    PRINT_DATA_COLUMNS,
+    UPDATE_DATA_COLUMNS,
+)
+from metagpt.prompts.write_analysis_code import CODE_GENERATOR_WITH_TOOLS
+from metagpt.schema import Message, Plan
 from metagpt.utils.common import CodeParser, create_func_config, remove_comments


-class SummarizeAnalysis(Action):
-    PROMPT_TEMPLATE: str = """
-    # Context
-    {context}
-    # Summary
-    Output a 30-word summary on analysis tool and modeling algorithms you have used, and the corresponding result. Make sure to announce the complete path to your test prediction file. Your summary:
-    """
+class WriteCodeWithToolsML(WriteCodeWithTools):
+    async def run(
+        self,
+        context: List[Message],
+        plan: Plan = None,
+        column_info: str = "",
+        **kwargs,
+    ) -> Tuple[List[Message], str]:
+        # prepare tool schemas and tool-type-specific instruction
+        tool_schemas, tool_type_usage_prompt = await self._prepare_tools(plan=plan)

-    async def run(self, conmpleted_plan: Plan) -> str:
-        tasks = json.dumps(
-            [task.dict() for task in conmpleted_plan.tasks],
-            indent=4,
-            ensure_ascii=False,
-        )  # all tasks finished, return all task outputs
-        prompt = self.PROMPT_TEMPLATE.format(context=tasks)
-        summary = await self._aask(prompt)
-        return summary
+        # ML-specific variables to be used in prompt
+        code_steps = plan.current_task.code_steps
+        finished_tasks = plan.get_finished_tasks()
+        code_context = [remove_comments(task.code) for task in finished_tasks]
+        code_context = "\n\n".join(code_context)
+
+        # prepare prompt depending on tool availability & LLM call
+        if tool_schemas:
+            prompt = ML_TOOL_USAGE_PROMPT.format(
+                user_requirement=plan.goal,
+                history_code=code_context,
+                current_task=plan.current_task.instruction,
+                column_info=column_info,
+                tool_type_usage_prompt=tool_type_usage_prompt,
+                code_steps=code_steps,
+                tool_schemas=tool_schemas,
+            )
+
+        else:
+            prompt = GENERATE_CODE_PROMPT.format(
+                user_requirement=plan.goal,
+                history_code=code_context,
+                current_task=plan.current_task.instruction,
+                column_info=column_info,
+                tool_type_usage_prompt=tool_type_usage_prompt,
+                code_steps=code_steps,
+            )
+        tool_config = create_func_config(CODE_GENERATOR_WITH_TOOLS)
+        rsp = await self.llm.aask_code(prompt, **tool_config)
+
+        # Extra output to be used for potential debugging
+        context = [Message(content=prompt, role="user")]
+
+        return context, rsp


 class Reflect(Action):
--- a/metagpt/actions/write_analysis_code.py
+++ b/metagpt/actions/write_analysis_code.py
@ -4,19 +4,12 @@
@Author  :   orange-crow
@File    :   write_code_v2.py
 """
-import re
-from pathlib import Path
 from typing import Dict, List, Tuple, Union

-from tenacity import retry, stop_after_attempt, wait_fixed
-
 from metagpt.actions import Action
-from metagpt.llm import LLM
 from metagpt.logs import logger
-from metagpt.prompts.ml_engineer import (
+from metagpt.prompts.write_analysis_code import (
    CODE_GENERATOR_WITH_TOOLS,
-    GENERATE_CODE_PROMPT,
-    ML_TOOL_USAGE_PROMPT,
    SELECT_FUNCTION_TOOLS,
    TOOL_RECOMMENDATION_PROMPT,
    TOOL_USAGE_PROMPT,
@ -24,7 +17,7 @@ from metagpt.prompts.ml_engineer import (
 from metagpt.schema import Message, Plan
 from metagpt.tools import TOOL_REGISTRY
 from metagpt.tools.tool_registry import validate_tool_names
-from metagpt.utils.common import create_func_config, remove_comments
+from metagpt.utils.common import create_func_config


 class BaseWriteAnalysisCode(Action):
@ -195,133 +188,3 @@ class WriteCodeWithTools(BaseWriteAnalysisCode):
        rsp = await self.llm.aask_code(prompt, **tool_config)

        return rsp
-
-
-class WriteCodeWithToolsML(WriteCodeWithTools):
-    async def run(
-        self,
-        context: List[Message],
-        plan: Plan = None,
-        column_info: str = "",
-        **kwargs,
-    ) -> Tuple[List[Message], str]:
-        # prepare tool schemas and tool-type-specific instruction
-        tool_schemas, tool_type_usage_prompt = await self._prepare_tools(plan=plan)
-
-        # ML-specific variables to be used in prompt
-        code_steps = plan.current_task.code_steps
-        finished_tasks = plan.get_finished_tasks()
-        code_context = [remove_comments(task.code) for task in finished_tasks]
-        code_context = "\n\n".join(code_context)
-
-        # prepare prompt depending on tool availability & LLM call
-        if tool_schemas:
-            prompt = ML_TOOL_USAGE_PROMPT.format(
-                user_requirement=plan.goal,
-                history_code=code_context,
-                current_task=plan.current_task.instruction,
-                column_info=column_info,
-                tool_type_usage_prompt=tool_type_usage_prompt,
-                code_steps=code_steps,
-                tool_schemas=tool_schemas,
-            )
-
-        else:
-            prompt = GENERATE_CODE_PROMPT.format(
-                user_requirement=plan.goal,
-                history_code=code_context,
-                current_task=plan.current_task.instruction,
-                column_info=column_info,
-                tool_type_usage_prompt=tool_type_usage_prompt,
-                code_steps=code_steps,
-            )
-        tool_config = create_func_config(CODE_GENERATOR_WITH_TOOLS)
-        rsp = await self.llm.aask_code(prompt, **tool_config)
-
-        # Extra output to be used for potential debugging
-        context = [Message(content=prompt, role="user")]
-
-        return context, rsp
-
-
-class MakeTools(WriteCodeByGenerate):
-    DEFAULT_SYSTEM_MSG: str = """Convert any codes provied for you to a very General Function Code startswith `def`.\n
-    **Notice:
-    1. Your code must contain a general function start with `def`.
-    2. Refactor your code to get the most efficient implementation for large input data in the shortest amount of time.
-    3. Must use Google style for function docstring, and your docstring must be consistent with the code,without missing anything.
-    4. Write example code after `if __name__ == '__main__':`by using old varibales in old code,
-    and make sure it could be execute in the user's machine.
-    5. Only use the imported packages**
-    """
-
-    def __init__(self, name: str = "", context: list[Message] = None, llm: LLM = None, workspace: str = None):
-        """
-        :param str name: name, defaults to ''
-        :param list[Message] context: context, defaults to None
-        :param LLM llm: llm, defaults to None
-        :param str workspace: tools code saved file path dir, defaults to None
-        """
-        super().__init__(name, context, llm)
-        self.workspace = workspace or str(Path(__file__).parents[1].joinpath("./tools/functions/libs/udf"))
-        self.file_suffix: str = ".py"
-        self.context = []
-
-    def parse_function_name(self, function_code: str) -> str:
-        # 定义正则表达式模式
-        pattern = r"\bdef\s+([a-zA-Z_]\w*)\s*\("
-        # 在代码中搜索匹配的模式
-        match = re.search(pattern, function_code)
-        # 如果找到匹配项，则返回匹配的函数名；否则返回None
-        if match:
-            return match.group(1)
-        else:
-            return None
-
-    def save(self, tool_code: str) -> None:
-        func_name = self.parse_function_name(tool_code)
-        if func_name is None:
-            raise ValueError(f"No function name found in {tool_code}")
-        saved_path = Path(self.workspace).joinpath(func_name + self.file_suffix)
-        logger.info(f"Saved tool_code {func_name} in {str(saved_path)}.")
-        saved_path.write_text(tool_code, encoding="utf-8")
-
-    @retry(stop=stop_after_attempt(3), wait=wait_fixed(1))
-    async def run(self, code: Union[str, List[dict]], code_desc: str = None, **kwargs) -> str:
-        # 拼接code prompt
-        code_prompt = f"The following code is about {code_desc}, convert it to be a General Function, {code}"
-        if not self.context:
-            self.context = self.process_msg(code_prompt)
-        else:
-            self.context.append(self.process_msg(code_prompt)[-1])
-        logger.info(f"\n\nAsk to Make tools:\n{'-'*60}\n {self.context[-1]}")
-
-        # 更新kwargs
-        if "code" in kwargs:
-            kwargs.pop("code")
-        if "code_desc" in kwargs:
-            kwargs.pop("code_desc")
-
-        max_tries, current_try = 3, 0
-        while True:
-            tool_code = await self.llm.aask_code(self.context, **kwargs)
-            func_name = self.parse_function_name(tool_code["code"])
-            current_try += 1
-            # make tools failed, add error message to context.
-            if not func_name:
-                logger.info(f"\n\nTools Respond\n{'-'*60}\n: {tool_code}")
-                logger.error(f"No function name found in code, we will retry make tools.\n{tool_code['code']}\n")
-                self.context.append(
-                    {"role": "user", "content": "We need a general function in above code,but not found function."}
-                )
-            # end make tools
-            if func_name is not None or current_try >= max_tries:
-                if current_try >= max_tries:
-                    logger.error(
-                        f"We have tried the maximum number of attempts {max_tries}\
-                    and still have not created tools successfully, we will skip it."
-                    )
-                break
-        logger.info(f"\n\nTools Respond\n{'-'*60}\n: {tool_code}")
-        self.save(tool_code["code"])
-        return tool_code["code"]
--- a/metagpt/actions/write_code_steps.py
+++ b/metagpt/actions/write_code_steps.py
@ -1,116 +0,0 @@
-import json
-
-from metagpt.actions import Action
-from metagpt.schema import Plan
-from metagpt.utils.common import CodeParser
-
-# CODE_STEPS_PROMPT_TEMPLATE = """
-# # Context
-# {context}
-#
-# -----
-# Tasks are all code development tasks.
-# You are a professional engineer, the main goal is to plan out concise solution steps for Current Task before coding.
-# A planning process can reduce the difficulty and improve the quality of coding.
-# You may be given some code plans for the tasks ahead, but you don't have to follow the existing plan when planning the current task.
-# The output plan should following the subsequent principles:
-# 1.The plan is a rough checklist of steps outlining the entire program's structure.Try to keep the number of steps fewer than 5.
-# 2.The steps should be written concisely and at a high level, avoiding overly detailed implementation specifics.
-# 3.The execution of the plan happens sequentially, but the plan can incorporate conditional (if) and looping(loop) keywords for more complex structures.
-#
-# Output the code steps in a JSON format, as shown in this example:
-# ```json
-# {
-#     "Step 1": "",
-#     "Step 2": "",
-#     "Step 3": "",
-#     ...
-# }
-# ```
-# """
-
-CODE_STEPS_PROMPT_TEMPLATE = """
-# Context
-{context}
-
-----
-Tasks are all code development tasks.
-You are a professional engineer, the main goal is to plan out concise solution steps for Current Task before coding.
-A planning process can reduce the difficulty and improve the quality of coding.
-You may be given some code plans for the tasks ahead, but you don't have to follow the existing plan when planning the current task.
-The output plan should following the subsequent principles:
-1.The plan is a rough checklist of steps outlining the entire program's structure.Try to keep the number of steps fewer than 5.
-2.The steps should be written concisely and at a high level, avoiding overly detailed implementation specifics.
-3.The execution of the plan happens sequentially, but the plan can incorporate conditional (if) and looping(loop) keywords for more complex structures.
-4.Design and provide code steps by following the code logic. Analyze the provided code step by step and reuse the imported library.
-
-Output the code steps in a JSON format, as shown in this example:
-```json
-{
-    "Step 1": "",
-    "Step 2": "",
-    "Step 3": "",
-    ...
-}
-```
-"""
-
-# STRUCTURAL_CONTEXT = """
-# ## User Requirement
-# {user_requirement}
-# ## Current Plan
-# {tasks}
-# ## Current Task
-# {current_task}
-# """
-
-STRUCTURAL_CONTEXT = """
-## User Requirement
-{user_requirement}
-## Plan
-{tasks}
-## Codes
-{codes}
-## Current Task
-{current_task}
-"""
-
-
-class WriteCodeSteps(Action):
-    async def run(self, plan: Plan) -> str:
-        """Run of a task guide writing action, used in ml engineer
-
-        Args:
-            plan (plan): task plan
-            useful_memories (list): useful_memories
-        Returns:
-            str: The dataset_descriptions string.
-        """
-
-        context = self.get_context(plan)
-        code_steps_prompt = CODE_STEPS_PROMPT_TEMPLATE.replace("{context}", context)
-        code_steps = await self._aask(code_steps_prompt)
-        code_steps = CodeParser.parse_code(block=None, text=code_steps)
-        return code_steps
-
-    def get_context(self, plan: Plan):
-        user_requirement = plan.goal
-        # select_task_keys = ['task_id', 'instruction', 'is_finished', 'code']
-        # select_task_keys = ['task_id','instruction']
-
-        def process_task(task):
-            task_dict = task.dict()
-            # ptask = {k: task_dict[k] for k in task_dict if k in select_task_keys }
-            ptask = f"task_id_{task_dict['task_id']}:{task_dict['instruction']}"
-            return ptask
-
-        tasks = json.dumps([process_task(task) for task in plan.tasks], indent=4, ensure_ascii=False)
-
-        code_lists = [task.code for task in plan.tasks if task.is_finished == True]
-        codes = "\n\n".join(code_lists)
-        current_task = json.dumps(process_task(plan.current_task)) if plan.current_task else {}
-        context = STRUCTURAL_CONTEXT.format(
-            user_requirement=user_requirement, tasks=tasks, codes=codes, current_task=current_task
-        )
-        # print(context)
-        return context
--- a/metagpt/actions/write_plan.py
+++ b/metagpt/actions/write_plan.py
@ -10,7 +10,10 @@ from typing import Dict, List, Tuple

 from metagpt.actions import Action
 from metagpt.logs import logger
-from metagpt.prompts.ml_engineer import ASSIGN_TASK_TYPE_CONFIG, ASSIGN_TASK_TYPE_PROMPT
+from metagpt.prompts.write_analysis_code import (
+    ASSIGN_TASK_TYPE_CONFIG,
+    ASSIGN_TASK_TYPE_PROMPT,
+)
 from metagpt.schema import Message, Plan, Task
 from metagpt.tools import TOOL_REGISTRY
 from metagpt.utils.common import CodeParser, create_func_config
--- a/metagpt/prompts/ml_engineer.py
+++ b/metagpt/prompts/ml_engineer.py
@ -2,7 +2,7 @@
 # -*- coding: utf-8 -*-
 # @Time    : 2023/11/24 15:43
 # @Author  : lidanyang
-# @File    : ml_engineer
+# @File    : ml_action
 # @Desc    :
 UPDATE_DATA_COLUMNS = """
 # Background
@ -49,85 +49,6 @@ Output the information in a JSON format, as shown in this example:
 - Don't contain specific values or examples found in the data column.
 """

-ASSIGN_TASK_TYPE_PROMPT = """
-Please assign a task type to each task in the list below from the given categories:
-{task_list}
-
-## All Task Type:
-{task_type_desc}
-"""
-
-ASSIGN_TASK_TYPE_CONFIG = {
-    "name": "assign_task_type",
-    "description": "Assign task type to each task by order.",
-    "parameters": {
-        "type": "object",
-        "properties": {
-            "task_type": {
-                "type": "array",
-                "description": "List of task type. The length should as long as task list",
-                "items": {
-                    "type": "string",
-                },
-            },
-        },
-        "required": ["task_type"],
-    },
-}
-
-TOOL_RECOMMENDATION_PROMPT = """
-## User Requirement:
-{current_task}
-
-## Task
-Recommend up to five tools from 'Available Tools' that can help solve the 'User Requirement'. 
-This is a detailed code steps for current task. You can refer to it when recommending tools.
-{code_steps}
-
-## Available Tools:
-{available_tools}
-
-## Tool Selection and Instructions:
- Select tools most relevant to completing the 'User Requirement'.
- If you believe that no tools are suitable, indicate with an empty list.
- Only list the names of the tools, not the full schema of each tool.
- Ensure selected tools are listed in 'Available Tools'.
-"""
-
-SELECT_FUNCTION_TOOLS = {
-    "name": "select_function_tools",
-    "description": "For current task, select suitable tools for it.",
-    "parameters": {
-        "type": "object",
-        "properties": {
-            "recommend_tools": {
-                "type": "array",
-                "description": "List of tool names. Empty list if no tool is suitable.",
-                "items": {
-                    "type": "string",
-                },
-            },
-        },
-        "required": ["recommend_tools"],
-    },
-}
-
-CODE_GENERATOR_WITH_TOOLS = {
-    "name": "add_subtask_code",
-    "description": "Add new code cell of current task to the end of an active Jupyter notebook.",
-    "parameters": {
-        "type": "object",
-        "properties": {
-            "code": {
-                "type": "string",
-                "description": "The code to be added to a new cell in jupyter.",
-            },
-        },
-        "required": ["code"],
-    },
-}
-
-
 PRINT_DATA_COLUMNS = {
    "name": "print_column_info",
    "description": "Print the latest column information after 'Done Tasks' code if first read or data changed.",
@ -189,24 +110,6 @@ model.fit(train, y_train)
 - The output code should contain all steps implemented in 'Code Steps'.
 """

-TOOL_USAGE_PROMPT = """
-# Instruction
-Write complete code for 'Current Task'. And avoid duplicating code from finished tasks, such as repeated import of packages, reading data, etc.
-Specifically, {tool_type_usage_prompt}
-
-# Capabilities
- You can utilize pre-defined tools in any code lines from 'Available Tools' in the form of Python Class.
- You can freely combine the use of any other public packages, like sklearn, numpy, pandas, etc..
-
-# Available Tools (can be empty):
-Each Class tool is described in JSON format. When you call a tool, import the tool first.
-{tool_schemas}
-
-# Constraints:
- Ensure the output new code is executable in the same Jupyter notebook with previous tasks code have been executed.
- Always prioritize using pre-defined tools for the same functionality.
-"""
-
 ML_TOOL_USAGE_PROMPT = """
 # Background
 As a data scientist, you need to help user to achieve their goal [{user_requirement}] step-by-step in an continuous Jupyter notebook.
--- a/metagpt/prompts/write_analysis_code.py
+++ b/metagpt/prompts/write_analysis_code.py
@ -0,0 +1,95 @@
+ASSIGN_TASK_TYPE_PROMPT = """
+Please assign a task type to each task in the list below from the given categories:
+{task_list}
+
+## All Task Type:
+{task_type_desc}
+"""
+
+ASSIGN_TASK_TYPE_CONFIG = {
+    "name": "assign_task_type",
+    "description": "Assign task type to each task by order.",
+    "parameters": {
+        "type": "object",
+        "properties": {
+            "task_type": {
+                "type": "array",
+                "description": "List of task type. The length should as long as task list",
+                "items": {
+                    "type": "string",
+                },
+            },
+        },
+        "required": ["task_type"],
+    },
+}
+
+TOOL_RECOMMENDATION_PROMPT = """
+## User Requirement:
+{current_task}
+
+## Task
+Recommend up to five tools from 'Available Tools' that can help solve the 'User Requirement'. 
+This is a detailed code steps for current task. You can refer to it when recommending tools.
+{code_steps}
+
+## Available Tools:
+{available_tools}
+
+## Tool Selection and Instructions:
+- Select tools most relevant to completing the 'User Requirement'.
+- If you believe that no tools are suitable, indicate with an empty list.
+- Only list the names of the tools, not the full schema of each tool.
+- Ensure selected tools are listed in 'Available Tools'.
+"""
+
+SELECT_FUNCTION_TOOLS = {
+    "name": "select_function_tools",
+    "description": "For current task, select suitable tools for it.",
+    "parameters": {
+        "type": "object",
+        "properties": {
+            "recommend_tools": {
+                "type": "array",
+                "description": "List of tool names. Empty list if no tool is suitable.",
+                "items": {
+                    "type": "string",
+                },
+            },
+        },
+        "required": ["recommend_tools"],
+    },
+}
+
+CODE_GENERATOR_WITH_TOOLS = {
+    "name": "add_subtask_code",
+    "description": "Add new code cell of current task to the end of an active Jupyter notebook.",
+    "parameters": {
+        "type": "object",
+        "properties": {
+            "code": {
+                "type": "string",
+                "description": "The code to be added to a new cell in jupyter.",
+            },
+        },
+        "required": ["code"],
+    },
+}
+
+TOOL_USAGE_PROMPT = """
+# Instruction
+Write complete code for 'Current Task'. And avoid duplicating code from finished tasks, such as repeated import of packages, reading data, etc.
+Specifically, {tool_type_usage_prompt}
+
+# Capabilities
+- You can utilize pre-defined tools in any code lines from 'Available Tools' in the form of Python Class.
+- You can freely combine the use of any other public packages, like sklearn, numpy, pandas, etc..
+
+# Available Tools (can be empty):
+Each Class tool is described in JSON format. When you call a tool, import the tool first.
+{tool_schemas}
+
+# Constraints:
+- Ensure the output new code is executable in the same Jupyter notebook with previous tasks code have been executed.
+- Always prioritize using pre-defined tools for the same functionality.
+"""
--- a/metagpt/roles/code_interpreter.py
+++ b/metagpt/roles/code_interpreter.py
@ -3,7 +3,6 @@ from pydantic import Field
 from metagpt.actions.ask_review import ReviewConst
 from metagpt.actions.execute_code import ExecutePyCode
 from metagpt.actions.write_analysis_code import WriteCodeByGenerate, WriteCodeWithTools
-from metagpt.actions.write_code_steps import WriteCodeSteps
 from metagpt.logs import logger
 from metagpt.roles import Role
 from metagpt.schema import Message, Task, TaskResult
@ -12,7 +11,6 @@ from metagpt.schema import Message, Task, TaskResult
 class CodeInterpreter(Role):
    auto_run: bool = True
    use_tools: bool = False
-    use_code_steps: bool = False
    execute_code: ExecutePyCode = Field(default_factory=ExecutePyCode, exclude=True)
    tools: list[str] = []

@ -48,10 +46,6 @@ class CodeInterpreter(Role):
        return task_result

    async def _write_and_exec_code(self, max_retry: int = 3):
-        self.planner.current_task.code_steps = (
-            await WriteCodeSteps().run(self.planner.plan) if self.use_code_steps else ""
-        )
-
        counter = 0
        success = False

--- a/metagpt/roles/kaggle_manager.py
+++ b/metagpt/roles/kaggle_manager.py
@ -1,153 +0,0 @@
-import json
-import os
-import subprocess
-
-import fire
-import pandas as pd
-
-from metagpt.actions import Action, UserRequirement
-from metagpt.actions.ml_da_action import SummarizeAnalysis
-from metagpt.config import CONFIG
-from metagpt.logs import logger
-from metagpt.roles import Role
-from metagpt.schema import Message
-from metagpt.utils.common import CodeParser
-
-os.environ["KAGGLE_USERNAME"] = CONFIG.kaggle_username
-os.environ["KAGGLE_KEY"] = CONFIG.kaggle_key
-
-
-def run_command(cmd):
-    print(cmd)
-    output = subprocess.run(cmd, shell=True, capture_output=True, text=True)
-    if output.returncode != 0:
-        print("Error output:", output.stderr)
-        exit()
-    else:
-        print(output.stdout)
-    return output.stdout
-
-
-class DownloadData(Action):
-    async def run(self, competition, data_desc="") -> str:
-        data_path = CONFIG.workspace_path / competition
-
-        output = run_command(f"kaggle competitions list --search {competition}")
-        assert output != "No competitions found", "You must provide the correct competition name"
-
-        run_command(f"kaggle competitions download {competition} --path {WORKSPACE_ROOT}")
-
-        if not os.path.exists(data_path):
-            # if True:
-            # run_command(f"rm -r {data_path / '*'}")
-            run_command(f"unzip -o {CONFIG.workspace_path / '*.zip'} -d {data_path}")  # FIXME: not safe
-
-        file_list = run_command(f"ls {data_path}")
-
-        rsp = f"""
-        Location:
-        Data downloaded at {data_path} folder, including {file_list}
-        Data Description:
-        {data_desc}
-        """
-        return rsp
-
-
-class SubmitResult(Action):
-    PROMPT_TEMPLATE: str = """
-    # Summary
-    __summary__
-    # Your task
-    Extract the file path for test set prediction from the summary above, output a json following the format:
-    ```json
-    {"file_path": str = "the file path, for example, /path/to/the/prediction/file/xxx.csv, /path/to/the/prediction/file/xxx.xlsx"}
-    ```
-    """
-
-    def __init__(self, name: str = "", context=None, llm=None) -> str:
-        super().__init__(name, context, llm)
-
-    async def _parse_submit_file_path(self, context) -> str:
-        prompt = self.PROMPT_TEMPLATE.replace("__summary__", context)
-        rsp = await self._aask(prompt)
-        rsp = CodeParser.parse_code(block=None, text=rsp)
-        file_path = json.loads(rsp)["file_path"]
-        return file_path
-
-    async def run(self, competition, submit_message="") -> str:
-        submit_file_path = await self._parse_submit_file_path(submit_message)
-
-        data_path = CONFIG.workspace_path / competition
-        submit_message = submit_message.replace("'", "")
-
-        run_command(f"kaggle competitions submit {competition} -f {submit_file_path} -m '{submit_message}'")
-        run_command(f"kaggle competitions leaderboard --show --csv {competition} > {data_path / 'leaderboard.csv'}")
-        run_command(f"kaggle competitions submissions --csv {competition} > {data_path / 'submission.csv'}")
-
-        leaderboard = pd.read_csv(data_path / "leaderboard.csv")
-        submission = pd.read_csv(data_path / "submission.csv")
-        print(submission)  # submission.to_json(orient="records")
-
-        submission_score = submission.loc[0, "publicScore"]
-        best_score = max(submission["publicScore"])  # might be min
-        rank = leaderboard.loc[leaderboard["score"] == best_score].index[0]
-        rank_pct = round(rank / len(leaderboard), 4) * 100
-
-        submission_summary = f"""
-        # All histories:
-        {submission.head(5).to_string()}
-        # Current
-        Current submission score: {submission_score}, best score: {best_score}, best rank: {rank} (top {rank_pct}%)
-        """
-        logger.info(submission_summary)
-        return submission_summary
-
-
-class KaggleManager(Role):
-    def __init__(self, name="ABC", profile="KaggleManager", goal="", competition="titanic", data_desc=""):
-        super().__init__(name=name, profile=profile, goal=goal)
-        self._init_actions([DownloadData, SubmitResult])
-        self._watch([UserRequirement, SummarizeAnalysis])
-        self.competition = competition
-        self.data_desc = data_desc  # currently passed in, later can be scrapped down from web by another Role
-
-    async def _think(self):
-        observed = self.get_memories()[-1].cause_by
-        if observed == UserRequirement:
-            self._set_state(0)  # DownloadData, get competition of interest from human, download datasets
-        elif observed == SummarizeAnalysis:
-            self._set_state(1)  # SubmitResult, get prediction from MLEngineer and submit it to Kaggle
-
-    async def _act(self):
-        todo = self.rc.todo
-        logger.info(f"{self._setting}: ready to {self.rc.todo}")
-
-        if isinstance(todo, DownloadData):
-            rsp = await todo.run(self.competition, self.data_desc)
-
-        elif isinstance(todo, SubmitResult):
-            submit_message = self.get_memories()[
-                -1
-            ].content  # use analysis summary from MLEngineer as submission message
-            rsp = await todo.run(competition=self.competition, submit_message=submit_message)
-
-        msg = Message(content=rsp, role="user", cause_by=type(todo))
-
-        return msg
-
-
-if __name__ == "__main__":
-    competition, data_desc, requirement = (
-        "titanic",
-        "Training set is train.csv.\nTest set is test.csv. We also include gender_submission.csv, a set of predictions that assume all and only female passengers survive, as an example of what a submission file should look like.",
-        "Run EDA on the train dataset, train a model to predict survival (20% as validation) and save it, predict the test set using saved model, save the test result according to format",
-    )
-
-    summary = "I used Python with pandas for data preprocessing, sklearn's RandomForestClassifier for modeling, and achieved 82.12% accuracy on validation. Predictions saved at '/Users/gary/Desktop/data_agents_opt/workspace/titanic/gender_submission.csv'."
-
-    async def main(requirement: str = requirement):
-        role = KaggleManager(competition=competition, data_desc=data_desc)
-        # await role.run(Message(content="", cause_by=UserRequirement))
-        await role.run(Message(content=summary, cause_by=SummarizeAnalysis))
-
-    fire.Fire(main)
--- a/metagpt/roles/ml_engineer.py
+++ b/metagpt/roles/ml_engineer.py
@ -1,7 +1,6 @@
 from metagpt.actions.debug_code import DebugCode
 from metagpt.actions.execute_code import ExecutePyCode
-from metagpt.actions.ml_da_action import UpdateDataColumns
-from metagpt.actions.write_analysis_code import WriteCodeWithToolsML
+from metagpt.actions.ml_action import UpdateDataColumns, WriteCodeWithToolsML
 from metagpt.logs import logger
 from metagpt.roles.code_interpreter import CodeInterpreter
 from metagpt.tools.tool_data_type import ToolTypeEnum
--- a/metagpt/roles/tool_maker.py
+++ b/metagpt/roles/tool_maker.py
@ -1,53 +0,0 @@
-from pydantic import Field
-
-from metagpt.actions.ask_review import AskReview
-from metagpt.actions.execute_code import ExecutePyCode
-from metagpt.actions.write_analysis_code import MakeTools
-from metagpt.logs import logger
-from metagpt.roles import Role
-from metagpt.utils.common import remove_comments
-
-
-class ToolMaker(Role):
-    execute_code: ExecutePyCode = Field(default_factory=ExecutePyCode, exclude=True)
-
-    async def make_tool(self, code: str, instruction: str, task_id: str = "", auto_run=True):
-        if len(remove_comments(code).split("\n")) < 5:  # no need to consider trivial codes with fewer than 5 lines
-            return
-
-        logger.warning(
-            f"Making tools for task_id {task_id}: \
-            `{instruction}` \n code: \n {code}"
-        )
-        make_tools = MakeTools()
-        make_tool_retries, make_tool_current_retry = 3, 0
-        while True:
-            # start make tools
-            tool_code = await make_tools.run(code, instruction)
-            make_tool_current_retry += 1
-
-            # check tool_code by execute_code
-            logger.info(f"Checking task_id {task_id} tool code by executor...")
-            execute_result, execute_success = await self.execute_code.run(tool_code)
-            if not execute_success:
-                logger.error(f"Tool code faild to execute, \n{execute_result}\n.We will try to fix it ...")
-            # end make tools
-            if execute_success or make_tool_current_retry >= make_tool_retries:
-                if make_tool_current_retry >= make_tool_retries:
-                    logger.error(
-                        f"We have tried the maximum number of attempts {make_tool_retries}\
-                        and still have not created tools for task_id {task_id} successfully,\
-                            we will skip it."
-                    )
-                break
-        # save successful tool code in udf
-        if execute_success:
-            _, confirmed = await self.ask_review(auto_run=auto_run)
-            if confirmed:
-                make_tools.save(tool_code)
-
-    async def ask_review(self, auto_run: bool = True):
-        if not auto_run:
-            review, confirmed = await AskReview().run()
-            return review, confirmed
-        return "", True
--- a/tests/data/rsp_cache.json
+++ b/tests/data/rsp_cache.json
--- a/tests/metagpt/actions/test_write_analysis_code.py
+++ b/tests/metagpt/actions/test_write_analysis_code.py
@ -3,11 +3,8 @@ import asyncio
 import pytest

 from metagpt.actions.execute_code import ExecutePyCode
-from metagpt.actions.write_analysis_code import (
-    WriteCodeByGenerate,
-    WriteCodeWithTools,
-    WriteCodeWithToolsML,
-)
+from metagpt.actions.ml_action import WriteCodeWithToolsML
+from metagpt.actions.write_analysis_code import WriteCodeByGenerate, WriteCodeWithTools
 from metagpt.logs import logger
 from metagpt.plan.planner import STRUCTURAL_CONTEXT
 from metagpt.schema import Message, Plan, Task
--- a/tests/metagpt/roles/run_code_interpreter.py
+++ b/tests/metagpt/roles/run_code_interpreter.py
@ -9,7 +9,7 @@ from metagpt.schema import Plan
 from metagpt.utils.recovery_util import load_history, save_history


-async def run_code_interpreter(role_class, requirement, auto_run, use_tools, use_code_steps, save_dir, tools):
+async def run_code_interpreter(role_class, requirement, auto_run, use_tools, save_dir, tools):
    """
    The main function to run the MLEngineer with optional history loading.

@ -28,7 +28,6 @@ async def run_code_interpreter(role_class, requirement, auto_run, use_tools, use
        role = MLEngineer(
            auto_run=auto_run,
            use_tools=use_tools,
-            use_code_steps=use_code_steps,
            tools=tools,
        )

@ -75,10 +74,9 @@ if __name__ == "__main__":
        requirement: str = requirement,
        auto_run: bool = auto_run,
        use_tools: bool = use_tools,
-        use_code_steps: bool = False,
        save_dir: str = save_dir,
        tools=tools,
    ):
-        await run_code_interpreter(role_class, requirement, auto_run, use_tools, use_code_steps, save_dir, tools)
+        await run_code_interpreter(role_class, requirement, auto_run, use_tools, save_dir, tools)

    fire.Fire(main)
--- a/tests/metagpt/roles/test_code_interpreter.py
+++ b/tests/metagpt/roles/test_code_interpreter.py
@ -3,11 +3,25 @ import pytest
 from metagpt.logs import logger
 from metagpt.roles.code_interpreter import CodeInterpreter

+# from metagpt.const import DATA_PATH
+

@pytest.mark.asyncio
-async def test_code_interpreter():
+@pytest.mark.parametrize("use_tools", [True])
+async def test_code_interpreter(use_tools):
+    """Run CodeInterpreter on the Iris requirement and check that the response content is non-empty."""
     requirement = "Run data analysis on sklearn Iris dataset, include a plot"
-    ci = CodeInterpreter(goal=requirement, auto_run=True, use_tools=False)
+    # requirement = "Run data analysis on sklearn Wine recognition dataset, include a plot, and train a model to predict wine class (20% as validation), and show validation accuracy"
+    # data_path = f"{DATA_PATH}/titanic"
+    # requirement = f"This is a titanic passenger survival dataset, your goal is to predict passenger survival outcome. The target column is Survived. Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target. Report accuracy on the eval data. Train data path: '{data_path}/split_train.csv', eval data path: '{data_path}/split_eval.csv'."
+    # data_path = f"{DATA_PATH}/icr-identify-age-related-conditions"
+    # requirement = f"This is a medical dataset with over fifty anonymized health characteristics linked to three age-related conditions. Your goal is to predict whether a subject has or has not been diagnosed with one of these conditions.The target column is Class. Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target. Report f1 score on the eval data. Train data path: {data_path}/split_train.csv, eval data path: {data_path}/split_eval.csv."
+    # data_path = f"{DATA_PATH}/house-prices-advanced-regression-techniques"
+    # requirement = f"This is a house price dataset, your goal is to predict the sale price of a property based on its features. The target column is SalePrice. Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target. Report RMSE between the logarithm of the predicted value and the logarithm of the observed sales price on the eval data. Train data path: '{data_path}/split_train.csv', eval data path: '{data_path}/split_eval.csv'."
+    tools = []
+    # tools = ["FillMissingValue", "CatCross", "a"]
+
+    ci = CodeInterpreter(auto_run=True, use_tools=use_tools, tools=tools)
     rsp = await ci.run(requirement)
     logger.info(rsp)
     assert len(rsp.content) > 0
--- a/tests/metagpt/roles/test_daml.py
+++ b/tests/metagpt/roles/test_daml.py
@ -1,50 +0,0 @@
-import pytest
-from tqdm import tqdm
-
-from metagpt.logs import logger
-from metagpt.roles.ml_engineer import ExecutePyCode, MLEngineer
-from metagpt.schema import Plan
-
-
-def reset(role):
-    """Restart role with the same goal."""
-    role.working_memory.clear()
-    role.planner.plan = Plan(goal=role.planner.plan.goal)
-    role.execute_code = ExecutePyCode()
-
-
-async def make_use_tools(requirement: str, auto_run: bool = True):
-    """make and use tools for requirement."""
-    role = MLEngineer(goal=requirement, auto_run=auto_run)
-    # make udfs
-    role.use_tools = False
-    role.use_code_steps = False
-    role.make_udfs = True
-    role.use_udfs = False
-    await role.run(requirement)
-    # use udfs
-    reset(role)
-    role.make_udfs = False
-    role.use_udfs = True
-    role.use_code_steps = False
-    role.use_tools = False
-    await role.run(requirement)
-
-
-@pytest.mark.asyncio
-async def test_make_use_tools():
-    requirements = [
-        "Run data analysis on sklearn Iris dataset, include a plot",
-        "Run data analysis on sklearn Diabetes dataset, include a plot",
-        "Run data analysis on sklearn Wine recognition dataset, include a plot, and train a model to predict wine class (20% as validation), and show validation accuracy",
-        "Run data analysis on sklearn Wisconsin Breast Cancer dataset, include a plot, train a model to predict targets (20% as validation), and show validation accuracy",
-        "Run EDA and visualization on this dataset, train a model to predict survival, report metrics on validation set (20%), dataset: tests/data/titanic.csv",
-    ]
-    success = 0
-    for requirement in tqdm(requirements, total=len(requirements)):
-        try:
-            await make_use_tools(requirement)
-            success += 1
-        except Exception as e:
-            logger.error(f"Found Error in {requirement}, {e}")
-    logger.info(f"success: {round(success/len(requirements), 1)*100}%")
--- a/tests/metagpt/roles/test_ml_engineer.py
+++ b/tests/metagpt/roles/test_ml_engineer.py
@ -0,0 +1,36 @
+import pytest
+
+from metagpt.const import DATA_PATH
+from metagpt.logs import logger
+from metagpt.roles.ml_engineer import MLEngineer
+
+
+def test_mle_init():
+    """MLEngineer constructed with the tool names "tool1"/"tool2" must expose an empty tool list."""
+    # NOTE(review): presumably these names are not registered tools and are dropped at init -- confirm.
+    mle = MLEngineer(goal="test", auto_run=True, use_tools=True, tools=["tool1", "tool2"])
+    assert mle.tools == []
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize("use_tools", [True])
+async def test_ml_engineer(use_tools):
+    """Run MLEngineer on the titanic requirement and check that the response content is non-empty."""
+    # Commented-out requirement/data_path pairs below are alternatives to the active titanic one.
+    # requirement = "Run data analysis on sklearn Iris dataset, include a plot"
+    # requirement = "Run data analysis on sklearn Wine recognition dataset, include a plot, and train a model to predict wine class (20% as validation), and show validation accuracy"
+    data_path = f"{DATA_PATH}/titanic"
+    requirement = f"This is a titanic passenger survival dataset, your goal is to predict passenger survival outcome. The target column is Survived. Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target. Report accuracy on the eval data. Train data path: '{data_path}/split_train.csv', eval data path: '{data_path}/split_eval.csv'."
+    # data_path = f"{DATA_PATH}/icr-identify-age-related-conditions"
+    # requirement = f"This is a medical dataset with over fifty anonymized health characteristics linked to three age-related conditions. Your goal is to predict whether a subject has or has not been diagnosed with one of these conditions.The target column is Class. Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target. Report f1 score on the eval data. Train data path: {data_path}/split_train.csv, eval data path: {data_path}/split_eval.csv."
+    # data_path = f"{DATA_PATH}/santander-customer-transaction-prediction"
+    # requirement = f"This is a customers financial dataset. Your goal is to predict which customers will make a specific transaction in the future. The target column is target. Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target. Report AUC Score on the eval data. Train data path: '{data_path}/split_train.csv', eval data path: '{data_path}/split_eval.csv' ."
+    # data_path = f"{DATA_PATH}/house-prices-advanced-regression-techniques"
+    # requirement = f"This is a house price dataset, your goal is to predict the sale price of a property based on its features. The target column is SalePrice. Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target. Report RMSE between the logarithm of the predicted value and the logarithm of the observed sales price on the eval data. Train data path: '{data_path}/split_train.csv', eval data path: '{data_path}/split_eval.csv'."
+    # NOTE(review): "dummy_tool" looks like a deliberately unknown tool name mixed in with real ones -- confirm.
+    tools = ["FillMissingValue", "CatCross", "dummy_tool"]
+
+    mle = MLEngineer(goal=requirement, auto_run=True, use_tools=use_tools, tools=tools)
+    rsp = await mle.run(requirement)
+    logger.info(rsp)
+    assert len(rsp.content) > 0
--- a/tests/metagpt/tools/libs/test_udf.py
+++ b/tests/metagpt/tools/libs/test_udf.py
@ -1,49 +0,0 @@
-import json
-
-import yaml
-
-from metagpt.logs import logger
-from metagpt.tools.libs.udf import UDFS, UDFS_YAML, docstring_to_yaml
-
-
-def test_udfs():
-    assert len(UDFS) > 0
-    assert "udf_name" in UDFS[0]
-    assert "udf_doc" in UDFS[0]
-    logger.info(UDFS)
-
-
-def test_docstring2yaml():
-    docstring = """Calculate the duration in hours between two datetime columns.
-
-    Args:
-        dataframe (pd.DataFrame): The dataframe containing the datetime columns.
-
-    Returns:
-        pd.DataFrame: The dataframe with an additional column 'duration_hour' added.
-    """
-
-    yaml_result = docstring_to_yaml(docstring, return_vars="dataframe")
-    assert "parameters" in yaml_result
-    assert "properties" in yaml_result["parameters"]
-    assert "dataframe" in yaml_result["parameters"]["properties"]
-
-
-def test_UDFS_YAML():
-    assert len(UDFS_YAML) > 0
-    logger.info(f"\n\n{json.dumps(UDFS_YAML, indent=2, ensure_ascii=False)}")
-    function_schema = UDFS_YAML
-    assert "description" in function_schema[list(function_schema.keys())[0]]
-    assert "type" in function_schema[list(function_schema.keys())[0]]
-    assert "parameters" in function_schema[list(function_schema.keys())[0]]
-    assert "properties" in function_schema[list(function_schema.keys())[0]]["parameters"]
-    assert "required" in function_schema[list(function_schema.keys())[0]]["parameters"]
-    assert "returns" in function_schema[list(function_schema.keys())[0]]
-    # 指定要保存的文件路径
-    file_path = "./tests/data/function_schema.yaml"
-
-    # 使用 PyYAML 将字典保存为 YAML 文件
-    with open(file_path, "w") as file:
-        yaml.dump(function_schema, file, default_flow_style=False)
-
-    print(f"Data has been saved to {file_path}")
--- a/tests/metagpt/utils/test_save_code.py
+++ b/tests/metagpt/utils/test_save_code.py
@ -9,7 +9,6 @@ import nbformat
 import pytest

 from metagpt.actions.execute_code import ExecutePyCode
-from metagpt.actions.write_analysis_code import WriteCodeByGenerate
 from metagpt.utils.save_code import DATA_PATH, save_code_file


@ -17,11 +16,6 @@ def test_save_code_file_python():
    save_code_file("example", "print('Hello, World!')")
    file_path = DATA_PATH / "output" / "example" / "code.py"
    assert os.path.exists(file_path), f"File does not exist: {file_path}"
-
-
-def test_save_code_file_python():
-    save_code_file("example", "print('Hello, World!')")
-    file_path = DATA_PATH / "output" / "example" / "code.py"
    with open(file_path, "r", encoding="utf-8") as fp:
        content = fp.read()
    assert "print('Hello, World!')" in content, "File content does not match"
@ -38,7 +32,7 @@ def test_save_code_file_json():

@pytest.mark.asyncio
 async def test_save_code_file_notebook():
-    code = await WriteCodeByGenerate().run(context="basic python, hello world", plan="", code_steps="", temperature=0.0)
+    code = "print('Hello, World!')"
    executor = ExecutePyCode()
    await executor.run(code)
    # Save as a Notebook file