Merge branch 'tool_manage_new' into 'code_intepreter'

Add tool registry, minimize MLEngineer. See merge request agents/data_agents_opt!50
2026-07-11 16:22:15 +02:00 · 2024-01-18 16:11:41 +00:00 · 2024-01-18 16:11:41 +00:00 · c7d46d0b0f
commit c7d46d0b0f
parent 224bf820b2 23fccdde67
57 changed files with 1335 additions and 787 deletions
--- a/.gitignore
+++ b/.gitignore
@ -173,6 +173,7 @@ tests/metagpt/utils/file_repo_git
 *.png
 htmlcov
 htmlcov.*
+cov.xml
 *.dot
 *.pkl
 *-structure.csv
--- a/docs/FAQ-EN.md
+++ b/docs/FAQ-EN.md
@ -130,7 +130,7 @@
        1.  HTML Layout： Outputs the HTML code for the page.
        1.  CSS Styles (styles.css)： Outputs the CSS code for the page.

-    1.  Currently, the SD skill is a tool invoked by UIDesign. It instantiates the SDEngine, with specific code found in metagpt/tools/sd_engine.
+    1.  Currently, the SD skill is a tool invoked by UIDesign. It instantiates the SDEngine, with specific code found in metagpt/tools/libs/sd_engine.py.

    1.  Configuration instructions for SD Skills: The SD interface is currently deployed based on *https://github.com/AUTOMATIC1111/stable-diffusion-webui* **For environmental configurations and model downloads, please refer to the aforementioned GitHub repository. To initiate the SD service that supports API calls, run the command specified in cmd with the parameter nowebui, i.e.,

--- a/metagpt/actions/debug_code.py
+++ b/metagpt/actions/debug_code.py
@ -119,5 +119,4 @@ class DebugCode(BaseWriteAnalysisCode):
            runtime_result=runtime_result,
        )
        # 根据reflection结果重写代码
-        improv_code = reflection["improved_impl"]
-        return improv_code
+        return {"code": reflection["improved_impl"]}
--- a/metagpt/actions/write_analysis_code.py
+++ b/metagpt/actions/write_analysis_code.py
@ -8,11 +8,9 @@ import re
 from pathlib import Path
 from typing import Dict, List, Tuple, Union

-import yaml
 from tenacity import retry, stop_after_attempt, wait_fixed

 from metagpt.actions import Action
-from metagpt.const import TOOL_SCHEMA_PATH
 from metagpt.llm import LLM
 from metagpt.logs import logger
 from metagpt.prompts.ml_engineer import (
@ -24,12 +22,9 @@ from metagpt.prompts.ml_engineer import (
    TOOL_USAGE_PROMPT,
 )
 from metagpt.schema import Message, Plan
-from metagpt.tools import TOOL_TYPE_MAPPINGS
+from metagpt.tools.tool_registry import TOOL_REGISTRY
 from metagpt.utils.common import create_func_config, remove_comments

-TOOL_TYPE_MODULE = {k: v.module for k, v in TOOL_TYPE_MAPPINGS.items()}
-TOOL_TYPE_USAGE_PROMPT = {k: v.usage_prompt for k, v in TOOL_TYPE_MAPPINGS.items()}
-

 class BaseWriteAnalysisCode(Action):
    DEFAULT_SYSTEM_MSG: str = """You are Code Interpreter, a world-class programmer that can complete any goal by executing code. Strictly follow the plan and generate code step by step. Each step of the code will be executed on the user's machine, and the user will provide the code execution results to you.**Notice: The code for the next step depends on the code for the previous step. Must reuse variables in the lastest other code directly, dont creat it again, it is very import for you. Use !pip install in a standalone block to install missing packages.Usually the libraries you need are already installed.Dont check if packages already imported.**"""  # prompt reference: https://github.com/KillianLucas/open-interpreter/blob/v0.1.4/interpreter/system_message.txt
@ -64,7 +59,7 @@ class BaseWriteAnalysisCode(Action):
            }
        return messages

-    async def run(self, context: List[Message], plan: Plan = None) -> str:
+    async def run(self, context: List[Message], plan: Plan = None) -> dict:
        """Run of a code writing action, used in data analysis or modeling

        Args:
@ -72,7 +67,7 @@ class BaseWriteAnalysisCode(Action):
            plan (Plan, optional): Overall plan. Defaults to None.

        Returns:
-            str: The code string.
+            dict: code result in the format of {"code": "print('hello world')", "language": "python"}
        """


@ -95,49 +90,27 @@ class WriteCodeByGenerate(BaseWriteAnalysisCode):
 class WriteCodeWithTools(BaseWriteAnalysisCode):
    """Write code with help of local available tools. Choose tools first, then generate code to use the tools"""

-    schema_path: Union[Path, str] = TOOL_SCHEMA_PATH
    available_tools: dict = {}

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
-        self._load_tools(self.schema_path)

-    def _load_tools(self, schema_path, schema_module=None):
-        """Load tools from yaml file"""
-        if isinstance(schema_path, dict):
-            schema_module = schema_module or "udf"
-            self.available_tools.update({schema_module: schema_path})
-        else:
-            if isinstance(schema_path, list):
-                yml_files = schema_path
-            elif isinstance(schema_path, Path) and schema_path.is_file():
-                yml_files = [schema_path]
-            else:
-                yml_files = schema_path.glob("*.yml")
-
-            for yml_file in yml_files:
-                module = yml_file.stem
-                with open(yml_file, "r", encoding="utf-8") as f:
-                    self.available_tools[module] = yaml.safe_load(f)
-
-    def _parse_recommend_tools(self, module: str, recommend_tools: list) -> dict:
+    def _parse_recommend_tools(self, recommend_tools: list) -> dict:
        """
        Parses and validates a list of recommended tools, and retrieves their schema from registry.

        Args:
-            module (str): The module name for querying tools in the registry.
            recommend_tools (list): A list of recommended tools.

        Returns:
            dict: A dict of valid tool schemas.
        """
        valid_tools = []
-        available_tools = self.available_tools[module].keys()
-        for tool in recommend_tools:
-            if tool in available_tools:
-                valid_tools.append(tool)
+        for tool_name in recommend_tools:
+            if TOOL_REGISTRY.has_tool(tool_name):
+                valid_tools.append(TOOL_REGISTRY.get_tool(tool_name))

-        tool_catalog = {tool: self.available_tools[module][tool] for tool in valid_tools}
+        tool_catalog = {tool.name: tool.schemas for tool in valid_tools}
        return tool_catalog

    async def _tool_recommendation(
@ -176,31 +149,24 @@ class WriteCodeWithTools(BaseWriteAnalysisCode):
        tool_type = (
            plan.current_task.task_type
        )  # find tool type from task type through exact match, can extend to retrieval in the future
-        available_tools = self.available_tools.get(tool_type, {})
-        special_prompt = TOOL_TYPE_USAGE_PROMPT.get(tool_type, "")
+        available_tools = TOOL_REGISTRY.get_tools_by_type(tool_type)
+        special_prompt = (
+            TOOL_REGISTRY.get_tool_type(tool_type).usage_prompt if TOOL_REGISTRY.has_tool_type(tool_type) else ""
+        )
        code_steps = plan.current_task.code_steps

-        finished_tasks = plan.get_finished_tasks()
-        code_context = [remove_comments(task.code) for task in finished_tasks]
-        code_context = "\n\n".join(code_context)
-
        tool_catalog = {}
-        module_name = ""

-        if len(available_tools) > 0:
-            available_tools = {k: v["description"] for k, v in available_tools.items()}
+        if available_tools:
+            available_tools = {tool_name: tool.schemas["description"] for tool_name, tool in available_tools.items()}

            recommend_tools = await self._tool_recommendation(
                plan.current_task.instruction, code_steps, available_tools
            )
-            tool_catalog = self._parse_recommend_tools(tool_type, recommend_tools)
+            tool_catalog = self._parse_recommend_tools(recommend_tools)
            logger.info(f"Recommended tools: \n{recommend_tools}")

-            module_name = TOOL_TYPE_MODULE[tool_type]
-
-        tools_instruction = TOOL_USAGE_PROMPT.format(
-            special_prompt=special_prompt, module_name=module_name, tool_catalog=tool_catalog
-        )
+        tools_instruction = TOOL_USAGE_PROMPT.format(special_prompt=special_prompt, tool_catalog=tool_catalog)

        context.append(Message(content=tools_instruction, role="user"))

@ -208,7 +174,7 @@ class WriteCodeWithTools(BaseWriteAnalysisCode):

        tool_config = create_func_config(CODE_GENERATOR_WITH_TOOLS)
        rsp = await self.llm.aask_code(prompt, **tool_config)
-        return rsp["code"]
+        return rsp


 class WriteCodeWithToolsML(WriteCodeWithTools):
@ -219,26 +185,28 @@ class WriteCodeWithToolsML(WriteCodeWithTools):
        column_info: str = "",
        **kwargs,
    ) -> Tuple[List[Message], str]:
-        tool_type = plan.current_task.task_type
-        available_tools = self.available_tools.get(tool_type, {})
-        special_prompt = TOOL_TYPE_USAGE_PROMPT.get(tool_type, "")
+        tool_type = (
+            plan.current_task.task_type
+        )  # find tool type from task type through exact match, can extend to retrieval in the future
+        available_tools = TOOL_REGISTRY.get_tools_by_type(tool_type)
+        special_prompt = (
+            TOOL_REGISTRY.get_tool_type(tool_type).usage_prompt if TOOL_REGISTRY.has_tool_type(tool_type) else ""
+        )
        code_steps = plan.current_task.code_steps

        finished_tasks = plan.get_finished_tasks()
        code_context = [remove_comments(task.code) for task in finished_tasks]
        code_context = "\n\n".join(code_context)

-        if len(available_tools) > 0:
-            available_tools = {k: v["description"] for k, v in available_tools.items()}
+        if available_tools:
+            available_tools = {tool_name: tool.schemas["description"] for tool_name, tool in available_tools.items()}

            recommend_tools = await self._tool_recommendation(
                plan.current_task.instruction, code_steps, available_tools
            )
-            tool_catalog = self._parse_recommend_tools(tool_type, recommend_tools)
+            tool_catalog = self._parse_recommend_tools(recommend_tools)
            logger.info(f"Recommended tools: \n{recommend_tools}")

-            module_name = TOOL_TYPE_MODULE[tool_type]
-
            prompt = ML_TOOL_USAGE_PROMPT.format(
                user_requirement=plan.goal,
                history_code=code_context,
@ -246,7 +214,6 @@ class WriteCodeWithToolsML(WriteCodeWithTools):
                column_info=column_info,
                special_prompt=special_prompt,
                code_steps=code_steps,
-                module_name=module_name,
                tool_catalog=tool_catalog,
            )

@ -263,7 +230,7 @@ class WriteCodeWithToolsML(WriteCodeWithTools):
        tool_config = create_func_config(CODE_GENERATOR_WITH_TOOLS)
        rsp = await self.llm.aask_code(prompt, **tool_config)
        context = [Message(content=prompt, role="user")]
-        return context, rsp["code"]
+        return context, rsp


 class MakeTools(WriteCodeByGenerate):
--- a/metagpt/actions/write_plan.py
+++ b/metagpt/actions/write_plan.py
@ -12,7 +12,7 @@ from metagpt.actions import Action
 from metagpt.logs import logger
 from metagpt.prompts.ml_engineer import ASSIGN_TASK_TYPE_CONFIG, ASSIGN_TASK_TYPE_PROMPT
 from metagpt.schema import Message, Plan, Task
-from metagpt.tools import TOOL_TYPE_MAPPINGS
+from metagpt.tools import TOOL_REGISTRY
 from metagpt.utils.common import CodeParser, create_func_config


@ -47,13 +47,16 @@ class WritePlan(Action):
            List[Dict]: tasks with task type assigned
        """
        task_list = "\n".join([f"Task {task['task_id']}: {task['instruction']}" for task in tasks])
-        task_type_desc = "\n".join([f"- **{item.name}**: {item.desc}" for item in TOOL_TYPE_MAPPINGS.values()])
+        task_type_desc = "\n".join(
+            [f"- **{tool_type.name}**: {tool_type.desc}" for tool_type in TOOL_REGISTRY.get_tool_types().values()]
+        )  # task types are bound to tool types for now; this should be improved in the future
        prompt = ASSIGN_TASK_TYPE_PROMPT.format(
            task_list=task_list, task_type_desc=task_type_desc
        )  # task types are set to be the same as tool types, for now
        tool_config = create_func_config(ASSIGN_TASK_TYPE_CONFIG)
        rsp = await self.llm.aask_code(prompt, **tool_config)
        task_type_list = rsp["task_type"]
+        print(f"assigned task types: {task_type_list}")
        for task, task_type in zip(tasks, task_type_list):
            task["task_type"] = task_type
        return json.dumps(tasks)
--- a/metagpt/const.py
+++ b/metagpt/const.py
@ -70,8 +70,8 @@ TMP = METAGPT_ROOT / "tmp"
 SOURCE_ROOT = METAGPT_ROOT / "metagpt"
 PROMPT_PATH = SOURCE_ROOT / "prompts"
 SKILL_DIRECTORY = SOURCE_ROOT / "skills"
-TOOL_SCHEMA_PATH = METAGPT_ROOT / "metagpt/tools/functions/schemas"
-TOOL_LIBS_PATH = METAGPT_ROOT / "metagpt/tools/functions/libs"
+TOOL_SCHEMA_PATH = METAGPT_ROOT / "metagpt/tools/schemas"
+TOOL_LIBS_PATH = METAGPT_ROOT / "metagpt/tools/libs"


 # REAL CONSTS
--- a/metagpt/prompts/ml_engineer.py
+++ b/metagpt/prompts/ml_engineer.py
@ -15,7 +15,7 @@ Keep dataset column information updated before model train.
 # Task
 Update and print the dataset's column information only if the train or test data has changed. Use the following code:
 ```python
-from metagpt.tools.functions.libs.data_preprocess import get_column_info
+from metagpt.tools.libs.data_preprocess import get_column_info

 column_info = get_column_info(df)
 print("column_info")
@ -134,16 +134,12 @@ PRINT_DATA_COLUMNS = {
    "parameters": {
        "type": "object",
        "properties": {
-            "is_update": {
-                "type": "boolean",
-                "description": "Whether need to update the column info.",
-            },
            "code": {
                "type": "string",
                "description": "The code to be added to a new cell in jupyter.",
            },
        },
-        "required": ["is_update", "code"],
+        "required": ["code"],
    },
 }

@ -203,7 +199,7 @@ Specifically, {special_prompt}
 - You can freely combine the use of any other public packages, like sklearn, numpy, pandas, etc..

 # Available Tools (can be empty):
-Each Class tool is described in JSON format. When you call a tool, import the tool from `{module_name}` first.
+Each Class tool is described in JSON format. When you call a tool, import the tool first.
 {tool_catalog}

 # Constraints:
@ -240,7 +236,7 @@ Strictly follow steps below when you writing code if it's convenient.
 - You can freely combine the use of any other public packages, like sklearn, numpy, pandas, etc..

 # Available Tools:
-Each Class tool is described in JSON format. When you call a tool, import the tool from `{module_name}` first.
+Each Class tool is described in JSON format. When you call a tool, import the tool from its path first.
 {tool_catalog}

 # Output Example:
@ -248,7 +244,7 @@ when current task is "do data preprocess, like fill missing value, handle outlie
 ```python
 # Step 1: fill missing value
 # Tools used: ['FillMissingValue']
-from metagpt.tools.functions.libs.data_preprocess import FillMissingValue
+from metagpt.tools.libs.data_preprocess import FillMissingValue

 train_processed = train.copy()
 test_processed = test.copy()
--- a/metagpt/prompts/tool_types.py
+++ b/metagpt/prompts/tool_types.py
@ -39,7 +39,7 @@ The current task is about evaluating a model, please note the following:
 """

 # Prompt for using tools of "vision" type
-VISION_PROMPT = """
+IMAGE2WEBPAGE_PROMPT = """
 The current task is about converting image into webpage code. please note the following:
 - Single-Step Code Generation: Execute the entire code generation process in a single step, encompassing HTML, CSS, and JavaScript. Avoid fragmenting the code generation into multiple separate steps to maintain consistency and simplify the development workflow.
-"""
+"""
--- a/metagpt/roles/code_interpreter.py
+++ b/metagpt/roles/code_interpreter.py
@ -5,6 +5,7 @@ from pydantic import Field
 from metagpt.actions.ask_review import ReviewConst
 from metagpt.actions.execute_code import ExecutePyCode
 from metagpt.actions.write_analysis_code import WriteCodeByGenerate, WriteCodeWithTools
+from metagpt.actions.write_code_steps import WriteCodeSteps
 from metagpt.logs import logger
 from metagpt.roles import Role
 from metagpt.roles.tool_maker import ToolMaker
@ -16,6 +17,7 @@ class CodeInterpreter(Role):
    auto_run: bool = True
    use_tools: bool = False
    make_udfs: bool = False  # whether to save user-defined functions
+    use_code_steps: bool = False
    execute_code: ExecutePyCode = Field(default_factory=ExecutePyCode, exclude=True)

    def __init__(
@ -52,10 +54,14 @@ class CodeInterpreter(Role):

    async def _act_on_task(self, current_task: Task) -> TaskResult:
        code, result, is_success = await self._write_and_exec_code()
-        task_result = TaskResult(code=code['code'], result=result, is_success=is_success)
+        task_result = TaskResult(code=code, result=result, is_success=is_success)
        return task_result

    async def _write_and_exec_code(self, max_retry: int = 3):
+        self.planner.current_task.code_steps = (
+            await WriteCodeSteps().run(self.planner.plan) if self.use_code_steps else ""
+        )
+
        counter = 0
        success = False

@ -63,7 +69,7 @@ class CodeInterpreter(Role):
            ### write code ###
            code, cause_by = await self._write_code()

-            self.working_memory.add(Message(content=code['code'], role="assistant", cause_by=cause_by))
+            self.working_memory.add(Message(content=code["code"], role="assistant", cause_by=cause_by))

            ### execute code ###
            result, success = await self.execute_code.run(**code)
@ -72,7 +78,7 @@ class CodeInterpreter(Role):
            self.working_memory.add(Message(content=result, role="user", cause_by=ExecutePyCode))

            ### process execution result ###
-            if "!pip" in code:
+            if "!pip" in code["code"]:
                success = False

            counter += 1
@ -83,17 +89,15 @@ class CodeInterpreter(Role):
                if ReviewConst.CHANGE_WORD[0] in review:
                    counter = 0  # redo the task again with help of human suggestions

-        return code, result, success
+        return code["code"], result, success

    async def _write_code(self):
        todo = WriteCodeByGenerate() if not self.use_tools else WriteCodeWithTools()
        logger.info(f"ready to {todo.name}")

        context = self.planner.get_useful_memories()
+        # print(*context, sep="\n***\n")
        code = await todo.run(context=context, plan=self.planner.plan, temperature=0.0)
-        # 暂时在这里转换 WriteCodeWithTools 的输出
-        if isinstance(code, str):
-            code = {'code': code, 'language': 'python'}

        return code, todo

--- a/metagpt/roles/ml_engineer.py
+++ b/metagpt/roles/ml_engineer.py
@ -1,64 +1,43 @@
-from metagpt.actions.ask_review import ReviewConst
 from metagpt.actions.debug_code import DebugCode
 from metagpt.actions.execute_code import ExecutePyCode
-from metagpt.actions.ml_da_action import Reflect, SummarizeAnalysis, UpdateDataColumns
+from metagpt.actions.ml_da_action import UpdateDataColumns
 from metagpt.actions.write_analysis_code import WriteCodeWithToolsML
-from metagpt.actions.write_code_steps import WriteCodeSteps
 from metagpt.logs import logger
 from metagpt.roles.code_interpreter import CodeInterpreter
-from metagpt.roles.kaggle_manager import DownloadData, SubmitResult
-from metagpt.schema import Message
+from metagpt.tools.tool_data_type import ToolTypeEnum
 from metagpt.utils.common import any_to_str


 class MLEngineer(CodeInterpreter):
-    use_code_steps: bool = False
-    use_udfs: bool = False
-    data_desc: dict = {}
    debug_context: list = []
    latest_code: str = ""

    def __init__(self, name="Mark", profile="MLEngineer", **kwargs):
        super().__init__(name=name, profile=profile, **kwargs)
-        # self._watch([DownloadData, SubmitResult])  # in multi-agent settings
-
-    async def _plan_and_act(self):
-        ### a new attempt on the data, relevant in a multi-agent multi-turn setting ###
-        await self._prepare_data_context()
-
-        ### general plan process ###
-        await super()._plan_and_act()
-
-        ### summarize analysis ###
-        summary = await SummarizeAnalysis().run(self.planner.plan)
-        rsp = Message(content=summary, cause_by=SummarizeAnalysis)
-        self.rc.memory.add(rsp)
-
-        return rsp
-
-    async def _write_and_exec_code(self, max_retry: int = 3):
-        self.planner.current_task.code_steps = (
-            await WriteCodeSteps().run(self.planner.plan) if self.use_code_steps else ""
-        )
-
-        code, result, success = await super()._write_and_exec_code(max_retry=max_retry)
-
-        if success:
-            if self.use_tools and self.planner.current_task.task_type in ["data_preprocess", "feature_engineering"]:
-                update_success, new_code = await self._update_data_columns()
-                if update_success:
-                    code = code + "\n\n" + new_code
-
-        return code, result, success

    async def _write_code(self):
        if not self.use_tools:
            return await super()._write_code()

-        code_execution_count = sum([msg.cause_by == any_to_str(ExecutePyCode) for msg in self.working_memory.get()])
+        # In a trial-and-error setting, check whether this is our first attempt at the task: it is if no code has been executed yet.
+        is_first_trial = any_to_str(ExecutePyCode) not in [msg.cause_by for msg in self.working_memory.get()]

-        if code_execution_count > 0:
-            logger.warning("We got a bug code, now start to debug...")
+        if is_first_trial:
+            # For the first trial, write task code from scratch
+            column_info = await self._update_data_columns()
+
+            logger.info("Write code with tools")
+            tool_context, code = await WriteCodeWithToolsML().run(
+                context=[],  # context assembled inside the Action
+                plan=self.planner.plan,
+                column_info=column_info,
+            )
+            self.debug_context = tool_context
+            cause_by = WriteCodeWithToolsML
+
+        else:
+            # Previous trials resulted in error, debug and rewrite the code
+            logger.warning("We got a bug, now start to debug...")
            code = await DebugCode().run(
                code=self.latest_code,
                runtime_result=self.working_memory.get(),
@ -67,49 +46,21 @@ class MLEngineer(CodeInterpreter):
            logger.info(f"new code \n{code}")
            cause_by = DebugCode

-        else:
-            logger.info("Write code with tools")
-            tool_context, code = await WriteCodeWithToolsML().run(
-                context=[],  # context assembled inside the Action
-                plan=self.planner.plan,
-                column_info=self.data_desc.get("column_info", ""),
-            )
-            self.debug_context = tool_context
-            cause_by = WriteCodeWithToolsML
-
-        self.latest_code = code
+        self.latest_code = code["code"]

        return code, cause_by

    async def _update_data_columns(self):
+        current_task = self.planner.plan.current_task
+        if current_task.task_type not in [
+            ToolTypeEnum.DATA_PREPROCESS.value,
+            ToolTypeEnum.FEATURE_ENGINEERING.value,
+            ToolTypeEnum.MODEL_TRAIN.value,
+        ]:
+            return ""
        logger.info("Check columns in updated data")
-        rsp = await UpdateDataColumns().run(self.planner.plan)
-        is_update, code = rsp["is_update"], rsp["code"]
+        code = await UpdateDataColumns().run(self.planner.plan)
        success = False
-        if is_update:
-            result, success = await self.execute_code.run(code)
-            if success:
-                print(result)
-                self.data_desc["column_info"] = result
-        return success, code
-
-    async def _prepare_data_context(self):
-        memories = self.get_memories()
-        if memories:
-            latest_event = memories[-1].cause_by
-            if latest_event == DownloadData:
-                self.planner.plan.context = memories[-1].content
-            elif latest_event == SubmitResult:
-                # self reflect on previous plan outcomes and think about how to improve the plan, add to working  memory
-                await self._reflect()
-
-                # get feedback for improvement from human, add to working memory
-                await self.planner.ask_review(trigger=ReviewConst.TASK_REVIEW_TRIGGER)
-
-    async def _reflect(self):
-        context = self.get_memories()
-        context = "\n".join([str(msg) for msg in context])
-
-        reflection = await Reflect().run(context=context)
-        self.working_memory.add(Message(content=reflection, role="assistant"))
-        self.working_memory.add(Message(content=Reflect.REWRITE_PLAN_INSTRUCTION, role="user"))
+        result, success = await self.execute_code.run(**code)
+        print(result)
+        return result if success else ""
--- a/metagpt/tools/init.py
+++ b/metagpt/tools/init.py
@ -7,17 +7,11 @@
 """

 from enum import Enum
+from metagpt.tools import tool_types  # this registers all tool types
+from metagpt.tools import libs  # this registers all tools
+from metagpt.tools.tool_registry import TOOL_REGISTRY

-from pydantic import BaseModel
-
-from metagpt.const import TOOL_LIBS_PATH
-from metagpt.prompts.tool_type import (
-    DATA_PREPROCESS_PROMPT,
-    FEATURE_ENGINEERING_PROMPT,
-    MODEL_TRAIN_PROMPT,
-    MODEL_EVALUATE_PROMPT,
-    VISION_PROMPT,
-)
+_ = tool_types, libs, TOOL_REGISTRY  # Avoid pre-commit error


 class SearchEngineType(Enum):
@ -37,62 +31,3 @@ class WebBrowserEngineType(Enum):
    def __missing__(cls, key):
        """Default type conversion"""
        return cls.CUSTOM
-
-
-class ToolType(BaseModel):
-    name: str
-    module: str = ""
-    desc: str
-    usage_prompt: str = ""
-
-
-TOOL_TYPE_MAPPINGS = {
-    "data_preprocess": ToolType(
-        name="data_preprocess",
-        module=str(TOOL_LIBS_PATH / "data_preprocess"),
-        desc="Only for changing value inplace.",
-        usage_prompt=DATA_PREPROCESS_PROMPT,
-    ),
-    "feature_engineering": ToolType(
-        name="feature_engineering",
-        module=str(TOOL_LIBS_PATH / "feature_engineering"),
-        desc="Only for creating new columns for input data.",
-        usage_prompt=FEATURE_ENGINEERING_PROMPT,
-    ),
-    "model_train": ToolType(
-        name="model_train",
-        module="",
-        desc="Only for training model.",
-        usage_prompt=MODEL_TRAIN_PROMPT,
-    ),
-    "model_evaluate": ToolType(
-        name="model_evaluate",
-        module="",
-        desc="Only for evaluating model.",
-        usage_prompt=MODEL_EVALUATE_PROMPT,
-    ),
-    "stable_diffusion": ToolType(
-        name="stable_diffusion",
-        module="metagpt.tools.sd_engine",
-        desc="Related to text2image, image2image using stable diffusion model.",
-        usage_prompt="",
-    ),
-    "scrape_web": ToolType(
-        name="scrape_web",
-        module="metagpt.tools.functions.libs.scrape_web.scrape_web",
-        desc="Scrape data from web page.",
-        usage_prompt="",
-    ),
-    "vision": ToolType(
-        name="vision",
-        module=str(TOOL_LIBS_PATH / "vision"),
-        desc="Only for converting image into webpage code.",
-        usage_prompt=VISION_PROMPT,
-    ),
-    "other": ToolType(
-        name="other",
-        module="",
-        desc="Any tasks that do not fit into the previous categories",
-        usage_prompt="",
-    ),
-}
--- a/metagpt/tools/functions/init.py
+++ b/metagpt/tools/functions/init.py
@ -1,6 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-# @Time    : 2023/11/16 16:32
-# @Author  : lidanyang
-# @File    : __init__.py
-# @Desc    :
--- a/metagpt/tools/functions/libs/init.py
+++ b/metagpt/tools/functions/libs/init.py
@ -1,6 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-# @Time    : 2023/11/16 16:32
-# @Author  : lidanyang
-# @File    : __init__.py
-# @Desc    :
--- a/metagpt/tools/functions/libs/base.py
+++ b/metagpt/tools/functions/libs/base.py
@ -1,16 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-# @Time    : 2023/12/10 20:12
-# @Author  : lidanyang
-# @File    : base
-# @Desc    :
-class MLProcess(object):
-    def fit(self, df):
-        raise NotImplementedError
-
-    def transform(self, df):
-        raise NotImplementedError
-
-    def fit_transform(self, df):
-        self.fit(df)
-        return self.transform(df)
--- a/metagpt/tools/functions/libs/scrape_web/init.py
+++ b/metagpt/tools/functions/libs/scrape_web/init.py
@ -1 +0,0 @@
-from metagpt.tools.functions.libs.scrape_web.scrape_web import scrape_web
--- a/metagpt/tools/functions/libs/udf/init.py
+++ b/metagpt/tools/functions/libs/udf/init.py
@ -1,126 +0,0 @@
-import ast
-import os
-import re
-import yaml
-import inspect
-import importlib
-from pathlib import Path
-from typing import List
-from metagpt.logs import logger
-
-
-def extract_function_signatures(file_path):
-    with open(file_path, "r", encoding="utf-8") as file:
-        source_code = file.read()
-
-    tree = ast.parse(source_code)
-    function_signatures = []
-    function_returns = []
-    for node in ast.walk(tree):
-        if isinstance(node, ast.FunctionDef):
-            # 只提取用户自定义函数，排除内置函数
-            if not (node.name.startswith("__") and node.name.endswith("__")):
-                # 获取函数名
-                function_name = node.name
-                # 获取参数列表
-                args = [arg.arg for arg in node.args.args]
-                # 获取函数签名
-                function_signature = f"{function_name}({', '.join(args)})"
-                # 导入函数
-                module_name = Path(file_path).parts[-1][: -len(Path(file_path).suffix)]
-                module = importlib.import_module(f"metagpt.tools.functions.libs.udf.{module_name}")
-                # 将函数导入到当前命名空间
-                globals().update({function_name: getattr(module, function_name)})
-                # 获取函数注释和函数路径
-                function_schema = {
-                    "udf_name": function_signature,
-                    "udf_path": f"from metagpt.tools.functions.libs.udf.{module_name} import {function_name}",
-                    "udf_doc": inspect.getdoc(getattr(module, function_name)),
-                }
-                function_signatures.append(function_schema)
-                # 获取函数返回变量名
-                source_lines, _ = inspect.getsourcelines(getattr(module, function_name))
-                for line in source_lines:
-                    if line.strip().startswith("return "):
-                        function_returns.append(
-                            {
-                                "udf_name": function_name,
-                                "udf_returns": [var.strip() for var in line.strip()[len("return ") :].split(",")],
-                            }
-                        )
-                        break
-
-                # 没有返回值的函数
-                if not function_returns or function_returns[-1]["udf_name"] != function_name:
-                    function_returns.append({"udf_name": function_name, "udf_returns": [None]})
-    return function_signatures, function_returns
-
-
-def get_function_signatures_in_folder(folder_path):
-    python_files = [f for f in os.listdir(folder_path) if f.endswith(".py") and f != "__init__.py"]
-    all_function_signatures = []
-    all_function_returns = []
-
-    for file_name in python_files:
-        file_path = os.path.join(folder_path, file_name)
-        function_signatures, function_returns = extract_function_signatures(file_path)
-        all_function_signatures.extend(function_signatures)
-        all_function_returns.extend(function_returns)
-    return all_function_signatures, all_function_returns
-
-
-# Create Tools Yaml Style Schema
-def docstring_to_yaml(docstring: str, return_vars: List[str] = None):
-    logger.debug(f"\n\nFunction Docstring: \n{'-'*60}\n {docstring} \n\nFunction Returns: \n{'-'*60}\n{return_vars}\n")
-    if docstring is None:
-        return {}
-    # 匹配简介部分
-    description_match = re.search(r"^(.*?)(?:Args:|Returns:|Raises:|$)", docstring, re.DOTALL)
-    description = description_match.group(1).strip() if description_match else ""
-
-    # 匹配Args部分
-    args_match = re.search(r"Args:\s*(.*?)(?:Returns:|Raises:|$)", docstring, re.DOTALL)
-    _args = args_match.group(1).strip() if args_match else ""
-    variable_pattern = re.compile(r"(\w+)\s*\((.*?)\):\s*(.*)")
-    params = variable_pattern.findall(_args)
-    if not params:
-        params = ((None, None, None),)
-    # 匹配Returns部分
-    returns_match = re.search(r"Returns:\s*(.*?)(?:Raises:|$)", docstring, re.DOTALL)
-    returns = returns_match.group(1).strip() if returns_match else ""
-    return_pattern = re.compile(r"^(.*)\s*:\s*(.*)$")
-    # 添加返回值变量名
-    return_vars = return_vars if isinstance(return_vars, list) else [return_vars]
-    returns = [(r, *r_desc) for r_desc, r in zip(return_pattern.findall(returns), return_vars)]
-    # 构建YAML字典
-    yaml_data = {
-        "description": description.strip(".").strip(),
-        "parameters": {
-            "properties": {
-                param[0]: {"type": param[1], "description": param[2]} for param in params if param[0] is not None
-            },
-            "required": [param[0] for param in params if param[0] is not None],
-        },
-        "returns": {ret[0]: {"type": ret[1], "description": ret[2]} for ret in returns},
-    }
-    return yaml_data
-
-
-def extract_function_schema_yaml_in_folder(folder_path: str):
-    function_signatures, function_returns = get_function_signatures_in_folder(folder_path)
-    function_schema_yaml_data = {}
-    for func_docstring, func_returns in zip(function_signatures, function_returns):
-        if func_docstring["udf_doc"]:
-            fun_yaml_data = docstring_to_yaml(func_docstring["udf_doc"], func_returns["udf_returns"])
-            fun_yaml_data.update({"type": "function"})
-            function_schema_yaml_data.update({func_returns["udf_name"]: fun_yaml_data})
-    return yaml.dump(function_schema_yaml_data, default_flow_style=False)
-
-
-folder_path = str(Path(__file__).parent.absolute())
-function_signatures, function_returns = get_function_signatures_in_folder(folder_path)
-
-UDFS = [func for func in function_signatures]
-
-UDFS_YAML_STR: str = extract_function_schema_yaml_in_folder(folder_path)
-UDFS_YAML: dict = yaml.load(UDFS_YAML_STR, Loader=yaml.FullLoader)
--- a/metagpt/tools/functions/schemas/data_preprocess.yml
+++ b/metagpt/tools/functions/schemas/data_preprocess.yml
@ -1,306 +0,0 @@
-FillMissingValue:
-  type: class
-  description: "Completing missing values with simple strategies"
-  methods:
-    __init__:
-      description: "Initialize self."
-      parameters:
-        properties:
-          features:
-            type: list
-            description: "columns to be processed"
-          strategy:
-            type: str
-            description: "the imputation strategy, notice mean/median can only be used for numeric features"
-            default: mean
-            enum:
-              - mean
-              - median
-              - most_frequent
-              - constant
-          fill_value:
-            type: int
-            description: "fill_value is used to replace all occurrences of missing_values"
-            default: null
-        required:
-          - features
-    fit:
-      description: "Fit the FillMissingValue model."
-      parameters:
-        properties:
-          df:
-            type: DataFrame
-            description: "The input DataFrame."
-        required:
-          - df
-    transform:
-      description: "Transform the input DataFrame with the fitted model."
-      parameters:
-        properties:
-          df:
-            type: DataFrame
-            description: "The input DataFrame."
-        required:
-          - df
-      returns:
-        df:
-          type: DataFrame
-          description: "The transformed DataFrame."
-    fit_transform:
-      description: "Fit and transform the input DataFrame."
-      parameters:
-        properties:
-          df:
-            type: DataFrame
-            description: "The input DataFrame."
-        required:
-          - df
-      returns:
-        df:
-          type: DataFrame
-          description: "The transformed DataFrame."
-
-MinMaxScale:
-  type: class
-  description: "Transform features by scaling each feature to a range, witch is (0, 1)"
-  methods:
-    __init__:
-      description: "Initialize self."
-      parameters:
-        properties:
-          features:
-            type: list
-            description: "columns to be processed"
-        required:
-          - features
-    fit:
-      description: "Fit the MinMaxScale model."
-      parameters:
-        properties:
-          df:
-            type: DataFrame
-            description: "The input DataFrame."
-        required:
-          - df
-    transform:
-      description: "Transform the input DataFrame with the fitted model."
-      parameters:
-        properties:
-          df:
-            type: DataFrame
-            description: "The input DataFrame."
-        required:
-          - df
-      returns:
-        df:
-          type: DataFrame
-          description: "The transformed DataFrame."
-    fit_transform:
-      description: "Fit and transform the input DataFrame."
-      parameters:
-        properties:
-          df:
-            type: DataFrame
-            description: "The input DataFrame."
-        required:
-          - df
-      returns:
-        df:
-          type: DataFrame
-          description: "The transformed DataFrame."
-
-StandardScale:
-  type: class
-  description: "Standardize features by removing the mean and scaling to unit variance"
-  methods:
-    __init__:
-      description: "Initialize self."
-      parameters:
-        properties:
-          features:
-            type: list
-            description: "columns to be processed"
-        required:
-          - features
-    fit:
-      description: "Fit the StandardScale model."
-      parameters:
-        properties:
-          df:
-            type: DataFrame
-            description: "The input DataFrame."
-        required:
-          - df
-    transform:
-      description: "Transform the input DataFrame with the fitted model."
-      parameters:
-        properties:
-          df:
-            type: DataFrame
-            description: "The input DataFrame."
-        required:
-          - df
-      returns:
-        df:
-          type: DataFrame
-          description: "The transformed DataFrame."
-    fit_transform:
-      description: "Fit and transform the input DataFrame."
-      parameters:
-        properties:
-          df:
-            type: DataFrame
-            description: "The input DataFrame."
-        required:
-          - df
-      returns:
-        df:
-          type: DataFrame
-          description: "The transformed DataFrame."
-
-MaxAbsScale:
-  type: class
-  description: "cale each feature by its maximum absolute value"
-  methods:
-    __init__:
-      description: "Initialize self."
-      parameters:
-        properties:
-          features:
-            type: list
-            description: "columns to be processed"
-        required:
-          - features
-    fit:
-      description: "Fit the MaxAbsScale model."
-      parameters:
-        properties:
-          df:
-            type: DataFrame
-            description: "The input DataFrame."
-        required:
-          - df
-    transform:
-      description: "Transform the input DataFrame with the fitted model."
-      parameters:
-        properties:
-          df:
-            type: DataFrame
-            description: "The input DataFrame."
-        required:
-          - df
-      returns:
-        df:
-          type: DataFrame
-          description: "The transformed DataFrame."
-    fit_transform:
-      description: "Fit and transform the input DataFrame."
-      parameters:
-        properties:
-          df:
-            type: DataFrame
-            description: "The input DataFrame."
-        required:
-          - df
-      returns:
-        df:
-          type: DataFrame
-          description: "The transformed DataFrame."
-
-LabelEncode:
-  type: class
-  description: "Apply label encoding to specified categorical columns in-place."
-  methods:
-    __init__:
-      description: "Initialize self."
-      parameters:
-        properties:
-          features:
-            type: list
-            description: "Categorical columns to be label encoded"
-        required:
-          - features
-    fit:
-      description: "Fit the LabelEncode model."
-      parameters:
-        properties:
-          df:
-            type: DataFrame
-            description: "The input DataFrame."
-        required:
-          - df
-    transform:
-      description: "Transform the input DataFrame with the fitted model."
-      parameters:
-        properties:
-          df:
-            type: DataFrame
-            description: "The input DataFrame."
-        required:
-          - df
-      returns:
-        df:
-          type: DataFrame
-          description: "The transformed DataFrame."
-    fit_transform:
-      description: "Fit and transform the input DataFrame."
-      parameters:
-        properties:
-          df:
-            type: DataFrame
-            description: "The input DataFrame."
-        required:
-          - df
-      returns:
-        df:
-          type: DataFrame
-          description: "The transformed DataFrame."
-
-OneHotEncode:
-  type: class
-  description: "Apply one-hot encoding to specified categorical columns, the original columns will be dropped."
-  methods:
-    __init__:
-      description: "Initialize self."
-      parameters:
-        properties:
-          features:
-            type: list
-            description: "Categorical columns to be one-hot encoded and dropped"
-        required:
-          - features
-    fit:
-      description: "Fit the OneHotEncoding model."
-      parameters:
-        properties:
-          df:
-            type: DataFrame
-            description: "The input DataFrame."
-        required:
-          - df
-    transform:
-      description: "Transform the input DataFrame with the fitted model."
-      parameters:
-        properties:
-          df:
-            type: DataFrame
-            description: "The input DataFrame."
-        required:
-          - df
-      returns:
-        df:
-          type: DataFrame
-          description: "The transformed DataFrame."
-    fit_transform:
-      description: "Fit and transform the input DataFrame."
-      parameters:
-        properties:
-          df:
-            type: DataFrame
-            description: "The input DataFrame."
-        required:
-          - df
-      returns:
-        df:
-          type: DataFrame
-          description: "The transformed DataFrame."
--- a/metagpt/tools/libs/__init__.py
+++ b/metagpt/tools/libs/__init__.py
@ -0,0 +1,15 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# @Time    : 2023/11/16 16:32
+# @Author  : lidanyang
+# @File    : __init__.py
+# @Desc    :
+from metagpt.tools.libs import (
+    data_preprocess,
+    feature_engineering,
+    sd_engine,
+    gpt_v_generator,
+    web_scrapping,
+)
+
+_ = data_preprocess, feature_engineering, sd_engine, gpt_v_generator, web_scrapping  # Avoid pre-commit error
--- a/metagpt/tools/functions/libs/data_preprocess.py
+++ b/metagpt/tools/functions/libs/data_preprocess.py
@ -13,9 +13,25 @@ from sklearn.preprocessing import (
    StandardScaler,
 )

-from metagpt.tools.functions.libs.base import MLProcess
+from metagpt.tools.tool_data_type import ToolTypeEnum
+from metagpt.tools.tool_registry import register_tool
+
+TOOL_TYPE = ToolTypeEnum.DATA_PREPROCESS.value


+class MLProcess(object):
+    def fit(self, df):
+        raise NotImplementedError
+
+    def transform(self, df):
+        raise NotImplementedError
+
+    def fit_transform(self, df):
+        self.fit(df)
+        return self.transform(df)
+
+
+@register_tool(tool_type=TOOL_TYPE)
 class FillMissingValue(MLProcess):
    def __init__(
        self,
@ -42,6 +58,7 @@ class FillMissingValue(MLProcess):
        return new_df


+@register_tool(tool_type=TOOL_TYPE)
 class MinMaxScale(MLProcess):
    def __init__(
        self,
@ -60,6 +77,7 @@ class MinMaxScale(MLProcess):
        return new_df


+@register_tool(tool_type=TOOL_TYPE)
 class StandardScale(MLProcess):
    def __init__(
        self,
@ -78,6 +96,7 @@ class StandardScale(MLProcess):
        return new_df


+@register_tool(tool_type=TOOL_TYPE)
 class MaxAbsScale(MLProcess):
    def __init__(
        self,
@ -96,6 +115,7 @@ class MaxAbsScale(MLProcess):
        return new_df


+@register_tool(tool_type=TOOL_TYPE)
 class RobustScale(MLProcess):
    def __init__(
        self,
@ -114,6 +134,7 @@ class RobustScale(MLProcess):
        return new_df


+@register_tool(tool_type=TOOL_TYPE)
 class OrdinalEncode(MLProcess):
    def __init__(
        self,
@ -132,6 +153,7 @@ class OrdinalEncode(MLProcess):
        return new_df


+@register_tool(tool_type=TOOL_TYPE)
 class OneHotEncode(MLProcess):
    def __init__(
        self,
@ -153,6 +175,7 @@ class OneHotEncode(MLProcess):
        return new_df


+@register_tool(tool_type=TOOL_TYPE)
 class LabelEncode(MLProcess):
    def __init__(
        self,
@ -181,6 +204,7 @@ class LabelEncode(MLProcess):
        return new_df


+@register_tool(tool_type=TOOL_TYPE)
 def get_column_info(df: pd.DataFrame) -> dict:
    column_info = {
        "Category": [],
--- a/metagpt/tools/functions/libs/feature_engineering.py
+++ b/metagpt/tools/functions/libs/feature_engineering.py
@ -6,7 +6,7 @@
 # @Desc    : Feature Engineering Tools
 import itertools

-import lightgbm as lgb
+# import lightgbm as lgb
 import numpy as np
 import pandas as pd
 from joblib import Parallel, delayed
@ -15,9 +15,14 @@ from sklearn.feature_selection import VarianceThreshold
 from sklearn.model_selection import KFold
 from sklearn.preprocessing import KBinsDiscretizer, PolynomialFeatures

-from metagpt.tools.functions.libs.base import MLProcess
+from metagpt.tools.libs.data_preprocess import MLProcess
+from metagpt.tools.tool_data_type import ToolTypeEnum
+from metagpt.tools.tool_registry import register_tool
+
+TOOL_TYPE = ToolTypeEnum.FEATURE_ENGINEERING.value


+@register_tool(tool_type=TOOL_TYPE)
 class PolynomialExpansion(MLProcess):
    def __init__(self, cols: list, degree: int = 2, label_col: str = None):
        self.cols = cols
@ -48,6 +53,7 @@ class PolynomialExpansion(MLProcess):
        return new_df


+@register_tool(tool_type=TOOL_TYPE)
 class CatCount(MLProcess):
    def __init__(self, col: str):
        self.col = col
@ -62,6 +68,7 @@ class CatCount(MLProcess):
        return new_df


+@register_tool(tool_type=TOOL_TYPE)
 class TargetMeanEncoder(MLProcess):
    def __init__(self, col: str, label: str):
        self.col = col
@ -77,6 +84,7 @@ class TargetMeanEncoder(MLProcess):
        return new_df


+@register_tool(tool_type=TOOL_TYPE)
 class KFoldTargetMeanEncoder(MLProcess):
    def __init__(self, col: str, label: str, n_splits: int = 5, random_state: int = 2021):
        self.col = col
@ -103,6 +111,7 @@ class KFoldTargetMeanEncoder(MLProcess):
        return new_df


+@register_tool(tool_type=TOOL_TYPE)
 class CatCross(MLProcess):
    def __init__(self, cols: list, max_cat_num: int = 100):
        self.cols = cols
@ -138,6 +147,7 @@ class CatCross(MLProcess):
        return new_df


+@register_tool(tool_type=TOOL_TYPE)
 class GroupStat(MLProcess):
    def __init__(self, group_col: str, agg_col: str, agg_funcs: list):
        self.group_col = group_col
@ -157,6 +167,7 @@ class GroupStat(MLProcess):
        return new_df


+@register_tool(tool_type=TOOL_TYPE)
 class SplitBins(MLProcess):
    def __init__(self, cols: list, strategy: str = "quantile"):
        self.cols = cols
@ -173,6 +184,7 @@ class SplitBins(MLProcess):
        return new_df


+@register_tool(tool_type=TOOL_TYPE)
 class ExtractTimeComps(MLProcess):
    def __init__(self, time_col: str, time_comps: list):
        self.time_col = time_col
@ -201,6 +213,7 @@ class ExtractTimeComps(MLProcess):
        return new_df


+@register_tool(tool_type=TOOL_TYPE)
 class GeneralSelection(MLProcess):
    def __init__(self, label_col: str):
        self.label_col = label_col
@ -228,6 +241,7 @@ class GeneralSelection(MLProcess):
        return new_df


+# Not registered as a tool for now: this class needs lightgbm (lgb), whose import is commented out above
 class TreeBasedSelection(MLProcess):
    def __init__(self, label_col: str, task_type: str):
        self.label_col = label_col
@ -270,6 +284,7 @@ class TreeBasedSelection(MLProcess):
        return new_df


+@register_tool(tool_type=TOOL_TYPE)
 class VarianceBasedSelection(MLProcess):
    def __init__(self, label_col: str, threshold: float = 0):
        self.label_col = label_col
--- a/metagpt/tools/functions/libs/vision.py
+++ b/metagpt/tools/functions/libs/vision.py
@ -5,18 +5,13 @@
@Author  : mannaandpoem
@File    : vision.py
 """
+import base64
 from pathlib import Path

 import requests

-import base64
-
-from metagpt.config import CONFIG
-
-OPENAI_API_BASE = CONFIG.OPENAI_BASE_URL
-API_KEY = CONFIG.OPENAI_API_KEY
-MODEL = CONFIG.OPENAI_VISION_MODEL
-MAX_TOKENS = CONFIG.VISION_MAX_TOKENS
+from metagpt.tools.tool_data_type import ToolTypeEnum
+from metagpt.tools.tool_registry import register_tool

 ANALYZE_LAYOUT_PROMPT = """You are now a UI/UX, please generate layout information for this image:

@ -33,8 +28,15 @@ As the design pays tribute to large companies, sometimes it is normal for some c
 Now, please generate the corresponding webpage code including HTML, CSS and JavaScript:"""


-class Vision:
+@register_tool(tool_type=ToolTypeEnum.IMAGE2WEBPAGE.value)
+class GPTvGenerator:
    def __init__(self):
+        from metagpt.config import CONFIG
+
+        OPENAI_API_BASE = CONFIG.OPENAI_BASE_URL
+        API_KEY = CONFIG.OPENAI_API_KEY
+        MODEL = CONFIG.OPENAI_VISION_MODEL
+        MAX_TOKENS = CONFIG.VISION_MAX_TOKENS
        self.api_key = API_KEY
        self.api_base = OPENAI_API_BASE
        self.model = MODEL
@ -51,10 +53,7 @@ class Vision:

    def get_result(self, image_path, prompt):
        base64_image = self.encode_image(image_path)
-        headers = {
-            "Content-Type": "application/json",
-            "Authorization": f"Bearer {self.api_key}"
-        }
+        headers = {"Content-Type": "application/json", "Authorization": f"Bearer {self.api_key}"}
        payload = {
            "model": self.model,
            "messages": [
@ -62,11 +61,8 @@ class Vision:
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt},
-                        {
-                            "type": "image_url",
-                            "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}
-                        }
-                    ]
+                        {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}},
+                    ],
                }
            ],
            "max_tokens": self.max_tokens,
@ -81,7 +77,7 @@ class Vision:
    @staticmethod
    def encode_image(image_path):
        with open(image_path, "rb") as image_file:
-            return base64.b64encode(image_file.read()).decode('utf-8')
+            return base64.b64encode(image_file.read()).decode("utf-8")

    @staticmethod
    def save_webpages(image_path, webpages) -> Path:
--- a/metagpt/tools/libs/sd_engine.py
+++ b/metagpt/tools/libs/sd_engine.py
@ -13,9 +13,10 @@ import requests
 from aiohttp import ClientSession
 from PIL import Image, PngImagePlugin

-from metagpt.config import CONFIG
 from metagpt.const import SD_OUTPUT_FILE_REPO
 from metagpt.logs import logger
+from metagpt.tools.tool_data_type import ToolTypeEnum
+from metagpt.tools.tool_registry import register_tool

 payload = {
    "prompt": "",
@ -51,8 +52,11 @@ payload = {
 default_negative_prompt = "(easynegative:0.8),black, dark,Low resolution"


+@register_tool(tool_type=ToolTypeEnum.STABLE_DIFFUSION.value)
 class SDEngine:
    def __init__(self, sd_url=""):
+        from metagpt.config import CONFIG
+
        # Initialize the SDEngine with configuration
        self.sd_url = sd_url if sd_url else CONFIG.get("SD_URL")
        self.sd_t2i_url = f"{self.sd_url}{CONFIG.get('SD_T2I_API')}"
--- a/metagpt/tools/functions/libs/scrape_web/scrape_web.py
+++ b/metagpt/tools/functions/libs/scrape_web/scrape_web.py
@ -1,9 +1,10 @@
-import asyncio
-
+from metagpt.tools.tool_data_type import ToolTypeEnum
+from metagpt.tools.tool_registry import register_tool
 from metagpt.tools.web_browser_engine_playwright import PlaywrightWrapper


-async def scrape_web(url, *urls):
+@register_tool(tool_type=ToolTypeEnum.WEBSCRAPING.value)
+async def scrape_web_playwright(url, *urls):
    """
    Scrape and save the HTML structure and inner text content of a web page using Playwright.

@ -19,5 +20,3 @@ async def scrape_web(url, *urls):

    # Return the inner text content of the web page
    return {"inner_text": web.inner_text, "html": web.html}
-
-# 需要改三个地方: yaml, 对应路径下init, MetaGPT/metagpt/prompts/ml_engineer.py中ML_MODULE_MAP
--- a/metagpt/tools/functions/schemas/__init__.py
+++ b/metagpt/tools/functions/schemas/__init__.py
--- a/metagpt/tools/schemas/data_preprocess/FillMissingValue.yml
+++ b/metagpt/tools/schemas/data_preprocess/FillMissingValue.yml
@ -0,0 +1,61 @@
+FillMissingValue:
+  type: class
+  description: "Completing missing values with simple strategies"
+  methods:
+    __init__:
+      description: "Initialize self."
+      parameters:
+        properties:
+          features:
+            type: list
+            description: "columns to be processed"
+          strategy:
+            type: str
+            description: "the imputation strategy, notice mean/median can only be used for numeric features"
+            default: mean
+            enum:
+              - mean
+              - median
+              - most_frequent
+              - constant
+          fill_value:
+            type: int
+            description: "fill_value is used to replace all occurrences of missing_values"
+            default: null
+        required:
+          - features
+    fit:
+      description: "Fit the FillMissingValue model."
+      parameters:
+        properties:
+          df:
+            type: DataFrame
+            description: "The input DataFrame."
+        required:
+          - df
+    transform:
+      description: "Transform the input DataFrame with the fitted model."
+      parameters:
+        properties:
+          df:
+            type: DataFrame
+            description: "The input DataFrame."
+        required:
+          - df
+      returns:
+        df:
+          type: DataFrame
+          description: "The transformed DataFrame."
+    fit_transform:
+      description: "Fit and transform the input DataFrame."
+      parameters:
+        properties:
+          df:
+            type: DataFrame
+            description: "The input DataFrame."
+        required:
+          - df
+      returns:
+        df:
+          type: DataFrame
+          description: "The transformed DataFrame."
--- a/metagpt/tools/schemas/data_preprocess/LabelEncode.yml
+++ b/metagpt/tools/schemas/data_preprocess/LabelEncode.yml
@ -0,0 +1,48 @@
+LabelEncode:
+  type: class
+  description: "Apply label encoding to specified categorical columns in-place."
+  methods:
+    __init__:
+      description: "Initialize self."
+      parameters:
+        properties:
+          features:
+            type: list
+            description: "Categorical columns to be label encoded"
+        required:
+          - features
+    fit:
+      description: "Fit the LabelEncode model."
+      parameters:
+        properties:
+          df:
+            type: DataFrame
+            description: "The input DataFrame."
+        required:
+          - df
+    transform:
+      description: "Transform the input DataFrame with the fitted model."
+      parameters:
+        properties:
+          df:
+            type: DataFrame
+            description: "The input DataFrame."
+        required:
+          - df
+      returns:
+        df:
+          type: DataFrame
+          description: "The transformed DataFrame."
+    fit_transform:
+      description: "Fit and transform the input DataFrame."
+      parameters:
+        properties:
+          df:
+            type: DataFrame
+            description: "The input DataFrame."
+        required:
+          - df
+      returns:
+        df:
+          type: DataFrame
+          description: "The transformed DataFrame."
--- a/metagpt/tools/schemas/data_preprocess/MaxAbsScale.yml
+++ b/metagpt/tools/schemas/data_preprocess/MaxAbsScale.yml
@ -0,0 +1,48 @@
+MaxAbsScale:
+  type: class
+  description: "Scale each feature by its maximum absolute value"
+  methods:
+    __init__:
+      description: "Initialize self."
+      parameters:
+        properties:
+          features:
+            type: list
+            description: "columns to be processed"
+        required:
+          - features
+    fit:
+      description: "Fit the MaxAbsScale model."
+      parameters:
+        properties:
+          df:
+            type: DataFrame
+            description: "The input DataFrame."
+        required:
+          - df
+    transform:
+      description: "Transform the input DataFrame with the fitted model."
+      parameters:
+        properties:
+          df:
+            type: DataFrame
+            description: "The input DataFrame."
+        required:
+          - df
+      returns:
+        df:
+          type: DataFrame
+          description: "The transformed DataFrame."
+    fit_transform:
+      description: "Fit and transform the input DataFrame."
+      parameters:
+        properties:
+          df:
+            type: DataFrame
+            description: "The input DataFrame."
+        required:
+          - df
+      returns:
+        df:
+          type: DataFrame
+          description: "The transformed DataFrame."
--- a/metagpt/tools/schemas/data_preprocess/MinMaxScale.yml
+++ b/metagpt/tools/schemas/data_preprocess/MinMaxScale.yml
@ -0,0 +1,48 @@
+MinMaxScale:
+  type: class
+  description: "Transform features by scaling each feature to a range, which is (0, 1)"
+  methods:
+    __init__:
+      description: "Initialize self."
+      parameters:
+        properties:
+          features:
+            type: list
+            description: "columns to be processed"
+        required:
+          - features
+    fit:
+      description: "Fit the MinMaxScale model."
+      parameters:
+        properties:
+          df:
+            type: DataFrame
+            description: "The input DataFrame."
+        required:
+          - df
+    transform:
+      description: "Transform the input DataFrame with the fitted model."
+      parameters:
+        properties:
+          df:
+            type: DataFrame
+            description: "The input DataFrame."
+        required:
+          - df
+      returns:
+        df:
+          type: DataFrame
+          description: "The transformed DataFrame."
+    fit_transform:
+      description: "Fit and transform the input DataFrame."
+      parameters:
+        properties:
+          df:
+            type: DataFrame
+            description: "The input DataFrame."
+        required:
+          - df
+      returns:
+        df:
+          type: DataFrame
+          description: "The transformed DataFrame."
--- a/metagpt/tools/schemas/data_preprocess/OneHotEncode.yml
+++ b/metagpt/tools/schemas/data_preprocess/OneHotEncode.yml
@ -0,0 +1,48 @@
+OneHotEncode:
+  type: class
+  description: "Apply one-hot encoding to specified categorical columns, the original columns will be dropped."
+  methods:
+    __init__:
+      description: "Initialize self."
+      parameters:
+        properties:
+          features:
+            type: list
+            description: "Categorical columns to be one-hot encoded and dropped"
+        required:
+          - features
+    fit:
+      description: "Fit the OneHotEncode model."
+      parameters:
+        properties:
+          df:
+            type: DataFrame
+            description: "The input DataFrame."
+        required:
+          - df
+    transform:
+      description: "Transform the input DataFrame with the fitted model."
+      parameters:
+        properties:
+          df:
+            type: DataFrame
+            description: "The input DataFrame."
+        required:
+          - df
+      returns:
+        df:
+          type: DataFrame
+          description: "The transformed DataFrame."
+    fit_transform:
+      description: "Fit and transform the input DataFrame."
+      parameters:
+        properties:
+          df:
+            type: DataFrame
+            description: "The input DataFrame."
+        required:
+          - df
+      returns:
+        df:
+          type: DataFrame
+          description: "The transformed DataFrame."
--- a/metagpt/tools/schemas/data_preprocess/StandardScale.yml
+++ b/metagpt/tools/schemas/data_preprocess/StandardScale.yml
@ -0,0 +1,48 @@
+StandardScale:
+  type: class
+  description: "Standardize features by removing the mean and scaling to unit variance"
+  methods:
+    __init__:
+      description: "Initialize self."
+      parameters:
+        properties:
+          features:
+            type: list
+            description: "columns to be processed"
+        required:
+          - features
+    fit:
+      description: "Fit the StandardScale model."
+      parameters:
+        properties:
+          df:
+            type: DataFrame
+            description: "The input DataFrame."
+        required:
+          - df
+    transform:
+      description: "Transform the input DataFrame with the fitted model."
+      parameters:
+        properties:
+          df:
+            type: DataFrame
+            description: "The input DataFrame."
+        required:
+          - df
+      returns:
+        df:
+          type: DataFrame
+          description: "The transformed DataFrame."
+    fit_transform:
+      description: "Fit and transform the input DataFrame."
+      parameters:
+        properties:
+          df:
+            type: DataFrame
+            description: "The input DataFrame."
+        required:
+          - df
+      returns:
+        df:
+          type: DataFrame
+          description: "The transformed DataFrame."
--- a/metagpt/tools/schemas/feature_engineering/CatCount.yml
+++ b/metagpt/tools/schemas/feature_engineering/CatCount.yml
@ -0,0 +1,48 @@
+CatCount:
+  type: class
+  description: "Add value counts of a categorical column as new feature."
+  methods:
+    __init__:
+      description: "Initialize self."
+      parameters:
+        properties:
+          col:
+            type: str
+            description: "Column for value counts."
+        required:
+          - col
+    fit:
+      description: "Fit the CatCount model."
+      parameters:
+        properties:
+          df:
+            type: DataFrame
+            description: "The input DataFrame."
+        required:
+          - df
+    transform:
+      description: "Transform the input DataFrame with the fitted model."
+      parameters:
+        properties:
+          df:
+            type: DataFrame
+            description: "The input DataFrame."
+        required:
+          - df
+      returns:
+        df:
+          type: DataFrame
+          description: "The transformed DataFrame."
+    fit_transform:
+      description: "Fit and transform the input DataFrame."
+      parameters:
+        properties:
+          df:
+            type: DataFrame
+            description: "The input DataFrame."
+        required:
+          - df
+      returns:
+        df:
+          type: DataFrame
+          description: "The transformed DataFrame."
--- a/metagpt/tools/schemas/feature_engineering/CatCross.yml
+++ b/metagpt/tools/schemas/feature_engineering/CatCross.yml
@ -0,0 +1,52 @@
+CatCross:
+  type: class
+  description: "Add pairwise crossed features and convert them to numerical features."
+  methods:
+    __init__:
+      description: "Initialize self."
+      parameters:
+        properties:
+          cols:
+            type: list
+            description: "Columns to be pairwise crossed, at least 2 columns."
+          max_cat_num:
+            type: int
+            description: "Maximum unique categories per crossed feature."
+            default: 100
+        required:
+          - cols
+    fit:
+      description: "Fit the CatCross model."
+      parameters:
+        properties:
+          df:
+            type: DataFrame
+            description: "The input DataFrame."
+        required:
+          - df
+    transform:
+      description: "Transform the input DataFrame with the fitted model."
+      parameters:
+        properties:
+          df:
+            type: DataFrame
+            description: "The input DataFrame."
+        required:
+          - df
+      returns:
+        df:
+          type: DataFrame
+          description: "The transformed DataFrame."
+    fit_transform:
+      description: "Fit and transform the input DataFrame."
+      parameters:
+        properties:
+          df:
+            type: DataFrame
+            description: "The input DataFrame."
+        required:
+          - df
+      returns:
+        df:
+          type: DataFrame
+          description: "The transformed DataFrame."
--- a/metagpt/tools/schemas/feature_engineering/GeneralSelection.yml
+++ b/metagpt/tools/schemas/feature_engineering/GeneralSelection.yml
@ -0,0 +1,48 @@
+GeneralSelection:
+  type: class
+  description: "Drop all nan feats and feats with only one unique value."
+  methods:
+    __init__:
+      description: "Initialize self."
+      parameters:
+        properties:
+          label_col:
+            type: str
+            description: "Label column name."
+        required:
+          - label_col
+    fit:
+      description: "Fit the GeneralSelection model."
+      parameters:
+        properties:
+          df:
+            type: DataFrame
+            description: "The input DataFrame."
+        required:
+          - df
+    transform:
+      description: "Transform the input DataFrame with the fitted model."
+      parameters:
+        properties:
+          df:
+            type: DataFrame
+            description: "The input DataFrame."
+        required:
+          - df
+      returns:
+        df:
+          type: DataFrame
+          description: "The transformed DataFrame."
+    fit_transform:
+      description: "Fit and transform the input DataFrame."
+      parameters:
+        properties:
+          df:
+            type: DataFrame
+            description: "The input DataFrame."
+        required:
+          - df
+      returns:
+        df:
+          type: DataFrame
+          description: "The transformed DataFrame."
--- a/metagpt/tools/schemas/feature_engineering/GroupStat.yml
+++ b/metagpt/tools/schemas/feature_engineering/GroupStat.yml
@ -0,0 +1,58 @@
+GroupStat:
+  type: class
+  description: "Aggregate specified column in a DataFrame grouped by another column, adding new features named '<agg_col>_<agg_func>_by_<group_col>'."
+  methods:
+    __init__:
+      description: "Initialize self."
+      parameters:
+        properties:
+          group_col:
+            type: str
+            description: "Column used for grouping."
+          agg_col:
+            type: str
+            description: "Column on which aggregation is performed."
+          agg_funcs:
+            type: list
+            description: >-
+              List of aggregation functions to apply, such as ['mean', 'std'].
+              Each function must be supported by pandas.
+        required:
+          - group_col
+          - agg_col
+          - agg_funcs
+    fit:
+      description: "Fit the GroupStat model."
+      parameters:
+        properties:
+          df:
+            type: DataFrame
+            description: "The input DataFrame."
+        required:
+          - df
+    transform:
+      description: "Transform the input DataFrame with the fitted model."
+      parameters:
+        properties:
+          df:
+            type: DataFrame
+            description: "The input DataFrame."
+        required:
+          - df
+      returns:
+        df:
+          type: DataFrame
+          description: "The transformed DataFrame."
+    fit_transform:
+      description: "Fit and transform the input DataFrame."
+      parameters:
+        properties:
+          df:
+            type: DataFrame
+            description: "The input DataFrame."
+        required:
+          - df
+      returns:
+        df:
+          type: DataFrame
+          description: "The transformed DataFrame."
--- a/metagpt/tools/schemas/feature_engineering/KFoldTargetMeanEncoder.yml
+++ b/metagpt/tools/schemas/feature_engineering/KFoldTargetMeanEncoder.yml
@ -0,0 +1,60 @@
+KFoldTargetMeanEncoder:
+  type: class
+  description: "Adds a new feature to the DataFrame by k-fold mean encoding of a categorical column using the label column."
+  methods:
+    __init__:
+      description: "Initialize self."
+      parameters:
+        properties:
+          col:
+            type: str
+            description: "Column to be k-fold mean encoded."
+          label:
+            type: str
+            description: "Predicted label column."
+          n_splits:
+            type: int
+            description: "Number of splits for K-fold."
+            default: 5
+          random_state:
+            type: int
+            description: "Random seed."
+            default: 2021
+        required:
+          - col
+          - label
+    fit:
+      description: "Fit the KFoldTargetMeanEncoder model."
+      parameters:
+        properties:
+          df:
+            type: DataFrame
+            description: "The input DataFrame."
+        required:
+          - df
+    transform:
+      description: "Transform the input DataFrame with the fitted model."
+      parameters:
+        properties:
+          df:
+            type: DataFrame
+            description: "The input DataFrame."
+        required:
+          - df
+      returns:
+        df:
+          type: DataFrame
+          description: "The transformed DataFrame."
+    fit_transform:
+      description: "Fit and transform the input DataFrame."
+      parameters:
+        properties:
+          df:
+            type: DataFrame
+            description: "The input DataFrame."
+        required:
+          - df
+      returns:
+        df:
+          type: DataFrame
+          description: "The transformed DataFrame."
--- a/metagpt/tools/schemas/feature_engineering/PolynomialExpansion.yml
+++ b/metagpt/tools/schemas/feature_engineering/PolynomialExpansion.yml
--- a/metagpt/tools/schemas/feature_engineering/SplitBins.yml
+++ b/metagpt/tools/schemas/feature_engineering/SplitBins.yml
@ -0,0 +1,56 @@
+SplitBins:
+  type: class
+  description: "Inplace binning of continuous data into intervals, returning integer-encoded bin identifiers directly."
+  methods:
+    __init__:
+      description: "Initialize self."
+      parameters:
+        properties:
+          cols:
+            type: list
+            description: "Columns to be binned inplace."
+          strategy:
+            type: str
+            description: "Strategy used to define the widths of the bins."
+            default: quantile
+            enum:
+              - quantile
+              - uniform
+              - kmeans
+        required:
+          - cols
+    fit:
+      description: "Fit the SplitBins model."
+      parameters:
+        properties:
+          df:
+            type: DataFrame
+            description: "The input DataFrame."
+        required:
+          - df
+    transform:
+      description: "Transform the input DataFrame with the fitted model."
+      parameters:
+        properties:
+          df:
+            type: DataFrame
+            description: "The input DataFrame."
+        required:
+          - df
+      returns:
+        df:
+          type: DataFrame
+          description: "The transformed DataFrame."
+    fit_transform:
+      description: "Fit and transform the input DataFrame."
+      parameters:
+        properties:
+          df:
+            type: DataFrame
+            description: "The input DataFrame."
+        required:
+          - df
+      returns:
+        df:
+          type: DataFrame
+          description: "The transformed DataFrame."
--- a/metagpt/tools/schemas/feature_engineering/TargetMeanEncoder.yml
+++ b/metagpt/tools/schemas/feature_engineering/TargetMeanEncoder.yml
@ -0,0 +1,52 @@
+TargetMeanEncoder:
+  type: class
+  description: "Encodes a categorical column by the mean of the label column, and adds the result as a new feature."
+  methods:
+    __init__:
+      description: "Initialize self."
+      parameters:
+        properties:
+          col:
+            type: str
+            description: "Column to be mean encoded."
+          label:
+            type: str
+            description: "Predicted label column."
+        required:
+          - col
+          - label
+    fit:
+      description: "Fit the TargetMeanEncoder model."
+      parameters:
+        properties:
+          df:
+            type: DataFrame
+            description: "The input DataFrame."
+        required:
+          - df
+    transform:
+      description: "Transform the input DataFrame with the fitted model."
+      parameters:
+        properties:
+          df:
+            type: DataFrame
+            description: "The input DataFrame."
+        required:
+          - df
+      returns:
+        df:
+          type: DataFrame
+          description: "The transformed DataFrame."
+    fit_transform:
+      description: "Fit and transform the input DataFrame."
+      parameters:
+        properties:
+          df:
+            type: DataFrame
+            description: "The input DataFrame."
+        required:
+          - df
+      returns:
+        df:
+          type: DataFrame
+          description: "The transformed DataFrame."
--- a/metagpt/tools/schemas/feature_engineering/TreeBasedSelection.yml
+++ b/metagpt/tools/schemas/feature_engineering/TreeBasedSelection.yml
@ -0,0 +1,56 @@
+TreeBasedSelection:
+  type: class
+  description: "Select features based on tree-based model and remove features with low importance."
+  methods:
+    __init__:
+      description: "Initialize self."
+      parameters:
+        properties:
+          label_col:
+            type: str
+            description: "Label column name."
+          task_type:
+            type: str
+            description: "Task type, 'cls' for classification, 'mcls' for multi-class classification, 'reg' for regression."
+            enum:
+              - cls
+              - mcls
+              - reg
+        required:
+          - label_col
+          - task_type
+    fit:
+      description: "Fit the TreeBasedSelection model."
+      parameters:
+        properties:
+          df:
+            type: DataFrame
+            description: "The input DataFrame."
+        required:
+          - df
+    transform:
+      description: "Transform the input DataFrame with the fitted model."
+      parameters:
+        properties:
+          df:
+            type: DataFrame
+            description: "The input DataFrame."
+        required:
+          - df
+      returns:
+        df:
+          type: DataFrame
+          description: "The transformed DataFrame, which contains label_col."
+    fit_transform:
+      description: "Fit and transform the input DataFrame."
+      parameters:
+        properties:
+          df:
+            type: DataFrame
+            description: "The input DataFrame."
+        required:
+          - df
+      returns:
+        df:
+          type: DataFrame
+          description: "The transformed DataFrame, which contains label_col."
--- a/metagpt/tools/schemas/feature_engineering/VarianceBasedSelection.yml
+++ b/metagpt/tools/schemas/feature_engineering/VarianceBasedSelection.yml
@ -0,0 +1,52 @@
+VarianceBasedSelection:
+  type: class
+  description: "Select features based on variance and remove features with low variance."
+  methods:
+    __init__:
+      description: "Initialize self."
+      parameters:
+        properties:
+          label_col:
+            type: str
+            description: "Label column name."
+          threshold:
+            type: float
+            description: "Threshold for variance."
+            default: 0.0
+        required:
+          - label_col
+    fit:
+      description: "Fit the VarianceBasedSelection model."
+      parameters:
+        properties:
+          df:
+            type: DataFrame
+            description: "The input DataFrame."
+        required:
+          - df
+    transform:
+      description: "Transform the input DataFrame with the fitted model."
+      parameters:
+        properties:
+          df:
+            type: DataFrame
+            description: "The input DataFrame."
+        required:
+          - df
+      returns:
+        df:
+          type: DataFrame
+          description: "The transformed DataFrame, which contains label_col."
+    fit_transform:
+      description: "Fit and transform the input DataFrame."
+      parameters:
+        properties:
+          df:
+            type: DataFrame
+            description: "The input DataFrame."
+        required:
+          - df
+      returns:
+        df:
+          type: DataFrame
+          description: "The transformed DataFrame, which contains label_col."
--- a/metagpt/tools/schemas/image2webpage/GPTvGenerator.yml
+++ b/metagpt/tools/schemas/image2webpage/GPTvGenerator.yml
@ -1,4 +1,4 @@
-Vision:
+GPTvGenerator:
  type: class
  description: "Class for generating web pages at once."
  methods:
--- a/metagpt/tools/functions/schemas/stable_diffusion.yml
+++ b/metagpt/tools/functions/schemas/stable_diffusion.yml
--- a/metagpt/tools/schemas/web_scrapping/scrape_web_playwright.yml
+++ b/metagpt/tools/schemas/web_scrapping/scrape_web_playwright.yml
@ -1,4 +1,4 @@
-scrape_web:
+scrape_web_playwright:
  type: async funciton
  description: "Scrape and save the HTML structure and inner text content of a web page using Playwright."
  parameters:
--- a/metagpt/tools/tool_data_type.py
+++ b/metagpt/tools/tool_data_type.py
@ -0,0 +1,35 @@
+from enum import Enum
+
+from pydantic import BaseModel
+
+
+class ToolTypeEnum(Enum):
+    """Names of the tool categories known to the tool registry.
+
+    Looking up a value that is not listed here, e.g. ``ToolTypeEnum("foo")``,
+    yields ``ToolTypeEnum.OTHER`` instead of raising ``ValueError``.
+    """
+
+    EDA = "eda"
+    DATA_PREPROCESS = "data_preprocess"
+    FEATURE_ENGINEERING = "feature_engineering"
+    MODEL_TRAIN = "model_train"
+    MODEL_EVALUATE = "model_evaluate"
+    STABLE_DIFFUSION = "stable_diffusion"
+    IMAGE2WEBPAGE = "image2webpage"
+    # NOTE(review): the schema directory for web scraping tools is spelled "web_scrapping";
+    # confirm that tools registered with this type still resolve their schema path.
+    WEBSCRAPING = "web_scraping"
+    OTHER = "other"
+
+    @classmethod
+    def _missing_(cls, value):
+        # Enum consults the ``_missing_`` classmethod when a value lookup fails;
+        # ``__missing__`` is a dict-only hook that Enum never calls.
+        return cls.OTHER
+
+
+class ToolType(BaseModel):
+    """Describes one category of tools."""
+
+    name: str  # identifier of the category
+    desc: str  # short human-readable description of the category
+    usage_prompt: str = ""  # prompt text attached to the category; empty when none is given
+
+
+class ToolSchema(BaseModel):
+    """Pydantic model with a single required string field, ``name``."""
+
+    name: str
+
+
+class Tool(BaseModel):
+    """Record describing one tool: its name, code location, schema and source code."""
+
+    name: str  # name of the tool
+    path: str  # path of the code file that defines the tool
+    schemas: dict = {}  # schema of the tool as a dict; empty by default
+    code: str = ""  # source code of the tool; empty by default
--- a/metagpt/tools/tool_registry.py
+++ b/metagpt/tools/tool_registry.py
@ -0,0 +1,126 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+@Time    : 2023/01/12 17:07
+@Author  : garylin2099
+@File    : tool_registry.py
+"""
+import inspect
+import os
+import re
+from collections import defaultdict
+
+import yaml
+
+from metagpt.const import TOOL_SCHEMA_PATH
+from metagpt.logs import logger
+from metagpt.tools.tool_data_type import Tool, ToolSchema, ToolType
+
+
+class ToolRegistry:
+    """In-memory catalogue of tools and tool types.
+
+    Attributes:
+        tools: maps a tool name to its ``Tool``.
+        tool_types: maps a tool type name to its ``ToolType``.
+        tools_by_types: two-layer mapping ``{tool_type: {tool_name: Tool}}``.
+    """
+
+    def __init__(self):
+        self.tools = {}
+        self.tool_types = {}
+        self.tools_by_types = defaultdict(dict)  # two-layer k-v, {tool_type: {tool_name: {...}, ...}, ...}
+
+    def register_tool_type(self, tool_type: ToolType):
+        """Store ``tool_type`` under ``tool_type.name``, overwriting any entry with the same name."""
+        self.tool_types[tool_type.name] = tool_type
+        logger.info(f"tool type {tool_type.name} registered")
+
+    def register_tool(
+        self,
+        tool_name,
+        tool_path,
+        schema_path=None,
+        tool_code="",
+        tool_type="other",
+        make_schema_if_not_exists=False,
+    ):
+        """Load the schema of a tool and add the tool to the registry.
+
+        Does nothing when ``tool_name`` is already registered, or when no schema file exists and
+        ``make_schema_if_not_exists`` is false.
+
+        Args:
+            tool_name: key the tool is stored under.
+            tool_path: path of the code file defining the tool; also written into the schema as "tool_path".
+            schema_path: YAML schema file; defaults to ``TOOL_SCHEMA_PATH / tool_type / f"{tool_name}.yml"``.
+            tool_code: source code of the tool.
+            tool_type: name of the type the tool is indexed under in ``tools_by_types``.
+            make_schema_if_not_exists: create a placeholder schema via ``make_schema`` when the file is missing.
+        """
+        if self.has_tool(tool_name):
+            return
+
+        schema_path = schema_path or TOOL_SCHEMA_PATH / tool_type / f"{tool_name}.yml"
+
+        if not os.path.exists(schema_path):
+            if not make_schema_if_not_exists:
+                logger.warning(f"no schema found at assumed schema_path {schema_path}, skip registering {tool_name}")
+                return
+            logger.warning(f"no schema found, will make schema at {schema_path}")
+            make_schema(tool_code, schema_path)
+
+        with open(schema_path, "r", encoding="utf-8") as f:
+            # An empty file loads as None and a placeholder schema as {}; treat both as "no entries".
+            schema_dict = yaml.safe_load(f) or {}
+        # Prefer the entry keyed by the tool name, then the first entry, then an empty schema.
+        schemas = schema_dict.get(tool_name) or next(iter(schema_dict.values()), None) or {}
+        schemas["tool_path"] = tool_path  # corresponding code file path of the tool
+        try:
+            ToolSchema(**schemas)  # validation
+        except Exception as e:
+            # Best effort: a schema that fails validation is still registered, but the mismatch is recorded.
+            logger.debug(f"{tool_name} schema not conforms to required format, but will be used anyway. Mismatch: {e}")
+        tool = Tool(name=tool_name, path=tool_path, schemas=schemas, code=tool_code)
+        self.tools[tool_name] = tool
+        self.tools_by_types[tool_type][tool_name] = tool
+        logger.info(f"{tool_name} registered")
+
+    def has_tool(self, key):
+        """Return whether a tool named ``key`` is registered."""
+        return key in self.tools
+
+    def get_tool(self, key):
+        """Return the tool named ``key``, or None when it is not registered."""
+        return self.tools.get(key)
+
+    def get_tools_by_type(self, key):
+        """Return the ``{tool_name: Tool}`` mapping of type ``key``, or None when the type has no tools."""
+        # .get() rather than indexing, so a lookup never inserts an empty entry into the defaultdict
+        return self.tools_by_types.get(key)
+
+    def has_tool_type(self, key):
+        """Return whether a tool type named ``key`` is registered."""
+        return key in self.tool_types
+
+    def get_tool_type(self, key):
+        """Return the tool type named ``key``, or None when it is not registered."""
+        return self.tool_types.get(key)
+
+    def get_tool_types(self):
+        """Return the mapping of all registered tool types."""
+        return self.tool_types
+
+
+# Registry instance
+TOOL_REGISTRY = ToolRegistry()
+
+
+def register_tool_type(cls):
+    """Class decorator: instantiate ``cls`` without arguments, add the instance to TOOL_REGISTRY, return ``cls``."""
+    type_instance = cls()
+    TOOL_REGISTRY.register_tool_type(tool_type=type_instance)
+    return cls
+
+
+def register_tool(tool_name="", tool_type="other", schema_path=None):
+    """Build a decorator that records a function / class in TOOL_REGISTRY.
+
+    Args:
+        tool_name: name to register under; the decorated object's ``__name__`` is used when empty.
+        tool_type: name of the tool type passed on to the registry.
+        schema_path: schema file passed on to the registry; None lets the registry pick its default.
+
+    Returns:
+        A decorator that registers its argument and returns it unchanged.
+    """
+
+    def decorator(cls, tool_name=tool_name):
+        # tool_name is re-bound as a default argument so the fallback below assigns to a local
+        if not tool_name:
+            tool_name = cls.__name__
+
+        # File in which the function / class is defined; inside a "metagpt" path keep only the
+        # portion matched by the pattern below
+        defined_in = inspect.getfile(cls)
+        if "metagpt" in defined_in:
+            defined_in = re.search("metagpt.+", defined_in).group(0)
+
+        TOOL_REGISTRY.register_tool(
+            tool_name=tool_name,
+            tool_path=defined_in,
+            schema_path=schema_path,
+            tool_code=inspect.getsource(cls),
+            tool_type=tool_type,
+        )
+        return cls
+
+    return decorator
+
+
+def make_schema(tool_code, path):
+    """Write a placeholder (empty) YAML schema to ``path`` and return ``path``.
+
+    ``tool_code`` is accepted but not used yet; the written schema is always empty.
+    Missing parent directories of ``path`` are created.
+    """
+    parent_dir = os.path.dirname(path)
+    if parent_dir:  # a bare file name has no parent, and os.makedirs("") raises FileNotFoundError
+        os.makedirs(parent_dir, exist_ok=True)  # Create the necessary directories
+    schema = {}  # an empty schema for now
+    with open(path, "w", encoding="utf-8") as f:
+        yaml.dump(schema, f)
+    return path
--- a/metagpt/tools/tool_types.py
+++ b/metagpt/tools/tool_types.py
@ -0,0 +1,68 @@
+from metagpt.prompts.tool_types import (
+    DATA_PREPROCESS_PROMPT,
+    FEATURE_ENGINEERING_PROMPT,
+    IMAGE2WEBPAGE_PROMPT,
+    MODEL_EVALUATE_PROMPT,
+    MODEL_TRAIN_PROMPT,
+)
+from metagpt.tools.tool_data_type import ToolType, ToolTypeEnum
+from metagpt.tools.tool_registry import register_tool_type
+
+
+@register_tool_type
+class EDA(ToolType):
+    """Tool type for exploratory data analysis; registered when this module is imported."""
+
+    name: str = ToolTypeEnum.EDA.value
+    desc: str = "For performing exploratory data analysis"
+
+
+@register_tool_type
+class DataPreprocess(ToolType):
+    """Tool type for data preprocessing, with its own usage prompt; registered when this module is imported."""
+
+    name: str = ToolTypeEnum.DATA_PREPROCESS.value
+    desc: str = "Only for changing value inplace."
+    usage_prompt: str = DATA_PREPROCESS_PROMPT
+
+
+@register_tool_type
+class FeatureEngineer(ToolType):
+    """Tool type for feature engineering, with its own usage prompt; registered when this module is imported."""
+
+    name: str = ToolTypeEnum.FEATURE_ENGINEERING.value
+    desc: str = "Only for creating new columns for input data."
+    usage_prompt: str = FEATURE_ENGINEERING_PROMPT
+
+
+@register_tool_type
+class ModelTrain(ToolType):
+    """Tool type for model training, with its own usage prompt; registered when this module is imported."""
+
+    name: str = ToolTypeEnum.MODEL_TRAIN.value
+    desc: str = "Only for training model."
+    usage_prompt: str = MODEL_TRAIN_PROMPT
+
+
+@register_tool_type
+class ModelEvaluate(ToolType):
+    """Tool type for model evaluation, with its own usage prompt; registered when this module is imported."""
+
+    name: str = ToolTypeEnum.MODEL_EVALUATE.value
+    desc: str = "Only for evaluating model."
+    usage_prompt: str = MODEL_EVALUATE_PROMPT
+
+
+@register_tool_type
+class StableDiffusion(ToolType):
+    """Tool type for stable diffusion tools; registered when this module is imported."""
+
+    name: str = ToolTypeEnum.STABLE_DIFFUSION.value
+    desc: str = "Related to text2image, image2image using stable diffusion model."
+
+
+@register_tool_type
+class Image2Webpage(ToolType):
+    """Tool type for image-to-webpage tools, with its own usage prompt; registered when this module is imported."""
+
+    name: str = ToolTypeEnum.IMAGE2WEBPAGE.value
+    desc: str = "For converting image into webpage code."
+    usage_prompt: str = IMAGE2WEBPAGE_PROMPT
+
+
+@register_tool_type
+class WebScraping(ToolType):
+    """Tool type for web scraping tools; registered when this module is imported."""
+
+    name: str = ToolTypeEnum.WEBSCRAPING.value
+    desc: str = "For scraping data from web pages."
+
+
+@register_tool_type
+class Other(ToolType):
+    """Catch-all tool type for tools outside the other categories; registered when this module is imported."""
+
+    name: str = ToolTypeEnum.OTHER.value
+    desc: str = "Any tools not in the defined categories"
--- a/metagpt/tools/web_browser_engine_playwright.py
+++ b/metagpt/tools/web_browser_engine_playwright.py
@ -12,7 +12,6 @@ from typing import Literal

 from playwright.async_api import async_playwright

-from metagpt.config import CONFIG
 from metagpt.logs import logger
 from metagpt.utils.parse_html import WebPage

@ -32,6 +31,8 @@ class PlaywrightWrapper:
        launch_kwargs: dict | None = None,
        **kwargs,
    ) -> None:
+        from metagpt.config import CONFIG
+
        if browser_type is None:
            browser_type = CONFIG.playwright_browser_type
        self.browser_type = browser_type
--- a/tests/metagpt/tools/functions/init.py
+++ b/tests/metagpt/tools/functions/init.py
@ -1,6 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-# @Time    : 2023/11/17 10:24
-# @Author  : lidanyang
-# @File    : __init__.py
-# @Desc    :
--- a/tests/metagpt/tools/functions/libs/test_vision.py
+++ b/tests/metagpt/tools/functions/libs/test_vision.py
@ -1,48 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-"""
-@Time    : 2024/01/15
-@Author  : mannaandpoem
-@File    : test_vision.py
-"""
-import pytest
-
-from metagpt import logs
-from metagpt.tools.functions.libs.vision import Vision
-
-
-@pytest.fixture
-def mock_webpages():
-    return """```html\n<html>\n<script src="scripts.js"></script>
-<link rel="stylesheet" href="styles.css(">\n</html>\n```\n
-```css\n.class { ... }\n```\n
-```javascript\nfunction() { ... }\n```\n"""
-
-
-def test_vision_generate_webpages(mocker, mock_webpages):
-    mocker.patch(
-        "metagpt.tools.functions.libs.vision.Vision.generate_web_pages",
-        return_value=mock_webpages
-    )
-    image_path = "image.png"
-    vision = Vision()
-    rsp = vision.generate_web_pages(image_path=image_path)
-    logs.logger.info(rsp)
-    assert "html" in rsp
-    assert "css" in rsp
-    assert "javascript" in rsp
-
-
-def test_save_webpages(mocker, mock_webpages):
-    mocker.patch(
-        "metagpt.tools.functions.libs.vision.Vision.generate_web_pages",
-        return_value=mock_webpages
-    )
-    image_path = "image.png"
-    vision = Vision()
-    webpages = vision.generate_web_pages(image_path)
-    webpages_dir = vision.save_webpages(image_path=image_path, webpages=webpages)
-    logs.logger.info(webpages_dir)
-    assert webpages_dir.exists()
-
-
--- a/tests/metagpt/tools/functions/libs/init.py
+++ b/tests/metagpt/tools/functions/libs/init.py
--- a/tests/metagpt/tools/functions/libs/test_data_preprocess.py
+++ b/tests/metagpt/tools/functions/libs/test_data_preprocess.py
@ -5,7 +5,7 @@ import numpy.testing as npt
 import pandas as pd
 import pytest

-from metagpt.tools.functions.libs.data_preprocess import (
+from metagpt.tools.libs.data_preprocess import (
    FillMissingValue,
    LabelEncode,
    MaxAbsScale,
--- a/tests/metagpt/tools/functions/libs/test_feature_engineering.py
+++ b/tests/metagpt/tools/functions/libs/test_feature_engineering.py
@ -3,7 +3,7 @@ import pandas as pd
 import pytest
 from sklearn.datasets import fetch_california_housing, load_breast_cancer, load_iris

-from metagpt.tools.functions.libs.feature_engineering import (
+from metagpt.tools.libs.feature_engineering import (
    CatCount,
    CatCross,
    ExtractTimeComps,
@ -147,6 +147,7 @@ def test_general_selection(mock_dataset):
    assert "cat2" not in transformed.columns


+@pytest.mark.skip  # skip because TreeBasedSelection needs lgb as dependency
 def test_tree_based_selection(mock_dataset):
    # regression
    data = load_sklearn_data("housing")
--- a/tests/metagpt/tools/libs/test_gpt_v_generator.py
+++ b/tests/metagpt/tools/libs/test_gpt_v_generator.py
@ -0,0 +1,40 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+@Time    : 2024/01/15
+@Author  : mannaandpoem
+@File    : test_gpt_v_generator.py
+"""
+import pytest
+
+from metagpt import logs
+from metagpt.tools.libs.gpt_v_generator import GPTvGenerator
+
+
+@pytest.fixture
+def mock_webpages(mocker):
+    """Patch ``GPTvGenerator.generate_web_pages`` to return canned html/css/javascript blocks; returns the mocker."""
+    canned_response = """```html\n<html>\n<script src="scripts.js"></script>
+<link rel="stylesheet" href="styles.css(">\n</html>\n```\n
+```css\n.class { ... }\n```\n
+```javascript\nfunction() { ... }\n```\n"""
+    patch_target = "metagpt.tools.libs.gpt_v_generator.GPTvGenerator.generate_web_pages"
+    mocker.patch(patch_target, return_value=canned_response)
+    return mocker
+
+
+def test_vision_generate_webpages(mock_webpages):
+    """The (mocked) generator output mentions html, css and javascript."""
+    rsp = GPTvGenerator().generate_web_pages(image_path="image.png")
+    logs.logger.info(rsp)
+    for section in ("html", "css", "javascript"):
+        assert section in rsp
+
+
+def test_save_webpages(mock_webpages):
+    """Saving the (mocked) generated pages returns a path that exists."""
+    source_image = "image.png"
+    gen = GPTvGenerator()
+    pages = gen.generate_web_pages(source_image)
+    saved_dir = gen.save_webpages(image_path=source_image, webpages=pages)
+    logs.logger.info(saved_dir)
+    assert saved_dir.exists()
--- a/tests/metagpt/tools/libs/test_sd_engine.py
+++ b/tests/metagpt/tools/libs/test_sd_engine.py
@ -4,7 +4,7 @@
 # @Desc    :
 import pytest

-from metagpt.tools.sd_engine import SDEngine
+from metagpt.tools.libs.sd_engine import SDEngine


 def test_sd_tools():
--- a/tests/metagpt/tools/functions/test_udf.py
+++ b/tests/metagpt/tools/functions/test_udf.py
@ -3,7 +3,7 @@ import json
 import yaml

 from metagpt.logs import logger
-from metagpt.tools.functions.libs.udf import UDFS, UDFS_YAML, docstring_to_yaml
+from metagpt.tools.libs.udf import UDFS, UDFS_YAML, docstring_to_yaml


 def test_udfs():
--- a/tests/metagpt/tools/test_tool_registry.py
+++ b/tests/metagpt/tools/test_tool_registry.py
@ -0,0 +1,101 @@
+import pytest
+
+from metagpt.tools.tool_registry import ToolRegistry
+from metagpt.tools.tool_types import ToolType
+
+
+@pytest.fixture
+def tool_registry():
+    """Provide a freshly constructed ToolRegistry."""
+    registry = ToolRegistry()
+    return registry
+
+
+@pytest.fixture
+def schema_yaml(mocker):
+    """Make every path appear to exist and every opened file read as a one-entry YAML schema; returns the mocker."""
+    fake_schema_text = """
+    tool_name:
+        key1: value1
+        key2: value2
+    """
+    fake_open = mocker.mock_open(read_data=fake_schema_text)
+    mocker.patch("os.path.exists", return_value=True)
+    mocker.patch("builtins.open", fake_open)
+    return mocker
+
+
+# Test Initialization
+def test_initialization(tool_registry):
+    """A new registry holds no tools, no tool types and no per-type index entries."""
+    assert isinstance(tool_registry, ToolRegistry)
+    for container in (tool_registry.tools, tool_registry.tool_types, tool_registry.tools_by_types):
+        assert container == {}
+
+
+# Test Tool Type Registration
+def test_register_tool_type(tool_registry):
+    """Registering a ToolType makes its name a key of ``tool_types``."""
+    tool_registry.register_tool_type(ToolType(name="TestType", desc="test"))
+    assert "TestType" in tool_registry.tool_types
+
+
+# Test Tool Registration
+def test_register_tool(tool_registry, schema_yaml):
+    """With a readable schema, the tool ends up in ``tools`` under its name."""
+    tool_registry.register_tool(tool_name="TestTool", tool_path="/path/to/tool")
+    assert "TestTool" in tool_registry.tools
+
+
+# Test Tool Registration with Non-existing Schema
+def test_register_tool_no_schema(tool_registry, mocker):
+    """Registration is skipped when the schema file does not exist."""
+    mocker.patch("os.path.exists", return_value=False)
+    tool_registry.register_tool(tool_name="TestTool", tool_path="/path/to/tool")
+    assert "TestTool" not in tool_registry.tools
+
+
+# Test Tool Existence Checks
+def test_has_tool(tool_registry, schema_yaml):
+    """``has_tool`` is true for a registered name and false for an unknown one."""
+    registered, unknown = "TestTool", "NonexistentTool"
+    tool_registry.register_tool(registered, "/path/to/tool")
+    assert tool_registry.has_tool(registered)
+    assert not tool_registry.has_tool(unknown)
+
+
+# Test Tool Retrieval
+def test_get_tool(tool_registry, schema_yaml):
+    """A registered tool can be fetched by name and keeps its name and path."""
+    expected_name, expected_path = "TestTool", "/path/to/tool"
+    tool_registry.register_tool(expected_name, expected_path)
+    fetched = tool_registry.get_tool(expected_name)
+    assert fetched is not None
+    assert fetched.name == expected_name
+    assert fetched.path == expected_path
+
+
+# Similar tests for has_tool_type, get_tool_type, get_tools_by_type
+def test_has_tool_type(tool_registry):
+    """``has_tool_type`` is true for a registered type name and false for an unknown one."""
+    tool_registry.register_tool_type(ToolType(name="TestType", desc="test"))
+    assert tool_registry.has_tool_type("TestType")
+    assert not tool_registry.has_tool_type("NonexistentType")
+
+
+def test_get_tool_type(tool_registry):
+    """A registered tool type can be fetched by name."""
+    type_name = "TestType"
+    tool_registry.register_tool_type(ToolType(name=type_name, desc="test"))
+    fetched = tool_registry.get_tool_type(type_name)
+    assert fetched is not None
+    assert fetched.name == type_name
+
+
+def test_get_tools_by_type(tool_registry, schema_yaml):
+    """A tool registered with a tool type is returned when looking up that type."""
+    type_name, name, path = "TestType", "TestTool", "/path/to/tool"
+    tool_registry.register_tool_type(ToolType(name=type_name, desc="test"))
+
+    tool_registry.register_tool(name, path, tool_type=type_name)
+
+    found = tool_registry.get_tools_by_type(type_name)
+    assert found is not None
+    assert name in found
+
+
+# Test case for when the tool type does not exist
+def test_get_tools_by_nonexistent_type(tool_registry):
+    """Looking up a type that has no tools returns None."""
+    assert tool_registry.get_tools_by_type("NonexistentType") is None
--- a/tests/mock/mock_llm.py
+++ b/tests/mock/mock_llm.py
@ -69,7 +69,6 @@ class MockLLM(OriginalLLM):
        A copy of metagpt.provider.openai_api.OpenAILLM.aask_code, we can't use super().aask because it will be mocked.
        Since openai_api.OpenAILLM.aask_code is different from base_llm.BaseLLM.aask_code, we use the former.
        """
-        messages = self._process_message(messages)
        rsp = await self._achat_completion_function(messages, **kwargs)
        return self.get_choice_function_arguments(rsp)
				`@ -1 +0,0 @@`
				`from metagpt.tools.functions.libs.scrape_web.scrape_web import scrape_web`