diff --git a/metagpt/actions/write_analysis_code.py b/metagpt/actions/write_analysis_code.py
index cf806a986..c6e504b9e 100644
--- a/metagpt/actions/write_analysis_code.py
+++ b/metagpt/actions/write_analysis_code.py
@@ -22,7 +22,8 @@ from metagpt.prompts.ml_engineer import (
     TOOL_USAGE_PROMPT,
 )
 from metagpt.schema import Message, Plan
-from metagpt.tools.tool_registry import TOOL_REGISTRY
+from metagpt.tools import TOOL_REGISTRY
+from metagpt.tools.tool_registry import validate_tool_names
 from metagpt.utils.common import create_func_config, remove_comments
 
@@ -90,30 +91,29 @@ class WriteCodeByGenerate(BaseWriteAnalysisCode):
 
 class WriteCodeWithTools(BaseWriteAnalysisCode):
     """Write code with help of local available tools. Choose tools first, then generate code to use the tools"""
 
-    available_tools: dict = {}
+    # selected tools to choose from, listed by their names. An empty list means selection from all tools.
+    selected_tools: list[str] = []
 
-    def __init__(self, **kwargs):
-        super().__init__(**kwargs)
-
-    def _parse_recommend_tools(self, recommend_tools: list) -> dict:
+    def _get_tools_by_type(self, tool_type: str) -> dict:
         """
-        Parses and validates a list of recommended tools, and retrieves their schema from registry.
+        Retrieve tools by tool type from the registry, filtered by the pre-selected tool list
 
         Args:
-            recommend_tools (list): A list of recommended tools.
+            tool_type (str): Tool type to retrieve from the registry
 
         Returns:
-            dict: A dict of valid tool schemas.
+            dict: A dict of tool name to Tool object, representing available tools under the type
         """
-        valid_tools = []
-        for tool_name in recommend_tools:
-            if TOOL_REGISTRY.has_tool(tool_name):
-                valid_tools.append(TOOL_REGISTRY.get_tool(tool_name))
+        candidate_tools = TOOL_REGISTRY.get_tools_by_type(tool_type)
+        if self.selected_tools:
+            candidate_tools = {
+                tool_name: candidate_tools[tool_name]
+                for tool_name in self.selected_tools
+                if tool_name in candidate_tools
+            }
+        return candidate_tools
 
-        tool_catalog = {tool.name: tool.schemas for tool in valid_tools}
-        return tool_catalog
-
-    async def _tool_recommendation(
+    async def _recommend_tool(
         self,
         task: str,
         code_steps: str,
@@ -128,7 +128,7 @@ class WriteCodeWithTools(BaseWriteAnalysisCode):
             available_tools (dict): the available tools description
 
         Returns:
-            list: recommended tools for the specified task
+            dict: schemas of recommended tools for the specified task
         """
         prompt = TOOL_RECOMMENDATION_PROMPT.format(
             current_task=task,
@@ -138,42 +138,62 @@ class WriteCodeWithTools(BaseWriteAnalysisCode):
         tool_config = create_func_config(SELECT_FUNCTION_TOOLS)
         rsp = await self.llm.aask_code(prompt, **tool_config)
         recommend_tools = rsp["recommend_tools"]
-        return recommend_tools
+        logger.info(f"Recommended tools: \n{recommend_tools}")
+
+        # Parse and validate the recommended tools, since the LLM might hallucinate and recommend non-existent tools
+        valid_tools = validate_tool_names(recommend_tools, return_tool_object=True)
+
+        tool_schemas = {tool.name: tool.schemas for tool in valid_tools}
+
+        return tool_schemas
+
+    async def _prepare_tools(self, plan: Plan) -> Tuple[dict, str]:
+        """Prepare tool schemas and usage instructions according to the current task
+
+        Args:
+            plan (Plan): The overall plan containing task information.
+
+        Returns:
+            Tuple[dict, str]: Tool schemas ({tool_name: tool_schema_dict}) and a usage prompt for the selected tool type
+        """
+        # find tool type from task type through exact match, can extend to retrieval in the future
+        tool_type = plan.current_task.task_type
+
+        # prepare tool-type-specific instruction
+        tool_type_usage_prompt = (
+            TOOL_REGISTRY.get_tool_type(tool_type).usage_prompt if TOOL_REGISTRY.has_tool_type(tool_type) else ""
+        )
+
+        # prepare schemas of available tools
+        tool_schemas = {}
+        available_tools = self._get_tools_by_type(tool_type)
+        if available_tools:
+            available_tools = {tool_name: tool.schemas["description"] for tool_name, tool in available_tools.items()}
+            code_steps = plan.current_task.code_steps
+            tool_schemas = await self._recommend_tool(plan.current_task.instruction, code_steps, available_tools)
+
+        return tool_schemas, tool_type_usage_prompt
 
     async def run(
         self,
         context: List[Message],
-        plan: Plan = None,
+        plan: Plan,
         **kwargs,
     ) -> str:
-        tool_type = (
-            plan.current_task.task_type
-        )  # find tool type from task type through exact match, can extend to retrieval in the future
-        available_tools = TOOL_REGISTRY.get_tools_by_type(tool_type)
-        special_prompt = (
-            TOOL_REGISTRY.get_tool_type(tool_type).usage_prompt if TOOL_REGISTRY.has_tool_type(tool_type) else ""
+        # prepare tool schemas and tool-type-specific instruction
+        tool_schemas, tool_type_usage_prompt = await self._prepare_tools(plan=plan)
+
+        # form a complete tool usage instruction and include it as a message in the context
+        tools_instruction = TOOL_USAGE_PROMPT.format(
+            tool_schemas=tool_schemas, tool_type_usage_prompt=tool_type_usage_prompt
         )
-        code_steps = plan.current_task.code_steps
-
-        tool_catalog = {}
-
-        if available_tools:
-            available_tools = {tool_name: tool.schemas["description"] for tool_name, tool in available_tools.items()}
-
-            recommend_tools = await self._tool_recommendation(
-                plan.current_task.instruction, code_steps, available_tools
-            )
-            tool_catalog = self._parse_recommend_tools(recommend_tools)
-            logger.info(f"Recommended tools: \n{recommend_tools}")
-
-        tools_instruction = TOOL_USAGE_PROMPT.format(special_prompt=special_prompt, tool_catalog=tool_catalog)
         context.append(Message(content=tools_instruction, role="user"))
 
+        # prepare prompt & LLM call
         prompt = self.process_msg(context)
-
         tool_config = create_func_config(CODE_GENERATOR_WITH_TOOLS)
         rsp = await self.llm.aask_code(prompt, **tool_config)
+
         return rsp
 
@@ -185,36 +205,25 @@ class WriteCodeWithToolsML(WriteCodeWithTools):
         column_info: str = "",
         **kwargs,
     ) -> Tuple[List[Message], str]:
-        tool_type = (
-            plan.current_task.task_type
-        )  # find tool type from task type through exact match, can extend to retrieval in the future
-        available_tools = TOOL_REGISTRY.get_tools_by_type(tool_type)
-        special_prompt = (
-            TOOL_REGISTRY.get_tool_type(tool_type).usage_prompt if TOOL_REGISTRY.has_tool_type(tool_type) else ""
-        )
-        code_steps = plan.current_task.code_steps
+        # prepare tool schemas and tool-type-specific instruction
+        tool_schemas, tool_type_usage_prompt = await self._prepare_tools(plan=plan)
 
+        # ML-specific variables to be used in the prompt
+        code_steps = plan.current_task.code_steps
         finished_tasks = plan.get_finished_tasks()
         code_context = [remove_comments(task.code) for task in finished_tasks]
         code_context = "\n\n".join(code_context)
 
-        if available_tools:
-            available_tools = {tool_name: tool.schemas["description"] for tool_name, tool in available_tools.items()}
-
-            recommend_tools = await
self._tool_recommendation( - plan.current_task.instruction, code_steps, available_tools - ) - tool_catalog = self._parse_recommend_tools(recommend_tools) - logger.info(f"Recommended tools: \n{recommend_tools}") - + # prepare prompt depending on tool availability & LLM call + if tool_schemas: prompt = ML_TOOL_USAGE_PROMPT.format( user_requirement=plan.goal, history_code=code_context, current_task=plan.current_task.instruction, column_info=column_info, - special_prompt=special_prompt, + tool_type_usage_prompt=tool_type_usage_prompt, code_steps=code_steps, - tool_catalog=tool_catalog, + tool_schemas=tool_schemas, ) else: @@ -223,13 +232,15 @@ class WriteCodeWithToolsML(WriteCodeWithTools): history_code=code_context, current_task=plan.current_task.instruction, column_info=column_info, - special_prompt=special_prompt, + tool_type_usage_prompt=tool_type_usage_prompt, code_steps=code_steps, ) - tool_config = create_func_config(CODE_GENERATOR_WITH_TOOLS) rsp = await self.llm.aask_code(prompt, **tool_config) + + # Extra output to be used for potential debugging context = [Message(content=prompt, role="user")] + return context, rsp diff --git a/metagpt/prompts/ml_engineer.py b/metagpt/prompts/ml_engineer.py index 3fd895e6e..ac95e14bd 100644 --- a/metagpt/prompts/ml_engineer.py +++ b/metagpt/prompts/ml_engineer.py @@ -161,7 +161,7 @@ Latest data info after previous tasks: # Task Write complete code for 'Current Task'. And avoid duplicating code from 'Done Tasks', such as repeated import of packages, reading data, etc. -Specifically, {special_prompt} +Specifically, {tool_type_usage_prompt} # Code Steps: Strictly follow steps below when you writing code if it's convenient. @@ -192,7 +192,7 @@ model.fit(train, y_train) TOOL_USAGE_PROMPT = """ # Instruction Write complete code for 'Current Task'. And avoid duplicating code from finished tasks, such as repeated import of packages, reading data, etc. -Specifically, {special_prompt} +Specifically, {tool_type_usage_prompt} # Capabilities - You can utilize pre-defined tools in any code lines from 'Available Tools' in the form of Python Class. @@ -200,7 +200,7 @@ Specifically, {special_prompt} # Available Tools (can be empty): Each Class tool is described in JSON format. When you call a tool, import the tool first. -{tool_catalog} +{tool_schemas} # Constraints: - Ensure the output new code is executable in the same Jupyter notebook with previous tasks code have been executed. @@ -225,7 +225,7 @@ Latest data info after previous tasks: # Task Write complete code for 'Current Task'. And avoid duplicating code from 'Done Tasks', such as repeated import of packages, reading data, etc. -Specifically, {special_prompt} +Specifically, {tool_type_usage_prompt} # Code Steps: Strictly follow steps below when you writing code if it's convenient. @@ -237,7 +237,7 @@ Strictly follow steps below when you writing code if it's convenient. # Available Tools: Each Class tool is described in JSON format. When you call a tool, import the tool from its path first. 
-{tool_catalog} +{tool_schemas} # Output Example: when current task is "do data preprocess, like fill missing value, handle outliers, etc.", and their are two steps in 'Code Steps', the code be like: diff --git a/metagpt/roles/code_interpreter.py b/metagpt/roles/code_interpreter.py index f972e72e2..11ede6068 100644 --- a/metagpt/roles/code_interpreter.py +++ b/metagpt/roles/code_interpreter.py @@ -19,6 +19,7 @@ class CodeInterpreter(Role): make_udfs: bool = False # whether to save user-defined functions use_code_steps: bool = False execute_code: ExecutePyCode = Field(default_factory=ExecutePyCode, exclude=True) + tools: list[str] = [] def __init__( self, @@ -27,13 +28,20 @@ class CodeInterpreter(Role): goal="", auto_run=True, use_tools=False, - make_udfs=False, + tools=[], **kwargs, ): super().__init__( - name=name, profile=profile, goal=goal, auto_run=auto_run, use_tools=use_tools, make_udfs=make_udfs, **kwargs + name=name, profile=profile, goal=goal, auto_run=auto_run, use_tools=use_tools, tools=tools, **kwargs ) self._set_react_mode(react_mode="plan_and_act", auto_run=auto_run, use_tools=use_tools) + if use_tools and tools: + from metagpt.tools.tool_registry import ( + validate_tool_names, # import upon use + ) + + self.tools = validate_tool_names(tools) + logger.info(f"will only use {self.tools} as tools") @property def working_memory(self): @@ -92,7 +100,7 @@ class CodeInterpreter(Role): return code["code"], result, success async def _write_code(self): - todo = WriteCodeByGenerate() if not self.use_tools else WriteCodeWithTools() + todo = WriteCodeByGenerate() if not self.use_tools else WriteCodeWithTools(selected_tools=self.tools) logger.info(f"ready to {todo.name}") context = self.planner.get_useful_memories() diff --git a/metagpt/roles/ml_engineer.py b/metagpt/roles/ml_engineer.py index 6b671f9c2..d1a22b9d3 100644 --- a/metagpt/roles/ml_engineer.py +++ b/metagpt/roles/ml_engineer.py @@ -27,7 +27,7 @@ class MLEngineer(CodeInterpreter): column_info = await self._update_data_columns() logger.info("Write code with tools") - tool_context, code = await WriteCodeWithToolsML().run( + tool_context, code = await WriteCodeWithToolsML(selected_tools=self.tools).run( context=[], # context assembled inside the Action plan=self.planner.plan, column_info=column_info, diff --git a/metagpt/roles/role.py b/metagpt/roles/role.py index a2f2f2e9d..21e48a127 100644 --- a/metagpt/roles/role.py +++ b/metagpt/roles/role.py @@ -477,7 +477,7 @@ class Role(SerializationMixin, is_polymorphic_base=True): else: # update plan according to user's feedback and to take on changed tasks - await self.planner.update_plan(review) + await self.planner.update_plan() completed_plan_memory = self.planner.get_useful_memories() # completed plan as a outcome diff --git a/metagpt/tools/libs/__init__.py b/metagpt/tools/libs/__init__.py index 442f57149..c9767c1e5 100644 --- a/metagpt/tools/libs/__init__.py +++ b/metagpt/tools/libs/__init__.py @@ -9,7 +9,7 @@ from metagpt.tools.libs import ( feature_engineering, sd_engine, gpt_v_generator, - web_scrapping, + web_scraping, ) -_ = data_preprocess, feature_engineering, sd_engine, gpt_v_generator, web_scrapping # Avoid pre-commit error +_ = data_preprocess, feature_engineering, sd_engine, gpt_v_generator, web_scraping # Avoid pre-commit error diff --git a/metagpt/tools/libs/data_preprocess.py b/metagpt/tools/libs/data_preprocess.py index 3891f9df0..0480e71a7 100644 --- a/metagpt/tools/libs/data_preprocess.py +++ b/metagpt/tools/libs/data_preprocess.py @@ -26,31 +26,64 @@ class 
MLProcess(object): def transform(self, df): raise NotImplementedError - def fit_transform(self, df): + def fit_transform(self, df) -> pd.DataFrame: + """ + Fit and transform the input DataFrame. + + Args: + df (pd.DataFrame): The input DataFrame. + + Returns: + pd.DataFrame: The transformed DataFrame. + """ self.fit(df) return self.transform(df) @register_tool(tool_type=TOOL_TYPE) class FillMissingValue(MLProcess): - def __init__( - self, - features: list, - strategy: str = "mean", - fill_value=None, - ): + """ + Completing missing values with simple strategies. + """ + + def __init__(self, features: list, strategy: str = "mean", fill_value=None): + """ + Initialize self. + + Args: + features (list): Columns to be processed. + strategy (str, optional): The imputation strategy, notice 'mean' and 'median' can only + be used for numeric features. Enum: ['mean', 'median', 'most_frequent', 'constant']. Defaults to 'mean'. + fill_value (int, optional): Fill_value is used to replace all occurrences of missing_values. + Defaults to None. + """ self.features = features self.strategy = strategy self.fill_value = fill_value self.si = None def fit(self, df: pd.DataFrame): + """ + Fit the FillMissingValue model. + + Args: + df (pd.DataFrame): The input DataFrame. + """ if len(self.features) == 0: return self.si = SimpleImputer(strategy=self.strategy, fill_value=self.fill_value) self.si.fit(df[self.features]) - def transform(self, df: pd.DataFrame): + def transform(self, df: pd.DataFrame) -> pd.DataFrame: + """ + Transform the input DataFrame with the fitted model. + + Args: + df (pd.DataFrame): The input DataFrame. + + Returns: + pd.DataFrame: The transformed DataFrame. + """ if len(self.features) == 0: return df new_df = df.copy() @@ -60,18 +93,40 @@ class FillMissingValue(MLProcess): @register_tool(tool_type=TOOL_TYPE) class MinMaxScale(MLProcess): - def __init__( - self, - features: list, - ): + """ + Transform features by scaling each feature to a range, which is (0, 1). + """ + + def __init__(self, features: list): + """ + Initialize self. + + Args: + features (list): Columns to be processed. + """ self.features = features self.mms = None def fit(self, df: pd.DataFrame): + """ + Fit the MinMaxScale model. + + Args: + df (pd.DataFrame): The input DataFrame. + """ self.mms = MinMaxScaler() self.mms.fit(df[self.features]) - def transform(self, df: pd.DataFrame): + def transform(self, df: pd.DataFrame) -> pd.DataFrame: + """ + Transform the input DataFrame with the fitted model. + + Args: + df (pd.DataFrame): The input DataFrame. + + Returns: + pd.DataFrame: The transformed DataFrame. + """ new_df = df.copy() new_df[self.features] = self.mms.transform(new_df[self.features]) return new_df @@ -79,18 +134,40 @@ class MinMaxScale(MLProcess): @register_tool(tool_type=TOOL_TYPE) class StandardScale(MLProcess): - def __init__( - self, - features: list, - ): + """ + Standardize features by removing the mean and scaling to unit variance. + """ + + def __init__(self, features: list): + """ + Initialize self. + + Args: + features (list): Columns to be processed. + """ self.features = features self.ss = None def fit(self, df: pd.DataFrame): + """ + Fit the StandardScale model. + + Args: + df (pd.DataFrame): The input DataFrame. + """ self.ss = StandardScaler() self.ss.fit(df[self.features]) - def transform(self, df: pd.DataFrame): + def transform(self, df: pd.DataFrame) -> pd.DataFrame: + """ + Transform the input DataFrame with the fitted model. + + Args: + df (pd.DataFrame): The input DataFrame. 
+ + Returns: + pd.DataFrame: The transformed DataFrame. + """ new_df = df.copy() new_df[self.features] = self.ss.transform(new_df[self.features]) return new_df @@ -98,18 +175,40 @@ class StandardScale(MLProcess): @register_tool(tool_type=TOOL_TYPE) class MaxAbsScale(MLProcess): - def __init__( - self, - features: list, - ): + """ + Scale each feature by its maximum absolute value. + """ + + def __init__(self, features: list): + """ + Initialize self. + + Args: + features (list): Columns to be processed. + """ self.features = features self.mas = None def fit(self, df: pd.DataFrame): + """ + Fit the MaxAbsScale model. + + Args: + df (pd.DataFrame): The input DataFrame. + """ self.mas = MaxAbsScaler() self.mas.fit(df[self.features]) - def transform(self, df: pd.DataFrame): + def transform(self, df: pd.DataFrame) -> pd.DataFrame: + """ + Transform the input DataFrame with the fitted model. + + Args: + df (pd.DataFrame): The input DataFrame. + + Returns: + pd.DataFrame: The transformed DataFrame. + """ new_df = df.copy() new_df[self.features] = self.mas.transform(new_df[self.features]) return new_df @@ -117,18 +216,40 @@ class MaxAbsScale(MLProcess): @register_tool(tool_type=TOOL_TYPE) class RobustScale(MLProcess): - def __init__( - self, - features: list, - ): + """ + Apply the RobustScaler to scale features using statistics that are robust to outliers. + """ + + def __init__(self, features: list): + """ + Initialize the RobustScale instance with feature names. + + Args: + features (list): List of feature names to be scaled. + """ self.features = features self.rs = None def fit(self, df: pd.DataFrame): + """ + Compute the median and IQR for scaling. + + Args: + df (pd.DataFrame): Dataframe containing the features. + """ self.rs = RobustScaler() self.rs.fit(df[self.features]) def transform(self, df: pd.DataFrame): + """ + Scale features using the previously computed median and IQR. + + Args: + df (pd.DataFrame): Dataframe containing the features to be scaled. + + Returns: + pd.DataFrame: A new dataframe with scaled features. + """ new_df = df.copy() new_df[self.features] = self.rs.transform(new_df[self.features]) return new_df @@ -136,18 +257,40 @@ class RobustScale(MLProcess): @register_tool(tool_type=TOOL_TYPE) class OrdinalEncode(MLProcess): - def __init__( - self, - features: list, - ): + """ + Encode categorical features as ordinal integers. + """ + + def __init__(self, features: list): + """ + Initialize the OrdinalEncode instance with feature names. + + Args: + features (list): List of categorical feature names to be encoded. + """ self.features = features self.oe = None def fit(self, df: pd.DataFrame): + """ + Learn the ordinal encodings for the features. + + Args: + df (pd.DataFrame): Dataframe containing the categorical features. + """ self.oe = OrdinalEncoder() self.oe.fit(df[self.features]) def transform(self, df: pd.DataFrame): + """ + Convert the categorical features to ordinal integers. + + Args: + df (pd.DataFrame): Dataframe containing the categorical features to be encoded. + + Returns: + pd.DataFrame: A new dataframe with the encoded features. + """ new_df = df.copy() new_df[self.features] = self.oe.transform(new_df[self.features]) return new_df @@ -155,18 +298,40 @@ class OrdinalEncode(MLProcess): @register_tool(tool_type=TOOL_TYPE) class OneHotEncode(MLProcess): - def __init__( - self, - features: list, - ): + """ + Apply one-hot encoding to specified categorical columns, the original columns will be dropped. 
+ """ + + def __init__(self, features: list): + """ + Initialize self. + + Args: + features (list): Categorical columns to be one-hot encoded and dropped. + """ self.features = features self.ohe = None def fit(self, df: pd.DataFrame): + """ + Fit the OneHotEncoding model. + + Args: + df (pd.DataFrame): The input DataFrame. + """ self.ohe = OneHotEncoder(handle_unknown="ignore", sparse=False) self.ohe.fit(df[self.features]) - def transform(self, df: pd.DataFrame): + def transform(self, df: pd.DataFrame) -> pd.DataFrame: + """ + Transform the input DataFrame with the fitted model. + + Args: + df (pd.DataFrame): The input DataFrame. + + Returns: + pd.DataFrame: The transformed DataFrame. + """ ts_data = self.ohe.transform(df[self.features]) new_columns = self.ohe.get_feature_names_out(self.features) ts_data = pd.DataFrame(ts_data, columns=new_columns, index=df.index) @@ -177,21 +342,43 @@ class OneHotEncode(MLProcess): @register_tool(tool_type=TOOL_TYPE) class LabelEncode(MLProcess): - def __init__( - self, - features: list, - ): + """ + Apply label encoding to specified categorical columns in-place. + """ + + def __init__(self, features: list): + """ + Initialize self. + + Args: + features (list): Categorical columns to be label encoded. + """ self.features = features self.le_encoders = [] def fit(self, df: pd.DataFrame): + """ + Fit the LabelEncode model. + + Args: + df (pd.DataFrame): The input DataFrame. + """ if len(self.features) == 0: return for col in self.features: le = LabelEncoder().fit(df[col].astype(str).unique().tolist() + ["unknown"]) self.le_encoders.append(le) - def transform(self, df: pd.DataFrame): + def transform(self, df: pd.DataFrame) -> pd.DataFrame: + """ + Transform the input DataFrame with the fitted model. + + Args: + df (pd.DataFrame): The input DataFrame. + + Returns: + pd.DataFrame: The transformed DataFrame. + """ if len(self.features) == 0: return df new_df = df.copy() @@ -204,8 +391,17 @@ class LabelEncode(MLProcess): return new_df -@register_tool(tool_type=TOOL_TYPE) def get_column_info(df: pd.DataFrame) -> dict: + """ + Analyzes a DataFrame and categorizes its columns based on data types. + + Args: + df (pd.DataFrame): The DataFrame to be analyzed. + + Returns: + dict: A dictionary with four keys ('Category', 'Numeric', 'Datetime', 'Others'). + Each key corresponds to a list of column names belonging to that category. 
+ """ column_info = { "Category": [], "Numeric": [], diff --git a/metagpt/tools/libs/feature_engineering.py b/metagpt/tools/libs/feature_engineering.py index 308150f9b..79e1c1b07 100644 --- a/metagpt/tools/libs/feature_engineering.py +++ b/metagpt/tools/libs/feature_engineering.py @@ -184,7 +184,7 @@ class SplitBins(MLProcess): return new_df -@register_tool(tool_type=TOOL_TYPE) +# @register_tool(tool_type=TOOL_TYPE) class ExtractTimeComps(MLProcess): def __init__(self, time_col: str, time_comps: list): self.time_col = time_col @@ -242,6 +242,7 @@ class GeneralSelection(MLProcess): # skip for now because lgb is needed +# @register_tool(tool_type=TOOL_TYPE) class TreeBasedSelection(MLProcess): def __init__(self, label_col: str, task_type: str): self.label_col = label_col diff --git a/metagpt/tools/libs/web_scrapping.py b/metagpt/tools/libs/web_scraping.py similarity index 100% rename from metagpt/tools/libs/web_scrapping.py rename to metagpt/tools/libs/web_scraping.py diff --git a/metagpt/tools/schemas/data_preprocess/OrdinalEncode.yml b/metagpt/tools/schemas/data_preprocess/OrdinalEncode.yml new file mode 100644 index 000000000..79ebaf37c --- /dev/null +++ b/metagpt/tools/schemas/data_preprocess/OrdinalEncode.yml @@ -0,0 +1,46 @@ +OrdinalEncode: + type: class + description: Encode categorical features as ordinal integers. + methods: + __init__: + description: 'Initialize the OrdinalEncode instance with feature names. ' + parameters: + properties: + features: + type: list + description: List of categorical feature names to be encoded. + required: + - features + fit: + description: 'Learn the ordinal encodings for the features. ' + parameters: + properties: + df: + type: pd.DataFrame + description: Dataframe containing the categorical features. + required: + - df + fit_transform: + description: 'Fit and transform the input DataFrame. ' + parameters: + properties: + df: + type: pd.DataFrame + description: The input DataFrame. + required: + - df + returns: + - type: pd.DataFrame + description: The transformed DataFrame. + transform: + description: 'Convert the categorical features to ordinal integers. ' + parameters: + properties: + df: + type: pd.DataFrame + description: Dataframe containing the categorical features to be encoded. + required: + - df + returns: + - type: pd.DataFrame + description: A new dataframe with the encoded features. diff --git a/metagpt/tools/schemas/data_preprocess/RobustScale.yml b/metagpt/tools/schemas/data_preprocess/RobustScale.yml new file mode 100644 index 000000000..6d5dfaf3a --- /dev/null +++ b/metagpt/tools/schemas/data_preprocess/RobustScale.yml @@ -0,0 +1,47 @@ +RobustScale: + type: class + description: Apply the RobustScaler to scale features using statistics that are + robust to outliers. + methods: + __init__: + description: 'Initialize the RobustScale instance with feature names. ' + parameters: + properties: + features: + type: list + description: List of feature names to be scaled. + required: + - features + fit: + description: 'Compute the median and IQR for scaling. ' + parameters: + properties: + df: + type: pd.DataFrame + description: Dataframe containing the features. + required: + - df + fit_transform: + description: 'Fit and transform the input DataFrame. ' + parameters: + properties: + df: + type: pd.DataFrame + description: The input DataFrame. + required: + - df + returns: + - type: pd.DataFrame + description: The transformed DataFrame. + transform: + description: 'Scale features using the previously computed median and IQR. 
'
+      parameters:
+        properties:
+          df:
+            type: pd.DataFrame
+            description: Dataframe containing the features to be scaled.
+        required:
+        - df
+      returns:
+      - type: pd.DataFrame
+        description: A new dataframe with scaled features.
diff --git a/metagpt/tools/schemas/web_scrapping/scrape_web_playwright.yml b/metagpt/tools/schemas/web_scraping/scrape_web_playwright.yml
similarity index 100%
rename from metagpt/tools/schemas/web_scrapping/scrape_web_playwright.yml
rename to metagpt/tools/schemas/web_scraping/scrape_web_playwright.yml
diff --git a/metagpt/tools/tool_convert.py b/metagpt/tools/tool_convert.py
new file mode 100644
index 000000000..b8377e67a
--- /dev/null
+++ b/metagpt/tools/tool_convert.py
@@ -0,0 +1,72 @@
+import inspect
+
+from metagpt.utils.parse_docstring import GoogleDocstringParser, remove_spaces
+
+
+def convert_code_to_tool_schema(obj, include: list[str] = []):
+    docstring = inspect.getdoc(obj)
+    assert docstring, "no docstring found for the object, skip registering"
+
+    if inspect.isclass(obj):
+        schema = {"type": "class", "description": remove_spaces(docstring), "methods": {}}
+        for name, method in inspect.getmembers(obj, inspect.isfunction):
+            if include and name not in include:
+                continue
+            method_doc = inspect.getdoc(method)
+            if method_doc:
+                schema["methods"][name] = docstring_to_schema(method_doc)
+
+    elif inspect.isfunction(obj):
+        schema = {
+            "type": "function",
+            **docstring_to_schema(docstring),
+        }
+
+    schema = {obj.__name__: schema}
+
+    return schema
+
+
+def docstring_to_schema(docstring: str):
+    if docstring is None:
+        return {}
+
+    parser = GoogleDocstringParser(docstring=docstring)
+
+    # parse the description section
+    description = parser.parse_desc()
+
+    # parse the Args section
+    params = parser.parse_params()
+    parameter_schema = {"properties": {}, "required": []}
+    for param in params:
+        param_name, param_type, param_desc = param
+        # check required or optional
+        is_optional, param_type = parser.check_and_parse_optional(param_type)
+        if not is_optional:
+            parameter_schema["required"].append(param_name)
+        # type and desc
+        param_dict = {"type": param_type, "description": remove_spaces(param_desc)}
+        # match Default for optional args
+        has_default_val, default_val = parser.check_and_parse_default_value(param_desc)
+        if has_default_val:
+            param_dict["default"] = default_val
+        # match Enum
+        has_enum, enum_vals = parser.check_and_parse_enum(param_desc)
+        if has_enum:
+            param_dict["enum"] = enum_vals
+        # add to parameter schema
+        parameter_schema["properties"].update({param_name: param_dict})
+
+    # parse the Returns section
+    returns = parser.parse_returns()
+
+    # build the schema dict to be dumped as YAML
+    schema = {
+        "description": description,
+        "parameters": parameter_schema,
+    }
+    if returns:
+        schema["returns"] = [{"type": ret[0], "description": remove_spaces(ret[1])} for ret in returns]
+
+    return schema
diff --git a/metagpt/tools/tool_registry.py b/metagpt/tools/tool_registry.py
index 52ad25ce4..d16defa0a 100644
--- a/metagpt/tools/tool_registry.py
+++ b/metagpt/tools/tool_registry.py
@@ -11,17 +11,18 @@ import re
 from collections import defaultdict
 
 import yaml
+from pydantic import BaseModel
 
 from metagpt.const import TOOL_SCHEMA_PATH
 from metagpt.logs import logger
+from metagpt.tools.tool_convert import convert_code_to_tool_schema
 from metagpt.tools.tool_data_type import Tool, ToolSchema, ToolType
 
-class ToolRegistry:
-    def __init__(self):
-        self.tools = {}
-        self.tool_types = {}
-        self.tools_by_types = defaultdict(dict)  # two-layer k-v, {tool_type: {tool_name: {...}, ...}, ...}
+class ToolRegistry(BaseModel):
+    tools: dict = {}
+    tool_types: dict = {}
+    tools_by_types: dict = defaultdict(dict)  # two-layer k-v, {tool_type: {tool_name: {...}, ...}, ...}
 
     def register_tool_type(self, tool_type: ToolType):
         self.tool_types[tool_type.name] = tool_type
@@ -34,7 +35,9 @@ class ToolRegistry:
         schema_path=None,
         tool_code="",
         tool_type="other",
-        make_schema_if_not_exists=False,
+        tool_source_object=None,
+        include_functions=[],
+        make_schema_if_not_exists=True,
     ):
         if self.has_tool(tool_name):
             return
@@ -44,14 +47,16 @@ class ToolRegistry:
         if not os.path.exists(schema_path):
             if make_schema_if_not_exists:
                 logger.warning(f"no schema found, will make schema at {schema_path}")
-                make_schema(tool_code, schema_path)
+                schema_dict = make_schema(tool_source_object, include_functions, schema_path)
             else:
                 logger.warning(f"no schema found at assumed schema_path {schema_path}, skip registering {tool_name}")
                 return
-
-        with open(schema_path, "r", encoding="utf-8") as f:
-            schema_dict = yaml.safe_load(f)
-        schemas = schema_dict.get(tool_name) or list(schema_dict.values())[0]
+        else:
+            with open(schema_path, "r", encoding="utf-8") as f:
+                schema_dict = yaml.safe_load(f)
+        if not schema_dict:
+            return
+        schemas = schema_dict.get(tool_name) or list(schema_dict.values())[0]
         schemas["tool_path"] = tool_path  # corresponding code file path of the tool
         try:
             ToolSchema(**schemas)  # validation
@@ -65,22 +70,22 @@ class ToolRegistry:
         self.tools_by_types[tool_type][tool_name] = tool
         logger.info(f"{tool_name} registered")
 
-    def has_tool(self, key):
+    def has_tool(self, key: str) -> bool:
         return key in self.tools
 
-    def get_tool(self, key):
+    def get_tool(self, key) -> Tool:
         return self.tools.get(key)
 
-    def get_tools_by_type(self, key):
-        return self.tools_by_types.get(key)
+    def get_tools_by_type(self, key) -> dict[str, Tool]:
+        return self.tools_by_types.get(key, {})
 
-    def has_tool_type(self, key):
+    def has_tool_type(self, key) -> bool:
         return key in self.tool_types
 
-    def get_tool_type(self, key):
+    def get_tool_type(self, key) -> ToolType:
         return self.tool_types.get(key)
 
-    def get_tool_types(self):
+    def get_tool_types(self) -> dict[str, ToolType]:
         return self.tool_types
 
@@ -94,7 +99,7 @@ def register_tool_type(cls):
     return cls
 
-def register_tool(tool_name="", tool_type="other", schema_path=None):
+def register_tool(tool_name="", tool_type="other", schema_path=None, **kwargs):
     """register a tool to registry"""
 
     def decorator(cls, tool_name=tool_name):
@@ -112,15 +117,39 @@ def register_tool(tool_name="", tool_type="other", schema_path=None):
             schema_path=schema_path,
             tool_code=source_code,
             tool_type=tool_type,
+            tool_source_object=cls,
+            **kwargs,
         )
         return cls
 
     return decorator
 
-def make_schema(tool_code, path):
+def make_schema(tool_source_object, include, path):
     os.makedirs(os.path.dirname(path), exist_ok=True)  # Create the necessary directories
-    schema = {}  # an empty schema for now
-    with open(path, "w", encoding="utf-8") as f:
-        yaml.dump(schema, f)
-    return path
+    try:
+        schema = convert_code_to_tool_schema(tool_source_object, include=include)
+        with open(path, "w", encoding="utf-8") as f:
+            yaml.dump(schema, f, sort_keys=False)
+        # import json
+        # with open(str(path).replace("yml", "json"), "w", encoding="utf-8") as f:
+        #     json.dump(schema, f, ensure_ascii=False, indent=4)
+        logger.info(f"schema made at {path}")
+    except Exception as e:
+        schema = {}
+        logger.error(f"Failed to make schema: {e}")
+
+    return schema
+
+
+def validate_tool_names(tools: list[str], return_tool_object=False) -> list[str]:
+    valid_tools = []
+    for tool_name in tools:
+        if not TOOL_REGISTRY.has_tool(tool_name):
+            logger.warning(
+                f"Specified tool {tool_name} not found and was skipped. Check if you have registered it properly"
+            )
+        else:
+            valid_tool = TOOL_REGISTRY.get_tool(tool_name) if return_tool_object else tool_name
+            valid_tools.append(valid_tool)
+    return valid_tools
diff --git a/metagpt/utils/parse_docstring.py b/metagpt/utils/parse_docstring.py
new file mode 100644
index 000000000..8a017e1f7
--- /dev/null
+++ b/metagpt/utils/parse_docstring.py
@@ -0,0 +1,87 @@
+import re
+from typing import Tuple
+
+from pydantic import BaseModel
+
+
+def remove_spaces(text):
+    return re.sub(r"\s+", " ", text)
+
+
+class DocstringParser(BaseModel):
+    docstring: str
+
+    def parse_desc(self) -> str:
+        """Parse and return the description from the docstring."""
+
+    def parse_params(self) -> list[Tuple[str, str, str]]:
+        """Parse and return the parameters from the docstring.
+
+        Returns:
+            list[Tuple[str, str, str]]: A list of input parameter info. Each info is a triple of (param name, param type, param description)
+        """
+
+    def parse_returns(self) -> list[Tuple[str, str]]:
+        """Parse and return the output information from the docstring.
+
+        Returns:
+            list[Tuple[str, str]]: A list of output info. Each info is a tuple of (return type, return description)
+        """
+
+    @staticmethod
+    def check_and_parse_optional(param_type: str) -> Tuple[bool, str]:
+        """Check if a parameter is optional and return a processed param_type with the optionality info stripped if so"""
+
+    @staticmethod
+    def check_and_parse_default_value(param_desc: str) -> Tuple[bool, str]:
+        """Check if a parameter has a default value and return the default value if so"""
+
+    @staticmethod
+    def check_and_parse_enum(param_desc: str) -> Tuple[bool, str]:
+        """Check if a parameter description includes an enum and return enum values if so"""
+
+
+class reSTDocstringParser(DocstringParser):
+    """A parser for reStructuredText (reST) docstrings"""
+
+
+class GoogleDocstringParser(DocstringParser):
+    """A parser for Google-style docstrings"""
+
+    docstring: str
+
+    def parse_desc(self) -> str:
+        description_match = re.search(r"^(.*?)(?:Args:|Returns:|Raises:|$)", self.docstring, re.DOTALL)
+        description = remove_spaces(description_match.group(1)) if description_match else ""
+        return description
+
+    def parse_params(self) -> list[Tuple[str, str, str]]:
+        args_match = re.search(r"Args:\s*(.*?)(?:Returns:|Raises:|$)", self.docstring, re.DOTALL)
+        _args = args_match.group(1).strip() if args_match else ""
+        # variable_pattern = re.compile(r"(\w+)\s*\((.*?)\):\s*(.*)")
+        variable_pattern = re.compile(
+            r"(\w+)\s*\((.*?)\):\s*(.*?)(?=\n\s*\w+\s*\(|\Z)", re.DOTALL
+        )  # (?=\n\w+\s*\(|\Z) is to assert that what follows is either the start of the next parameter (indicated by a newline, some word characters, and an opening parenthesis) or the end of the string (\Z).
+ params = variable_pattern.findall(_args) + return params + + def parse_returns(self) -> list[Tuple[str, str]]: + returns_match = re.search(r"Returns:\s*(.*?)(?:Raises:|$)", self.docstring, re.DOTALL) + returns = returns_match.group(1).strip() if returns_match else "" + return_pattern = re.compile(r"^(.*)\s*:\s*(.*)$") + returns = return_pattern.findall(returns) + return returns + + @staticmethod + def check_and_parse_optional(param_type: str) -> Tuple[bool, str]: + return "optional" in param_type, param_type.replace(", optional", "") + + @staticmethod + def check_and_parse_default_value(param_desc: str) -> Tuple[bool, str]: + default_val = re.search(r"Defaults to (.+?)\.", param_desc) + return (True, default_val.group(1)) if default_val else (False, "") + + @staticmethod + def check_and_parse_enum(param_desc: str) -> Tuple[bool, str]: + enum_val = re.search(r"Enum: \[(.+?)\]", param_desc) + return (True, [e.strip() for e in enum_val.group(1).split(",")]) if enum_val else (False, []) diff --git a/tests/metagpt/roles/run_code_interpreter.py b/tests/metagpt/roles/run_code_interpreter.py index 539b20286..766a25998 100644 --- a/tests/metagpt/roles/run_code_interpreter.py +++ b/tests/metagpt/roles/run_code_interpreter.py @@ -10,7 +10,7 @@ from metagpt.utils.recovery_util import load_history, save_history async def run_code_interpreter( - role_class, requirement, auto_run, use_tools, use_code_steps, make_udfs, use_udfs, save_dir + role_class, requirement, auto_run, use_tools, use_code_steps, make_udfs, use_udfs, save_dir, tools ): """ The main function to run the MLEngineer with optional history loading. @@ -25,7 +25,9 @@ async def run_code_interpreter( """ if role_class == "ci": - role = CodeInterpreter(goal=requirement, auto_run=auto_run, use_tools=use_tools, make_udfs=make_udfs) + role = CodeInterpreter( + goal=requirement, auto_run=auto_run, use_tools=use_tools, make_udfs=make_udfs, tools=tools + ) else: role = MLEngineer( goal=requirement, @@ -33,7 +35,7 @@ async def run_code_interpreter( use_tools=use_tools, use_code_steps=use_code_steps, make_udfs=make_udfs, - use_udfs=use_udfs, + tools=tools, ) if save_dir: @@ -73,6 +75,8 @@ if __name__ == "__main__": use_tools = True make_udfs = False use_udfs = False + tools = [] + # tools = ["FillMissingValue", "CatCross", "non_existing_test"] async def main( role_class: str = role_class, @@ -83,9 +87,10 @@ if __name__ == "__main__": make_udfs: bool = make_udfs, use_udfs: bool = use_udfs, save_dir: str = save_dir, + tools=tools, ): await run_code_interpreter( - role_class, requirement, auto_run, use_tools, use_code_steps, make_udfs, use_udfs, save_dir + role_class, requirement, auto_run, use_tools, use_code_steps, make_udfs, use_udfs, save_dir, tools ) fire.Fire(main) diff --git a/tests/metagpt/tools/test_tool_convert.py b/tests/metagpt/tools/test_tool_convert.py new file mode 100644 index 000000000..1dad997bd --- /dev/null +++ b/tests/metagpt/tools/test_tool_convert.py @@ -0,0 +1,158 @@ +import pandas as pd + +from metagpt.tools.tool_convert import convert_code_to_tool_schema, docstring_to_schema + + +def test_docstring_to_schema(): + docstring = """ + Some test desc. + + Args: + features (list): Columns to be processed. + strategy (str, optional): The imputation strategy, notice 'mean' and 'median' can only be + used for numeric features. Enum: ['mean', 'median', 'most_frequent', 'constant']. Defaults to 'mean'. + fill_value (int, optional): Fill_value is used to replace all occurrences of missing_values. + Defaults to None. 
+ Returns: + pd.DataFrame: The transformed DataFrame. + """ + expected = { + "description": " Some test desc. ", + "parameters": { + "properties": { + "features": {"type": "list", "description": "Columns to be processed."}, + "strategy": { + "type": "str", + "description": "The imputation strategy, notice 'mean' and 'median' can only be used for numeric features. Enum: ['mean', 'median', 'most_frequent', 'constant']. Defaults to 'mean'.", + "default": "'mean'", + "enum": ["'mean'", "'median'", "'most_frequent'", "'constant'"], + }, + "fill_value": { + "type": "int", + "description": "Fill_value is used to replace all occurrences of missing_values. Defaults to None.", + "default": "None", + }, + }, + "required": ["features"], + }, + "returns": [{"type": "pd.DataFrame", "description": "The transformed DataFrame."}], + } + schema = docstring_to_schema(docstring) + assert schema == expected + + +class DummyClass: + """ + Completing missing values with simple strategies. + """ + + def __init__(self, features: list, strategy: str = "mean", fill_value=None): + """ + Initialize self. + + Args: + features (list): Columns to be processed. + strategy (str, optional): The imputation strategy, notice 'mean' and 'median' can only + be used for numeric features. Enum: ['mean', 'median', 'most_frequent', 'constant']. Defaults to 'mean'. + fill_value (int, optional): Fill_value is used to replace all occurrences of missing_values. + Defaults to None. + """ + pass + + def fit(self, df: pd.DataFrame): + """ + Fit the FillMissingValue model. + + Args: + df (pd.DataFrame): The input DataFrame. + """ + pass + + def transform(self, df: pd.DataFrame) -> pd.DataFrame: + """ + Transform the input DataFrame with the fitted model. + + Args: + df (pd.DataFrame): The input DataFrame. + + Returns: + pd.DataFrame: The transformed DataFrame. + """ + pass + + +def dummy_fn(df: pd.DataFrame) -> dict: + """ + Analyzes a DataFrame and categorizes its columns based on data types. + + Args: + df (pd.DataFrame): The DataFrame to be analyzed. + + Returns: + dict: A dictionary with four keys ('Category', 'Numeric', 'Datetime', 'Others'). + Each key corresponds to a list of column names belonging to that category. + """ + pass + + +def test_convert_code_to_tool_schema_class(): + expected = { + "DummyClass": { + "type": "class", + "description": "Completing missing values with simple strategies.", + "methods": { + "__init__": { + "description": "Initialize self. ", + "parameters": { + "properties": { + "features": {"type": "list", "description": "Columns to be processed."}, + "strategy": { + "type": "str", + "description": "The imputation strategy, notice 'mean' and 'median' can only be used for numeric features. Enum: ['mean', 'median', 'most_frequent', 'constant']. Defaults to 'mean'.", + "default": "'mean'", + "enum": ["'mean'", "'median'", "'most_frequent'", "'constant'"], + }, + "fill_value": { + "type": "int", + "description": "Fill_value is used to replace all occurrences of missing_values. Defaults to None.", + "default": "None", + }, + }, + "required": ["features"], + }, + }, + "fit": { + "description": "Fit the FillMissingValue model. ", + "parameters": { + "properties": {"df": {"type": "pd.DataFrame", "description": "The input DataFrame."}}, + "required": ["df"], + }, + }, + "transform": { + "description": "Transform the input DataFrame with the fitted model. 
", + "parameters": { + "properties": {"df": {"type": "pd.DataFrame", "description": "The input DataFrame."}}, + "required": ["df"], + }, + "returns": [{"type": "pd.DataFrame", "description": "The transformed DataFrame."}], + }, + }, + } + } + schema = convert_code_to_tool_schema(DummyClass) + assert schema == expected + + +def test_convert_code_to_tool_schema_function(): + expected = { + "dummy_fn": { + "type": "function", + "description": "Analyzes a DataFrame and categorizes its columns based on data types. ", + "parameters": { + "properties": {"df": {"type": "pd.DataFrame", "description": "The DataFrame to be analyzed."}}, + "required": ["df"], + }, + } + } + schema = convert_code_to_tool_schema(dummy_fn) + assert schema == expected diff --git a/tests/metagpt/tools/test_tool_registry.py b/tests/metagpt/tools/test_tool_registry.py index 582c368a8..c24122e39 100644 --- a/tests/metagpt/tools/test_tool_registry.py +++ b/tests/metagpt/tools/test_tool_registry.py @@ -98,4 +98,4 @@ def test_get_tools_by_type(tool_registry, schema_yaml): # Test case for when the tool type does not exist def test_get_tools_by_nonexistent_type(tool_registry): tools_by_type = tool_registry.get_tools_by_type("NonexistentType") - assert tools_by_type is None + assert not tools_by_type