tool management in one place, add aask_code mock, azure mock

This commit is contained in:
yzlin 2024-01-11 22:55:31 +08:00
parent 9e0b9745be
commit e99c5f29f4
9 changed files with 167 additions and 74 deletions

View file

@ -20,14 +20,16 @@ from metagpt.prompts.ml_engineer import (
GENERATE_CODE_PROMPT,
ML_TOOL_USAGE_PROMPT,
SELECT_FUNCTION_TOOLS,
TASK_MODULE_MAP,
TASK_SPECIFIC_PROMPT,
TOOL_RECOMMENDATION_PROMPT,
TOOL_USAGE_PROMPT,
)
from metagpt.schema import Message, Plan
from metagpt.tools import TOOL_TYPE_MAPPINGS
from metagpt.utils.common import create_func_config, remove_comments
TOOL_TYPE_MODULE = {k: v.module for k, v in TOOL_TYPE_MAPPINGS.items()}
TOOL_TYPE_USAGE_PROMPT = {k: v.usage_prompt for k, v in TOOL_TYPE_MAPPINGS.items()}
class BaseWriteAnalysisCode(Action):
DEFAULT_SYSTEM_MSG: str = """You are Code Interpreter, a world-class programmer that can complete any goal by executing code. Strictly follow the plan and generate code step by step. Each step of the code will be executed on the user's machine, and the user will provide the code execution results to you.**Notice: The code for the next step depends on the code for the previous step. Must reuse variables in the lastest other code directly, dont creat it again, it is very import for you. Use !pip install in a standalone block to install missing packages.Usually the libraries you need are already installed.Dont check if packages already imported.**""" # prompt reference: https://github.com/KillianLucas/open-interpreter/blob/v0.1.4/interpreter/system_message.txt
@ -171,9 +173,11 @@ class WriteCodeWithTools(BaseWriteAnalysisCode):
plan: Plan = None,
**kwargs,
) -> str:
task_type = plan.current_task.task_type
available_tools = self.available_tools.get(task_type, {})
special_prompt = TASK_SPECIFIC_PROMPT.get(task_type, "")
tool_type = (
plan.current_task.task_type
) # find tool type from task type through exact match, can extend to retrieval in the future
available_tools = self.available_tools.get(tool_type, {})
special_prompt = TOOL_TYPE_USAGE_PROMPT.get(tool_type, "")
code_steps = plan.current_task.code_steps
finished_tasks = plan.get_finished_tasks()
@ -189,10 +193,10 @@ class WriteCodeWithTools(BaseWriteAnalysisCode):
recommend_tools = await self._tool_recommendation(
plan.current_task.instruction, code_steps, available_tools
)
tool_catalog = self._parse_recommend_tools(task_type, recommend_tools)
tool_catalog = self._parse_recommend_tools(tool_type, recommend_tools)
logger.info(f"Recommended tools: \n{recommend_tools}")
module_name = TASK_MODULE_MAP[task_type]
module_name = TOOL_TYPE_MODULE[tool_type]
tools_instruction = TOOL_USAGE_PROMPT.format(
special_prompt=special_prompt, module_name=module_name, tool_catalog=tool_catalog
@ -215,9 +219,9 @@ class WriteCodeWithToolsML(WriteCodeWithTools):
column_info: str = "",
**kwargs,
) -> Tuple[List[Message], str]:
task_type = plan.current_task.task_type
available_tools = self.available_tools.get(task_type, {})
special_prompt = TASK_SPECIFIC_PROMPT.get(task_type, "")
tool_type = plan.current_task.task_type
available_tools = self.available_tools.get(tool_type, {})
special_prompt = TOOL_TYPE_USAGE_PROMPT.get(tool_type, "")
code_steps = plan.current_task.code_steps
finished_tasks = plan.get_finished_tasks()
@ -230,10 +234,10 @@ class WriteCodeWithToolsML(WriteCodeWithTools):
recommend_tools = await self._tool_recommendation(
plan.current_task.instruction, code_steps, available_tools
)
tool_catalog = self._parse_recommend_tools(task_type, recommend_tools)
tool_catalog = self._parse_recommend_tools(tool_type, recommend_tools)
logger.info(f"Recommended tools: \n{recommend_tools}")
module_name = TASK_MODULE_MAP[task_type]
module_name = TOOL_TYPE_MODULE[tool_type]
prompt = ML_TOOL_USAGE_PROMPT.format(
user_requirement=plan.goal,

View file

@ -12,6 +12,7 @@ from metagpt.actions import Action
from metagpt.logs import logger
from metagpt.prompts.ml_engineer import ASSIGN_TASK_TYPE_CONFIG, ASSIGN_TASK_TYPE_PROMPT
from metagpt.schema import Message, Plan, Task
from metagpt.tools import TOOL_TYPE_MAPPINGS
from metagpt.utils.common import CodeParser, create_func_config
@ -46,7 +47,10 @@ class WritePlan(Action):
List[Dict]: tasks with task type assigned
"""
task_list = "\n".join([f"Task {task['task_id']}: {task['instruction']}" for task in tasks])
prompt = ASSIGN_TASK_TYPE_PROMPT.format(task_list=task_list)
task_type_desc = "\n".join([f"- **{item.name}**: {item.desc}" for item in TOOL_TYPE_MAPPINGS.values()])
prompt = ASSIGN_TASK_TYPE_PROMPT.format(
task_list=task_list, task_type_desc=task_type_desc
) # task types are set to be the same as tool types, for now
tool_config = create_func_config(ASSIGN_TASK_TYPE_CONFIG)
rsp = await self.llm.aask_code(prompt, **tool_config)
task_type_list = rsp["task_type"]

View file

@ -54,11 +54,7 @@ Please assign a task type to each task in the list below from the given categori
{task_list}
## All Task Type:
- **feature_engineering**: Only for creating new columns for input data.
- **data_preprocess**: Only for changing value inplace.
- **model_train**: Only for training model.
- **model_evaluate**: Only for evaluating model.
- **other**: Any tasks that do not fit into the previous categories, such as visualization, summarizing findings, etc.
{task_type_desc}
"""
ASSIGN_TASK_TYPE_CONFIG = {
@ -278,52 +274,3 @@ for col in num_cols:
- The output code should contain all steps implemented correctly in 'Code Steps'.
"""
# - If 'Code Steps' contains step done in 'Done Tasks', such as reading data, don't repeat it.
# Guidance injected into the code-generation prompt when the current task is
# data preprocessing.
DATA_PREPROCESS_PROMPT = """
The current task is about data preprocessing, please note the following:
- Monitor data types per column, applying appropriate methods.
- Ensure operations are on existing dataset columns.
- Avoid writing processed data to files.
- Avoid any change to label column, such as standardization, etc.
- Prefer alternatives to one-hot encoding for categorical data.
- Only encode or scale necessary columns to allow for potential feature-specific engineering tasks (like time_extract, binning, extraction, etc.) later.
- Each step do data preprocessing to train, must do same for test separately at the same time.
"""

# Guidance injected for feature-engineering tasks.
FEATURE_ENGINEERING_PROMPT = """
The current task is about feature engineering. when performing it, please adhere to the following principles:
- Generate as diverse features as possible to improve the model's performance step-by-step.
- If potential impactful features are not included in 'Code Steps', add new steps to generate them.
- Avoid creating redundant or excessively numerous features in one step.
- Exclude ID columns from feature generation and remove them.
- Each step do feature engineering to train, must do same for test separately at the same time.
- Avoid using the label column to create features, except for cat encoding.
- Use the data from previous task result if exist, do not mock or reload data yourself.
"""

# Guidance injected for model-training tasks.
MODEL_TRAIN_PROMPT = """
The current task is about training a model, please ensure high performance:
- Keep in mind that your user prioritizes results and is highly focused on model performance. So, when needed, feel free to use models of any complexity to improve effectiveness, such as lightGBM, XGBoost, CatBoost, etc.
- If non-numeric columns exist, perform label encode together with all steps.
- Use the data from previous task result directly, do not mock or reload data yourself.
- Set suitable hyperparameters for the model, make metrics as high as possible.
"""

# Guidance injected for model-evaluation tasks.
MODEL_EVALUATE_PROMPT = """
The current task is about evaluating a model, please note the following:
- Ensure that the evaluated data is same processed as the training data. If not, remember use object in 'Done Tasks' to transform the data.
- Use trained model from previous task result directly, do not mock or reload model yourself.
"""

# Maps a task type to the task-specific guidance appended to its prompt.
# Task types without an entry get no extra guidance (callers use .get(..., "")).
TASK_SPECIFIC_PROMPT = {
    "data_preprocess": DATA_PREPROCESS_PROMPT,
    "feature_engineering": FEATURE_ENGINEERING_PROMPT,
    "model_train": MODEL_TRAIN_PROMPT,
    "model_evaluate": MODEL_EVALUATE_PROMPT,
}

# Maps a task type to the module whose tool functions are offered to the LLM
# (formatted into TOOL_USAGE_PROMPT as module_name).
TASK_MODULE_MAP = {
    "data_preprocess": "metagpt.tools.functions.libs.data_preprocess",
    "feature_engineering": "metagpt.tools.functions.libs.feature_engineering",
    "udf": "metagpt.tools.functions.libs.udf",
}

View file

@ -0,0 +1,35 @@
# Per-tool-type usage guidance. Each constant is attached to a ToolType as its
# usage_prompt and appended to code-generation prompts for tasks of that type.

# Guidance for data-preprocessing tasks.
DATA_PREPROCESS_PROMPT = """
The current task is about data preprocessing, please note the following:
- Monitor data types per column, applying appropriate methods.
- Ensure operations are on existing dataset columns.
- Avoid writing processed data to files.
- Avoid any change to label column, such as standardization, etc.
- Prefer alternatives to one-hot encoding for categorical data.
- Only encode or scale necessary columns to allow for potential feature-specific engineering tasks (like time_extract, binning, extraction, etc.) later.
- Each step do data preprocessing to train, must do same for test separately at the same time.
"""

# Guidance for feature-engineering tasks.
FEATURE_ENGINEERING_PROMPT = """
The current task is about feature engineering. when performing it, please adhere to the following principles:
- Generate as diverse features as possible to improve the model's performance step-by-step.
- If potential impactful features are not included in 'Code Steps', add new steps to generate them.
- Avoid creating redundant or excessively numerous features in one step.
- Exclude ID columns from feature generation and remove them.
- Each step do feature engineering to train, must do same for test separately at the same time.
- Avoid using the label column to create features, except for cat encoding.
- Use the data from previous task result if exist, do not mock or reload data yourself.
"""

# Guidance for model-training tasks.
MODEL_TRAIN_PROMPT = """
The current task is about training a model, please ensure high performance:
- Keep in mind that your user prioritizes results and is highly focused on model performance. So, when needed, feel free to use models of any complexity to improve effectiveness, such as lightGBM, XGBoost, CatBoost, etc.
- If non-numeric columns exist, perform label encode together with all steps.
- Use the data from previous task result directly, do not mock or reload data yourself.
- Set suitable hyperparameters for the model, make metrics as high as possible.
"""

# Guidance for model-evaluation tasks.
MODEL_EVALUATE_PROMPT = """
The current task is about evaluating a model, please note the following:
- Ensure that the evaluated data is same processed as the training data. If not, remember use object in 'Done Tasks' to transform the data.
- Use trained model from previous task result directly, do not mock or reload model yourself.
"""

View file

@ -9,6 +9,16 @@
from enum import Enum
from pydantic import BaseModel
from metagpt.const import TOOL_SCHEMA_PATH
from metagpt.prompts.tool_type import (
DATA_PREPROCESS_PROMPT,
FEATURE_ENGINEERING_PROMPT,
MODEL_TRAIN_PROMPT,
MODEL_EVALUATE_PROMPT,
)
class SearchEngineType(Enum):
SERPAPI_GOOGLE = "serpapi"
@ -27,3 +37,44 @@ class WebBrowserEngineType(Enum):
def __missing__(cls, key):
"""Default type conversion"""
return cls.CUSTOM
class ToolType(BaseModel):
    """A category of tools, bundling its description and prompt material.

    Instances are registered in TOOL_TYPE_MAPPINGS; task types are matched
    against the registry keys to select tools for a task.
    """

    # Identifier of the tool type; matches its key in TOOL_TYPE_MAPPINGS.
    name: str
    # Location of this type's tools (built from TOOL_SCHEMA_PATH in
    # TOOL_TYPE_MAPPINGS); "" when the type has no tool module.
    module: str = ""
    # Short description shown to the LLM when assigning task types
    # (see the task_type_desc listing in WritePlan).
    desc: str
    # Extra guidance appended to code-generation prompts for this type;
    # "" when no special guidance applies.
    usage_prompt: str = ""
# All known tool types. Declared as a flat tuple so the registry below can be
# derived from each entry's own name, keeping key and name in sync by
# construction. module / usage_prompt fall back to the ToolType defaults ("")
# for types that have no tool module or no special guidance.
_ALL_TOOL_TYPES = (
    ToolType(
        name="data_preprocess",
        desc="Only for changing value inplace.",
        module=str(TOOL_SCHEMA_PATH / "data_preprocess"),
        usage_prompt=DATA_PREPROCESS_PROMPT,
    ),
    ToolType(
        name="feature_engineering",
        desc="Only for creating new columns for input data.",
        module=str(TOOL_SCHEMA_PATH / "feature_engineering"),
        usage_prompt=FEATURE_ENGINEERING_PROMPT,
    ),
    ToolType(
        name="model_train",
        desc="Only for training model.",
        usage_prompt=MODEL_TRAIN_PROMPT,
    ),
    ToolType(
        name="model_evaluate",
        desc="Only for evaluating model.",
        usage_prompt=MODEL_EVALUATE_PROMPT,
    ),
    ToolType(
        name="other",
        desc="Any tasks that do not fit into the previous categories",
    ),
)

# Registry of tool types keyed by name; task types are looked up here to find
# a task's tool module and usage prompt.
TOOL_TYPE_MAPPINGS = {tool_type.name: tool_type for tool_type in _ALL_TOOL_TYPES}