assign task_type for task

2026-06-11 15:15:18 +02:00 · 2023-12-01 11:57:58 +08:00 · 2023-12-01 11:57:58 +08:00 · aad201e06f
commit aad201e06f
parent b81fefffa1
2 changed files with 63 additions and 10 deletions
--- a/metagpt/actions/write_plan.py
+++ b/metagpt/actions/write_plan.py
@@ -4,12 +4,14 @@
@Author  :   orange-crow
@File    :   plan.py
 """
-from typing import List
+from typing import List, Dict
 import json

 from metagpt.actions import Action
+from metagpt.prompts.ml_engineer import ASSIGN_TASK_TYPE_PROMPT, ASSIGN_TASK_TYPE
 from metagpt.schema import Message, Task
-from metagpt.utils.common import CodeParser
+from metagpt.utils.common import CodeParser, create_func_config
+

 class WritePlan(Action):
    PROMPT_TEMPLATE = """
@@ -30,7 +32,28 @@ class WritePlan(Action):
    ]
    ```
    """
-    async def run(self, context: List[Message], max_tasks: int = 5) -> str:
+
+    async def assign_task_type(self, tasks: List[Dict]) -> List[Dict]:
+        """Assign task type to each task in tasks
+
+        Args:
+            tasks (List[Dict]): tasks to be assigned task type
+
+        Returns:
+            List[Dict]: tasks with task type assigned
+        """
+        task_list = "\n".join(
+            [f"Task {task['task_id']}: {task['instruction']}" for task in tasks]
+        )
+        prompt = ASSIGN_TASK_TYPE_PROMPT.format(task_list=task_list)
+        tool_config = create_func_config(ASSIGN_TASK_TYPE)
+        rsp = await self.llm.aask_code(prompt, **tool_config)
+        task_type_list = rsp["task_type"]
+        for task, task_type in zip(tasks, task_type_list):
+            task["task_type"] = task_type
+        return tasks
+
+    async def run(self, context: List[Message], max_tasks: int = 5) -> List[Dict]:
        prompt = (
            self.PROMPT_TEMPLATE.replace("__context__", "\n".join([str(ct) for ct in context]))
            # .replace("__current_plan__", current_plan)
@@ -38,10 +61,10 @@ class WritePlan(Action):
        )
        rsp = await self._aask(prompt)
        rsp = CodeParser.parse_code(block=None, text=rsp)
+        rsp = await self.assign_task_type(json.loads(rsp))
        return rsp

    @staticmethod
-    def rsp_to_tasks(rsp: str) -> List[Task]:
-        rsp = json.loads(rsp)
+    def rsp_to_tasks(rsp: List[Dict]) -> List[Task]:
        tasks = [Task(**task_config) for task_config in rsp]
        return tasks
--- a/metagpt/prompts/ml_engineer.py
+++ b/metagpt/prompts/ml_engineer.py
@@ -4,6 +4,35 @@
 # @Author  : lidanyang
 # @File    : ml_engineer
 # @Desc    :
+ASSIGN_TASK_TYPE_PROMPT = """
+## All Task Type:
+- **data_preprocess**: Only involve cleaning and preparing data through techniques like imputation, scaling, and encoding, not containing reading data, feature engineering, model training, etc.
+- **feature_engineering**: Involves enhancing data features through techniques like encoding, aggregation, time component analysis, and creating polynomial and interaction features, etc.
+- **other**: Any tasks that do not fit into the previous categories, such as visualization, summarizing findings, build model, etc.
+
+Please assign a task type to each task in the list below from the given categories:
+{task_list}
+"""
+
+ASSIGN_TASK_TYPE = {
+    "name": "assign_task_type",
+    "description": "assign task type to each task by order",
+    "parameters": {
+        "type": "object",
+        "properties": {
+            "task_type": {
+                "type": "array",
+                "description": "List of task type.",
+                "items": {
+                    "type": "string",
+                },
+            },
+        },
+        "required": ["task_type"],
+    },
+}
+
+
 TOOL_RECOMMENDATION_PROMPT = """
 ## Comprehensive Task Description:
 {task}
@@ -137,11 +166,12 @@ When performing feature engineering, please adhere to the following principles:
 - Importantly, provide detailed comments explaining the purpose of each feature and how it might enhance model performance, especially when the features are generated based on semantic understanding without clear user directives.
 """

-CLASSIFICATION_MODEL_PROMPT = """
+MODEL_TRAIN_PROMPT = """
+When selecting and training a model, please follow these guidelines to ensure optimal performance:
+- Keep in mind that your user prioritizes results and is highly focused on model performance. So, when needed, feel free to use models of any complexity to improve effectiveness, such as lightGBM, XGBoost, CatBoost, etc.
+— If user specifies a model, use that model. Otherwise, use the model you believe will best solve the problem.
 """

-REGRESSION_MODEL_PROMPT = """
-"""

 DATA_PREPROCESS_OUTPUT_DESC = "Please note that all functions uniformly output a processed pandas.DataFrame, facilitating seamless integration into the broader workflow."

@@ -155,8 +185,8 @@ REGRESSION_MODEL_OUTPUT_DESC = ""
 ML_SPECIFIC_PROMPT = {
    "data_preprocess": DATA_PREPROCESS_PROMPT,
    "feature_engineering": FEATURE_ENGINEERING_PROMPT,
-    "classification_model": CLASSIFICATION_MODEL_PROMPT,
-    "regression_model": REGRESSION_MODEL_PROMPT,
+    "classification_model": MODEL_TRAIN_PROMPT,
+    "regression_model": MODEL_TRAIN_PROMPT,
 }

 TOOL_OUTPUT_DESC = {