Merge branch 'dev_wbh_code_plan' into 'dev'

Update write task guide, add code plan. See merge request agents/data_agents_opt!14
2026-06-11 15:15:18 +02:00 · 2023-12-06 08:35:50 +00:00 · 2023-12-06 08:35:50 +00:00 · 81eddf264a
commit 81eddf264a
parent 20a918bf39 58e8e4c879
4 changed files with 97 additions and 22 deletions
--- a/metagpt/actions/write_analysis_code.py
+++ b/metagpt/actions/write_analysis_code.py
@ -85,7 +85,7 @@ class WriteCodeByGenerate(BaseWriteAnalysisCode):
        self,
        context: [List[Message]],
        plan: Plan = None,
-        task_guide: str = "",
+        code_steps: str = "",
        system_msg: str = None,
        **kwargs,
    ) -> str:
@ -155,7 +155,7 @@ class WriteCodeWithTools(BaseWriteAnalysisCode):
        self,
        context: List[Message],
        plan: Plan = None,
-        task_guide: str = "",
+        code_steps: str = "",
        data_desc: str = "",
    ) -> str:
        task_type = plan.current_task.task_type
@ -165,12 +165,12 @@ class WriteCodeWithTools(BaseWriteAnalysisCode):
            {k: tool[k] for k in ["name", "description"] if k in tool}
            for tool in available_tools
        ]
-        task_guide = "\n".join(
-            [f"Step {step.strip()}" for step in task_guide.split("\n")]
+        code_steps = "\n".join(
+            [f"Step {step.strip()}" for step in code_steps.split("\n")]
        )

        recommend_tools = await self._tool_recommendation(
-            task, task_guide, available_tools
+            task, code_steps, available_tools
        )
        recommend_tools, tool_catalog = self._parse_recommend_tools(task_type, recommend_tools)
        logger.info(f"Recommended tools for every steps: {recommend_tools}")
@ -194,7 +194,7 @@ class WriteCodeWithTools(BaseWriteAnalysisCode):
            completed_code=completed_code,
            data_desc=data_desc,
            special_prompt=special_prompt,
-            code_steps=task_guide,
+            code_steps=code_steps,
            module_name=module_name,
            output_desc=output_desc,
            available_tools=recommend_tools,
--- a/metagpt/actions/write_code_steps.py
+++ b/metagpt/actions/write_code_steps.py
@ -0,0 +1,77 @@
+
+import json
+from typing import Dict, List, Union
+
+from metagpt.actions import Action
+from metagpt.schema import Message, Task, Plan
+
+
+# Prompt sent by WriteCodeSteps.run; the `{context}` placeholder is filled with
+# the string returned by WriteCodeSteps.get_context.
+CODE_STEPS_PROMPT_TEMPLATE = """
+# Context
+{context}
+
+##  Format example
+1.
+2.
+3.
+...
+
+-----
+Tasks are all code development tasks.
+You are a professional engineer, the main goal is to plan out concise solution steps for Current Task before coding.
+A planning process can reduce the difficulty and improve the quality of coding.
+You may be given some code plans for the tasks ahead, but you don't have to follow the existing plan when planning the current task.
+The output plan should following the subsequent principles:
+1.The plan is a rough checklist of steps outlining the entire program's structure.Try to keep the number of steps fewer than 5.
+2.The steps should be written concisely and at a high level, avoiding overly detailed implementation specifics.
+3.The execution of the plan happens sequentially, but the plan can incorporate conditional (if) and looping(loop) keywords for more complex structures.
+4.Output carefully referenced "Format example" in format.
+"""
+
+# Layout of the context section. WriteCodeSteps.get_context fills it with the
+# plan goal, a JSON list of the plan's tasks, and the JSON of the current task.
+STRUCTURAL_CONTEXT = """
+## User Requirement
+{user_requirement}
+## Current Plan
+{tasks}
+## Current Task
+{current_task}
+"""
+
+
+class WriteCodeSteps(Action):
+    """Action that requests a short list of solution steps for the plan's current task."""
+
+    async def run(self, plan: Plan) -> str:
+        """Build the code-steps prompt from ``plan`` and return the reply.
+
+        Args:
+            plan (Plan): task plan; its goal, tasks and current task are
+                rendered into the prompt context by ``get_context``.
+
+        Returns:
+            str: The reply returned by ``self._aask`` for the code-steps prompt.
+        """
+        code_steps_prompt = CODE_STEPS_PROMPT_TEMPLATE.format(
+            context=self.get_context(plan),
+        )
+        return await self._aask(code_steps_prompt)
+
+    def get_context(self, plan: Plan) -> str:
+        """Render the plan into the ``STRUCTURAL_CONTEXT`` template.
+
+        Args:
+            plan (Plan): task plan providing ``goal``, ``tasks`` and
+                ``current_task``.
+
+        Returns:
+            str: The formatted context with the user requirement, the JSON
+            list of tasks and the JSON of the current task.
+        """
+        select_task_keys = ("task_id", "instruction", "is_finished", "code_steps")
+
+        def process_task(task) -> dict:
+            # Keep only the selected fields of the task; everything else in
+            # task.dict() is left out of the prompt.
+            task_dict = task.dict()
+            return {k: v for k, v in task_dict.items() if k in select_task_keys}
+
+        tasks = json.dumps(
+            [process_task(task) for task in plan.tasks], indent=4, ensure_ascii=False
+        )
+        # Always serialise to a JSON string (an empty object when there is no
+        # current task), and keep non-ASCII text readable exactly as in `tasks`.
+        current_task = json.dumps(
+            process_task(plan.current_task) if plan.current_task else {},
+            ensure_ascii=False,
+        )
+        return STRUCTURAL_CONTEXT.format(
+            user_requirement=plan.goal, tasks=tasks, current_task=current_task
+        )
+
--- a/metagpt/roles/ml_engineer.py
+++ b/metagpt/roles/ml_engineer.py
@ -12,6 +12,7 @@ from metagpt.logs import logger
 from metagpt.actions.write_plan import WritePlan
 from metagpt.actions.write_analysis_code import WriteCodeByGenerate, WriteCodeWithTools
 from metagpt.actions.execute_code import ExecutePyCode
+from metagpt.actions.write_code_steps import WriteCodeSteps

 STRUCTURAL_CONTEXT = """
 ## User Requirement
@ -66,11 +67,6 @@ class AskReview(Action):
        return rsp, confirmed


-class WriteTaskGuide(Action):
-    async def run(self, task_instruction: str, data_desc: str = "") -> str:
-        return ""
-
-
 class MLEngineer(Role):
    def __init__(
        self, name="ABC", profile="MLEngineer", goal="", auto_run: bool = False
@ -79,7 +75,7 @@ class MLEngineer(Role):
        self._set_react_mode(react_mode="plan_and_act")
        self.plan = Plan(goal=goal)
        self.use_tools = False
-        self.use_task_guide = False
+        self.use_code_steps = True
        self.execute_code = ExecutePyCode()
        self.auto_run = auto_run

@ -92,7 +88,7 @@ class MLEngineer(Role):
            logger.info(f"ready to take on task {task}")

            # take on current task
-            code, result, success = await self._write_and_exec_code()
+            code, result, success, code_steps = await self._write_and_exec_code()

            # ask for acceptance, users can other refuse and change tasks in the plan
            task_result_confirmed = await self._ask_review()
@ -101,6 +97,7 @@ class MLEngineer(Role):
                # tick off this task and record progress
                task.code = code
                task.result = result
+                task.code_steps = code_steps
                self.plan.finish_current_task()
                self.working_memory.clear()

@ -109,9 +106,9 @@ class MLEngineer(Role):
                await self._update_plan()

    async def _write_and_exec_code(self, max_retry: int = 3):
-        task_guide = (
-            await WriteTaskGuide().run(self.plan.current_task.instruction)
-            if self.use_task_guide
+        code_steps = (
+            await WriteCodeSteps().run(self.plan)
+            if self.use_code_steps
            else ""
        )

@ -128,12 +125,12 @@ class MLEngineer(Role):
            if not self.use_tools or self.plan.current_task.task_type == "other":
                # code = "print('abc')"
                code = await WriteCodeByGenerate().run(
-                    context=context, plan=self.plan, task_guide=task_guide, temperature=0.0
+                    context=context, plan=self.plan, code_steps=code_steps, temperature=0.0
                )
                cause_by = WriteCodeByGenerate
            else:
                code = await WriteCodeWithTools().run(
-                    context=context, plan=self.plan, task_guide=task_guide, data_desc=""
+                    context=context, plan=self.plan, code_steps=code_steps, data_desc=""
                )
                cause_by = WriteCodeWithTools

@ -156,7 +153,7 @@ class MLEngineer(Role):

            counter += 1

-        return code, result, success
+        return code, result, success, code_steps

    async def _ask_review(self):
        if not self.auto_run:
@ -185,7 +182,7 @@ class MLEngineer(Role):

    def get_useful_memories(self) -> List[Message]:
        """find useful memories only to reduce context length and improve performance"""
-
+        # TODO dataset description , code steps
        user_requirement = self.plan.goal
        tasks = json.dumps(
            [task.dict() for task in self.plan.tasks], indent=4, ensure_ascii=False
@ -204,11 +201,11 @@ class MLEngineer(Role):


 if __name__ == "__main__":
-    requirement = "Run data analysis on sklearn Iris dataset, include a plot"
+    # requirement = "Run data analysis on sklearn Iris dataset, include a plot"
    # requirement = "Run data analysis on sklearn Diabetes dataset, include a plot"
    # requirement = "Run data analysis on sklearn Wine recognition dataset, include a plot, and train a model to predict wine class (20% as validation), and show validation accuracy"
    # requirement = "Run data analysis on sklearn Wisconsin Breast Cancer dataset, include a plot, train a model to predict targets (20% as validation), and show validation accuracy"
-    # requirement = "Run EDA and visualization on this dataset, train a model to predict survival, report metrics on validation set (20%), dataset: workspace/titanic/train.csv"
+    requirement = "Run EDA and visualization on this dataset, train a model to predict survival, report metrics on validation set (20%), dataset: workspace/titanic/train.csv"

    async def main(requirement: str = requirement, auto_run: bool = False):
        role = MLEngineer(goal=requirement, auto_run=auto_run)
--- a/metagpt/schema.py
+++ b/metagpt/schema.py
@ -81,6 +81,7 @@ class Task(BaseModel):
    code: str = ""
    result: str = ""
    is_finished: bool = False
+    code_steps: str = ""


 class Plan(BaseModel):