update code step prompts

2026-05-05 05:42:37 +02:00 · 2023-12-13 16:26:24 +08:00 · 2023-12-13 16:26:24 +08:00 · 8d694d47d9
commit 8d694d47d9
parent abad52da85
3 changed files with 35 additions and 31 deletions
--- a/metagpt/actions/write_analysis_code.py
+++ b/metagpt/actions/write_analysis_code.py
@ -26,7 +26,7 @@ from metagpt.utils.common import create_func_config, remove_comments
 class BaseWriteAnalysisCode(Action):
    DEFAULT_SYSTEM_MSG = """You are Code Interpreter, a world-class programmer that can complete any goal by executing code. Strictly follow the plan and generate code step by step. Each step of the code will be executed on the user's machine, and the user will provide the code execution results to you."""  # prompt reference: https://github.com/KillianLucas/open-interpreter/blob/v0.1.4/interpreter/system_message.txt
    REUSE_CODE_INSTRUCTION = """ATTENTION: DONT include codes from previous tasks in your current code block, include new codes only, DONT repeat codes!"""
-
+    
    def process_msg(self, prompt: Union[str, List[Dict], Message, List[Message]], system_msg: str = None):
        default_system_msg = system_msg or self.DEFAULT_SYSTEM_MSG
        # 全部转成list
@ -45,7 +45,7 @@ class BaseWriteAnalysisCode(Action):
                    messages.append(p.to_dict())
                elif isinstance(p.content, dict) and "code" in p.content:
                    messages.append(p.content["code"])
-
+        
        # 添加默认的提示词
        if (
                default_system_msg not in messages[0]["content"]
@ -61,7 +61,7 @@ class BaseWriteAnalysisCode(Action):
                "content": messages[0]["content"] + default_system_msg,
            }
        return messages
-
+    
    async def run(
            self, context: List[Message], plan: Plan = None, code_steps: str = ""
    ) -> str:
@ -79,10 +79,10 @@ class BaseWriteAnalysisCode(Action):

 class WriteCodeByGenerate(BaseWriteAnalysisCode):
    """Write code fully by generation"""
-
+    
    def __init__(self, name: str = "", context=None, llm=None) -> str:
        super().__init__(name, context, llm)
-
+    
    async def run(
            self,
            context: [List[Message]],
@ -99,15 +99,15 @@ class WriteCodeByGenerate(BaseWriteAnalysisCode):

 class WriteCodeWithTools(BaseWriteAnalysisCode):
    """Write code with help of local available tools. Choose tools first, then generate code to use the tools"""
-
+    
    def __init__(self, name: str = "", context=None, llm=None, schema_path=None):
        super().__init__(name, context, llm)
        self.schema_path = schema_path
        self.available_tools = {}
-
+        
        if self.schema_path is not None:
            self._load_tools(schema_path)
-
+    
    def _load_tools(self, schema_path):
        """Load tools from yaml file"""
        yml_files = schema_path.glob("*.yml")
@ -115,7 +115,7 @@ class WriteCodeWithTools(BaseWriteAnalysisCode):
            module = yml_file.stem
            with open(yml_file, "r", encoding="utf-8") as f:
                self.available_tools[module] = yaml.safe_load(f)
-
+    
    def _parse_recommend_tools(self, module: str, recommend_tools: list) -> dict:
        """
        Parses and validates a list of recommended tools, and retrieves their schema from registry.
@ -132,15 +132,15 @@ class WriteCodeWithTools(BaseWriteAnalysisCode):
        for tool in recommend_tools:
            if tool in available_tools:
                valid_tools.append(tool)
-
+        
        tool_catalog = {tool: self.available_tools[module][tool] for tool in valid_tools}
        return tool_catalog
-
+    
    async def _tool_recommendation(
-        self,
-        task: str,
-        code_steps: str,
-        available_tools: dict,
+            self,
+            task: str,
+            code_steps: str,
+            available_tools: dict,
    ) -> list:
        """
        Recommend tools for the specified task.
@ -162,26 +162,26 @@ class WriteCodeWithTools(BaseWriteAnalysisCode):
        rsp = await self.llm.aask_code(prompt, **tool_config)
        recommend_tools = rsp["recommend_tools"]
        return recommend_tools
-
+    
    async def run(
-        self,
-        context: List[Message],
-        plan: Plan = None,
-        code_steps: str = "",
-        column_info: str = "",
-        **kwargs,
+            self,
+            context: List[Message],
+            plan: Plan = None,
+            code_steps: str = "",
+            column_info: str = "",
+            **kwargs,
    ) -> Tuple[List[Message], str]:
        task_type = plan.current_task.task_type
        available_tools = self.available_tools.get(task_type, {})
        special_prompt = ML_SPECIFIC_PROMPT.get(task_type, "")
-
+        
        finished_tasks = plan.get_finished_tasks()
        code_context = [remove_comments(task.code) for task in finished_tasks]
        code_context = "\n\n".join(code_context)
-
+        
        if len(available_tools) > 0:
            available_tools = {k: v["description"] for k, v in available_tools.items()}
-
+            
            recommend_tools = await self._tool_recommendation(
                plan.current_task.instruction,
                code_steps,
@ -189,8 +189,9 @@ class WriteCodeWithTools(BaseWriteAnalysisCode):
            )
            tool_catalog = self._parse_recommend_tools(task_type, recommend_tools)
            logger.info(f"Recommended tools: \n{recommend_tools}")
-
+            
            module_name = ML_MODULE_MAP[task_type]
+            
            prompt = TOOL_USAGE_PROMPT.format(
                user_requirement=plan.goal,
                history_code=code_context,
@ -201,6 +202,8 @@ class WriteCodeWithTools(BaseWriteAnalysisCode):
                module_name=module_name,
                tool_catalog=tool_catalog,
            )
+        
+        
        else:
            prompt = GENERATE_CODE_PROMPT.format(
                user_requirement=plan.goal,
@ -210,7 +213,7 @@ class WriteCodeWithTools(BaseWriteAnalysisCode):
                special_prompt=special_prompt,
                code_steps=code_steps,
            )
-
+        
        tool_config = create_func_config(CODE_GENERATOR_WITH_TOOLS)
        rsp = await self.llm.aask_code(prompt, **tool_config)
        context = [Message(content=prompt, role="user")]
--- a/metagpt/actions/write_code_steps.py
+++ b/metagpt/actions/write_code_steps.py
@ -44,7 +44,7 @@ The output plan should following the subsequent principles:
 1.The plan is a rough checklist of steps outlining the entire program's structure.Try to keep the number of steps fewer than 5.
 2.The steps should be written concisely and at a high level, avoiding overly detailed implementation specifics.
 3.The execution of the plan happens sequentially, but the plan can incorporate conditional (if) and looping(loop) keywords for more complex structures.
-4.Follow the code logic to design and provide the code steps. You can analysis it step by step
+4.Design and provide code steps by following the code logic. Analyze the provided code step by step and reuse the imported library.

 Output the code steps in a JSON format, as shown in this example:
 ```json
@ -101,11 +101,12 @@ class WriteCodeSteps(Action):
    def get_context(self, plan: Plan):
        user_requirement = plan.goal
        # select_task_keys = ['task_id', 'instruction', 'is_finished', 'code']
-        select_task_keys = ['task_id','instruction']
+        # select_task_keys = ['task_id','instruction']
        
        def process_task(task):
            task_dict = task.dict()
-            ptask = {k: task_dict[k] for k in task_dict if k in select_task_keys }
+            # ptask = {k: task_dict[k] for k in task_dict if k in select_task_keys }
+            ptask = f"task_id_{task_dict['task_id']}:{task_dict['instruction']}\n"
            return ptask
        
        
--- a/metagpt/prompts/ml_engineer.py
+++ b/metagpt/prompts/ml_engineer.py
@ -231,8 +231,8 @@ for col in num_cols:
 # Constraints:
 - Prioritize using pre-defined tools for the same functionality.
 - Copy DataFrame before processing if needed.
- If 'Code Steps' contains step done in 'Done Tasks', such as reading data, don't repeat it.
 """
+#- If 'Code Steps' contains step done in 'Done Tasks', such as reading data, don't repeat it.

 DATA_PREPROCESS_PROMPT = """
 The current task is about data preprocessing, please note the following: