Merge branch 'dev' into dev_pipeline

2026-06-20 15:38:09 +02:00 · 2023-11-30 14:50:59 +08:00 · 2023-11-30 14:50:59 +08:00 · 5522742002
commit 5522742002
parent 3461b1b4c0 725dac2525
5 changed files with 351 additions and 30 deletions
--- a/metagpt/actions/write_analysis_code.py
+++ b/metagpt/actions/write_analysis_code.py
@ -4,25 +4,40 @@
@Author  :   orange-crow
@File    :   write_code_v2.py
 """
+import json
 from typing import Dict, List, Union

 from metagpt.actions import Action
+from metagpt.prompts.ml_engineer import (
+    TOOL_RECOMMENDATION_PROMPT,
+    SELECT_FUNCTION_TOOLS,
+    CODE_GENERATOR_WITH_TOOLS,
+    TOO_ORGANIZATION_PROMPT,
+    ML_SPECIFIC_PROMPT,
+    ML_MODULE_MAP,
+    TOOL_OUTPUT_DESC,
+)
 from metagpt.schema import Message, Plan
+from metagpt.tools.functions import registry
+from metagpt.utils.common import create_func_config
+

 class BaseWriteAnalysisCode(Action):
-
-    async def run(self, context: List[Message], plan: Plan = None, task_guide: str = "") -> str:
+    async def run(
+        self, context: List[Message], plan: Plan = None, task_guide: str = ""
+    ) -> str:
        """Run of a code writing action, used in data analysis or modeling

        Args:
            context (List[Message]): Action output history, source action denoted by Message.cause_by
            plan (Plan, optional): Overall plan. Defaults to None.
            task_guide (str, optional): suggested step breakdown for the current task. Defaults to "".
-        
+
        Returns:
            str: The code string.
        """

+
 class WriteCodeByGenerate(BaseWriteAnalysisCode):
    """Write code fully by generation"""
    DEFAULT_SYSTEM_MSG = """You are Code Interpreter, a world-class programmer that can complete any goal by executing code. Strictly follow the plan and generate code step by step. Each step of the code will be executed on the user's machine, and the user will provide the code execution results to you.**Notice: Use !pip install in a standalone block to install missing packages.**""" # prompt reference: https://github.com/KillianLucas/open-interpreter/blob/v0.1.4/interpreter/system_message.txt
@ -41,24 +56,38 @@ class WriteCodeByGenerate(BaseWriteAnalysisCode):
        messages = []
        for p in prompt:
            if isinstance(p, str):
-                messages.append({'role': 'user', 'content': p})
+                messages.append({"role": "user", "content": p})
            elif isinstance(p, dict):
                messages.append(p)
            elif isinstance(p, Message):
                if isinstance(p.content, str):
                    messages.append(p.to_dict())
-                elif isinstance(p.content, dict) and 'code' in p.content:
-                    messages.append(p.content['code'])
+                elif isinstance(p.content, dict) and "code" in p.content:
+                    messages.append(p.content["code"])

        # 添加默认的提示词
-        if default_system_msg not in messages[0]['content'] and messages[0]['role'] != 'system':
-            messages.insert(0, {'role': 'system', 'content': default_system_msg})
-        elif default_system_msg not in messages[0]['content'] and messages[0]['role'] == 'system':
-            messages[0] = {'role': 'system', 'content': messages[0]['content']+default_system_msg}
+        if (
+            default_system_msg not in messages[0]["content"]
+            and messages[0]["role"] != "system"
+        ):
+            messages.insert(0, {"role": "system", "content": default_system_msg})
+        elif (
+            default_system_msg not in messages[0]["content"]
+            and messages[0]["role"] == "system"
+        ):
+            messages[0] = {
+                "role": "system",
+                "content": messages[0]["content"] + default_system_msg,
+            }
        return messages

    async def run(
-        self, context: [List[Message]], plan: Plan = None, task_guide: str = "", system_msg: str = None, **kwargs
+        self,
+        context: [List[Message]],
+        plan: Plan = None,
+        task_guide: str = "",
+        system_msg: str = None,
+        **kwargs,
    ) -> str:
        context.append(Message(content=self.REUSE_CODE_INSTRUCTION, role="user"))
        prompt = self.process_msg(context, system_msg)
@ -69,5 +98,99 @@ class WriteCodeByGenerate(BaseWriteAnalysisCode):
 class WriteCodeWithTools(BaseWriteAnalysisCode):
    """Write code with help of local available tools. Choose tools first, then generate code to use the tools"""

-    async def run(self, context: List[Message], plan: Plan = None, task_guide: str = "") -> str:
-        return "print('abc')"
+    @staticmethod
+    def _parse_recommend_tools(module: str, recommend_tools: list) -> str:
+        """
+        Keep only the recommended tools the registry knows and serialise their schemas.
+
+        Args:
+            module (str): Module name used for every registry lookup.
+            recommend_tools (list): One iterable of tool names per step, in step order.
+
+        Returns:
+            str: JSON object mapping "Step 1", "Step 2", ... to whatever
+                registry.get_schemas returns for that step's surviving tools.
+        """
+        # Names that are not registered for this module are dropped silently.
+        registered = registry.get_all_by_module(module).keys()
+        schemas_by_step = {}
+        for step_no, candidates in enumerate(recommend_tools, start=1):
+            usable = [name for name in candidates if name in registered]
+            schemas_by_step[f"Step {step_no}"] = registry.get_schemas(module, usable)
+        return json.dumps(schemas_by_step)
+
+    async def _tool_recommendation(
+        self, task: str, data_desc: str, code_steps: str, available_tools: list
+    ) -> list:
+        """
+        Ask the LLM which of the available tools fit each step of a task.
+
+        Args:
+            task (str): the task description
+            data_desc (str): the description of the dataset for the task
+            code_steps (str): the code steps to generate the full code for the task
+            available_tools (list): the available tools for the task
+
+        Returns:
+            list: the "recommend_tools" entry of the LLM response
+        """
+        # The four arguments fill the identically named placeholders of the prompt.
+        prompt_fields = {
+            "task": task,
+            "data_desc": data_desc,
+            "code_steps": code_steps,
+            "available_tools": available_tools,
+        }
+        prompt = TOOL_RECOMMENDATION_PROMPT.format(**prompt_fields)
+        func_config = create_func_config(SELECT_FUNCTION_TOOLS)
+        response = await self.llm.aask_code(prompt, **func_config)
+        return response["recommend_tools"]
+
+    async def run(
+        self,
+        context: List[Message],
+        plan: Plan = None,
+        task_guide: str = "",
+        data_desc: str = "",
+    ) -> str:
+        """Choose tools for the current task, then ask the LLM for code that uses them.
+
+        Args:
+            context (List[Message]): Action output history. Not read by this method.
+            plan (Plan, optional): Overall plan. Despite the None default it is
+                dereferenced immediately (plan.current_task, plan.tasks), so a
+                real plan must be supplied.
+            task_guide (str, optional): Suggested step breakdown, one step per line.
+                Defaults to "".
+            data_desc (str, optional): Description of the dataset. Defaults to "".
+
+        Returns:
+            str: The "code" entry of the LLM response.
+        """
+        current_task = plan.current_task
+        task_type = current_task.task_type
+        instruction = current_task.instruction
+
+        # Only the name and description of each tool are offered for selection.
+        available_tools = [
+            {k: tool[k] for k in ("name", "description") if k in tool}
+            for tool in registry.get_all_schema_by_module(task_type)
+        ]
+        # Every line of the guide is stripped and prefixed with "Step ".
+        task_guide = "\n".join(
+            f"Step {step.strip()}" for step in task_guide.split("\n")
+        )
+
+        # Pass by keyword: _tool_recommendation requires task, data_desc,
+        # code_steps and available_tools, and a positional call that leaves
+        # one out fails with TypeError.
+        recommend_tools = await self._tool_recommendation(
+            task=instruction,
+            data_desc=data_desc,
+            code_steps=task_guide,
+            available_tools=available_tools,
+        )
+        recommend_tools = self._parse_recommend_tools(task_type, recommend_tools)
+
+        special_prompt = ML_SPECIFIC_PROMPT.get(task_type, "")
+        module_name = ML_MODULE_MAP[task_type]
+        output_desc = TOOL_OUTPUT_DESC.get(task_type, "")
+
+        # One pass over the plan collects both the status overview and the
+        # code of every task that already has some.
+        task_lines = []
+        code_parts = []
+        for item in plan.tasks:
+            status = "DONE" if item.is_finished else "TODO"
+            task_lines.append(
+                f"Subtask {item.task_id}: {item.instruction}({status})\n"
+            )
+            if item.code:
+                code_parts.append(item.code + "\n")
+        all_tasks = "".join(task_lines)
+        completed_code = "".join(code_parts)
+
+        prompt = TOO_ORGANIZATION_PROMPT.format(
+            all_tasks=all_tasks,
+            completed_code=completed_code,
+            data_desc=data_desc,
+            special_prompt=special_prompt,
+            code_steps=task_guide,
+            module_name=module_name,
+            output_desc=output_desc,
+            available_tools=recommend_tools,
+        )
+        tool_config = create_func_config(CODE_GENERATOR_WITH_TOOLS)
+        rsp = await self.llm.aask_code(prompt, **tool_config)
+        return rsp["code"]