diff --git a/metagpt/actions/__init__.py b/metagpt/actions/__init__.py index c8c966c3d..3f88fbcf3 100644 --- a/metagpt/actions/__init__.py +++ b/metagpt/actions/__init__.py @@ -22,7 +22,7 @@ from metagpt.actions.write_code_review import WriteCodeReview from metagpt.actions.write_prd import WritePRD from metagpt.actions.write_prd_review import WritePRDReview from metagpt.actions.write_test import WriteTest -from metagpt.actions.execute_code import ExecutePyCode +from metagpt.actions.execute_nb_code import ExecuteNbCode from metagpt.actions.write_analysis_code import WriteCodeByGenerate from metagpt.actions.write_plan import WritePlan @@ -45,7 +45,7 @@ class ActionType(Enum): COLLECT_LINKS = CollectLinks WEB_BROWSE_AND_SUMMARIZE = WebBrowseAndSummarize CONDUCT_RESEARCH = ConductResearch - EXECUTE_PYCODE = ExecutePyCode + EXECUTE_NB_CODE = ExecuteNbCode WRITE_CODE_BY_GENERATE = WriteCodeByGenerate WRITE_PLAN = WritePlan diff --git a/metagpt/actions/ask_review.py b/metagpt/actions/ask_review.py index a20395104..25b4314fe 100644 --- a/metagpt/actions/ask_review.py +++ b/metagpt/actions/ask_review.py @@ -1,4 +1,4 @@ -from typing import List +from typing import Tuple from metagpt.actions import Action from metagpt.logs import logger @@ -8,22 +8,24 @@ from metagpt.schema import Message, Plan class ReviewConst: TASK_REVIEW_TRIGGER = "task" CODE_REVIEW_TRIGGER = "code" - CONTINUE_WORD = ["confirm", "continue", "c", "yes", "y"] - CHANGE_WORD = ["change"] - EXIT_WORD = ["exit"] + CONTINUE_WORDS = ["confirm", "continue", "c", "yes", "y"] + CHANGE_WORDS = ["change"] + EXIT_WORDS = ["exit"] TASK_REVIEW_INSTRUCTION = ( - f"If you want to change, add, delete a task or merge tasks in the plan, say '{CHANGE_WORD[0]} task task_id or current task, ... 
(things to change)' " - f"If you confirm the output from the current task and wish to continue, type: {CONTINUE_WORD[0]}" + f"If you want to change, add, delete a task or merge tasks in the plan, say '{CHANGE_WORDS[0]} task task_id or current task, ... (things to change)' " + f"If you confirm the output from the current task and wish to continue, type: {CONTINUE_WORDS[0]}" ) CODE_REVIEW_INSTRUCTION = ( - f"If you want the codes to be rewritten, say '{CHANGE_WORD[0]} ... (your change advice)' " - f"If you want to leave it as is, type: {CONTINUE_WORD[0]} or {CONTINUE_WORD[1]}" + f"If you want the codes to be rewritten, say '{CHANGE_WORDS[0]} ... (your change advice)' " + f"If you want to leave it as is, type: {CONTINUE_WORDS[0]} or {CONTINUE_WORDS[1]}" ) - EXIT_INSTRUCTION = f"If you want to terminate the process, type: {EXIT_WORD[0]}" + EXIT_INSTRUCTION = f"If you want to terminate the process, type: {EXIT_WORDS[0]}" class AskReview(Action): - async def run(self, context: List[Message] = [], plan: Plan = None, trigger: str = "task"): + async def run( + self, context: list[Message] = [], plan: Plan = None, trigger: str = ReviewConst.TASK_REVIEW_TRIGGER + ) -> Tuple[str, bool]: if plan: logger.info("Current overall plan:") logger.info( @@ -32,7 +34,7 @@ class AskReview(Action): ) ) - logger.info("most recent context:") + logger.info("Most recent context:") latest_action = context[-1].cause_by if context and context[-1].cause_by else "" review_instruction = ( ReviewConst.TASK_REVIEW_INSTRUCTION @@ -48,11 +50,11 @@ class AskReview(Action): rsp = input(prompt) - if rsp.lower() in ReviewConst.EXIT_WORD: + if rsp.lower() in ReviewConst.EXIT_WORDS: exit() # Confirmation can be one of "confirm", "continue", "c", "yes", "y" exactly, or sentences containing "confirm". # One could say "confirm this task, but change the next task to ..." 
- confirmed = rsp.lower() in ReviewConst.CONTINUE_WORD or ReviewConst.CONTINUE_WORD[0] in rsp.lower() + confirmed = rsp.lower() in ReviewConst.CONTINUE_WORDS or ReviewConst.CONTINUE_WORDS[0] in rsp.lower() return rsp, confirmed diff --git a/metagpt/actions/debug_code.py b/metagpt/actions/debug_code.py index 121c126c4..d63fa3396 100644 --- a/metagpt/actions/debug_code.py +++ b/metagpt/actions/debug_code.py @@ -3,7 +3,7 @@ from typing import List from metagpt.actions.write_analysis_code import BaseWriteAnalysisCode from metagpt.logs import logger from metagpt.schema import Message -from metagpt.utils.common import create_func_config +from metagpt.utils.common import create_func_call_config DEBUG_REFLECTION_EXAMPLE = ''' Example 1: @@ -100,7 +100,7 @@ class DebugCode(BaseWriteAnalysisCode): info.append(Message(role="system", content=system_prompt)) info.append(Message(role="user", content=reflection_prompt)) - resp = await self.llm.aask_code(messages=info, **create_func_config(CODE_REFLECTION)) + resp = await self.llm.aask_code(messages=info, **create_func_call_config(CODE_REFLECTION)) logger.info(f"reflection is {resp}") return resp diff --git a/metagpt/actions/execute_code.py b/metagpt/actions/execute_nb_code.py similarity index 94% rename from metagpt/actions/execute_code.py rename to metagpt/actions/execute_nb_code.py index 6a4a9abb8..7dfbecb5c 100644 --- a/metagpt/actions/execute_code.py +++ b/metagpt/actions/execute_nb_code.py @@ -7,7 +7,6 @@ import asyncio import re import traceback -from abc import ABC, abstractmethod from pathlib import Path from typing import Any, Dict, List, Tuple, Union @@ -28,30 +27,8 @@ from metagpt.logs import logger from metagpt.schema import Message -class ExecuteCode(ABC): - @abstractmethod - async def build(self): - """build code executor""" - ... - - @abstractmethod - async def run(self, code: str): - """run code""" - ... - - @abstractmethod - async def terminate(self): - """terminate executor""" - ... 
- - @abstractmethod - async def reset(self): - """reset executor""" - ... - - -class ExecutePyCode(ExecuteCode, Action): - """execute code, return result to llm, and display it.""" +class ExecuteNbCode(Action): + """execute notebook code block, return result to llm, and display it.""" nb: Any nb_client: Any diff --git a/metagpt/actions/ml_action.py b/metagpt/actions/ml_action.py index a61233e5a..d419026fa 100644 --- a/metagpt/actions/ml_action.py +++ b/metagpt/actions/ml_action.py @@ -11,7 +11,7 @@ from metagpt.prompts.ml_action import ( ) from metagpt.prompts.write_analysis_code import CODE_GENERATOR_WITH_TOOLS from metagpt.schema import Message, Plan -from metagpt.utils.common import CodeParser, create_func_config, remove_comments +from metagpt.utils.common import CodeParser, create_func_call_config, remove_comments class WriteCodeWithToolsML(WriteCodeWithTools): @@ -52,7 +52,7 @@ class WriteCodeWithToolsML(WriteCodeWithTools): tool_type_usage_prompt=tool_type_usage_prompt, code_steps=code_steps, ) - tool_config = create_func_config(CODE_GENERATOR_WITH_TOOLS) + tool_config = create_func_call_config(CODE_GENERATOR_WITH_TOOLS) rsp = await self.llm.aask_code(prompt, **tool_config) # Extra output to be used for potential debugging @@ -97,6 +97,6 @@ class UpdateDataColumns(Action): code_context = [remove_comments(task.code) for task in finished_tasks] code_context = "\n\n".join(code_context) prompt = UPDATE_DATA_COLUMNS.format(history_code=code_context) - tool_config = create_func_config(PRINT_DATA_COLUMNS) + tool_config = create_func_call_config(PRINT_DATA_COLUMNS) rsp = await self.llm.aask_code(prompt, **tool_config) return rsp diff --git a/metagpt/actions/write_analysis_code.py b/metagpt/actions/write_analysis_code.py index 5cea9fe51..bf00e8ed1 100644 --- a/metagpt/actions/write_analysis_code.py +++ b/metagpt/actions/write_analysis_code.py @@ -2,9 +2,9 @@ """ @Date : 2023/11/20 13:19:39 @Author : orange-crow -@File : write_code_v2.py +@File : write_analysis_code.py 
""" -from typing import Dict, List, Tuple, Union +from typing import Dict, Tuple, Union from metagpt.actions import Action from metagpt.logs import logger @@ -17,14 +17,14 @@ from metagpt.prompts.write_analysis_code import ( from metagpt.schema import Message, Plan from metagpt.tools import TOOL_REGISTRY from metagpt.tools.tool_registry import validate_tool_names -from metagpt.utils.common import create_func_config +from metagpt.utils.common import create_func_call_config class BaseWriteAnalysisCode(Action): DEFAULT_SYSTEM_MSG: str = """You are Code Interpreter, a world-class programmer that can complete any goal by executing code. Strictly follow the plan and generate code step by step. Each step of the code will be executed on the user's machine, and the user will provide the code execution results to you.**Notice: The code for the next step depends on the code for the previous step. Must reuse variables in the lastest other code directly, dont creat it again, it is very import for you. 
Use !pip install in a standalone block to install missing packages.Usually the libraries you need are already installed.Dont check if packages already imported.**""" # prompt reference: https://github.com/KillianLucas/open-interpreter/blob/v0.1.4/interpreter/system_message.txt # REUSE_CODE_INSTRUCTION = """ATTENTION: DONT include codes from previous tasks in your current code block, include new codes only, DONT repeat codes!""" - def process_msg(self, prompt: Union[str, List[Dict], Message, List[Message]], system_msg: str = None): + def process_msg(self, prompt: Union[str, list[Dict], Message, list[Message]], system_msg: str = None): default_system_msg = system_msg or self.DEFAULT_SYSTEM_MSG # 全部转成list if not isinstance(prompt, list): @@ -53,16 +53,17 @@ class BaseWriteAnalysisCode(Action): } return messages - async def run(self, context: List[Message], plan: Plan = None) -> dict: + async def run(self, context: list[Message], plan: Plan = None) -> dict: """Run of a code writing action, used in data analysis or modeling Args: - context (List[Message]): Action output history, source action denoted by Message.cause_by + context (list[Message]): Action output history, source action denoted by Message.cause_by plan (Plan, optional): Overall plan. Defaults to None. 
Returns: dict: code result in the format of {"code": "print('hello world')", "language": "python"} """ + raise NotImplementedError class WriteCodeByGenerate(BaseWriteAnalysisCode): @@ -70,7 +71,7 @@ async def run( self, - context: [List[Message]], + context: list[Message], plan: Plan = None, system_msg: str = None, **kwargs, @@ -128,7 +129,7 @@ class WriteCodeWithTools(BaseWriteAnalysisCode): code_steps=code_steps, available_tools=available_tools, ) - tool_config = create_func_config(SELECT_FUNCTION_TOOLS) + tool_config = create_func_call_config(SELECT_FUNCTION_TOOLS) rsp = await self.llm.aask_code(prompt, **tool_config) recommend_tools = rsp["recommend_tools"] logger.info(f"Recommended tools: \n{recommend_tools}") @@ -169,7 +170,7 @@ class WriteCodeWithTools(BaseWriteAnalysisCode): async def run( self, - context: List[Message], + context: list[Message], plan: Plan, **kwargs, ) -> str: @@ -184,7 +185,7 @@ class WriteCodeWithTools(BaseWriteAnalysisCode): # prepare prompt & LLM call prompt = self.process_msg(context) - tool_config = create_func_config(CODE_GENERATOR_WITH_TOOLS) + tool_config = create_func_call_config(CODE_GENERATOR_WITH_TOOLS) rsp = await self.llm.aask_code(prompt, **tool_config) return rsp diff --git a/metagpt/actions/write_plan.py b/metagpt/actions/write_plan.py index 335a09841..77b52b78e 100644 --- a/metagpt/actions/write_plan.py +++ b/metagpt/actions/write_plan.py @@ -16,7 +16,7 @@ from metagpt.prompts.write_analysis_code import ( ) from metagpt.schema import Message, Plan, Task from metagpt.tools import TOOL_REGISTRY -from metagpt.utils.common import CodeParser, create_func_config +from metagpt.utils.common import CodeParser, create_func_call_config class WritePlan(Action): @@ -56,7 +56,7 @@ class WritePlan(Action): prompt = ASSIGN_TASK_TYPE_PROMPT.format( task_list=task_list, task_type_desc=task_type_desc ) # task types are set to be the same as tool types, for now - tool_config = 
create_func_config(ASSIGN_TASK_TYPE_CONFIG) + tool_config = create_func_call_config(ASSIGN_TASK_TYPE_CONFIG) rsp = await self.llm.aask_code(prompt, **tool_config) task_type_list = rsp["task_type"] print(f"assigned task types: {task_type_list}") diff --git a/metagpt/plan/planner.py b/metagpt/plan/planner.py index 0d8870fd3..6e866ec22 100644 --- a/metagpt/plan/planner.py +++ b/metagpt/plan/planner.py @@ -87,7 +87,11 @@ class Planner(BaseModel): await self.update_plan() async def ask_review( - self, task_result: TaskResult = None, auto_run: bool = None, trigger: str = ReviewConst.TASK_REVIEW_TRIGGER + self, + task_result: TaskResult = None, + auto_run: bool = None, + trigger: str = ReviewConst.TASK_REVIEW_TRIGGER, + review_context_len: int = 5, ): """ Ask to review the task result, reviewer needs to provide confirmation or request change. @@ -97,7 +101,9 @@ class Planner(BaseModel): auto_run = auto_run or self.auto_run if not auto_run: context = self.get_useful_memories() - review, confirmed = await AskReview().run(context=context[-5:], plan=self.plan, trigger=trigger) + review, confirmed = await AskReview().run( + context=context[-review_context_len:], plan=self.plan, trigger=trigger + ) if not confirmed: self.working_memory.add(Message(content=review, role="user", cause_by=AskReview)) return review, confirmed @@ -110,7 +116,7 @@ class Planner(BaseModel): self.working_memory.clear() confirmed_and_more = ( - ReviewConst.CONTINUE_WORD[0] in review.lower() and review.lower() not in ReviewConst.CONTINUE_WORD[0] + ReviewConst.CONTINUE_WORDS[0] in review.lower() and review.lower() not in ReviewConst.CONTINUE_WORDS[0] ) # "confirm, ... 
(more content, such as changing downstream tasks)" if confirmed_and_more: self.working_memory.add(Message(content=review, role="user", cause_by=AskReview)) diff --git a/metagpt/roles/code_interpreter.py b/metagpt/roles/code_interpreter.py index b4f9622d3..1ae4feec7 100644 --- a/metagpt/roles/code_interpreter.py +++ b/metagpt/roles/code_interpreter.py @@ -1,7 +1,7 @@ from pydantic import Field from metagpt.actions.ask_review import ReviewConst -from metagpt.actions.execute_code import ExecutePyCode +from metagpt.actions.execute_nb_code import ExecuteNbCode from metagpt.actions.write_analysis_code import WriteCodeByGenerate, WriteCodeWithTools from metagpt.logs import logger from metagpt.roles import Role @@ -11,7 +11,7 @@ from metagpt.schema import Message, Task, TaskResult class CodeInterpreter(Role): auto_run: bool = True use_tools: bool = False - execute_code: ExecutePyCode = Field(default_factory=ExecutePyCode, exclude=True) + execute_code: ExecuteNbCode = Field(default_factory=ExecuteNbCode, exclude=True) tools: list[str] = [] def __init__( @@ -59,7 +59,7 @@ class CodeInterpreter(Role): result, success = await self.execute_code.run(**code) print(result) - self.working_memory.add(Message(content=result, role="user", cause_by=ExecutePyCode)) + self.working_memory.add(Message(content=result, role="user", cause_by=ExecuteNbCode)) ### process execution result ### if "!pip" in code["code"]: @@ -70,7 +70,7 @@ class CodeInterpreter(Role): if not success and counter >= max_retry: logger.info("coding failed!") review, _ = await self.planner.ask_review(auto_run=False, trigger=ReviewConst.CODE_REVIEW_TRIGGER) - if ReviewConst.CHANGE_WORD[0] in review: + if ReviewConst.CHANGE_WORDS[0] in review: counter = 0 # redo the task again with help of human suggestions py_code = ( diff --git a/metagpt/roles/ml_engineer.py b/metagpt/roles/ml_engineer.py index e7abee560..19c34f62d 100644 --- a/metagpt/roles/ml_engineer.py +++ b/metagpt/roles/ml_engineer.py @@ -1,5 +1,5 @@ from 
metagpt.actions.debug_code import DebugCode -from metagpt.actions.execute_code import ExecutePyCode +from metagpt.actions.execute_nb_code import ExecuteNbCode from metagpt.actions.ml_action import UpdateDataColumns, WriteCodeWithToolsML from metagpt.logs import logger from metagpt.roles.code_interpreter import CodeInterpreter @@ -19,7 +19,7 @@ class MLEngineer(CodeInterpreter): return await super()._write_code() # In a trial and errors settings, check whether this is our first attempt to tackle the task. If there is no code execution before, then it is. - is_first_trial = any_to_str(ExecutePyCode) not in [msg.cause_by for msg in self.working_memory.get()] + is_first_trial = any_to_str(ExecuteNbCode) not in [msg.cause_by for msg in self.working_memory.get()] if is_first_trial: # For the first trial, write task code from scratch diff --git a/metagpt/utils/common.py b/metagpt/utils/common.py index 7d3d47680..55f4ce378 100644 --- a/metagpt/utils/common.py +++ b/metagpt/utils/common.py @@ -352,7 +352,7 @@ def parse_recipient(text): return "" -def create_func_config(func_schema: dict) -> dict: +def create_func_call_config(func_schema: dict) -> dict: """Create new function call config""" tools = [{"type": "function", "function": func_schema}] tool_choice = {"type": "function", "function": {"name": func_schema["name"]}} @@ -362,7 +362,7 @@ def create_func_config(func_schema: dict) -> dict: } -def remove_comments(code_str): +def remove_comments(code_str: str) -> str: """Remove comments from code.""" pattern = r"(\".*?\"|\'.*?\')|(\#.*?$)" diff --git a/metagpt/utils/recovery_util.py b/metagpt/utils/recovery_util.py index 3405b9587..d0b197e69 100644 --- a/metagpt/utils/recovery_util.py +++ b/metagpt/utils/recovery_util.py @@ -10,12 +10,13 @@ import nbformat from metagpt.const import DATA_PATH from metagpt.roles.role import Role +from metagpt.utils.common import read_json_file from metagpt.utils.save_code import save_code_file def load_history(save_dir: str = ""): """ - Load 
history from the specified save directory. + Load plan and code execution history from the specified save directory. Args: save_dir (str): The directory from which to load the history. @@ -26,14 +27,14 @@ def load_history(save_dir: str = ""): plan_path = Path(save_dir) / "plan.json" nb_path = Path(save_dir) / "history_nb" / "code.ipynb" - plan = json.load(open(plan_path, "r", encoding="utf-8")) + plan = read_json_file(plan_path) nb = nbformat.read(open(nb_path, "r", encoding="utf-8"), as_version=nbformat.NO_CONVERT) return plan, nb def save_history(role: Role, save_dir: str = ""): """ - Save history to the specified directory. + Save plan and code execution history to the specified directory. Args: role (Role): The role containing the plan and execute_code attributes. diff --git a/tests/metagpt/actions/test_execute_code.py b/tests/metagpt/actions/test_execute_code.py deleted file mode 100644 index 21627e6f9..000000000 --- a/tests/metagpt/actions/test_execute_code.py +++ /dev/null @@ -1,121 +0,0 @@ -import pytest - -from metagpt.actions.execute_code import ExecutePyCode, truncate - - -@pytest.mark.asyncio -async def test_code_running(): - pi = ExecutePyCode() - output = await pi.run("print('hello world!')") - assert output[1] is True - output = await pi.run({"code": "print('hello world!')", "language": "python"}) - assert output[1] is True - - -@pytest.mark.asyncio -async def test_split_code_running(): - pi = ExecutePyCode() - output = await pi.run("x=1\ny=2") - output = await pi.run("z=x+y") - output = await pi.run("assert z==3") - assert output[1] is True - - -@pytest.mark.asyncio -async def test_execute_error(): - pi = ExecutePyCode() - output = await pi.run("z=1/0") - assert output[1] is False - - -@pytest.mark.asyncio -async def test_plotting_code(): - pi = ExecutePyCode() - code = """ - import numpy as np - import matplotlib.pyplot as plt - - # 生成随机数据 - random_data = np.random.randn(1000) # 生成1000个符合标准正态分布的随机数 - - # 绘制直方图 - plt.hist(random_data, bins=30, 
density=True, alpha=0.7, color='blue', edgecolor='black') - - # 添加标题和标签 - plt.title('Histogram of Random Data') - plt.xlabel('Value') - plt.ylabel('Frequency') - - # 显示图形 - plt.show() - plt.close() - """ - output = await pi.run(code) - assert output[1] is True - - -def test_truncate(): - # 代码执行成功 - output, is_success = truncate("hello world", 5, True) - assert "Truncated to show only first 5 characters\nhello" in output - assert is_success - # 代码执行失败 - output, is_success = truncate("hello world", 5, False) - assert "Truncated to show only last 5 characters\nworld" in output - assert not is_success - # 异步 - output, is_success = truncate("