From fdef9c8547d743d41116d8bcf16fb3dd38b13e2d Mon Sep 17 00:00:00 2001 From: yzlin Date: Fri, 24 Nov 2023 14:05:11 +0800 Subject: [PATCH] add more components in pipeline --- metagpt/actions/__init__.py | 4 +- ...ode_function.py => write_analysis_code.py} | 22 ++++-- metagpt/actions/write_plan.py | 6 +- metagpt/roles/ml_engineer.py | 70 +++++++++++++------ metagpt/schema.py | 1 + ...unction.py => test_write_analysis_code.py} | 8 +-- tests/metagpt/test_schema.py | 12 ++-- 7 files changed, 82 insertions(+), 41 deletions(-) rename metagpt/actions/{write_code_function.py => write_analysis_code.py} (76%) rename tests/metagpt/actions/{test_write_code_function.py => test_write_analysis_code.py} (86%) diff --git a/metagpt/actions/__init__.py b/metagpt/actions/__init__.py index ba2170cbd..5055ce276 100644 --- a/metagpt/actions/__init__.py +++ b/metagpt/actions/__init__.py @@ -24,7 +24,7 @@ from metagpt.actions.write_prd import WritePRD from metagpt.actions.write_prd_review import WritePRDReview from metagpt.actions.write_test import WriteTest from metagpt.actions.execute_code import ExecutePyCode -from metagpt.actions.write_code_function import WriteCodeFunction +from metagpt.actions.write_analysis_code import WriteCodeByGenerate from metagpt.actions.write_plan import WritePlan @@ -49,7 +49,7 @@ class ActionType(Enum): WEB_BROWSE_AND_SUMMARIZE = WebBrowseAndSummarize CONDUCT_RESEARCH = ConductResearch EXECUTE_PYCODE = ExecutePyCode - WRITE_CODE_FUNCTION = WriteCodeFunction + WRITE_CODE_BY_GENERATE = WriteCodeByGenerate WRITE_PLAN = WritePlan diff --git a/metagpt/actions/write_code_function.py b/metagpt/actions/write_analysis_code.py similarity index 76% rename from metagpt/actions/write_code_function.py rename to metagpt/actions/write_analysis_code.py index 4ec565eb1..84922ada4 100644 --- a/metagpt/actions/write_code_function.py +++ b/metagpt/actions/write_analysis_code.py @@ -11,17 +11,20 @@ from metagpt.schema import Message, Plan class BaseWriteAnalysisCode(Action): - async def run(self, context: List[Message], plan: Plan = None, task_guidance: str = ""): + async def run(self, context: List[Message], plan: Plan = None, task_guide: str = "") -> str: """Run of a code writing action, used in data analysis or modeling Args: context (List[Message]): Action output history, source action denoted by Message.cause_by plan (Plan, optional): Overall plan. Defaults to None. - task_guidance (str, optional): suggested step breakdown for the current task. Defaults to "". + task_guide (str, optional): suggested step breakdown for the current task. Defaults to "". + + Returns: + str: The code string. """ -class WriteCodeFunction(BaseWriteAnalysisCode): - """Use openai function to generate code.""" +class WriteCodeByGenerate(BaseWriteAnalysisCode): + """Write code fully by generation""" def __init__(self, name: str = "", context=None, llm=None) -> str: super().__init__(name, context, llm) @@ -54,8 +57,15 @@ class WriteCodeFunction(BaseWriteAnalysisCode): return prompt async def run( - self, context: [List[Message]], plan: Plan = None, task_guidance: str = "", system_msg: str = None, **kwargs + self, context: [List[Message]], plan: Plan = None, task_guide: str = "", system_msg: str = None, **kwargs ) -> str: prompt = self.process_msg(context, system_msg) code_content = await self.llm.aask_code(prompt, **kwargs) - return code_content + return code_content["code"] + + +class WriteCodeWithTools(BaseWriteAnalysisCode): + """Write code with help of local available tools. Choose tools first, then generate code to use the tools""" + + async def run(self, context: List[Message], plan: Plan = None, task_guide: str = "") -> str: + return "print('abc')" diff --git a/metagpt/actions/write_plan.py b/metagpt/actions/write_plan.py index 48cb1aad5..e35ba7a92 100644 --- a/metagpt/actions/write_plan.py +++ b/metagpt/actions/write_plan.py @@ -9,6 +9,7 @@ import json from metagpt.actions import Action from metagpt.schema import Message, Task +from metagpt.utils.common import CodeParser class WritePlan(Action): PROMPT_TEMPLATE = """ @@ -20,14 +21,16 @@ class WritePlan(Action): Based on the context, write a plan or modify an existing plan of what you should do to achieve the goal. A plan consists of one to __max_tasks__ tasks. If you are modifying an existing plan, carefully follow the instruction, don't make unnecessary changes. Output a list of jsons following the format: + ```json [ { - "task_id": str = "unique identifier for a task in plan, can be a ordinal", + "task_id": str = "unique identifier for a task in plan, can be an ordinal", "dependent_task_ids": list[str] = "ids of tasks prerequisite to this task", "instruction": "what you should do in this task, one short phrase or sentence", }, ... ] + ``` """ async def run(self, context: List[Message], current_plan: str = "", max_tasks: int = 5) -> str: prompt = ( @@ -35,6 +38,7 @@ class WritePlan(Action): .replace("__current_plan__", current_plan).replace("__max_tasks__", str(max_tasks)) ) rsp = await self._aask(prompt) + rsp = CodeParser.parse_code(block=None, text=rsp) return rsp @staticmethod diff --git a/metagpt/roles/ml_engineer.py b/metagpt/roles/ml_engineer.py index c795bda11..480f6cecf 100644 --- a/metagpt/roles/ml_engineer.py +++ b/metagpt/roles/ml_engineer.py @@ -9,30 +9,41 @@ from metagpt.actions import Action from metagpt.schema import Message, Task, Plan from metagpt.logs import logger from metagpt.actions.write_plan import WritePlan -from metagpt.actions.write_code_function import WriteCodeFunction +from metagpt.actions.write_analysis_code import WriteCodeByGenerate, WriteCodeWithTools from metagpt.actions.execute_code import ExecutePyCode class AskReview(Action): async def run(self, context: List[Message], plan: Plan = None): - prompt = "\n".join( - [f"{msg.cause_by() if msg.cause_by else 'Main Requirement'}: {msg.content}" for msg in context] - ) + logger.info("Current overall plan:") + logger.info("\n".join([f"{task.task_id}: {task.instruction}" for task in plan.tasks])) - latest_action = context[-1].cause_by() - - prompt += f"\nPlease review output from {latest_action}, " \ - "provide feedback or type YES to continue with the process:\n" + logger.info("most recent context:") + # prompt = "\n".join( + # [f"{msg.cause_by.__name__ if msg.cause_by else 'Main Requirement'}: {msg.content}" for msg in context] + # ) + prompt = "" + latest_action = context[-1].cause_by.__name__ + prompt += f"\nPlease review output from {latest_action}:\n" \ + "If you want to change a task in the plan, say 'change task task_id, ... (things to change)'\n" \ + "If you confirm the output and wish to continue with the current process, type CONFIRM:\n" rsp = input(prompt) - confirmed = "yes" in rsp.lower() + confirmed = "confirm" in rsp.lower() + return rsp, confirmed +class WriteTaskGuide(Action): + + async def run(self, task_instruction: str, data_desc: str = "") -> str: + return "" class MLEngineer(Role): - def __init__(self, name="ABC", profile="MLEngineer"): - super().__init__(name=name, profile=profile) + def __init__(self, name="ABC", profile="MLEngineer", goal=""): + super().__init__(name=name, profile=profile, goal=goal) self._set_react_mode(react_mode="plan_and_act") - self.plan = Plan() + self.plan = Plan(goal=goal) + self.use_tools = False + self.use_task_guide = False async def _plan_and_act(self): @@ -60,18 +71,28 @@ class MLEngineer(Role): await self._update_plan() async def _write_and_exec_code(self, max_retry: int = 3): + + task_guide = await WriteTaskGuide().run(self.plan.current_task.instruction) if self.use_task_guide else "" + counter = 0 success = False while not success and counter < max_retry: - context = self.get_memories() + context = self.get_useful_memories() - code = "print('abc')" - # code = await WriteCodeFunction().run(context=context) - # code = await WriteCodeWithOps.run(context, task, result) - self._rc.memory.add(Message(content=code, role="assistant", cause_by=WriteCodeFunction)) + if not self.use_tools: + # code = "print('abc')" + code = await WriteCodeByGenerate().run(context=context, plan=self.plan, task_guide=task_guide) + cause_by = WriteCodeByGenerate + + else: + code = await WriteCodeWithTools().run(context=context, plan=self.plan, task_guide=task_guide) + cause_by = WriteCodeWithTools + + self._rc.memory.add(Message(content=code, role="assistant", cause_by=cause_by)) result, success = await ExecutePyCode().run(code) - self._rc.memory.add(Message(content=result, role="assistant", cause_by=ExecutePyCode)) + print(result) + self._rc.memory.add(Message(content=result, role="user", cause_by=ExecutePyCode)) # if not success: # await self._ask_review() @@ -81,16 +102,16 @@ class MLEngineer(Role): return code, result, success async def _ask_review(self): - context = self.get_memories() + context = self.get_useful_memories() review, confirmed = await AskReview().run(context=context[-5:], plan=self.plan) - self._rc.memory.add(Message(content=review, role="assistant", cause_by=AskReview)) + self._rc.memory.add(Message(content=review, role="user", cause_by=AskReview)) return confirmed async def _update_plan(self, max_tasks: int = 3): current_plan = str([task.json() for task in self.plan.tasks]) plan_confirmed = False while not plan_confirmed: - context = self.get_memories() + context = self.get_useful_memories() rsp = await WritePlan().run(context, current_plan=current_plan, max_tasks=max_tasks) self._rc.memory.add(Message(content=rsp, role="assistant", cause_by=WritePlan)) plan_confirmed = await self._ask_review() @@ -98,13 +119,18 @@ class MLEngineer(Role): tasks = WritePlan.rsp_to_tasks(rsp) self.plan.add_tasks(tasks) + def get_useful_memories(self, current_task_memories: List[str] = []) -> List[Message]: + """find useful memories only to reduce context length and improve performance""" + memories = super().get_memories() + return memories + if __name__ == "__main__": # requirement = "create a normal distribution and visualize it" requirement = "run some analysis on iris dataset" async def main(requirement: str = requirement): - role = MLEngineer() + role = MLEngineer(goal=requirement) await role.run(requirement) fire.Fire(main) diff --git a/metagpt/schema.py b/metagpt/schema.py index 3cd7d9730..e39f54a0c 100644 --- a/metagpt/schema.py +++ b/metagpt/schema.py @@ -84,6 +84,7 @@ class Task(BaseModel): class Plan(BaseModel): + goal: str tasks: list[Task] = [] task_map: dict[str, Task] = {} current_task_id = "" diff --git a/tests/metagpt/actions/test_write_code_function.py b/tests/metagpt/actions/test_write_analysis_code.py similarity index 86% rename from tests/metagpt/actions/test_write_code_function.py rename to tests/metagpt/actions/test_write_analysis_code.py index 4ff1a63c4..41c0479a9 100644 --- a/tests/metagpt/actions/test_write_code_function.py +++ b/tests/metagpt/actions/test_write_analysis_code.py @@ -1,12 +1,12 @@ import pytest -from metagpt.actions.write_code_function import WriteCodeFunction +from metagpt.actions.write_analysis_code import WriteCodeByGenerate from metagpt.actions.execute_code import ExecutePyCode @pytest.mark.asyncio async def test_write_code(): - write_code = WriteCodeFunction() + write_code = WriteCodeByGenerate() code = await write_code.run("Write a hello world code.") assert "language" in code.content assert "code" in code.content @@ -15,7 +15,7 @@ async def test_write_code(): @pytest.mark.asyncio async def test_write_code_by_list_prompt(): - write_code = WriteCodeFunction() + write_code = WriteCodeByGenerate() msg = ["a=[1,2,5,10,-10]", "写出求a中最大值的代码python"] code = await write_code.run(msg) assert "language" in code.content @@ -25,7 +25,7 @@ async def test_write_code_by_list_prompt(): @pytest.mark.asyncio async def test_write_code_by_list_plan(): - write_code = WriteCodeFunction() + write_code = WriteCodeByGenerate() execute_code = ExecutePyCode() messages = [] plan = ["随机生成一个pandas DataFrame时间序列", "绘制这个时间序列的直方图", "求均值"] diff --git a/tests/metagpt/test_schema.py b/tests/metagpt/test_schema.py index 6aae82006..8f65d3785 100644 --- a/tests/metagpt/test_schema.py +++ b/tests/metagpt/test_schema.py @@ -24,7 +24,7 @@ def test_messages(): class TestPlan: def test_add_tasks_ordering(self): - plan = Plan() + plan = Plan(goal="") tasks = [ Task(task_id="1", dependent_task_ids=["2", "3"], instruction="Third"), @@ -36,7 +36,7 @@ class TestPlan: assert [task.task_id for task in plan.tasks] == ["2", "3", "1"] def test_add_tasks_to_existing_no_common_prefix(self): - plan = Plan() + plan = Plan(goal="") tasks = [ Task(task_id="1", dependent_task_ids=["2", "3"], instruction="Third"), @@ -52,7 +52,7 @@ class TestPlan: assert not plan.tasks[0].is_finished # must be the new unfinished task def test_add_tasks_to_existing_with_common_prefix(self): - plan = Plan() + plan = Plan(goal="") tasks = [ Task(task_id="1", dependent_task_ids=["2", "3"], instruction="Third"), @@ -75,7 +75,7 @@ class TestPlan: assert plan.current_task_id == "4" def test_current_task(self): - plan = Plan() + plan = Plan(goal="") tasks = [ Task(task_id="1", dependent_task_ids=["2"], instruction="Second"), Task(task_id="2", instruction="First") @@ -84,7 +84,7 @@ class TestPlan: assert plan.current_task.task_id == "2" def test_finish_task(self): - plan = Plan() + plan = Plan(goal="") tasks = [ Task(task_id="1", instruction="First"), Task(task_id="2", dependent_task_ids=["1"], instruction="Second") @@ -94,7 +94,7 @@ class TestPlan: assert plan.current_task.task_id == "2" def test_finished_tasks(self): - plan = Plan() + plan = Plan(goal="") tasks = [ Task(task_id="1", instruction="First"), Task(task_id="2", dependent_task_ids=["1"], instruction="Second")