From cc523e43fc9261ce2769c00aa658a05b1f121f7a Mon Sep 17 00:00:00 2001 From: garylin2099 Date: Fri, 26 Jul 2024 18:44:28 +0800 Subject: [PATCH 1/5] remove unnecessary DA tools & use clearer response at mgx_env --- metagpt/environment/mgx/mgx_env.py | 5 +++-- tests/metagpt/environment/mgx_env/run_mgx_env.py | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/metagpt/environment/mgx/mgx_env.py b/metagpt/environment/mgx/mgx_env.py index 3a492b3ac..873358252 100644 --- a/metagpt/environment/mgx/mgx_env.py +++ b/metagpt/environment/mgx/mgx_env.py @@ -87,11 +87,12 @@ class MGXEnv(Environment): async def ask_human(self, question: str, sent_from: Role = None) -> str: # NOTE: Can be overwritten in remote setting - return await get_human_input(question) + rsp = await get_human_input(question) + return "Human response: " + rsp async def reply_to_human(self, content: str, sent_from: Role = None) -> str: # NOTE: Can be overwritten in remote setting - return "The monitor has verified the message, confirmation acknowledged. Refrain from resending duplicate messages." + return "SUCCESS, human has received your reply. Refrain from resending duplicate messages." def message_within_software_sop(self, message: Message) -> bool: # Engineer, QaEngineer can be end of the SOP. Their msg requires routing outside. diff --git a/tests/metagpt/environment/mgx_env/run_mgx_env.py b/tests/metagpt/environment/mgx_env/run_mgx_env.py index ecc2facca..b6d5341de 100644 --- a/tests/metagpt/environment/mgx_env/run_mgx_env.py +++ b/tests/metagpt/environment/mgx_env/run_mgx_env.py @@ -28,7 +28,7 @@ async def main(requirement="", enable_human_input=False, use_fixed_sop=False, al ProjectManager(use_fixed_sop=use_fixed_sop), engineer, # QaEngineer(), - DataAnalyst(tools=[""]), + DataAnalyst(), SWEAgent(), ] ) From 744a4bc4feee560f3a3854f6f0adb9882822a391 Mon Sep 17 00:00:00 2001 From: garylin2099 Date: Sat, 27 Jul 2024 00:44:11 +0800 Subject: [PATCH 2/5] tackle duplicate problem --- metagpt/prompts/di/role_zero.py | 36 ++++++++++++++++++++---- metagpt/roles/di/role_zero.py | 25 ++++++++++++++++ metagpt/roles/di/team_leader.py | 3 ++ metagpt/strategy/experience_retriever.py | 32 ++++++++++++++++++--- 4 files changed, 86 insertions(+), 10 deletions(-) diff --git a/metagpt/prompts/di/role_zero.py b/metagpt/prompts/di/role_zero.py index d3f978c15..96b921930 100644 --- a/metagpt/prompts/di/role_zero.py +++ b/metagpt/prompts/di/role_zero.py @@ -51,10 +51,7 @@ In your response, include at least one command. # Your commands in a json array, in the following output format with correct command_name and args. If there is nothing to do, use the pass or end command: Some text indicating your thoughts before JSON is required, such as what tasks have been completed, what tasks are next, how you should update the plan status, respond to inquiry, or seek for help. Then a json array of commands. You must output ONE and ONLY ONE json array. DON'T output multiple json arrays with thoughts between them. Output should adhere to the following format. -Firstly, describe the actions you have taken recently. -Secondly, describe the messages you have received recently, with a particular emphasis on messages from users. -Thirdly, describe your current task . Review the histroy, if you find that the current task is identical to a previously completed one, it indicates that the current task has already been accomplished. If all tasks are finished and current task is empty, use the end command to terminate. -Then, articulate your thoughts and list the commands, adhering closely to the instructions provided. +{thought_guidance} ```json [ {{ @@ -67,7 +64,23 @@ Then, articulate your thoughts and list the commands, adhering closely to the in Notice: your output JSON data must be a command list. Notice: your output JSON data section must start with **```json [** """ - +THOUGHT_GUIDANCE = """ +Firstly, describe the actions you have taken recently. +Secondly, describe the messages you have received recently, with a particular emphasis on messages from users. +Thirdly, describe the plan status and the current task. Review the histroy, if `Current Task` has been undertaken and completed by you or anyone, you MUST use the **Plan.finish_current_task** command to finish it first before taking any action, the command will automatically move you to the next task. +Fourthly, describe any necessary human interaction. Use **RoleZero.reply_to_human** to report your progress, pay attention to the history, DON'T repeat reporting. Use **RoleZero.ask_human** if you failed the current task or if you are unsure of the situation encountered or if you need any help from human. +Fifthly, describe if you should terminate, you should use **end** command to terminate if any of the following is met: + - You have completed the overall user requirement + - All tasks are finished and current task is empty +Finally, combine your thoughts, describe what you want to do conscisely in 20 words, then follow your thoughts to list the commands, adhering closely to the instructions provided. +""" +REGENERATE_PROMPT = """ +Review the history carefully and consider human's feedback: +{human_rsp} +provide different commands. +Describe if you should terminate using **end** command, or try a different approach and output different commands. You are NOT allowed to provide the same commands again. +Your reflection, then the commands in a json array: +""" JSON_REPAIR_PROMPT = """ ## json data {json_data} @@ -81,7 +94,6 @@ Help check if there are any formatting issues with the JSON data? If so, please If no issues are detected, the original json data should be returned unchanged. Output the JSON data in a format that can be loaded by the json.loads() function. """ - QUICK_THINK_PROMPT = """ Decide if the latest user message previously is a quick question. Quick questions include common-sense, logical, math, multiple-choice questions, greetings, or casual chat that you can answer directly. @@ -91,3 +103,15 @@ Software development tasks are NOT quick questions. However, these programming-related tasks are quick questions: writing trivial code snippets (fewer than 30 lines), filling a single function or class, explaining concepts, writing tutorials and documentation. Respond with a concise thought then a YES if the question is a quick question, otherwise, a NO. Your response: """ +ASK_HUMAN_COMMAND = """ +```json +[ + { + "command_name": "RoleZero.ask_human", + "args": { + "question": "I'm a little uncertain about the next step, could you provide me with some guidance?" + } + } +] +``` +""" diff --git a/metagpt/roles/di/role_zero.py b/metagpt/roles/di/role_zero.py index 40c03ccad..0580ea470 100644 --- a/metagpt/roles/di/role_zero.py +++ b/metagpt/roles/di/role_zero.py @@ -15,10 +15,13 @@ from metagpt.exp_pool.context_builders import RoleZeroContextBuilder from metagpt.exp_pool.serializers import RoleZeroSerializer from metagpt.logs import logger from metagpt.prompts.di.role_zero import ( + ASK_HUMAN_COMMAND, CMD_PROMPT, JSON_REPAIR_PROMPT, QUICK_THINK_PROMPT, + REGENERATE_PROMPT, ROLE_INSTRUCTION, + THOUGHT_GUIDANCE, ) from metagpt.roles import Role from metagpt.schema import AIMessage, Message, UserMessage @@ -155,6 +158,7 @@ class RoleZero(Role): plan_status=plan_status, current_task=current_task, instruction=instruction, + thought_guidance=THOUGHT_GUIDANCE, latest_observation=memory[-1].content, ) memory = await self.parse_browser_actions(memory) @@ -168,6 +172,8 @@ class RoleZero(Role): ) self.command_rsp = await self.llm_cached_aask(req=req, system_msgs=self.system_msg, state_data=state_data) + self.command_rsp = await self._check_duplicates(req, self.command_rsp) + self.rc.memory.add(AIMessage(content=self.command_rsp)) return True @@ -260,6 +266,25 @@ class RoleZero(Role): return rsp_msg + async def _check_duplicates(self, req: list[dict], command_rsp: str): + past_rsp = [mem.content for mem in self.rc.memory.get(self.memory_k)] + if command_rsp in past_rsp: + # Normal response with thought contents are highly unlikely to reproduce + # If an identical response is detected, it is a bad response, mostly due to LLM repeating generated content + # In this case, ask human for help and regenerate + # TODO: switch to llm_cached_aask + logger.warning(f"Duplicate response detected: {command_rsp}") + human_rsp = await self.ask_human( + question="I'm a little uncertain about the next step, could you provide me with some guidance?" + ) + regenerate_req = req + [ + AIMessage(content=ASK_HUMAN_COMMAND), + UserMessage(content=REGENERATE_PROMPT.format(human_rsp=human_rsp)), + ] + regenerate_req = self.llm.format_msg(regenerate_req) + command_rsp = await self.llm.aask(regenerate_req) + return command_rsp + async def _parse_commands(self) -> Tuple[List[Dict], bool]: """Retrieves commands from the Large Language Model (LLM). diff --git a/metagpt/roles/di/team_leader.py b/metagpt/roles/di/team_leader.py index 6dde4565e..fca45f5a8 100644 --- a/metagpt/roles/di/team_leader.py +++ b/metagpt/roles/di/team_leader.py @@ -72,6 +72,9 @@ class TeamLeader(RoleZero): Publish a message to a team member, use member name to fill send_to args. You may copy the full original content or add additional information from upstream. This will make team members start their work. DONT omit any necessary info such as path, link, environment, programming language, framework, requirement, constraint from original content to team members because you are their sole info source. """ + self._set_state(-1) # each time publishing a message, pause to wait for the response + if send_to == self.name: + return # Avoid sending message to self # Specify the outer send_to to overwrite the default "no one" value. Use UserMessage because message from self is like a user request for others. self.publish_message( UserMessage(content=content, sent_from=self.name, send_to=send_to, cause_by=RunCommand), send_to=send_to diff --git a/metagpt/strategy/experience_retriever.py b/metagpt/strategy/experience_retriever.py index 43a9677f8..8a87df3fd 100644 --- a/metagpt/strategy/experience_retriever.py +++ b/metagpt/strategy/experience_retriever.py @@ -566,7 +566,7 @@ Explanation: DON'T decompose requirement if it is a DATA-RELATED task, assign a Conversation History: [ ..., - {'role': 'assistant', 'content': 'from Alice(Product Manager) to {'Bob'}: {'docs': {'20240424153821.json': {'root_path': 'docs/prd', 'filename': '20240424153821.json', 'content': '{"Language":"en_us","Programming Language":"Python","Original Requirements":"create a cli snake game","Project Name":"snake_game","Product Goals":["Develop an intuitive and addictive snake game",...], ...}}}}}, + {'role': 'assistant', 'content': 'from Alice(Product Manager) to {''}: Request is completed, with outputs: Command WritePRD executed: PRD filename: "/tmp/workspace/snake_game/docs/prd.json"'}, ] Explanation: You received a message from Alice, the Product Manager, that she has completed the PRD, use Plan.finish_current_task to mark her task as finished and moves the plan to the next task. Based on plan status, next task is for Bob (Architect), publish a message asking him to start. The message content should contain important path info. ```json @@ -578,7 +578,7 @@ Explanation: You received a message from Alice, the Product Manager, that she ha { "command_name": "TeamLeader.publish_message", "args": { - "content": "Please design the software architecture for the snake game based on the PRD created by Alice. The PRD is at 'docs/prd/20240424153821.json'. Include the choice of programming language, libraries, and data flow, etc.", + "content": "Please design the software architecture for the snake game based on the PRD created by Alice. The PRD is at '/tmp/workspace/snake_game/docs/prd.json'. Include the choice of programming language, libraries, and data flow, etc.", "send_to": "Bob" } }, @@ -820,7 +820,7 @@ editor.read(path="./main.py") ENGINEER_EXAMPLE = """ ## example 1 User Requirement: Please implement the core game logic for the 2048 game, including tile movements, merging logic, score tracking, and keyboard interaction. Refer to the project schedule located at '/tmp/project_schedule.json' and the system design document at '/tmp/system_design.json' for detailed information. -Explanation: I will first need to read the system design document and the project schedule to understand the specific requirements and architecture outlined for the game development. +Explanation: I will first need to read the system design document and the project schedule to understand the specific requirements and architecture outlined for the game development. I should NOT create tasks at this stage. ```json [ @@ -840,7 +840,8 @@ Explanation: I will first need to read the system design document and the projec ``` ## example 2 -To achieve the goal of writing a 2048 game using JavaScript and HTML without any frameworks, I will create a plan consisting of three tasks, each corresponding to the creation of one of the required files: `index.html`, `style.css`, and `script.js`. Following the completion of these tasks, I will add a code review task for each file to ensure the implementation aligns with the provided system design and project schedule documents. +Consider this example only after you have obtained the content of system design and project schedule documents. +Suppose the system design and project schedule prescribes three files index.html, style.css, script.js, to follow the design and schedule, I will create a plan consisting of three tasks, each corresponding to the creation of one of the required files: `index.html`, `style.css`, and `script.js`. Following the completion of these tasks, I will add a code review task for each file to ensure the implementation aligns with the provided system design and project schedule documents. Here's the plan: @@ -911,6 +912,25 @@ Let's start by appending the first task to the plan. ``` ## example 3 +Explanation: Take on one task, such as writing a file. Upon completion, finish current task + +```json +[ + { + "command_name": "Editor.write", + "args": { + "path": "/Users/gary/Files/temp/workspace/snake_game/src/index.html", + "content": "the code ..." + } + }, + { + "command_name": "Plan.finish_current_task", + "args": {{}} + } +] +``` + +## example 4 I will now review the code in `script.js`. Explanation: to review the code, call ReviewAndRewriteCode.run. @@ -924,6 +944,10 @@ Explanation: to review the code, call ReviewAndRewriteCode.run. "project_schedule_input": "/tmp/docs/project_schedule.json", "code_review_k_times": 2 } + }, + { + "command_name": "Plan.finish_current_task", + "args": {{}} } ] ``` From f1f49c076cd2c3f3905dafdf6e9851109ad81dab Mon Sep 17 00:00:00 2001 From: garylin2099 Date: Sat, 27 Jul 2024 01:34:41 +0800 Subject: [PATCH 3/5] dynamic correction --- metagpt/prompts/di/role_zero.py | 20 +++----------------- metagpt/roles/di/role_zero.py | 9 +-------- 2 files changed, 4 insertions(+), 25 deletions(-) diff --git a/metagpt/prompts/di/role_zero.py b/metagpt/prompts/di/role_zero.py index 96b921930..f0f11754a 100644 --- a/metagpt/prompts/di/role_zero.py +++ b/metagpt/prompts/di/role_zero.py @@ -73,12 +73,10 @@ Fifthly, describe if you should terminate, you should use **end** command to ter - You have completed the overall user requirement - All tasks are finished and current task is empty Finally, combine your thoughts, describe what you want to do conscisely in 20 words, then follow your thoughts to list the commands, adhering closely to the instructions provided. -""" +""".strip() REGENERATE_PROMPT = """ -Review the history carefully and consider human's feedback: -{human_rsp} -provide different commands. -Describe if you should terminate using **end** command, or try a different approach and output different commands. You are NOT allowed to provide the same commands again. +Review and reflect on the history carefully, provide different commands. +Describe if you should terminate using **end** command, or use **RoleZero.ask_human** to ask human for help, or try a different approach and output different commands. You are NOT allowed to provide the same commands again. Your reflection, then the commands in a json array: """ JSON_REPAIR_PROMPT = """ @@ -103,15 +101,3 @@ Software development tasks are NOT quick questions. However, these programming-related tasks are quick questions: writing trivial code snippets (fewer than 30 lines), filling a single function or class, explaining concepts, writing tutorials and documentation. Respond with a concise thought then a YES if the question is a quick question, otherwise, a NO. Your response: """ -ASK_HUMAN_COMMAND = """ -```json -[ - { - "command_name": "RoleZero.ask_human", - "args": { - "question": "I'm a little uncertain about the next step, could you provide me with some guidance?" - } - } -] -``` -""" diff --git a/metagpt/roles/di/role_zero.py b/metagpt/roles/di/role_zero.py index 0580ea470..6d6eb26ce 100644 --- a/metagpt/roles/di/role_zero.py +++ b/metagpt/roles/di/role_zero.py @@ -15,7 +15,6 @@ from metagpt.exp_pool.context_builders import RoleZeroContextBuilder from metagpt.exp_pool.serializers import RoleZeroSerializer from metagpt.logs import logger from metagpt.prompts.di.role_zero import ( - ASK_HUMAN_COMMAND, CMD_PROMPT, JSON_REPAIR_PROMPT, QUICK_THINK_PROMPT, @@ -274,13 +273,7 @@ class RoleZero(Role): # In this case, ask human for help and regenerate # TODO: switch to llm_cached_aask logger.warning(f"Duplicate response detected: {command_rsp}") - human_rsp = await self.ask_human( - question="I'm a little uncertain about the next step, could you provide me with some guidance?" - ) - regenerate_req = req + [ - AIMessage(content=ASK_HUMAN_COMMAND), - UserMessage(content=REGENERATE_PROMPT.format(human_rsp=human_rsp)), - ] + regenerate_req = req + [UserMessage(content=REGENERATE_PROMPT)] regenerate_req = self.llm.format_msg(regenerate_req) command_rsp = await self.llm.aask(regenerate_req) return command_rsp From 21775647b5f2ebd84e21db1d101806cd546cede8 Mon Sep 17 00:00:00 2001 From: garylin2099 Date: Mon, 29 Jul 2024 13:58:28 +0800 Subject: [PATCH 4/5] minor update on prompt --- metagpt/actions/write_prd.py | 2 +- metagpt/prompts/di/role_zero.py | 13 +++++++------ 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/metagpt/actions/write_prd.py b/metagpt/actions/write_prd.py index ae1b5dd81..a062ece54 100644 --- a/metagpt/actions/write_prd.py +++ b/metagpt/actions/write_prd.py @@ -108,7 +108,7 @@ class WritePRD(Action): Example: # Write a new PRD (Product Requirement Document) - >>> user_requirement = "Write PRD for a snake game" + >>> user_requirement = "Write a snake game" >>> output_pathname = "snake_game/docs/prd.json" >>> extra_info = "YOUR EXTRA INFO, if any" >>> write_prd = WritePRD() diff --git a/metagpt/prompts/di/role_zero.py b/metagpt/prompts/di/role_zero.py index f0f11754a..478f5c0d4 100644 --- a/metagpt/prompts/di/role_zero.py +++ b/metagpt/prompts/di/role_zero.py @@ -65,17 +65,18 @@ Notice: your output JSON data must be a command list. Notice: your output JSON data section must start with **```json [** """ THOUGHT_GUIDANCE = """ -Firstly, describe the actions you have taken recently. -Secondly, describe the messages you have received recently, with a particular emphasis on messages from users. -Thirdly, describe the plan status and the current task. Review the histroy, if `Current Task` has been undertaken and completed by you or anyone, you MUST use the **Plan.finish_current_task** command to finish it first before taking any action, the command will automatically move you to the next task. -Fourthly, describe any necessary human interaction. Use **RoleZero.reply_to_human** to report your progress, pay attention to the history, DON'T repeat reporting. Use **RoleZero.ask_human** if you failed the current task or if you are unsure of the situation encountered or if you need any help from human. -Fifthly, describe if you should terminate, you should use **end** command to terminate if any of the following is met: +First, describe the actions you have taken recently. +Second, describe the messages you have received recently, with a particular emphasis on messages from users. +Third, describe the plan status and the current task. Review the histroy, if `Current Task` has been undertaken and completed by you or anyone, you MUST use the **Plan.finish_current_task** command to finish it first before taking any action, the command will automatically move you to the next task. +Fourth, describe any necessary human interaction. Use **RoleZero.reply_to_human** to report your progress if you complete a task or the overall requirement, pay attention to the history, DON'T repeat reporting. Use **RoleZero.ask_human** if you failed the current task, unsure of the situation encountered, need any help from human, or executing repetitive commands but receiving repetitive feedbacks without making progress. +Fifth, describe if you should terminate, you should use **end** command to terminate if any of the following is met: - You have completed the overall user requirement - All tasks are finished and current task is empty + - You are repetitively replying to human Finally, combine your thoughts, describe what you want to do conscisely in 20 words, then follow your thoughts to list the commands, adhering closely to the instructions provided. """.strip() REGENERATE_PROMPT = """ -Review and reflect on the history carefully, provide different commands. +Review and reflect on the history carefully, provide a different response. Describe if you should terminate using **end** command, or use **RoleZero.ask_human** to ask human for help, or try a different approach and output different commands. You are NOT allowed to provide the same commands again. Your reflection, then the commands in a json array: """ From cc949edc998fca89003bfbd50bbf08cb1a2c3d5f Mon Sep 17 00:00:00 2001 From: garylin2099 Date: Mon, 29 Jul 2024 13:59:23 +0800 Subject: [PATCH 5/5] update DA's tools --- metagpt/roles/di/data_analyst.py | 5 ++++- metagpt/tools/tool_recommend.py | 4 ++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/metagpt/roles/di/data_analyst.py b/metagpt/roles/di/data_analyst.py index abb501830..03d04980d 100644 --- a/metagpt/roles/di/data_analyst.py +++ b/metagpt/roles/di/data_analyst.py @@ -31,7 +31,7 @@ class DataAnalyst(RoleZero): task_type_desc: str = TASK_TYPE_DESC tools: list[str] = ["Plan", "DataAnalyst", "RoleZero", "Browser"] - custom_tools: list[str] = ["machine learning", "web scraping", "Terminal"] + custom_tools: list[str] = ["web scraping", "Terminal"] custom_tool_recommender: ToolRecommender = None experience_retriever: ExpRetriever = KeywordExpRetriever() @@ -72,6 +72,9 @@ class DataAnalyst(RoleZero): Args: instruction: The specific task description for which the code needs to be written. """ + if self.planner.plan: + logger.info(f"Current task {self.planner.plan.current_task}") + counter = 0 success = False await self.execute_code.init_code() diff --git a/metagpt/tools/tool_recommend.py b/metagpt/tools/tool_recommend.py index 4bea137c3..cca5cb3ae 100644 --- a/metagpt/tools/tool_recommend.py +++ b/metagpt/tools/tool_recommend.py @@ -157,6 +157,10 @@ class ToolRecommender(BaseModel): ranked_tools = list(ranked_tools.values())[0] # -------------结束--------------- + if not isinstance(ranked_tools, list): + logger.warning(f"Invalid rank result: {ranked_tools}, will use the recalled tools instead.") + ranked_tools = list(available_tools.keys()) + valid_tools = validate_tool_names(ranked_tools) return list(valid_tools.values())[:topk]