From 63a41ba81ded62d306fe9b406dfaaefdc635a6d7 Mon Sep 17 00:00:00 2001 From: hongjiongteng Date: Tue, 18 Jun 2024 21:35:46 +0800 Subject: [PATCH 01/30] add code_review tool to engineer2 to enhance code effectiveness --- metagpt/actions/di/rewrite_code.py | 80 +++++++++++++++++++++++++ metagpt/prompts/di/engineer2.py | 9 +++ metagpt/roles/di/engineer2.py | 13 +++- tests/metagpt/roles/di/run_engineer2.py | 8 +-- 4 files changed, 105 insertions(+), 5 deletions(-) create mode 100644 metagpt/actions/di/rewrite_code.py diff --git a/metagpt/actions/di/rewrite_code.py b/metagpt/actions/di/rewrite_code.py new file mode 100644 index 000000000..a8586d1a6 --- /dev/null +++ b/metagpt/actions/di/rewrite_code.py @@ -0,0 +1,80 @@ +from tenacity import retry, stop_after_attempt, wait_random_exponential + +from metagpt.actions.action import Action +from metagpt.actions.write_code_review import ( + EXAMPLE_AND_INSTRUCTION, + FORMAT_EXAMPLE, + PROMPT_TEMPLATE, + REWRITE_CODE_TEMPLATE, +) +from metagpt.tools.tool_registry import register_tool +from metagpt.utils.common import CodeParser, aread, awrite + + +@register_tool(tags=["RewriteCode"], include_functions=["run"]) +class RewriteCode(Action): + """Accordding design doc and task doc to review the code, to make the complete and correct code.""" + + name: str = "RewriteCode" + + async def run(self, code_path: str, design_doc: str = "", task_doc: str = "", code_review_k_times: int = 2) -> str: + """Reviews the provided code based on the accompanying design and task documentation, return the complete and correct code. + + Read the code from `code_path`, and write the final code to `code_path`. + + Args: + code_path (str): The file path of the code snippet to be reviewed. This should be a string containing the path to the source code file. + design_doc (str): The design document associated with the code. This should describe the system architecture, used in the code. It helps provide context for the review process. + task_doc (str): The task document describing what the code is intended to accomplish. This should outline the functional requirements or objectives of the code. + code_review_k_times (int, optional): The number of iterations for reviewing and potentially rewriting the code. Defaults to 2. + + Returns: + str: The potentially corrected or approved code after review. + + Example Usage: + # Example of how to call the run method with a code snippet and documentation + await WriteCodeReview().run( + code_path="/tmp/game.js", + design_doc='{"Implementation approach":"We will implement the 2048 game using plain JavaScript and HTML, ensuring no frameworks are used. The game logic will handle tile movements, merging, and game state updates. The UI will be simple and clean, with a responsive design to fit different screen sizes. We will use CSS for styling and ensure the game is playable with keyboard arrow keys. The game will display the current score, have a restart button, and show a game over message when no more moves are possible.","File list":["index.html","style.css","script.js"],"Data structures and interfaces":"\nclassDiagram\n class Game {\n -grid: int[][]\n -score: int\n +init(): void\n +move(direction: str): void\n +merge(direction: str): void\n +isGameOver(): bool\n +restart(): void\n }\n class UI {\n -game: Game\n +init(): void\n +update(): void\n +showGameOver(): void\n +bindEvents(): void\n }\n Game --> UI\n","Program call flow":"\nsequenceDiagram\n participant U as UI\n participant G as Game\n U->>G: init()\n G-->>U: return\n U->>U: bindEvents()\n U->>G: move(direction)\n G->>G: merge(direction)\n G->>U: update()\n U->>U: update()\n U->>G: isGameOver()\n G-->>U: return bool\n alt Game Over\n U->>U: showGameOver()\n end\n U->>G: restart()\n G-->>U: return\n","Anything UNCLEAR":"Clarify if there are any specific design preferences or additional features required beyond the basic 2048 game functionality."}', + task_doc='{"Required packages":["No third-party dependencies required"],"Required Other language third-party packages":["No third-party dependencies required"],"Logic Analysis":[["script.js","Contains Game and UI classes, and their methods: init, move, merge, isGameOver, restart, update, showGameOver, bindEvents"],["index.html","Contains the HTML structure for the game UI"],["style.css","Contains the CSS styles for the game UI"]],"Task list":["index.html","style.css","script.js"],"Full API spec":"","Shared Knowledge":"The `script.js` file will contain the core game logic and UI handling. The `index.html` file will provide the structure for the game, and `style.css` will handle the styling.","Anything UNCLEAR":"Clarify if there are any specific design preferences or additional features required beyond the basic 2048 game functionality."}' + ) + """ + code = await aread(code_path) + + context = "\n".join( + [ + "## System Design\n" + design_doc + "\n", + "## Task\n" + task_doc + "\n", + ] + ) + + for _ in range(code_review_k_times): + context_prompt = PROMPT_TEMPLATE.format(context=context, code=code, filename=code_path) + cr_prompt = EXAMPLE_AND_INSTRUCTION.format( + format_example=FORMAT_EXAMPLE.format(filename=code_path), + ) + result, rewrited_code = await self.write_code_review_and_rewrite( + context_prompt, cr_prompt, filename=code_path + ) + + if "LBTM" in result: + code = rewrited_code + elif "LGTM" in result: + break + + await awrite(filename=code_path, data=code) + + return code + + @retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6)) + async def write_code_review_and_rewrite(self, context_prompt: str, cr_prompt: str, filename: str): + cr_rsp = await self._aask(context_prompt + cr_prompt) + result = CodeParser.parse_block("Code Review Result", cr_rsp) + if "LGTM" in result: + return result, None + + # if LBTM, rewrite code + rewrite_prompt = f"{context_prompt}\n{cr_rsp}\n{REWRITE_CODE_TEMPLATE.format(filename=filename)}" + code_rsp = await self._aask(rewrite_prompt) + code = CodeParser.parse_code(block="", text=code_rsp) + return result, code diff --git a/metagpt/prompts/di/engineer2.py b/metagpt/prompts/di/engineer2.py index 4fd52e320..8e2722b49 100644 --- a/metagpt/prompts/di/engineer2.py +++ b/metagpt/prompts/di/engineer2.py @@ -5,5 +5,14 @@ EXTRA_INSTRUCTION = """ 5. Take on ONE task and write ONE code file in each response. DON'T attempt all tasks in one response. 6. When not specified, you should write files in a folder named "src". If you know the project path, then write in a "src" folder under the project path. 7. When provided system design or project schedule, read them first, then adhere to them in your implementation. +8. Write at most one file per task, do your best to implement THE ONLY ONE FILE. CAREFULLY CHECK THAT YOU DONT MISS ANY NECESSARY CLASS/FUNCTION IN THIS FILE. +9. COMPLETE CODE: Your code will be part of the entire project, so please implement complete, reliable, reusable code snippets. +10. When provided system design, YOU MUST FOLLOW "Data structures and interfaces". DONT CHANGE ANY DESIGN. Do not use public member functions that do not exist in your design. +11. Write out EVERY CODE DETAIL, DON'T LEAVE TODO. +12. Do not use Editor to find start_line and end_line, just rewrite the file with the all complete code. +13. Revise task is to use RewriteCode.run to correct code. +14. At the end of the plan, add a Revise task for each file; for example, if there are three files, add three Revise tasks. """ + + ENGINEER2_INSTRUCTION = ROLE_INSTRUCTION + EXTRA_INSTRUCTION.strip() diff --git a/metagpt/roles/di/engineer2.py b/metagpt/roles/di/engineer2.py index e013ef09e..2720364ef 100644 --- a/metagpt/roles/di/engineer2.py +++ b/metagpt/roles/di/engineer2.py @@ -1,5 +1,6 @@ from __future__ import annotations +from metagpt.actions.di.rewrite_code import RewriteCode from metagpt.prompts.di.engineer2 import ENGINEER2_INSTRUCTION from metagpt.roles.di.role_zero import RoleZero @@ -10,4 +11,14 @@ class Engineer2(RoleZero): goal: str = "Take on game, app, and web development" instruction: str = ENGINEER2_INSTRUCTION - tools: str = ["Plan", "Editor:write,read,write_content", "RoleZero"] + tools: str = ["Plan", "Editor:write,read,write_content", "RoleZero", "RewriteCode"] + + def _update_tool_execution(self): + rewrite_code = RewriteCode() + + self.tool_execution_map.update( + { + "RewriteCode.run": rewrite_code.run, + "RewriteCode": rewrite_code.run, + } + ) diff --git a/tests/metagpt/roles/di/run_engineer2.py b/tests/metagpt/roles/di/run_engineer2.py index 4e948bad7..e5ae74485 100644 --- a/tests/metagpt/roles/di/run_engineer2.py +++ b/tests/metagpt/roles/di/run_engineer2.py @@ -67,18 +67,18 @@ Create a 2048 game, follow the design doc and task doc. Write your code under /U After writing all codes, write a code review for the codes, make improvement or adjustment based on the review. Notice: You MUST implement the full code, don't leave comment without implementation! Design doc: -{TASK_DOC_2048} -Task doc: {DESIGN_DOC_2048} +Task doc: +{TASK_DOC_2048} """ GAME_REQ_SNAKE = f""" Create a snake game, follow the design doc and task doc. Write your code under /Users/gary/Files/temp/workspace/snake_game/src. After writing all codes, write a code review for the codes, make improvement or adjustment based on the review. Notice: You MUST implement the full code, don't leave comment without implementation! Design doc: -{TASK_DOC_SNAKE} -Task doc: {DESIGN_DOC_SNAKE} +Task doc: +{TASK_DOC_SNAKE} """ GAME_REQ_2048_NO_DOC = """ Create a 2048 game with pygame. Write your code under /Users/gary/Files/temp/workspace/2048_game/src. From 47df4c09ba2e716e21c5f38047cb79f8af0cd67c Mon Sep 17 00:00:00 2001 From: hongjiongteng Date: Wed, 19 Jun 2024 15:42:07 +0800 Subject: [PATCH 02/30] add code_review tool to engineer2 to enhance code effectiveness --- metagpt/actions/di/rewrite_code.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/metagpt/actions/di/rewrite_code.py b/metagpt/actions/di/rewrite_code.py index a8586d1a6..57abbc56c 100644 --- a/metagpt/actions/di/rewrite_code.py +++ b/metagpt/actions/di/rewrite_code.py @@ -35,8 +35,8 @@ class RewriteCode(Action): # Example of how to call the run method with a code snippet and documentation await WriteCodeReview().run( code_path="/tmp/game.js", - design_doc='{"Implementation approach":"We will implement the 2048 game using plain JavaScript and HTML, ensuring no frameworks are used. The game logic will handle tile movements, merging, and game state updates. The UI will be simple and clean, with a responsive design to fit different screen sizes. We will use CSS for styling and ensure the game is playable with keyboard arrow keys. The game will display the current score, have a restart button, and show a game over message when no more moves are possible.","File list":["index.html","style.css","script.js"],"Data structures and interfaces":"\nclassDiagram\n class Game {\n -grid: int[][]\n -score: int\n +init(): void\n +move(direction: str): void\n +merge(direction: str): void\n +isGameOver(): bool\n +restart(): void\n }\n class UI {\n -game: Game\n +init(): void\n +update(): void\n +showGameOver(): void\n +bindEvents(): void\n }\n Game --> UI\n","Program call flow":"\nsequenceDiagram\n participant U as UI\n participant G as Game\n U->>G: init()\n G-->>U: return\n U->>U: bindEvents()\n U->>G: move(direction)\n G->>G: merge(direction)\n G->>U: update()\n U->>U: update()\n U->>G: isGameOver()\n G-->>U: return bool\n alt Game Over\n U->>U: showGameOver()\n end\n U->>G: restart()\n G-->>U: return\n","Anything UNCLEAR":"Clarify if there are any specific design preferences or additional features required beyond the basic 2048 game functionality."}', - task_doc='{"Required packages":["No third-party dependencies required"],"Required Other language third-party packages":["No third-party dependencies required"],"Logic Analysis":[["script.js","Contains Game and UI classes, and their methods: init, move, merge, isGameOver, restart, update, showGameOver, bindEvents"],["index.html","Contains the HTML structure for the game UI"],["style.css","Contains the CSS styles for the game UI"]],"Task list":["index.html","style.css","script.js"],"Full API spec":"","Shared Knowledge":"The `script.js` file will contain the core game logic and UI handling. The `index.html` file will provide the structure for the game, and `style.css` will handle the styling.","Anything UNCLEAR":"Clarify if there are any specific design preferences or additional features required beyond the basic 2048 game functionality."}' + design_doc='{"Implementation approach":"We will implement the 2048 game..."}', + task_doc='{"Required packages":["No third-party dependencies required"],"..."}' ) """ code = await aread(code_path) From 942a6d61bb73eb9847c7c9b1f6a932964a03751a Mon Sep 17 00:00:00 2001 From: hongjiongteng Date: Wed, 19 Jun 2024 23:41:09 +0800 Subject: [PATCH 03/30] opt engineer2 --- metagpt/actions/di/rewrite_code.py | 8 +++++++- metagpt/prompts/di/engineer2.py | 6 +++--- metagpt/prompts/di/team_leader.py | 2 +- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/metagpt/actions/di/rewrite_code.py b/metagpt/actions/di/rewrite_code.py index 57abbc56c..4ad612002 100644 --- a/metagpt/actions/di/rewrite_code.py +++ b/metagpt/actions/di/rewrite_code.py @@ -7,6 +7,7 @@ from metagpt.actions.write_code_review import ( PROMPT_TEMPLATE, REWRITE_CODE_TEMPLATE, ) +from metagpt.logs import logger from metagpt.tools.tool_registry import register_tool from metagpt.utils.common import CodeParser, aread, awrite @@ -21,6 +22,7 @@ class RewriteCode(Action): """Reviews the provided code based on the accompanying design and task documentation, return the complete and correct code. Read the code from `code_path`, and write the final code to `code_path`. + If there is no `design_doc` or `task_doc`, it will return and do nothing. Args: code_path (str): The file path of the code snippet to be reviewed. This should be a string containing the path to the source code file. @@ -39,6 +41,9 @@ class RewriteCode(Action): task_doc='{"Required packages":["No third-party dependencies required"],"..."}' ) """ + if not design_doc or not task_doc: + return + code = await aread(code_path) context = "\n".join( @@ -48,11 +53,12 @@ class RewriteCode(Action): ] ) - for _ in range(code_review_k_times): + for i in range(code_review_k_times): context_prompt = PROMPT_TEMPLATE.format(context=context, code=code, filename=code_path) cr_prompt = EXAMPLE_AND_INSTRUCTION.format( format_example=FORMAT_EXAMPLE.format(filename=code_path), ) + logger.info(f"The {i+1}th time to CodeReview: {code_path}.") result, rewrited_code = await self.write_code_review_and_rewrite( context_prompt, cr_prompt, filename=code_path ) diff --git a/metagpt/prompts/di/engineer2.py b/metagpt/prompts/di/engineer2.py index 8e2722b49..55947f3b9 100644 --- a/metagpt/prompts/di/engineer2.py +++ b/metagpt/prompts/di/engineer2.py @@ -9,9 +9,9 @@ EXTRA_INSTRUCTION = """ 9. COMPLETE CODE: Your code will be part of the entire project, so please implement complete, reliable, reusable code snippets. 10. When provided system design, YOU MUST FOLLOW "Data structures and interfaces". DONT CHANGE ANY DESIGN. Do not use public member functions that do not exist in your design. 11. Write out EVERY CODE DETAIL, DON'T LEAVE TODO. -12. Do not use Editor to find start_line and end_line, just rewrite the file with the all complete code. -13. Revise task is to use RewriteCode.run to correct code. -14. At the end of the plan, add a Revise task for each file; for example, if there are three files, add three Revise tasks. +12. To modify code in a file, read the entire file, make changes, and use Editor.write instead of Editor.write_content to update the file with the complete code. +13. Revise task is to use RewriteCode.run to correct code, must pass the content of system design and project schedule instead of just file path of them. +14. Only When provided system design, at the end of the plan, add a Revise task for each file; for example, if there are three files, add three Revise tasks. """ diff --git a/metagpt/prompts/di/team_leader.py b/metagpt/prompts/di/team_leader.py index 7af5a6e07..f166cee38 100644 --- a/metagpt/prompts/di/team_leader.py +++ b/metagpt/prompts/di/team_leader.py @@ -20,7 +20,7 @@ Note: 3. If the requirement contains both DATA-RELATED part mentioned in 1 and software development part mentioned in 2, you should decompose the software development part and assign them to different team members based on their expertise, and assign the DATA-RELATED part to Data Analyst David directly. 4. If the requirement is a common-sense, logical, or math problem, you should respond directly without assigning any task to team members. 5. If you think the requirement is not clear or ambiguous, you should ask the user for clarification immediately. Assign tasks only after all info is clear. -6. It is helpful for Engineer to have both the system design and the project schedule for writing the code, so include paths of both files (if available) when publishing message to Engineer. +6. It is helpful for Engineer to have both the system design and the project schedule for writing the code, so include paths of both files (if available) and remind Engineer to definitely read them when publishing message to Engineer. """ FINISH_CURRENT_TASK_CMD = """ From 599eecf1f460d2795a12d8945dd43de82c33dfa2 Mon Sep 17 00:00:00 2001 From: hongjiongteng Date: Thu, 20 Jun 2024 10:34:20 +0800 Subject: [PATCH 04/30] opt engineer2 --- metagpt/actions/di/rewrite_code.py | 22 ++++++++++++++-------- metagpt/prompts/di/engineer2.py | 2 +- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/metagpt/actions/di/rewrite_code.py b/metagpt/actions/di/rewrite_code.py index 4ad612002..772a9ee99 100644 --- a/metagpt/actions/di/rewrite_code.py +++ b/metagpt/actions/di/rewrite_code.py @@ -1,3 +1,5 @@ +import asyncio + from tenacity import retry, stop_after_attempt, wait_random_exponential from metagpt.actions.action import Action @@ -18,16 +20,18 @@ class RewriteCode(Action): name: str = "RewriteCode" - async def run(self, code_path: str, design_doc: str = "", task_doc: str = "", code_review_k_times: int = 2) -> str: + async def run( + self, code_path: str, design_doc_path: str = "", task_doc_path: str = "", code_review_k_times: int = 2 + ) -> str: """Reviews the provided code based on the accompanying design and task documentation, return the complete and correct code. Read the code from `code_path`, and write the final code to `code_path`. - If there is no `design_doc` or `task_doc`, it will return and do nothing. + If there is no `design_doc_path` or `task_doc_path`, it will return and do nothing. Args: code_path (str): The file path of the code snippet to be reviewed. This should be a string containing the path to the source code file. - design_doc (str): The design document associated with the code. This should describe the system architecture, used in the code. It helps provide context for the review process. - task_doc (str): The task document describing what the code is intended to accomplish. This should outline the functional requirements or objectives of the code. + design_doc_path (str): The file path of the design document associated with the code. This should describe the system architecture, used in the code. It helps provide context for the review process. + task_doc_path (str): The file path of the task document describing what the code is intended to accomplish. This should outline the functional requirements or objectives of the code. code_review_k_times (int, optional): The number of iterations for reviewing and potentially rewriting the code. Defaults to 2. Returns: @@ -37,14 +41,16 @@ class RewriteCode(Action): # Example of how to call the run method with a code snippet and documentation await WriteCodeReview().run( code_path="/tmp/game.js", - design_doc='{"Implementation approach":"We will implement the 2048 game..."}', - task_doc='{"Required packages":["No third-party dependencies required"],"..."}' + design_doc="/tmp/design_doc.json", + task_doc="/tmp/task_doc.json" ) """ - if not design_doc or not task_doc: + if not design_doc_path or not design_doc_path: return - code = await aread(code_path) + code, design_doc, task_doc = await asyncio.gather( + aread(code_path), aread(design_doc_path), aread(task_doc_path) + ) context = "\n".join( [ diff --git a/metagpt/prompts/di/engineer2.py b/metagpt/prompts/di/engineer2.py index 55947f3b9..eab8b44ff 100644 --- a/metagpt/prompts/di/engineer2.py +++ b/metagpt/prompts/di/engineer2.py @@ -10,7 +10,7 @@ EXTRA_INSTRUCTION = """ 10. When provided system design, YOU MUST FOLLOW "Data structures and interfaces". DONT CHANGE ANY DESIGN. Do not use public member functions that do not exist in your design. 11. Write out EVERY CODE DETAIL, DON'T LEAVE TODO. 12. To modify code in a file, read the entire file, make changes, and use Editor.write instead of Editor.write_content to update the file with the complete code. -13. Revise task is to use RewriteCode.run to correct code, must pass the content of system design and project schedule instead of just file path of them. +13. Revise task is to use RewriteCode.run to correct code. 14. Only When provided system design, at the end of the plan, add a Revise task for each file; for example, if there are three files, add three Revise tasks. """ From 7538e715edd5af8577038cfbe39bfddb9314bbfb Mon Sep 17 00:00:00 2001 From: hongjiongteng Date: Thu, 20 Jun 2024 10:36:19 +0800 Subject: [PATCH 05/30] opt engineer2 --- metagpt/actions/di/rewrite_code.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/metagpt/actions/di/rewrite_code.py b/metagpt/actions/di/rewrite_code.py index 772a9ee99..0b00cd1b5 100644 --- a/metagpt/actions/di/rewrite_code.py +++ b/metagpt/actions/di/rewrite_code.py @@ -41,8 +41,8 @@ class RewriteCode(Action): # Example of how to call the run method with a code snippet and documentation await WriteCodeReview().run( code_path="/tmp/game.js", - design_doc="/tmp/design_doc.json", - task_doc="/tmp/task_doc.json" + design_doc_path="/tmp/design_doc.json", + task_doc_path="/tmp/task_doc.json" ) """ if not design_doc_path or not design_doc_path: From 0d3187e3089a64ea11db976d4815502ccf6d197d Mon Sep 17 00:00:00 2001 From: hongjiongteng Date: Thu, 20 Jun 2024 11:32:07 +0800 Subject: [PATCH 06/30] update --- metagpt/actions/di/rewrite_code.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metagpt/actions/di/rewrite_code.py b/metagpt/actions/di/rewrite_code.py index 0b00cd1b5..03831f912 100644 --- a/metagpt/actions/di/rewrite_code.py +++ b/metagpt/actions/di/rewrite_code.py @@ -45,7 +45,7 @@ class RewriteCode(Action): task_doc_path="/tmp/task_doc.json" ) """ - if not design_doc_path or not design_doc_path: + if not design_doc_path or not task_doc_path: return code, design_doc, task_doc = await asyncio.gather( From 256e73e878b41ce261bfe8c71bc67f654c76cd48 Mon Sep 17 00:00:00 2001 From: hongjiongteng Date: Thu, 20 Jun 2024 12:29:37 +0800 Subject: [PATCH 07/30] rewrite_code accept content or file_path of design doc --- metagpt/actions/di/rewrite_code.py | 33 ++++++++++++++++++------------ 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/metagpt/actions/di/rewrite_code.py b/metagpt/actions/di/rewrite_code.py index 03831f912..ea093b36c 100644 --- a/metagpt/actions/di/rewrite_code.py +++ b/metagpt/actions/di/rewrite_code.py @@ -1,4 +1,4 @@ -import asyncio +import os from tenacity import retry, stop_after_attempt, wait_random_exponential @@ -21,17 +21,17 @@ class RewriteCode(Action): name: str = "RewriteCode" async def run( - self, code_path: str, design_doc_path: str = "", task_doc_path: str = "", code_review_k_times: int = 2 + self, code_path: str, design_doc_input: str = "", task_doc_input: str = "", code_review_k_times: int = 2 ) -> str: """Reviews the provided code based on the accompanying design and task documentation, return the complete and correct code. Read the code from `code_path`, and write the final code to `code_path`. - If there is no `design_doc_path` or `task_doc_path`, it will return and do nothing. + If there is no `design_doc_input` or `task_doc_input`, it will return and do nothing. Args: code_path (str): The file path of the code snippet to be reviewed. This should be a string containing the path to the source code file. - design_doc_path (str): The file path of the design document associated with the code. This should describe the system architecture, used in the code. It helps provide context for the review process. - task_doc_path (str): The file path of the task document describing what the code is intended to accomplish. This should outline the functional requirements or objectives of the code. + design_doc_input (str): Content or file path of the design document associated with the code. This should describe the system architecture, used in the code. It helps provide context for the review process. + task_doc_input (str): Content or file path of the task document describing what the code is intended to accomplish. This should outline the functional requirements or objectives of the code. code_review_k_times (int, optional): The number of iterations for reviewing and potentially rewriting the code. Defaults to 2. Returns: @@ -41,21 +41,28 @@ class RewriteCode(Action): # Example of how to call the run method with a code snippet and documentation await WriteCodeReview().run( code_path="/tmp/game.js", - design_doc_path="/tmp/design_doc.json", - task_doc_path="/tmp/task_doc.json" + design_doc_input="/tmp/design_doc.json", + task_doc_input='{"Required packages":["No third-party dependencies required"], ...}' ) """ - if not design_doc_path or not task_doc_path: + if not design_doc_input or not task_doc_input: return - code, design_doc, task_doc = await asyncio.gather( - aread(code_path), aread(design_doc_path), aread(task_doc_path) - ) + code = await aread(code_path) + + # Check if design_doc_input and task_doc_input are paths or content, and read if they are paths + if os.path.exists(design_doc_input): + logger.info(f"read from {design_doc_input}") + design_doc_input = await aread(design_doc_input) + + if os.path.exists(task_doc_input): + logger.info(f"read from {task_doc_input}") + task_doc_input = await aread(task_doc_input) context = "\n".join( [ - "## System Design\n" + design_doc + "\n", - "## Task\n" + task_doc + "\n", + "## System Design\n" + design_doc_input + "\n", + "## Task\n" + task_doc_input + "\n", ] ) From 76f0d5aad825cd7ba2138150fdc78baa8bc9489f Mon Sep 17 00:00:00 2001 From: hongjiongteng Date: Thu, 20 Jun 2024 12:31:38 +0800 Subject: [PATCH 08/30] update comment --- metagpt/actions/di/rewrite_code.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metagpt/actions/di/rewrite_code.py b/metagpt/actions/di/rewrite_code.py index ea093b36c..504ff72c8 100644 --- a/metagpt/actions/di/rewrite_code.py +++ b/metagpt/actions/di/rewrite_code.py @@ -39,7 +39,7 @@ class RewriteCode(Action): Example Usage: # Example of how to call the run method with a code snippet and documentation - await WriteCodeReview().run( + await RewriteCode().run( code_path="/tmp/game.js", design_doc_input="/tmp/design_doc.json", task_doc_input='{"Required packages":["No third-party dependencies required"], ...}' From 896c7d8d4cd774b644b0bbddc0c0bcc13bf6c818 Mon Sep 17 00:00:00 2001 From: hongjiongteng Date: Thu, 20 Jun 2024 13:03:51 +0800 Subject: [PATCH 09/30] rewrite_code accept content or file_path of design doc --- metagpt/actions/di/rewrite_code.py | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/metagpt/actions/di/rewrite_code.py b/metagpt/actions/di/rewrite_code.py index 504ff72c8..0b03b534e 100644 --- a/metagpt/actions/di/rewrite_code.py +++ b/metagpt/actions/di/rewrite_code.py @@ -1,3 +1,4 @@ +import asyncio import os from tenacity import retry, stop_after_attempt, wait_random_exponential @@ -45,24 +46,18 @@ class RewriteCode(Action): task_doc_input='{"Required packages":["No third-party dependencies required"], ...}' ) """ + if not design_doc_input or not task_doc_input: return - code = await aread(code_path) - - # Check if design_doc_input and task_doc_input are paths or content, and read if they are paths - if os.path.exists(design_doc_input): - logger.info(f"read from {design_doc_input}") - design_doc_input = await aread(design_doc_input) - - if os.path.exists(task_doc_input): - logger.info(f"read from {task_doc_input}") - task_doc_input = await aread(task_doc_input) + code, design_doc, task_doc = await asyncio.gather( + aread(code_path), self._try_aread(design_doc_input), self._try_aread(task_doc_input) + ) context = "\n".join( [ - "## System Design\n" + design_doc_input + "\n", - "## Task\n" + task_doc_input + "\n", + "## System Design\n" + design_doc + "\n", + "## Task\n" + task_doc + "\n", ] ) @@ -97,3 +92,12 @@ class RewriteCode(Action): code_rsp = await self._aask(rewrite_prompt) code = CodeParser.parse_code(block="", text=code_rsp) return result, code + + @staticmethod + async def _try_aread(input: str) -> str: + """Try to read from the path if it's a file; return input directly if not.""" + + if os.path.exists(input): + return await aread(input) + + return input From 6c4cb765c78d1e58c69b6a9bc2a80aa3f515f470 Mon Sep 17 00:00:00 2001 From: hongjiongteng Date: Thu, 20 Jun 2024 14:54:01 +0800 Subject: [PATCH 10/30] reuse write_code_review_and_rewrite --- metagpt/actions/di/rewrite_code.py | 35 ++++++++++++++---------------- 1 file changed, 16 insertions(+), 19 deletions(-) diff --git a/metagpt/actions/di/rewrite_code.py b/metagpt/actions/di/rewrite_code.py index 0b03b534e..659635ad5 100644 --- a/metagpt/actions/di/rewrite_code.py +++ b/metagpt/actions/di/rewrite_code.py @@ -1,18 +1,18 @@ import asyncio import os - -from tenacity import retry, stop_after_attempt, wait_random_exponential +from pathlib import Path from metagpt.actions.action import Action from metagpt.actions.write_code_review import ( EXAMPLE_AND_INSTRUCTION, FORMAT_EXAMPLE, PROMPT_TEMPLATE, - REWRITE_CODE_TEMPLATE, + WriteCodeReview, ) from metagpt.logs import logger +from metagpt.schema import CodingContext, Document from metagpt.tools.tool_registry import register_tool -from metagpt.utils.common import CodeParser, aread, awrite +from metagpt.utils.common import aread, awrite @register_tool(tags=["RewriteCode"], include_functions=["run"]) @@ -53,6 +53,8 @@ class RewriteCode(Action): code, design_doc, task_doc = await asyncio.gather( aread(code_path), self._try_aread(design_doc_input), self._try_aread(task_doc_input) ) + code_doc = self._create_code_doc(code_path=code_path, code=code) + reviewer = WriteCodeReview(i_context=CodingContext(filename=code_doc.filename)) context = "\n".join( [ @@ -67,8 +69,8 @@ class RewriteCode(Action): format_example=FORMAT_EXAMPLE.format(filename=code_path), ) logger.info(f"The {i+1}th time to CodeReview: {code_path}.") - result, rewrited_code = await self.write_code_review_and_rewrite( - context_prompt, cr_prompt, filename=code_path + result, rewrited_code = await reviewer.write_code_review_and_rewrite( + context_prompt, cr_prompt, doc=code_doc ) if "LBTM" in result: @@ -80,19 +82,6 @@ class RewriteCode(Action): return code - @retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6)) - async def write_code_review_and_rewrite(self, context_prompt: str, cr_prompt: str, filename: str): - cr_rsp = await self._aask(context_prompt + cr_prompt) - result = CodeParser.parse_block("Code Review Result", cr_rsp) - if "LGTM" in result: - return result, None - - # if LBTM, rewrite code - rewrite_prompt = f"{context_prompt}\n{cr_rsp}\n{REWRITE_CODE_TEMPLATE.format(filename=filename)}" - code_rsp = await self._aask(rewrite_prompt) - code = CodeParser.parse_code(block="", text=code_rsp) - return result, code - @staticmethod async def _try_aread(input: str) -> str: """Try to read from the path if it's a file; return input directly if not.""" @@ -101,3 +90,11 @@ class RewriteCode(Action): return await aread(input) return input + + @staticmethod + def _create_code_doc(code_path: str, code: str) -> Document: + """Create a Document to represent the code doc.""" + + path = Path(code_path) + + return Document(root_path=str(path.parent), filename=path.name, content=code) From 050bec2ccfcb23f21ef1170f2df5a6881a927f27 Mon Sep 17 00:00:00 2001 From: hongjiongteng Date: Thu, 20 Jun 2024 15:40:56 +0800 Subject: [PATCH 11/30] update prompt --- metagpt/prompts/di/engineer2.py | 4 ++-- metagpt/roles/di/engineer2.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/metagpt/prompts/di/engineer2.py b/metagpt/prompts/di/engineer2.py index eab8b44ff..102792001 100644 --- a/metagpt/prompts/di/engineer2.py +++ b/metagpt/prompts/di/engineer2.py @@ -4,12 +4,12 @@ EXTRA_INSTRUCTION = """ 4. Each time you write a code in your response, write with the Editor directly without preparing a repetitive code block beforehand. 5. Take on ONE task and write ONE code file in each response. DON'T attempt all tasks in one response. 6. When not specified, you should write files in a folder named "src". If you know the project path, then write in a "src" folder under the project path. -7. When provided system design or project schedule, read them first, then adhere to them in your implementation. +7. When provided system design or project schedule, read them first, then adhere to them in your implementation, especially in the programming language, package, or framework. You MUST implement all code files prescribed in the system design or project schedule. You can create a plan first with each task corresponding to implementing one code file. 8. Write at most one file per task, do your best to implement THE ONLY ONE FILE. CAREFULLY CHECK THAT YOU DONT MISS ANY NECESSARY CLASS/FUNCTION IN THIS FILE. 9. COMPLETE CODE: Your code will be part of the entire project, so please implement complete, reliable, reusable code snippets. 10. When provided system design, YOU MUST FOLLOW "Data structures and interfaces". DONT CHANGE ANY DESIGN. Do not use public member functions that do not exist in your design. 11. Write out EVERY CODE DETAIL, DON'T LEAVE TODO. -12. To modify code in a file, read the entire file, make changes, and use Editor.write instead of Editor.write_content to update the file with the complete code. +12. To modify code in a file, read the entire file, make changes, and update the file with the complete code. 13. Revise task is to use RewriteCode.run to correct code. 14. Only When provided system design, at the end of the plan, add a Revise task for each file; for example, if there are three files, add three Revise tasks. """ diff --git a/metagpt/roles/di/engineer2.py b/metagpt/roles/di/engineer2.py index 2720364ef..152c62ca2 100644 --- a/metagpt/roles/di/engineer2.py +++ b/metagpt/roles/di/engineer2.py @@ -11,7 +11,7 @@ class Engineer2(RoleZero): goal: str = "Take on game, app, and web development" instruction: str = ENGINEER2_INSTRUCTION - tools: str = ["Plan", "Editor:write,read,write_content", "RoleZero", "RewriteCode"] + tools: str = ["Plan", "Editor:write,read", "RoleZero", "RewriteCode"] def _update_tool_execution(self): rewrite_code = RewriteCode() From 97e9409eadb6ef2356f34f57cb825e3058d09faf Mon Sep 17 00:00:00 2001 From: lidanyang Date: Thu, 20 Jun 2024 16:42:23 +0800 Subject: [PATCH 12/30] restrict to json lang --- metagpt/roles/di/data_analyst.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metagpt/roles/di/data_analyst.py b/metagpt/roles/di/data_analyst.py index 71e6bacd4..d4d67742b 100644 --- a/metagpt/roles/di/data_analyst.py +++ b/metagpt/roles/di/data_analyst.py @@ -83,7 +83,7 @@ class DataAnalyst(DataInterpreter): # print(*context, sep="\n" + "*" * 5 + "\n") async with ThoughtReporter(enable_llm_stream=True): rsp = await self.llm.aask(context) - self.commands = json.loads(CodeParser.parse_code(block=None, text=rsp)) + self.commands = json.loads(CodeParser.parse_code(block=None, lang='json', text=rsp)) self.rc.working_memory.add(Message(content=rsp, role="assistant")) await run_commands(self, self.commands, self.rc.working_memory) From a48ccfdcf9fa7b9ebfaeb69bea7d6675aead4dc0 Mon Sep 17 00:00:00 2001 From: hongjiongteng Date: Thu, 20 Jun 2024 20:09:25 +0800 Subject: [PATCH 13/30] move RewriteCode to actions/write_code_review.py --- metagpt/actions/di/rewrite_code.py | 100 --------------------------- metagpt/actions/write_code_review.py | 91 +++++++++++++++++++++++- metagpt/prompts/di/engineer2.py | 3 +- metagpt/roles/di/engineer2.py | 2 +- 4 files changed, 92 insertions(+), 104 deletions(-) delete mode 100644 metagpt/actions/di/rewrite_code.py diff --git a/metagpt/actions/di/rewrite_code.py b/metagpt/actions/di/rewrite_code.py deleted file mode 100644 index 659635ad5..000000000 --- a/metagpt/actions/di/rewrite_code.py +++ /dev/null @@ -1,100 +0,0 @@ -import asyncio -import os -from pathlib import Path - -from metagpt.actions.action import Action -from metagpt.actions.write_code_review import ( - EXAMPLE_AND_INSTRUCTION, - FORMAT_EXAMPLE, - PROMPT_TEMPLATE, - WriteCodeReview, -) -from metagpt.logs import logger -from metagpt.schema import CodingContext, Document -from metagpt.tools.tool_registry import register_tool -from metagpt.utils.common import aread, awrite - - -@register_tool(tags=["RewriteCode"], include_functions=["run"]) -class RewriteCode(Action): - """Accordding design doc and task doc to review the code, to make the complete and correct code.""" - - name: str = "RewriteCode" - - async def run( - self, code_path: str, design_doc_input: str = "", task_doc_input: str = "", code_review_k_times: int = 2 - ) -> str: - """Reviews the provided code based on the accompanying design and task documentation, return the complete and correct code. - - Read the code from `code_path`, and write the final code to `code_path`. - If there is no `design_doc_input` or `task_doc_input`, it will return and do nothing. - - Args: - code_path (str): The file path of the code snippet to be reviewed. This should be a string containing the path to the source code file. - design_doc_input (str): Content or file path of the design document associated with the code. This should describe the system architecture, used in the code. It helps provide context for the review process. - task_doc_input (str): Content or file path of the task document describing what the code is intended to accomplish. This should outline the functional requirements or objectives of the code. - code_review_k_times (int, optional): The number of iterations for reviewing and potentially rewriting the code. Defaults to 2. - - Returns: - str: The potentially corrected or approved code after review. - - Example Usage: - # Example of how to call the run method with a code snippet and documentation - await RewriteCode().run( - code_path="/tmp/game.js", - design_doc_input="/tmp/design_doc.json", - task_doc_input='{"Required packages":["No third-party dependencies required"], ...}' - ) - """ - - if not design_doc_input or not task_doc_input: - return - - code, design_doc, task_doc = await asyncio.gather( - aread(code_path), self._try_aread(design_doc_input), self._try_aread(task_doc_input) - ) - code_doc = self._create_code_doc(code_path=code_path, code=code) - reviewer = WriteCodeReview(i_context=CodingContext(filename=code_doc.filename)) - - context = "\n".join( - [ - "## System Design\n" + design_doc + "\n", - "## Task\n" + task_doc + "\n", - ] - ) - - for i in range(code_review_k_times): - context_prompt = PROMPT_TEMPLATE.format(context=context, code=code, filename=code_path) - cr_prompt = EXAMPLE_AND_INSTRUCTION.format( - format_example=FORMAT_EXAMPLE.format(filename=code_path), - ) - logger.info(f"The {i+1}th time to CodeReview: {code_path}.") - result, rewrited_code = await reviewer.write_code_review_and_rewrite( - context_prompt, cr_prompt, doc=code_doc - ) - - if "LBTM" in result: - code = rewrited_code - elif "LGTM" in result: - break - - await awrite(filename=code_path, data=code) - - return code - - @staticmethod - async def _try_aread(input: str) -> str: - """Try to read from the path if it's a file; return input directly if not.""" - - if os.path.exists(input): - return await aread(input) - - return input - - @staticmethod - def _create_code_doc(code_path: str, code: str) -> Document: - """Create a Document to represent the code doc.""" - - path = Path(code_path) - - return Document(root_path=str(path.parent), filename=path.name, content=code) diff --git a/metagpt/actions/write_code_review.py b/metagpt/actions/write_code_review.py index ad99de2dd..fce29c5bc 100644 --- a/metagpt/actions/write_code_review.py +++ b/metagpt/actions/write_code_review.py @@ -7,6 +7,9 @@ @Modified By: mashenquan, 2023/11/27. Following the think-act principle, solidify the task parameters when creating the WriteCode object, rather than passing them in when calling the run function. """ +import asyncio +import os +from pathlib import Path from typing import Optional from pydantic import BaseModel, Field @@ -16,7 +19,8 @@ from metagpt.actions import WriteCode from metagpt.actions.action import Action from metagpt.logs import logger from metagpt.schema import CodingContext, Document -from metagpt.utils.common import CodeParser +from metagpt.tools.tool_registry import register_tool +from metagpt.utils.common import CodeParser, aread, awrite from metagpt.utils.project_repo import ProjectRepo from metagpt.utils.report import EditorReporter @@ -205,3 +209,88 @@ class WriteCodeReview(Action): # 如果rewrited_code是None(原code perfect),那么直接返回code self.i_context.code_doc.content = iterative_code return self.i_context + + +@register_tool(tags=["CodeReview"], include_functions=["run"]) +class RewriteCode(Action): + """Accordding design doc and task doc to review the code, to make the complete and correct code.""" + + name: str = "RewriteCode" + + async def run( + self, code_path: str, design_doc_input: str = "", task_doc_input: str = "", code_review_k_times: int = 2 + ) -> str: + """Reviews the provided code based on the accompanying design and task documentation, return the complete and correct code. + + Read the code from `code_path`, and write the final code to `code_path`. + If there is no `design_doc_input` or `task_doc_input`, it will return and do nothing. + + Args: + code_path (str): The file path of the code snippet to be reviewed. This should be a string containing the path to the source code file. + design_doc_input (str): Content or file path of the design document associated with the code. This should describe the system architecture, used in the code. It helps provide context for the review process. + task_doc_input (str): Content or file path of the task document describing what the code is intended to accomplish. This should outline the functional requirements or objectives of the code. + code_review_k_times (int, optional): The number of iterations for reviewing and potentially rewriting the code. Defaults to 2. + + Returns: + str: The potentially corrected or approved code after review. + + Example Usage: + # Example of how to call the run method with a code snippet and documentation + await RewriteCode().run( + code_path="/tmp/game.js", + design_doc_input="/tmp/design_doc.json", + task_doc_input='{"Required packages":["No third-party dependencies required"], ...}' + ) + """ + + if not design_doc_input or not task_doc_input: + return + + code, design_doc, task_doc = await asyncio.gather( + aread(code_path), self._try_aread(design_doc_input), self._try_aread(task_doc_input) + ) + code_doc = self._create_code_doc(code_path=code_path, code=code) + reviewer = WriteCodeReview(i_context=CodingContext(filename=code_doc.filename)) + + context = "\n".join( + [ + "## System Design\n" + design_doc + "\n", + "## Task\n" + task_doc + "\n", + ] + ) + + for i in range(code_review_k_times): + context_prompt = PROMPT_TEMPLATE.format(context=context, code=code, filename=code_path) + cr_prompt = EXAMPLE_AND_INSTRUCTION.format( + format_example=FORMAT_EXAMPLE.format(filename=code_path), + ) + logger.info(f"The {i+1}th time to CodeReview: {code_path}.") + result, rewrited_code = await reviewer.write_code_review_and_rewrite( + context_prompt, cr_prompt, doc=code_doc + ) + + if "LBTM" in result: + code = rewrited_code + elif "LGTM" in result: + break + + await awrite(filename=code_path, data=code) + + return code + + @staticmethod + async def _try_aread(input: str) -> str: + """Try to read from the path if it's a file; return input directly if not.""" + + if os.path.exists(input): + return await aread(input) + + return input + + @staticmethod + def _create_code_doc(code_path: str, code: str) -> Document: + """Create a Document to represent the code doc.""" + + path = Path(code_path) + + return Document(root_path=str(path.parent), filename=path.name, content=code) diff --git a/metagpt/prompts/di/engineer2.py b/metagpt/prompts/di/engineer2.py index 102792001..3e8c3723b 100644 --- a/metagpt/prompts/di/engineer2.py +++ b/metagpt/prompts/di/engineer2.py @@ -10,8 +10,7 @@ EXTRA_INSTRUCTION = """ 10. When provided system design, YOU MUST FOLLOW "Data structures and interfaces". DONT CHANGE ANY DESIGN. Do not use public member functions that do not exist in your design. 11. Write out EVERY CODE DETAIL, DON'T LEAVE TODO. 12. To modify code in a file, read the entire file, make changes, and update the file with the complete code. -13. Revise task is to use RewriteCode.run to correct code. -14. Only When provided system design, at the end of the plan, add a Revise task for each file; for example, if there are three files, add three Revise tasks. +13. Only with a system design, at the end of the plan, add a CodeReview Task for each file; for example, if there are three files, add three CodeReview Tasks. """ diff --git a/metagpt/roles/di/engineer2.py b/metagpt/roles/di/engineer2.py index 152c62ca2..107101e82 100644 --- a/metagpt/roles/di/engineer2.py +++ b/metagpt/roles/di/engineer2.py @@ -1,6 +1,6 @@ from __future__ import annotations -from metagpt.actions.di.rewrite_code import RewriteCode +from metagpt.actions.write_code_review import RewriteCode from metagpt.prompts.di.engineer2 import ENGINEER2_INSTRUCTION from metagpt.roles.di.role_zero import RoleZero From bd3b4dc8c0744a22bdaf38a59b26752caae9f7e9 Mon Sep 17 00:00:00 2001 From: hongjiongteng Date: Fri, 21 Jun 2024 15:25:00 +0800 Subject: [PATCH 14/30] update action name --- metagpt/actions/write_code_review.py | 23 ++++++++++++----------- metagpt/prompts/di/engineer2.py | 4 ++-- metagpt/roles/di/engineer2.py | 10 +++++----- 3 files changed, 19 insertions(+), 18 deletions(-) diff --git a/metagpt/actions/write_code_review.py b/metagpt/actions/write_code_review.py index fce29c5bc..62188786d 100644 --- a/metagpt/actions/write_code_review.py +++ b/metagpt/actions/write_code_review.py @@ -211,11 +211,11 @@ class WriteCodeReview(Action): return self.i_context -@register_tool(tags=["CodeReview"], include_functions=["run"]) -class RewriteCode(Action): - """Accordding design doc and task doc to review the code, to make the complete and correct code.""" +@register_tool(include_functions=["run"]) +class ReviewAndRewriteCode(Action): + """According to the design and task documents, review the code to ensure it is complete and correct.""" - name: str = "RewriteCode" + name: str = "ReviewAndRewriteCode" async def run( self, code_path: str, design_doc_input: str = "", task_doc_input: str = "", code_review_k_times: int = 2 @@ -223,7 +223,7 @@ class RewriteCode(Action): """Reviews the provided code based on the accompanying design and task documentation, return the complete and correct code. Read the code from `code_path`, and write the final code to `code_path`. - If there is no `design_doc_input` or `task_doc_input`, it will return and do nothing. + If both `design_doc_input` and `task_doc_input are absent`, it will return and do nothing. Args: code_path (str): The file path of the code snippet to be reviewed. This should be a string containing the path to the source code file. @@ -236,21 +236,22 @@ class RewriteCode(Action): Example Usage: # Example of how to call the run method with a code snippet and documentation - await RewriteCode().run( + await ReviewAndRewriteCode().run( code_path="/tmp/game.js", - design_doc_input="/tmp/design_doc.json", - task_doc_input='{"Required packages":["No third-party dependencies required"], ...}' + design_doc_input="/tmp/system_design.json", + task_doc_input="/tmp/project_task_list.json" ) """ - if not design_doc_input or not task_doc_input: + if not design_doc_input and not task_doc_input: + logger.info("Both design_doc_input and task_doc_input are absent, ReviewAndRewriteCode will do nothing.") return code, design_doc, task_doc = await asyncio.gather( aread(code_path), self._try_aread(design_doc_input), self._try_aread(task_doc_input) ) code_doc = self._create_code_doc(code_path=code_path, code=code) - reviewer = WriteCodeReview(i_context=CodingContext(filename=code_doc.filename)) + review_action = WriteCodeReview(i_context=CodingContext(filename=code_doc.filename)) context = "\n".join( [ @@ -265,7 +266,7 @@ class RewriteCode(Action): format_example=FORMAT_EXAMPLE.format(filename=code_path), ) logger.info(f"The {i+1}th time to CodeReview: {code_path}.") - result, rewrited_code = await reviewer.write_code_review_and_rewrite( + result, rewrited_code = await review_action.write_code_review_and_rewrite( context_prompt, cr_prompt, doc=code_doc ) diff --git a/metagpt/prompts/di/engineer2.py b/metagpt/prompts/di/engineer2.py index 3e8c3723b..08259d380 100644 --- a/metagpt/prompts/di/engineer2.py +++ b/metagpt/prompts/di/engineer2.py @@ -4,13 +4,13 @@ EXTRA_INSTRUCTION = """ 4. Each time you write a code in your response, write with the Editor directly without preparing a repetitive code block beforehand. 5. Take on ONE task and write ONE code file in each response. DON'T attempt all tasks in one response. 6. When not specified, you should write files in a folder named "src". If you know the project path, then write in a "src" folder under the project path. -7. When provided system design or project schedule, read them first, then adhere to them in your implementation, especially in the programming language, package, or framework. You MUST implement all code files prescribed in the system design or project schedule. You can create a plan first with each task corresponding to implementing one code file. +7. When provided system design or project schedule, you MUST read them first before making a plan, then adhere to them in your implementation, especially in the programming language, package, or framework. You MUST implement all code files prescribed in the system design or project schedule. You can create a plan first with each task corresponding to implementing one code file. 8. Write at most one file per task, do your best to implement THE ONLY ONE FILE. CAREFULLY CHECK THAT YOU DONT MISS ANY NECESSARY CLASS/FUNCTION IN THIS FILE. 9. COMPLETE CODE: Your code will be part of the entire project, so please implement complete, reliable, reusable code snippets. 10. When provided system design, YOU MUST FOLLOW "Data structures and interfaces". DONT CHANGE ANY DESIGN. Do not use public member functions that do not exist in your design. 11. Write out EVERY CODE DETAIL, DON'T LEAVE TODO. 12. To modify code in a file, read the entire file, make changes, and update the file with the complete code. -13. Only with a system design, at the end of the plan, add a CodeReview Task for each file; for example, if there are three files, add three CodeReview Tasks. +13. Only when a system design or a project schedule is provided, at the end of the plan, add a ReviewAndRewriteCode Task for each file; for example, if there are three files, add three ReviewAndRewriteCode Tasks. """ diff --git a/metagpt/roles/di/engineer2.py b/metagpt/roles/di/engineer2.py index 107101e82..845dd8960 100644 --- a/metagpt/roles/di/engineer2.py +++ b/metagpt/roles/di/engineer2.py @@ -1,6 +1,6 @@ from __future__ import annotations -from metagpt.actions.write_code_review import RewriteCode +from metagpt.actions.write_code_review import ReviewAndRewriteCode from metagpt.prompts.di.engineer2 import ENGINEER2_INSTRUCTION from metagpt.roles.di.role_zero import RoleZero @@ -11,14 +11,14 @@ class Engineer2(RoleZero): goal: str = "Take on game, app, and web development" instruction: str = ENGINEER2_INSTRUCTION - tools: str = ["Plan", "Editor:write,read", "RoleZero", "RewriteCode"] + tools: str = ["Plan", "Editor:write,read", "RoleZero", "ReviewAndRewriteCode"] def _update_tool_execution(self): - rewrite_code = RewriteCode() + review = ReviewAndRewriteCode() self.tool_execution_map.update( { - "RewriteCode.run": rewrite_code.run, - "RewriteCode": rewrite_code.run, + "ReviewAndRewriteCode.run": review.run, + "ReviewAndRewriteCode": review.run, } ) From 0f65ff5cb07d91a080298a3dafa8091055db4a4b Mon Sep 17 00:00:00 2001 From: hongjiongteng Date: Fri, 21 Jun 2024 16:31:10 +0800 Subject: [PATCH 15/30] change parameter names --- metagpt/actions/write_code_review.py | 24 +++++++++++++++--------- metagpt/prompts/di/engineer2.py | 2 +- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/metagpt/actions/write_code_review.py b/metagpt/actions/write_code_review.py index 62188786d..3740a80fe 100644 --- a/metagpt/actions/write_code_review.py +++ b/metagpt/actions/write_code_review.py @@ -218,17 +218,21 @@ class ReviewAndRewriteCode(Action): name: str = "ReviewAndRewriteCode" async def run( - self, code_path: str, design_doc_input: str = "", task_doc_input: str = "", code_review_k_times: int = 2 + self, + code_path: str, + system_design_input: str = "", + project_schedule_input: str = "", + code_review_k_times: int = 2, ) -> str: """Reviews the provided code based on the accompanying design and task documentation, return the complete and correct code. Read the code from `code_path`, and write the final code to `code_path`. - If both `design_doc_input` and `task_doc_input are absent`, it will return and do nothing. + If both `system_design_input` and `project_schedule_input are absent`, it will return and do nothing. Args: code_path (str): The file path of the code snippet to be reviewed. This should be a string containing the path to the source code file. - design_doc_input (str): Content or file path of the design document associated with the code. This should describe the system architecture, used in the code. It helps provide context for the review process. - task_doc_input (str): Content or file path of the task document describing what the code is intended to accomplish. This should outline the functional requirements or objectives of the code. + system_design_input (str): Content or file path of the design document associated with the code. This should describe the system architecture, used in the code. It helps provide context for the review process. + project_schedule_input (str): Content or file path of the task document describing what the code is intended to accomplish. This should outline the functional requirements or objectives of the code. code_review_k_times (int, optional): The number of iterations for reviewing and potentially rewriting the code. Defaults to 2. Returns: @@ -238,17 +242,19 @@ class ReviewAndRewriteCode(Action): # Example of how to call the run method with a code snippet and documentation await ReviewAndRewriteCode().run( code_path="/tmp/game.js", - design_doc_input="/tmp/system_design.json", - task_doc_input="/tmp/project_task_list.json" + system_design_input="/tmp/system_design.json", + project_schedule_input="/tmp/project_task_list.json" ) """ - if not design_doc_input and not task_doc_input: - logger.info("Both design_doc_input and task_doc_input are absent, ReviewAndRewriteCode will do nothing.") + if not system_design_input and not project_schedule_input: + logger.info( + "Both `system_design_input` and `project_schedule_input` are absent, ReviewAndRewriteCode will do nothing." + ) return code, design_doc, task_doc = await asyncio.gather( - aread(code_path), self._try_aread(design_doc_input), self._try_aread(task_doc_input) + aread(code_path), self._try_aread(system_design_input), self._try_aread(project_schedule_input) ) code_doc = self._create_code_doc(code_path=code_path, code=code) review_action = WriteCodeReview(i_context=CodingContext(filename=code_doc.filename)) diff --git a/metagpt/prompts/di/engineer2.py b/metagpt/prompts/di/engineer2.py index 08259d380..54b68416a 100644 --- a/metagpt/prompts/di/engineer2.py +++ b/metagpt/prompts/di/engineer2.py @@ -10,7 +10,7 @@ EXTRA_INSTRUCTION = """ 10. When provided system design, YOU MUST FOLLOW "Data structures and interfaces". DONT CHANGE ANY DESIGN. Do not use public member functions that do not exist in your design. 11. Write out EVERY CODE DETAIL, DON'T LEAVE TODO. 12. To modify code in a file, read the entire file, make changes, and update the file with the complete code. -13. Only when a system design or a project schedule is provided, at the end of the plan, add a ReviewAndRewriteCode Task for each file; for example, if there are three files, add three ReviewAndRewriteCode Tasks. +13. If a system design or project schedule is provided, at the end of the plan, add a ReviewAndRewriteCode Task for each file; for example, if there are three files, add three ReviewAndRewriteCode Tasks. """ From 17e606e1c313aa9aa56548c16075115c219b41f5 Mon Sep 17 00:00:00 2001 From: hongjiongteng Date: Fri, 21 Jun 2024 18:13:47 +0800 Subject: [PATCH 16/30] update prompt --- metagpt/prompts/di/engineer2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metagpt/prompts/di/engineer2.py b/metagpt/prompts/di/engineer2.py index 54b68416a..8a7a48cc2 100644 --- a/metagpt/prompts/di/engineer2.py +++ b/metagpt/prompts/di/engineer2.py @@ -10,7 +10,7 @@ EXTRA_INSTRUCTION = """ 10. When provided system design, YOU MUST FOLLOW "Data structures and interfaces". DONT CHANGE ANY DESIGN. Do not use public member functions that do not exist in your design. 11. Write out EVERY CODE DETAIL, DON'T LEAVE TODO. 12. To modify code in a file, read the entire file, make changes, and update the file with the complete code. -13. If a system design or project schedule is provided, at the end of the plan, add a ReviewAndRewriteCode Task for each file; for example, if there are three files, add three ReviewAndRewriteCode Tasks. +13. If a system design or project schedule is provided, at the end of the plan, add a CodeReview Task for each file; for example, if there are three files, add three CodeReview Tasks; Each CodeReview Task should execute ReviewAndRewriteCode.run to review and update the code. """ From 7f732799698acc0f7727bde75a359df083d0ec6e Mon Sep 17 00:00:00 2001 From: hongjiongteng Date: Fri, 21 Jun 2024 19:56:16 +0800 Subject: [PATCH 17/30] update prompt --- metagpt/prompts/di/engineer2.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/metagpt/prompts/di/engineer2.py b/metagpt/prompts/di/engineer2.py index 8a7a48cc2..5caa5e1a9 100644 --- a/metagpt/prompts/di/engineer2.py +++ b/metagpt/prompts/di/engineer2.py @@ -9,8 +9,8 @@ EXTRA_INSTRUCTION = """ 9. COMPLETE CODE: Your code will be part of the entire project, so please implement complete, reliable, reusable code snippets. 10. When provided system design, YOU MUST FOLLOW "Data structures and interfaces". DONT CHANGE ANY DESIGN. Do not use public member functions that do not exist in your design. 11. Write out EVERY CODE DETAIL, DON'T LEAVE TODO. -12. To modify code in a file, read the entire file, make changes, and update the file with the complete code. -13. If a system design or project schedule is provided, at the end of the plan, add a CodeReview Task for each file; for example, if there are three files, add three CodeReview Tasks; Each CodeReview Task should execute ReviewAndRewriteCode.run to review and update the code. +12. To modify code in a file, read the entire file, make changes, and update the file with the complete code, ensuring that no line numbers are included in the final write. +13. Only if a system design or project schedule is provided, at the end of the plan, add a CodeReview Task for each file; for example, if there are three files, add three CodeReview Tasks; the CodeReview Task involves using ReviewAndRewriteCode to correct the code. """ From 9454c4dfd644507fa771cd83e266f9c1485f0864 Mon Sep 17 00:00:00 2001 From: hongjiongteng Date: Sun, 23 Jun 2024 23:01:25 +0800 Subject: [PATCH 18/30] update prompt --- metagpt/actions/write_code_review.py | 2 +- metagpt/prompts/di/engineer2.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/metagpt/actions/write_code_review.py b/metagpt/actions/write_code_review.py index 3740a80fe..e72fe5cd1 100644 --- a/metagpt/actions/write_code_review.py +++ b/metagpt/actions/write_code_review.py @@ -224,7 +224,7 @@ class ReviewAndRewriteCode(Action): project_schedule_input: str = "", code_review_k_times: int = 2, ) -> str: - """Reviews the provided code based on the accompanying design and task documentation, return the complete and correct code. + """Reviews the provided code based on the accompanying system design and project schedule documentation, return the complete and correct code. Read the code from `code_path`, and write the final code to `code_path`. If both `system_design_input` and `project_schedule_input are absent`, it will return and do nothing. diff --git a/metagpt/prompts/di/engineer2.py b/metagpt/prompts/di/engineer2.py index 5caa5e1a9..4ac386b80 100644 --- a/metagpt/prompts/di/engineer2.py +++ b/metagpt/prompts/di/engineer2.py @@ -10,7 +10,7 @@ EXTRA_INSTRUCTION = """ 10. When provided system design, YOU MUST FOLLOW "Data structures and interfaces". DONT CHANGE ANY DESIGN. Do not use public member functions that do not exist in your design. 11. Write out EVERY CODE DETAIL, DON'T LEAVE TODO. 12. To modify code in a file, read the entire file, make changes, and update the file with the complete code, ensuring that no line numbers are included in the final write. -13. Only if a system design or project schedule is provided, at the end of the plan, add a CodeReview Task for each file; for example, if there are three files, add three CodeReview Tasks; the CodeReview Task involves using ReviewAndRewriteCode to correct the code. +13. When a system design or project schedule is provided, at the end of the plan, add a Special Task for each file; for example, if there are three files, add three Special Tasks. For each Special Task, just call ReviewAndRewriteCode.run. """ From e44f4739f110f935cb04ffecabc120805a9314ad Mon Sep 17 00:00:00 2001 From: garylin2099 Date: Mon, 24 Jun 2024 10:59:06 +0800 Subject: [PATCH 19/30] add alias to tolerate fault for architect --- metagpt/roles/architect.py | 1 + 1 file changed, 1 insertion(+) diff --git a/metagpt/roles/architect.py b/metagpt/roles/architect.py index afa234a3c..8650f2640 100644 --- a/metagpt/roles/architect.py +++ b/metagpt/roles/architect.py @@ -50,5 +50,6 @@ class Architect(RoleZero): { "WriteDesign.run": wd.run, "WriteDesign": wd.run, # alias + "run": wd.run, # alias } ) From 5d0e99bb5dca191a35a5bf13602940f35d86f6c7 Mon Sep 17 00:00:00 2001 From: hongjiongteng Date: Mon, 24 Jun 2024 21:22:43 +0800 Subject: [PATCH 20/30] add example to engineer --- metagpt/prompts/di/engineer2.py | 2 +- metagpt/roles/di/engineer2.py | 4 + metagpt/strategy/experience_retriever.py | 112 +++++++++++++++++++++++ 3 files changed, 117 insertions(+), 1 deletion(-) diff --git a/metagpt/prompts/di/engineer2.py b/metagpt/prompts/di/engineer2.py index 4ac386b80..f3f2155b6 100644 --- a/metagpt/prompts/di/engineer2.py +++ b/metagpt/prompts/di/engineer2.py @@ -10,7 +10,7 @@ EXTRA_INSTRUCTION = """ 10. When provided system design, YOU MUST FOLLOW "Data structures and interfaces". DONT CHANGE ANY DESIGN. Do not use public member functions that do not exist in your design. 11. Write out EVERY CODE DETAIL, DON'T LEAVE TODO. 12. To modify code in a file, read the entire file, make changes, and update the file with the complete code, ensuring that no line numbers are included in the final write. -13. When a system design or project schedule is provided, at the end of the plan, add a Special Task for each file; for example, if there are three files, add three Special Tasks. For each Special Task, just call ReviewAndRewriteCode.run. +13. When a system design or project schedule is provided, at the end of the plan, add a CodeRview Task for each file; for example, if there are three files, add three CodeRview Tasks. For each CodeRview Task, just call ReviewAndRewriteCode.run. """ diff --git a/metagpt/roles/di/engineer2.py b/metagpt/roles/di/engineer2.py index 845dd8960..8ea823c74 100644 --- a/metagpt/roles/di/engineer2.py +++ b/metagpt/roles/di/engineer2.py @@ -3,6 +3,7 @@ from __future__ import annotations from metagpt.actions.write_code_review import ReviewAndRewriteCode from metagpt.prompts.di.engineer2 import ENGINEER2_INSTRUCTION from metagpt.roles.di.role_zero import RoleZero +from metagpt.strategy.experience_retriever import ENGINEER_EXAMPLE class Engineer2(RoleZero): @@ -22,3 +23,6 @@ class Engineer2(RoleZero): "ReviewAndRewriteCode": review.run, } ) + + def _retrieve_experience(self) -> str: + return ENGINEER_EXAMPLE diff --git a/metagpt/strategy/experience_retriever.py b/metagpt/strategy/experience_retriever.py index 2bb734a8b..5e85b056a 100644 --- a/metagpt/strategy/experience_retriever.py +++ b/metagpt/strategy/experience_retriever.py @@ -336,3 +336,115 @@ editor.read(path="./main.py") - If no specific file is provided, search the symbol in the whole codebase to locate the issue. - If no specific symbol is provided, directly open and read the file to diagnose the problem. """ + +ENGINEER_EXAMPLE = """ +## example 1 +User Requirement: Please implement the core game logic for the 2048 game, including tile movements, merging logic, score tracking, and keyboard interaction. Refer to the project schedule located at '/tmp/project_schedule.json' and the system design document at '/tmp/system_design.json' for detailed information. +Explanation: I will first need to read the system design document and the project schedule to understand the specific requirements and architecture outlined for the game development. + +```json +[ + { + "command_name": "Editor.read", + "args": { + "path": "/tmp/docs/project_schedule.json" + } + }, + { + "command_name": "Editor.read", + "args": { + "path": "/tmp/docs/system_design.json" + } + } +] +``` + +## example 2 +To achieve the goal of writing a 2048 game using JavaScript and HTML without any frameworks, I will create a plan consisting of three tasks, each corresponding to the creation of one of the required files: `index.html`, `style.css`, and `script.js`. Following the completion of these tasks, I will add a code review task for each file to ensure the implementation aligns with the provided system design and project schedule documents. + +Here's the plan: + +1. **Task 1**: Create `index.html` - This file will contain the HTML structure necessary for the game's UI. +2. **Task 2**: Create `style.css` - This file will define the CSS styles to make the game visually appealing and responsive. +3. **Task 3**: Create `script.js` - This file will contain the JavaScript code for the game logic and UI interactions. +4. **Code Review Tasks**: Review each file to ensure they meet the project requirements and adhere to the system design. + +Let's start by appending the first task to the plan. + +```json +[ + { + "command_name": "Plan.append_task", + "args": { + "task_id": "1", + "dependent_task_ids": [], + "instruction": "Create the index.html file with the basic HTML structure for the 2048 game.", + "assignee": "Alex" + } + }, + { + "command_name": "Plan.append_task", + "args": { + "task_id": "2", + "dependent_task_ids": ["1"], + "instruction": "Create the style.css file with the necessary CSS to style the 2048 game.", + "assignee": "Alex" + } + }, + { + "command_name": "Plan.append_task", + "args": { + "task_id": "3", + "dependent_task_ids": ["1", "2"], + "instruction": "Create the script.js file containing the JavaScript logic for the 2048 game.", + "assignee": "Alex" + } + }, + { + "command_name": "Plan.append_task", + "args": { + "task_id": "4", + "dependent_task_ids": ["1"], + "instruction": "Use ReviewAndRewriteCode to review the code in index.html to ensure it meets the design specifications.", + "assignee": "Alex" + } + }, + { + "command_name": "Plan.append_task", + "args": { + "task_id": "5", + "dependent_task_ids": ["2"], + "instruction": "Use ReviewAndRewriteCode to review the code in style.css to ensure it meets the design specifications.", + "assignee": "Alex" + } + }, + { + "command_name": "Plan.append_task", + "args": { + "task_id": "6", + "dependent_task_ids": ["3"], + "instruction": "Use ReviewAndRewriteCode to review the code in script.js to ensure it meets the design specifications. ", + "assignee": "Alex" + } + } +] +``` + +## example 3 +I will now review the code in `script.js`. +Explanation: to review the code, call ReviewAndRewriteCode.run. + +```json +[ + { + "command_name": "ReviewAndRewriteCode.run", + "args": { + "code_path": "/tmp/src/script.js", + "system_design_input": "/tmp/docs/system_design.json", + "project_schedule_input": "/tmp/docs/project_schedule.json", + "code_review_k_times": 2 + } + } +] +``` +""" From f870cbb45b2fe83a793f8f25bfc60301621052c4 Mon Sep 17 00:00:00 2001 From: shenchucheng Date: Mon, 24 Jun 2024 21:53:18 +0800 Subject: [PATCH 21/30] add a11y tree browser tool --- metagpt/roles/di/role_zero.py | 22 +- metagpt/tools/libs/browser.py | 393 ++++++++++++++-------------------- metagpt/utils/a11y_tree.py | 306 ++++++++++++++++++++++++++ 3 files changed, 490 insertions(+), 231 deletions(-) create mode 100644 metagpt/utils/a11y_tree.py diff --git a/metagpt/roles/di/role_zero.py b/metagpt/roles/di/role_zero.py index b5342409f..d60ebb91f 100644 --- a/metagpt/roles/di/role_zero.py +++ b/metagpt/roles/di/role_zero.py @@ -87,6 +87,23 @@ class RoleZero(Role): "RoleZero.ask_human": self.ask_human, "RoleZero.reply_to_human": self.reply_to_human, } + self.tool_execution_map.update( + { + f"Browser.{i}": getattr(self.browser, i) + for i in [ + "click", + "close_tab", + "go_back", + "go_forward", + "goto", + "hover", + "press", + "scroll", + "tab_focus", + "type", + ] + } + ) # can be updated by subclass self._update_tool_execution() return self @@ -125,7 +142,10 @@ class RoleZero(Role): available_commands=tool_info, instruction=self.instruction.strip(), ) - context = self.llm.format_msg(self.rc.memory.get(self.memory_k) + [UserMessage(content=prompt)]) + memory = self.rc.memory.get(self.memory_k) + if not self.browser.is_empty_page: + memory.append(UserMessage(cause_by="browser", content=await self.browser.view())) + context = self.llm.format_msg(memory + [UserMessage(content=prompt)]) # print(*context, sep="\n" + "*" * 5 + "\n") async with ThoughtReporter(enable_llm_stream=True): self.command_rsp = await self.llm.aask(context, system_msgs=self.system_msg) diff --git a/metagpt/tools/libs/browser.py b/metagpt/tools/libs/browser.py index 1b1b3d82d..aa80abb4d 100644 --- a/metagpt/tools/libs/browser.py +++ b/metagpt/tools/libs/browser.py @@ -1,261 +1,194 @@ from __future__ import annotations -import contextlib -from uuid import uuid4 +import time +from typing import Literal, Optional -from playwright.async_api import async_playwright +from playwright.async_api import Browser as _Browser +from playwright.async_api import ( + BrowserContext, + Frame, + Page, + Playwright, + Request, + async_playwright, +) -from metagpt.const import DEFAULT_WORKSPACE_ROOT from metagpt.tools.tool_registry import register_tool -from metagpt.utils.file import MemoryFileSystem -from metagpt.utils.parse_html import simplify_html -from metagpt.utils.report import BrowserReporter +from metagpt.utils.a11y_tree import ( + click_element, + get_accessibility_tree, + get_backend_node_id, + hover_element, + key_press, + parse_accessibility_tree, + scroll_page, + type_text, +) -@register_tool(tags=["web", "browse", "scrape"]) +@register_tool( + tags=["web", "browse", "scrape"], + include_functions=[ + "click", + "close_tab", + "go_back", + "go_forward", + "goto", + "hover", + "press", + "scroll", + "tab_focus", + "type", + ], +) class Browser: - """ - A tool for browsing the web and scraping. Don't initialize a new instance of this class if one already exists. - Note: Combine searching and scrolling together to achieve most effective browsing. DON'T stick to one method. + """A tool for browsing the web. Don't initialize a new instance of this class if one already exists. + + Note: If you plan to use the browser to assist you in completing tasks, then using the browser should be a standalone + task, executing actions each time based on the content seen on the webpage before proceeding to the next step. + + ## Example + Issue: The details of the latest issue in the geekan/MetaGPT repository. + Plan: Use a browser to view the details of the latest issue in the geekan/MetaGPT repository. + Solution: + Let's first open the issue page of the MetaGPT repository with the `Browser.goto` command + + >>> await browser.goto("https://github.com/geekan/MetaGPT/issues") + + From the output webpage, we've identified that the latest issue can be accessed by clicking on the element with ID "1141". + + >>> await browser.click(1141) + + Finally, we have found the webpage for the latest issue, we can close the tab and finish current task. + + >>> await browser.close_tab() """ def __init__(self): - """initiate the browser, create pages placeholder later to be managed as {page_url: page object}""" - self.browser = None + self.playwright: Optional[Playwright] = None + self.browser: Optional[_Browser] = None + self.browser_ctx: Optional[BrowserContext] = None + self.page: Optional[Page] = None + self.accessibility_tree: list = [] + self.headless: bool = True + self.proxy = None + self.is_empty_page = True - # browser status management - self.pages = {} - self.current_page_url = None - self.current_page = None - self.reporter = BrowserReporter() - - async def start(self): + async def start(self) -> None: """Starts Playwright and launches a browser""" - self.playwright = await async_playwright().start() - self.browser = await self.playwright.chromium.launch() + if self.playwright is None: + self.playwright = playwright = await async_playwright().start() + browser = self.browser = await playwright.chromium.launch(headless=self.headless, proxy=self.proxy) + browser_ctx = self.browser_ctx = await browser.new_context() + self.page = await browser_ctx.new_page() - async def _set_current_page(self, page, url): - self.current_page = page - self.current_page_url = url - print("Now on page ", url) - await self._view() + async def stop(self): + if self.playwright: + playwright = self.playwright + self.playwright = None + self.browser = None + self.browser_ctx = None + await playwright.stop() - async def open_new_page(self, url: str, timeout: float = 30000): - """open a new page in the browser and view the page""" - async with self.reporter as reporter: - page = await self.browser.new_page() - await reporter.async_report(url, "url") - await page.goto(url, timeout=timeout) - self.pages[url] = page - await self._set_current_page(page, url) - await reporter.async_report(page, "page") + async def click(self, element_id: int): + """clicks on an element with a specific id on the webpage.""" + await click_element(self.page, get_backend_node_id(element_id, self.accessibility_tree)) + return await self._wait_page() - async def view_page_element_to_scrape(self, requirement: str, keep_links: bool = False) -> None: - """view the HTML content of current page to understand the structure. When executed, the content will be printed out + async def type(self, element_id: int, content: str, press_enter_after: bool = False): + """Use this to type the content into the field with id.""" + if press_enter_after: + content += "\n" + await click_element(self.page, get_backend_node_id(element_id, self.accessibility_tree)) + await type_text(self.page, content) + return await self._wait_page() - Args: - requirement (str): Providing a clear and detailed requirement helps in focusing the inspection on the desired elements. - keep_links (bool): Whether to keep the hyperlinks in the HTML content. Set to True if links are required - """ - html = await self.current_page.content() - html = simplify_html(html, url=self.current_page.url, keep_links=keep_links) - mem_fs = MemoryFileSystem() - filename = f"{uuid4().hex}.html" - with mem_fs.open(filename, "w") as f: - f.write(html) + async def hover(self, element_id: int): + """Hover over an element with id.""" + await hover_element(self.page, get_backend_node_id(element_id, self.accessibility_tree)) + return await self._wait_page() - # Since RAG is an optional optimization, if it fails, the simplified HTML can be used as a fallback. - with contextlib.suppress(Exception): - from metagpt.rag.engines import SimpleEngine # avoid circular import + async def press(self, key_comb: str): + """Simulates the pressing of a key combination on the keyboard (e.g., Ctrl+v).""" + await key_press(self.page, key_comb) + return await self._wait_page() - # TODO make `from_docs` asynchronous - engine = SimpleEngine.from_docs(input_files=[filename], fs=mem_fs) - nodes = await engine.aretrieve(requirement) - html = "\n".join(i.text for i in nodes) + async def scroll(self, direction: Literal["down", "up"]): + """Scroll the page up or down.""" + await scroll_page(self.page, direction) + return await self._wait_page() - mem_fs.rm_file(filename) - print(html) + async def goto(self, url: str, timeout: float = 30000): + """Navigate to a specific URL.""" + await self.page.goto(url, timeout=timeout) + self.is_empty_page = False + return await self._wait_page() - async def get_page_content(self) -> str: - """Get the HTML content of current page.""" - html = await self.current_page.content() - html_content = html.strip() - return html_content + async def go_back(self): + """Navigate to the previously viewed page.""" + await self.page.go_back() + return await self._wait_page() - async def switch_page(self, url: str): - """switch to an opened page in the browser and view the page""" - if url in self.pages: - await self._set_current_page(self.pages[url], url) - await self.reporter.async_report(self.current_page, "page") + async def go_forward(self): + """Navigate to the next page (if a previous 'go_back' action was performed).""" + await self.page.go_forward() + return await self._wait_page() + + async def tab_focus(self, page_number: int): + """Open a new, empty browser tab.""" + page = self.browser_ctx.pages[page_number] + await page.bring_to_front() + return await self._wait_page() + + async def close_tab(self): + """Close the currently active tab.""" + await self.page.close() + if len(self.browser_ctx.pages) > 0: + self.page = self.browser_ctx.pages[-1] else: - print(f"Page not found: {url}") + self.page = await self.browser_ctx.new_page() + self.is_empty_page = True + return await self._wait_page() - async def _view_page_html(self, keep_len: int = 5000) -> str: - """view the HTML content of current page, return the HTML content as a string. When executed, the content will be printed out""" - html = await self.current_page.content() - html_content = html.strip()[:keep_len] - return html_content + async def _wait_page(self): + page = self.page + await self._wait_until_page_idle(page) + self.accessibility_tree = await get_accessibility_tree(page) + return f"SUCCESS, URL: {page.url}" - async def search_content_all(self, search_term: str) -> list[dict]: - """search all occurences of search term in the current page and return the search results with their position. - Useful if you have a keyword or sentence in mind and want to quickly narrow down the content relevant to it. + def _register_page_event(self, page: Page): + page.last_busy_time = time.time() + page.requests = set() + page.on("domcontentloaded", self._update_page_last_busy_time) + page.on("load", self._update_page_last_busy_time) + page.on("request", self._on_page_request) + page.on("requestfailed", self._on_page_requestfinished) + page.on("requestfinished", self._on_page_requestfinished) + page.on("frameattached", self._on_frame_change) + page.on("framenavigated", self._on_frame_change) - Args: - search_term (str): the search term + async def _wait_until_page_idle(self, page) -> None: + if not hasattr(page, "last_busy_time"): + self._register_page_event(page) + else: + page.last_busy_time = time.time() + while time.time() - page.last_busy_time < 0.5: + await page.wait_for_timeout(100) - Returns: - list[dict]: a list of dictionaries containing the elements and their positions, e.g. - [ - { - "index": ..., - "content": { - "text_block": ..., - "links": [ - {"text": ..., "href": ...}, - ... - ] - }, - "position": {from_top: ..., from_left: ...}, - }, - ... - ] - """ - locator = self.current_page.locator(f"text={search_term}") - count = await locator.count() - search_results = [] - for i in range(count): - element = locator.nth(i) - if await element.is_visible(): - position = await element.evaluate("e => ({ from_top: e.offsetTop, from_left: e.offsetLeft })") + async def _update_page_last_busy_time(self, page: Page): + page.last_busy_time = time.time() - # Retrieve the surrounding block of text and links with their text - content = await element.evaluate(SEARCH_CONTENT_JS) + async def _on_page_request(self, request: Request): + page = request.frame.page + page.requests.add(request) + await self._update_page_last_busy_time(page) - search_results.append( - {"index": len(search_results), "content": content, "position": position, "element_obj": element} - ) + async def _on_page_requestfinished(self, request: Request): + request.frame.page.requests.discard(request) - print(f"Found {len(search_results)} instances of the term '{search_term}':\n\n{search_results}") + async def _on_frame_change(self, frame: Frame): + await self._update_page_last_busy_time(frame.page) - return search_results - - async def scroll_to_search_result(self, search_results: list[dict], index: int = 0): - """Scroll to the index-th search result, potentially for subsequent perception. - Useful if you have located a search result, the search result does not fulfill your requirement, and you need more information around that search result. Can only be used after search_all_content. - - Args: - search_results (list[dict]): search_results from search_content_all - index (int, optional): the index of the search result to scroll to. Index starts from 0. Defaults to 0. - """ - if not search_results: - return {} - if index >= len(search_results): - print(f"Index {index} is out of range. Scrolling to the last instance.") - index = len(search_results) - 1 - element = search_results[index]["element_obj"] - await element.scroll_into_view_if_needed() - await self.reporter.async_report(self.current_page, "page") - - print(f"Successfully scrolled to the {index}-th search result") - print(await self._view()) - - # async def find_links(self) -> list: - # """Finds all links in the current page and returns a list of dictionaries with link text and the URL. - # Useful for navigating to more pages and exploring more resources. - - # Returns: - # list: A list of dictionaries, each containing 'text' and 'href' keys. - # """ - # # Use a CSS selector to find all elements in the page. - # links = await self.current_page.query_selector_all("a") - - # # Prepare an empty list to hold link information. - # link_info = [] - - # # Iterate over each link element to extract its text and href attributes. - # for link in links: - # text = await link.text_content() - # href = await link.get_attribute("href") - # link_info.append({"text": text, "href": href}) - - # print(f"Found {len(link_info)} links:\n\n{link_info}") - - # return link_info - - async def screenshot(self, path: str = DEFAULT_WORKSPACE_ROOT / "screenshot_temp.png"): - """Take a screenshot of the current page and save it to the specified path.""" - await self.current_page.screenshot(path=path) - print(f"Screenshot saved to: {path}") - - async def _view(self, keep_len: int = 5000) -> str: - """simulate human viewing the current page, return the visible text with links""" - visible_text_with_links = await self.current_page.evaluate(VIEW_CONTENT_JS) - print("The visible text and their links (if any): ", visible_text_with_links[:keep_len]) - # html_content = await self._view_page_html(keep_len=keep_len) - # print("The html content: ", html_content) - - async def scroll_current_page(self, offset: int = 500): - """scroll the current page by offset pixels, negative value means scrolling up, will print out observed content after scrolling""" - await self.current_page.evaluate(f"window.scrollBy(0, {offset})") - await self.reporter.async_report(self.current_page, "page") - - print(f"Scrolled current page by {offset} pixels.") - print(await self._view()) - - def check_all_pages(self) -> dict: - """return all pages opened in the browser, a dictionary with {page_url: page_title}, useful for understanding the current browser state""" - pages_info = {url: page.title() for url, page in self.pages.items()} - return pages_info - - async def close(self): - """close the browser and all pages""" - await self.browser.close() - await self.playwright.stop() - - -async def get_scroll_position(page): - return await page.evaluate("() => ({ x: window.scrollX, y: window.scrollY })") - - -SEARCH_CONTENT_JS = """ -(element) => { - // const block = element.closest('p, div, section, article'); - const block = element.parentElement; - return { - text_block: block.innerText, - // Create an array of objects, each containing the text and href of a link - links: Array.from(block.querySelectorAll('a')).map(a => ({ - text: a.innerText, - href: a.href - })) - }; -} -""" - - -VIEW_CONTENT_JS = """ -() => { - return Array.from(document.querySelectorAll('body *')).filter(el => { - if (!(el.offsetWidth || el.offsetHeight || el.getClientRects().length)) return false; - const style = window.getComputedStyle(el); - if (style.display === 'none' || style.visibility !== 'visible' || style.opacity === '0') return false; - const rect = el.getBoundingClientRect(); - const elemCenter = { - x: rect.left + rect.width / 2, - y: rect.top + rect.height / 2 - }; - if (elemCenter.x < 0 || elemCenter.y < 0 || elemCenter.x > window.innerWidth || elemCenter.y > window.innerHeight) return false; - if (document.elementFromPoint(elemCenter.x, elemCenter.y) !== el) return false; - return true; - }).map(el => { - let text = el.innerText || ''; - text = text.trim(); - if (!text.length) return ''; - const parentAnchor = el.closest('a'); - if (parentAnchor && parentAnchor.href) { - return `${text} (${parentAnchor.href})`; - } - return text; - }).filter(text => text.length > 0).join("\\n"); -} -""" + async def view(self): + observation = parse_accessibility_tree(self.accessibility_tree) + return f"Current _Browser Viewer\n URL: {self.page.url}\nOBSERVATION:\n{observation[0]}\n" diff --git a/metagpt/utils/a11y_tree.py b/metagpt/utils/a11y_tree.py new file mode 100644 index 000000000..59acbc6dc --- /dev/null +++ b/metagpt/utils/a11y_tree.py @@ -0,0 +1,306 @@ +"""See https://github.com/web-arena-x/webarena +""" +from __future__ import annotations + +import re + +from playwright.async_api import BrowserContext, Page + + +async def get_accessibility_tree(page: Page): + cdp_session = await get_page_cdp_session(page) + resp = await cdp_session.send("Accessibility.getFullAXTree") + + seen_ids = set() + accessibility_tree = [] + for node in resp["nodes"]: + if node["nodeId"] not in seen_ids: + accessibility_tree.append(node) + seen_ids.add(node["nodeId"]) + return accessibility_tree + + +async def execute_step(step: str, page: Page, browser_ctx: BrowserContext, accessibility_tree: list): + step = step.strip() + func = step.split("[")[0].strip() if "[" in step else step.split()[0].strip() + if func == "None": + return "" + elif func == "click": + match = re.search(r"click ?\[(\d+)\]", step) + if not match: + raise ValueError(f"Invalid click action {step}") + element_id = match.group(1) + await click_element(page, get_backend_node_id(element_id, accessibility_tree)) + elif func == "hover": + match = re.search(r"hover ?\[(\d+)\]", step) + if not match: + raise ValueError(f"Invalid hover action {step}") + element_id = match.group(1) + await hover_element(page, get_backend_node_id(element_id, accessibility_tree)) + elif func == "type": + # add default enter flag + if not (step.endswith("[0]") or step.endswith("[1]")): + step += " [1]" + + match = re.search(r"type ?\[(\d+)\] ?\[(.+)\] ?\[(\d+)\]", step) + if not match: + raise ValueError(f"Invalid type action {step}") + element_id, text, enter_flag = ( + match.group(1), + match.group(2), + match.group(3), + ) + if enter_flag == "1": + text += "\n" + await click_element(page, get_backend_node_id(element_id, accessibility_tree)) + await type_text(page, text) + elif func == "press": + match = re.search(r"press ?\[(.+)\]", step) + if not match: + raise ValueError(f"Invalid press action {step}") + key = match.group(1) + await key_press(page, key) + elif func == "scroll": + # up or down + match = re.search(r"scroll ?\[?(up|down)\]?", step) + if not match: + raise ValueError(f"Invalid scroll action {step}") + direction = match.group(1) + await scroll_page(page, direction) + elif func == "goto": + match = re.search(r"goto ?\[(.+)\]", step) + if not match: + raise ValueError(f"Invalid goto action {step}") + url = match.group(1) + await page.goto(url) + elif func == "new_tab": + page = await browser_ctx.new_page() + elif func == "go_back": + await page.go_back() + elif func == "go_forward": + await page.go_forward() + elif func == "tab_focus": + match = re.search(r"tab_focus ?\[(\d+)\]", step) + if not match: + raise ValueError(f"Invalid tab_focus action {step}") + page_number = int(match.group(1)) + page = browser_ctx.pages[page_number] + await page.bring_to_front() + elif func == "close_tab": + await page.close() + if len(browser_ctx.pages) > 0: + page = browser_ctx.pages[-1] + else: + page = await browser_ctx.new_page() + elif func == "stop": + match = re.search(r'stop\(?"(.+)?"\)', step) + answer = match.group(1) if match else "" + return answer + else: + raise ValueError + await page.wait_for_load_state("domcontentloaded") + return page + + +async def type_text(page: Page, text: str): + await page.keyboard.type(text) + + +async def click_element(page: Page, backend_node_id: int): + cdp_session = await get_page_cdp_session(page) + resp = await get_bounding_rect(cdp_session, backend_node_id) + node_info = resp["result"]["value"] + x, y = await get_element_center(node_info) + await page.mouse.click(x, y) + + +async def hover_element(page: Page, backend_node_id: int) -> None: + cdp_session = await get_page_cdp_session(page) + resp = await get_bounding_rect(cdp_session, backend_node_id) + node_info = resp["result"]["value"] + x, y = await get_element_center(node_info) + await page.mouse.move(x, y) + + +async def scroll_page(page: Page, direction: str) -> None: + # perform the action + # code from natbot + if direction == "up": + await page.evaluate( + "(document.scrollingElement || document.body).scrollTop = (document.scrollingElement || document.body).scrollTop - window.innerHeight;" + ) + elif direction == "down": + await page.evaluate( + "(document.scrollingElement || document.body).scrollTop = (document.scrollingElement || document.body).scrollTop + window.innerHeight;" + ) + + +async def key_press(page: Page, key: str) -> None: + """Press a key.""" + if "Meta" in key and "Mac" not in await page.evaluate("navigator.platform"): + key = key.replace("Meta", "Control") + await page.keyboard.press(key) + + +async def get_element_outer_html(page: Page, backend_node_id: int): + cdp_session = await get_page_cdp_session(page) + try: + outer_html = await cdp_session.send("DOM.getOuterHTML", {"backendNodeId": int(backend_node_id)}) + return outer_html["outerHTML"] + except Exception as e: + raise ValueError("Element not found") from e + + +async def get_element_center(node_info): + x, y, width, height = node_info["x"], node_info["y"], node_info["width"], node_info["height"] + center_x = x + width / 2 + center_y = y + height / 2 + return center_x, center_y + + +def extract_step(response: str, action_splitter: str = "```") -> str: + # find the first occurence of action + pattern = rf"{action_splitter}((.|\n)*?){action_splitter}" + match = re.search(pattern, response) + if match: + return match.group(1).strip() + else: + raise ValueError(f'Cannot find the answer phrase "{response}"') + + +async def get_bounding_rect(cdp_session, backend_node_id: str): + try: + remote_object = await cdp_session.send("DOM.resolveNode", {"backendNodeId": int(backend_node_id)}) + remote_object_id = remote_object["object"]["objectId"] + response = await cdp_session.send( + "Runtime.callFunctionOn", + { + "objectId": remote_object_id, + "functionDeclaration": """ + function() { + if (this.nodeType == 3) { + var range = document.createRange(); + range.selectNode(this); + var rect = range.getBoundingClientRect().toJSON(); + range.detach(); + return rect; + } else { + return this.getBoundingClientRect().toJSON(); + } + } + """, + "returnByValue": True, + }, + ) + return response + except Exception as e: + raise ValueError("Element not found") from e + + +IGNORED_ACTREE_PROPERTIES = ( + "focusable", + "editable", + "readonly", + "level", + "settable", + "multiline", + "invalid", +) + + +def parse_accessibility_tree(accessibility_tree): + """Parse the accessibility tree into a string text""" + node_id_to_idx = {} + for idx, node in enumerate(accessibility_tree): + node_id_to_idx[node["nodeId"]] = idx + + obs_nodes_info = {} + + def dfs(idx: int, obs_node_id: str, depth: int) -> str: + tree_str = "" + node = accessibility_tree[idx] + indent = "\t" * depth + valid_node = True + try: + role = node["role"]["value"] + name = node["name"]["value"] + node_str = f"[{obs_node_id}] {role} {repr(name)}" + properties = [] + for property in node.get("properties", []): + try: + if property["name"] in IGNORED_ACTREE_PROPERTIES: + continue + properties.append(f'{property["name"]}: {property["value"]["value"]}') + except KeyError: + pass + + if properties: + node_str += " " + " ".join(properties) + + # check valid + if not node_str.strip(): + valid_node = False + + # empty generic node + if not name.strip(): + if not properties: + if role in [ + "generic", + "img", + "list", + "strong", + "paragraph", + "banner", + "navigation", + "Section", + "LabelText", + "Legend", + "listitem", + ]: + valid_node = False + elif role in ["listitem"]: + valid_node = False + + if valid_node: + tree_str += f"{indent}{node_str}" + obs_nodes_info[obs_node_id] = { + "backend_id": node["backendDOMNodeId"], + "union_bound": node["union_bound"], + "text": node_str, + } + + except Exception: + valid_node = False + + for _, child_node_id in enumerate(node["childIds"]): + if child_node_id not in node_id_to_idx: + continue + # mark this to save some tokens + child_depth = depth + 1 if valid_node else depth + child_str = dfs(node_id_to_idx[child_node_id], child_node_id, child_depth) + if child_str.strip(): + if tree_str.strip(): + tree_str += "\n" + tree_str += child_str + + return tree_str + + tree_str = dfs(0, accessibility_tree[0]["nodeId"], 0) + return tree_str, obs_nodes_info + + +async def get_page_cdp_session(page): + if hasattr(page, "cdp_session"): + return page.cdp_session + + cdp_session = await page.context.new_cdp_session(page) + page.cdp_session = cdp_session + return cdp_session + + +def get_backend_node_id(element_id, accessibility_tree): + element_id = str(element_id) + for i in accessibility_tree: + if i["nodeId"] == element_id: + return i.get("backendDOMNodeId") + raise ValueError(f"Element {element_id} not found") From 877b06bfdc01512fe221f50cc30c19c6cec777af Mon Sep 17 00:00:00 2001 From: shenchucheng Date: Tue, 25 Jun 2024 10:26:39 +0800 Subject: [PATCH 22/30] browser report --- metagpt/tools/libs/browser.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/metagpt/tools/libs/browser.py b/metagpt/tools/libs/browser.py index aa80abb4d..df9d43135 100644 --- a/metagpt/tools/libs/browser.py +++ b/metagpt/tools/libs/browser.py @@ -24,6 +24,7 @@ from metagpt.utils.a11y_tree import ( scroll_page, type_text, ) +from metagpt.utils.report import BrowserReporter @register_tool( @@ -73,6 +74,7 @@ class Browser: self.headless: bool = True self.proxy = None self.is_empty_page = True + self.reporter = BrowserReporter() async def start(self) -> None: """Starts Playwright and launches a browser""" @@ -120,9 +122,11 @@ class Browser: async def goto(self, url: str, timeout: float = 30000): """Navigate to a specific URL.""" - await self.page.goto(url, timeout=timeout) - self.is_empty_page = False - return await self._wait_page() + async with self.reporter as reporter: + await reporter.async_report(url, "url") + await self.page.goto(url, timeout=timeout) + self.is_empty_page = False + return await self._wait_page() async def go_back(self): """Navigate to the previously viewed page.""" @@ -154,6 +158,7 @@ class Browser: page = self.page await self._wait_until_page_idle(page) self.accessibility_tree = await get_accessibility_tree(page) + await self.reporter.async_report(page, "page") return f"SUCCESS, URL: {page.url}" def _register_page_event(self, page: Page): From bd675d5178f86a39a2c4ed17122c97e0b351d30d Mon Sep 17 00:00:00 2001 From: shenchucheng Date: Tue, 25 Jun 2024 11:04:43 +0800 Subject: [PATCH 23/30] add view_page_element_to_scrape tool --- examples/di/crawl_webpage.py | 6 ++-- metagpt/rag/engines/simple.py | 5 ++- metagpt/tools/libs/browser.py | 9 ++++- metagpt/tools/libs/web_scraping.py | 56 +++++++++++++++++++++++------- 4 files changed, 57 insertions(+), 19 deletions(-) diff --git a/examples/di/crawl_webpage.py b/examples/di/crawl_webpage.py index 92e3c32b0..c4e1b6599 100644 --- a/examples/di/crawl_webpage.py +++ b/examples/di/crawl_webpage.py @@ -6,9 +6,7 @@ """ from metagpt.roles.di.data_interpreter import DataInterpreter - -__import__("metagpt.tools.libs.browser", fromlist=["Browser"]) # To skip pre-commit check - +from metagpt.tools.libs.web_scraping import view_page_element_to_scrape PAPER_LIST_REQ = """" Get data from `paperlist` table in https://papercopilot.com/statistics/iclr-statistics/iclr-2024-statistics/, @@ -34,7 +32,7 @@ NEWS_36KR_REQ = """从36kr创投平台https://pitchhub.36kr.com/financing-flash async def main(): - di = DataInterpreter(tools=["Browser"]) + di = DataInterpreter(tools=[view_page_element_to_scrape.__name__]) await di.run(ECOMMERCE_REQ) diff --git a/metagpt/rag/engines/simple.py b/metagpt/rag/engines/simple.py index c237dcf69..8a9ccaffd 100644 --- a/metagpt/rag/engines/simple.py +++ b/metagpt/rag/engines/simple.py @@ -4,6 +4,7 @@ import json import os from typing import Any, Optional, Union +import fsspec from llama_index.core import SimpleDirectoryReader from llama_index.core.callbacks.base import CallbackManager from llama_index.core.embeddings import BaseEmbedding @@ -83,6 +84,7 @@ class SimpleEngine(RetrieverQueryEngine): llm: LLM = None, retriever_configs: list[BaseRetrieverConfig] = None, ranker_configs: list[BaseRankerConfig] = None, + fs: Optional[fsspec.AbstractFileSystem] = None, ) -> "SimpleEngine": """From docs. @@ -96,11 +98,12 @@ class SimpleEngine(RetrieverQueryEngine): llm: Must supported by llama index. Default OpenAI. retriever_configs: Configuration for retrievers. If more than one config, will use SimpleHybridRetriever. ranker_configs: Configuration for rankers. + fs: File system to use. """ if not input_dir and not input_files: raise ValueError("Must provide either `input_dir` or `input_files`.") - documents = SimpleDirectoryReader(input_dir=input_dir, input_files=input_files).load_data() + documents = SimpleDirectoryReader(input_dir=input_dir, input_files=input_files, fs=fs).load_data() cls._fix_document_metadata(documents) transformations = transformations or cls._default_transformations() diff --git a/metagpt/tools/libs/browser.py b/metagpt/tools/libs/browser.py index df9d43135..9d24d4baf 100644 --- a/metagpt/tools/libs/browser.py +++ b/metagpt/tools/libs/browser.py @@ -28,7 +28,7 @@ from metagpt.utils.report import BrowserReporter @register_tool( - tags=["web", "browse", "scrape"], + tags=["web", "browse"], include_functions=[ "click", "close_tab", @@ -197,3 +197,10 @@ class Browser: async def view(self): observation = parse_accessibility_tree(self.accessibility_tree) return f"Current _Browser Viewer\n URL: {self.page.url}\nOBSERVATION:\n{observation[0]}\n" + + async def __aenter__(self): + await self.start() + return self + + async def __aexit__(self, *args, **kwargs): + await self.stop() diff --git a/metagpt/tools/libs/web_scraping.py b/metagpt/tools/libs/web_scraping.py index bc34b1306..489c3a472 100644 --- a/metagpt/tools/libs/web_scraping.py +++ b/metagpt/tools/libs/web_scraping.py @@ -1,20 +1,50 @@ +import contextlib +from uuid import uuid4 + +from metagpt.tools.libs.browser import Browser from metagpt.tools.tool_registry import register_tool -from metagpt.tools.web_browser_engine_playwright import PlaywrightWrapper +from metagpt.utils.file import MemoryFileSystem +from metagpt.utils.parse_html import simplify_html -@register_tool(tags=["web scraping", "web"]) -async def scrape_web_playwright(url): - """ - Asynchronously Scrape and save the HTML structure and inner text content of a web page using Playwright. +@register_tool(tags=["web scraping"]) +async def view_page_element_to_scrape(url: str, requirement: str, keep_links: bool = False) -> None: + """view the HTML content of current page to understand the structure. When executed, the content will be printed out Args: - url (str): The main URL to fetch inner text from. - - Returns: - dict: The inner text content and html structure of the web page, keys are 'inner_text', 'html'. + url (str): The URL of the web page to scrape. + requirement (str): Providing a clear and detailed requirement helps in focusing the inspection on the desired elements. + keep_links (bool): Whether to keep the hyperlinks in the HTML content. Set to True if links are required """ - # Create a PlaywrightWrapper instance for the Chromium browser - web = await PlaywrightWrapper().run(url) + async with Browser() as browser: + await browser.goto(url) + page = browser.page + html = await page.content() + html = simplify_html(html, url=page.url, keep_links=keep_links) + mem_fs = MemoryFileSystem() + filename = f"{uuid4().hex}.html" + with mem_fs.open(filename, "w") as f: + f.write(html) - # Return the inner text content of the web page - return {"inner_text": web.inner_text.strip(), "html": web.html.strip()} + # Since RAG is an optional optimization, if it fails, the simplified HTML can be used as a fallback. + with contextlib.suppress(Exception): + from metagpt.rag.engines import SimpleEngine # avoid circular import + + # TODO make `from_docs` asynchronous + engine = SimpleEngine.from_docs(input_files=[filename], fs=mem_fs) + nodes = await engine.aretrieve(requirement) + html = "\n".join(i.text for i in nodes) + + mem_fs.rm_file(filename) + print(html) + + +# async def get_elements_outerhtml(self, element_ids: list[int]): +# """Inspect the outer HTML of the elements in Current Browser Viewer. +# """ +# page = self.page +# data = [] +# for element_id in element_ids: +# html = await get_element_outer_html(page, get_backend_node_id(element_id, self.accessibility_tree)) +# data.append(html) +# return "\n".join(f"[{element_id}]. {html}" for element_id, html in zip(element_ids, data)) From 1dbe88b96d71b6ed93195b464a95a65ad89179ce Mon Sep 17 00:00:00 2001 From: shenchucheng Date: Tue, 25 Jun 2024 21:34:49 +0800 Subject: [PATCH 24/30] add browser view to memory after executing browser tool --- metagpt/roles/di/role_zero.py | 7 ++++++- metagpt/tools/libs/browser.py | 10 +++++----- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/metagpt/roles/di/role_zero.py b/metagpt/roles/di/role_zero.py index d60ebb91f..39338471a 100644 --- a/metagpt/roles/di/role_zero.py +++ b/metagpt/roles/di/role_zero.py @@ -2,6 +2,7 @@ from __future__ import annotations import inspect import json +import re import traceback from typing import Callable, Literal, Tuple @@ -144,7 +145,11 @@ class RoleZero(Role): ) memory = self.rc.memory.get(self.memory_k) if not self.browser.is_empty_page: - memory.append(UserMessage(cause_by="browser", content=await self.browser.view())) + pattern = re.compile(r"Command Browser\.(\w+) executed") + for index, msg in zip(range(len(memory), 0, -1), memory[::-1]): + if pattern.match(msg.content): + memory.insert(index, UserMessage(cause_by="browser", content=await self.browser.view())) + break context = self.llm.format_msg(memory + [UserMessage(content=prompt)]) # print(*context, sep="\n" + "*" * 5 + "\n") async with ThoughtReporter(enable_llm_stream=True): diff --git a/metagpt/tools/libs/browser.py b/metagpt/tools/libs/browser.py index 9d24d4baf..c6ea71bd5 100644 --- a/metagpt/tools/libs/browser.py +++ b/metagpt/tools/libs/browser.py @@ -3,7 +3,7 @@ from __future__ import annotations import time from typing import Literal, Optional -from playwright.async_api import Browser as _Browser +from playwright.async_api import Browser as Browser_ from playwright.async_api import ( BrowserContext, Frame, @@ -67,7 +67,7 @@ class Browser: def __init__(self): self.playwright: Optional[Playwright] = None - self.browser: Optional[_Browser] = None + self.browser_instance: Optional[Browser_] = None self.browser_ctx: Optional[BrowserContext] = None self.page: Optional[Page] = None self.accessibility_tree: list = [] @@ -80,7 +80,7 @@ class Browser: """Starts Playwright and launches a browser""" if self.playwright is None: self.playwright = playwright = await async_playwright().start() - browser = self.browser = await playwright.chromium.launch(headless=self.headless, proxy=self.proxy) + browser = self.browser_instance = await playwright.chromium.launch(headless=self.headless, proxy=self.proxy) browser_ctx = self.browser_ctx = await browser.new_context() self.page = await browser_ctx.new_page() @@ -88,7 +88,7 @@ class Browser: if self.playwright: playwright = self.playwright self.playwright = None - self.browser = None + self.browser_instance = None self.browser_ctx = None await playwright.stop() @@ -196,7 +196,7 @@ class Browser: async def view(self): observation = parse_accessibility_tree(self.accessibility_tree) - return f"Current _Browser Viewer\n URL: {self.page.url}\nOBSERVATION:\n{observation[0]}\n" + return f"Current Browser Viewer\n URL: {self.page.url}\nOBSERVATION:\n{observation[0]}\n" async def __aenter__(self): await self.start() From d1834260e08c0dde4d82fa21a9d8be21c5a4746b Mon Sep 17 00:00:00 2001 From: zhanglei Date: Wed, 26 Jun 2024 17:38:44 +0800 Subject: [PATCH 25/30] =?UTF-8?q?update:=20LLMType=E5=A2=9E=E5=8A=A0openro?= =?UTF-8?q?uter?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- metagpt/configs/llm_config.py | 1 + metagpt/provider/openai_api.py | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/metagpt/configs/llm_config.py b/metagpt/configs/llm_config.py index af8f56372..39f6e61f1 100644 --- a/metagpt/configs/llm_config.py +++ b/metagpt/configs/llm_config.py @@ -31,6 +31,7 @@ class LLMType(Enum): MOONSHOT = "moonshot" MISTRAL = "mistral" YI = "yi" # lingyiwanwu + OPEN_ROUTER = "open_router" def __missing__(self, key): return self.OPENAI diff --git a/metagpt/provider/openai_api.py b/metagpt/provider/openai_api.py index 120c1d3cb..0263da989 100644 --- a/metagpt/provider/openai_api.py +++ b/metagpt/provider/openai_api.py @@ -40,7 +40,8 @@ from metagpt.utils.token_counter import ( ) -@register_provider([LLMType.OPENAI, LLMType.FIREWORKS, LLMType.OPEN_LLM, LLMType.MOONSHOT, LLMType.MISTRAL, LLMType.YI]) +@register_provider([LLMType.OPENAI, LLMType.FIREWORKS, LLMType.OPEN_LLM, LLMType.MOONSHOT, LLMType.MISTRAL, LLMType.YI, + LLMType.OPEN_ROUTER]) class OpenAILLM(BaseLLM): """Check https://platform.openai.com/examples for examples""" From 1e091b5675e36dc3cee409e43adb8497ca89c073 Mon Sep 17 00:00:00 2001 From: zhanglei Date: Thu, 27 Jun 2024 13:44:06 +0800 Subject: [PATCH 26/30] update: role_zero add json_repair --- metagpt/prompts/di/role_zero.py | 12 ++++++++++++ metagpt/roles/di/role_zero.py | 12 +++++++++++- 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/metagpt/prompts/di/role_zero.py b/metagpt/prompts/di/role_zero.py index 4d52476aa..7f9f37ca2 100644 --- a/metagpt/prompts/di/role_zero.py +++ b/metagpt/prompts/di/role_zero.py @@ -50,3 +50,15 @@ Some text indicating your thoughts, such as how you should update the plan statu ] ``` """ + + +JSON_REPAIR_PROMPT = """ + ## json data + {json_data} + + ## Output Format + ```json + Formatted JSON data + ``` + Help check if there are any formatting issues with the JSON data? If so, please help format it +""" diff --git a/metagpt/roles/di/role_zero.py b/metagpt/roles/di/role_zero.py index b5342409f..39b44ccf3 100644 --- a/metagpt/roles/di/role_zero.py +++ b/metagpt/roles/di/role_zero.py @@ -10,7 +10,7 @@ from pydantic import model_validator from metagpt.actions import Action from metagpt.actions.di.run_command import RunCommand from metagpt.logs import logger -from metagpt.prompts.di.role_zero import CMD_PROMPT, ROLE_INSTRUCTION +from metagpt.prompts.di.role_zero import CMD_PROMPT, ROLE_INSTRUCTION, JSON_REPAIR_PROMPT from metagpt.roles import Role from metagpt.schema import AIMessage, Message, UserMessage from metagpt.strategy.experience_retriever import DummyExpRetriever, ExpRetriever @@ -21,6 +21,7 @@ from metagpt.tools.tool_recommend import BM25ToolRecommender, ToolRecommender from metagpt.tools.tool_registry import register_tool from metagpt.utils.common import CodeParser from metagpt.utils.report import ThoughtReporter +from metagpt.utils.repair_llm_raw_output import repair_llm_raw_output, RepairType @register_tool(include_functions=["ask_human", "reply_to_human"]) @@ -138,6 +139,9 @@ class RoleZero(Role): return await super()._act() try: + commands = json.loads(repair_llm_raw_output(output=CodeParser.parse_code(block=None, lang="json", text=self.command_rsp), req_keys=[None], repair_type=RepairType.JSON)) + except json.JSONDecodeError as e: + self.command_rsp = await self.llm.aask(msg=JSON_REPAIR_PROMPT.format(json_data=self.command_rsp)) commands = json.loads(CodeParser.parse_code(block=None, lang="json", text=self.command_rsp)) except Exception as e: tb = traceback.format_exc() @@ -145,6 +149,12 @@ class RoleZero(Role): error_msg = UserMessage(content=str(e)) self.rc.memory.add(error_msg) return error_msg + + if isinstance(commands, dict): + if "commands" in commands: + commands = commands["commands"] + else: + commands = [commands] outputs = await self._run_commands(commands) self.rc.memory.add(UserMessage(content=outputs)) return AIMessage( From 28108bb37815f0b6989e43fbbe6c5d84de962ea1 Mon Sep 17 00:00:00 2001 From: zhanglei Date: Thu, 27 Jun 2024 16:40:28 +0800 Subject: [PATCH 27/30] update: fix cr comment --- metagpt/prompts/di/role_zero.py | 3 +-- metagpt/roles/di/role_zero.py | 10 +++++----- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/metagpt/prompts/di/role_zero.py b/metagpt/prompts/di/role_zero.py index 7f9f37ca2..6df7b01ff 100644 --- a/metagpt/prompts/di/role_zero.py +++ b/metagpt/prompts/di/role_zero.py @@ -49,9 +49,8 @@ Some text indicating your thoughts, such as how you should update the plan statu ... ] ``` + """ - - JSON_REPAIR_PROMPT = """ ## json data {json_data} diff --git a/metagpt/roles/di/role_zero.py b/metagpt/roles/di/role_zero.py index 39b44ccf3..e4c81278b 100644 --- a/metagpt/roles/di/role_zero.py +++ b/metagpt/roles/di/role_zero.py @@ -139,7 +139,8 @@ class RoleZero(Role): return await super()._act() try: - commands = json.loads(repair_llm_raw_output(output=CodeParser.parse_code(block=None, lang="json", text=self.command_rsp), req_keys=[None], repair_type=RepairType.JSON)) + self.command_rsp = CodeParser.parse_code(block=None, lang="json", text=self.command_rsp) + commands = json.loads(repair_llm_raw_output(output=self.command_rsp, req_keys=[None], repair_type=RepairType.JSON)) except json.JSONDecodeError as e: self.command_rsp = await self.llm.aask(msg=JSON_REPAIR_PROMPT.format(json_data=self.command_rsp)) commands = json.loads(CodeParser.parse_code(block=None, lang="json", text=self.command_rsp)) @@ -150,11 +151,10 @@ class RoleZero(Role): self.rc.memory.add(error_msg) return error_msg + # 为了对LLM不按格式生成进行容错 if isinstance(commands, dict): - if "commands" in commands: - commands = commands["commands"] - else: - commands = [commands] + commands = commands["commands"] if "commands" in commands else [commands] + outputs = await self._run_commands(commands) self.rc.memory.add(UserMessage(content=outputs)) return AIMessage( From c648f0704a18a3378ea87b061e0810f0be1b1fff Mon Sep 17 00:00:00 2001 From: zhanglei Date: Thu, 27 Jun 2024 17:00:40 +0800 Subject: [PATCH 28/30] update: fix cr comment --- metagpt/prompts/di/role_zero.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/metagpt/prompts/di/role_zero.py b/metagpt/prompts/di/role_zero.py index 6df7b01ff..a33487d81 100644 --- a/metagpt/prompts/di/role_zero.py +++ b/metagpt/prompts/di/role_zero.py @@ -52,12 +52,12 @@ Some text indicating your thoughts, such as how you should update the plan statu """ JSON_REPAIR_PROMPT = """ - ## json data - {json_data} - - ## Output Format - ```json - Formatted JSON data - ``` - Help check if there are any formatting issues with the JSON data? If so, please help format it +## json data +{json_data} + +## Output Format +```json +Formatted JSON data +``` +Help check if there are any formatting issues with the JSON data? If so, please help format it """ From ff6389ae6126d8914fa7578830903a4078cfcb7e Mon Sep 17 00:00:00 2001 From: zhanglei Date: Thu, 27 Jun 2024 17:13:17 +0800 Subject: [PATCH 29/30] update: fix cr comment --- metagpt/roles/di/role_zero.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/metagpt/roles/di/role_zero.py b/metagpt/roles/di/role_zero.py index e4c81278b..f528fe638 100644 --- a/metagpt/roles/di/role_zero.py +++ b/metagpt/roles/di/role_zero.py @@ -139,11 +139,11 @@ class RoleZero(Role): return await super()._act() try: - self.command_rsp = CodeParser.parse_code(block=None, lang="json", text=self.command_rsp) - commands = json.loads(repair_llm_raw_output(output=self.command_rsp, req_keys=[None], repair_type=RepairType.JSON)) + commands = CodeParser.parse_code(block=None, lang="json", text=self.command_rsp) + commands = json.loads(repair_llm_raw_output(output=commands, req_keys=[None], repair_type=RepairType.JSON)) except json.JSONDecodeError as e: - self.command_rsp = await self.llm.aask(msg=JSON_REPAIR_PROMPT.format(json_data=self.command_rsp)) - commands = json.loads(CodeParser.parse_code(block=None, lang="json", text=self.command_rsp)) + commands = await self.llm.aask(msg=JSON_REPAIR_PROMPT.format(json_data=self.command_rsp)) + commands = json.loads(CodeParser.parse_code(block=None, lang="json", text=commands)) except Exception as e: tb = traceback.format_exc() print(tb) From 47eb31d1ce91f892cfa1e2e84a1df95dc8748e4f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9E=97=E4=B9=89=E7=AB=A0?= Date: Thu, 27 Jun 2024 09:18:44 +0000 Subject: [PATCH 30/30] format --- metagpt/prompts/di/role_zero.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metagpt/prompts/di/role_zero.py b/metagpt/prompts/di/role_zero.py index a33487d81..d8dd2cb60 100644 --- a/metagpt/prompts/di/role_zero.py +++ b/metagpt/prompts/di/role_zero.py @@ -49,8 +49,8 @@ Some text indicating your thoughts, such as how you should update the plan statu ... ] ``` - """ + JSON_REPAIR_PROMPT = """ ## json data {json_data}