diff --git a/metagpt/actions/analyze_requirements.py b/metagpt/actions/analyze_requirements.py index d81da3e14..86088d824 100644 --- a/metagpt/actions/analyze_requirements.py +++ b/metagpt/actions/analyze_requirements.py @@ -48,7 +48,7 @@ INSTRUCTIONS = """ You must output in the same language as the Requirements. First, This language should be consistent with the language used in the requirement description. determine the natural language you must respond in. If the requirements specify a special language, follow those instructions. The default language for responses is English. Second, extract the restrictions in the requirements, specifically the steps. Do not include detailed demand descriptions; focus only on the restrictions. -Third, if the requirements is a software development, extract the program language. If If no specific programming language is required, Use HTML (*.html), CSS (*.css), and JavaScript (*.js) +Third, if the requirements is a software development, extract the program language. If no specific programming language is required, Use HTML (*.html), CSS (*.css), and JavaScript (*.js) Note: 1. if there is not restrictions, requirements_restrictions must be "" diff --git a/metagpt/actions/research.py b/metagpt/actions/research.py index 98edfddb0..99f72b076 100644 --- a/metagpt/actions/research.py +++ b/metagpt/actions/research.py @@ -3,6 +3,7 @@ from __future__ import annotations import asyncio +from datetime import datetime from typing import Any, Callable, Coroutine, Optional, Union from pydantic import TypeAdapter, model_validator @@ -43,9 +44,10 @@ COLLECT_AND_RANKURLS_PROMPT = """### Topic {results} ### Requirements -Please remove irrelevant search results that are not related to the query or topic. Then, sort the remaining search results \ -based on the link credibility. If two results have equal credibility, prioritize them based on the relevance. 
Provide the -ranked results' indices in JSON format, like [0, 1, 3, 4, ...], without including other words. +Please remove irrelevant search results that are not related to the query or topic. +If the query is time-sensitive or specifies a certain time frame, please also remove search results that are outdated or outside the specified time frame. Notice that the current time is {time_stamp}. +Then, sort the remaining search results based on the link credibility. If two results have equal credibility, prioritize them based on the relevance. +Provide the ranked results' indices in JSON format, like [0, 1, 3, 4, ...], without including other words. """ WEB_BROWSE_AND_SUMMARIZE_PROMPT = """### Requirements @@ -165,7 +167,8 @@ class CollectLinks(Action): max_results = max_num_results or max(num_results * 2, 6) results = await self._search_urls(query, max_results=max_results) _results = "\n".join(f"{i}: {j}" for i, j in zip(range(max_results), results)) - prompt = COLLECT_AND_RANKURLS_PROMPT.format(topic=topic, query=query, results=_results) + time_stamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + prompt = COLLECT_AND_RANKURLS_PROMPT.format(topic=topic, query=query, results=_results, time_stamp=time_stamp) logger.debug(prompt) indices = await self._aask(prompt) try: diff --git a/metagpt/ext/cr/actions/code_review.py b/metagpt/ext/cr/actions/code_review.py index 5f861c3e3..0235dc2c6 100644 --- a/metagpt/ext/cr/actions/code_review.py +++ b/metagpt/ext/cr/actions/code_review.py @@ -175,13 +175,16 @@ class CodeReview(Action): async def cr_by_points(self, patch: PatchSet, points: list[Point]): comments = [] + valid_patch_count = 0 for patched_file in patch: if not patched_file: continue if patched_file.path.endswith(".py"): points = [p for p in points if p.language == "Python"] + valid_patch_count += 1 elif patched_file.path.endswith(".java"): points = [p for p in points if p.language == "Java"] + valid_patch_count += 1 else: continue group_points = [points[i : i + 3] 
for i in range(0, len(points), 3)] @@ -198,6 +201,9 @@ class CodeReview(Action): c["commented_file"] = patched_file_path comments.extend(comments_batch) + if valid_patch_count == 0: + raise ValueError("Only code reviews for Python and Java languages are supported.") + return comments async def run(self, patch: PatchSet, points: list[Point], output_file: str): diff --git a/metagpt/prompts/di/engineer2.py b/metagpt/prompts/di/engineer2.py index afe904c92..2e8d2872e 100644 --- a/metagpt/prompts/di/engineer2.py +++ b/metagpt/prompts/di/engineer2.py @@ -75,12 +75,7 @@ Note: 18. Use Engineer2.write_new_code to create or modify a file. Write only one code file each time. If you only need to code one file, provide all the necessary information in one response. 19. When the requirement is simple, you don't need to create a plan, just do it right away. 20. If the code exists, use the Editor tool's open and edit commands to modify it. Since it is not a new code, do not use write_new_code. - -22. When using the editor, pay attention to the editor's current directory. When you use editor tools, the paths must be either absolute or relative to the editor's current directory. -""" -""" -21. Forbidden to run code in the terminal. -Do Not run the code. +21. When using the editor, pay attention to the editor's current directory. When you use editor tools, the paths must be either absolute or relative to the editor's current directory. 
""" CURRENT_STATE = """ The current editor state is: diff --git a/metagpt/roles/di/engineer2.py b/metagpt/roles/di/engineer2.py index 9ec22c077..5b264e85e 100644 --- a/metagpt/roles/di/engineer2.py +++ b/metagpt/roles/di/engineer2.py @@ -16,6 +16,7 @@ from metagpt.prompts.di.engineer2 import ( from metagpt.roles.di.role_zero import RoleZero from metagpt.schema import Message, UserMessage from metagpt.strategy.experience_retriever import ENGINEER_EXAMPLE +from metagpt.tools.libs.cr import CodeReview from metagpt.tools.libs.git import git_create_pull from metagpt.tools.libs.terminal import Terminal from metagpt.tools.tool_registry import register_tool @@ -40,6 +41,7 @@ class Engineer2(RoleZero): "git_create_pull", "SearchEnhancedQA", "Engineer2", + "CodeReview", ] # SWE Agent parameter run_eval: bool = False @@ -64,11 +66,15 @@ class Engineer2(RoleZero): self.cmd_prompt_current_state = CURRENT_STATE.format(**state).strip() def _update_tool_execution(self): + # validate = ValidateAndRewriteCode() + cr = CodeReview() self.tool_execution_map.update( { "Terminal.run_command": self.terminal.run_command, "git_create_pull": git_create_pull, "Engineer2.write_new_code": self.write_new_code, + "CodeReview.review": cr.review, + "CodeReview.fix": cr.fix, # "ValidateAndRewriteCode.run": validate.run, # "ValidateAndRewriteCode": validate.run, } diff --git a/metagpt/roles/di/role_zero.py b/metagpt/roles/di/role_zero.py index 9d309bfc6..386dbd43d 100644 --- a/metagpt/roles/di/role_zero.py +++ b/metagpt/roles/di/role_zero.py @@ -89,7 +89,7 @@ class RoleZero(Role): # Others command_rsp: str = "" # the raw string containing the commands commands: list[dict] = [] # commands to be executed - memory_k: int = 100 # number of memories (messages) to use as historical context + memory_k: int = 200 # number of memories (messages) to use as historical context use_fixed_sop: bool = False requirements_constraints: str = "" # the constraints in user requirements use_summary: bool = True # whether 
to summarize at the end diff --git a/metagpt/roles/di/swe_agent.py b/metagpt/roles/di/swe_agent.py index 3d3ce4b57..731b00b0b 100644 --- a/metagpt/roles/di/swe_agent.py +++ b/metagpt/roles/di/swe_agent.py @@ -11,7 +11,7 @@ from metagpt.prompts.di.swe_agent import ( from metagpt.roles.di.role_zero import RoleZero from metagpt.schema import Message from metagpt.tools.libs.git import git_create_pull -from metagpt.tools.libs.terminal import Terminal +from metagpt.tools.libs.terminal import Bash class SWEAgent(RoleZero): @@ -19,8 +19,13 @@ class SWEAgent(RoleZero): profile: str = "Issue Solver" goal: str = "Resolve GitHub issue or bug in any existing codebase" _instruction: str = NEXT_STEP_TEMPLATE - tools: list[str] = ["Browser:goto,scroll", "RoleZero", "git_create_pull", "Editor", "Terminal"] - terminal: Terminal = Field(default_factory=Terminal, exclude=True) + tools: list[str] = [ + "Bash", + "Browser:goto,scroll", + "RoleZero", + "git_create_pull", + ] + terminal: Bash = Field(default_factory=Bash, exclude=True) output_diff: str = "" max_react_loop: int = 40 run_eval: bool = False @@ -33,29 +38,14 @@ class SWEAgent(RoleZero): def _update_tool_execution(self): self.tool_execution_map.update( { - "Terminal.run_command": self.eval_terminal_run if self.run_eval else self.terminal.run_command, + "Bash.run": self.terminal.run, "git_create_pull": git_create_pull, } ) - async def eval_terminal_run(self, cmd): - """change command pull/push/commit to end.""" - if any([cmd_key_word in cmd for cmd_key_word in ["pull", "push", "commit"]]): - # Observe that SWEAgent tries to submit the repo after fixing the bug. - # Set self.rc.todo to None and use git -diff to record the change. - logger.info("SWEAgent use cmd:{cmd}") - logger.info("finish current task") - # stop the sweagent - self._set_state(-1) - command_output = "Current test case is finished." 
- else: - command_output = await self.terminal.run_command(cmd) - return command_output - async def _format_instruction(self): """ Formats the instruction message for the SWE agent. - Runs the "state" command in the terminal, parses its output as JSON, and uses it to format the `_instruction` template. """ @@ -66,16 +56,14 @@ class SWEAgent(RoleZero): async def _act(self) -> Message: message = await super()._act() if self.run_eval: await self._parse_commands_for_eval() return message async def _parse_commands_for_eval(self): """ Handles actions based on parsed commands. - Parses commands, checks for a "submit" action, and generates a patch using `git diff`. Stores the cleaned patch in `output_diff`. Logs any exceptions. - This function is specifically added for SWE bench evaluation. """ # If todo switches to None, it indicates that this is the final round of reactions, and the Swe-Agent will stop. Use git diff to store any changes made. @@ -88,7 +76,6 @@ class SWEAgent(RoleZero): logger.info(f"Diff output: \n{clear_diff}") if clear_diff: self.output_diff = clear_diff - except Exception as e: logger.error(f"Error during submission: {e}") diff --git a/metagpt/tools/libs/cr.py b/metagpt/tools/libs/cr.py index 7d156b4d6..0a53dd194 100644 --- a/metagpt/tools/libs/cr.py +++ b/metagpt/tools/libs/cr.py @@ -45,11 +45,15 @@ class CodeReview: """ patch = await self._get_patch_content(patch_path) point_file = point_file if point_file else Path(metagpt.ext.cr.__file__).parent / "points.json" + await EditorReporter().async_report(str(point_file), "path") async with aiofiles.open(point_file, "rb") as f: cr_point_content = await f.read() cr_points = [Point(**i) for i in json.loads(cr_point_content)] - comments = await CodeReview_().run(patch, cr_points, output_file) - return f"The number of defects: {len(comments)} and the comments are stored in {output_file}" + try: + comments = await CodeReview_().run(patch, cr_points, output_file) + except
ValueError as e: + return str(e) + return f"The number of defects: {len(comments)}, the comments are stored in {output_file}, and the checkpoints are stored in {str(point_file)}" async def fix( self,