Merge branch 'mgx_ops' into add_swe_agent_ablilities_to_engineer2

2026-06-08 15:05:17 +02:00 · 2024-09-05 20:04:57 +08:00 · 2024-09-05 20:04:57 +08:00 · 166eb2db79
commit 166eb2db79
parent 5aab97554c 5687b2ce61
8 changed files with 38 additions and 37 deletions
--- a/metagpt/actions/analyze_requirements.py
+++ b/metagpt/actions/analyze_requirements.py
@ -48,7 +48,7 @@ INSTRUCTIONS = """
 You must output in the same language as the Requirements.
 First, This language should be consistent with the language used in the requirement description. determine the natural language you must respond in. If the requirements specify a special language, follow those instructions. The default language for responses is English.
 Second, extract the restrictions in the requirements, specifically the steps. Do not include detailed demand descriptions; focus only on the restrictions.
-Third, if the requirements is a software development, extract the program language. If If no specific programming language is required, Use HTML (*.html), CSS (*.css), and JavaScript (*.js)
+Third, if the requirements is a software development, extract the program language. If no specific programming language is required, Use HTML (*.html), CSS (*.css), and JavaScript (*.js)

 Note:
 1. if there is not restrictions, requirements_restrictions must be ""
--- a/metagpt/actions/research.py
+++ b/metagpt/actions/research.py
@ -3,6 +3,7 @@
 from __future__ import annotations

 import asyncio
+from datetime import datetime
 from typing import Any, Callable, Coroutine, Optional, Union

 from pydantic import TypeAdapter, model_validator
@ -43,9 +44,10 @@ COLLECT_AND_RANKURLS_PROMPT = """### Topic
 {results}

 ### Requirements
-Please remove irrelevant search results that are not related to the query or topic. Then, sort the remaining search results \
-based on the link credibility. If two results have equal credibility, prioritize them based on the relevance. Provide the
-ranked results' indices in JSON format, like [0, 1, 3, 4, ...], without including other words.
+Please remove irrelevant search results that are not related to the query or topic.
+If the query is time-sensitive or specifies a certain time frame, please also remove search results that are outdated or outside the specified time frame. Notice that the current time is {time_stamp}.
+Then, sort the remaining search results based on the link credibility. If two results have equal credibility, prioritize them based on the relevance.
+Provide the ranked results' indices in JSON format, like [0, 1, 3, 4, ...], without including other words.
 """

 WEB_BROWSE_AND_SUMMARIZE_PROMPT = """### Requirements
@ -165,7 +167,8 @@ class CollectLinks(Action):
        max_results = max_num_results or max(num_results * 2, 6)
        results = await self._search_urls(query, max_results=max_results)
        _results = "\n".join(f"{i}: {j}" for i, j in zip(range(max_results), results))
-        prompt = COLLECT_AND_RANKURLS_PROMPT.format(topic=topic, query=query, results=_results)
+        time_stamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+        prompt = COLLECT_AND_RANKURLS_PROMPT.format(topic=topic, query=query, results=_results, time_stamp=time_stamp)
        logger.debug(prompt)
        indices = await self._aask(prompt)
        try:
--- a/metagpt/ext/cr/actions/code_review.py
+++ b/metagpt/ext/cr/actions/code_review.py
@ -175,13 +175,16 @@ class CodeReview(Action):

    async def cr_by_points(self, patch: PatchSet, points: list[Point]):
        comments = []
+        valid_patch_count = 0
        for patched_file in patch:
            if not patched_file:
                continue
            if patched_file.path.endswith(".py"):
                points = [p for p in points if p.language == "Python"]
+                valid_patch_count += 1
            elif patched_file.path.endswith(".java"):
                points = [p for p in points if p.language == "Java"]
+                valid_patch_count += 1
            else:
                continue
            group_points = [points[i : i + 3] for i in range(0, len(points), 3)]
@ -198,6 +201,9 @@ class CodeReview(Action):
                        c["commented_file"] = patched_file_path
                    comments.extend(comments_batch)

+        if valid_patch_count == 0:
+            raise ValueError("Only code reviews for Python and Java languages are supported.")
+
        return comments

    async def run(self, patch: PatchSet, points: list[Point], output_file: str):
--- a/metagpt/prompts/di/engineer2.py
+++ b/metagpt/prompts/di/engineer2.py
@ -75,12 +75,7 @@ Note:
 18. Use Engineer2.write_new_code to create or modify a file. Write only one code file each time. If you only need to code one file, provide all the necessary information in one response.
 19. When the requirement is simple, you don't need to create a plan, just do it right away.
 20. If the code exists, use the Editor tool's open and edit commands to modify it. Since it is not a new code, do not use write_new_code.
-
-22. When using the editor, pay attention to the editor's current directory. When you use editor tools, the paths must be either absolute or relative to the editor's current directory.
-"""
-"""
-21. Forbidden to run code in the terminal.
-Do Not run the code. 
+21. When using the editor, pay attention to the editor's current directory. When you use editor tools, the paths must be either absolute or relative to the editor's current directory.
 """
 CURRENT_STATE = """
 The current editor state is:
--- a/metagpt/roles/di/engineer2.py
+++ b/metagpt/roles/di/engineer2.py
@ -16,6 +16,7 @@ from metagpt.prompts.di.engineer2 import (
 from metagpt.roles.di.role_zero import RoleZero
 from metagpt.schema import Message, UserMessage
 from metagpt.strategy.experience_retriever import ENGINEER_EXAMPLE
+from metagpt.tools.libs.cr import CodeReview
 from metagpt.tools.libs.git import git_create_pull
 from metagpt.tools.libs.terminal import Terminal
 from metagpt.tools.tool_registry import register_tool
@ -40,6 +41,7 @@ class Engineer2(RoleZero):
        "git_create_pull",
        "SearchEnhancedQA",
        "Engineer2",
+        "CodeReview",
    ]
    # SWE Agent parameter
    run_eval: bool = False
@ -64,11 +66,15 @@ class Engineer2(RoleZero):
        self.cmd_prompt_current_state = CURRENT_STATE.format(**state).strip()

    def _update_tool_execution(self):
+        # validate = ValidateAndRewriteCode()
+        cr = CodeReview()
        self.tool_execution_map.update(
            {
                "Terminal.run_command": self.terminal.run_command,
                "git_create_pull": git_create_pull,
                "Engineer2.write_new_code": self.write_new_code,
+                "CodeReview.review": cr.review,
+                "CodeReview.fix": cr.fix,
                # "ValidateAndRewriteCode.run": validate.run,
                # "ValidateAndRewriteCode": validate.run,
            }
--- a/metagpt/roles/di/role_zero.py
+++ b/metagpt/roles/di/role_zero.py
@ -89,7 +89,7 @@ class RoleZero(Role):
    # Others
    command_rsp: str = ""  # the raw string containing the commands
    commands: list[dict] = []  # commands to be executed
-    memory_k: int = 100  # number of memories (messages) to use as historical context
+    memory_k: int = 200  # number of memories (messages) to use as historical context
    use_fixed_sop: bool = False
    requirements_constraints: str = ""  # the constraints in user requirements
    use_summary: bool = True  # whether to summarize at the end
--- a/metagpt/roles/di/swe_agent.py
+++ b/metagpt/roles/di/swe_agent.py
@ -11,7 +11,7 @@ from metagpt.prompts.di.swe_agent import (
 from metagpt.roles.di.role_zero import RoleZero
 from metagpt.schema import Message
 from metagpt.tools.libs.git import git_create_pull
-from metagpt.tools.libs.terminal import Terminal
+from metagpt.tools.libs.terminal import Bash


 class SWEAgent(RoleZero):
@ -19,8 +19,13 @@ class SWEAgent(RoleZero):
    profile: str = "Issue Solver"
    goal: str = "Resolve GitHub issue or bug in any existing codebase"
    _instruction: str = NEXT_STEP_TEMPLATE
-    tools: list[str] = ["Browser:goto,scroll", "RoleZero", "git_create_pull", "Editor", "Terminal"]
-    terminal: Terminal = Field(default_factory=Terminal, exclude=True)
+    tools: list[str] = [
+        "Bash",
+        "Browser:goto,scroll",
+        "RoleZero",
+        "git_create_pull",
+    ]
+    terminal: Bash = Field(default_factory=Bash, exclude=True)
    output_diff: str = ""
    max_react_loop: int = 40
    run_eval: bool = False
@ -33,29 +38,14 @@ class SWEAgent(RoleZero):
    def _update_tool_execution(self):
        self.tool_execution_map.update(
            {
-                "Terminal.run_command": self.eval_terminal_run if self.run_eval else self.terminal.run_command,
+                "Bash.run": self.terminal.run,
                "git_create_pull": git_create_pull,
            }
        )

-    async def eval_terminal_run(self, cmd):
-        """change command pull/push/commit to end."""
-        if any([cmd_key_word in cmd for cmd_key_word in ["pull", "push", "commit"]]):
-            # Observe that SWEAgent tries to submit the repo after fixing the bug.
-            # Set self.rc.todo to None and use git -diff to record the change.
-            logger.info("SWEAgent use cmd:{cmd}")
-            logger.info("finish current task")
-            # stop the sweagent
-            self._set_state(-1)
-            command_output = "Current test case is finished."
-        else:
-            command_output = await self.terminal.run_command(cmd)
-        return command_output
-
    async def _format_instruction(self):
        """
        Formats the instruction message for the SWE agent.
-
        Runs the "state" command in the terminal, parses its output as JSON,
        and uses it to format the `_instruction` template.
        """
@ -66,16 +56,14 @@ class SWEAgent(RoleZero):
    async def _act(self) -> Message:
        """Run the parent `_act` and, when `run_eval` is set, the eval-only command parsing.

        Returns:
            The message produced by the parent class's `_act`.
        """
        message = await super()._act()
        if self.run_eval:
            # `_parse_commands_for_eval` is declared `async def`; calling it without
            # `await` only creates a coroutine object that is never executed.
            await self._parse_commands_for_eval()
        return message

    async def _parse_commands_for_eval(self):
        """
        Handles actions based on parsed commands.
-
        Parses commands, checks for a "submit" action, and generates a patch using `git diff`.
        Stores the cleaned patch in `output_diff`. Logs any exceptions.
-
        This function is specifically added for SWE bench evaluation.
        """
        # If todo switches to None, it indicates that this is the final round of reactions, and the Swe-Agent will stop. Use git diff to store any changes made.
@ -88,7 +76,6 @@ class SWEAgent(RoleZero):
                logger.info(f"Diff output: \n{clear_diff}")
                if clear_diff:
                    self.output_diff = clear_diff
-
            except Exception as e:
                logger.error(f"Error during submission: {e}")

--- a/metagpt/tools/libs/cr.py
+++ b/metagpt/tools/libs/cr.py
@ -45,11 +45,15 @@ class CodeReview:
        """
        patch = await self._get_patch_content(patch_path)
        point_file = point_file if point_file else Path(metagpt.ext.cr.__file__).parent / "points.json"
+        await EditorReporter().async_report(str(point_file), "path")
        async with aiofiles.open(point_file, "rb") as f:
            cr_point_content = await f.read()
            cr_points = [Point(**i) for i in json.loads(cr_point_content)]
-        comments = await CodeReview_().run(patch, cr_points, output_file)
-        return f"The number of defects: {len(comments)} and the comments are stored in {output_file}"
+        try:
+            comments = await CodeReview_().run(patch, cr_points, output_file)
+        except ValueError as e:
+            return str(e)
+        return f"The number of defects: {len(comments)}, the comments are stored in {output_file}, and the checkpoints are stored in {str(point_file)}"

    async def fix(
        self,