Merge branch 'mgx_ops' into add_swe_agent_ablilities_to_engineer2

This commit is contained in:
黄伟韬 2024-09-05 20:04:57 +08:00
commit 166eb2db79
8 changed files with 38 additions and 37 deletions

View file

@ -48,7 +48,7 @@ INSTRUCTIONS = """
You must output in the same language as the Requirements.
First, determine the natural language you must respond in. This language should be consistent with the language used in the requirement description. If the requirements specify a special language, follow those instructions. The default language for responses is English.
Second, extract the restrictions in the requirements, specifically the steps. Do not include detailed demand descriptions; focus only on the restrictions.
Third, if the requirements is a software development, extract the program language. If If no specific programming language is required, Use HTML (*.html), CSS (*.css), and JavaScript (*.js)
Third, if the requirements is a software development, extract the program language. If no specific programming language is required, Use HTML (*.html), CSS (*.css), and JavaScript (*.js)
Note:
1. If there are no restrictions, requirements_restrictions must be ""

View file

@ -3,6 +3,7 @@
from __future__ import annotations
import asyncio
from datetime import datetime
from typing import Any, Callable, Coroutine, Optional, Union
from pydantic import TypeAdapter, model_validator
@ -43,9 +44,10 @@ COLLECT_AND_RANKURLS_PROMPT = """### Topic
{results}
### Requirements
Please remove irrelevant search results that are not related to the query or topic. Then, sort the remaining search results \
based on the link credibility. If two results have equal credibility, prioritize them based on the relevance. Provide the
ranked results' indices in JSON format, like [0, 1, 3, 4, ...], without including other words.
Please remove irrelevant search results that are not related to the query or topic.
If the query is time-sensitive or specifies a certain time frame, please also remove search results that are outdated or outside the specified time frame. Notice that the current time is {time_stamp}.
Then, sort the remaining search results based on the link credibility. If two results have equal credibility, prioritize them based on the relevance.
Provide the ranked results' indices in JSON format, like [0, 1, 3, 4, ...], without including other words.
"""
WEB_BROWSE_AND_SUMMARIZE_PROMPT = """### Requirements
@ -165,7 +167,8 @@ class CollectLinks(Action):
max_results = max_num_results or max(num_results * 2, 6)
results = await self._search_urls(query, max_results=max_results)
_results = "\n".join(f"{i}: {j}" for i, j in zip(range(max_results), results))
prompt = COLLECT_AND_RANKURLS_PROMPT.format(topic=topic, query=query, results=_results)
time_stamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
prompt = COLLECT_AND_RANKURLS_PROMPT.format(topic=topic, query=query, results=_results, time_stamp=time_stamp)
logger.debug(prompt)
indices = await self._aask(prompt)
try:

View file

@ -175,13 +175,16 @@ class CodeReview(Action):
async def cr_by_points(self, patch: PatchSet, points: list[Point]):
comments = []
valid_patch_count = 0
for patched_file in patch:
if not patched_file:
continue
if patched_file.path.endswith(".py"):
points = [p for p in points if p.language == "Python"]
valid_patch_count += 1
elif patched_file.path.endswith(".java"):
points = [p for p in points if p.language == "Java"]
valid_patch_count += 1
else:
continue
group_points = [points[i : i + 3] for i in range(0, len(points), 3)]
@ -198,6 +201,9 @@ class CodeReview(Action):
c["commented_file"] = patched_file_path
comments.extend(comments_batch)
if valid_patch_count == 0:
raise ValueError("Only code reviews for Python and Java languages are supported.")
return comments
async def run(self, patch: PatchSet, points: list[Point], output_file: str):

View file

@ -75,12 +75,7 @@ Note:
18. Use Engineer2.write_new_code to create or modify a file. Write only one code file each time. If you only need to code one file, provide all the necessary information in one response.
19. When the requirement is simple, you don't need to create a plan, just do it right away.
20. If the code exists, use the Editor tool's open and edit commands to modify it. Since it is not a new code, do not use write_new_code.
22. When using the editor, pay attention to the editor's current directory. When you use editor tools, the paths must be either absolute or relative to the editor's current directory.
"""
"""
21. Forbidden to run code in the terminal.
Do Not run the code.
21. When using the editor, pay attention to the editor's current directory. When you use editor tools, the paths must be either absolute or relative to the editor's current directory.
"""
CURRENT_STATE = """
The current editor state is:

View file

@ -16,6 +16,7 @@ from metagpt.prompts.di.engineer2 import (
from metagpt.roles.di.role_zero import RoleZero
from metagpt.schema import Message, UserMessage
from metagpt.strategy.experience_retriever import ENGINEER_EXAMPLE
from metagpt.tools.libs.cr import CodeReview
from metagpt.tools.libs.git import git_create_pull
from metagpt.tools.libs.terminal import Terminal
from metagpt.tools.tool_registry import register_tool
@ -40,6 +41,7 @@ class Engineer2(RoleZero):
"git_create_pull",
"SearchEnhancedQA",
"Engineer2",
"CodeReview",
]
# SWE Agent parameter
run_eval: bool = False
@ -64,11 +66,15 @@ class Engineer2(RoleZero):
self.cmd_prompt_current_state = CURRENT_STATE.format(**state).strip()
def _update_tool_execution(self):
# validate = ValidateAndRewriteCode()
cr = CodeReview()
self.tool_execution_map.update(
{
"Terminal.run_command": self.terminal.run_command,
"git_create_pull": git_create_pull,
"Engineer2.write_new_code": self.write_new_code,
"CodeReview.review": cr.review,
"CodeReview.fix": cr.fix,
# "ValidateAndRewriteCode.run": validate.run,
# "ValidateAndRewriteCode": validate.run,
}

View file

@ -89,7 +89,7 @@ class RoleZero(Role):
# Others
command_rsp: str = "" # the raw string containing the commands
commands: list[dict] = [] # commands to be executed
memory_k: int = 100 # number of memories (messages) to use as historical context
memory_k: int = 200 # number of memories (messages) to use as historical context
use_fixed_sop: bool = False
requirements_constraints: str = "" # the constraints in user requirements
use_summary: bool = True # whether to summarize at the end

View file

@ -11,7 +11,7 @@ from metagpt.prompts.di.swe_agent import (
from metagpt.roles.di.role_zero import RoleZero
from metagpt.schema import Message
from metagpt.tools.libs.git import git_create_pull
from metagpt.tools.libs.terminal import Terminal
from metagpt.tools.libs.terminal import Bash
class SWEAgent(RoleZero):
@ -19,8 +19,13 @@ class SWEAgent(RoleZero):
profile: str = "Issue Solver"
goal: str = "Resolve GitHub issue or bug in any existing codebase"
_instruction: str = NEXT_STEP_TEMPLATE
tools: list[str] = ["Browser:goto,scroll", "RoleZero", "git_create_pull", "Editor", "Terminal"]
terminal: Terminal = Field(default_factory=Terminal, exclude=True)
tools: list[str] = [
"Bash",
"Browser:goto,scroll",
"RoleZero",
"git_create_pull",
]
terminal: Bash = Field(default_factory=Bash, exclude=True)
output_diff: str = ""
max_react_loop: int = 40
run_eval: bool = False
@ -33,29 +38,14 @@ class SWEAgent(RoleZero):
def _update_tool_execution(self):
self.tool_execution_map.update(
{
"Terminal.run_command": self.eval_terminal_run if self.run_eval else self.terminal.run_command,
"Bash.run": self.terminal.run,
"git_create_pull": git_create_pull,
}
)
async def eval_terminal_run(self, cmd):
    """Run ``cmd`` in the terminal, ending the task on pull/push/commit.

    If ``cmd`` contains the substring "pull", "push" or "commit", the command
    is NOT executed: the role state is set to -1 and a fixed completion
    message is returned instead. Any other command is forwarded to
    ``self.terminal.run_command``.

    Args:
        cmd: The shell command string requested by the agent.

    Returns:
        The terminal output for ordinary commands, or the string
        "Current test case is finished." when the command is intercepted.
    """
    # Plain substring match, so any command merely containing one of these
    # words (e.g. a file name) is intercepted as well.
    if any(keyword in cmd for keyword in ("pull", "push", "commit")):
        # SWEAgent has been observed trying to submit the repo after fixing
        # the bug; treat that as the end of the current test case.
        # The f-prefix is required so the actual command is logged rather
        # than the literal text "{cmd}".
        logger.info(f"SWEAgent use cmd:{cmd}")
        logger.info("finish current task")
        # Stop the SWE agent.
        self._set_state(-1)
        return "Current test case is finished."
    return await self.terminal.run_command(cmd)
async def _format_instruction(self):
"""
Formats the instruction message for the SWE agent.
Runs the "state" command in the terminal, parses its output as JSON,
and uses it to format the `_instruction` template.
"""
@ -66,16 +56,14 @@ class SWEAgent(RoleZero):
async def _act(self) -> Message:
message = await super()._act()
if self.run_eval:
await self._parse_commands_for_eval()
self._parse_commands_for_eval()
return message
async def _parse_commands_for_eval(self):
"""
Handles actions based on parsed commands.
Parses commands, checks for a "submit" action, and generates a patch using `git diff`.
Stores the cleaned patch in `output_diff`. Logs any exceptions.
This function is specifically added for SWE bench evaluation.
"""
# If todo switches to None, it indicates that this is the final round of reactions, and the Swe-Agent will stop. Use git diff to store any changes made.
@ -88,7 +76,6 @@ class SWEAgent(RoleZero):
logger.info(f"Diff output: \n{clear_diff}")
if clear_diff:
self.output_diff = clear_diff
except Exception as e:
logger.error(f"Error during submission: {e}")

View file

@ -45,11 +45,15 @@ class CodeReview:
"""
patch = await self._get_patch_content(patch_path)
point_file = point_file if point_file else Path(metagpt.ext.cr.__file__).parent / "points.json"
await EditorReporter().async_report(str(point_file), "path")
async with aiofiles.open(point_file, "rb") as f:
cr_point_content = await f.read()
cr_points = [Point(**i) for i in json.loads(cr_point_content)]
comments = await CodeReview_().run(patch, cr_points, output_file)
return f"The number of defects: {len(comments)} and the comments are stored in {output_file}"
try:
comments = await CodeReview_().run(patch, cr_points, output_file)
except ValueError as e:
return str(e)
return f"The number of defects: {len(comments)}, the comments are stored in {output_file}, and the checkpoints are stored in {str(point_file)}"
async def fix(
self,