mirror of
https://github.com/FoundationAgents/MetaGPT.git
synced 2026-05-02 20:32:38 +02:00
Merge branch 'add_swe_agent_ablilities_to_engineer2' into 'mgx_ops'
Add swe agent ablilities to engineer2 See merge request pub/MetaGPT!356
This commit is contained in:
commit
8d93f5750c
11 changed files with 557 additions and 132 deletions
|
|
@ -96,7 +96,7 @@ class MGXEnv(Environment, SerializationMixin):
|
|||
|
||||
async def reply_to_human(self, content: str, sent_from: Role = None) -> str:
|
||||
# NOTE: Can be overwritten in remote setting
|
||||
return "SUCCESS, human has received your reply. Refrain from resending duplicate messages."
|
||||
return "SUCCESS, human has received your reply. Refrain from resending duplicate messages. If you no longer need to take action, use the command ‘end’ to stop."
|
||||
|
||||
def message_within_software_sop(self, message: Message) -> bool:
|
||||
# Engineer, QaEngineer can be end of the SOP. Their msg requires routing outside.
|
||||
|
|
|
|||
|
|
@ -1,32 +1,89 @@
|
|||
from metagpt.prompts.di.role_zero import ROLE_INSTRUCTION
|
||||
|
||||
EXTRA_INSTRUCTION_DEPRECATED = """
|
||||
4. Each time you write a code in your response, write with the Editor directly without preparing a repetitive code block beforehand.
|
||||
5. Take on ONE task and write ONE code file in each response. DON'T attempt all tasks in one response.
|
||||
6. When not specified, you should write files in a folder named "src". If you know the project path, then write in a "src" folder under the project path.
|
||||
7. When provided system design or project schedule, you MUST read them first before making a plan, then adhere to them in your implementation, especially in the programming language, package, or framework. You MUST implement all code files prescribed in the system design or project schedule. You can create a plan first with each task corresponding to implementing one code file.
|
||||
8. Write at most one file per task, do your best to implement THE ONLY ONE FILE. CAREFULLY CHECK THAT YOU DONT MISS ANY NECESSARY CLASS/FUNCTION IN THIS FILE.
|
||||
9. COMPLETE CODE: Your code will be part of the entire project, so please implement complete, reliable, reusable code snippets.
|
||||
10. When provided system design, YOU MUST FOLLOW "Data structures and interfaces". DONT CHANGE ANY DESIGN. Do not use public member functions that do not exist in your design.
|
||||
11. Write out EVERY CODE DETAIL, DON'T LEAVE TODO.
|
||||
12. To modify code in a file, read the entire file, make changes, and update the file with the complete code, ensuring that no line numbers are included in the final write.
|
||||
13. When a system design or project schedule is provided, at the end of the plan, add a Validate Task for each file; for example, if there are three files, add three Validate Tasks. For each Validate Task, just call ValidateAndRewriteCode.run.
|
||||
14. When planning, initially list the files for coding, then outline all coding and review tasks in your first response.
|
||||
15. Note 'Task for {file_name} completed.' — signifies the {file_name} coding task is done.
|
||||
16. Avoid re-reviewing or re-coding the same code. When you decide to take a write or review action, include the command 'finish current task' in the same response.
|
||||
17. When coding JavaScript, avoid using '\'' in strings.
|
||||
18. If you plan to read a file, do not include other plans in the same response.
|
||||
"""
|
||||
|
||||
EXTRA_INSTRUCTION = """
|
||||
6. When not specified, you should write files in a folder named "src". If you know the project path, then write in a "src" folder under the project path.
|
||||
7. When provided system design or project schedule, you MUST read them first before making a plan, then adhere to them in your implementation, especially in the programming language, package, or framework. You MUST implement all code files prescribed in the system design or project schedule. You can create a plan first with each task corresponding to implementing one code file.
|
||||
8. When planning, initially list the files for coding, then outline all coding and review tasks in your first response.
|
||||
9. If you plan to read a file, do not include other plans in the same response.
|
||||
10. Use Engineer2.write_new_code to create or modify a file. Write only one code file each time.
|
||||
11. When the requirement is simple, you don't need to create a plan, just do it right away.
|
||||
"""
|
||||
You are an autonomous programmer
|
||||
|
||||
The special interface consists of a file editor that shows you 100 lines of a file at a time.
|
||||
|
||||
You can use terminal commands (e.g., cat, ls, cd) by calling Terminal.run_command.
|
||||
|
||||
|
||||
You should carefully observe the behavior and results of the previous action, and avoid triggering repeated errors.
|
||||
|
||||
In addition to the terminal, I also provide additional tools. If provided an issue link, you MUST navigate to the issue page using Browser tool to understand the issue, before starting your fix.
|
||||
|
||||
Your first action must be to check if the repository exists at the current path. If it exists, navigate to the repository path. If the repository doesn't exist, please download it and then navigate to it.
|
||||
All subsequent actions must be performed within this repository path. Do not leave this directory to execute any actions at any time.
|
||||
|
||||
Note:
|
||||
|
||||
1. If you open a file and need to get to an area around a specific line that is not in the first 100 lines, say line 583, don't just use the scroll_down command multiple times. Instead, use the Editor.goto_line command. It's much quicker.
|
||||
2. Always make sure to look at the currently open file and the current working directory (which appears right after the currently open file). The currently open file might be in a different directory than the working directory! Note that some commands, such as 'create', open files, so they might change the current open file.
|
||||
3. When using Editor.edit_file_by_replace, if there is no exact match, take the difference in indentation into consideration.
|
||||
4. After editing, verify the changes to ensure correct line numbers and proper indentation. Adhere to PEP8 standards for Python code.
|
||||
5. NOTE ABOUT THE EDIT COMMAND: Indentation really matters! When editing a file, make sure to insert appropriate indentation before each line! Ensuring the code adheres to PEP8 standards. If a edit command fails, you can try to edit the file again to correct the indentation, but don't repeat the same command without changes.
|
||||
6. To avoid syntax errors when editing files multiple times, consider opening the file to view the surrounding code related to the error line and make modifications based on this context.
|
||||
7. Ensure to observe the currently open file and the current working directory, which is displayed right after the open file. The open file might be in a different directory than the working directory. Remember, commands like 'create' open files and might alter the current open file.
|
||||
8. Effectively using Use search commands (`search_dir`, `search_file`, `find_file`) and navigation commands (`open_file`, `goto_line`) to locate and modify files efficiently. The Editor tool can fully satisfy the requirements. Follow these steps and considerations for optimal results:
|
||||
**General Search Guidelines:**
|
||||
- Ensure you are in the repository's root directory before starting your search.
|
||||
- Always double-check the current working directory and the currently open file to avoid confusion.
|
||||
- Avoid repeating failed search commands without modifications to improve efficiency.
|
||||
|
||||
**Strategies for Searching and Navigating Files:**
|
||||
|
||||
1. **If you know the file's location:**
|
||||
- Use the `open_file` command directly to open the file.
|
||||
- Use `search_file` to find the `search_term` within the currently open file.
|
||||
- Alternatively, use the `goto_line` command to jump to the specified line.
|
||||
- **Boundary Consideration:** Ensure the file path is correctly specified and accessible.
|
||||
|
||||
2. **If you know the filename but not the exact location:**
|
||||
- Use `find_file` to locate the file in the directory.
|
||||
- Use `open_file` to open the file once located.
|
||||
- Use `search_file` to find the `search_term` within the file.
|
||||
- Use `goto_line` to jump to the specified line if needed.
|
||||
- **Boundary Consideration:** Handle cases where the file may exist in multiple directories by verifying the correct path before opening.
|
||||
|
||||
3. **If you know the symbol but not the file's location:**
|
||||
- Use "search_dir" to find files containing the symbol within the directory.
|
||||
- Review the search results to identify the relevant file(s).
|
||||
- Use `open_file` to open the identified file.
|
||||
- Use `search_file` to locate the `search_term` within the open file.
|
||||
- Use `goto_line` to jump to the specified line.
|
||||
- **Boundary Consideration:** Be thorough in reviewing multiple search results to ensure you open the correct file. Consider using more specific search terms if initial searches return too many results.
|
||||
|
||||
**Search Tips:**
|
||||
- The `<search_term>` for `search_dir`, `find_file`, or `search_file` should be an existing class name, function name, or file name.
|
||||
- Enclose terms like `def` or `class` in quotes when searching for functions or classes (e.g., `search_dir 'def apow'` or `search_file 'class Pow'`).
|
||||
- Use wildcard characters (`*`, `?`) in search terms to broaden or narrow down your search scope.
|
||||
- If search commands return too many results, refine your search criteria or use more specific terms.
|
||||
- If a search command fails, modify the search criteria, check for search_term or paths, and then try again.
|
||||
- Based on feedback of observation or Terminal command in trajectory to guide adjustments in your search strategy.
|
||||
|
||||
9. When the edit fails, try to enlarge the range of code.
|
||||
10. You must use the Editor.open_file command to open a file before using the Editor tool's edit command to modify it. When you open a file, any currently open file will be automatically closed.
|
||||
11. Remember, when you use Editor.insert_content_at_line or Editor.edit_file_by_replace, the line numbers will change after the operation. Therefore, if there are multiple operations, perform only the first operation in the current response, and defer the subsequent operations to the next turn.
|
||||
11.1 Do not use Editor.insert_content_at_line or Editor.edit_file_by_replace more than once per command list.
|
||||
12. If you choose Editor.insert_content_at_line, you must ensure that there is no duplication between the inserted content and the original code. If there is overlap between the new code and the original code, use Editor.edit_file_by_replace instead.
|
||||
13. If you choose Editor.edit_file_by_replace, the original code that needs to be replaced must start at the beginning of the line and end at the end of the line
|
||||
|
||||
14. When not specified, you should write files in a folder named "src". If you know the project path, then write in a "src" folder under the project path.
|
||||
15. When provided system design or project schedule, you MUST read them first before making a plan, then adhere to them in your implementation, especially in the programming language, package, or framework. You MUST implement all code files prescribed in the system design or project schedule. You can create a plan first with each task corresponding to implementing one code file.
|
||||
16. When planning, initially list the files for coding, then outline all coding tasks based on the file organization in your first response.
|
||||
17. If you plan to read a file, do not include other plans in the same response.
|
||||
18. Write only one code file each time and provide its full implementation.
|
||||
19. When the requirement is simple, you don't need to create a plan, just do it right away.
|
||||
20. If the code exists, use the Editor tool's open and edit commands to modify it. Since it is not a new code, do not use write_new_code.
|
||||
21. When using the editor, pay attention to the editor's current directory. When you use editor tools, the paths must be either absolute or relative to the editor's current directory.
|
||||
"""
|
||||
CURRENT_STATE = """
|
||||
The current editor state is:
|
||||
(Editor current directory: {editor_current_directory})
|
||||
(Editor open file: {editor_open_file})
|
||||
The current terminal state is:
|
||||
(Terminal current directory: {terminal_current_directory})
|
||||
"""
|
||||
ENGINEER2_INSTRUCTION = ROLE_INSTRUCTION + EXTRA_INSTRUCTION.strip()
|
||||
|
||||
WRITE_CODE_SYSTEM_PROMPT = """
|
||||
|
|
@ -35,7 +92,7 @@ You are a world-class engineer, your goal is to write google-style, elegant, mod
|
|||
Pay attention to the conversation history and the following constraints:
|
||||
1. When provided system design, YOU MUST FOLLOW "Data structures and interfaces". DONT CHANGE ANY DESIGN. Do not use public member functions that do not exist in your design.
|
||||
2. When modifying a code, rewrite the full code instead of updating or inserting a snippet.
|
||||
3. Write out EVERY CODE DETAIL, DON'T LEAVE TODO.
|
||||
3. Write out EVERY CODE DETAIL, DON'T LEAVE TODO OR PLACEHOLDER.
|
||||
"""
|
||||
|
||||
WRITE_CODE_PROMPT = """
|
||||
|
|
|
|||
|
|
@ -71,6 +71,7 @@ Pay close attention to the Example provided, you can reuse the example for your
|
|||
You may use any of the available commands to create a plan or update the plan. You may output mutiple commands, they will be executed sequentially.
|
||||
If you finish current task, you will automatically take the next task in the existing plan, use Plan.finish_task, DON'T append a new task.
|
||||
Review the latest plan's outcome, focusing on achievements. If your completed task matches the current, consider it finished.
|
||||
Using Editor.insert_content_at_line and Editor.edit_file_by_replace more than once in the current command list is forbidden. Because the command is mutually exclusive and will change the line number after execution.
|
||||
In your response, include at least one command.
|
||||
|
||||
# Your commands in a json array, in the following output format with correct command_name and args. If there is nothing to do, use the pass or end command:
|
||||
|
|
@ -103,6 +104,7 @@ Fifth, describe if you should terminate, you should use **end** command to termi
|
|||
REGENERATE_PROMPT = """
|
||||
Review and reflect on the history carefully, provide a different response.
|
||||
Describe if you should terminate using **end** command, or use **RoleZero.ask_human** to ask human for help, or try a different approach and output different commands. You are NOT allowed to provide the same commands again.
|
||||
You should use "end" to stop when all tasks have been completed and the requirements are satisfied.
|
||||
Your reflection, then the commands in a json array:
|
||||
"""
|
||||
ASK_HUMAN_COMMAND = """
|
||||
|
|
|
|||
|
|
@ -24,7 +24,6 @@ Note:
|
|||
- XL: Social media platform, e-commerce app, real-time multiplayer game
|
||||
- For XS and S requirements, you don't need the standard software development process, you may directly ask Engineer to write the code. Otherwise, estimate if any part of the standard software development process may contribute to a better final code. If so, assign team members accordingly.
|
||||
3.1 If the task involves code review (CR) or code checking, you should assign it to Engineer.
|
||||
3.2. If the requirement is to fix a bug or issue, you should assign it to Issue Solver. However, if the code is written by Engineer, Engineer must maintain the code.
|
||||
4. If the requirement is a common-sense, logical, or math problem, you should respond directly without assigning any task to team members.
|
||||
5. If you think the requirement is not clear or ambiguous, you should ask the user for clarification immediately. Assign tasks only after all info is clear.
|
||||
6. It is helpful for Engineer to have both the system design and the project schedule for writing the code, so include paths of both files (if available) and remind Engineer to definitely read them when publishing message to Engineer.
|
||||
|
|
@ -43,7 +42,6 @@ Sixth, describe the requirements as they pertain to software development, data a
|
|||
Seventh, describe the technologies you must use.
|
||||
"""
|
||||
)
|
||||
|
||||
TL_INFO = """
|
||||
{role_info}
|
||||
Your team member:
|
||||
|
|
|
|||
|
|
@ -4,8 +4,11 @@ from pathlib import Path
|
|||
|
||||
from pydantic import Field
|
||||
|
||||
from metagpt.logs import logger
|
||||
|
||||
# from metagpt.actions.write_code_review import ValidateAndRewriteCode
|
||||
from metagpt.prompts.di.engineer2 import (
|
||||
CURRENT_STATE,
|
||||
ENGINEER2_INSTRUCTION,
|
||||
WRITE_CODE_PROMPT,
|
||||
WRITE_CODE_SYSTEM_PROMPT,
|
||||
|
|
@ -14,6 +17,7 @@ from metagpt.roles.di.role_zero import RoleZero
|
|||
from metagpt.schema import UserMessage
|
||||
from metagpt.strategy.experience_retriever import ENGINEER_EXAMPLE
|
||||
from metagpt.tools.libs.cr import CodeReview
|
||||
from metagpt.tools.libs.git import git_create_pull
|
||||
from metagpt.tools.libs.terminal import Terminal
|
||||
from metagpt.tools.tool_registry import register_tool
|
||||
from metagpt.utils.common import CodeParser, awrite
|
||||
|
|
@ -26,24 +30,69 @@ class Engineer2(RoleZero):
|
|||
profile: str = "Engineer"
|
||||
goal: str = "Take on game, app, and web development."
|
||||
instruction: str = ENGINEER2_INSTRUCTION
|
||||
|
||||
terminal: Terminal = Field(default_factory=Terminal, exclude=True)
|
||||
|
||||
tools: list[str] = ["Plan", "Editor:read", "RoleZero", "Terminal:run_command", "Engineer2", "SearchEnhancedQA", "CodeReview"]
|
||||
tools: list[str] = [
|
||||
"Plan",
|
||||
"Editor",
|
||||
"RoleZero",
|
||||
"Terminal:run_command",
|
||||
"Browser:goto,scroll",
|
||||
"git_create_pull",
|
||||
"SearchEnhancedQA",
|
||||
"Engineer2",
|
||||
"CodeReview",
|
||||
]
|
||||
# SWE Agent parameter
|
||||
run_eval: bool = False
|
||||
output_diff: str = ""
|
||||
max_react_loop: int = 40
|
||||
|
||||
async def _think(self) -> bool:
    """Refresh the editor/terminal state shown in the prompt, then think.

    Returns:
        Whatever the parent class's ``_think`` returns.
    """
    # The state snapshot must be rebuilt before the parent builds its prompt.
    await self._format_instruction()
    return await super()._think()
|
||||
|
||||
async def _format_instruction(self):
    """Store a snapshot of the editor and terminal locations for the prompt.

    Reads the editor's open file and working directory, asks the terminal
    for its current directory via ``pwd``, and saves the filled-in
    ``CURRENT_STATE`` template (stripped) on ``self.cmd_prompt_current_state``.
    """
    open_file = self.editor.current_file
    editor_dir = self.editor.working_dir
    terminal_dir = await self.terminal.run_command("pwd")
    rendered = CURRENT_STATE.format(
        editor_open_file=open_file,
        editor_current_directory=editor_dir,
        terminal_current_directory=terminal_dir,
    )
    self.cmd_prompt_current_state = rendered.strip()
|
||||
|
||||
def _update_tool_execution(self):
    """Register the Engineer2-specific tool callables on this role.

    The terminal, code-writing, code-review and pull-request tools are
    registered in both modes. When ``run_eval`` is ``True`` the terminal
    command is routed through ``_eval_terminal_run`` and both human-facing
    RoleZero commands are mapped to ``_end``.
    """
    reviewer = CodeReview()

    # Entries that are the same in default and evaluation mode.
    self.tool_execution_map.update(
        {
            "Terminal.run_command": self.terminal.run_command,
            "Engineer2.write_new_code": self.write_new_code,
            "CodeReview.review": reviewer.review,
            "CodeReview.fix": reviewer.fix,
        }
    )
    # Only the first write_new_code in a command list should be kept.
    self.exclusive_tool_commands.append("Engineer2.write_new_code")
    self.tool_execution_map["git_create_pull"] = git_create_pull

    if self.run_eval is True:
        # Evaluation-only overrides.
        self.tool_execution_map.update(
            {
                "Terminal.run_command": self._eval_terminal_run,
                "RoleZero.ask_human": self._end,
                "RoleZero.reply_to_human": self._end,
            }
        )
|
||||
|
||||
def _retrieve_experience(self) -> str:
    """Return the fixed engineer example text used as experience."""
    return ENGINEER_EXAMPLE
|
||||
|
|
@ -82,3 +131,14 @@ class Engineer2(RoleZero):
|
|||
|
||||
# TODO: Consider adding line no to be ready for editing.
|
||||
return f"The file {path} has been successfully created, with content:\n{code}"
|
||||
|
||||
async def _eval_terminal_run(self, cmd):
    """Run a terminal command in evaluation mode, ending on submit commands.

    A command containing ``pull``, ``push`` or ``commit`` is taken as the
    engineer trying to submit its fix: the command is not executed, the
    role state is set to -1 to stop the engineer, and a short notice is
    returned. Any other command is forwarded to the terminal.

    Args:
        cmd: The shell command text requested by the model.

    Returns:
        The terminal output for ordinary commands, or a notice string when
        the command was intercepted as a submission.
    """
    if any(keyword in cmd for keyword in ("pull", "push", "commit")):
        # The Engineer2 attempts to submit the repository after fixing the
        # bug, thereby reaching the end of the fixing process.
        # The f-prefix was missing, so the command was never interpolated.
        logger.info(f"Engineer2 use cmd:{cmd}\nCurrent test case is finished.")
        # Set self.rc.todo to None to stop the engineer.
        self._set_state(-1)
        # Return explicitly: the original fell through to an unbound
        # ``command_output`` here and raised UnboundLocalError.
        return "Submission command intercepted; the current test case is finished."

    return await self.terminal.run_command(cmd)
|
||||
|
|
|
|||
|
|
@ -73,8 +73,15 @@ class RoleZero(Role):
|
|||
tool_recommender: Optional[ToolRecommender] = None
|
||||
tool_execution_map: Annotated[dict[str, Callable], Field(exclude=True)] = {}
|
||||
special_tool_commands: list[str] = ["Plan.finish_current_task", "end", "Bash.run"]
|
||||
# List of exclusive tool commands.
|
||||
# If multiple instances of these commands appear, only the first occurrence will be retained.
|
||||
exclusive_tool_commands: list[str] = [
|
||||
"Editor.edit_file_by_replace",
|
||||
"Editor.insert_content_at_line",
|
||||
"Editor.append_file",
|
||||
]
|
||||
# Equipped with three basic tools by default for optional use
|
||||
editor: Editor = Editor()
|
||||
editor: Editor = Editor(enable_auto_lint=True)
|
||||
browser: Browser = Browser()
|
||||
|
||||
# Experience
|
||||
|
|
@ -148,7 +155,7 @@ class RoleZero(Role):
|
|||
"scroll_up",
|
||||
"search_dir",
|
||||
"search_file",
|
||||
"set_workdir",
|
||||
# "set_workdir",
|
||||
"write",
|
||||
]
|
||||
}
|
||||
|
|
@ -216,10 +223,8 @@ class RoleZero(Role):
|
|||
async with ThoughtReporter(enable_llm_stream=True) as reporter:
|
||||
await reporter.async_report({"type": "react"})
|
||||
self.command_rsp = await self.llm_cached_aask(req=req, system_msgs=[system_prompt], state_data=state_data)
|
||||
|
||||
self.command_rsp = await self._check_duplicates(req, self.command_rsp)
|
||||
|
||||
self.rc.memory.add(AIMessage(content=self.command_rsp))
|
||||
return True
|
||||
|
||||
@exp_cache(context_builder=RoleZeroContextBuilder(), serializer=RoleZeroSerializer())
|
||||
|
|
@ -259,7 +264,8 @@ class RoleZero(Role):
|
|||
if self.use_fixed_sop:
|
||||
return await super()._act()
|
||||
|
||||
commands, ok = await self._parse_commands(self.command_rsp)
|
||||
commands, ok, self.command_rsp = await self._parse_commands(self.command_rsp)
|
||||
self.rc.memory.add(AIMessage(content=self.command_rsp))
|
||||
if not ok:
|
||||
error_msg = commands
|
||||
self.rc.memory.add(UserMessage(content=error_msg))
|
||||
|
|
@ -407,12 +413,27 @@ class RoleZero(Role):
|
|||
tb = traceback.format_exc()
|
||||
print(tb)
|
||||
error_msg = str(e)
|
||||
return error_msg, False
|
||||
return error_msg, False, command_rsp
|
||||
|
||||
# Tolerate LLM output that does not follow the expected format
|
||||
if isinstance(commands, dict):
|
||||
commands = commands["commands"] if "commands" in commands else [commands]
|
||||
return commands, True
|
||||
|
||||
# Set the exclusive command flag to False.
|
||||
command_flag = [command["command_name"] not in self.exclusive_tool_commands for command in commands]
|
||||
if command_flag.count(False) > 1:
|
||||
# Keep only the first exclusive command
|
||||
index_of_first_exclusive = command_flag.index(False)
|
||||
commands = [
|
||||
cmd
|
||||
for index, cmd in enumerate(commands)
|
||||
if index == index_of_first_exclusive or cmd["command_name"] not in self.exclusive_tool_commands
|
||||
]
|
||||
command_rsp = "```json\n" + json.dumps(commands, indent=4, ensure_ascii=False) + "\n```json"
|
||||
logger.info(
|
||||
"exclusive command more than one in current command list. change the command list.\n" + command_rsp
|
||||
)
|
||||
return commands, True, command_rsp
|
||||
|
||||
async def _run_commands(self, commands) -> str:
|
||||
outputs = []
|
||||
|
|
@ -456,7 +477,9 @@ class RoleZero(Role):
|
|||
if cmd["command_name"] == "Plan.finish_current_task":
|
||||
if not self.planner.plan.is_plan_finished():
|
||||
self.planner.plan.finish_current_task()
|
||||
command_output = "Current task is finished. If all tasks are finished, use 'end' to stop."
|
||||
command_output = (
|
||||
"Current task is finished. If you no longer need to take action, use the command ‘end’ to stop."
|
||||
)
|
||||
|
||||
elif cmd["command_name"] == "end":
|
||||
command_output = await self._end()
|
||||
|
|
@ -471,6 +494,7 @@ class RoleZero(Role):
|
|||
)
|
||||
else:
|
||||
command_output += f"\n[command]: {cmd['args']['cmd']} \n[command output] : {tool_output}"
|
||||
|
||||
return command_output
|
||||
|
||||
def _get_plan_status(self) -> Tuple[str, str]:
|
||||
|
|
@ -519,7 +543,7 @@ class RoleZero(Role):
|
|||
return "Not in MGXEnv, command will not be executed."
|
||||
return await self.rc.env.reply_to_human(content, sent_from=self)
|
||||
|
||||
async def _end(self):
|
||||
async def _end(self, **kwarg):
|
||||
self._set_state(-1)
|
||||
memory = self.rc.memory.get(self.memory_k)
|
||||
# Ensure reply to the human before the "end" command is executed. Hard code k=5 for checking.
|
||||
|
|
|
|||
|
|
@ -46,7 +46,6 @@ class SWEAgent(RoleZero):
|
|||
async def _format_instruction(self):
|
||||
"""
|
||||
Formats the instruction message for the SWE agent.
|
||||
|
||||
Runs the "state" command in the terminal, parses its output as JSON,
|
||||
and uses it to format the `_instruction` template.
|
||||
"""
|
||||
|
|
@ -63,10 +62,8 @@ class SWEAgent(RoleZero):
|
|||
async def _parse_commands_for_eval(self):
|
||||
"""
|
||||
Handles actions based on parsed commands.
|
||||
|
||||
Parses commands, checks for a "submit" action, and generates a patch using `git diff`.
|
||||
Stores the cleaned patch in `output_diff`. Logs any exceptions.
|
||||
|
||||
This function is specifically added for SWE bench evaluation.
|
||||
"""
|
||||
# If todo switches to None, it indicates that this is the final round of reactions, and the Swe-Agent will stop. Use git diff to store any changes made.
|
||||
|
|
@ -79,7 +76,6 @@ class SWEAgent(RoleZero):
|
|||
logger.info(f"Diff output: \n{clear_diff}")
|
||||
if clear_diff:
|
||||
self.output_diff = clear_diff
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error during submission: {e}")
|
||||
|
||||
|
|
|
|||
|
|
@ -842,7 +842,7 @@ Explanation: I will first need to read the system design document and the projec
|
|||
|
||||
## example 2
|
||||
Consider this example only after you have obtained the content of system design and project schedule documents.
|
||||
Suppose the system design and project schedule prescribes three files index.html, style.css, script.js, to follow the design and schedule, I will create a plan consisting of three tasks, each corresponding to the creation of one of the required files: `index.html`, `style.css`, and `script.js`. Following the completion of these tasks, I will add a code review task for each file to ensure the implementation aligns with the provided system design and project schedule documents.
|
||||
Suppose the system design and project schedule prescribes three files index.html, style.css, script.js, to follow the design and schedule, I will create a plan consisting of three tasks, each corresponding to the creation of one of the required files: `index.html`, `style.css`, and `script.js`.
|
||||
|
||||
Here's the plan:
|
||||
|
||||
|
|
@ -901,8 +901,132 @@ Explanation: Take on one task, such as writing a file. Upon completion, finish c
|
|||
}
|
||||
]
|
||||
```
|
||||
"""
|
||||
|
||||
## example 4
|
||||
I have received a GitHub issue URL.
|
||||
I will use browser to review the detailed information of this issue in order to understand the problem.
|
||||
```json
|
||||
[
|
||||
{
|
||||
"command_name": "Browser.goto",
|
||||
"args": {
|
||||
"url": "https://github.com/geekan/MetaGPT/issues/1275"
|
||||
}
|
||||
}
|
||||
]
|
||||
```
|
||||
|
||||
## example 6
|
||||
I need to locating the `openai_api.py` file, so I will search for the `openai_api.py` file.
|
||||
```json
|
||||
[
|
||||
{
|
||||
"command_name": "Editor.find_file",
|
||||
"args": {
|
||||
"file_name": "openai_api.py"
|
||||
}
|
||||
}
|
||||
]
|
||||
```
|
||||
|
||||
|
||||
|
||||
## example 7
|
||||
I have located the openai_api.py file. I want to edit this file, so I will open it first.
|
||||
```json
|
||||
[
|
||||
{
|
||||
"command_name": "Editor.open_file",
|
||||
"args": {
|
||||
"path": "/workspace/MetaGPT/provider/openai_api.py"
|
||||
}
|
||||
}
|
||||
]
|
||||
```
|
||||
|
||||
## example 8
|
||||
I have opened the openai_api.py file. However, the range of lines shown is from 001 to 100, and I want to see more. Therefore, I want to use the scroll_down command to view additional lines.
|
||||
```json
|
||||
[
|
||||
{
|
||||
"command_name": "Editor.scroll_down",
|
||||
"args": {{}}
|
||||
}
|
||||
]
|
||||
```
|
||||
|
||||
## example 9
|
||||
I've found the bug and will start fixing it. I'll pay close attention to the indentation.
|
||||
Since I only need to modify a few lines in this file, I will use Editor.edit_file_by_replace. The original content will be replaced by the new code.
|
||||
Editor tool is exclusive. If I use this tool, I cannot use any other commands in the current response.
|
||||
```json
|
||||
[
|
||||
{
|
||||
"command_name": "Editor.edit_file_by_replace",
|
||||
"args": {
|
||||
"file_name":"/workspace/MetaGPT/provider/openai_api.py",
|
||||
"to_replace": " inv_trig_table = ["asin", "acos", "atan", "acot"]"
|
||||
"new_content": " inv_trig_table = ["asin", "acos", "atan", "acsc", "asec", "acot"]"
|
||||
}
|
||||
}
|
||||
]
|
||||
```
|
||||
|
||||
## example 10
|
||||
I only need to add a few lines to the file, so I will use Editor.insert_content_at_line. The new code will not cover the original code.
|
||||
Note that the Editor command must be executed in a single response, so this step will only involve using the Editor command.
|
||||
```json
|
||||
[
|
||||
{
|
||||
"command_name": "Editor.insert_content_at_line",
|
||||
"args": {
|
||||
"file_name":"/workspace/MetaGPT/provider/openai_api.py"
|
||||
"line_number":727,
|
||||
"content": "if hasattr(self, '_print_' + func) and not isinstance(expr.func, UndefinedFunction):\\n return getattr(self, '_print_' + func)(expr, exp)"
|
||||
}
|
||||
}
|
||||
]
|
||||
```
|
||||
|
||||
## example 10.1
|
||||
To enhance the functionality of the 2048 game, including game end detection and score tracking, we need to add these features to the existing game_2048.py file. First, we will add a score tracking feature, and then we will insert game end detection logic into the game loop.
|
||||
We will use the Editor.insert_content_at_line command to insert new code into the file for adding score tracking and game end detection.
|
||||
Since Editor.insert_content_at_line can only be used once per response, this time I will use it to create the variable self.score
|
||||
```json
|
||||
[
|
||||
{
|
||||
"command_name": "Editor.insert_content_at_line",
|
||||
"args": {
|
||||
"file_name": "/home/mgx/mgx/MetaGPT/workspace/2048_game_py/game_2048.py",
|
||||
"line_number": 4,
|
||||
"content": " self.score = 0\n"
|
||||
}
|
||||
}
|
||||
]
|
||||
```
|
||||
In the next turn, I will try to add another code snippet
|
||||
|
||||
## example 11
|
||||
|
||||
Create a pull request (Optional): Merge the changes from the new branch into the master branch.
|
||||
Thought: Now that the changes have been pushed to the remote repository, due to the user's requirement, let's create a pull request to merge the changes into the master branch.
|
||||
```json
|
||||
[
|
||||
{
|
||||
"command_name": "git_create_pull",
|
||||
"args": {
|
||||
"base": "master",
|
||||
"head": "test-fix",
|
||||
"base_repo_name": "garylin2099/MetaGPT",
|
||||
"head_repo_name": "seeker-jie/MetaGPT",
|
||||
"app_name": "github",
|
||||
"title": "Fix Issue #1275: produced TypeError: openai.types.completion_usage.CompletionUsage() argument after ** must be a mapping, not NoneType"",
|
||||
"body": "This pull request addresses issue #1275 by ensuring that chunk.usage is not None before passing it to CompletionUsage."
|
||||
}
|
||||
}
|
||||
]
|
||||
```
|
||||
"""
|
||||
|
||||
WEB_SCRAPING_EXAMPLE = """
|
||||
## action 1
|
||||
|
|
|
|||
|
|
@ -24,10 +24,47 @@ from metagpt.utils.repo_to_markdown import is_text_file
|
|||
from metagpt.utils.report import EditorReporter
|
||||
|
||||
# This is also used in unit tests!
|
||||
MSG_FILE_UPDATED = "[File updated (edited at line {line_number}). Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.]"
|
||||
LINTER_ERROR_MSG = "[Your proposed edit has introduced new syntax error(s). Please understand the errors and retry your edit command.]\n"
|
||||
|
||||
|
||||
INDENTATION_INFO = """
|
||||
The previous line is:
|
||||
"{pre_line}"
|
||||
The indentation has {pre_line_indent} spaces.
|
||||
|
||||
The error line is:
|
||||
"{insert_line}"
|
||||
The indentation has {insert_line_indent} spaces.
|
||||
|
||||
Please check the indentation of the code to ensure that it is not causing any errors.
|
||||
Try using indentation with either {sub_4_space} or {add_4_space} spaces.
|
||||
"""
|
||||
|
||||
ERROR_GUIDANCE = """
|
||||
{linter_error_msg}
|
||||
|
||||
[This is how your edit would have looked if applied]
|
||||
-------------------------------------------------
|
||||
{window_after_applied}
|
||||
-------------------------------------------------
|
||||
|
||||
[This is the original code before your edit]
|
||||
-------------------------------------------------
|
||||
{window_before_applied}
|
||||
-------------------------------------------------
|
||||
|
||||
Your changes have NOT been applied. Please fix your edit command and try again
|
||||
{guidance_message}
|
||||
|
||||
"""
|
||||
|
||||
SUCCESS_EDIT_INFO = """
|
||||
[File: {file_name} ({n_total_lines} lines total after edit)]
|
||||
{window_after_applied}
|
||||
[File updated (edited at line {line_number}). Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.]
|
||||
"""
|
||||
|
||||
|
||||
class FileBlock(BaseModel):
|
||||
"""A block of content in a file"""
|
||||
|
||||
|
|
@ -277,7 +314,7 @@ class Editor(BaseModel):
|
|||
return ""
|
||||
return f"[File: {current_file.resolve()} ({total_lines} lines total)]\n"
|
||||
|
||||
def set_workdir(self, path: str) -> None:
|
||||
def _set_workdir(self, path: str) -> None:
|
||||
"""
|
||||
Sets the working directory to the given path. eg: repo directory.
|
||||
You MUST set it up before opening the file.
|
||||
|
|
@ -321,6 +358,7 @@ class Editor(BaseModel):
|
|||
|
||||
output = self._cur_file_header(path, total_lines)
|
||||
output += self._print_window(path, self.current_line, self._clamp(context_lines, 1, 2000))
|
||||
self.resource.report(path, "path")
|
||||
return output
|
||||
|
||||
def goto_line(self, line_number: int) -> str:
|
||||
|
|
@ -499,6 +537,25 @@ class Editor(BaseModel):
|
|||
content = "".join(new_lines)
|
||||
return content, n_added_lines
|
||||
|
||||
def _get_indentation_info(self, content, first_line):
    """Describe the indentation of the first inserted line and of the line before it.

    Args:
        content: Text that is split on newlines to look the two lines up.
        first_line: 1-based number of the first inserted/edited line.

    Returns:
        INDENTATION_INFO filled in with both lines, the width of their leading
        whitespace, and two alternative indentations (4 fewer, floored at 0,
        and 4 more) to try on the next attempt.
    """
    content_lines = content.split("\n")

    def line_at(number):
        # 1-based lookup. Out-of-range numbers yield "" instead of raising
        # IndexError or wrapping around to the end of the list: this method is
        # also called while handling another exception, and failing here would
        # hide the original error.
        index = number - 1
        if 0 <= index < len(content_lines):
            return content_lines[index]
        return ""

    pre_line = line_at(first_line - 1)
    insert_line = line_at(first_line)
    pre_line_indent = len(pre_line) - len(pre_line.lstrip())
    insert_line_indent = len(insert_line) - len(insert_line.lstrip())

    return INDENTATION_INFO.format(
        pre_line=pre_line,
        pre_line_indent=pre_line_indent,
        insert_line=insert_line,
        insert_line_indent=insert_line_indent,
        sub_4_space=max(insert_line_indent - 4, 0),
        add_4_space=insert_line_indent + 4,
    )
|
||||
|
||||
def _edit_file_impl(
|
||||
self,
|
||||
file_name: Path,
|
||||
|
|
@ -518,7 +575,6 @@ class Editor(BaseModel):
|
|||
is_insert: bool = False: Whether to insert content at the given line number instead of editing.
|
||||
is_append: bool = False: Whether to append content to the file instead of editing.
|
||||
"""
|
||||
ret_str = ""
|
||||
|
||||
ERROR_MSG = f"[Error editing file {file_name}. Please confirm the file is correct.]"
|
||||
ERROR_MSG_SUFFIX = (
|
||||
|
|
@ -568,14 +624,12 @@ class Editor(BaseModel):
|
|||
try:
|
||||
content, n_added_lines = self._insert_impl(lines, start, content)
|
||||
except LineNumberError as e:
|
||||
ret_str += (f"{ERROR_MSG}\n" f"{e}\n" f"{ERROR_MSG_SUFFIX}") + "\n"
|
||||
return ret_str
|
||||
return (f"{ERROR_MSG}\n" f"{e}\n" f"{ERROR_MSG_SUFFIX}") + "\n"
|
||||
else:
|
||||
try:
|
||||
content, n_added_lines = self._edit_impl(lines, start, end, content)
|
||||
except LineNumberError as e:
|
||||
ret_str += (f"{ERROR_MSG}\n" f"{e}\n" f"{ERROR_MSG_SUFFIX}") + "\n"
|
||||
return ret_str
|
||||
return (f"{ERROR_MSG}\n" f"{e}\n" f"{ERROR_MSG_SUFFIX}") + "\n"
|
||||
|
||||
if not content.endswith("\n"):
|
||||
content += "\n"
|
||||
|
|
@ -622,9 +676,11 @@ class Editor(BaseModel):
|
|||
first_error_line = None
|
||||
|
||||
if lint_error is not None:
|
||||
if first_error_line is not None:
|
||||
show_line = int(first_error_line)
|
||||
elif is_append:
|
||||
# if first_error_line is not None:
|
||||
# show_line = int(first_error_line)
|
||||
|
||||
# show the first insert line.
|
||||
if is_append:
|
||||
# original end-of-file
|
||||
show_line = len(lines)
|
||||
# insert OR edit WILL provide meaningful line numbers
|
||||
|
|
@ -633,52 +689,52 @@ class Editor(BaseModel):
|
|||
else:
|
||||
raise ValueError("Invalid state. This should never happen.")
|
||||
|
||||
ret_str += LINTER_ERROR_MSG
|
||||
ret_str += lint_error + "\n"
|
||||
|
||||
editor_lines = n_added_lines + 20
|
||||
|
||||
ret_str += "[This is how your edit would have looked if applied]\n"
|
||||
ret_str += "-------------------------------------------------\n"
|
||||
ret_str += self._print_window(file_name, show_line, editor_lines, return_str=True) + "\n"
|
||||
ret_str += "-------------------------------------------------\n\n"
|
||||
|
||||
ret_str += "[This is the original code before your edit]\n"
|
||||
ret_str += "-------------------------------------------------\n"
|
||||
ret_str += (
|
||||
self._print_window(
|
||||
original_file_backup_path,
|
||||
show_line,
|
||||
editor_lines,
|
||||
)
|
||||
+ "\n"
|
||||
)
|
||||
ret_str += "-------------------------------------------------\n"
|
||||
|
||||
ret_str += (
|
||||
"Your changes have NOT been applied. Please fix your edit command and try again.\n"
|
||||
guidance_message = self._get_indentation_info(content, start or len(lines))
|
||||
guidance_message += (
|
||||
"You either need to 1) Specify the correct start/end line arguments or 2) Correct your edit code.\n"
|
||||
"DO NOT re-run the same failed edit command. Running it again will lead to the same error."
|
||||
)
|
||||
lint_error_info = ERROR_GUIDANCE.format(
|
||||
linter_error_msg=LINTER_ERROR_MSG + lint_error,
|
||||
window_after_applied=self._print_window(file_name, show_line, n_added_lines + 20),
|
||||
window_before_applied=self._print_window(
|
||||
original_file_backup_path, show_line, n_added_lines + 20
|
||||
),
|
||||
guidance_message=guidance_message,
|
||||
).strip()
|
||||
|
||||
# recover the original file
|
||||
with original_file_backup_path.open() as fin, file_name.open("w") as fout:
|
||||
fout.write(fin.read())
|
||||
original_file_backup_path.unlink()
|
||||
return ret_str
|
||||
return lint_error_info
|
||||
|
||||
except FileNotFoundError as e:
|
||||
ret_str += f"File not found: {e}\n"
|
||||
return f"File not found: {e}\n"
|
||||
except IOError as e:
|
||||
ret_str += f"An error occurred while handling the file: {e}\n"
|
||||
return f"An error occurred while handling the file: {e}\n"
|
||||
except ValueError as e:
|
||||
ret_str += f"Invalid input: {e}\n"
|
||||
return f"Invalid input: {e}\n"
|
||||
except Exception as e:
|
||||
guidance_message = self._get_indentation_info(content, start or len(lines))
|
||||
guidance_message += (
|
||||
"You either need to 1) Specify the correct start/end line arguments or 2) Enlarge the range of original code.\n"
|
||||
"DO NOT re-run the same failed edit command. Running it again will lead to the same error."
|
||||
)
|
||||
error_info = ERROR_GUIDANCE.format(
|
||||
linter_error_msg=LINTER_ERROR_MSG + str(e),
|
||||
window_after_applied=self._print_window(file_name, start or len(lines), 40),
|
||||
window_before_applied=self._print_window(original_file_backup_path, start or len(lines), 40),
|
||||
guidance_message=guidance_message,
|
||||
).strip()
|
||||
# Clean up the temporary file if an error occurs
|
||||
with original_file_backup_path.open() as fin, file_name.open("w") as fout:
|
||||
fout.write(fin.read())
|
||||
if temp_file_path and Path(temp_file_path).exists():
|
||||
Path(temp_file_path).unlink()
|
||||
logger.warning(f"An unexpected error occurred: {e}")
|
||||
raise e
|
||||
|
||||
# logger.warning(f"An unexpected error occurred: {e}")
|
||||
raise Exception(f"{error_info}") from e
|
||||
|
||||
# Update the file information and print the updated content
|
||||
with file_name.open("r", encoding="utf-8") as file:
|
||||
|
|
@ -690,11 +746,13 @@ class Editor(BaseModel):
|
|||
self.current_line = max(1, len(lines)) # end of original file
|
||||
else:
|
||||
self.current_line = start or n_total_lines or 1
|
||||
ret_str += f"[File: {file_name.resolve()} ({n_total_lines} lines total after edit)]\n"
|
||||
CURRENT_FILE = file_name
|
||||
ret_str += self._print_window(CURRENT_FILE, self.current_line, self.window) + "\n"
|
||||
ret_str += MSG_FILE_UPDATED.format(line_number=self.current_line)
|
||||
return ret_str
|
||||
success_edit_info = SUCCESS_EDIT_INFO.format(
|
||||
file_name=file_name.resolve(),
|
||||
n_total_lines=n_total_lines,
|
||||
window_after_applied=self._print_window(file_name, self.current_line, self.window),
|
||||
line_number=self.current_line,
|
||||
).strip()
|
||||
return success_edit_info
|
||||
|
||||
def edit_file_by_replace(self, file_name: str, to_replace: str, new_content: str) -> str:
|
||||
"""Edit a file. This will search for `to_replace` in the given file and replace it with `new_content`.
|
||||
|
|
@ -741,6 +799,10 @@ class Editor(BaseModel):
|
|||
file_name: str: The name of the file to edit.
|
||||
to_replace: str: The content to search for and replace.
|
||||
new_content: str: The new content to replace the old content with.
|
||||
|
||||
NOTE:
|
||||
This tool is exclusive. If you use this tool, you cannot use any other commands in the current response.
|
||||
If you need to use it multiple times, wait for the next turn.
|
||||
"""
|
||||
# FIXME: support replacing *all* occurrences
|
||||
if to_replace.strip() == "":
|
||||
|
|
@ -792,6 +854,7 @@ class Editor(BaseModel):
|
|||
)
|
||||
# lint_error = bool(LINTER_ERROR_MSG in ret_str)
|
||||
# TODO: automatically tries to fix linter error (maybe involve some static analysis tools on the location near the edit to figure out indentation)
|
||||
self.resource.report(file_name, "path")
|
||||
return ret_str
|
||||
|
||||
def insert_content_at_line(self, file_name: str, line_number: int, content: str) -> str:
|
||||
|
|
@ -816,6 +879,9 @@ class Editor(BaseModel):
|
|||
file_name: str: The name of the file to edit.
|
||||
line_number: int: The line number (starting from 1) to insert the content after.
|
||||
content: str: The content to insert.
|
||||
NOTE:
|
||||
This tool is exclusive. If you use this tool, you cannot use any other commands in the current response.
|
||||
If you need to use it multiple times, wait for the next turn.
|
||||
"""
|
||||
file_name = self._try_fix_path(file_name)
|
||||
|
||||
|
|
@ -836,6 +902,9 @@ class Editor(BaseModel):
|
|||
Args:
|
||||
file_name: str: The name of the file to edit.
|
||||
content: str: The content to insert.
|
||||
NOTE:
|
||||
This tool is exclusive. If you use this tool, you cannot use any other commands in the current response.
|
||||
If you need to use it multiple times, wait for the next turn.
|
||||
"""
|
||||
file_name = self._try_fix_path(file_name)
|
||||
|
||||
|
|
@ -914,6 +983,9 @@ class Editor(BaseModel):
|
|||
res_list.append(f'[End of matches for "{search_term}" in {file_path}]')
|
||||
else:
|
||||
res_list.append(f'[No matches found for "{search_term}" in {file_path}]')
|
||||
|
||||
extra = {"type": "search", "symbol": search_term, "lines": [i[0] - 1 for i in matches]} if matches else None
|
||||
self.resource.report(file_path, "path", extra=extra)
|
||||
return "\n".join(res_list)
|
||||
|
||||
def find_file(self, file_name: str, dir_path: str = "./") -> str:
|
||||
|
|
|
|||
|
|
@ -8,7 +8,6 @@ from metagpt.environment.mgx.mgx_env import MGXEnv
|
|||
from metagpt.roles import Architect, Engineer, ProductManager, ProjectManager
|
||||
from metagpt.roles.di.data_analyst import DataAnalyst
|
||||
from metagpt.roles.di.engineer2 import Engineer2
|
||||
from metagpt.roles.di.swe_agent import SWEAgent
|
||||
from metagpt.roles.di.team_leader import TeamLeader
|
||||
from metagpt.schema import Message
|
||||
|
||||
|
|
@ -29,7 +28,6 @@ async def main(requirement="", enable_human_input=False, use_fixed_sop=False, al
|
|||
engineer,
|
||||
# QaEngineer(),
|
||||
DataAnalyst(),
|
||||
SWEAgent(),
|
||||
]
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -1,16 +1,23 @@
|
|||
import argparse
|
||||
import asyncio
|
||||
import json
|
||||
import os
|
||||
import shutil
|
||||
import sys
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
from metagpt.config2 import Config
|
||||
from metagpt.const import DEFAULT_WORKSPACE_ROOT, METAGPT_ROOT
|
||||
from metagpt.logs import logger
|
||||
from metagpt.roles.di.swe_agent import SWEAgent
|
||||
from metagpt.roles.di.engineer2 import Engineer2
|
||||
from metagpt.tools.libs.editor import Editor
|
||||
from metagpt.tools.libs.terminal import Terminal
|
||||
from metagpt.tools.swe_agent_commands.swe_agent_utils import load_hf_dataset
|
||||
|
||||
config = Config.default()
|
||||
# Specify by yourself
|
||||
GLOBAL_TERMINAL = Terminal()
|
||||
TEST_REPO_DIR = METAGPT_ROOT / "data" / "test_repo"
|
||||
DATA_DIR = METAGPT_ROOT / "data/hugging_face"
|
||||
|
||||
|
|
@ -51,20 +58,61 @@ def check_instance_status(instance, swe_result_dir):
|
|||
return True
|
||||
|
||||
|
||||
async def run(instance, swe_result_dir):
|
||||
async def terminal_run_command(cmd):
    """Run ``cmd`` on the shared GLOBAL_TERMINAL, log the command with its output, and return the output."""
    result = await GLOBAL_TERMINAL.run_command(cmd)
    logger.info(f"command:{cmd} output:\n {result}")
    return result
|
||||
|
||||
|
||||
async def refresh_repo(instance, test_repo_dir, reclone_existing_repo=False):
    """Prepare a working copy of the instance's repository and return its path.

    An existing checkout is hard-reset, cleaned and switched to the remote's
    HEAD branch. Otherwise the repository is cloned from GitHub and, when the
    instance provides one, ``base_commit`` is checked out. Every shell command
    is issued through ``terminal_run_command``.

    Args:
        instance: Mapping with the keys "repo" (used as the GitHub
            "owner/name" identifier), "version" and "base_commit".
        test_repo_dir: Directory under which the repository folders live.
        reclone_existing_repo: When exactly ``True``, an existing checkout is
            deleted and cloned again.

    Returns:
        Path of the repository directory.
    """
    repo_identifier = instance["repo"]
    base_commit = instance["base_commit"]
    repo_dir_name = repo_identifier.replace("-", "_").replace("/", "__") + "_" + instance["version"]
    repo_path = Path(test_repo_dir) / repo_dir_name

    if os.path.exists(repo_path) and reclone_existing_repo is True:
        logger.info(f"remove exist repo path:{repo_path}")
        shutil.rmtree(repo_path)

    if os.path.exists(repo_path):
        logger.info(f"reset exist repo path:{repo_path}")
        await terminal_run_command(f"cd {repo_path} && git reset --hard && git clean -n -d && git clean -f -d")
        await terminal_run_command("BRANCH=$(git remote show origin | awk '/HEAD branch/ {print $NF}')")
        await terminal_run_command("echo $BRANCH")
        await terminal_run_command('git checkout "$BRANCH"')
        # NOTE(review): this branch never checks out base_commit; confirm that is intended.
    else:
        logger.info(f"clone repo to path:{repo_path}")
        await terminal_run_command(f"git clone 'https://github.com/{repo_identifier}.git' {repo_path}")
        # Always enter the clone: the commands below are issued without a path and
        # presumably rely on the terminal session staying inside the repository.
        checkout_command = f"cd {repo_path}"
        if base_commit:
            # Must be an f-string; a plain string would pass the literal text
            # "{base_commit}" to git.
            checkout_command += f" && git checkout -f {base_commit}"
        await terminal_run_command(checkout_command)

    await terminal_run_command("git branch")
    # Keep the backup files out of the generated diff.
    await terminal_run_command("echo '.backup.*' >> .gitignore")

    return repo_path
|
||||
|
||||
|
||||
async def get_git_diff():
    """Stage every change and return the staged diff.

    Returns:
        Output of ``git diff --cached`` after ``git add -A``; an empty string
        if either command raises (the error is logged, not propagated).
    """
    try:
        await terminal_run_command("git add -A")
        return await terminal_run_command("git diff --cached")
    except Exception as err:
        logger.error(f"Error during submission: {err}")
        return ""
|
||||
|
||||
|
||||
async def run(instance, swe_result_dir, args):
|
||||
if not check_instance_status(instance, swe_result_dir):
|
||||
logger.info(f"Instance {instance['instance_id']} already exists, skipping execution.")
|
||||
return
|
||||
|
||||
repo_path = TEST_REPO_DIR / (instance["repo"].replace("-", "_").replace("/", "__") + "_" + instance["version"])
|
||||
|
||||
# 前处理
|
||||
terminal = Terminal()
|
||||
await terminal.run_command(f"cd {repo_path} && git reset --hard && git clean -n -d && git clean -f -d")
|
||||
await terminal.run_command("BRANCH=$(git remote show origin | awk '/HEAD branch/ {print $NF}')")
|
||||
logger.info(await terminal.run_command("echo $BRANCH"))
|
||||
logger.info(await terminal.run_command('git checkout "$BRANCH"'))
|
||||
logger.info(await terminal.run_command("git branch"))
|
||||
# preparation for the repo
|
||||
logger.info(f"**** Preparing to run {instance['instance_id']}****")
|
||||
test_repo_dir = args.test_repo_dir
|
||||
repo_path = await refresh_repo(instance, test_repo_dir, args.reclone_existing_repo)
|
||||
|
||||
user_requirement_and_issue = INSTANCE_TEMPLATE.format(
|
||||
issue=instance["problem_statement"],
|
||||
|
|
@ -75,18 +123,22 @@ async def run(instance, swe_result_dir):
|
|||
)
|
||||
|
||||
logger.info(f"**** Starting to run {instance['instance_id']}****")
|
||||
swe_agent = SWEAgent()
|
||||
swe_agent.run_eval = True
|
||||
await swe_agent.run(user_requirement_and_issue)
|
||||
save_predictions(swe_agent, instance, swe_result_dir)
|
||||
logger.info("User Requirement", user_requirement_and_issue)
|
||||
try:
|
||||
engineer = Engineer2(run_eval=True, editor=Editor(enable_auto_lint=True))
|
||||
await asyncio.wait_for(engineer.run(user_requirement_and_issue), timeout=args.max_wait_time_per_case * 60)
|
||||
except Exception as e:
|
||||
logger.warning(f"**** exception lead to end: {instance['instance_id']}****\n\nerror:{e}")
|
||||
# save the difference of repo
|
||||
await save_predictions(engineer, instance, swe_result_dir)
|
||||
logger.info(f"**** Finished running {instance['instance_id']}****")
|
||||
|
||||
|
||||
def save_predictions(swe_agent: SWEAgent, instance, swe_result_dir):
|
||||
async def save_predictions(engineer, instance, swe_result_dir):
|
||||
output_file = swe_result_dir / "all_preds.jsonl"
|
||||
instance["model_name_or_path"] = swe_agent.config.llm.model
|
||||
instance["model_patch"] = swe_agent.output_diff
|
||||
|
||||
instance["model_name_or_path"] = engineer.config.llm.model
|
||||
instance["model_patch"] = await get_git_diff()
|
||||
logger.info(f"'model_patch':\n{instance['model_patch']}")
|
||||
logger.info(f"Preparing to save predictions to {output_file}")
|
||||
|
||||
# Save the predictions to a JSONL file
|
||||
|
|
@ -96,19 +148,61 @@ def save_predictions(swe_agent: SWEAgent, instance, swe_result_dir):
|
|||
logger.info(f"Saved prediction of {instance['instance_id']} to {output_file}")
|
||||
|
||||
|
||||
async def async_main():
|
||||
async def async_main(args):
|
||||
dataset_path = "manna-ai/SWE-bench_Nano" # "princeton-nlp/SWE-bench_Lite" #"manna-ai/SWE-bench_Nano"
|
||||
|
||||
dataset = load_hf_dataset(dataset_name_or_path=dataset_path, cache_dir=DATA_DIR, split="test")
|
||||
date_time = datetime.now().strftime("%m%d")
|
||||
_round = "first"
|
||||
# _round = "second"
|
||||
exp_name = f"nano_mgx_{date_time}_{_round}"
|
||||
swe_result_dir = DEFAULT_WORKSPACE_ROOT / f"result_{config.llm.model.replace('/', '_')}" / exp_name
|
||||
swe_result_dir = Path(args.save_folder)
|
||||
if swe_result_dir.exists():
|
||||
logger.info(f"{swe_result_dir} exists; resuming test from last checkpoint.")
|
||||
swe_result_dir.mkdir(parents=True, exist_ok=True)
|
||||
for instance in dataset:
|
||||
await run(instance, swe_result_dir)
|
||||
for index, instance in enumerate(dataset):
|
||||
# switch to a new logger file
|
||||
logger.remove()
|
||||
logger.add(sys.stderr, level="INFO")
|
||||
logger.add(swe_result_dir / "logs" / f"{index+1}_{instance['instance_id']}.log", level="DEBUG")
|
||||
await run(instance, swe_result_dir, args)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(async_main())
|
||||
parser = argparse.ArgumentParser(description="the argument of scripts")
|
||||
# Add arguments
|
||||
swe_result_dir = (
|
||||
DEFAULT_WORKSPACE_ROOT
|
||||
/ f"result_{config.llm.model.replace('/', '_')}_start_time_{datetime.now().strftime('%Y_%m_%d_%H_%M_%S') }"
|
||||
)
|
||||
test_repo_dir = TEST_REPO_DIR.absolute()
|
||||
swe_result_dir = swe_result_dir.absolute()
|
||||
parser.add_argument(
|
||||
"-rw", "--test_repo_dir", default=test_repo_dir, help="The directory to save temporary repositories", type=str
|
||||
)
|
||||
parser.add_argument("-s", "--save_folder", default=swe_result_dir, help="Folder to save results and logs", type=str)
|
||||
parser.add_argument(
|
||||
"-mwtc", "--max_wait_time_per_case", help="Maximum wait time allowed per test case (in minutes)", type=int
|
||||
)
|
||||
parser.add_argument(
|
||||
"-o",
|
||||
"--reclone_existing_repo",
|
||||
action="store_true",
|
||||
help="If set, the existing repository will be removed and recloned.",
|
||||
)
|
||||
# Parse command-line arguments
|
||||
args = parser.parse_args()
|
||||
asyncio.run(async_main(args))
|
||||
|
||||
|
||||
"""
|
||||
#
|
||||
python tests/metagpt/roles/di/run_swe_agent_for_benchmark.py \
|
||||
--test_repo_dir "./data/test_repo" \
|
||||
--save_folder "./workspace/deepseek_coder_0907" \
|
||||
--max_wait_time_per_case 10
|
||||
"""
|
||||
|
||||
"""
|
||||
# 重新克隆仓库
|
||||
python tests/metagpt/roles/di/run_swe_agent_for_benchmark.py \
|
||||
--test_repo_dir "./data/test_repo" \
|
||||
--save_folder "./workspace/deepseek_coder_0907" \
|
||||
--max_wait_time_per_case 10 \
|
||||
--reclone_existing_repo
|
||||
"""
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue