mirror of
https://github.com/FoundationAgents/MetaGPT.git
synced 2026-06-26 15:49:42 +02:00
update code
change dir, add new role
This commit is contained in:
parent
3fac156d66
commit
7bf4505d90
11 changed files with 338 additions and 158 deletions
3
swe_bench/__init__.py
Normal file
3
swe_bench/__init__.py
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# @Author : stellahong (stellahong@fuzhi.ai)
|
||||
# @Desc :
|
||||
99
swe_bench/gitagent.py
Normal file
99
swe_bench/gitagent.py
Normal file
|
|
@ -0,0 +1,99 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# @Author : stellahong (stellahong@fuzhi.ai)
|
||||
# @Desc :
|
||||
from typing import Literal, Union
|
||||
|
||||
from metagpt.actions.di.ask_review import ReviewConst
|
||||
from metagpt.logs import logger
|
||||
from metagpt.roles.di.data_interpreter import DataInterpreter
|
||||
from metagpt.schema import Message
|
||||
|
||||
|
||||
class GitAgent(DataInterpreter):
    """DataInterpreter variant that writes code, has it reviewed as a patch, and retries.

    Each attempt writes code, asks the LLM to check or convert it into patch
    format against the original scripts, and then asks the LLM for a
    True/False verdict on the reviewed result.
    """

    name: str = "Jacky"
    profile: str = "Solve git issues proficiently"
    auto_run: bool = True
    use_plan: bool = True
    use_reflection: bool = False
    react_mode: Literal["plan_and_act", "react"] = "react"
    # Path(s) of the script(s) the patch is reviewed against; read from disk in review_patch.
    script_names: Union[str, list[str]] = []
    instance_id: str = ""

    @staticmethod
    def _is_approved(status) -> bool:
        """Interpret the free-text verdict returned by ``critique`` as a boolean.

        The LLM answers in prose, so the raw reply is always a non-empty
        (truthy) string, even when it says "False". The last occurrence of
        "true" or "false" in the reply decides; a reply that contains neither
        counts as not approved.
        """
        text = str(status).lower()
        # rfind returns -1 when absent, so "neither word present" yields False.
        return text.rfind("true") > text.rfind("false")

    async def critique(self, result, review_format):
        """Ask the LLM whether a reviewed result is good enough.

        Args:
            result: The review output, sent as the assistant turn.
            review_format: The review prompt that produced ``result``, sent as the first user turn.

        Returns:
            The raw LLM reply (also logged). Use ``_is_approved`` to turn it into a bool.
        """
        review_result = (
            "Finally, return a boolean value (True or False) to indicate the result of the review. "
            "Note: If the result is good enough, return True; otherwise, return False."
        )
        status = await self.llm.aask(
            [
                Message(content=review_format, role="user"),
                Message(content=result, role="assistant"),
                Message(content=review_result, role="user"),
            ]
        )
        logger.info(status)

        return status

    async def review_patch(self, code):
        """Ask the LLM to check ``code`` against every script in ``script_names``.

        Args:
            code: The code (ideally a patch) produced by the write step.

        Returns:
            A ``(result, review_prompt)`` tuple: the per-script LLM answers joined
            with newlines, and the last review prompt that was built. With no
            scripts configured, ``result`` is empty and the prompt is built with an
            empty original script.
        """
        review_format = (
            "Please ensure that the code {code} and original script {original_script} can fix the issue {memory} in patch format. "
            "If it is not in patch format, please convert it to patch format."
        )

        # script_names may legally be a single string; iterating it directly would walk its characters.
        scripts = [self.script_names] if isinstance(self.script_names, str) else list(self.script_names)

        # The first memory does not change while looping, so read it once; tolerate an empty memory.
        memories = self.get_memories()
        memory = memories[0].content if memories else ""

        # Fallback prompt so the return value is always bound, even when there are no scripts.
        review_prompt = review_format.format(code=code, original_script="", memory=memory)

        results = []
        for script in scripts:
            with open(script, "r", encoding="utf-8") as fp:
                original_script = fp.read()

            review_prompt = review_format.format(code=code, original_script=original_script, memory=memory)
            # todo: extract issue and remove image urls
            results.append(await self.llm.aask(review_prompt))
        # fixme: merge results to a single patch file
        result = "\n".join(results)

        return result, review_prompt

    async def _write_and_exec_code(self, max_retry: int = 3):
        """Write code, review it and execute it, retrying until the review approves.

        Args:
            max_retry: Maximum number of write/review attempts before asking for a human review.

        Returns:
            A ``(code, result, success)`` tuple: the last written code, the last
            review result and whether the review approved it (a bool).
        """
        counter = 0
        success = False
        # Bound up front so the final return is valid even when the loop never runs.
        code, result = "", ""

        # plan info
        plan_status = self.planner.get_plan_status() if self.use_plan else ""

        # tool info
        if self.tools:
            context = (
                self.working_memory.get()[-1].content if self.working_memory.get() else ""
            )  # thoughts from _think stage in 'react' mode
            plan = self.planner.plan if self.use_plan else None
            tool_info = await self.tool_recommender.get_recommended_tool_info(context=context, plan=plan)
        else:
            tool_info = ""

        while not success and counter < max_retry:
            ### write code ###
            code, cause_by = await self._write_code(counter, plan_status, tool_info)

            self.working_memory.add(Message(content=code, role="assistant", cause_by=cause_by))

            result, format_prompt = await self.review_patch(code)

            # critique() returns free text; convert it so "False" does not end the loop as a success.
            verdict = await self.critique(result, format_prompt)
            success = self._is_approved(verdict)

            ### execute code ###
            # The execution output is not used yet.
            await self.execute_code.run(code)
            # todo: execute: git apply

            ### process execution result ###
            counter += 1

            if not success and counter >= max_retry:
                logger.info("coding failed!")
                review, _ = await self.planner.ask_review(auto_run=False, trigger=ReviewConst.CODE_REVIEW_TRIGGER)
                if ReviewConst.CHANGE_WORDS[0] in review:
                    counter = 0  # redo the task again with help of human suggestions

        return code, result, success
|
||||
3
swe_bench/inference/__init__.py
Normal file
3
swe_bench/inference/__init__.py
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# @Author : stellahong (stellahong@fuzhi.ai)
|
||||
# @Desc :
|
||||
17
swe_bench/inference/run.py
Normal file
17
swe_bench/inference/run.py
Normal file
|
|
@ -0,0 +1,17 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# @Author : stellahong (stellahong@fuzhi.ai)
|
||||
# @Desc :
|
||||
import runpy
|
||||
import sys
|
||||
|
||||
# Remember the real command line so it can be put back once the script has run.
original_argv = list(sys.argv)

try:
    # Command-line arguments handed to the target script.
    dataset_path = "SWE-bench_oracle"  # "SWE-bench_bm25_27K" # "SWE-bench_13k"
    sys.argv = [
        "run_api.py",
        "--dataset_name_or_path",
        f"princeton-nlp/{dataset_path}",
        "--output_dir",
        "./outputs",
    ]
    # Execute the script as if it had been started directly.
    runpy.run_path(path_name="run_api.py", run_name="__main__")
finally:
    # Restore the original sys.argv so later code is not affected.
    sys.argv = original_argv
|
||||
74
swe_bench/inference/run_agent.py
Normal file
74
swe_bench/inference/run_agent.py
Normal file
|
|
@ -0,0 +1,74 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# @Author : stellahong (stellahong@fuzhi.ai)
|
||||
# @Desc :
|
||||
import re
|
||||
|
||||
from tenacity import retry, stop_after_attempt, wait_random_exponential
|
||||
|
||||
from metagpt.logs import logger
|
||||
from metagpt.utils.exceptions import handle_exception
|
||||
from metagpt.utils.recovery_util import save_history
|
||||
from swe_bench.gitagent import GitAgent
|
||||
from swe_bench.make_datasets.make_dataset import reset_task_env
|
||||
from swe_bench.utils.utils import extract_scripts_from_codetext
|
||||
|
||||
PATCH_FORMAT = """
|
||||
```diff
|
||||
--- original_file.py
|
||||
+++ modified_file.py
|
||||
@@ -line_number,context_lines +line_number,context_lines @@
|
||||
- original line of code to be replaced or removed
|
||||
+ new line of code to be added or to replace the original
|
||||
```
|
||||
"""
|
||||
|
||||
|
||||
def _prepare(inputs):
|
||||
requirement = "Please rewrite the code to address the issues. "
|
||||
system_messages = inputs.split("\n", 1)[0]
|
||||
user_message = inputs.split("\n", 1)[1]
|
||||
cleaned_user_message = re.sub("<patch>.*?</patch>", "", user_message, flags=re.DOTALL)
|
||||
|
||||
issues = re.findall("<issue>(.*?)</issue>", user_message, flags=re.DOTALL)
|
||||
|
||||
return requirement, system_messages, cleaned_user_message, issues
|
||||
|
||||
|
||||
def construct_prompt(inputs, script_names):
    """Build the instruction prompt and split ``inputs`` into its message parts.

    Args:
        inputs: The full task text, forwarded to ``_prepare``.
        script_names: The file(s) the model is told it may modify.

    Returns:
        The four values returned by ``_prepare`` followed by the instruction
        prompt: ``(requirement, system_messages, cleaned_user_message, issues, prompt)``.
    """
    instruction_parts = [
        f"You only need to modify the code file listed here {script_names}.",
        "Notice: ",
        "1. Analysis the issue, especially for the ValueError, and identify influence code lines.\n",
        "2. Only change a few lines, and make sure I can use git diff and git apply to resolve the issue .\n",
        "3. I need you to solve this issue by generating a single patch file that I can apply directly to this repository using git apply.\n",
        f"4. use the format as : {PATCH_FORMAT}",
    ]
    prompt = "".join(instruction_parts)

    prepared = _prepare(inputs)
    requirement, system_messages, cleaned_user_message, issues = prepared
    return requirement, system_messages, cleaned_user_message, issues, prompt
|
||||
|
||||
|
||||
@handle_exception(exception_type=Exception)
@retry(wait=wait_random_exponential(min=30, max=600), stop=stop_after_attempt(5))
async def run_agent(inputs, agent, **kwargs):
    """Run ``agent`` on the task text and return the source of its last cell.

    Args:
        inputs: The full task text.
        agent: The agent to run; must provide ``run`` and ``get_last_cell_source``.
        **kwargs: ``script_names`` (optional) lists the files the model may modify.

    Returns:
        Whatever ``agent.get_last_cell_source()`` returns after the run.
    """
    target_scripts = kwargs.get("script_names", [])
    requirement, system_text, user_text, _issues, prompt = construct_prompt(inputs, target_scripts)
    # Spaces are stripped from both message parts before they are sent.
    messages = [
        requirement,
        system_text.replace(" ", ""),
        user_text.replace(" ", ""),
        prompt,
    ]
    await agent.run(messages)
    return agent.get_last_cell_source()
|
||||
|
||||
|
||||
async def run_instance(instance, use_reflection=True):
    """Run a fresh GitAgent on one dataset instance.

    Args:
        instance: A mapping with at least a ``"text"`` entry; also passed to ``reset_task_env``.
        use_reflection: Forwarded to the ``GitAgent`` constructor.

    Returns:
        The agent's response, or ``None`` when ``reset_task_env`` yields no repository path.
    """
    agent = GitAgent(use_reflection=use_reflection)
    scripts = extract_scripts_from_codetext(instance["text"])
    agent.script_names = scripts

    _patch, _repo, repo_path = reset_task_env(instance)
    if repo_path is None:
        # No repository path was returned for this instance, so there is nothing to run.
        return None

    task_text = f"{instance['text']}\n\n"
    response = await run_agent(task_text, agent=agent, script_names=scripts)
    logger.info(f"Final response: {response}")
    save_history(agent)
    return response
|
||||
114
swe_bench/inference/run_api.py
Normal file
114
swe_bench/inference/run_api.py
Normal file
|
|
@ -0,0 +1,114 @@
|
|||
import json
|
||||
from pathlib import Path
|
||||
|
||||
import fire
|
||||
from tqdm.auto import tqdm
|
||||
|
||||
from data.load_dataset import load_oracle_dataset
|
||||
from metagpt.config2 import config
|
||||
from metagpt.logs import logger
|
||||
from metagpt.utils import count_string_tokens
|
||||
from swe_bench.inference.run_agent import run_instance
|
||||
from swe_bench.utils.utils import check_existing_ids, extract_diff
|
||||
|
||||
# Largest token count an instance's text may have; instances above it are filtered
# out before inference. Replace with your own.
MAX_TOKEN = 128000
|
||||
|
||||
|
||||
async def openai_inference(
    test_dataset,
    model_name_or_path,
    output_file,
    existing_ids,
    use_reflection,
):
    """
    Runs inference on a dataset using the openai API.

    Instances whose text exceeds MAX_TOKEN tokens are dropped, instances whose
    id is in ``existing_ids`` are skipped, and one JSON line per answered
    instance is appended to ``output_file``.

    Args:
    test_dataset (datasets.Dataset): The dataset to run inference on.
    model_name_or_path (str): The name or path of the model to use.
    output_file (str): The path to the output file.
    existing_ids (set): A set of ids that have already been processed.
    use_reflection (bool): Forwarded to ``run_instance`` for every instance.
    """
    test_dataset = test_dataset.filter(
        lambda x: count_string_tokens(x["text"], model_name_or_path) <= MAX_TOKEN,
        desc="Filtering",
        load_from_cache_file=False,
    )
    basic_args = {
        "model_name_or_path": model_name_or_path,
    }
    logger.info(f"Filtered to {len(test_dataset)} instances")
    # Append mode keeps results from earlier runs; json.dumps emits ASCII, so utf-8 is safe.
    with open(output_file, "a+", encoding="utf-8") as f:
        for datum in tqdm(test_dataset, desc=f"Inference for {model_name_or_path}"):
            instance_id = datum["instance_id"]

            if instance_id in existing_ids:
                continue
            version = datum["version"]
            repo = datum["repo"]
            repo_prefix = repo.replace("/", "__")
            output_dict = {"instance_id": instance_id}
            output_dict.update(basic_args)
            output_dict["text"] = f"{datum['text']}\n\n"
            logger.info(f"{repo_prefix}_{version}")

            # Pass the caller's choice through instead of silently using run_instance's default.
            response = await run_instance(instance=datum, use_reflection=use_reflection)
            if response is None:
                continue
            logger.info(f"Final response: {response}")

            output_dict["full_output"] = response
            output_dict["model_patch"] = extract_diff(response)
            # Flush after every record so a crash loses at most the instance in progress.
            print(json.dumps(output_dict), file=f, flush=True)
|
||||
|
||||
|
||||
async def main(
    dataset_name_or_path,
    split="test",
    model_name_or_path=config.llm.model,
    output_dir="outputs",
    use_reflection=True,
):
    """
    Performs inference on SWE-bench dataset using the Data Interpreter.

    Args:
    dataset_name_or_path: HuggingFace dataset name or local path
    split: Dataset split to use (default: test)
    model_name_or_path: Name of the model to use (default: config.llm.model)
    output_dir: Path to the output directory (default: outputs)
    use_reflection: Passed on to the inference routine (default: True)

    Raises:
    ValueError: If the model name does not start with "gpt".
    """
    if isinstance(model_name_or_path, Path):
        model_nickname = Path(model_name_or_path).name
    else:
        model_nickname = model_name_or_path

    # Output name: <model>__<last component of the dataset name>__<split>.jsonl
    dataset_tag = dataset_name_or_path.split("/")[-1]
    output_stem = f"{model_nickname}__{dataset_tag}__{split}"
    output_file = Path(output_dir, output_stem + ".jsonl")
    print(output_file.absolute())
    output_file.parent.mkdir(parents=True, exist_ok=True)
    logger.info(f"Will write to {output_file}")

    # check existing results
    existing_ids = check_existing_ids(output_file)
    # load dataset
    dataset = load_oracle_dataset(dataset_name_or_path)

    if not model_name_or_path.startswith("gpt"):
        raise ValueError(f"Invalid model name or path {model_name_or_path}")

    await openai_inference(
        test_dataset=dataset,
        model_name_or_path=model_name_or_path,
        output_file=output_file,
        existing_ids=existing_ids,
        use_reflection=use_reflection,
    )
    logger.info("Done!")
|
||||
|
||||
|
||||
# Script entry point: hand main() to python-fire, which maps command-line flags to its parameters.
if __name__ == "__main__":
    fire.Fire(main)
|
||||
3
swe_bench/utils/__init__.py
Normal file
3
swe_bench/utils/__init__.py
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# @Author : stellahong (stellahong@fuzhi.ai)
|
||||
# @Desc :
|
||||
Loading…
Add table
Add a link
Reference in a new issue