mirror of
https://github.com/FoundationAgents/MetaGPT.git
synced 2026-06-20 15:38:09 +02:00
fix conflict
This commit is contained in:
commit
1797fdc1f8
15 changed files with 952 additions and 246 deletions
|
|
@ -8,7 +8,6 @@ from metagpt.environment.mgx.mgx_env import MGXEnv
|
|||
from metagpt.roles import Architect, Engineer, ProductManager, ProjectManager
|
||||
from metagpt.roles.di.data_analyst import DataAnalyst
|
||||
from metagpt.roles.di.engineer2 import Engineer2
|
||||
from metagpt.roles.di.swe_agent import SWEAgent
|
||||
from metagpt.roles.di.team_leader import TeamLeader
|
||||
from metagpt.schema import Message
|
||||
|
||||
|
|
@ -29,7 +28,6 @@ async def main(requirement="", enable_human_input=False, use_fixed_sop=False, al
|
|||
engineer,
|
||||
# QaEngineer(),
|
||||
DataAnalyst(),
|
||||
SWEAgent(),
|
||||
]
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -1,16 +1,23 @@
|
|||
import argparse
|
||||
import asyncio
|
||||
import json
|
||||
import os
|
||||
import shutil
|
||||
import sys
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
from metagpt.config2 import Config
|
||||
from metagpt.const import DEFAULT_WORKSPACE_ROOT, METAGPT_ROOT
|
||||
from metagpt.logs import logger
|
||||
from metagpt.roles.di.swe_agent import SWEAgent
|
||||
from metagpt.roles.di.engineer2 import Engineer2
|
||||
from metagpt.tools.libs.editor import Editor
|
||||
from metagpt.tools.libs.terminal import Terminal
|
||||
from metagpt.tools.swe_agent_commands.swe_agent_utils import load_hf_dataset
|
||||
|
||||
config = Config.default()
|
||||
# Specify by yourself
|
||||
GLOBAL_TERMINAL = Terminal()
|
||||
TEST_REPO_DIR = METAGPT_ROOT / "data" / "test_repo"
|
||||
DATA_DIR = METAGPT_ROOT / "data/hugging_face"
|
||||
|
||||
|
|
@ -51,20 +58,61 @@ def check_instance_status(instance, swe_result_dir):
|
|||
return True
|
||||
|
||||
|
||||
async def run(instance, swe_result_dir):
|
||||
async def terminal_run_command(cmd):
|
||||
cmd_output = await GLOBAL_TERMINAL.run_command(cmd)
|
||||
logger.info(f"command:{cmd} output:\n {cmd_output}")
|
||||
return cmd_output
|
||||
|
||||
|
||||
async def refresh_repo(instance, test_repo_dir, reclone_existing_repo=False):
|
||||
repo_path = Path(test_repo_dir) / (
|
||||
instance["repo"].replace("-", "_").replace("/", "__") + "_" + instance["version"]
|
||||
)
|
||||
repo_identifier = instance["repo"]
|
||||
base_commit = instance["base_commit"]
|
||||
if os.path.exists(repo_path) and reclone_existing_repo is True:
|
||||
logger.info(f"remove exist repo path:{repo_path}")
|
||||
shutil.rmtree(repo_path)
|
||||
|
||||
if os.path.exists(repo_path):
|
||||
logger.info(f"reset exist repo path:{repo_path}")
|
||||
await terminal_run_command(f"cd {repo_path} && git reset --hard && git clean -n -d && git clean -f -d")
|
||||
await terminal_run_command("BRANCH=$(git remote show origin | awk '/HEAD branch/ {print $NF}')")
|
||||
await terminal_run_command("echo $BRANCH")
|
||||
await terminal_run_command('git checkout "$BRANCH"')
|
||||
else:
|
||||
logger.info(f"clone repo to path:{repo_path}")
|
||||
clone_command = f"git clone 'https://github.com/{repo_identifier}.git' {repo_path}"
|
||||
checkout_command = f"cd {repo_path} " + "&& git checkout -f {base_commit}" if base_commit else ""
|
||||
await terminal_run_command(clone_command)
|
||||
await terminal_run_command(checkout_command)
|
||||
|
||||
await terminal_run_command("git branch")
|
||||
# ignore backup file
|
||||
await terminal_run_command("echo '.backup.*' >> .gitignore")
|
||||
|
||||
return repo_path
|
||||
|
||||
|
||||
async def get_git_diff():
|
||||
git_diff = ""
|
||||
try:
|
||||
await terminal_run_command("git add -A")
|
||||
git_diff = await terminal_run_command("git diff --cached")
|
||||
except Exception as e:
|
||||
logger.error(f"Error during submission: {e}")
|
||||
return git_diff
|
||||
|
||||
|
||||
async def run(instance, swe_result_dir, args):
|
||||
if not check_instance_status(instance, swe_result_dir):
|
||||
logger.info(f"Instance {instance['instance_id']} already exists, skipping execution.")
|
||||
return
|
||||
|
||||
repo_path = TEST_REPO_DIR / (instance["repo"].replace("-", "_").replace("/", "__") + "_" + instance["version"])
|
||||
|
||||
# 前处理
|
||||
terminal = Terminal()
|
||||
await terminal.run_command(f"cd {repo_path} && git reset --hard && git clean -n -d && git clean -f -d")
|
||||
await terminal.run_command("BRANCH=$(git remote show origin | awk '/HEAD branch/ {print $NF}')")
|
||||
logger.info(await terminal.run_command("echo $BRANCH"))
|
||||
logger.info(await terminal.run_command('git checkout "$BRANCH"'))
|
||||
logger.info(await terminal.run_command("git branch"))
|
||||
# preparation for the repo
|
||||
logger.info(f"**** Preparing to run {instance['instance_id']}****")
|
||||
test_repo_dir = args.test_repo_dir
|
||||
repo_path = await refresh_repo(instance, test_repo_dir, args.reclone_existing_repo)
|
||||
|
||||
user_requirement_and_issue = INSTANCE_TEMPLATE.format(
|
||||
issue=instance["problem_statement"],
|
||||
|
|
@ -75,18 +123,22 @@ async def run(instance, swe_result_dir):
|
|||
)
|
||||
|
||||
logger.info(f"**** Starting to run {instance['instance_id']}****")
|
||||
swe_agent = SWEAgent()
|
||||
swe_agent.run_eval = True
|
||||
await swe_agent.run(user_requirement_and_issue)
|
||||
save_predictions(swe_agent, instance, swe_result_dir)
|
||||
logger.info("User Requirement", user_requirement_and_issue)
|
||||
try:
|
||||
engineer = Engineer2(run_eval=True, editor=Editor(enable_auto_lint=True))
|
||||
await asyncio.wait_for(engineer.run(user_requirement_and_issue), timeout=args.max_wait_time_per_case * 60)
|
||||
except Exception as e:
|
||||
logger.warning(f"**** exception lead to end: {instance['instance_id']}****\n\nerror:{e}")
|
||||
# save the difference of repo
|
||||
await save_predictions(engineer, instance, swe_result_dir)
|
||||
logger.info(f"**** Finished running {instance['instance_id']}****")
|
||||
|
||||
|
||||
def save_predictions(swe_agent: SWEAgent, instance, swe_result_dir):
|
||||
async def save_predictions(engineer, instance, swe_result_dir):
|
||||
output_file = swe_result_dir / "all_preds.jsonl"
|
||||
instance["model_name_or_path"] = swe_agent.config.llm.model
|
||||
instance["model_patch"] = swe_agent.output_diff
|
||||
|
||||
instance["model_name_or_path"] = engineer.config.llm.model
|
||||
instance["model_patch"] = await get_git_diff()
|
||||
logger.info(f"'model_patch':\n{instance['model_patch']}")
|
||||
logger.info(f"Preparing to save predictions to {output_file}")
|
||||
|
||||
# Save the predictions to a JSONL file
|
||||
|
|
@ -96,19 +148,61 @@ def save_predictions(swe_agent: SWEAgent, instance, swe_result_dir):
|
|||
logger.info(f"Saved prediction of {instance['instance_id']} to {output_file}")
|
||||
|
||||
|
||||
async def async_main():
|
||||
async def async_main(args):
|
||||
dataset_path = "manna-ai/SWE-bench_Nano" # "princeton-nlp/SWE-bench_Lite" #"manna-ai/SWE-bench_Nano"
|
||||
|
||||
dataset = load_hf_dataset(dataset_name_or_path=dataset_path, cache_dir=DATA_DIR, split="test")
|
||||
date_time = datetime.now().strftime("%m%d")
|
||||
_round = "first"
|
||||
# _round = "second"
|
||||
exp_name = f"nano_mgx_{date_time}_{_round}"
|
||||
swe_result_dir = DEFAULT_WORKSPACE_ROOT / f"result_{config.llm.model.replace('/', '_')}" / exp_name
|
||||
swe_result_dir = Path(args.save_folder)
|
||||
if swe_result_dir.exists():
|
||||
logger.info(f"{swe_result_dir} exists; resuming test from last checkpoint.")
|
||||
swe_result_dir.mkdir(parents=True, exist_ok=True)
|
||||
for instance in dataset:
|
||||
await run(instance, swe_result_dir)
|
||||
for index, instance in enumerate(dataset):
|
||||
# switch to a new logger file
|
||||
logger.remove()
|
||||
logger.add(sys.stderr, level="INFO")
|
||||
logger.add(swe_result_dir / "logs" / f"{index+1}_{instance['instance_id']}.log", level="DEBUG")
|
||||
await run(instance, swe_result_dir, args)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(async_main())
|
||||
parser = argparse.ArgumentParser(description="the argument of scripts")
|
||||
# 添加参数
|
||||
swe_result_dir = (
|
||||
DEFAULT_WORKSPACE_ROOT
|
||||
/ f"result_{config.llm.model.replace('/', '_')}_start_time_{datetime.now().strftime('%Y_%m_%d_%H_%M_%S') }"
|
||||
)
|
||||
test_repo_dir = TEST_REPO_DIR.absolute()
|
||||
swe_result_dir = swe_result_dir.absolute()
|
||||
parser.add_argument(
|
||||
"-rw", "--test_repo_dir", default=test_repo_dir, help="The directory to save temporary repositories", type=str
|
||||
)
|
||||
parser.add_argument("-s", "--save_folder", default=swe_result_dir, help="Folder to save results and logs", type=str)
|
||||
parser.add_argument(
|
||||
"-mwtc", "--max_wait_time_per_case", help="Maximum wait time allowed per test case (in minutes)", type=int
|
||||
)
|
||||
parser.add_argument(
|
||||
"-o",
|
||||
"--reclone_existing_repo",
|
||||
action="store_true",
|
||||
help="If set, the existing repository will be removed and recloned.",
|
||||
)
|
||||
# 解析命令行参数
|
||||
args = parser.parse_args()
|
||||
asyncio.run(async_main(args))
|
||||
|
||||
|
||||
"""
|
||||
#
|
||||
python tests/metagpt/roles/di/run_swe_agent_for_benchmark.py \
|
||||
--test_repo_dir "./data/test_repo" \
|
||||
--save_folder "./workspace/deepseek_coder_0907" \
|
||||
--max_wait_time_per_case 10
|
||||
"""
|
||||
|
||||
"""
|
||||
# 重新克隆仓库
|
||||
python tests/metagpt/roles/di/run_swe_agent_for_benchmark.py \
|
||||
--test_repo_dir "./data/test_repo" \
|
||||
--save_folder "./workspace/deepseek_coder_0907" \
|
||||
--max_wait_time_per_case 10 \
|
||||
--reclone_existing_repo
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -1,7 +1,19 @@
|
|||
import os
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from metagpt.const import TEST_DATA_PATH
|
||||
from metagpt.tools.libs.editor import Editor
|
||||
from metagpt.tools.libs.index_repo import (
|
||||
CHATS_INDEX_ROOT,
|
||||
CHATS_ROOT,
|
||||
UPLOAD_ROOT,
|
||||
UPLOADS_INDEX_ROOT,
|
||||
IndexRepo,
|
||||
)
|
||||
from metagpt.utils.common import list_files
|
||||
|
||||
TEST_FILE_CONTENT = """
|
||||
# this is line one
|
||||
|
|
@ -645,5 +657,54 @@ def test_append_to_single_empty_line_file():
|
|||
assert n_added_lines == 1
|
||||
|
||||
|
||||
async def mock_index_repo():
|
||||
chat_id = "1"
|
||||
chat_path = Path(CHATS_ROOT) / chat_id
|
||||
chat_path.mkdir(parents=True, exist_ok=True)
|
||||
src_path = TEST_DATA_PATH / "requirements"
|
||||
command = f"cp -rf {str(src_path)} {str(chat_path)}"
|
||||
os.system(command)
|
||||
filenames = list_files(chat_path)
|
||||
chat_files = [i for i in filenames if Path(i).suffix in {".md", ".txt", ".json", ".pdf"}]
|
||||
chat_repo = IndexRepo(
|
||||
persist_path=str(Path(CHATS_INDEX_ROOT) / chat_id), root_path=str(chat_path), min_token_count=0
|
||||
)
|
||||
await chat_repo.add(chat_files)
|
||||
assert chat_files
|
||||
|
||||
Path(UPLOAD_ROOT).mkdir(parents=True, exist_ok=True)
|
||||
command = f"cp -rf {str(src_path)} {str(UPLOAD_ROOT)}"
|
||||
os.system(command)
|
||||
filenames = list_files(UPLOAD_ROOT)
|
||||
uploads_files = [i for i in filenames if Path(i).suffix in {".md", ".txt", ".json", ".pdf"}]
|
||||
uploads_repo = IndexRepo(persist_path=UPLOADS_INDEX_ROOT, root_path=UPLOAD_ROOT, min_token_count=0)
|
||||
await uploads_repo.add(uploads_files)
|
||||
assert uploads_files
|
||||
|
||||
filenames = list_files(src_path)
|
||||
other_files = [i for i in filenames if Path(i).suffix in {".md", ".txt", ".json", ".pdf"}]
|
||||
assert other_files
|
||||
|
||||
return chat_path, UPLOAD_ROOT, src_path
|
||||
|
||||
|
||||
@pytest.mark.skip
|
||||
@pytest.mark.asyncio
|
||||
async def test_index_repo():
|
||||
# mock data
|
||||
chat_path, UPLOAD_ROOT, src_path = await mock_index_repo()
|
||||
|
||||
editor = Editor()
|
||||
rsp = await editor.search_index_repo(query="业务线", file_or_path=chat_path)
|
||||
assert rsp
|
||||
rsp = await editor.search_index_repo(query="业务线", file_or_path=UPLOAD_ROOT)
|
||||
assert rsp
|
||||
rsp = await editor.search_index_repo(query="业务线", file_or_path=src_path)
|
||||
assert rsp
|
||||
|
||||
shutil.rmtree(CHATS_ROOT)
|
||||
shutil.rmtree(UPLOAD_ROOT)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
pytest.main([__file__, "-s"])
|
||||
|
|
|
|||
|
|
@ -1,11 +1,17 @@
|
|||
import shutil
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from metagpt.const import DEFAULT_WORKSPACE_ROOT, TEST_DATA_PATH
|
||||
from metagpt.tools.libs.index_repo import IndexRepo
|
||||
from metagpt.tools.libs.index_repo import (
|
||||
CHATS_INDEX_ROOT,
|
||||
UPLOADS_INDEX_ROOT,
|
||||
IndexRepo,
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.skip
|
||||
@pytest.mark.asyncio
|
||||
@pytest.mark.parametrize(("path", "query"), [(TEST_DATA_PATH / "requirements", "业务线")])
|
||||
async def test_index_repo(path, query):
|
||||
|
|
@ -28,5 +34,22 @@ async def test_index_repo(path, query):
|
|||
shutil.rmtree(index_path)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
("paths", "path_type", "root"),
|
||||
[
|
||||
(["/data/uploads"], UPLOADS_INDEX_ROOT, "/data/uploads"),
|
||||
(["/data/uploads/"], UPLOADS_INDEX_ROOT, "/data/uploads"),
|
||||
(["/data/chats/1/1.txt"], str(Path(CHATS_INDEX_ROOT) / "1"), "/data/chats/1"),
|
||||
(["/data/chats/1/2.txt"], str(Path(CHATS_INDEX_ROOT) / "1"), "/data/chats/1"),
|
||||
(["/data/chats/2/2.txt", "/data/chats/2/2.txt"], str(Path(CHATS_INDEX_ROOT) / "2"), "/data/chats/2"),
|
||||
(["/data/chats.txt"], "other", ""),
|
||||
],
|
||||
)
|
||||
def test_classify_path(paths, path_type, root):
|
||||
result, result_root = IndexRepo.classify_path(paths)
|
||||
assert path_type in set(result.keys())
|
||||
assert root == result_root.get(path_type, "")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
pytest.main([__file__, "-s"])
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue