add repo utils

This commit is contained in:
stellahsr 2024-03-19 17:50:10 +08:00
parent 1e44d70202
commit 45eb352054
3 changed files with 133 additions and 0 deletions

View file

@ -0,0 +1,83 @@
import os
import subprocess
from typing import Dict
from traceback import format_exc
from pathlib import Path
import git
from git.exc import GitError
from metagpt.logs import logger
KEY_INSTANCE_ID = "instance_id"
RESET_FAILED = ">>>>> Reset Failed"
class ExecWrapper:
def __init__(self, subprocess_args: Dict = None):
self.subprocess_args = subprocess_args or {}
def __call__(self, cmd, raise_error=True, **kwargs):
try:
combined_args = {**self.subprocess_args, **kwargs}
output = subprocess.run(cmd, **combined_args)
return output
except subprocess.CalledProcessError as e:
if raise_error:
error_message = f"Error: {e}\nError stdout: {e.stdout}\nError stderr: {e.stderr}\nError traceback: {format_exc()}"
logger.error(error_message)
raise e
class EnvManager:
def __init__(self, testbed):
shellenv = os.environ.copy()
self.testbed = testbed
self.exec = ExecWrapper(
subprocess_args={
"check": True,
"shell": False,
"capture_output": True,
"text": True,
"env": shellenv,
}
)
def clone_repo(self, repo_name: str, path: str, token: str = None):
if token is None:
token = os.environ.get("GITHUB_TOKEN", "git")
if not token:
raise ValueError("GitHub token is required for cloning repositories.")
repo_url = f"https://{token}@github.com/swe-bench/{repo_name.replace('/', '__')}.git"
try:
# Ensure the destination directory exists
os.makedirs(path, exist_ok=True)
# Clone the repository
git.Repo.clone_from(repo_url, path)
print(f"Repository '{repo_name}' cloned successfully.")
except GitError as e:
print(f"Failed to clone repository '{repo_name}': {e}")
def reset_task_env(self, instance: Dict):
"""
Reset task environment + testbed and checkout base commit of given task instance
"""
try:
gitignore_path = Path(".gitignore")
if gitignore_path.exists():
self.exec(["git", "ls-files", "--ignored", "--exclude-standard", "-o", "-z"], raise_error=False)
self.exec(["xargs", "-0", "-r", "rm", "-rf"], input=gitignore_path.read_text())
self.exec(["git", "restore", "."])
self.exec(["git", "reset", "HEAD", "."])
self.exec(["git", "clean", "-fdx"])
self.exec(["git", "-c", "advice.detachedHead=false", "checkout", instance['base_commit']])
logger.info(f"[{instance['instance_id']}] Reset task environment to {instance['base_commit']}")
return True
except Exception as e:
err_msg = f"{RESET_FAILED}; Failed to reset task environment to {instance['base_commit']}: {e}"
logger.error(err_msg)
return False

View file

@ -0,0 +1,28 @@
import re
def extract_diff(response):
"""
Extracts the diff from a response formatted in different ways
"""
if response is None:
return None
diff_matches = []
other_matches = []
pattern = re.compile(r"\<([\w-]+)\>(.*?)\<\/\1\>", re.DOTALL)
for code, match in pattern.findall(response):
if code in {"diff", "patch"}:
diff_matches.append(match)
else:
other_matches.append(match)
pattern = re.compile(r"```(\w+)?\n(.*?)```", re.DOTALL)
for code, match in pattern.findall(response):
if code in {"diff", "patch"}:
diff_matches.append(match)
else:
other_matches.append(match)
if diff_matches:
return diff_matches[0]
if other_matches:
return other_matches[0]
return response.split("</s>")[0]

22
data/inference/run.py Normal file
View file

@ -0,0 +1,22 @@
# -*- coding: utf-8 -*-
# @Author : stellahong (stellahong@fuzhi.ai)
# @Desc :
import runpy
import sys
# 备份原始的sys.argv
original_argv = sys.argv.copy()
try:
# 设置你想要传递给脚本的命令行参数
sys.argv = ['run_api.py', '--dataset_name_or_path', 'princeton-nlp/SWE-bench_oracle', '--output_dir',
'./outputs']
# 添加其他可选参数到sys.argv中例如:
# sys.argv.extend(['--some_option', 'some_value'])
# 执行脚本
runpy.run_path(path_name='run_api.py', run_name='__main__')
finally:
# 恢复原始的sys.argv以避免对后续代码的潜在影响
sys.argv = original_argv