mirror of
https://github.com/FoundationAgents/MetaGPT.git
synced 2026-05-04 21:32:38 +02:00
update code
change dir, add new role
This commit is contained in:
parent
3fac156d66
commit
7bf4505d90
11 changed files with 338 additions and 158 deletions
|
|
@ -28,22 +28,38 @@ SCIKIT_LEARN_IDS = [
|
|||
"scikit-learn__scikit-learn-10459",
|
||||
]
|
||||
|
||||
# Instance IDs carrying the "matplotlib__matplotlib" prefix.
# The two trailing entries are commented out and therefore not part of the list.
MATPLOTLIB_IDS = [
    "matplotlib__matplotlib-24362",
    "matplotlib__matplotlib-20584",
    "matplotlib__matplotlib-23188",
    "matplotlib__matplotlib-24403",
    # 'matplotlib__matplotlib-21443',
    # 'matplotlib__matplotlib-23047'
]
|
||||
|
||||
|
||||
def read_sub_set_instance(path=SUBSET_DATASET, tag="scikit-learn"):
|
||||
try:
|
||||
df = pd.read_excel(path)
|
||||
pass_filters = df["instance_id_pass"].tolist()
|
||||
fail_filters = df["instance_id_fail"].tolist()
|
||||
pass_filters = [s for s in pass_filters if tag in s]
|
||||
fail_filters = [s for s in fail_filters if tag in s]
|
||||
print(pass_filters)
|
||||
print(fail_filters)
|
||||
# Filter for instances containing the tag in either column
|
||||
pass_filter = df["instance_id_pass"].str.contains(tag, na=False)
|
||||
fail_filter = df["instance_id_fail"].str.contains(tag, na=False)
|
||||
# pass_filter = df["instance_id_pass"].str.contains(tag, na=False)
|
||||
# fail_filter = df["instance_id_fail"].str.contains(tag, na=False)
|
||||
|
||||
# Combine the filters using | (OR operator) for efficiency
|
||||
combined_filter = pass_filter | fail_filter
|
||||
# combined_filter = pass_filters | fail_filters
|
||||
|
||||
# print(df[combined_filter])
|
||||
# Apply combined filter and select the specific columns
|
||||
filtered_df = df[combined_filter][["instance_id_pass", "instance_id_fail"]]
|
||||
# filtered_df = df[combined_filter][["instance_id_pass", "instance_id_fail"]]
|
||||
|
||||
# Flatten the DataFrame into a list and remove NaN values
|
||||
subset_instance = filtered_df.stack().dropna().tolist()
|
||||
subset_instance = pass_filters + fail_filters
|
||||
|
||||
return subset_instance
|
||||
except FileNotFoundError:
|
||||
|
|
@ -52,3 +68,7 @@ def read_sub_set_instance(path=SUBSET_DATASET, tag="scikit-learn"):
|
|||
except Exception as e:
|
||||
print(f"An error occurred: {e}")
|
||||
return []
|
||||
|
||||
|
||||
# Script entry point: print the instances returned for the matplotlib tag.
if __name__ == "__main__":
    print(read_sub_set_instance(tag="matplotlib__matplotlib"))
|
||||
|
|
|
|||
|
|
@ -1,38 +0,0 @@
|
|||
import re
|
||||
|
||||
|
||||
def extract_scripts_from_codetext(codetext: str):
    """
    Collect the Python file paths announced by '[start of <path>.py]' markers.

    Only the opening markers are inspected; '[end of ...]' markers and markers
    for files that do not end in '.py' are ignored. Every path found is echoed
    to stdout, and a notice is printed when nothing matches.

    Parameters:
    - codetext (str): Text that may contain any number of
      '[start of <path>.py]' ... '[end of <path>.py]' sections.

    Returns:
    - list: The matched paths, in order of appearance (empty if none).

    Example of codetext:
    '''
    [end of README.rst]
    [start of sklearn/compose/_target.py]
    ... file content ...
    [end of sklearn/compose/_target.py]
    [start of another_module/example.py]
    ... file content ...
    [end of another_module/example.py]
    '''
    """
    # Capture everything between "[start of " and the closing "]" when it ends in ".py".
    found = re.findall(r"\[start of ([^\]]+\.py)\]", codetext)

    if not found:
        print("No script names found in the text.")
        return []

    for name in found:
        print("Extracted script name:", name)

    return list(found)
|
||||
|
|
@ -1,87 +0,0 @@
|
|||
import os
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
from traceback import format_exc
|
||||
from typing import Dict
|
||||
|
||||
import git
|
||||
from git.exc import GitError
|
||||
|
||||
from metagpt.logs import logger
|
||||
|
||||
KEY_INSTANCE_ID = "instance_id"
|
||||
RESET_FAILED = ">>>>> Reset Failed"
|
||||
|
||||
|
||||
class ExecWrapper:
    """Callable wrapper around ``subprocess.run`` that carries default keyword arguments."""

    def __init__(self, subprocess_args: Dict = None):
        """Remember the default ``subprocess.run`` keyword arguments (empty when omitted)."""
        self.subprocess_args = subprocess_args if subprocess_args else {}

    def __call__(self, cmd, raise_error=True, **kwargs):
        """
        Run ``cmd`` through ``subprocess.run``.

        Per-call ``kwargs`` are layered on top of the stored defaults, so a
        per-call value wins over a default with the same key.

        Returns the ``subprocess.run`` result on success. When the run raises
        ``CalledProcessError`` the error is logged and re-raised if
        ``raise_error`` is true; otherwise it is swallowed and ``None`` is
        returned.
        """
        try:
            run_kwargs = dict(self.subprocess_args)
            run_kwargs.update(kwargs)
            return subprocess.run(cmd, **run_kwargs)
        except subprocess.CalledProcessError as e:
            if not raise_error:
                # Best-effort mode: the caller asked for failures to be ignored.
                return None
            logger.error(
                f"Error: {e}\nError stdout: {e.stdout}\nError stderr: {e.stderr}\nError traceback: {format_exc()}"
            )
            raise e
|
||||
|
||||
|
||||
class EnvManager:
    """Clone task repositories and reset a git checkout to a task's base commit."""

    def __init__(self, testbed):
        """
        Store the testbed location and build the command runner.

        The runner is an ``ExecWrapper`` whose defaults make every command
        raise on a non-zero exit (``check``), avoid the shell, capture output
        as text, and use a copy of the current process environment.
        """
        shellenv = os.environ.copy()
        self.testbed = testbed

        self.exec = ExecWrapper(
            subprocess_args={
                "check": True,
                "shell": False,
                "capture_output": True,
                "text": True,
                "env": shellenv,
            }
        )

    def clone_repo(self, repo_name: str, path: str, token: str = None):
        """
        Clone ``swe-bench/<repo_name>`` from GitHub into ``path``.

        Slashes in ``repo_name`` are replaced by double underscores to build
        the repository name. When ``token`` is None it is read from the
        ``GITHUB_TOKEN`` environment variable, defaulting to ``"git"``.

        Raises:
            ValueError: if the resolved token is empty.

        Clone failures (``GitError``) are printed, not raised.
        """
        if token is None:
            token = os.environ.get("GITHUB_TOKEN", "git")
        if not token:
            raise ValueError("GitHub token is required for cloning repositories.")

        repo_url = f"https://{token}@github.com/swe-bench/{repo_name.replace('/', '__')}.git"

        try:
            # Ensure the destination directory exists
            os.makedirs(path, exist_ok=True)

            # Clone the repository
            git.Repo.clone_from(repo_url, path)
            print(f"Repository '{repo_name}' cloned successfully.")
        except GitError as e:
            # NOTE(review): the error text may include repo_url and therefore the
            # token -- confirm before this output is sent to shared logs.
            print(f"Failed to clone repository '{repo_name}': {e}")

    def reset_task_env(self, instance: Dict):
        """
        Reset task environment + testbed and checkout base commit of given task instance

        The git commands are issued without a ``cwd`` argument, so they act on
        the process's current working directory.
        NOTE(review): ``self.testbed`` is not used here -- presumably callers
        change into the testbed first; confirm.

        Args:
            instance: Task description; ``base_commit`` and ``instance_id`` are read.

        Returns:
            True when every step succeeded, False otherwise (the failure is logged).
        """
        try:
            gitignore_path = Path(".gitignore")
            if gitignore_path.exists():
                self.exec(["git", "ls-files", "--ignored", "--exclude-standard", "-o", "-z"], raise_error=False)
                # fixme: need detect platform and change this cmd
                # self.exec(["xargs", "-0", "-r", "rm", "-rf"], input=gitignore_path.read_text())

            self.exec(["git", "restore", "."])
            self.exec(["git", "reset", "HEAD", "."])
            self.exec(["git", "clean", "-fdx"])
            self.exec(["git", "-c", "advice.detachedHead=false", "checkout", instance["base_commit"]])
            logger.info(f"[{instance['instance_id']}] Reset task environment to {instance['base_commit']}")
            return True
        except Exception as e:
            # Use .get() here: if the failure was a missing "base_commit" key,
            # indexing again would raise from inside the handler and the caller
            # would never receive False.
            err_msg = f"{RESET_FAILED}; Failed to reset task environment to {instance.get('base_commit')}: {e}"
            logger.error(err_msg)
            return False
|
||||
|
|
@ -1,28 +0,0 @@
|
|||
import re
|
||||
|
||||
|
||||
def extract_diff(response):
    """
    Extracts the diff from a response formatted in different ways

    Two wrappers are recognised: ``<tag>...</tag>`` pairs and triple-backtick
    fenced blocks with an optional language label. Contents labelled ``diff``
    or ``patch`` take priority; otherwise the first content with any other
    label is used. Tag pairs are scanned before fenced blocks.

    Returns None for a None response. When no wrapper is found, returns the
    response text up to the first ``</s>`` marker.
    """
    if response is None:
        return None

    tagged = re.compile(r"\<([\w-]+)\>(.*?)\<\/\1\>", re.DOTALL)
    fenced = re.compile(r"```(\w+)?\n(.*?)```", re.DOTALL)

    diffs, others = [], []
    for finder in (tagged, fenced):
        for label, content in finder.findall(response):
            # An unlabelled fence yields an empty label and lands in "others".
            bucket = diffs if label in {"diff", "patch"} else others
            bucket.append(content)

    for bucket in (diffs, others):
        if bucket:
            return bucket[0]

    return response.split("</s>")[0]
|
||||
Loading…
Add table
Add a link
Reference in a new issue