From e4d02ca68c610e3d50f927f6dbdb0b4f0d9a1129 Mon Sep 17 00:00:00 2001
From: stellahsr <stellahsr@126.com>
Date: Fri, 22 Mar 2024 16:26:30 +0800
Subject: [PATCH 1/7] add code for oracle_collapsed dataset generation

---
 swe_bench/utils/enums.py      |  12 ++++
 swe_bench/utils/parse_diff.py | 115 ++++++++++++++++++++++++++++++++++
 2 files changed, 127 insertions(+)
 create mode 100644 swe_bench/utils/enums.py
 create mode 100644 swe_bench/utils/parse_diff.py

diff --git a/swe_bench/utils/enums.py b/swe_bench/utils/enums.py
new file mode 100644
index 000000000..7c052a8ff
--- /dev/null
+++ b/swe_bench/utils/enums.py
@@ -0,0 +1,12 @@
+from enum import Enum, auto
+
+
+class FileChangeMode(Enum):  # how a patch affects a file as a whole
+    create = auto()  # the file is new: its diff section contains a "--- /dev/null" line
+    delete = auto()  # the file is removed: its diff section contains a "+++ /dev/null" line
+    change = auto()  # neither of the above; the mode every file section starts with
+
+
+class LineChangeType(Enum):  # kind of a single recorded line change
+    addition = auto()  # recorded for diff lines starting with "+"
+    deletion = auto()  # recorded for diff lines starting with "-"
diff --git a/swe_bench/utils/parse_diff.py b/swe_bench/utils/parse_diff.py
new file mode 100644
index 000000000..d2b32d2a1
--- /dev/null
+++ b/swe_bench/utils/parse_diff.py
@@ -0,0 +1,115 @@
+import re
+from typing import Dict, List
+
+from metagpt.logs import logger
+from swe_bench.utils.enums import FileChangeMode, LineChangeType
+
+
+def extract_changes_from_patch(diff: str) -> List[Dict[str, any]]:
+    """Parses the patch text through the standard syntax of git diff, outputs the information of added and deleted lines.
+
+    Splits the diff into per-file sections and records each section's mode and line changes.
+
+    Args:
+        diff: A string containing the output of git diff.
+
+    Returns:
+        A list of dicts, one per file, with "file_before_change", "file_after_change", "mode" and "changes" keys.
+    """
+    changes = []
+    current_file = None
+    # git omits the ",count" part of a hunk range when the count is 1 (e.g. "@@ -3 +3 @@"), so it is optional.
+    file_pattern = re.compile(r"^diff --git a/(.+) b/(.+)$")
+    line_change_pattern = re.compile(r"^@@ -(\d+)(?:,\d+)? \+(\d+)(?:,\d+)? @@.*$")
+    new_file_flag_line = "--- /dev/null"
+    deleted_file_flag_line = "+++ /dev/null"
+
+    for line in diff.splitlines():
+        file_section_start = file_pattern.match(line)
+        if file_section_start:
+            if current_file:
+                changes.append(current_file)
+            file_a, file_b = file_section_start.groups()
+            current_file = start_new_file_section(file_a, file_b)
+            current_file["mode"] = FileChangeMode.change
+        elif current_file:
+            # A "--- /dev/null" line marks the current file as newly created
+            if line == new_file_flag_line:
+                current_file["mode"] = FileChangeMode.create
+            # A "+++ /dev/null" line marks the current file as deleted
+            elif line == deleted_file_flag_line:
+                current_file["mode"] = FileChangeMode.delete
+            update_file_changes(current_file, line, line_change_pattern)
+
+    if current_file:
+        changes.append(current_file)
+
+    return changes
+
+
+def start_new_file_section(file_before_change: str, file_after_change: str) -> Dict[str, any]:
+    """Creates the empty change record for one file section of the diff.
+
+    Called whenever a new "diff --git" section starts; the returned dictionary is then filled in
+    while the lines of that section are processed.
+
+    Args:
+        file_before_change: The file name before the change
+        file_after_change: The file name after the change
+
+    Returns:
+        A dictionary with both file names and an initially empty "changes" list.
+    """
+    section = dict(file_before_change=file_before_change, file_after_change=file_after_change)
+    # Line-level changes are appended here as the section's hunks are parsed.
+    section["changes"] = []
+    return section
+
+
+def update_file_changes(current_file: Dict[str, any], line: str, line_change_pattern: re.Pattern):
+    """Updates the current file's change record based on a line from the diff.
+
+    Hunk headers reset both line counters; "+"/"-" lines are recorded and context lines advance both
+    counters. Lines seen before the first hunk header (e.g. "--- a/x", "+++ b/x") are ignored.
+
+    Args:
+        current_file: The current file information being processed (updated in place)
+        line: The current line from the diff
+        line_change_pattern: The regex pattern used to identify hunk headers
+    """
+    kinds = {"+": (LineChangeType.addition, "changed_line"), "-": (LineChangeType.deletion, "base_line")}
+    line_change_match = line_change_pattern.match(line)
+    if line_change_match:
+        current_file["base_line"], current_file["changed_line"] = map(int, line_change_match.groups())
+    # "base_line" only exists once a hunk header was seen, which keeps file headers out of the record.
+    elif "base_line" in current_file and line[:1] in kinds:
+        change_type, counter = kinds[line[0]]
+        current_file["changes"].append({"type": change_type, "line": current_file[counter], "content": line[1:]})
+        current_file[counter] += 1
+    elif "base_line" in current_file and line.startswith(" "):
+        current_file["base_line"] += 1
+        current_file["changed_line"] += 1
+
+
+def filter_changed_line(patch):
+    """Collects, for every file in a patch, the recorded line changes that are not additions.
+
+    Args:
+        patch: The git diff text
+
+    Returns:
+        A dict mapping each file's pre-change name to its list of non-addition change records;
+        files created by the patch map to an empty list.
+    """
+    kept_by_file = {}
+    for file_change in extract_changes_from_patch(patch):
+        kept = []
+        kept_by_file[file_change["file_before_change"]] = kept
+        # Newly created files are skipped
+        if file_change["mode"] is FileChangeMode.create:
+            continue
+        for record in file_change["changes"]:
+            if record["type"] is not LineChangeType.addition:
+                logger.debug(f"  {record['type']} at line {record['line']}: {record['content']}")
+                kept.append(record)
+    return kept_by_file

From f26a5cd1de55aa7115a2760df39d3f3e3d1354b4 Mon Sep 17 00:00:00 2001
From: stellahsr <stellahsr@126.com>
Date: Fri, 22 Mar 2024 16:27:00 +0800
Subject: [PATCH 2/7] add load dataset

---
 data/load_dataset.py | 35 +++++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)
 create mode 100644 data/load_dataset.py

diff --git a/data/load_dataset.py b/data/load_dataset.py
new file mode 100644
index 000000000..ed871ffab
--- /dev/null
+++ b/data/load_dataset.py
@@ -0,0 +1,35 @@
+# -*- coding: utf-8 -*-
+# @Author  : stellahong (stellahong@fuzhi.ai)
+# @Desc    :
+from pathlib import Path
+
+import numpy as np
+from datasets import load_dataset, load_from_disk
+
+from data.inference.const import SCIKIT_LEARN_IDS
+
+
+def load_oracle_dataset(dataset_name_or_path: str = "", split: str = "test", existing_ids: list = []):
+    if Path(dataset_name_or_path).exists():  # NOTE(review): the default "" is Path(".") and takes this branch
+        dataset = load_from_disk(dataset_name_or_path)
+    else:  # not an existing local path: leave resolution of the name to datasets.load_dataset
+        dataset = load_dataset(dataset_name_or_path)
+    if split not in dataset:  # fail early on an unknown split name
+        raise ValueError(f"Invalid split {split} for dataset {dataset_name_or_path}")
+    dataset = dataset[split]  # from here on only the requested split is used
+    lens = np.array(list(map(len, dataset["text"])))
+    dataset = dataset.select(np.argsort(lens))
+
+    if len(existing_ids) > 0:
+        dataset = dataset.filter(
+            lambda x: x["instance_id"] not in existing_ids,
+            desc="Filtering out existing ids",
+            load_from_cache_file=False,
+        )
+    if len(SCIKIT_LEARN_IDS) > 0:
+        dataset = dataset.filter(
+            lambda x: x["instance_id"] in SCIKIT_LEARN_IDS,
+            desc="Filtering out subset_instance_ids",
+            load_from_cache_file=False,
+        )
+    return dataset  # ordered by ascending len(text); minus existing_ids, limited to SCIKIT_LEARN_IDS if non-empty

From 91db2ef112058e7a9867213fdaf5cdd90731278a Mon Sep 17 00:00:00 2001
From: stellahsr <stellahsr@126.com>
Date: Tue, 26 Mar 2024 14:44:19 +0800
Subject: [PATCH 3/7] update code, change data path

---
 .gitignore                               | 1 +
 {data => swe_bench/data}/load_dataset.py | 2 +-
 {data => swe_bench}/inference/const.py   | 2 +-
 swe_bench/inference/run_api.py           | 5 +++--
 swe_bench/make_datasets/make_dataset.py  | 2 +-
 5 files changed, 7 insertions(+), 5 deletions(-)
 rename {data => swe_bench/data}/load_dataset.py (95%)
 rename {data => swe_bench}/inference/const.py (97%)

diff --git a/.gitignore b/.gitignore
index 922116d12..9350e0616 100644
--- a/.gitignore
+++ b/.gitignore
@@ -188,3 +188,4 @@ cov.xml
 *-structure.json
 *.dot
 .python-version
+/data/inference
diff --git a/data/load_dataset.py b/swe_bench/data/load_dataset.py
similarity index 95%
rename from data/load_dataset.py
rename to swe_bench/data/load_dataset.py
index ed871ffab..12f55da5a 100644
--- a/data/load_dataset.py
+++ b/swe_bench/data/load_dataset.py
@@ -6,7 +6,7 @@ from pathlib import Path
 import numpy as np
 from datasets import load_dataset, load_from_disk
 
-from data.inference.const import SCIKIT_LEARN_IDS
+from swe_bench.inference.const import SCIKIT_LEARN_IDS
 
 
 def load_oracle_dataset(dataset_name_or_path: str = "", split: str = "test", existing_ids: list = []):
diff --git a/data/inference/const.py b/swe_bench/inference/const.py
similarity index 97%
rename from data/inference/const.py
rename to swe_bench/inference/const.py
index 69a274310..84c5b0726 100644
--- a/data/inference/const.py
+++ b/swe_bench/inference/const.py
@@ -7,7 +7,7 @@ from metagpt.const import DATA_PATH, METAGPT_ROOT
 
 SUBSET_DATASET = METAGPT_ROOT / "sub_swebench_dataset" / "sub_swebench.csv"
 SUBSET_DATASET_SKLERARN = METAGPT_ROOT / "sub_swebench_dataset" / "scikit-learn-68.csv"
-TESTBED = DATA_PATH / "repos"
+TESTBED = METAGPT_ROOT / "swe-bench" / "data" / "repos"
 
 # SCIKIT_LEARN_IDS: A list of instance identifiers from 'sub_swebench.csv' within SUBSET_DATASET.
 # This collection represents a subset specifically related to scikit-learn content.
diff --git a/swe_bench/inference/run_api.py b/swe_bench/inference/run_api.py
index 5fc71f516..2ffe87c5b 100644
--- a/swe_bench/inference/run_api.py
+++ b/swe_bench/inference/run_api.py
@@ -2,7 +2,7 @@ import json
 from pathlib import Path
 
 import fire
-from data.load_dataset import load_oracle_dataset
+
 from tqdm.auto import tqdm
 
 from metagpt.config2 import config
@@ -10,6 +10,7 @@ from metagpt.logs import logger
 from metagpt.utils import count_string_tokens
 from swe_bench.inference.run_agent import run_instance
 from swe_bench.utils.utils import check_existing_ids, extract_diff
+from swe_bench.data.load_dataset import load_oracle_dataset
 
 # Replace with your own
 MAX_TOKEN = 128000
@@ -56,7 +57,7 @@ async def openai_inference(
             logger.info(f"{repo_prefix}_{version}")
             data.append(f"{repo_prefix}_{version}")
 
-            response = await run_instance(instance=datum)
+            response = await run_instance(instance=datum, use_reflection=use_reflection)
             if response is None:
                 continue
             logger.info(f"Final response: {response}")
diff --git a/swe_bench/make_datasets/make_dataset.py b/swe_bench/make_datasets/make_dataset.py
index ee4fc8c41..c5df53b1d 100644
--- a/swe_bench/make_datasets/make_dataset.py
+++ b/swe_bench/make_datasets/make_dataset.py
@@ -6,11 +6,11 @@ from pathlib import Path
 
 from tqdm.auto import tqdm
 
-from data.inference.const import TESTBED
 from metagpt.logs import logger
 from swe_bench.make_datasets.make_instance import prompt_style_2_edits_only
 from swe_bench.utils.parse_diff import filter_changed_line
 from swe_bench.utils.repo_utils import EnvManager
+from swe_bench.inference.const import TESTBED
 
 
 def reset_task_env(instance: dict = {}):

From e88b0fdf164db280626ad0acab88a3d6b309557b Mon Sep 17 00:00:00 2001
From: stellahsr <stellahsr@126.com>
Date: Tue, 26 Mar 2024 14:50:53 +0800
Subject: [PATCH 4/7] format code

---
 swe_bench/inference/const.py            | 2 +-
 swe_bench/inference/run_api.py          | 3 +--
 swe_bench/make_datasets/make_dataset.py | 2 +-
 3 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/swe_bench/inference/const.py b/swe_bench/inference/const.py
index 84c5b0726..9611a209c 100644
--- a/swe_bench/inference/const.py
+++ b/swe_bench/inference/const.py
@@ -3,7 +3,7 @@
 # @Desc    :
 import pandas as pd
 
-from metagpt.const import DATA_PATH, METAGPT_ROOT
+from metagpt.const import METAGPT_ROOT
 
 SUBSET_DATASET = METAGPT_ROOT / "sub_swebench_dataset" / "sub_swebench.csv"
 SUBSET_DATASET_SKLERARN = METAGPT_ROOT / "sub_swebench_dataset" / "scikit-learn-68.csv"
diff --git a/swe_bench/inference/run_api.py b/swe_bench/inference/run_api.py
index 2ffe87c5b..37d87d6ce 100644
--- a/swe_bench/inference/run_api.py
+++ b/swe_bench/inference/run_api.py
@@ -2,15 +2,14 @@ import json
 from pathlib import Path
 
 import fire
-
 from tqdm.auto import tqdm
 
 from metagpt.config2 import config
 from metagpt.logs import logger
 from metagpt.utils import count_string_tokens
+from swe_bench.data.load_dataset import load_oracle_dataset
 from swe_bench.inference.run_agent import run_instance
 from swe_bench.utils.utils import check_existing_ids, extract_diff
-from swe_bench.data.load_dataset import load_oracle_dataset
 
 # Replace with your own
 MAX_TOKEN = 128000
diff --git a/swe_bench/make_datasets/make_dataset.py b/swe_bench/make_datasets/make_dataset.py
index c5df53b1d..233f6b01c 100644
--- a/swe_bench/make_datasets/make_dataset.py
+++ b/swe_bench/make_datasets/make_dataset.py
@@ -7,10 +7,10 @@ from pathlib import Path
 from tqdm.auto import tqdm
 
 from metagpt.logs import logger
+from swe_bench.inference.const import TESTBED
 from swe_bench.make_datasets.make_instance import prompt_style_2_edits_only
 from swe_bench.utils.parse_diff import filter_changed_line
 from swe_bench.utils.repo_utils import EnvManager
-from swe_bench.inference.const import TESTBED
 
 
 def reset_task_env(instance: dict = {}):

From 50f4953ea7d2d9f69a17ef7c6fa11a1b8a432507 Mon Sep 17 00:00:00 2001
From: stellahsr <stellahsr@126.com>
Date: Tue, 26 Mar 2024 15:28:37 +0800
Subject: [PATCH 5/7] update path

---
 {swe_bench => benchmark/swe_bench}/__init__.py            | 0
 {swe_bench => benchmark/swe_bench}/data/load_dataset.py   | 2 +-
 .../swe_bench/data/repos}/__init__.py                     | 0
 {swe_bench => benchmark/swe_bench}/gitagent.py            | 0
 .../swe_bench/inference}/__init__.py                      | 0
 {swe_bench => benchmark/swe_bench}/inference/const.py     | 2 +-
 {swe_bench => benchmark/swe_bench}/inference/run.py       | 0
 {swe_bench => benchmark/swe_bench}/inference/run_agent.py | 6 +++---
 {swe_bench => benchmark/swe_bench}/inference/run_api.py   | 6 +++---
 .../swe_bench/make_datasets}/__init__.py                  | 0
 .../swe_bench}/make_datasets/make_dataset.py              | 8 ++++----
 .../swe_bench}/make_datasets/make_instance.py             | 0
 benchmark/swe_bench/utils/__init__.py                     | 3 +++
 {swe_bench => benchmark/swe_bench}/utils/enums.py         | 0
 {swe_bench => benchmark/swe_bench}/utils/parse_diff.py    | 3 ++-
 {swe_bench => benchmark/swe_bench}/utils/repo_utils.py    | 0
 {swe_bench => benchmark/swe_bench}/utils/utils.py         | 0
 17 files changed, 17 insertions(+), 13 deletions(-)
 rename {swe_bench => benchmark/swe_bench}/__init__.py (100%)
 rename {swe_bench => benchmark/swe_bench}/data/load_dataset.py (94%)
 rename {swe_bench/inference => benchmark/swe_bench/data/repos}/__init__.py (100%)
 rename {swe_bench => benchmark/swe_bench}/gitagent.py (100%)
 rename {swe_bench/make_datasets => benchmark/swe_bench/inference}/__init__.py (100%)
 rename {swe_bench => benchmark/swe_bench}/inference/const.py (96%)
 rename {swe_bench => benchmark/swe_bench}/inference/run.py (100%)
 rename {swe_bench => benchmark/swe_bench}/inference/run_agent.py (93%)
 rename {swe_bench => benchmark/swe_bench}/inference/run_api.py (94%)
 rename {swe_bench/utils => benchmark/swe_bench/make_datasets}/__init__.py (100%)
 rename {swe_bench => benchmark/swe_bench}/make_datasets/make_dataset.py (86%)
 rename {swe_bench => benchmark/swe_bench}/make_datasets/make_instance.py (100%)
 create mode 100644 benchmark/swe_bench/utils/__init__.py
 rename {swe_bench => benchmark/swe_bench}/utils/enums.py (100%)
 rename {swe_bench => benchmark/swe_bench}/utils/parse_diff.py (99%)
 rename {swe_bench => benchmark/swe_bench}/utils/repo_utils.py (100%)
 rename {swe_bench => benchmark/swe_bench}/utils/utils.py (100%)

diff --git a/swe_bench/__init__.py b/benchmark/swe_bench/__init__.py
similarity index 100%
rename from swe_bench/__init__.py
rename to benchmark/swe_bench/__init__.py
diff --git a/swe_bench/data/load_dataset.py b/benchmark/swe_bench/data/load_dataset.py
similarity index 94%
rename from swe_bench/data/load_dataset.py
rename to benchmark/swe_bench/data/load_dataset.py
index 12f55da5a..03a4b7df7 100644
--- a/swe_bench/data/load_dataset.py
+++ b/benchmark/swe_bench/data/load_dataset.py
@@ -6,7 +6,7 @@ from pathlib import Path
 import numpy as np
 from datasets import load_dataset, load_from_disk
 
-from swe_bench.inference.const import SCIKIT_LEARN_IDS
+from benchmark.swe_bench.inference.const import SCIKIT_LEARN_IDS
 
 
 def load_oracle_dataset(dataset_name_or_path: str = "", split: str = "test", existing_ids: list = []):
diff --git a/swe_bench/inference/__init__.py b/benchmark/swe_bench/data/repos/__init__.py
similarity index 100%
rename from swe_bench/inference/__init__.py
rename to benchmark/swe_bench/data/repos/__init__.py
diff --git a/swe_bench/gitagent.py b/benchmark/swe_bench/gitagent.py
similarity index 100%
rename from swe_bench/gitagent.py
rename to benchmark/swe_bench/gitagent.py
diff --git a/swe_bench/make_datasets/__init__.py b/benchmark/swe_bench/inference/__init__.py
similarity index 100%
rename from swe_bench/make_datasets/__init__.py
rename to benchmark/swe_bench/inference/__init__.py
diff --git a/swe_bench/inference/const.py b/benchmark/swe_bench/inference/const.py
similarity index 96%
rename from swe_bench/inference/const.py
rename to benchmark/swe_bench/inference/const.py
index 9611a209c..4d616c5f3 100644
--- a/swe_bench/inference/const.py
+++ b/benchmark/swe_bench/inference/const.py
@@ -7,7 +7,7 @@ from metagpt.const import METAGPT_ROOT
 
 SUBSET_DATASET = METAGPT_ROOT / "sub_swebench_dataset" / "sub_swebench.csv"
 SUBSET_DATASET_SKLERARN = METAGPT_ROOT / "sub_swebench_dataset" / "scikit-learn-68.csv"
-TESTBED = METAGPT_ROOT / "swe-bench" / "data" / "repos"
+TESTBED = METAGPT_ROOT / "benchmark" / "swe-bench" / "data" / "repos"
 
 # SCIKIT_LEARN_IDS: A list of instance identifiers from 'sub_swebench.csv' within SUBSET_DATASET.
 # This collection represents a subset specifically related to scikit-learn content.
diff --git a/swe_bench/inference/run.py b/benchmark/swe_bench/inference/run.py
similarity index 100%
rename from swe_bench/inference/run.py
rename to benchmark/swe_bench/inference/run.py
diff --git a/swe_bench/inference/run_agent.py b/benchmark/swe_bench/inference/run_agent.py
similarity index 93%
rename from swe_bench/inference/run_agent.py
rename to benchmark/swe_bench/inference/run_agent.py
index 2e8c381ab..bdcad0bd9 100644
--- a/swe_bench/inference/run_agent.py
+++ b/benchmark/swe_bench/inference/run_agent.py
@@ -5,12 +5,12 @@ import re
 
 from tenacity import retry, stop_after_attempt, wait_random_exponential
 
+from benchmark.swe_bench.gitagent import GitAgent
+from benchmark.swe_bench.make_datasets.make_dataset import reset_task_env
+from benchmark.swe_bench.utils.utils import extract_scripts_from_codetext
 from metagpt.logs import logger
 from metagpt.utils.exceptions import handle_exception
 from metagpt.utils.recovery_util import save_history
-from swe_bench.gitagent import GitAgent
-from swe_bench.make_datasets.make_dataset import reset_task_env
-from swe_bench.utils.utils import extract_scripts_from_codetext
 
 PATCH_FORMAT = """
 ```diff
diff --git a/swe_bench/inference/run_api.py b/benchmark/swe_bench/inference/run_api.py
similarity index 94%
rename from swe_bench/inference/run_api.py
rename to benchmark/swe_bench/inference/run_api.py
index 37d87d6ce..e9d57f1a5 100644
--- a/swe_bench/inference/run_api.py
+++ b/benchmark/swe_bench/inference/run_api.py
@@ -4,12 +4,12 @@ from pathlib import Path
 import fire
 from tqdm.auto import tqdm
 
+from benchmark.swe_bench.data.load_dataset import load_oracle_dataset
+from benchmark.swe_bench.inference.run_agent import run_instance
+from benchmark.swe_bench.utils.utils import check_existing_ids, extract_diff
 from metagpt.config2 import config
 from metagpt.logs import logger
 from metagpt.utils import count_string_tokens
-from swe_bench.data.load_dataset import load_oracle_dataset
-from swe_bench.inference.run_agent import run_instance
-from swe_bench.utils.utils import check_existing_ids, extract_diff
 
 # Replace with your own
 MAX_TOKEN = 128000
diff --git a/swe_bench/utils/__init__.py b/benchmark/swe_bench/make_datasets/__init__.py
similarity index 100%
rename from swe_bench/utils/__init__.py
rename to benchmark/swe_bench/make_datasets/__init__.py
diff --git a/swe_bench/make_datasets/make_dataset.py b/benchmark/swe_bench/make_datasets/make_dataset.py
similarity index 86%
rename from swe_bench/make_datasets/make_dataset.py
rename to benchmark/swe_bench/make_datasets/make_dataset.py
index 233f6b01c..60c54181b 100644
--- a/swe_bench/make_datasets/make_dataset.py
+++ b/benchmark/swe_bench/make_datasets/make_dataset.py
@@ -6,11 +6,11 @@ from pathlib import Path
 
 from tqdm.auto import tqdm
 
+from benchmark.swe_bench.inference.const import TESTBED
+from benchmark.swe_bench.make_datasets.make_instance import prompt_style_2_edits_only
+from benchmark.swe_bench.utils.parse_diff import filter_changed_line
+from benchmark.swe_bench.utils.repo_utils import EnvManager
 from metagpt.logs import logger
-from swe_bench.inference.const import TESTBED
-from swe_bench.make_datasets.make_instance import prompt_style_2_edits_only
-from swe_bench.utils.parse_diff import filter_changed_line
-from swe_bench.utils.repo_utils import EnvManager
 
 
 def reset_task_env(instance: dict = {}):
diff --git a/swe_bench/make_datasets/make_instance.py b/benchmark/swe_bench/make_datasets/make_instance.py
similarity index 100%
rename from swe_bench/make_datasets/make_instance.py
rename to benchmark/swe_bench/make_datasets/make_instance.py
diff --git a/benchmark/swe_bench/utils/__init__.py b/benchmark/swe_bench/utils/__init__.py
new file mode 100644
index 000000000..f12b94354
--- /dev/null
+++ b/benchmark/swe_bench/utils/__init__.py
@@ -0,0 +1,3 @@
+# -*- coding: utf-8 -*-
+# @Author  : stellahong (stellahong@fuzhi.ai)
+# @Desc    :
diff --git a/swe_bench/utils/enums.py b/benchmark/swe_bench/utils/enums.py
similarity index 100%
rename from swe_bench/utils/enums.py
rename to benchmark/swe_bench/utils/enums.py
diff --git a/swe_bench/utils/parse_diff.py b/benchmark/swe_bench/utils/parse_diff.py
similarity index 99%
rename from swe_bench/utils/parse_diff.py
rename to benchmark/swe_bench/utils/parse_diff.py
index d2b32d2a1..6b0190e34 100644
--- a/swe_bench/utils/parse_diff.py
+++ b/benchmark/swe_bench/utils/parse_diff.py
@@ -1,9 +1,10 @@
 import re
 from typing import Dict, List
 
-from metagpt.logs import logger
 from swe_bench.utils.enums import FileChangeMode, LineChangeType
 
+from metagpt.logs import logger
+
 
 def extract_changes_from_patch(diff: str) -> List[Dict[str, any]]:
     """Parses the patch text through the standard syntax of git diff, outputs the information of added and deleted lines.
diff --git a/swe_bench/utils/repo_utils.py b/benchmark/swe_bench/utils/repo_utils.py
similarity index 100%
rename from swe_bench/utils/repo_utils.py
rename to benchmark/swe_bench/utils/repo_utils.py
diff --git a/swe_bench/utils/utils.py b/benchmark/swe_bench/utils/utils.py
similarity index 100%
rename from swe_bench/utils/utils.py
rename to benchmark/swe_bench/utils/utils.py

From f99c7b354eb243b5f0c8066ccd894b1205e2f323 Mon Sep 17 00:00:00 2001
From: stellahsr <stellahsr@126.com>
Date: Tue, 26 Mar 2024 15:39:10 +0800
Subject: [PATCH 6/7] fix typo and import error

---
 benchmark/swe_bench/inference/const.py  | 2 +-
 benchmark/swe_bench/utils/parse_diff.py | 3 +--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/benchmark/swe_bench/inference/const.py b/benchmark/swe_bench/inference/const.py
index 4d616c5f3..57c2bbe4b 100644
--- a/benchmark/swe_bench/inference/const.py
+++ b/benchmark/swe_bench/inference/const.py
@@ -7,7 +7,7 @@ from metagpt.const import METAGPT_ROOT
 
 SUBSET_DATASET = METAGPT_ROOT / "sub_swebench_dataset" / "sub_swebench.csv"
 SUBSET_DATASET_SKLERARN = METAGPT_ROOT / "sub_swebench_dataset" / "scikit-learn-68.csv"
-TESTBED = METAGPT_ROOT / "benchmark" / "swe-bench" / "data" / "repos"
+TESTBED = METAGPT_ROOT / "benchmark" / "swe_bench" / "data" / "repos"
 
 # SCIKIT_LEARN_IDS: A list of instance identifiers from 'sub_swebench.csv' within SUBSET_DATASET.
 # This collection represents a subset specifically related to scikit-learn content.
diff --git a/benchmark/swe_bench/utils/parse_diff.py b/benchmark/swe_bench/utils/parse_diff.py
index 6b0190e34..67c22d33c 100644
--- a/benchmark/swe_bench/utils/parse_diff.py
+++ b/benchmark/swe_bench/utils/parse_diff.py
@@ -1,8 +1,7 @@
 import re
 from typing import Dict, List
 
-from swe_bench.utils.enums import FileChangeMode, LineChangeType
-
+from benchmark.swe_bench.utils.enums import FileChangeMode, LineChangeType
 from metagpt.logs import logger
 
 

From 3b8c83db3b905fe6f58d08d5e6da74d31872148c Mon Sep 17 00:00:00 2001
From: stellahsr <stellahsr@126.com>
Date: Wed, 27 Mar 2024 11:31:34 +0800
Subject: [PATCH 7/7] mv dir

---
 .../sub_swebench_dataset}/readme.md                 |   0
 .../sub_swebench_dataset}/scikit-learn-68.csv       | Bin
 .../sub_swebench_dataset}/sub_swebench.csv          | Bin
 benchmark/swe_bench/data/load_dataset.py            |   4 ++--
 benchmark/swe_bench/inference/const.py              |   4 ++--
 5 files changed, 4 insertions(+), 4 deletions(-)
 rename {sub_swebench_dataset => benchmark/sub_swebench_dataset}/readme.md (100%)
 rename {sub_swebench_dataset => benchmark/sub_swebench_dataset}/scikit-learn-68.csv (100%)
 rename {sub_swebench_dataset => benchmark/sub_swebench_dataset}/sub_swebench.csv (100%)

diff --git a/sub_swebench_dataset/readme.md b/benchmark/sub_swebench_dataset/readme.md
similarity index 100%
rename from sub_swebench_dataset/readme.md
rename to benchmark/sub_swebench_dataset/readme.md
diff --git a/sub_swebench_dataset/scikit-learn-68.csv b/benchmark/sub_swebench_dataset/scikit-learn-68.csv
similarity index 100%
rename from sub_swebench_dataset/scikit-learn-68.csv
rename to benchmark/sub_swebench_dataset/scikit-learn-68.csv
diff --git a/sub_swebench_dataset/sub_swebench.csv b/benchmark/sub_swebench_dataset/sub_swebench.csv
similarity index 100%
rename from sub_swebench_dataset/sub_swebench.csv
rename to benchmark/sub_swebench_dataset/sub_swebench.csv
diff --git a/benchmark/swe_bench/data/load_dataset.py b/benchmark/swe_bench/data/load_dataset.py
index 03a4b7df7..715d33c2f 100644
--- a/benchmark/swe_bench/data/load_dataset.py
+++ b/benchmark/swe_bench/data/load_dataset.py
@@ -20,13 +20,13 @@ def load_oracle_dataset(dataset_name_or_path: str = "", split: str = "test", exi
     lens = np.array(list(map(len, dataset["text"])))
     dataset = dataset.select(np.argsort(lens))
 
-    if len(existing_ids) > 0:
+    if existing_ids:
         dataset = dataset.filter(
             lambda x: x["instance_id"] not in existing_ids,
             desc="Filtering out existing ids",
             load_from_cache_file=False,
         )
-    if len(SCIKIT_LEARN_IDS) > 0:
+    if SCIKIT_LEARN_IDS:
         dataset = dataset.filter(
             lambda x: x["instance_id"] in SCIKIT_LEARN_IDS,
             desc="Filtering out subset_instance_ids",
diff --git a/benchmark/swe_bench/inference/const.py b/benchmark/swe_bench/inference/const.py
index 57c2bbe4b..1183c1d7c 100644
--- a/benchmark/swe_bench/inference/const.py
+++ b/benchmark/swe_bench/inference/const.py
@@ -5,8 +5,8 @@ import pandas as pd
 
 from metagpt.const import METAGPT_ROOT
 
-SUBSET_DATASET = METAGPT_ROOT / "sub_swebench_dataset" / "sub_swebench.csv"
-SUBSET_DATASET_SKLERARN = METAGPT_ROOT / "sub_swebench_dataset" / "scikit-learn-68.csv"
+SUBSET_DATASET = METAGPT_ROOT / "benchmark" / "sub_swebench_dataset" / "sub_swebench.csv"
+SUBSET_DATASET_SKLERARN = METAGPT_ROOT / "benchmark" / "sub_swebench_dataset" / "scikit-learn-68.csv"
 TESTBED = METAGPT_ROOT / "benchmark" / "swe_bench" / "data" / "repos"
 
 # SCIKIT_LEARN_IDS: A list of instance identifiers from 'sub_swebench.csv' within SUBSET_DATASET.