update code, change data path

2026-06-17 15:35:21 +02:00 · 2024-03-26 14:44:19 +08:00 · 2024-03-26 14:44:19 +08:00 · 91db2ef112
commit 91db2ef112
parent f26a5cd1de
5 changed files with 7 additions and 5 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -188,3 +188,4 @@ cov.xml
 *-structure.json
 *.dot
 .python-version
+/data/inference
--- a/swe_bench/data/load_dataset.py
+++ b/swe_bench/data/load_dataset.py
@@ -6,7 +6,7 @@ from pathlib import Path
 import numpy as np
 from datasets import load_dataset, load_from_disk

-from data.inference.const import SCIKIT_LEARN_IDS
+from swe_bench.inference.const import SCIKIT_LEARN_IDS


 def load_oracle_dataset(dataset_name_or_path: str = "", split: str = "test", existing_ids: list = []):
--- a/swe_bench/inference/const.py
+++ b/swe_bench/inference/const.py
@@ -7,7 +7,7 @@ from metagpt.const import DATA_PATH, METAGPT_ROOT

 SUBSET_DATASET = METAGPT_ROOT / "sub_swebench_dataset" / "sub_swebench.csv"
 SUBSET_DATASET_SKLERARN = METAGPT_ROOT / "sub_swebench_dataset" / "scikit-learn-68.csv"
-TESTBED = DATA_PATH / "repos"
+TESTBED = METAGPT_ROOT / "swe-bench" / "data" / "repos"

 # SCIKIT_LEARN_IDS: A list of instance identifiers from 'sub_swebench.csv' within SUBSET_DATASET.
 # This collection represents a subset specifically related to scikit-learn content.
--- a/swe_bench/inference/run_api.py
+++ b/swe_bench/inference/run_api.py
@@ -2,7 +2,7 @@ import json
 from pathlib import Path

 import fire
-from data.load_dataset import load_oracle_dataset
+
 from tqdm.auto import tqdm

 from metagpt.config2 import config
@@ -10,6 +10,7 @@ from metagpt.logs import logger
 from metagpt.utils import count_string_tokens
 from swe_bench.inference.run_agent import run_instance
 from swe_bench.utils.utils import check_existing_ids, extract_diff
+from swe_bench.data.load_dataset import load_oracle_dataset

 # Replace with your own
 MAX_TOKEN = 128000
@@ -56,7 +57,7 @@ async def openai_inference(
            logger.info(f"{repo_prefix}_{version}")
            data.append(f"{repo_prefix}_{version}")

-            response = await run_instance(instance=datum)
+            response = await run_instance(instance=datum, use_reflection=use_reflection)
            if response is None:
                continue
            logger.info(f"Final response: {response}")
--- a/swe_bench/make_datasets/make_dataset.py
+++ b/swe_bench/make_datasets/make_dataset.py
@@ -6,11 +6,11 @@ from pathlib import Path

 from tqdm.auto import tqdm

-from data.inference.const import TESTBED
 from metagpt.logs import logger
 from swe_bench.make_datasets.make_instance import prompt_style_2_edits_only
 from swe_bench.utils.parse_diff import filter_changed_line
 from swe_bench.utils.repo_utils import EnvManager
+from swe_bench.inference.const import TESTBED


 def reset_task_env(instance: dict = {}):