This commit is contained in:
stellahsr 2024-03-27 11:31:34 +08:00
parent f99c7b354e
commit 3b8c83db3b
5 changed files with 4 additions and 4 deletions

View file

Can't render this file because it contains an unexpected character in line 2 and column 280.

View file

Can't render this file because it has a wrong number of fields in line 2.

View file

@ -20,13 +20,13 @@ def load_oracle_dataset(dataset_name_or_path: str = "", split: str = "test", exi
lens = np.array(list(map(len, dataset["text"])))
dataset = dataset.select(np.argsort(lens))
if len(existing_ids) > 0:
if existing_ids:
dataset = dataset.filter(
lambda x: x["instance_id"] not in existing_ids,
desc="Filtering out existing ids",
load_from_cache_file=False,
)
if len(SCIKIT_LEARN_IDS) > 0:
if SCIKIT_LEARN_IDS:
dataset = dataset.filter(
lambda x: x["instance_id"] in SCIKIT_LEARN_IDS,
desc="Filtering out subset_instance_ids",

View file

@ -5,8 +5,8 @@ import pandas as pd
from metagpt.const import METAGPT_ROOT
SUBSET_DATASET = METAGPT_ROOT / "sub_swebench_dataset" / "sub_swebench.csv"
SUBSET_DATASET_SKLERARN = METAGPT_ROOT / "sub_swebench_dataset" / "scikit-learn-68.csv"
SUBSET_DATASET = METAGPT_ROOT / "benchmark" / "swe_bench" / "sub_swebench_dataset" / "sub_swebench.csv"
SUBSET_DATASET_SKLERARN = METAGPT_ROOT / "benchmark" / "sub_swebench_dataset" / "scikit-learn-68.csv"
TESTBED = METAGPT_ROOT / "benchmark" / "swe_bench" / "data" / "repos"
# SCIKIT_LEARN_IDS: A list of instance identifiers from 'sub_swebench.csv' within SUBSET_DATASET.