mirror of
https://github.com/FoundationAgents/MetaGPT.git
synced 2026-06-11 15:15:18 +02:00
支持跑通mle bench
This commit is contained in:
parent
a91003a7fe
commit
1d4a845120
4 changed files with 34 additions and 8 deletions
|
|
@ -8,7 +8,7 @@ import shutil
|
|||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
from expo.data.custom_task import get_mle_bench_requirements
|
||||
from expo.data.custom_task import get_mle_bench_requirements, get_mle_task_id
|
||||
from expo.data.dataset import generate_task_requirement, get_split_dataset_path
|
||||
from expo.evaluation.evaluation import evaluate_score
|
||||
from expo.insights.instruction_generator import InstructionGenerator
|
||||
|
|
@ -35,6 +35,8 @@ def create_initial_state(
|
|||
datasets_dir = args.custom_dataset_dir
|
||||
requirement = get_mle_bench_requirements(args.custom_dataset_dir, data_config)
|
||||
exp_pool_path = None
|
||||
# external_eval = False # make sure external eval is false if custom dataset is used
|
||||
task = get_mle_task_id(args.custom_dataset_dir)
|
||||
else:
|
||||
dataset_config = data_config["datasets"][task]
|
||||
datasets_dir = get_split_dataset_path(task, data_config)
|
||||
|
|
@ -120,7 +122,7 @@ class Node:
|
|||
return f"{self.parent.id}-{num_sibling}"
|
||||
|
||||
def is_terminal(self):
    """Report whether this node sits at the terminal depth of the search.

    Returns:
        bool: True when ``state["start_task_id"]``, converted to ``int``,
        equals ``max_depth + 1``.
    """
    # TODO: Check if this is correct or +1
    current_task_id = int(self.state["start_task_id"])
    terminal_task_id = self.max_depth + 1
    return current_task_id == terminal_task_id
|
||||
|
||||
def is_fully_expanded(self):
    """Report whether this node already has at least one child.

    Returns:
        bool: True when ``self.children`` is non-empty.
    """
    child_count = len(self.children)
    return child_count > 0
|
||||
|
|
|
|||
|
|
@ -22,19 +22,26 @@ COMPETITION INSTRUCTIONS
|
|||
## More Instructions
|
||||
- output_dir: {output_dir}
|
||||
- Besides `submission.csv`, you should also save your output in the output directory.
|
||||
- Save the prediction results of BOTH the dev set and test set in `dev_predictions.csv` and `test_predictions.csv` respectively in the output directory.
|
||||
|
||||
Do not make visualizations.
|
||||
- You should split the training data into train and dev set.
|
||||
- Save the prediction results of BOTH the dev set and test set in `dev_predictions.csv` and `test_predictions.csv` respectively in the output directory. They should be in the same format as the `submission.csv`.
|
||||
- Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target.
|
||||
**Do not make any plots or visualizations.**
|
||||
"""
|
||||
|
||||
|
||||
def get_mle_task_id(dataset_dir):
    """Return the task (competition) id encoded in an MLE-bench dataset path.

    The id is the third-from-last ``/``-separated component of
    ``dataset_dir``. Presumably the directory follows the layout
    ``<data_root>/<competition_id>/prepared/public`` — TODO confirm
    against the caller.

    Args:
        dataset_dir: Path to the dataset directory, as a string using
            ``/`` separators. Trailing slashes are ignored.

    Returns:
        str: The third-from-last path component.

    Raises:
        IndexError: If the path has fewer than three components.
    """
    # Strip trailing separators first: otherwise ".../<id>/prepared/public/"
    # ends in an empty component and the index lands on "prepared".
    components = dataset_dir.rstrip("/").split("/")
    return components[-3]
|
||||
|
||||
|
||||
def get_mle_bench_requirements(dataset_dir, data_config, obfuscated=False):
|
||||
work_dir = data_config["work_dir"]
|
||||
output_dir = f"{work_dir}/output"
|
||||
task = get_mle_task_id(dataset_dir)
|
||||
output_dir = f"{work_dir}/{task}"
|
||||
final_output_dir = f"{work_dir}/submission"
|
||||
os.makedirs(output_dir, exist_ok=True)
|
||||
|
||||
if obfuscated:
|
||||
instructions = INSTRUCTIONS_OBFUSCATED.format(dataset_dir=dataset_dir, output_dir=output_dir)
|
||||
instructions = INSTRUCTIONS_OBFUSCATED.format(dataset_dir=dataset_dir, output_dir=final_output_dir)
|
||||
task_file = "description_obfuscated.md"
|
||||
else:
|
||||
instructions = INSTRUCTIONS.format(dataset_dir=dataset_dir, output_dir=output_dir)
|
||||
|
|
|
|||
|
|
@ -1,3 +1,5 @@
|
|||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
from sklearn.metrics import accuracy_score, f1_score, mean_squared_error, roc_auc_score
|
||||
|
||||
|
|
@ -33,4 +35,14 @@ def node_evaluate_score_sela(node):
|
|||
|
||||
def node_evaluate_score_mlebench(node):
    """Grade a node's test-set predictions with MLE-bench.

    Args:
        node: Search node; must expose ``state["task"]`` (used as the
            competition id) and ``get_predictions_path("test")``.

    Returns:
        dict: The grading report from ``grade_csv(...).to_dict()``, with an
        added ``"submission_path"`` entry holding the graded file path.
    """
    # Imported inside the function so mlebench is only needed when this
    # evaluator is actually called.
    from mlebench.grade import grade_csv
    from mlebench.registry import registry

    competition_id = node.state["task"]
    submission = Path(node.get_predictions_path("test"))

    # NOTE(review): re-setting the registry to its own data dir looks like it
    # only produces a copy with unchanged settings — confirm against mlebench.
    new_registry = registry.set_data_dir(Path(registry.get_data_dir()))
    competition = new_registry.get_competition(competition_id)

    report = grade_csv(submission, competition).to_dict()
    report["submission_path"] = str(submission)
    return report
|
||||
|
|
|
|||
|
|
@ -60,6 +60,11 @@ def get_di_args(parser):
|
|||
|
||||
|
||||
async def main(args):
|
||||
if args.custom_dataset_dir:
|
||||
args.external_eval = False
|
||||
args.eval_func = "mlebench"
|
||||
args.from_scratch = True
|
||||
|
||||
if args.exp_mode == "mcts":
|
||||
experimenter = MCTSExperimenter(args)
|
||||
elif args.exp_mode == "greedy":
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue