update readme

This commit is contained in:
Yizhou Chi 2024-09-03 13:40:23 +08:00
parent 6aafe680c1
commit 3ec6dcd5df
8 changed files with 181 additions and 81 deletions

View file

@ -300,7 +300,7 @@ class MCTS():
mcts_logger.log("MCTS", f"Tree loaded: {tree_loaded}")
if not tree_loaded:
rollouts -= 2
rollouts -= 2 # 2 rollouts for the initial tree
if rollouts < 0:
raise ValueError("Rollouts must be greater than 2 if there is no tree to load")
self.children[root] = []

View file

@ -1,25 +1,76 @@
# Expo
## Setup
In the root directory, `pip install -e .`
`cd expo`
`pip install -r requirements.txt`
## Instruction
## 1. Data Preparation
- 下载数据集https://deepwisdom.feishu.cn/drive/folder/RVyofv9cvlvtxKdddt2cyn3BnTc?from=from_copylink
- 修改`data.yaml``datasets_dir`为数据集合集根目录存储位置
## Examples
### Run Base DI
`python run_experiment.py --exp_mode base --task titanic`
## 2. Configs
### Run DI RandExp
### Data Config
`datasets.yaml` 提供数据集对应的指标和基础提示词
`data.yaml` 继承了`datasets.yaml`以及一些路径信息,需要将`datasets_dir`指到数据集合集的根目录下
### LLM Config
```
llm:
api_type: 'openai'
model: deepseek-coder
base_url: "https://oneapi.deepwisdom.ai/v1"
api_key: sk-xxx
temperature: 0.5
```
### Budget
实验轮次 k = 10, 20
### 提示词使用
通过执行`dataset.py`中的`generate_task_requirement`函数获取提示词
## 3. Evaluation
运行各个框架运行后框架需要提供Dev和Test的`dev_predictions.csv``test_predictions.csv` column name为target
两种评估方式
1. `evaluation.py` 提供pred和原始的gt1D iterable以及需要使用的metric返回evaluation score
2. 使用`CustomExperimenter`
```
experimenter = CustomExperimenter(task="titanic")
score_dict = experimenter.evaluate_pred_files(dev_pred_path, test_pred_path)
```
## 4. Baselines
### DS Agent
提供github链接并说明使用的命令以及参数设置
### AIDE
提供github链接并说明使用的命令以及参数设置
### Autogluon
提供github链接并说明使用的命令以及参数设置
### Base DI
For setup, check 5.
- `python run_experiment.py --exp_mode base --task titanic`
### DI RandomSearch
For setup, check 5.
- Single insight
`python run_experiment.py --exp_mode aug --task titanic --aug_mode single`
@ -28,30 +79,36 @@ ### Run DI RandExp
`python run_experiment.py --exp_mode aug --task titanic --aug_mode set`
## 5. DI MCTS
### Run DI MCTS
`python run_experiment.py --exp_mode mcts --task titanic --rollout 5`
#### Setup
In the root directory,
```
pip install -e .
cd expo
pip install -r requirements.txt
```
#### Run
- `python run_experiment.py --exp_mode mcts --task titanic --rollout 5`
If the dataset has reg metric, remember to use `--low_is_better`:
- `python run_experiment.py --exp_mode mcts --task househouse_prices --rollout 5 --low_is_better`
## Custom Experimenter
## Code and Configs Explanation
`datasets.yaml` 提供数据集对应的指标和基础提示词
`data.yaml` 继承了`datasets.yaml`以及一些路径信息,需要将`datasets_dir`指到数据集合集的根目录下
完整的DI提示词参考`dataset.py`中的`generate_task_requirement`函数
## Evaluation
`evaluation.py` 提供pred和原始的gt1D iterable以及需要使用的metric返回evaluation score

View file

@ -4,22 +4,23 @@ datasets:
titanic:
dataset: 04_titanic
metric: f1
target_col: Survived
user_requirement: "This is a 04_titanic dataset. Your goal is to predict the target\
\ column `Survived`.\nPerform data analysis, data preprocessing, feature engineering,\
\ and modeling to predict the target. \nReport f1 on the eval data. Do not plot\
\ or make any visualizations.\n"
house_prices:
house-prices:
dataset: 05_house-prices-advanced-regression-techniques
metric: rmse
target_col: SalePrice
user_requirement: "This is a 05_house-prices-advanced-regression-techniques dataset.\
\ Your goal is to predict the target column `SalePrice`.\nPerform data analysis,\
\ data preprocessing, feature engineering, and modeling to predict the target.\
\ \nReport rmse on the eval data. Do not plot or make any visualizations.\n"
santander_customers:
santander-customer:
dataset: 06_santander-customer-transaction-prediction
metric: f1
target_col: target
user_requirement: "This is a 06_santander-customer-transaction-prediction dataset.\
\ Your goal is to predict the target column `target`.\nPerform data analysis,\
\ data preprocessing, feature engineering, and modeling to predict the target.\
@ -27,126 +28,127 @@ datasets:
icr:
dataset: 07_icr-identify-age-related-conditions
metric: f1
target_col: Class
user_requirement: "This is a 07_icr-identify-age-related-conditions dataset. Your\
\ goal is to predict the target column `Class`.\nPerform data analysis, data\
\ preprocessing, feature engineering, and modeling to predict the target. \n\
Report f1 on the eval data. Do not plot or make any visualizations.\n"
lick_prediction_small:
Click_prediction_small:
dataset: Click_prediction_small
metric: f1
target_col: click
user_requirement: "This is a Click_prediction_small dataset. Your goal is to predict\
\ the target column `click`.\nPerform data analysis, data preprocessing, feature\
\ engineering, and modeling to predict the target. \nReport f1 on the eval data.\
\ Do not plot or make any visualizations.\n"
GesturePhaseSegmentationProcessed:
dataset: GesturePhaseSegmentationProcessed
metric: f1 weighted
target_col: Phase
user_requirement: "This is a GesturePhaseSegmentationProcessed dataset. Your goal\
\ is to predict the target column `Phase`.\nPerform data analysis, data preprocessing,\
\ feature engineering, and modeling to predict the target. \nReport f1 weighted\
\ on the eval data. Do not plot or make any visualizations.\n"
Moneyball:
dataset: Moneyball
metric: rmse
target_col: RS
user_requirement: "This is a Moneyball dataset. Your goal is to predict the target\
\ column `RS`.\nPerform data analysis, data preprocessing, feature engineering,\
\ and modeling to predict the target. \nReport rmse on the eval data. Do not\
\ plot or make any visualizations.\n"
SAT11-HAND-runtime-regression:
dataset: SAT11-HAND-runtime-regression
metric: rmse
target_col: runtime
user_requirement: "This is a SAT11-HAND-runtime-regression dataset. Your goal\
\ is to predict the target column `runtime`.\nPerform data analysis, data preprocessing,\
\ feature engineering, and modeling to predict the target. \nReport rmse on\
\ the eval data. Do not plot or make any visualizations.\n"
boston:
dataset: boston
metric: rmse
target_col: MEDV
user_requirement: "This is a boston dataset. Your goal is to predict the target\
\ column `MEDV`.\nPerform data analysis, data preprocessing, feature engineering,\
\ and modeling to predict the target. \nReport rmse on the eval data. Do not\
\ plot or make any visualizations.\n"
colleges:
dataset: colleges
metric: rmse
target_col: percent_pell_grant
user_requirement: "This is a colleges dataset. Your goal is to predict the target\
\ column `percent_pell_grant`.\nPerform data analysis, data preprocessing, feature\
\ engineering, and modeling to predict the target. \nReport rmse on the eval\
\ data. Do not plot or make any visualizations.\n"
credit-g:
dataset: credit-g
metric: f1
target_col: class
user_requirement: "This is a credit-g dataset. Your goal is to predict the target\
\ column `class`.\nPerform data analysis, data preprocessing, feature engineering,\
\ and modeling to predict the target. \nReport f1 on the eval data. Do not plot\
\ or make any visualizations.\n"
diamonds:
dataset: diamonds
metric: rmse
target_col: price
user_requirement: "This is a diamonds dataset. Your goal is to predict the target\
\ column `price`.\nPerform data analysis, data preprocessing, feature engineering,\
\ and modeling to predict the target. \nReport rmse on the eval data. Do not\
\ plot or make any visualizations.\n"
jasmine:
dataset: jasmine
metric: f1
target_col: class
user_requirement: "This is a jasmine dataset. Your goal is to predict the target\
\ column `class`.\nPerform data analysis, data preprocessing, feature engineering,\
\ and modeling to predict the target. \nReport f1 on the eval data. Do not plot\
\ or make any visualizations.\n"
kc1:
dataset: kc1
metric: f1
target_col: defects
user_requirement: "This is a kc1 dataset. Your goal is to predict the target column\
\ `defects`.\nPerform data analysis, data preprocessing, feature engineering,\
\ and modeling to predict the target. \nReport f1 on the eval data. Do not plot\
\ or make any visualizations.\n"
kick:
dataset: kick
metric: f1
target_col: IsBadBuy
user_requirement: "This is a kick dataset. Your goal is to predict the target\
\ column `IsBadBuy`.\nPerform data analysis, data preprocessing, feature engineering,\
\ and modeling to predict the target. \nReport f1 on the eval data. Do not plot\
\ or make any visualizations.\n"
mfeat-factors:
dataset: mfeat-factors
metric: f1 weighted
target_col: class
user_requirement: "This is a mfeat-factors dataset. Your goal is to predict the\
\ target column `class`.\nPerform data analysis, data preprocessing, feature\
\ engineering, and modeling to predict the target. \nReport f1 weighted on the\
\ eval data. Do not plot or make any visualizations.\n"
segment:
dataset: segment
metric: f1 weighted
target_col: class
user_requirement: "This is a segment dataset. Your goal is to predict the target\
\ column `class`.\nPerform data analysis, data preprocessing, feature engineering,\
\ and modeling to predict the target. \nReport f1 weighted on the eval data.\
\ Do not plot or make any visualizations.\n"
steel-plates-fault:
dataset: steel-plates-fault
metric: f1 weighted
target_col: target
user_requirement: "This is a steel-plates-fault dataset. Your goal is to predict\
\ the target column `target`.\nPerform data analysis, data preprocessing, feature\
\ engineering, and modeling to predict the target. \nReport f1 weighted on the\
\ eval data. Do not plot or make any visualizations.\n"
wine-quality-white:
dataset: wine-quality-white
metric: f1 weighted
target_col: Class
user_requirement: "This is a wine-quality-white dataset. Your goal is to predict\
\ the target column `Class`.\nPerform data analysis, data preprocessing, feature\
\ engineering, and modeling to predict the target. \nReport f1 weighted on the\

View file

@ -21,13 +21,13 @@ TASK_PROMPT = """\
1. Please do not leak the target label in any form during training.
2. Dev and Test sets do not have the target column.
3. You should perform transformations on all sets at the same step.
4. If labels are transformed during training, they should be transformed back to the original format before saving the predictions.
## Saving Dev and Test Predictions
1. Save the prediction results of BOTH the dev set and test set in `dev_predictions.csv` and `test_predictions.csv` respectively in the output directory.
- Both files should contain a single column named `target` with the predicted values.
2. Make sure the prediction results are in the same format as the target column in the training set.
- The labels should be transformed back to the original format if any transformation was applied during training.
- If the original target column was categorical or string, the predictions MUST be in the same format.
## Output Training Set Performance
Make sure the performance of the model is printed in python in the last step even if it has been printed in the previous steps. The value should be a float number.
@ -119,7 +119,8 @@ def create_dataset_dict(dataset):
dataset_dict = {
"dataset": dataset.name,
"user_requirement": dataset.create_base_requirement(),
"metric": dataset.get_metric()
"metric": dataset.get_metric(),
"target_col": dataset.target_col
}
return dataset_dict
@ -289,23 +290,24 @@ class OpenMLExpDataset(ExpDataset):
# def __init__(self, name, dataset_dir, dataset_name, **kwargs):
# super().__init__(name, dataset_dir, **kwargs)
async def process_dataset(dataset, solution_designer, save_analysis_pool, datasets_dict):
if save_analysis_pool:
asyncio.run(solution_designer.generate_solutions(dataset.get_dataset_info(), dataset.name))
dataset_dict = create_dataset_dict(dataset)
datasets_dict["datasets"][dataset.name] = dataset_dict
if __name__ == "__main__":
datasets_dir = "D:/work/automl/datasets"
force_update = True
force_update = False
save_analysis_pool = False
datasets_dict = {"datasets": {}}
solution_designer = SolutionDesigner()
for dataset_id in OPENML_DATASET_IDS:
openml_dataset = OpenMLExpDataset("", datasets_dir, dataset_id, force_update=force_update)
asyncio.run(solution_designer.generate_solutions(openml_dataset.get_dataset_info(), openml_dataset.name))
dataset_dict = create_dataset_dict(openml_dataset)
datasets_dict["datasets"][openml_dataset.name] = dataset_dict
asyncio.run(process_dataset(openml_dataset, solution_designer, save_analysis_pool, datasets_dict))
for dataset_name, target_col in CUSTOM_DATASETS:
custom_dataset = ExpDataset(dataset_name, datasets_dir, target_col=target_col, force_update=force_update)
asyncio.run(solution_designer.generate_solutions(custom_dataset.get_dataset_info(), custom_dataset.name))
dataset_dict = create_dataset_dict(custom_dataset)
datasets_dict["datasets"][custom_dataset.name] = dataset_dict
asyncio.run(process_dataset(custom_dataset, solution_designer, save_analysis_pool, datasets_dict))
save_datasets_dict_to_yaml(datasets_dict)

View file

@ -1,28 +1,32 @@
datasets:
04_titanic:
titanic:
dataset: 04_titanic
metric: f1
target_col: Survived
user_requirement: "This is a 04_titanic dataset. Your goal is to predict the target\
\ column `Survived`.\nPerform data analysis, data preprocessing, feature engineering,\
\ and modeling to predict the target. \nReport f1 on the eval data. Do not plot\
\ or make any visualizations.\n"
05_house-prices-advanced-regression-techniques:
house-prices:
dataset: 05_house-prices-advanced-regression-techniques
metric: rmse
target_col: SalePrice
user_requirement: "This is a 05_house-prices-advanced-regression-techniques dataset.\
\ Your goal is to predict the target column `SalePrice`.\nPerform data analysis,\
\ data preprocessing, feature engineering, and modeling to predict the target.\
\ \nReport rmse on the eval data. Do not plot or make any visualizations.\n"
06_santander-customer-transaction-prediction:
santander-customer:
dataset: 06_santander-customer-transaction-prediction
metric: f1
target_col: target
user_requirement: "This is a 06_santander-customer-transaction-prediction dataset.\
\ Your goal is to predict the target column `target`.\nPerform data analysis,\
\ data preprocessing, feature engineering, and modeling to predict the target.\
\ \nReport f1 on the eval data. Do not plot or make any visualizations.\n"
07_icr-identify-age-related-conditions:
icr:
dataset: 07_icr-identify-age-related-conditions
metric: f1
target_col: Class
user_requirement: "This is a 07_icr-identify-age-related-conditions dataset. Your\
\ goal is to predict the target column `Class`.\nPerform data analysis, data\
\ preprocessing, feature engineering, and modeling to predict the target. \n\
@ -30,6 +34,7 @@ datasets:
Click_prediction_small:
dataset: Click_prediction_small
metric: f1
target_col: click
user_requirement: "This is a Click_prediction_small dataset. Your goal is to predict\
\ the target column `click`.\nPerform data analysis, data preprocessing, feature\
\ engineering, and modeling to predict the target. \nReport f1 on the eval data.\
@ -37,6 +42,7 @@ datasets:
GesturePhaseSegmentationProcessed:
dataset: GesturePhaseSegmentationProcessed
metric: f1 weighted
target_col: Phase
user_requirement: "This is a GesturePhaseSegmentationProcessed dataset. Your goal\
\ is to predict the target column `Phase`.\nPerform data analysis, data preprocessing,\
\ feature engineering, and modeling to predict the target. \nReport f1 weighted\
@ -44,6 +50,7 @@ datasets:
Moneyball:
dataset: Moneyball
metric: rmse
target_col: RS
user_requirement: "This is a Moneyball dataset. Your goal is to predict the target\
\ column `RS`.\nPerform data analysis, data preprocessing, feature engineering,\
\ and modeling to predict the target. \nReport rmse on the eval data. Do not\
@ -51,6 +58,7 @@ datasets:
SAT11-HAND-runtime-regression:
dataset: SAT11-HAND-runtime-regression
metric: rmse
target_col: runtime
user_requirement: "This is a SAT11-HAND-runtime-regression dataset. Your goal\
\ is to predict the target column `runtime`.\nPerform data analysis, data preprocessing,\
\ feature engineering, and modeling to predict the target. \nReport rmse on\
@ -58,6 +66,7 @@ datasets:
boston:
dataset: boston
metric: rmse
target_col: MEDV
user_requirement: "This is a boston dataset. Your goal is to predict the target\
\ column `MEDV`.\nPerform data analysis, data preprocessing, feature engineering,\
\ and modeling to predict the target. \nReport rmse on the eval data. Do not\
@ -65,6 +74,7 @@ datasets:
colleges:
dataset: colleges
metric: rmse
target_col: percent_pell_grant
user_requirement: "This is a colleges dataset. Your goal is to predict the target\
\ column `percent_pell_grant`.\nPerform data analysis, data preprocessing, feature\
\ engineering, and modeling to predict the target. \nReport rmse on the eval\
@ -72,6 +82,7 @@ datasets:
credit-g:
dataset: credit-g
metric: f1
target_col: class
user_requirement: "This is a credit-g dataset. Your goal is to predict the target\
\ column `class`.\nPerform data analysis, data preprocessing, feature engineering,\
\ and modeling to predict the target. \nReport f1 on the eval data. Do not plot\
@ -79,6 +90,7 @@ datasets:
diamonds:
dataset: diamonds
metric: rmse
target_col: price
user_requirement: "This is a diamonds dataset. Your goal is to predict the target\
\ column `price`.\nPerform data analysis, data preprocessing, feature engineering,\
\ and modeling to predict the target. \nReport rmse on the eval data. Do not\
@ -86,6 +98,7 @@ datasets:
jasmine:
dataset: jasmine
metric: f1
target_col: class
user_requirement: "This is a jasmine dataset. Your goal is to predict the target\
\ column `class`.\nPerform data analysis, data preprocessing, feature engineering,\
\ and modeling to predict the target. \nReport f1 on the eval data. Do not plot\
@ -93,6 +106,7 @@ datasets:
kc1:
dataset: kc1
metric: f1
target_col: defects
user_requirement: "This is a kc1 dataset. Your goal is to predict the target column\
\ `defects`.\nPerform data analysis, data preprocessing, feature engineering,\
\ and modeling to predict the target. \nReport f1 on the eval data. Do not plot\
@ -100,6 +114,7 @@ datasets:
kick:
dataset: kick
metric: f1
target_col: IsBadBuy
user_requirement: "This is a kick dataset. Your goal is to predict the target\
\ column `IsBadBuy`.\nPerform data analysis, data preprocessing, feature engineering,\
\ and modeling to predict the target. \nReport f1 on the eval data. Do not plot\
@ -107,6 +122,7 @@ datasets:
mfeat-factors:
dataset: mfeat-factors
metric: f1 weighted
target_col: class
user_requirement: "This is a mfeat-factors dataset. Your goal is to predict the\
\ target column `class`.\nPerform data analysis, data preprocessing, feature\
\ engineering, and modeling to predict the target. \nReport f1 weighted on the\
@ -114,6 +130,7 @@ datasets:
segment:
dataset: segment
metric: f1 weighted
target_col: class
user_requirement: "This is a segment dataset. Your goal is to predict the target\
\ column `class`.\nPerform data analysis, data preprocessing, feature engineering,\
\ and modeling to predict the target. \nReport f1 weighted on the eval data.\
@ -121,6 +138,7 @@ datasets:
steel-plates-fault:
dataset: steel-plates-fault
metric: f1 weighted
target_col: target
user_requirement: "This is a steel-plates-fault dataset. Your goal is to predict\
\ the target column `target`.\nPerform data analysis, data preprocessing, feature\
\ engineering, and modeling to predict the target. \nReport f1 weighted on the\
@ -128,6 +146,7 @@ datasets:
wine-quality-white:
dataset: wine-quality-white
metric: f1 weighted
target_col: Class
user_requirement: "This is a wine-quality-white dataset. Your goal is to predict\
\ the target column `Class`.\nPerform data analysis, data preprocessing, feature\
\ engineering, and modeling to predict the target. \nReport f1 weighted on the\

View file

@ -9,10 +9,12 @@ class CustomExperimenter(Experimenter):
def __init__(self, args, **kwargs):
super().__init__(args, **kwargs)
self.framework = kwargs["framework"]
self.framework = kwargs["framework"] # todo
self.task = kwargs.get("task", self.args.task)
self.low_is_better = kwargs.get("low_is_better", self.args.low_is_better)
self.name = kwargs.get("name", "")
self.result_path = f"results/custom_{self.name}"
self.state = create_initial_state(self.args.task, start_task_id=1, data_config=self.data_config, low_is_better=self.args.low_is_better, name="")
self.state = create_initial_state(self.task, start_task_id=1, data_config=self.data_config, low_is_better=self.low_is_better, name=self.name)
async def run_experiment(self):
user_requirement = self.state["requirement"]
@ -30,6 +32,15 @@ class CustomExperimenter(Experimenter):
}
self.save_result(results)
def evaluate_pred_files(self, dev_pred_path, test_pred_path):
dev_preds = pd.read_csv(dev_pred_path)["target"]
test_preds = pd.read_csv(test_pred_path)["target"]
score_dict = {
"dev_score": self.evaluate_score(dev_preds, "dev"),
"test_score": self.evaluate_score(test_preds, "test")
}
return score_dict
def evaluate_predictions(self, preds, split):
metric = self.state["dataset_config"]["metric"]
gt_path = os.path.join(self.state["datasets_dir"][f"{split}_target"])

View file

@ -20,16 +20,24 @@ class Experimenter:
async def run_experiment(self):
state = create_initial_state(self.args.task, start_task_id=1, data_config=self.data_config, low_is_better=self.args.low_is_better, name="")
user_requirement = state["requirement"]
di = ResearchAssistant(node_id="0", use_reflection=self.args.reflection)
await di.run(user_requirement)
score_dict = await di.get_score()
score_dict = self.evaluate(score_dict, state)
results = {
"score_dict": score_dict,
"user_requirement": user_requirement,
"args": vars(self.args)
}
results = []
for i in range(self.args.num_experiments):
di = ResearchAssistant(node_id="0", use_reflection=self.args.reflection)
await di.run(user_requirement)
score_dict = await di.get_score()
score_dict = self.evaluate(score_dict, state)
results.append({
"idx": i,
"score_dict": score_dict,
"user_requirement": user_requirement,
"args": vars(self.args)
})
scores = [result["score_dict"]["test_score"] for result in results]
avg_score = sum(scores) / len(scores)
best_score = max(scores) if not self.args.low_is_better else min(scores)
best_score_idx = scores.index(best_score)
results.insert(0, {"avg_score": avg_score, "best_score": best_score, "best_score_idx": best_score_idx})
self.save_result(results)
def evaluate_prediction(self, split, state):

View file

@ -22,18 +22,19 @@ class MCTSExperimenter(Experimenter):
text += f"Best node: {best_node}, score: {best_node.raw_reward}\n"
text += f"Dev best node: {dev_best_node}, score: {dev_best_node.raw_reward}\n"
print(text)
self.save_tree(text)
if self.args.rollouts > 0:
self.save_tree(text)
results = {
"best_node": best_node.id,
"best_node_score": best_node.raw_reward,
"dev_best_node": dev_best_node.id,
"dev_best_node_score": dev_best_node.raw_reward,
"num_generated_codes": num_generated_codes,
"user_requirement": best_node.state["requirement"],
"args": vars(self.args)
}
self.save_result(results)
results = {
"best_node": best_node.id,
"best_node_score": best_node.raw_reward,
"dev_best_node": dev_best_node.id,
"dev_best_node_score": dev_best_node.raw_reward,
"num_generated_codes": num_generated_codes,
"user_requirement": best_node.state["requirement"],
"args": vars(self.args)
}
self.save_result(results)