mirror of
https://github.com/FoundationAgents/MetaGPT.git
synced 2026-06-11 15:15:18 +02:00
update readme
This commit is contained in:
parent
6aafe680c1
commit
3ec6dcd5df
8 changed files with 181 additions and 81 deletions
|
|
@ -300,7 +300,7 @@ class MCTS():
|
|||
mcts_logger.log("MCTS", f"Tree loaded: {tree_loaded}")
|
||||
|
||||
if not tree_loaded:
|
||||
rollouts -= 2
|
||||
rollouts -= 2 # 2 rollouts for the initial tree
|
||||
if rollouts < 0:
|
||||
raise ValueError("Rollouts must be greater than 2 if there is no tree to load")
|
||||
self.children[root] = []
|
||||
|
|
|
|||
101
expo/README.md
101
expo/README.md
|
|
@ -1,25 +1,76 @@
|
|||
# Expo
|
||||
|
||||
|
||||
## Setup
|
||||
In the root directory, `pip install -e .`
|
||||
|
||||
`cd expo`
|
||||
|
||||
`pip install -r requirements.txt`
|
||||
|
||||
## Instruction
|
||||
## 1. Data Preparation
|
||||
|
||||
- 下载数据集:https://deepwisdom.feishu.cn/drive/folder/RVyofv9cvlvtxKdddt2cyn3BnTc?from=from_copylink
|
||||
- 修改`data.yaml`的`datasets_dir`为数据集合集根目录存储位置
|
||||
|
||||
## Examples
|
||||
|
||||
### Run Base DI
|
||||
|
||||
`python run_experiment.py --exp_mode base --task titanic`
|
||||
## 2. Configs
|
||||
|
||||
### Run DI RandExp
|
||||
### Data Config
|
||||
|
||||
`datasets.yaml` 提供数据集对应的指标和基础提示词
|
||||
|
||||
`data.yaml` 继承了`datasets.yaml`以及一些路径信息,需要将`datasets_dir`指到数据集合集的根目录下
|
||||
|
||||
|
||||
### LLM Config
|
||||
|
||||
```
|
||||
llm:
|
||||
api_type: 'openai'
|
||||
model: deepseek-coder
|
||||
base_url: "https://oneapi.deepwisdom.ai/v1"
|
||||
api_key: sk-xxx
|
||||
temperature: 0.5
|
||||
```
|
||||
|
||||
### Budget
|
||||
实验轮次 k = 10, 20
|
||||
|
||||
|
||||
### 提示词使用
|
||||
|
||||
通过执行`dataset.py`中的`generate_task_requirement`函数获取提示词
|
||||
|
||||
|
||||
## 3. Evaluation
|
||||
|
||||
运行各个框架,运行后框架需要提供Dev和Test的`dev_predictions.csv`和`test_predictions.csv`, column name为target
|
||||
|
||||
两种评估方式
|
||||
|
||||
1. `evaluation.py` 提供pred和原始的gt(1D iterable)以及需要使用的metric,返回evaluation score
|
||||
|
||||
2. 使用`CustomExperimenter`
|
||||
```
|
||||
experimenter = CustomExperimenter(task="titanic")
|
||||
score_dict = experimenter.evaluate_pred_files(dev_pred_path, test_pred_path)
|
||||
```
|
||||
|
||||
## 4. Baselines
|
||||
### DS Agent
|
||||
提供github链接,并说明使用的命令以及参数设置
|
||||
|
||||
|
||||
### AIDE
|
||||
提供github链接,并说明使用的命令以及参数设置
|
||||
|
||||
### Autogluon
|
||||
提供github链接,并说明使用的命令以及参数设置
|
||||
|
||||
### Base DI
|
||||
For setup, check 5.
|
||||
|
||||
- `python run_experiment.py --exp_mode base --task titanic`
|
||||
|
||||
|
||||
### DI RandomSearch
|
||||
For setup, check 5.
|
||||
|
||||
- Single insight
|
||||
`python run_experiment.py --exp_mode aug --task titanic --aug_mode single`
|
||||
|
|
@ -28,30 +79,36 @@ ### Run DI RandExp
|
|||
`python run_experiment.py --exp_mode aug --task titanic --aug_mode set`
|
||||
|
||||
|
||||
## 5. DI MCTS
|
||||
|
||||
### Run DI MCTS
|
||||
`python run_experiment.py --exp_mode mcts --task titanic --rollout 5`
|
||||
|
||||
#### Setup
|
||||
In the root directory,
|
||||
|
||||
```
|
||||
pip install -e .
|
||||
|
||||
cd expo
|
||||
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
#### Run
|
||||
|
||||
- `python run_experiment.py --exp_mode mcts --task titanic --rollout 5`
|
||||
|
||||
If the dataset has reg metric, remember to use `--low_is_better`:
|
||||
|
||||
- `python run_experiment.py --exp_mode mcts --task househouse_prices --rollout 5 --low_is_better`
|
||||
|
||||
## Custom Experimenter
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
## Code and Configs Explanation
|
||||
|
||||
`datasets.yaml` 提供数据集对应的指标和基础提示词
|
||||
|
||||
`data.yaml` 继承了`datasets.yaml`以及一些路径信息,需要将`datasets_dir`指到数据集合集的根目录下
|
||||
|
||||
完整的DI提示词参考`dataset.py`中的`generate_task_requirement`函数
|
||||
|
||||
|
||||
## Evaluation
|
||||
|
||||
`evaluation.py` 提供pred和原始的gt(1D iterable)以及需要使用的metric,返回evaluation score
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -4,22 +4,23 @@ datasets:
|
|||
titanic:
|
||||
dataset: 04_titanic
|
||||
metric: f1
|
||||
target_col: Survived
|
||||
user_requirement: "This is a 04_titanic dataset. Your goal is to predict the target\
|
||||
\ column `Survived`.\nPerform data analysis, data preprocessing, feature engineering,\
|
||||
\ and modeling to predict the target. \nReport f1 on the eval data. Do not plot\
|
||||
\ or make any visualizations.\n"
|
||||
|
||||
house_prices:
|
||||
house-prices:
|
||||
dataset: 05_house-prices-advanced-regression-techniques
|
||||
metric: rmse
|
||||
target_col: SalePrice
|
||||
user_requirement: "This is a 05_house-prices-advanced-regression-techniques dataset.\
|
||||
\ Your goal is to predict the target column `SalePrice`.\nPerform data analysis,\
|
||||
\ data preprocessing, feature engineering, and modeling to predict the target.\
|
||||
\ \nReport rmse on the eval data. Do not plot or make any visualizations.\n"
|
||||
|
||||
santander_customers:
|
||||
santander-customer:
|
||||
dataset: 06_santander-customer-transaction-prediction
|
||||
metric: f1
|
||||
target_col: target
|
||||
user_requirement: "This is a 06_santander-customer-transaction-prediction dataset.\
|
||||
\ Your goal is to predict the target column `target`.\nPerform data analysis,\
|
||||
\ data preprocessing, feature engineering, and modeling to predict the target.\
|
||||
|
|
@ -27,126 +28,127 @@ datasets:
|
|||
icr:
|
||||
dataset: 07_icr-identify-age-related-conditions
|
||||
metric: f1
|
||||
target_col: Class
|
||||
user_requirement: "This is a 07_icr-identify-age-related-conditions dataset. Your\
|
||||
\ goal is to predict the target column `Class`.\nPerform data analysis, data\
|
||||
\ preprocessing, feature engineering, and modeling to predict the target. \n\
|
||||
Report f1 on the eval data. Do not plot or make any visualizations.\n"
|
||||
|
||||
lick_prediction_small:
|
||||
Click_prediction_small:
|
||||
dataset: Click_prediction_small
|
||||
metric: f1
|
||||
target_col: click
|
||||
user_requirement: "This is a Click_prediction_small dataset. Your goal is to predict\
|
||||
\ the target column `click`.\nPerform data analysis, data preprocessing, feature\
|
||||
\ engineering, and modeling to predict the target. \nReport f1 on the eval data.\
|
||||
\ Do not plot or make any visualizations.\n"
|
||||
|
||||
GesturePhaseSegmentationProcessed:
|
||||
dataset: GesturePhaseSegmentationProcessed
|
||||
metric: f1 weighted
|
||||
target_col: Phase
|
||||
user_requirement: "This is a GesturePhaseSegmentationProcessed dataset. Your goal\
|
||||
\ is to predict the target column `Phase`.\nPerform data analysis, data preprocessing,\
|
||||
\ feature engineering, and modeling to predict the target. \nReport f1 weighted\
|
||||
\ on the eval data. Do not plot or make any visualizations.\n"
|
||||
|
||||
Moneyball:
|
||||
dataset: Moneyball
|
||||
metric: rmse
|
||||
target_col: RS
|
||||
user_requirement: "This is a Moneyball dataset. Your goal is to predict the target\
|
||||
\ column `RS`.\nPerform data analysis, data preprocessing, feature engineering,\
|
||||
\ and modeling to predict the target. \nReport rmse on the eval data. Do not\
|
||||
\ plot or make any visualizations.\n"
|
||||
|
||||
SAT11-HAND-runtime-regression:
|
||||
dataset: SAT11-HAND-runtime-regression
|
||||
metric: rmse
|
||||
target_col: runtime
|
||||
user_requirement: "This is a SAT11-HAND-runtime-regression dataset. Your goal\
|
||||
\ is to predict the target column `runtime`.\nPerform data analysis, data preprocessing,\
|
||||
\ feature engineering, and modeling to predict the target. \nReport rmse on\
|
||||
\ the eval data. Do not plot or make any visualizations.\n"
|
||||
|
||||
boston:
|
||||
dataset: boston
|
||||
metric: rmse
|
||||
target_col: MEDV
|
||||
user_requirement: "This is a boston dataset. Your goal is to predict the target\
|
||||
\ column `MEDV`.\nPerform data analysis, data preprocessing, feature engineering,\
|
||||
\ and modeling to predict the target. \nReport rmse on the eval data. Do not\
|
||||
\ plot or make any visualizations.\n"
|
||||
|
||||
colleges:
|
||||
dataset: colleges
|
||||
metric: rmse
|
||||
target_col: percent_pell_grant
|
||||
user_requirement: "This is a colleges dataset. Your goal is to predict the target\
|
||||
\ column `percent_pell_grant`.\nPerform data analysis, data preprocessing, feature\
|
||||
\ engineering, and modeling to predict the target. \nReport rmse on the eval\
|
||||
\ data. Do not plot or make any visualizations.\n"
|
||||
|
||||
credit-g:
|
||||
dataset: credit-g
|
||||
metric: f1
|
||||
target_col: class
|
||||
user_requirement: "This is a credit-g dataset. Your goal is to predict the target\
|
||||
\ column `class`.\nPerform data analysis, data preprocessing, feature engineering,\
|
||||
\ and modeling to predict the target. \nReport f1 on the eval data. Do not plot\
|
||||
\ or make any visualizations.\n"
|
||||
|
||||
diamonds:
|
||||
dataset: diamonds
|
||||
metric: rmse
|
||||
target_col: price
|
||||
user_requirement: "This is a diamonds dataset. Your goal is to predict the target\
|
||||
\ column `price`.\nPerform data analysis, data preprocessing, feature engineering,\
|
||||
\ and modeling to predict the target. \nReport rmse on the eval data. Do not\
|
||||
\ plot or make any visualizations.\n"
|
||||
|
||||
jasmine:
|
||||
dataset: jasmine
|
||||
metric: f1
|
||||
target_col: class
|
||||
user_requirement: "This is a jasmine dataset. Your goal is to predict the target\
|
||||
\ column `class`.\nPerform data analysis, data preprocessing, feature engineering,\
|
||||
\ and modeling to predict the target. \nReport f1 on the eval data. Do not plot\
|
||||
\ or make any visualizations.\n"
|
||||
|
||||
kc1:
|
||||
dataset: kc1
|
||||
metric: f1
|
||||
target_col: defects
|
||||
user_requirement: "This is a kc1 dataset. Your goal is to predict the target column\
|
||||
\ `defects`.\nPerform data analysis, data preprocessing, feature engineering,\
|
||||
\ and modeling to predict the target. \nReport f1 on the eval data. Do not plot\
|
||||
\ or make any visualizations.\n"
|
||||
|
||||
kick:
|
||||
dataset: kick
|
||||
metric: f1
|
||||
target_col: IsBadBuy
|
||||
user_requirement: "This is a kick dataset. Your goal is to predict the target\
|
||||
\ column `IsBadBuy`.\nPerform data analysis, data preprocessing, feature engineering,\
|
||||
\ and modeling to predict the target. \nReport f1 on the eval data. Do not plot\
|
||||
\ or make any visualizations.\n"
|
||||
|
||||
mfeat-factors:
|
||||
dataset: mfeat-factors
|
||||
metric: f1 weighted
|
||||
target_col: class
|
||||
user_requirement: "This is a mfeat-factors dataset. Your goal is to predict the\
|
||||
\ target column `class`.\nPerform data analysis, data preprocessing, feature\
|
||||
\ engineering, and modeling to predict the target. \nReport f1 weighted on the\
|
||||
\ eval data. Do not plot or make any visualizations.\n"
|
||||
|
||||
segment:
|
||||
dataset: segment
|
||||
metric: f1 weighted
|
||||
target_col: class
|
||||
user_requirement: "This is a segment dataset. Your goal is to predict the target\
|
||||
\ column `class`.\nPerform data analysis, data preprocessing, feature engineering,\
|
||||
\ and modeling to predict the target. \nReport f1 weighted on the eval data.\
|
||||
\ Do not plot or make any visualizations.\n"
|
||||
|
||||
steel-plates-fault:
|
||||
dataset: steel-plates-fault
|
||||
metric: f1 weighted
|
||||
target_col: target
|
||||
user_requirement: "This is a steel-plates-fault dataset. Your goal is to predict\
|
||||
\ the target column `target`.\nPerform data analysis, data preprocessing, feature\
|
||||
\ engineering, and modeling to predict the target. \nReport f1 weighted on the\
|
||||
\ eval data. Do not plot or make any visualizations.\n"
|
||||
|
||||
wine-quality-white:
|
||||
dataset: wine-quality-white
|
||||
metric: f1 weighted
|
||||
target_col: Class
|
||||
user_requirement: "This is a wine-quality-white dataset. Your goal is to predict\
|
||||
\ the target column `Class`.\nPerform data analysis, data preprocessing, feature\
|
||||
\ engineering, and modeling to predict the target. \nReport f1 weighted on the\
|
||||
|
|
|
|||
|
|
@ -21,13 +21,13 @@ TASK_PROMPT = """\
|
|||
1. Please do not leak the target label in any form during training.
|
||||
2. Dev and Test sets do not have the target column.
|
||||
3. You should perform transformations on all sets at the same step.
|
||||
4. If labels are transformed during training, they should be transformed back to the original format before saving the predictions.
|
||||
|
||||
## Saving Dev and Test Predictions
|
||||
1. Save the prediction results of BOTH the dev set and test set in `dev_predictions.csv` and `test_predictions.csv` respectively in the output directory.
|
||||
- Both files should contain a single column named `target` with the predicted values.
|
||||
2. Make sure the prediction results are in the same format as the target column in the training set.
|
||||
- The labels should be transformed back to the original format if any transformation was applied during training.
|
||||
- If the original target column was categorical or string, the predictions MUST be in the same format.
|
||||
|
||||
## Output Training Set Performance
|
||||
Make sure the performance of the model is printed in python in the last step even if it has been printed in the previous steps. The value should be a float number.
|
||||
|
|
@ -119,7 +119,8 @@ def create_dataset_dict(dataset):
|
|||
dataset_dict = {
|
||||
"dataset": dataset.name,
|
||||
"user_requirement": dataset.create_base_requirement(),
|
||||
"metric": dataset.get_metric()
|
||||
"metric": dataset.get_metric(),
|
||||
"target_col": dataset.target_col
|
||||
}
|
||||
return dataset_dict
|
||||
|
||||
|
|
@ -289,23 +290,24 @@ class OpenMLExpDataset(ExpDataset):
|
|||
# def __init__(self, name, dataset_dir, dataset_name, **kwargs):
|
||||
# super().__init__(name, dataset_dir, **kwargs)
|
||||
|
||||
|
||||
async def process_dataset(dataset, solution_designer, save_analysis_pool, datasets_dict):
|
||||
if save_analysis_pool:
|
||||
asyncio.run(solution_designer.generate_solutions(dataset.get_dataset_info(), dataset.name))
|
||||
dataset_dict = create_dataset_dict(dataset)
|
||||
datasets_dict["datasets"][dataset.name] = dataset_dict
|
||||
|
||||
if __name__ == "__main__":
|
||||
datasets_dir = "D:/work/automl/datasets"
|
||||
force_update = True
|
||||
force_update = False
|
||||
save_analysis_pool = False
|
||||
datasets_dict = {"datasets": {}}
|
||||
solution_designer = SolutionDesigner()
|
||||
for dataset_id in OPENML_DATASET_IDS:
|
||||
openml_dataset = OpenMLExpDataset("", datasets_dir, dataset_id, force_update=force_update)
|
||||
asyncio.run(solution_designer.generate_solutions(openml_dataset.get_dataset_info(), openml_dataset.name))
|
||||
dataset_dict = create_dataset_dict(openml_dataset)
|
||||
datasets_dict["datasets"][openml_dataset.name] = dataset_dict
|
||||
asyncio.run(process_dataset(openml_dataset, solution_designer, save_analysis_pool, datasets_dict))
|
||||
|
||||
for dataset_name, target_col in CUSTOM_DATASETS:
|
||||
custom_dataset = ExpDataset(dataset_name, datasets_dir, target_col=target_col, force_update=force_update)
|
||||
asyncio.run(solution_designer.generate_solutions(custom_dataset.get_dataset_info(), custom_dataset.name))
|
||||
dataset_dict = create_dataset_dict(custom_dataset)
|
||||
datasets_dict["datasets"][custom_dataset.name] = dataset_dict
|
||||
|
||||
asyncio.run(process_dataset(custom_dataset, solution_designer, save_analysis_pool, datasets_dict))
|
||||
|
||||
save_datasets_dict_to_yaml(datasets_dict)
|
||||
|
|
|
|||
|
|
@ -1,28 +1,32 @@
|
|||
datasets:
|
||||
04_titanic:
|
||||
titanic:
|
||||
dataset: 04_titanic
|
||||
metric: f1
|
||||
target_col: Survived
|
||||
user_requirement: "This is a 04_titanic dataset. Your goal is to predict the target\
|
||||
\ column `Survived`.\nPerform data analysis, data preprocessing, feature engineering,\
|
||||
\ and modeling to predict the target. \nReport f1 on the eval data. Do not plot\
|
||||
\ or make any visualizations.\n"
|
||||
05_house-prices-advanced-regression-techniques:
|
||||
house-prices:
|
||||
dataset: 05_house-prices-advanced-regression-techniques
|
||||
metric: rmse
|
||||
target_col: SalePrice
|
||||
user_requirement: "This is a 05_house-prices-advanced-regression-techniques dataset.\
|
||||
\ Your goal is to predict the target column `SalePrice`.\nPerform data analysis,\
|
||||
\ data preprocessing, feature engineering, and modeling to predict the target.\
|
||||
\ \nReport rmse on the eval data. Do not plot or make any visualizations.\n"
|
||||
06_santander-customer-transaction-prediction:
|
||||
santander-customer:
|
||||
dataset: 06_santander-customer-transaction-prediction
|
||||
metric: f1
|
||||
target_col: target
|
||||
user_requirement: "This is a 06_santander-customer-transaction-prediction dataset.\
|
||||
\ Your goal is to predict the target column `target`.\nPerform data analysis,\
|
||||
\ data preprocessing, feature engineering, and modeling to predict the target.\
|
||||
\ \nReport f1 on the eval data. Do not plot or make any visualizations.\n"
|
||||
07_icr-identify-age-related-conditions:
|
||||
icr:
|
||||
dataset: 07_icr-identify-age-related-conditions
|
||||
metric: f1
|
||||
target_col: Class
|
||||
user_requirement: "This is a 07_icr-identify-age-related-conditions dataset. Your\
|
||||
\ goal is to predict the target column `Class`.\nPerform data analysis, data\
|
||||
\ preprocessing, feature engineering, and modeling to predict the target. \n\
|
||||
|
|
@ -30,6 +34,7 @@ datasets:
|
|||
Click_prediction_small:
|
||||
dataset: Click_prediction_small
|
||||
metric: f1
|
||||
target_col: click
|
||||
user_requirement: "This is a Click_prediction_small dataset. Your goal is to predict\
|
||||
\ the target column `click`.\nPerform data analysis, data preprocessing, feature\
|
||||
\ engineering, and modeling to predict the target. \nReport f1 on the eval data.\
|
||||
|
|
@ -37,6 +42,7 @@ datasets:
|
|||
GesturePhaseSegmentationProcessed:
|
||||
dataset: GesturePhaseSegmentationProcessed
|
||||
metric: f1 weighted
|
||||
target_col: Phase
|
||||
user_requirement: "This is a GesturePhaseSegmentationProcessed dataset. Your goal\
|
||||
\ is to predict the target column `Phase`.\nPerform data analysis, data preprocessing,\
|
||||
\ feature engineering, and modeling to predict the target. \nReport f1 weighted\
|
||||
|
|
@ -44,6 +50,7 @@ datasets:
|
|||
Moneyball:
|
||||
dataset: Moneyball
|
||||
metric: rmse
|
||||
target_col: RS
|
||||
user_requirement: "This is a Moneyball dataset. Your goal is to predict the target\
|
||||
\ column `RS`.\nPerform data analysis, data preprocessing, feature engineering,\
|
||||
\ and modeling to predict the target. \nReport rmse on the eval data. Do not\
|
||||
|
|
@ -51,6 +58,7 @@ datasets:
|
|||
SAT11-HAND-runtime-regression:
|
||||
dataset: SAT11-HAND-runtime-regression
|
||||
metric: rmse
|
||||
target_col: runtime
|
||||
user_requirement: "This is a SAT11-HAND-runtime-regression dataset. Your goal\
|
||||
\ is to predict the target column `runtime`.\nPerform data analysis, data preprocessing,\
|
||||
\ feature engineering, and modeling to predict the target. \nReport rmse on\
|
||||
|
|
@ -58,6 +66,7 @@ datasets:
|
|||
boston:
|
||||
dataset: boston
|
||||
metric: rmse
|
||||
target_col: MEDV
|
||||
user_requirement: "This is a boston dataset. Your goal is to predict the target\
|
||||
\ column `MEDV`.\nPerform data analysis, data preprocessing, feature engineering,\
|
||||
\ and modeling to predict the target. \nReport rmse on the eval data. Do not\
|
||||
|
|
@ -65,6 +74,7 @@ datasets:
|
|||
colleges:
|
||||
dataset: colleges
|
||||
metric: rmse
|
||||
target_col: percent_pell_grant
|
||||
user_requirement: "This is a colleges dataset. Your goal is to predict the target\
|
||||
\ column `percent_pell_grant`.\nPerform data analysis, data preprocessing, feature\
|
||||
\ engineering, and modeling to predict the target. \nReport rmse on the eval\
|
||||
|
|
@ -72,6 +82,7 @@ datasets:
|
|||
credit-g:
|
||||
dataset: credit-g
|
||||
metric: f1
|
||||
target_col: class
|
||||
user_requirement: "This is a credit-g dataset. Your goal is to predict the target\
|
||||
\ column `class`.\nPerform data analysis, data preprocessing, feature engineering,\
|
||||
\ and modeling to predict the target. \nReport f1 on the eval data. Do not plot\
|
||||
|
|
@ -79,6 +90,7 @@ datasets:
|
|||
diamonds:
|
||||
dataset: diamonds
|
||||
metric: rmse
|
||||
target_col: price
|
||||
user_requirement: "This is a diamonds dataset. Your goal is to predict the target\
|
||||
\ column `price`.\nPerform data analysis, data preprocessing, feature engineering,\
|
||||
\ and modeling to predict the target. \nReport rmse on the eval data. Do not\
|
||||
|
|
@ -86,6 +98,7 @@ datasets:
|
|||
jasmine:
|
||||
dataset: jasmine
|
||||
metric: f1
|
||||
target_col: class
|
||||
user_requirement: "This is a jasmine dataset. Your goal is to predict the target\
|
||||
\ column `class`.\nPerform data analysis, data preprocessing, feature engineering,\
|
||||
\ and modeling to predict the target. \nReport f1 on the eval data. Do not plot\
|
||||
|
|
@ -93,6 +106,7 @@ datasets:
|
|||
kc1:
|
||||
dataset: kc1
|
||||
metric: f1
|
||||
target_col: defects
|
||||
user_requirement: "This is a kc1 dataset. Your goal is to predict the target column\
|
||||
\ `defects`.\nPerform data analysis, data preprocessing, feature engineering,\
|
||||
\ and modeling to predict the target. \nReport f1 on the eval data. Do not plot\
|
||||
|
|
@ -100,6 +114,7 @@ datasets:
|
|||
kick:
|
||||
dataset: kick
|
||||
metric: f1
|
||||
target_col: IsBadBuy
|
||||
user_requirement: "This is a kick dataset. Your goal is to predict the target\
|
||||
\ column `IsBadBuy`.\nPerform data analysis, data preprocessing, feature engineering,\
|
||||
\ and modeling to predict the target. \nReport f1 on the eval data. Do not plot\
|
||||
|
|
@ -107,6 +122,7 @@ datasets:
|
|||
mfeat-factors:
|
||||
dataset: mfeat-factors
|
||||
metric: f1 weighted
|
||||
target_col: class
|
||||
user_requirement: "This is a mfeat-factors dataset. Your goal is to predict the\
|
||||
\ target column `class`.\nPerform data analysis, data preprocessing, feature\
|
||||
\ engineering, and modeling to predict the target. \nReport f1 weighted on the\
|
||||
|
|
@ -114,6 +130,7 @@ datasets:
|
|||
segment:
|
||||
dataset: segment
|
||||
metric: f1 weighted
|
||||
target_col: class
|
||||
user_requirement: "This is a segment dataset. Your goal is to predict the target\
|
||||
\ column `class`.\nPerform data analysis, data preprocessing, feature engineering,\
|
||||
\ and modeling to predict the target. \nReport f1 weighted on the eval data.\
|
||||
|
|
@ -121,6 +138,7 @@ datasets:
|
|||
steel-plates-fault:
|
||||
dataset: steel-plates-fault
|
||||
metric: f1 weighted
|
||||
target_col: target
|
||||
user_requirement: "This is a steel-plates-fault dataset. Your goal is to predict\
|
||||
\ the target column `target`.\nPerform data analysis, data preprocessing, feature\
|
||||
\ engineering, and modeling to predict the target. \nReport f1 weighted on the\
|
||||
|
|
@ -128,6 +146,7 @@ datasets:
|
|||
wine-quality-white:
|
||||
dataset: wine-quality-white
|
||||
metric: f1 weighted
|
||||
target_col: Class
|
||||
user_requirement: "This is a wine-quality-white dataset. Your goal is to predict\
|
||||
\ the target column `Class`.\nPerform data analysis, data preprocessing, feature\
|
||||
\ engineering, and modeling to predict the target. \nReport f1 weighted on the\
|
||||
|
|
|
|||
|
|
@ -9,10 +9,12 @@ class CustomExperimenter(Experimenter):
|
|||
|
||||
def __init__(self, args, **kwargs):
|
||||
super().__init__(args, **kwargs)
|
||||
self.framework = kwargs["framework"]
|
||||
self.framework = kwargs["framework"] # todo
|
||||
self.task = kwargs.get("task", self.args.task)
|
||||
self.low_is_better = kwargs.get("low_is_better", self.args.low_is_better)
|
||||
self.name = kwargs.get("name", "")
|
||||
self.result_path = f"results/custom_{self.name}"
|
||||
self.state = create_initial_state(self.args.task, start_task_id=1, data_config=self.data_config, low_is_better=self.args.low_is_better, name="")
|
||||
self.state = create_initial_state(self.task, start_task_id=1, data_config=self.data_config, low_is_better=self.low_is_better, name=self.name)
|
||||
|
||||
async def run_experiment(self):
|
||||
user_requirement = self.state["requirement"]
|
||||
|
|
@ -30,6 +32,15 @@ class CustomExperimenter(Experimenter):
|
|||
}
|
||||
self.save_result(results)
|
||||
|
||||
def evaluate_pred_files(self, dev_pred_path, test_pred_path):
|
||||
dev_preds = pd.read_csv(dev_pred_path)["target"]
|
||||
test_preds = pd.read_csv(test_pred_path)["target"]
|
||||
score_dict = {
|
||||
"dev_score": self.evaluate_score(dev_preds, "dev"),
|
||||
"test_score": self.evaluate_score(test_preds, "test")
|
||||
}
|
||||
return score_dict
|
||||
|
||||
def evaluate_predictions(self, preds, split):
|
||||
metric = self.state["dataset_config"]["metric"]
|
||||
gt_path = os.path.join(self.state["datasets_dir"][f"{split}_target"])
|
||||
|
|
|
|||
|
|
@ -20,16 +20,24 @@ class Experimenter:
|
|||
async def run_experiment(self):
|
||||
state = create_initial_state(self.args.task, start_task_id=1, data_config=self.data_config, low_is_better=self.args.low_is_better, name="")
|
||||
user_requirement = state["requirement"]
|
||||
di = ResearchAssistant(node_id="0", use_reflection=self.args.reflection)
|
||||
await di.run(user_requirement)
|
||||
|
||||
score_dict = await di.get_score()
|
||||
score_dict = self.evaluate(score_dict, state)
|
||||
results = {
|
||||
"score_dict": score_dict,
|
||||
"user_requirement": user_requirement,
|
||||
"args": vars(self.args)
|
||||
}
|
||||
results = []
|
||||
|
||||
for i in range(self.args.num_experiments):
|
||||
di = ResearchAssistant(node_id="0", use_reflection=self.args.reflection)
|
||||
await di.run(user_requirement)
|
||||
score_dict = await di.get_score()
|
||||
score_dict = self.evaluate(score_dict, state)
|
||||
results.append({
|
||||
"idx": i,
|
||||
"score_dict": score_dict,
|
||||
"user_requirement": user_requirement,
|
||||
"args": vars(self.args)
|
||||
})
|
||||
scores = [result["score_dict"]["test_score"] for result in results]
|
||||
avg_score = sum(scores) / len(scores)
|
||||
best_score = max(scores) if not self.args.low_is_better else min(scores)
|
||||
best_score_idx = scores.index(best_score)
|
||||
results.insert(0, {"avg_score": avg_score, "best_score": best_score, "best_score_idx": best_score_idx})
|
||||
self.save_result(results)
|
||||
|
||||
def evaluate_prediction(self, split, state):
|
||||
|
|
|
|||
|
|
@ -22,18 +22,19 @@ class MCTSExperimenter(Experimenter):
|
|||
text += f"Best node: {best_node}, score: {best_node.raw_reward}\n"
|
||||
text += f"Dev best node: {dev_best_node}, score: {dev_best_node.raw_reward}\n"
|
||||
print(text)
|
||||
self.save_tree(text)
|
||||
if self.args.rollouts > 0:
|
||||
self.save_tree(text)
|
||||
|
||||
results = {
|
||||
"best_node": best_node.id,
|
||||
"best_node_score": best_node.raw_reward,
|
||||
"dev_best_node": dev_best_node.id,
|
||||
"dev_best_node_score": dev_best_node.raw_reward,
|
||||
"num_generated_codes": num_generated_codes,
|
||||
"user_requirement": best_node.state["requirement"],
|
||||
"args": vars(self.args)
|
||||
}
|
||||
self.save_result(results)
|
||||
results = {
|
||||
"best_node": best_node.id,
|
||||
"best_node_score": best_node.raw_reward,
|
||||
"dev_best_node": dev_best_node.id,
|
||||
"dev_best_node_score": dev_best_node.raw_reward,
|
||||
"num_generated_codes": num_generated_codes,
|
||||
"user_requirement": best_node.state["requirement"],
|
||||
"args": vars(self.args)
|
||||
}
|
||||
self.save_result(results)
|
||||
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue