mirror of
https://github.com/FoundationAgents/MetaGPT.git
synced 2026-06-11 15:15:18 +02:00
allow special-instruction for mle-bench
This commit is contained in:
parent
07800be441
commit
d179982949
4 changed files with 45 additions and 30 deletions
|
|
@ -33,7 +33,9 @@ def create_initial_state(
|
|||
if args.custom_dataset_dir:
|
||||
dataset_config = None
|
||||
datasets_dir = args.custom_dataset_dir
|
||||
requirement = get_mle_bench_requirements(args.custom_dataset_dir, data_config)
|
||||
requirement = get_mle_bench_requirements(
|
||||
args.custom_dataset_dir, data_config, special_instruction=special_instruction
|
||||
)
|
||||
exp_pool_path = None
|
||||
# external_eval = False # make sure external eval is false if custom dataset is used
|
||||
task = get_mle_task_id(args.custom_dataset_dir)
|
||||
|
|
@ -309,7 +311,7 @@ class MCTS:
|
|||
node = random.choice(node.children)
|
||||
reward, result_dict = await node.run_node(role)
|
||||
mcts_logger.log("MCTS", f"Simulated node's reward: {reward}")
|
||||
|
||||
# TODO: add new insights
|
||||
return reward
|
||||
|
||||
def backpropagate(self, node: Node, reward):
|
||||
|
|
|
|||
|
|
@ -6,7 +6,12 @@ # SELA: Tree-Search Enhanced LLM Agents for Automated Machine Learning
|
|||
## 1. Data Preparation
|
||||
|
||||
- Download Datasets: https://deepwisdom.feishu.cn/drive/folder/RVyofv9cvlvtxKdddt2cyn3BnTc?from=from_copylink
|
||||
|
||||
- Download and prepare datasets from scratch:
|
||||
```
|
||||
cd expo/data
|
||||
python dataset.py --save_analysis_pool
|
||||
python hf_data.py --save_analysis_pool
|
||||
```
|
||||
|
||||
## 2. Configs
|
||||
|
||||
|
|
@ -85,6 +90,23 @@ ## 4. Evaluation
|
|||
|
||||
- Use the function `evaluate_score` to evaluate.
|
||||
|
||||
#### MLE-Bench
|
||||
**Note: mle-bench requires python 3.11 or higher**
|
||||
```
|
||||
git clone https://github.com/openai/mle-bench.git
|
||||
cd mle-bench
|
||||
pip install -e .
|
||||
```
|
||||
|
||||
```
|
||||
mlebench prepare -c <competition-id> --data-dir <dataset-dir-save-path>
|
||||
```
|
||||
|
||||
Enter the following command to run the experiment:
|
||||
```
|
||||
python run_experiment.py --exp_mode mcts --custom_dataset_dir <dataset-dir-save-path/prepared/public> --rollouts 10 --from_scratch
|
||||
```
|
||||
|
||||
|
||||
## 5. Baselines
|
||||
### DS Agent
|
||||
|
|
@ -92,7 +114,7 @@ ### DS Agent
|
|||
git clone https://github.com/guosyjlu/DS-Agent.git
|
||||
```
|
||||
|
||||
将其deployment/generate.py line46-48行部分修改如下(目的是用deepseek而非GPT的API):
|
||||
Modify lines 46-48 of `deployment/generate.py` as shown below (the purpose is to use DeepSeek's API instead of OpenAI's):
|
||||
```python
|
||||
messages = [{"role": "user", "content": prompt}]
|
||||
|
||||
|
|
@ -120,7 +142,7 @@ ### DS Agent
|
|||
completion = raw_completion.split("```python")[1].split("```")[0]
|
||||
```
|
||||
|
||||
修改完后在新建一个`deployment/test.sh` 分别运行下列两行,`$TASK` 是你要测试的task name
|
||||
After making the changes, create a new `deployment/test.sh` and run the following two lines separately, where `$TASK` is the name of the task you want to test:
|
||||
```
|
||||
python -u generate.py --llm deepseek-coder --task $TASK --shot 1 --retrieval > "$TASK".txt 2>&1
|
||||
|
||||
|
|
@ -135,7 +157,7 @@ #### Setup
|
|||
git clone https://github.com/WecoAI/aideml.git
|
||||
```
|
||||
|
||||
修改 `aideml/aide/utils/config.yaml` 内容如下
|
||||
Modify `aideml/aide/utils/config.yaml`:
|
||||
|
||||
```yaml
|
||||
# path to the task data directory
|
||||
|
|
@ -192,14 +214,14 @@ # hyperparameters for the tree search
|
|||
num_drafts: 5
|
||||
```
|
||||
|
||||
由于 deepseek 完全兼容 OpenAI 的 API,修改`base_url`为`自己的url`,`api_key`为`自己的key`即可
|
||||
Since DeepSeek is fully compatible with OpenAI's API, simply set `base_url` to your own URL and `api_key` to your own API key:
|
||||
|
||||
```
|
||||
export OPENAI_API_KEY="自己的key"
|
||||
export OPENAI_BASE_URL="自己的url"
|
||||
export OPENAI_API_KEY="your api key"
|
||||
export OPENAI_BASE_URL="your own url"
|
||||
```
|
||||
|
||||
修改`aideml/aide/backend/__init__.py` 30 行内容如下:
|
||||
Modify `aideml/aide/backend/__init__.py` starting at line 30 as follows:
|
||||
|
||||
```python
|
||||
model_kwargs = model_kwargs | {
|
||||
|
|
@ -213,7 +235,7 @@ # hyperparameters for the tree search
|
|||
query_func = backend_openai.query
|
||||
```
|
||||
|
||||
由于 deepseekV2.5 不再支持 system message 使用 function call,修改 `aideml/aide/agent.py` 312 行内容如下:
|
||||
Since DeepSeek V2.5 no longer supports using function calls together with a system message, modify line 312 of `aideml/aide/agent.py` as follows:
|
||||
|
||||
```python
|
||||
response = cast(
|
||||
|
|
@ -228,7 +250,7 @@ # hyperparameters for the tree search
|
|||
)
|
||||
```
|
||||
|
||||
修改完后
|
||||
Modify and install:
|
||||
|
||||
```
|
||||
cd aideml
|
||||
|
|
@ -237,8 +259,8 @@ # hyperparameters for the tree search
|
|||
|
||||
#### Run
|
||||
|
||||
运行下面脚本获取运行结果,在当前目录下将生成一个 log 文件夹以及 workspace 文件夹
|
||||
log 文件夹中将包含实验使用配置以及生成方案记录,workspace 文件夹下将保存 aide 最后生成的结果文件
|
||||
Run the following script to get the results. A `log` folder and a `workspace` folder will be generated in the current directory.
|
||||
The `log` folder will contain the experiment configuration and a record of the generated solutions, and the `workspace` folder will hold the final result files generated by aide.
|
||||
|
||||
```
|
||||
python experimenter/aide.py
|
||||
|
|
@ -264,7 +286,6 @@ #### Setup
|
|||
Replace {task_name} with the specific task you want to run.
|
||||
|
||||
|
||||
提供github链接,并说明使用的命令以及参数设置
|
||||
### AutoSklearn
|
||||
#### System requirements
|
||||
auto-sklearn has the following system requirements:
|
||||
|
|
@ -295,15 +316,4 @@ ### Base DI
|
|||
For setup, check 4.
|
||||
- `python run_experiment.py --exp_mode base --task titanic --num_experiments 10`
|
||||
- Specifically instruct DI to use AutoGluon: `--special_instruction ag`
|
||||
- Specifically instruct DI to use the stacking ensemble method: `--special_instruction stacking`
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
- Specifically instruct DI to use the stacking ensemble method: `--special_instruction stacking`
|
||||
|
|
@ -1,5 +1,6 @@
|
|||
import os
|
||||
|
||||
from expo.data.dataset import SPECIAL_INSTRUCTIONS
|
||||
from expo.experimenter.mle_bench.instructions import (
|
||||
ADDITIONAL_NOTES,
|
||||
INSTRUCTIONS,
|
||||
|
|
@ -24,7 +25,7 @@ COMPETITION INSTRUCTIONS
|
|||
- Besides `submission.csv`, you should also save your output in the output directory.
|
||||
- You should split the training data into train and dev set.
|
||||
- Save the prediction results of BOTH the dev set and test set in `dev_predictions.csv` and `test_predictions.csv` respectively in the output directory. They should be in the same format as the `submission.csv`.
|
||||
- Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target.
|
||||
- Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target. {special_instruction}
|
||||
**Do not make any plots or visualizations.**
|
||||
"""
|
||||
|
||||
|
|
@ -33,12 +34,13 @@ def get_mle_task_id(dataset_dir):
|
|||
return dataset_dir.split("/")[-3]
|
||||
|
||||
|
||||
def get_mle_bench_requirements(dataset_dir, data_config, obfuscated=False):
|
||||
def get_mle_bench_requirements(dataset_dir, data_config, obfuscated=False, special_instruction=""):
|
||||
work_dir = data_config["work_dir"]
|
||||
task = get_mle_task_id(dataset_dir)
|
||||
output_dir = f"{work_dir}/{task}"
|
||||
final_output_dir = f"{work_dir}/submission"
|
||||
os.makedirs(output_dir, exist_ok=True)
|
||||
special_instruction = SPECIAL_INSTRUCTIONS[special_instruction]
|
||||
|
||||
if obfuscated:
|
||||
instructions = INSTRUCTIONS_OBFUSCATED.format(dataset_dir=dataset_dir, output_dir=final_output_dir)
|
||||
|
|
@ -54,6 +56,7 @@ def get_mle_bench_requirements(dataset_dir, data_config, obfuscated=False):
|
|||
additonal_notes=ADDITIONAL_NOTES,
|
||||
task_description=task_description,
|
||||
output_dir=output_dir,
|
||||
special_instruction=special_instruction,
|
||||
)
|
||||
print(mle_requirement)
|
||||
return mle_requirement
|
||||
|
|
|
|||
|
|
@ -111,7 +111,7 @@ async def load_execute_notebook(role):
|
|||
codes = [task.code for task in tasks if task.code]
|
||||
executor = role.execute_code
|
||||
executor.nb = nbformat.v4.new_notebook()
|
||||
executor.nb_client = NotebookClient(executor.nb, timeout=executor.timeout)
|
||||
executor.nb_client = NotebookClient(executor.nb, timeout=role.role_timeout)
|
||||
# await executor.build()
|
||||
for code in codes:
|
||||
outputs, success = await executor.run(code)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue