allow special-instruction for mle-bench

2026-06-11 15:15:18 +02:00 · 2024-10-15 14:14:29 +08:00 · 2024-10-15 14:14:29 +08:00 · d179982949
commit d179982949
parent 07800be441
4 changed files with 45 additions and 30 deletions
--- a/expo/MCTS.py
+++ b/expo/MCTS.py
@ -33,7 +33,9 @@ def create_initial_state(
    if args.custom_dataset_dir:
        dataset_config = None
        datasets_dir = args.custom_dataset_dir
-        requirement = get_mle_bench_requirements(args.custom_dataset_dir, data_config)
+        requirement = get_mle_bench_requirements(
+            args.custom_dataset_dir, data_config, special_instruction=special_instruction
+        )
        exp_pool_path = None
        # external_eval = False # make sure external eval is false if custom dataset is used
        task = get_mle_task_id(args.custom_dataset_dir)
@ -309,7 +311,7 @@ class MCTS:
            node = random.choice(node.children)
        reward, result_dict = await node.run_node(role)
        mcts_logger.log("MCTS", f"Simulated node's reward: {reward}")
-
+        # TODO: add new insights
        return reward

    def backpropagate(self, node: Node, reward):
--- a/expo/README.md
+++ b/expo/README.md
@ -6,7 +6,12 @@ # SELA: Tree-Search Enhanced LLM Agents for Automated Machine Learning
 ## 1. Data Preparation

 - Download Datasets：https://deepwisdom.feishu.cn/drive/folder/RVyofv9cvlvtxKdddt2cyn3BnTc?from=from_copylink
-
+- Download and prepare datasets from scratch:
+  ```
+  cd expo/data
+  python dataset.py --save_analysis_pool
+  python hf_data.py --save_analysis_pool
+  ```

 ## 2. Configs

@ -85,6 +90,23 @@ ## 4. Evaluation

 - Use the function `evaluate_score` to evaluate.

+#### MLE-Bench
+**Note: mle-bench requires Python 3.11 or higher.**
+```
+git clone https://github.com/openai/mle-bench.git
+cd mle-bench
+pip install -e .
+```
+
+```
+mlebench prepare -c <competition-id> --data-dir <dataset-dir-save-path>
+```
+
+Enter the following command to run the experiment:
+```
+python run_experiment.py --exp_mode mcts --custom_dataset_dir <dataset-dir-save-path/prepared/public> --rollouts 10 --from_scratch
+```
+

 ## 5. Baselines
 ### DS Agent
@ -92,7 +114,7 @@ ### DS Agent
 git clone https://github.com/guosyjlu/DS-Agent.git
 ```

-将其deployment/generate.py line46-48行部分修改如下（目的是用deepseek而非GPT的API）：
+Modify the following lines in deployment/generate.py (lines 46-48) as shown below (the purpose is to use deepseek instead of OpenAI's API):
 ```python
 messages = [{"role": "user", "content": prompt}]

@ -120,7 +142,7 @@ ### DS Agent
 completion = raw_completion.split("```python")[1].split("```")[0]
 ```

-修改完后在新建一个`deployment/test.sh` 分别运行下列两行，`$TASK` 是你要测试的task name
+After making the changes, create a new `deployment/test.sh` and run the following two lines separately, where `$TASK` is the name of the task you want to test:
 ```
 python -u generate.py --llm deepseek-coder --task $TASK --shot 1 --retrieval > "$TASK".txt 2>&1 

@ -135,7 +157,7 @@ #### Setup
 git clone https://github.com/WecoAI/aideml.git
 ```

-修改 `aideml/aide/utils/config.yaml` 内容如下
+Modify `aideml/aide/utils/config.yaml`:

 ```yaml
 # path to the task data directory
@ -192,14 +214,14 @@   # hyperparameters for the tree search
    num_drafts: 5
 ```

-由于 deepseek 完全兼容 OpenAI 的 API，修改`base_url`为`自己的url`，`api_key`为`自己的key`即可
+Since DeepSeek is compatible with OpenAI's API, set `base_url` to `your own url` and `api_key` to `your api key`:

 ```
-export OPENAI_API_KEY="自己的key"
-export OPENAI_BASE_URL="自己的url"
+export OPENAI_API_KEY="your api key"
+export OPENAI_BASE_URL="your own url"
 ```

-修改`aideml/aide/backend/__init__.py` 30 行内容如下：
+Modify `aideml/aide/backend/__init__.py`, starting at line 30, as follows:

 ```python
 model_kwargs = model_kwargs | {
@ -213,7 +235,7 @@   # hyperparameters for the tree search
        query_func = backend_openai.query
 ```

-由于 deepseekV2.5 不再支持 system message 使用 function call，修改 `aideml/aide/agent.py` 312 行内容如下：
+Since DeepSeek V2.5 no longer supports function calling together with a system message, modify line 312 of `aideml/aide/agent.py` as follows:

 ```python
 response = cast(
@ -228,7 +250,7 @@   # hyperparameters for the tree search
        )
 ```

-修改完后
+After making the changes, install the package:

 ```
 cd aideml
@ -237,8 +259,8 @@   # hyperparameters for the tree search

 #### Run

-运行下面脚本获取运行结果，在当前目录下将生成一个 log 文件夹以及 workspace 文件夹
-log 文件夹中将包含实验使用配置以及生成方案记录，workspace 文件夹下将保存 aide 最后生成的结果文件
+Run the following script to get the results. A `log` folder and a `workspace` folder will be generated in the current directory.
+The `log` folder will contain the experiment configuration and a record of the generated solutions, and the `workspace` folder will hold the final result files generated by aide.

 ```
 python experimenter/aide.py
@ -264,7 +286,6 @@ #### Setup
 Replace {task_name} with the specific task you want to run.


-提供github链接，并说明使用的命令以及参数设置
 ### AutoSklearn
 #### System requirements
 auto-sklearn has the following system requirements:
@ -295,15 +316,4 @@ ### Base DI
 For setup, check 4.
 - `python run_experiment.py --exp_mode base --task titanic --num_experiments 10`
 - Specifically instruct DI to use AutoGluon: `--special_instruction ag`
- Specifically instruct DI to use the stacking ensemble method: `--special_instruction stacking`
-
-
-
-
-
-
-
-
-
-
-
+- Specifically instruct DI to use the stacking ensemble method: `--special_instruction stacking`
--- a/expo/data/custom_task.py
+++ b/expo/data/custom_task.py
@ -1,5 +1,6 @@
 import os

+from expo.data.dataset import SPECIAL_INSTRUCTIONS
 from expo.experimenter.mle_bench.instructions import (
    ADDITIONAL_NOTES,
    INSTRUCTIONS,
@ -24,7 +25,7 @@ COMPETITION INSTRUCTIONS
 - Besides `submission.csv`, you should also save your output in the output directory.
 - You should split the training data into train and dev set.
 - Save the prediction results of BOTH the dev set and test set in `dev_predictions.csv` and `test_predictions.csv` respectively in the output directory. They should be in the same format as the `submission.csv`.
- Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target. 
+- Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target. {special_instruction}
 **Do not make any plots or visualizations.**
 """

@ -33,12 +34,13 @@ def get_mle_task_id(dataset_dir):
    return dataset_dir.split("/")[-3]


-def get_mle_bench_requirements(dataset_dir, data_config, obfuscated=False):
+def get_mle_bench_requirements(dataset_dir, data_config, obfuscated=False, special_instruction=""):
    work_dir = data_config["work_dir"]
    task = get_mle_task_id(dataset_dir)
    output_dir = f"{work_dir}/{task}"
    final_output_dir = f"{work_dir}/submission"
    os.makedirs(output_dir, exist_ok=True)
+    special_instruction = SPECIAL_INSTRUCTIONS[special_instruction]

    if obfuscated:
        instructions = INSTRUCTIONS_OBFUSCATED.format(dataset_dir=dataset_dir, output_dir=final_output_dir)
@ -54,6 +56,7 @@ def get_mle_bench_requirements(dataset_dir, data_config, obfuscated=False):
        additonal_notes=ADDITIONAL_NOTES,
        task_description=task_description,
        output_dir=output_dir,
+        special_instruction=special_instruction,
    )
    print(mle_requirement)
    return mle_requirement
--- a/expo/utils.py
+++ b/expo/utils.py
@ -111,7 +111,7 @@ async def load_execute_notebook(role):
    codes = [task.code for task in tasks if task.code]
    executor = role.execute_code
    executor.nb = nbformat.v4.new_notebook()
-    executor.nb_client = NotebookClient(executor.nb, timeout=executor.timeout)
+    executor.nb_client = NotebookClient(executor.nb, timeout=role.role_timeout)
    # await executor.build()
    for code in codes:
        outputs, success = await executor.run(code)