diff --git a/expo/MCTS.py b/expo/MCTS.py index 749850dd6..378474b4e 100644 --- a/expo/MCTS.py +++ b/expo/MCTS.py @@ -33,7 +33,9 @@ def create_initial_state( if args.custom_dataset_dir: dataset_config = None datasets_dir = args.custom_dataset_dir - requirement = get_mle_bench_requirements(args.custom_dataset_dir, data_config) + requirement = get_mle_bench_requirements( + args.custom_dataset_dir, data_config, special_instruction=special_instruction + ) exp_pool_path = None # external_eval = False # make sure external eval is false if custom dataset is used task = get_mle_task_id(args.custom_dataset_dir) @@ -309,7 +311,7 @@ class MCTS: node = random.choice(node.children) reward, result_dict = await node.run_node(role) mcts_logger.log("MCTS", f"Simulated node's reward: {reward}") - + # TODO: add new insights return reward def backpropagate(self, node: Node, reward): diff --git a/expo/README.md b/expo/README.md index 598de039d..5b913e415 100644 --- a/expo/README.md +++ b/expo/README.md @@ -6,7 +6,12 @@ # SELA: Tree-Search Enhanced LLM Agents for Automated Machine Learning ## 1. Data Preparation - Download Datasets:https://deepwisdom.feishu.cn/drive/folder/RVyofv9cvlvtxKdddt2cyn3BnTc?from=from_copylink - +- Download and prepare datasets from scratch: + ``` + cd expo/data + python dataset.py --save_analysis_pool + python hf_data.py --save_analysis_pool + ``` ## 2. Configs @@ -85,6 +90,23 @@ ## 4. Evaluation - Use the function `evaluate_score` to evaluate. +#### MLE-Bench +**Note: mle-bench requires python 3.11 or higher** +``` +git clone https://github.com/openai/mle-bench.git +cd mle-bench +pip install -e . +``` + +``` +mlebench prepare -c --data-dir +``` + +Enter the following command to run the experiment: +``` +python run_experiment.py --exp_mode mcts --custom_dataset_dir --rollouts 10 --from_scratch +``` + ## 5. 
Baselines ### DS Agent @@ -92,7 +114,7 @@ ### DS Agent git clone https://github.com/guosyjlu/DS-Agent.git ``` -将其deployment/generate.py line46-48行部分修改如下(目的是用deepseek而非GPT的API): +Modify the following lines in deployment/generate.py (lines 46-48) as shown below (the purpose is to use deepseek instead of OpenAI's API): ```python messages = [{"role": "user", "content": prompt}] @@ -120,7 +142,7 @@ ### DS Agent completion = raw_completion.split("```python")[1].split("```")[0] ``` -修改完后在新建一个`deployment/test.sh` 分别运行下列两行,`$TASK` 是你要测试的task name +After making the changes, create a new `deployment/test.sh` and run the following two lines separately, where `$TASK` is the name of the task you want to test ``` python -u generate.py --llm deepseek-coder --task $TASK --shot 1 --retrieval > "$TASK".txt 2>&1 @@ -135,7 +157,7 @@ #### Setup git clone https://github.com/WecoAI/aideml.git ``` -修改 `aideml/aide/utils/config.yaml` 内容如下 +Modify `aideml/aide/utils/config.yaml`: ```yaml # path to the task data directory @@ -192,14 +214,14 @@ # hyperparameters for the tree search num_drafts: 5 ``` -由于 deepseek 完全兼容 OpenAI 的 API,修改`base_url`为`自己的url`,`api_key`为`自己的key`即可 +Since DeepSeek is fully compatible with OpenAI's API, simply set `base_url` to `your own url` and `api_key` to `your api key` ``` -export OPENAI_API_KEY="自己的key" -export OPENAI_BASE_URL="自己的url" +export OPENAI_API_KEY="your api key" +export OPENAI_BASE_URL="your own url" ``` -修改`aideml/aide/backend/__init__.py` 30 行内容如下: +Modify `aideml/aide/backend/__init__.py`'s line 30 and below: ```python model_kwargs = model_kwargs | { @@ -213,7 +235,7 @@ # hyperparameters for the tree search query_func = backend_openai.query ``` -由于 deepseekV2.5 不再支持 system message 使用 function call,修改 `aideml/aide/agent.py` 312 行内容如下: +Since deepseekV2.5 no longer supports system message using function call, modify `aideml/aide/agent.py`'s line 312: ```python response = cast( @@ -228,7 +250,7 @@ # hyperparameters for the tree search ) ``` -修改完后 +Modify and 
install: ``` cd aideml @@ -237,8 +259,8 @@ # hyperparameters for the tree search #### Run -运行下面脚本获取运行结果,在当前目录下将生成一个 log 文件夹以及 workspace 文件夹 -log 文件夹中将包含实验使用配置以及生成方案记录,workspace 文件夹下将保存 aide 最后生成的结果文件 +Run the following script to get the results; a `log` folder and a `workspace` folder will be generated in the current directory. +The `log` folder will contain the experiment configuration and a record of the generated solutions, and the `workspace` folder will hold the final result files generated by aide. ``` python experimenter/aide.py @@ -264,7 +286,6 @@ #### Setup Replace {task_name} with the specific task you want to run. -提供github链接,并说明使用的命令以及参数设置 ### AutoSklearn #### System requirements auto-sklearn has the following system requirements: @@ -295,15 +316,4 @@ ### Base DI For setup, check 4. - `python run_experiment.py --exp_mode base --task titanic --num_experiments 10` - Specifically instruct DI to use AutoGluon: `--special_instruction ag` -- Specifically instruct DI to use the stacking ensemble method: `--special_instruction stacking` - - - - - - - - - - - +- Specifically instruct DI to use the stacking ensemble method: `--special_instruction stacking` \ No newline at end of file diff --git a/expo/data/custom_task.py b/expo/data/custom_task.py index f66b4aa58..e904e9496 100644 --- a/expo/data/custom_task.py +++ b/expo/data/custom_task.py @@ -1,5 +1,6 @@ import os +from expo.data.dataset import SPECIAL_INSTRUCTIONS from expo.experimenter.mle_bench.instructions import ( ADDITIONAL_NOTES, INSTRUCTIONS, @@ -24,7 +25,7 @@ COMPETITION INSTRUCTIONS - Besides `submission.csv`, you should also save your output in the output directory. - You should split the training data into train and dev set. - Save the prediction results of BOTH the dev set and test set in `dev_predictions.csv` and `test_predictions.csv` respectively in the output directory. They should be in the same format as the `submission.csv`. 
-- Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target. +- Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target. {special_instruction} **Do not make any plots or visualizations.** """ @@ -33,12 +34,13 @@ def get_mle_task_id(dataset_dir): return dataset_dir.split("/")[-3] -def get_mle_bench_requirements(dataset_dir, data_config, obfuscated=False): +def get_mle_bench_requirements(dataset_dir, data_config, obfuscated=False, special_instruction=""): work_dir = data_config["work_dir"] task = get_mle_task_id(dataset_dir) output_dir = f"{work_dir}/{task}" final_output_dir = f"{work_dir}/submission" os.makedirs(output_dir, exist_ok=True) + special_instruction = SPECIAL_INSTRUCTIONS[special_instruction] if obfuscated: instructions = INSTRUCTIONS_OBFUSCATED.format(dataset_dir=dataset_dir, output_dir=final_output_dir) @@ -54,6 +56,7 @@ def get_mle_bench_requirements(dataset_dir, data_config, obfuscated=False): additonal_notes=ADDITIONAL_NOTES, task_description=task_description, output_dir=output_dir, + special_instruction=special_instruction, ) print(mle_requirement) return mle_requirement diff --git a/expo/utils.py b/expo/utils.py index f3381c91c..21b311e7f 100644 --- a/expo/utils.py +++ b/expo/utils.py @@ -111,7 +111,7 @@ async def load_execute_notebook(role): codes = [task.code for task in tasks if task.code] executor = role.execute_code executor.nb = nbformat.v4.new_notebook() - executor.nb_client = NotebookClient(executor.nb, timeout=executor.timeout) + executor.nb_client = NotebookClient(executor.nb, timeout=role.role_timeout) # await executor.build() for code in codes: outputs, success = await executor.run(code)