From 1aac79c3b379c47b0b80ad8efa216d417b3494a2 Mon Sep 17 00:00:00 2001 From: Cyzus Chi Date: Mon, 28 Oct 2024 21:05:59 +0800 Subject: [PATCH 1/4] identation on readme --- metagpt/ext/sela/README.md | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/metagpt/ext/sela/README.md b/metagpt/ext/sela/README.md index c8df4eeba..3fa03ee08 100644 --- a/metagpt/ext/sela/README.md +++ b/metagpt/ext/sela/README.md @@ -179,25 +179,25 @@ # hyperparameters for the tree search "temperature": temperature, "max_tokens": max_tokens, } - if "claude-" in model: - query_func = backend_anthropic.query - else: - query_func = backend_openai.query +if "claude-" in model: + query_func = backend_anthropic.query +else: + query_func = backend_openai.query ``` Since deepseekV2.5 no longer supports system message using function call, modify `aideml/aide/agent.py`'s line 312: ```python response = cast( - dict, - query( - system_message=None, - user_message=prompt, - func_spec=review_func_spec, - model=self.acfg.feedback.model, - temperature=self.acfg.feedback.temp, - ), - ) + dict, + query( + system_message=None, + user_message=prompt, + func_spec=review_func_spec, + model=self.acfg.feedback.model, + temperature=self.acfg.feedback.temp, + ), +) ``` Modify and install: From e0cbbf82f437a525412a730436acd6923ca4e75d Mon Sep 17 00:00:00 2001 From: Cyzus Chi Date: Mon, 28 Oct 2024 21:42:46 +0800 Subject: [PATCH 2/4] rename research assistant to experimenter --- metagpt/ext/sela/README.md | 2 +- metagpt/ext/sela/data/custom_task.py | 2 +- ...{research_assistant.py => experimenter.py} | 0 metagpt/ext/sela/run_experiment.py | 30 +++++++++---------- .../sela/{experimenter => runner}/__init__.py | 0 .../ext/sela/{experimenter => runner}/aide.py | 0 .../{experimenter => runner}/autogluon.py | 4 +-- .../{experimenter => runner}/autosklearn.py | 4 +-- .../sela/{experimenter => runner}/custom.py | 4 +-- .../ext/sela/{experimenter => runner}/mcts.py | 4 +-- 
.../mle_bench/instructions.py | 4 +-- .../{experimenter => runner}/random_search.py | 6 ++-- .../experimenter.py => runner/runner.py} | 4 +-- metagpt/ext/sela/search/tree_search.py | 2 +- 14 files changed, 33 insertions(+), 33 deletions(-) rename metagpt/ext/sela/{research_assistant.py => experimenter.py} (100%) rename metagpt/ext/sela/{experimenter => runner}/__init__.py (100%) rename metagpt/ext/sela/{experimenter => runner}/aide.py (100%) rename metagpt/ext/sela/{experimenter => runner}/autogluon.py (98%) rename metagpt/ext/sela/{experimenter => runner}/autosklearn.py (96%) rename metagpt/ext/sela/{experimenter => runner}/custom.py (95%) rename metagpt/ext/sela/{experimenter => runner}/mcts.py (96%) rename metagpt/ext/sela/{experimenter => runner}/mle_bench/instructions.py (98%) rename metagpt/ext/sela/{experimenter => runner}/random_search.py (92%) rename metagpt/ext/sela/{experimenter/experimenter.py => runner/runner.py} (98%) diff --git a/metagpt/ext/sela/README.md b/metagpt/ext/sela/README.md index 3fa03ee08..829306e36 100644 --- a/metagpt/ext/sela/README.md +++ b/metagpt/ext/sela/README.md @@ -213,7 +213,7 @@ #### Run The `log` folder will contain the experimental configuration and the generated scheme, and the `workspace` folder will save the final results generated by aide ``` -python experimenter/aide.py +python runner/aide.py ``` ### Autogluon diff --git a/metagpt/ext/sela/data/custom_task.py b/metagpt/ext/sela/data/custom_task.py index 3371d5b1c..08a7cbabb 100644 --- a/metagpt/ext/sela/data/custom_task.py +++ b/metagpt/ext/sela/data/custom_task.py @@ -1,7 +1,7 @@ import os from metagpt.ext.sela.data.dataset import SPECIAL_INSTRUCTIONS -from metagpt.ext.sela.experimenter.mle_bench.instructions import ( +from metagpt.ext.sela.runner.mle_bench.instructions import ( ADDITIONAL_NOTES, INSTRUCTIONS, INSTRUCTIONS_OBFUSCATED, diff --git a/metagpt/ext/sela/research_assistant.py b/metagpt/ext/sela/experimenter.py similarity index 100% rename from 
metagpt/ext/sela/research_assistant.py rename to metagpt/ext/sela/experimenter.py diff --git a/metagpt/ext/sela/run_experiment.py b/metagpt/ext/sela/run_experiment.py index 4cced19c3..32130a6fb 100644 --- a/metagpt/ext/sela/run_experiment.py +++ b/metagpt/ext/sela/run_experiment.py @@ -2,12 +2,12 @@ import argparse import asyncio from metagpt.ext.sela.data.custom_task import get_mle_is_lower_better, get_mle_task_id -from metagpt.ext.sela.experimenter.autogluon import GluonExperimenter -from metagpt.ext.sela.experimenter.autosklearn import AutoSklearnExperimenter -from metagpt.ext.sela.experimenter.custom import CustomExperimenter -from metagpt.ext.sela.experimenter.experimenter import Experimenter -from metagpt.ext.sela.experimenter.mcts import MCTSExperimenter -from metagpt.ext.sela.experimenter.random_search import RandomSearchExperimenter +from metagpt.ext.sela.runner.autogluon import GluonRunner +from metagpt.ext.sela.runner.autosklearn import AutoSklearnRunner +from metagpt.ext.sela.runner.custom import CustomRunner +from metagpt.ext.sela.runner.mcts import MCTSRunner +from metagpt.ext.sela.runner.random_search import RandomSearchRunner +from metagpt.ext.sela.runner.runner import Runner def get_args(cmd=True): @@ -74,24 +74,24 @@ def get_di_args(parser): async def main(args): if args.exp_mode == "mcts": - experimenter = MCTSExperimenter(args) + runner = MCTSRunner(args) elif args.exp_mode == "greedy": - experimenter = MCTSExperimenter(args, tree_mode="greedy") + runner = MCTSRunner(args, tree_mode="greedy") elif args.exp_mode == "random": - experimenter = MCTSExperimenter(args, tree_mode="random") + runner = MCTSRunner(args, tree_mode="random") elif args.exp_mode == "rs": - experimenter = RandomSearchExperimenter(args) + runner = RandomSearchRunner(args) elif args.exp_mode == "base": - experimenter = Experimenter(args) + runner = Runner(args) elif args.exp_mode == "autogluon": - experimenter = GluonExperimenter(args) + runner = GluonRunner(args) elif 
args.exp_mode == "custom": - experimenter = CustomExperimenter(args) + runner = CustomRunner(args) elif args.exp_mode == "autosklearn": - experimenter = AutoSklearnExperimenter(args) + runner = AutoSklearnRunner(args) else: raise ValueError(f"Invalid exp_mode: {args.exp_mode}") - await experimenter.run_experiment() + await runner.run_experiment() if __name__ == "__main__": diff --git a/metagpt/ext/sela/experimenter/__init__.py b/metagpt/ext/sela/runner/__init__.py similarity index 100% rename from metagpt/ext/sela/experimenter/__init__.py rename to metagpt/ext/sela/runner/__init__.py diff --git a/metagpt/ext/sela/experimenter/aide.py b/metagpt/ext/sela/runner/aide.py similarity index 100% rename from metagpt/ext/sela/experimenter/aide.py rename to metagpt/ext/sela/runner/aide.py diff --git a/metagpt/ext/sela/experimenter/autogluon.py b/metagpt/ext/sela/runner/autogluon.py similarity index 98% rename from metagpt/ext/sela/experimenter/autogluon.py rename to metagpt/ext/sela/runner/autogluon.py index f547ce4ba..48737da04 100644 --- a/metagpt/ext/sela/experimenter/autogluon.py +++ b/metagpt/ext/sela/runner/autogluon.py @@ -3,7 +3,7 @@ from datetime import datetime import pandas as pd -from metagpt.ext.sela.experimenter.custom import CustomExperimenter +from metagpt.ext.sela.runner.custom import CustomRunner class AGRunner: @@ -102,7 +102,7 @@ class AGRunner: return train_data, dev_data, dev_wo_target_data, test_data -class GluonExperimenter(CustomExperimenter): +class GluonRunner(CustomRunner): result_path: str = "results/autogluon" def __init__(self, args, **kwargs): diff --git a/metagpt/ext/sela/experimenter/autosklearn.py b/metagpt/ext/sela/runner/autosklearn.py similarity index 96% rename from metagpt/ext/sela/experimenter/autosklearn.py rename to metagpt/ext/sela/runner/autosklearn.py index f6ff267e7..7d0eb364e 100644 --- a/metagpt/ext/sela/experimenter/autosklearn.py +++ b/metagpt/ext/sela/runner/autosklearn.py @@ -4,7 +4,7 @@ from functools import partial 
import pandas as pd from metagpt.ext.sela.evaluation.evaluation import evaluate_score -from metagpt.ext.sela.experimenter.custom import CustomExperimenter +from metagpt.ext.sela.runner.custom import CustomRunner def custom_scorer(y_true, y_pred, metric_name): @@ -69,7 +69,7 @@ class ASRunner: return {"test_preds": test_preds, "dev_preds": dev_preds} -class AutoSklearnExperimenter(CustomExperimenter): +class AutoSklearnRunner(CustomRunner): result_path: str = "results/autosklearn" def __init__(self, args, **kwargs): diff --git a/metagpt/ext/sela/experimenter/custom.py b/metagpt/ext/sela/runner/custom.py similarity index 95% rename from metagpt/ext/sela/experimenter/custom.py rename to metagpt/ext/sela/runner/custom.py index 70df1a78e..e9a8ee276 100644 --- a/metagpt/ext/sela/experimenter/custom.py +++ b/metagpt/ext/sela/runner/custom.py @@ -3,11 +3,11 @@ import os import pandas as pd from metagpt.ext.sela.evaluation.evaluation import evaluate_score -from metagpt.ext.sela.experimenter.experimenter import Experimenter +from metagpt.ext.sela.runner.runner import Runner from metagpt.ext.sela.search.tree_search import create_initial_state -class CustomExperimenter(Experimenter): +class CustomRunner(Runner): result_path: str = "results/custom" def __init__(self, args, **kwargs): diff --git a/metagpt/ext/sela/experimenter/mcts.py b/metagpt/ext/sela/runner/mcts.py similarity index 96% rename from metagpt/ext/sela/experimenter/mcts.py rename to metagpt/ext/sela/runner/mcts.py index 9fd66121d..8b6c14100 100644 --- a/metagpt/ext/sela/experimenter/mcts.py +++ b/metagpt/ext/sela/runner/mcts.py @@ -5,11 +5,11 @@ from metagpt.ext.sela.evaluation.evaluation import ( node_evaluate_score_sela, ) from metagpt.ext.sela.evaluation.visualize_mcts import get_tree_text -from metagpt.ext.sela.experimenter.experimenter import Experimenter +from metagpt.ext.sela.runner.runner import Runner from metagpt.ext.sela.search.search_algorithm import MCTS, Greedy, Random -class 
MCTSExperimenter(Experimenter): +class MCTSRunner(Runner): result_path: str = "results/mcts" def __init__(self, args, tree_mode=None, **kwargs): diff --git a/metagpt/ext/sela/experimenter/mle_bench/instructions.py b/metagpt/ext/sela/runner/mle_bench/instructions.py similarity index 98% rename from metagpt/ext/sela/experimenter/mle_bench/instructions.py rename to metagpt/ext/sela/runner/mle_bench/instructions.py index 5c9c7b70f..136726bcf 100644 --- a/metagpt/ext/sela/experimenter/mle_bench/instructions.py +++ b/metagpt/ext/sela/runner/mle_bench/instructions.py @@ -8,7 +8,7 @@ You will be given a machine learning task. You must solve the task by training a BENCHMARK INSTRUCTIONS ------ -First, please read the instructions below which are specific to our benchmark setup. Note, these instructions can be found again in `experimenter/mle_bench/instructions.py`. +First, please read the instructions below which are specific to our benchmark setup. Note, these instructions can be found again in `runner/mle_bench/instructions.py`. - **Competition-specific instructions**: The instructions for the competition are available in `{dataset_dir}/description.md`. Read this file to understand the problem setup for the competition you are currently solving. - **Dataset**: The dataset files for the competition are available in `{dataset_dir}`. @@ -28,7 +28,7 @@ You will be given a machine learning task. You must solve the task by training a BENCHMARK INSTRUCTIONS ------ -First, please read the instructions below which are specific to our benchmark setup. Note, these instructions can be found again in `experimenter/mle_bench/instructions.py`. +First, please read the instructions below which are specific to our benchmark setup. Note, these instructions can be found again in `runner/mle_bench/instructions.py`. - **Task-specific instructions**: The instructions for the task are available in `{dataset_dir}/description.md`. 
Read this file to understand the problem setup for the task you are currently solving. - **Dataset**: The dataset files for the task are available in `{dataset_dir}/`. diff --git a/metagpt/ext/sela/experimenter/random_search.py b/metagpt/ext/sela/runner/random_search.py similarity index 92% rename from metagpt/ext/sela/experimenter/random_search.py rename to metagpt/ext/sela/runner/random_search.py index 5617ee601..8ce42f0ff 100644 --- a/metagpt/ext/sela/experimenter/random_search.py +++ b/metagpt/ext/sela/runner/random_search.py @@ -1,6 +1,6 @@ -from metagpt.ext.sela.experimenter.experimenter import Experimenter +from metagpt.ext.sela.experimenter import ResearchAssistant from metagpt.ext.sela.insights.instruction_generator import InstructionGenerator -from metagpt.ext.sela.research_assistant import ResearchAssistant +from metagpt.ext.sela.runner.runner import Runner from metagpt.ext.sela.utils import get_exp_pool_path EXPS_PROMPT = """ @@ -10,7 +10,7 @@ When doing the tasks, you can refer to the insights below: """ -class RandomSearchExperimenter(Experimenter): +class RandomSearchRunner(Runner): result_path: str = "results/random_search" async def run_experiment(self): diff --git a/metagpt/ext/sela/experimenter/experimenter.py b/metagpt/ext/sela/runner/runner.py similarity index 98% rename from metagpt/ext/sela/experimenter/experimenter.py rename to metagpt/ext/sela/runner/runner.py index 3df46b74b..7ab83c6c3 100644 --- a/metagpt/ext/sela/experimenter/experimenter.py +++ b/metagpt/ext/sela/runner/runner.py @@ -6,12 +6,12 @@ import numpy as np import pandas as pd from metagpt.ext.sela.evaluation.evaluation import evaluate_score -from metagpt.ext.sela.research_assistant import ResearchAssistant +from metagpt.ext.sela.experimenter import ResearchAssistant from metagpt.ext.sela.search.tree_search import create_initial_state from metagpt.ext.sela.utils import DATA_CONFIG, save_notebook -class Experimenter: +class Runner: result_path: str = "results/base" data_config = 
DATA_CONFIG start_task_id = 1 diff --git a/metagpt/ext/sela/search/tree_search.py b/metagpt/ext/sela/search/tree_search.py index cde8dc82a..684426fe6 100644 --- a/metagpt/ext/sela/search/tree_search.py +++ b/metagpt/ext/sela/search/tree_search.py @@ -15,8 +15,8 @@ from metagpt.ext.sela.data.dataset import ( get_split_dataset_path, ) from metagpt.ext.sela.evaluation.evaluation import evaluate_score +from metagpt.ext.sela.experimenter import ResearchAssistant, TimeoutException from metagpt.ext.sela.insights.instruction_generator import InstructionGenerator -from metagpt.ext.sela.research_assistant import ResearchAssistant, TimeoutException from metagpt.ext.sela.utils import get_exp_pool_path, load_execute_notebook, mcts_logger from metagpt.tools.tool_recommend import ToolRecommender from metagpt.utils.common import read_json_file From 25299e1f127b9fa3baa04ac057122375a5bde6ee Mon Sep 17 00:00:00 2001 From: Cyzus Chi Date: Tue, 29 Oct 2024 14:24:38 +0800 Subject: [PATCH 3/4] change research assistant to experimenter --- metagpt/ext/sela/data.yaml | 2 +- metagpt/ext/sela/experimenter.py | 4 ++-- metagpt/ext/sela/runner/random_search.py | 6 ++---- metagpt/ext/sela/runner/runner.py | 6 ++---- metagpt/ext/sela/search/tree_search.py | 12 ++++++------ 5 files changed, 13 insertions(+), 17 deletions(-) diff --git a/metagpt/ext/sela/data.yaml b/metagpt/ext/sela/data.yaml index 5f4a290ea..7da5dbb3c 100644 --- a/metagpt/ext/sela/data.yaml +++ b/metagpt/ext/sela/data.yaml @@ -1,3 +1,3 @@ datasets_dir: "path/to/datasets" # path to the datasets directory -work_dir: ../../workspace # path to the workspace directory +work_dir: ../../../workspace # path to the workspace directory role_dir: storage/SELA # path to the role directory \ No newline at end of file diff --git a/metagpt/ext/sela/experimenter.py b/metagpt/ext/sela/experimenter.py index 2c698c1d2..b05ea2fc3 100644 --- a/metagpt/ext/sela/experimenter.py +++ b/metagpt/ext/sela/experimenter.py @@ -60,7 +60,7 @@ def 
async_timeout(): return decorator -class ResearchAssistant(DataInterpreter): +class Experimenter(DataInterpreter): node_id: str = "0" start_task_id: int = 1 state_saved: bool = False @@ -78,7 +78,7 @@ class ResearchAssistant(DataInterpreter): self.planner.plan.task_map[str(self.start_task_id)].instruction = new_instruction self.remap_tasks() - def update_til_start_task(self, role: ResearchAssistant, backward: bool = True): + def update_til_start_task(self, role: Experimenter, backward: bool = True): if backward: # make sure the previous task instructions are matched assert ( diff --git a/metagpt/ext/sela/runner/random_search.py b/metagpt/ext/sela/runner/random_search.py index 8ce42f0ff..b1f43ac0c 100644 --- a/metagpt/ext/sela/runner/random_search.py +++ b/metagpt/ext/sela/runner/random_search.py @@ -1,4 +1,4 @@ -from metagpt.ext.sela.experimenter import ResearchAssistant +from metagpt.ext.sela.experimenter import Experimenter from metagpt.ext.sela.insights.instruction_generator import InstructionGenerator from metagpt.ext.sela.runner.runner import Runner from metagpt.ext.sela.utils import get_exp_pool_path @@ -34,9 +34,7 @@ class RandomSearchRunner(Runner): results = [] for i in range(self.args.num_experiments): - di = ResearchAssistant( - node_id=str(i), use_reflection=self.args.reflection, role_timeout=self.args.role_timeout - ) + di = Experimenter(node_id=str(i), use_reflection=self.args.reflection, role_timeout=self.args.role_timeout) di.role_dir = f"{di.role_dir}_{self.args.task}" requirement = user_requirement + EXPS_PROMPT.format(experience=exps[i]) print(requirement) diff --git a/metagpt/ext/sela/runner/runner.py b/metagpt/ext/sela/runner/runner.py index 7ab83c6c3..4b5504e09 100644 --- a/metagpt/ext/sela/runner/runner.py +++ b/metagpt/ext/sela/runner/runner.py @@ -6,7 +6,7 @@ import numpy as np import pandas as pd from metagpt.ext.sela.evaluation.evaluation import evaluate_score -from metagpt.ext.sela.experimenter import ResearchAssistant +from 
metagpt.ext.sela.experimenter import Experimenter from metagpt.ext.sela.search.tree_search import create_initial_state from metagpt.ext.sela.utils import DATA_CONFIG, save_notebook @@ -83,9 +83,7 @@ class Runner: results = [] for i in range(self.args.num_experiments): - di = ResearchAssistant( - node_id="0", use_reflection=self.args.reflection, role_timeout=self.args.role_timeout - ) + di = Experimenter(node_id="0", use_reflection=self.args.reflection, role_timeout=self.args.role_timeout) score_dict = await self.run_di(di, user_requirement, run_idx=i) results.append( {"idx": i, "score_dict": score_dict, "user_requirement": user_requirement, "args": vars(self.args)} diff --git a/metagpt/ext/sela/search/tree_search.py b/metagpt/ext/sela/search/tree_search.py index 684426fe6..eac26c86c 100644 --- a/metagpt/ext/sela/search/tree_search.py +++ b/metagpt/ext/sela/search/tree_search.py @@ -15,7 +15,7 @@ from metagpt.ext.sela.data.dataset import ( get_split_dataset_path, ) from metagpt.ext.sela.evaluation.evaluation import evaluate_score -from metagpt.ext.sela.experimenter import ResearchAssistant, TimeoutException +from metagpt.ext.sela.experimenter import Experimenter, TimeoutException from metagpt.ext.sela.insights.instruction_generator import InstructionGenerator from metagpt.ext.sela.utils import get_exp_pool_path, load_execute_notebook, mcts_logger from metagpt.tools.tool_recommend import ToolRecommender @@ -44,9 +44,9 @@ def initialize_di_root_node(state: dict, reflection: bool = True): reflection (bool, optional): Whether to use reflection. Defaults to True. Returns: - tuple: A tuple containing the ResearchAssistant role and the root Node. + tuple: A tuple containing the Experimenter role and the root Node. 
""" - role = ResearchAssistant( + role = Experimenter( node_id="0", start_task_id=state["start_task_id"], use_reflection=reflection, @@ -204,14 +204,14 @@ class Node: role_dict["tool_recommender"] = ToolRecommender() elif isinstance(role_dict.get("tool_recommender", {}).get("tools"), dict): role_dict["tool_recommender"]["tools"] = list(role_dict["tool_recommender"]["tools"].keys()) - role = ResearchAssistant(**role_dict) + role = Experimenter(**role_dict) if self.parent is not None: # TODO: Check this parent_role = self.parent.load_role() role.update_til_start_task(parent_role, backward=False) role.remap_tasks() return role - def save_new_role(self, role: ResearchAssistant): + def save_new_role(self, role: Experimenter): role.node_id = self.id role.start_task_id = self.state["start_task_id"] role.state_saved = False @@ -268,7 +268,7 @@ class Node: self.get_and_move_predictions("test") return score_dict - async def run_node(self, role: ResearchAssistant = None): + async def run_node(self, role: Experimenter = None): if self.is_terminal() and role is not None: if role.state_saved: return self.raw_reward From 37698b3f636d7d2ffdd8f8f754084b99b81b2158 Mon Sep 17 00:00:00 2001 From: Cyzus Chi Date: Tue, 29 Oct 2024 14:55:39 +0800 Subject: [PATCH 4/4] update readme - put baseline readme in /runner --- metagpt/ext/sela/README.md | 271 +++++------------------------- metagpt/ext/sela/runner/README.md | 198 ++++++++++++++++++++++ 2 files changed, 242 insertions(+), 227 deletions(-) create mode 100644 metagpt/ext/sela/runner/README.md diff --git a/metagpt/ext/sela/README.md b/metagpt/ext/sela/README.md index 829306e36..a942fdb7d 100644 --- a/metagpt/ext/sela/README.md +++ b/metagpt/ext/sela/README.md @@ -1,29 +1,26 @@ # SELA: Tree-Search Enhanced LLM Agents for Automated Machine Learning - - ## 1. 
Data Preparation -- Download Datasets:https://deepwisdom.feishu.cn/drive/folder/RVyofv9cvlvtxKdddt2cyn3BnTc?from=from_copylink -- Download and prepare datasets from scratch: -``` -cd data -python dataset.py --save_analysis_pool -python hf_data.py --save_analysis_pool -``` +You can either download the datasets from the link or prepare the datasets from scratch. +- **Download Datasets:** [Dataset Link](https://deepwisdom.feishu.cn/drive/folder/RVyofv9cvlvtxKdddt2cyn3BnTc?from=from_copylink) +- **Download and prepare datasets from scratch:** + ```bash + cd data + python dataset.py --save_analysis_pool + python hf_data.py --save_analysis_pool + ``` -## 2. Configs +## 2. Configurations ### Data Config -`datasets.yaml` Provide base prompts, metrics, target columns for respective datasets - -- Modify `datasets_dir` to the root directory of all the datasets in `data.yaml` - +- **`datasets.yaml`:** Provide base prompts, metrics, and target columns for respective datasets. +- **`data.yaml`:** Modify `datasets_dir` to the base directory of all prepared datasets. ### LLM Config -``` +```yaml llm: api_type: 'openai' model: deepseek-coder @@ -32,237 +29,57 @@ ### LLM Config temperature: 0.5 ``` -### Budget -Experiment rollouts k = 5, 10, 20 - - -### Prompt Usage - -- Use the function `generate_task_requirement` in `dataset.py` to get task requirement. - - If the method is non-DI-based, set `is_di=False`. - - Use `utils.DATA_CONFIG` as `data_config` - ## 3. SELA ### Run SELA #### Setup -In the root directory, -``` +```bash pip install -e . 
-cd expo +cd metagpt/ext/sela pip install -r requirements.txt ``` -#### Run +#### Running Experiments -- Examples - ``` - python run_experiment.py --exp_mode mcts --task titanic --rollouts 10 - python run_experiment.py --exp_mode mcts --task house-prices --rollouts 10 --low_is_better - ``` - - -- `--rollouts` - The number of rollouts - -- `--use_fixed_insights` - In addition to the generated insights, include the fixed insights saved in `expo/insights/fixed_insights.json` - -- `--low_is_better` - If the dataset has reg metric, remember to use `--low_is_better` - -- `--from_scratch` - Do not use pre-processed insight pool, generate new insight pool based on dataset before running MCTS, facilitating subsequent tuning to propose search space prompts - -- `--role_timeout` - The timeout for the role - - This feature limits the duration of a single simulation, making the experiment duration more controllable (for example, if you do ten rollouts and set role_timeout to 1,000, the experiment will stop at the latest after 10,000s) - - -- `--max_depth` - The maximum depth of MCTS, default is 4 (nodes at this depth directly return the previous simulation result without further expansion) - -- `--load_tree` - If MCTS was interrupted due to certain reasons but had already run multiple rollouts, you can use `--load_tree`. - - For example: - ``` +- **Examples:** + ```bash python run_experiment.py --exp_mode mcts --task titanic --rollouts 10 - ``` - - If this was interrupted after running three rollouts, you can use `--load_tree`: - ``` - python run_experiment.py --exp_mode mcts --task titanic --rollouts 7 --load_tree + python run_experiment.py --exp_mode mcts --task house-prices --rollouts 10 --low_is_better ``` +#### Parameters -#### Ablation Study +- **`--rollouts`:** The number of rollouts. +- **`--use_fixed_insights`:** Include fixed insights saved in `expo/insights/fixed_insights.json`. +- **`--low_is_better`:** Use this if the dataset has a regression metric. 
+- **`--from_scratch`:** Generate a new insight pool based on the dataset before running MCTS. +- **`--role_timeout`:** Limits the duration of a single simulation (e.g., `10 rollouts with timeout 1,000` = max 10,000s). +- **`--max_depth`:** Set the maximum depth of MCTS (default is 4). +- **`--load_tree`:** Load an existing MCTS tree if the previous experiment was interrupted. + - Example: + ```bash + python run_experiment.py --exp_mode mcts --task titanic --rollouts 10 + ``` + - To resume: + ```bash + python run_experiment.py --exp_mode mcts --task titanic --rollouts 7 --load_tree + ``` -**DI RandomSearch** +### Ablation Study -- Single insight -`python run_experiment.py --exp_mode rs --task titanic --rs_mode single` +**RandomSearch** -- Set insight -`python run_experiment.py --exp_mode rs --task titanic --rs_mode set` +- **Use a single insight:** + ```bash + python run_experiment.py --exp_mode rs --task titanic --rs_mode single + ``` - -## 4. Evaluation - -Each baseline needs to produce `dev_predictions.csv`和`test_predictions.csv`. Each csv file only needs a `target` column. - -- Use the function `evaluate_score` to evaluate. - -#### MLE-Bench -**Note: mle-bench requires python 3.11 or higher** -``` -git clone https://github.com/openai/mle-bench.git -cd mle-bench -pip install -e . -``` - -``` -mlebench prepare -c --data-dir -``` - -Enter the following command to run the experiment: -``` -python run_experiment.py --exp_mode mcts --custom_dataset_dir --rollouts 10 --from_scratch --role_timeout 3600 -``` - - -## 5. 
Baselines - -### AIDE - -#### Setup -The version of AIDE we use is dated September 30, 2024 -``` -git clone https://github.com/WecoAI/aideml.git -git checkout 77953247ea0a5dc1bd502dd10939dd6d7fdcc5cc -``` - -Modify `aideml/aide/utils/config.yaml` - change `k_fold_validation`, `code model`, and `feedback model` as follows: - -```yaml -# agent hyperparams -agent: - # how many improvement iterations to run - steps: 10 - # whether to instruct the agent to use CV (set to 1 to disable) - k_fold_validation: 1 - # LLM settings for coding - code: - model: deepseek-coder - temp: 0.5 - - # LLM settings for evaluating program output / tracebacks - feedback: - model: deepseek-coder - temp: 0.5 - - # hyperparameters for the tree search - search: - max_debug_depth: 3 - debug_prob: 0.5 - num_drafts: 5 -``` - -Since Deepseek is compatible to OpenAI's API, change `base_url` into `your own url`,`api_key` into `your api key` - -``` -export OPENAI_API_KEY="your api key" -export OPENAI_BASE_URL="your own url" -``` - -Modify `aideml/aide/backend/__init__.py`'s line 30 and below: - -```python -model_kwargs = model_kwargs | { - "model": model, - "temperature": temperature, - "max_tokens": max_tokens, - } -if "claude-" in model: - query_func = backend_anthropic.query -else: - query_func = backend_openai.query -``` - -Since deepseekV2.5 no longer supports system message using function call, modify `aideml/aide/agent.py`'s line 312: - -```python -response = cast( - dict, - query( - system_message=None, - user_message=prompt, - func_spec=review_func_spec, - model=self.acfg.feedback.model, - temperature=self.acfg.feedback.temp, - ), -) -``` - -Modify and install: - -``` -cd aideml -pip install -e . 
-``` - -#### Run - -Run the following script to get the running results, a `log` folder and a `workspace` folder will be generated in the current directory -The `log` folder will contain the experimental configuration and the generated scheme, and the `workspace` folder will save the final results generated by aide - -``` -python runner/aide.py -``` - -### Autogluon -#### Setup -``` -pip install -U pip -pip install -U setuptools wheel -pip install autogluon==1.1.1 -``` - -For Tabular data: -``` -python run_expriment.py --exp_mode autogluon --task {task_name} -``` -For Multimodal data: -``` -python run_expriment.py --exp_mode autogluon --task {task_name} --is_multimodal -``` -Replace {task_name} with the specific task you want to run. - - -### AutoSklearn -#### System requirements -auto-sklearn has the following system requirements: - -- Linux operating system (for example Ubuntu) - -- Python (>=3.7) - -- C++ compiler (with C++11 supports) - -In case you try to install Auto-sklearn on a system where no wheel files for the pyrfr package are provided (see here for available wheels) you also need: - -- SWIG [(get SWIG here).](https://www.swig.org/survey.html) - -For an explanation of missing Microsoft Windows and macOS support please check the Section [Windows/macOS compatibility](https://automl.github.io/auto-sklearn/master/installation.html#windows-macos-compatibility). - -#### Setup -``` -pip install auto-sklearn==0.15.0 -``` - -#### Run -``` -python run_experiment.py --exp_mode autosklearn --task titanic -``` - -### Base DI -For setup, check 4. 
-- `python run_experiment.py --exp_mode base --task titanic --num_experiments 10` -- Specifically instruct DI to use AutoGluon: `--special_instruction ag` -- Specifically instruct DI to use the stacking ensemble method: `--special_instruction stacking` \ No newline at end of file +- **Use a set of insights:** + ```bash + python run_experiment.py --exp_mode rs --task titanic --rs_mode set + ``` \ No newline at end of file diff --git a/metagpt/ext/sela/runner/README.md b/metagpt/ext/sela/runner/README.md new file mode 100644 index 000000000..7c031f1ee --- /dev/null +++ b/metagpt/ext/sela/runner/README.md @@ -0,0 +1,198 @@ +# SELA: Tree-Search Enhanced LLM Agents for Automated Machine Learning + +This document provides instructions for running baseline models. To start with, ensure that you prepare the datasets as instructed in `sela/README.md`. + +## Baselines + +### 1. AIDE + +#### Setup + +We use the AIDE version from September 30, 2024. Clone the repository and check out the specified commit: + +```bash +git clone https://github.com/WecoAI/aideml.git +git checkout 77953247ea0a5dc1bd502dd10939dd6d7fdcc5cc +``` + + +Modify `aideml/aide/utils/config.yaml` to set the following parameters: + +```yaml +# agent hyperparams +agent: + steps: 10 # Number of improvement iterations + k_fold_validation: 1 # Set to 1 to disable cross-validation + code: + model: deepseek-coder + temp: 0.5 + feedback: + model: deepseek-coder + temp: 0.5 + search: + max_debug_depth: 3 + debug_prob: 0.5 + num_drafts: 5 +``` + +Update your OpenAI API credentials in the environment: + +```bash +export OPENAI_API_KEY="your api key" +export OPENAI_BASE_URL="your own url" +``` + +Modify `aideml/aide/backend/__init__.py` (line 30 and below): + +```python +model_kwargs = model_kwargs | { + "model": model, + "temperature": temperature, + "max_tokens": max_tokens, + } +if "claude-" in model: + query_func = backend_anthropic.query +else: + query_func = backend_openai.query +``` + +Since Deepseek V2.5 no 
longer supports system messages using function calls, modify `aideml/aide/agent.py` (line 312): + +```python +response = cast( + dict, + query( + system_message=None, + user_message=prompt, + func_spec=review_func_spec, + model=self.acfg.feedback.model, + temperature=self.acfg.feedback.temp, + ), +) +``` + +Finally, install AIDE: + +```bash +cd aideml +pip install -e . +``` + +#### Run + +Execute the following script to generate results. A `log` folder (containing experimental configurations) and a `workspace` folder (storing final results) will be created: + +```bash +python runner/aide.py +``` + +--- + +### 2. Autogluon + +#### Setup + +Install Autogluon: + +```bash +pip install -U pip +pip install -U setuptools wheel +pip install autogluon==1.1.1 +``` + +#### Run + +For Tabular data: + +```bash +python run_experiment.py --exp_mode autogluon --task {task_name} +``` + +For Multimodal data: + +```bash +python run_experiment.py --exp_mode autogluon --task {task_name} --is_multimodal +``` + +Replace `{task_name}` with the specific task you want to run. + +--- + +### 3. AutoSklearn + +**Note:** +AutoSklearn requires: +- Linux operating system (e.g., Ubuntu) +- Python (>=3.7) +- C++ compiler (with C++11 support) + +If installing on a system without wheel files for the `pyrfr` package, you also need: + +- [SWIG](https://www.swig.org/survey.html) + +Refer to the [Windows/macOS compatibility](https://automl.github.io/auto-sklearn/master/installation.html#windows-macos-compatibility) section for further details. + +#### Setup + +Install AutoSklearn: + +```bash +pip install auto-sklearn==0.15.0 +``` + +#### Run + +Execute the following command for the Titanic task: + +```bash +python run_experiment.py --exp_mode autosklearn --task titanic +``` + +--- + +### 4. Base Data Interpreter + +Run the following command for the Titanic task: + +```bash +python run_experiment.py --exp_mode base --task titanic --num_experiments 10 +``` + +--- + +### 5. 
Custom Baselines + +To run additional baselines: + +- Each baseline must produce `dev_predictions.csv` and `test_predictions.csv` with a `target` column. +- Use the `evaluate_score` function for evaluation. + +--- + +## MLE-Bench + +**Note:** MLE-Bench requires Python 3.11 or higher. + +#### Setup + +Clone the repository and install: + +```bash +git clone https://github.com/openai/mle-bench.git +cd mle-bench +pip install -e . +``` + +Prepare the data: + +```bash +mlebench prepare -c {competition_id} --data-dir {data_dir} +``` + +#### Run the MLE-Bench Experiment + +Run the following command to execute the experiment: + +```bash +python run_experiment.py --exp_mode mcts --custom_dataset_dir {custom_dataset_dir} --rollouts 10 --from_scratch --role_timeout 3600 +``` \ No newline at end of file