diff --git a/.gitignore b/.gitignore index 6e1fc7f74..46c9b0dd4 100644 --- a/.gitignore +++ b/.gitignore @@ -29,7 +29,7 @@ share/python-wheels/ MANIFEST metagpt/tools/schemas/ examples/data/search_kb/*.json -expo/AutogluonModels +metagpt/ext/sela/AutogluonModels # PyInstaller # Usually these files are written by a python scripts from a template @@ -189,4 +189,4 @@ cov.xml *-structure.json *.dot .python-version -expo/results/* +metagpt/ext/sela/results/* diff --git a/expo/Greedy.py b/expo/Greedy.py deleted file mode 100644 index 8c8d865cd..000000000 --- a/expo/Greedy.py +++ /dev/null @@ -1,19 +0,0 @@ -import random - -from expo.MCTS import MCTS - - -class Greedy(MCTS): - def best_child(self): - if len(self.children) == 0: - return self.root_node - all_children = [child for children in self.children.values() for child in children] - return max(all_children, key=lambda x: x.normalized_reward.get("dev_score", 0)) - - -class Random(MCTS): - def best_child(self): - if len(self.children) == 0: - return self.root_node - all_children = [child for children in self.children.values() for child in children] - return random.choice(all_children) diff --git a/expo/data.yaml b/expo/data.yaml deleted file mode 100644 index 4c6549490..000000000 --- a/expo/data.yaml +++ /dev/null @@ -1,3 +0,0 @@ -datasets_dir: "D:/work/automl/datasets" # path to the datasets directory -work_dir: ../workspace # path to the workspace directory -role_dir: storage/SELA # path to the role directory \ No newline at end of file diff --git a/expo/results/PLACEHOLDER b/expo/results/PLACEHOLDER deleted file mode 100644 index e69de29bb..000000000 diff --git a/expo/results/tree/TREE b/expo/results/tree/TREE deleted file mode 100644 index e69de29bb..000000000 diff --git a/expo/README.md b/metagpt/ext/sela/README.md similarity index 70% rename from expo/README.md rename to metagpt/ext/sela/README.md index 800afc3cc..c8df4eeba 100644 --- a/expo/README.md +++ b/metagpt/ext/sela/README.md @@ -2,16 +2,15 @@ # SELA: 
Tree-Search Enhanced LLM Agents for Automated Machine Learning - ## 1. Data Preparation - Download Datasets：https://deepwisdom.feishu.cn/drive/folder/RVyofv9cvlvtxKdddt2cyn3BnTc?from=from_copylink - Download and prepare datasets from scratch: - ``` - cd expo/data - python dataset.py --save_analysis_pool - python hf_data.py --save_analysis_pool - ``` +``` +cd data +python dataset.py --save_analysis_pool +python hf_data.py --save_analysis_pool +``` ## 2. Configs @@ -28,7 +27,7 @@ ### LLM Config llm: api_type: 'openai' model: deepseek-coder - base_url: "https://oneapi.deepwisdom.ai/v1" + base_url: "https://your_base_url" api_key: sk-xxx temperature: 0.5 ``` @@ -61,16 +60,36 @@ #### Setup #### Run -- `python run_experiment.py --exp_mode mcts --task titanic --rollouts 10` - -If the dataset has reg metric, remember to use `--low_is_better`: - -- `python run_experiment.py --exp_mode mcts --task house-prices --rollouts 10 --low_is_better` +- Examples + ``` + python run_experiment.py --exp_mode mcts --task titanic --rollouts 10 + python run_experiment.py --exp_mode mcts --task house-prices --rollouts 10 --low_is_better + ``` -In addition to the generated insights, include the fixed insights saved in `expo/insights/fixed_insights.json` -- `--use_fixed_insights` +- `--rollouts` - The number of rollouts + +- `--use_fixed_insights` - In addition to the generated insights, include the fixed insights saved in `metagpt/ext/sela/insights/fixed_insights.json` +- `--low_is_better` - If the dataset uses a regression metric (lower is better), remember to set this flag + +- `--from_scratch` - Do not use pre-processed insight pool, generate new insight pool based on dataset before running MCTS, facilitating subsequent tuning to propose search space prompts + +- `--role_timeout` - The timeout for the role + - This feature limits the duration of a single simulation, making the experiment duration more controllable (for example, if you do ten rollouts and set role_timeout to 1,000, the experiment will stop at the latest 
after 10,000s) + + +- `--max_depth` - The maximum depth of MCTS, default is 4 (nodes at this depth directly return the previous simulation result without further expansion) + +- `--load_tree` - If MCTS was interrupted due to certain reasons but had already run multiple rollouts, you can use `--load_tree`. + - For example: + ``` + python run_experiment.py --exp_mode mcts --task titanic --rollouts 10 + ``` + - If this was interrupted after running three rollouts, you can use `--load_tree`: + ``` + python run_experiment.py --exp_mode mcts --task titanic --rollouts 7 --load_tree + ``` #### Ablation Study @@ -78,10 +97,10 @@ #### Ablation Study **DI RandomSearch** - Single insight -`python run_experiment.py --exp_mode aug --task titanic --aug_mode single` +`python run_experiment.py --exp_mode rs --task titanic --rs_mode single` - Set insight -`python run_experiment.py --exp_mode aug --task titanic --aug_mode set` +`python run_experiment.py --exp_mode rs --task titanic --rs_mode set` ## 4. Evaluation @@ -109,52 +128,14 @@ #### MLE-Bench ## 5. 
Baselines -### DS Agent -``` -git clone https://github.com/guosyjlu/DS-Agent.git -``` - -Modify the following lines in deployment/generate.py (lines 46-48) as shown below (the purpose is to use deepseek instead of OpenAI's API): -```python -messages = [{"role": "user", "content": prompt}] - -if 'gpt' in llm: - response = openai.ChatCompletion.create(**{"messages": messages,**raw_request}) - raw_completion = response["choices"][0]["message"]["content"] - -elif llm == 'deepseek-coder': - from openai import OpenAI - client = OpenAI( - api_key="yours", - base_url="https://oneapi.deepwisdom.ai/v1" - ) - response = client.chat.completions.create( - model="deepseek-coder", - messages=[ - # {"role": "system", "content": "You are a helpful assistant"}, - {"role": "user", "content": prompt}, - ], - temperature=temperature, - stream=False - ) - raw_completion = response.choices[0].message.content - -completion = raw_completion.split("```python")[1].split("```")[0] -``` - -After making the changes, create a new `deployment/test.sh` and run the following two lines separately, where `$TASK` is the name of the task you want to test -``` -python -u generate.py --llm deepseek-coder --task $TASK --shot 1 --retrieval > "$TASK".txt 2>&1 - -python -u evaluation.py --path "deepseek-coder_True_1" --task $TASK --device 0 > "$TASK"_eval.txt 2>&1 -``` ### AIDE #### Setup - +The version of AIDE we use is dated September 30, 2024 ``` git clone https://github.com/WecoAI/aideml.git +git -C aideml checkout 77953247ea0a5dc1bd502dd10939dd6d7fdcc5cc ``` Modify `aideml/aide/utils/config.yaml` - change `k_fold_validation`, `code model`, and `feedback model` as follows: @@ -240,8 +221,7 @@ #### Setup ``` pip install -U pip pip install -U setuptools wheel -pip install autogluon - +pip install autogluon==1.1.1 ``` For Tabular data: @@ -273,7 +253,7 @@ #### System requirements #### Setup ``` -pip install auto-sklearn +pip install auto-sklearn==0.15.0 ``` #### Run diff --git a/metagpt/ext/sela/data.yaml 
b/metagpt/ext/sela/data.yaml new file mode 100644 index 000000000..5f4a290ea --- /dev/null +++ b/metagpt/ext/sela/data.yaml @@ -0,0 +1,3 @@ +datasets_dir: "path/to/datasets" # path to the datasets directory +work_dir: ../../workspace # path to the workspace directory +role_dir: storage/SELA # path to the role directory \ No newline at end of file diff --git a/expo/data/custom_task.py b/metagpt/ext/sela/data/custom_task.py similarity index 94% rename from expo/data/custom_task.py rename to metagpt/ext/sela/data/custom_task.py index c2bf5c710..3371d5b1c 100644 --- a/expo/data/custom_task.py +++ b/metagpt/ext/sela/data/custom_task.py @@ -1,7 +1,7 @@ import os -from expo.data.dataset import SPECIAL_INSTRUCTIONS -from expo.experimenter.mle_bench.instructions import ( +from metagpt.ext.sela.data.dataset import SPECIAL_INSTRUCTIONS +from metagpt.ext.sela.experimenter.mle_bench.instructions import ( ADDITIONAL_NOTES, INSTRUCTIONS, INSTRUCTIONS_OBFUSCATED, diff --git a/expo/data/dataset.py b/metagpt/ext/sela/data/dataset.py similarity index 99% rename from expo/data/dataset.py rename to metagpt/ext/sela/data/dataset.py index 91490dcd7..ef4179011 100644 --- a/expo/data/dataset.py +++ b/metagpt/ext/sela/data/dataset.py @@ -9,8 +9,8 @@ import pandas as pd import yaml from sklearn.model_selection import train_test_split -from expo.insights.solution_designer import SolutionDesigner -from expo.utils import DATA_CONFIG +from metagpt.ext.sela.insights.solution_designer import SolutionDesigner +from metagpt.ext.sela.utils import DATA_CONFIG BASE_USER_REQUIREMENT = """ This is a {datasetname} dataset. Your goal is to predict the target column `{target_col}`. 
diff --git a/expo/data/hf_data.py b/metagpt/ext/sela/data/hf_data.py similarity index 96% rename from expo/data/hf_data.py rename to metagpt/ext/sela/data/hf_data.py index a18517d49..9645796af 100644 --- a/expo/data/hf_data.py +++ b/metagpt/ext/sela/data/hf_data.py @@ -7,14 +7,14 @@ import pandas as pd from datasets import load_dataset from PIL import Image -from expo.data.dataset import ( +from metagpt.ext.sela.data.dataset import ( ExpDataset, parse_args, process_dataset, save_datasets_dict_to_yaml, ) -from expo.insights.solution_designer import SolutionDesigner -from expo.utils import DATA_CONFIG +from metagpt.ext.sela.insights.solution_designer import SolutionDesigner +from metagpt.ext.sela.utils import DATA_CONFIG HFDATSETS = [ {"name": "sms_spam", "dataset_name": "ucirvine/sms_spam", "target_col": "label", "modality": "text"}, diff --git a/expo/datasets.yaml b/metagpt/ext/sela/datasets.yaml similarity index 100% rename from expo/datasets.yaml rename to metagpt/ext/sela/datasets.yaml diff --git a/expo/evaluation/evaluation.py b/metagpt/ext/sela/evaluation/evaluation.py similarity index 100% rename from expo/evaluation/evaluation.py rename to metagpt/ext/sela/evaluation/evaluation.py diff --git a/expo/evaluation/visualize_mcts.py b/metagpt/ext/sela/evaluation/visualize_mcts.py similarity index 99% rename from expo/evaluation/visualize_mcts.py rename to metagpt/ext/sela/evaluation/visualize_mcts.py index 44f5ec5f5..6f803a91c 100644 --- a/expo/evaluation/visualize_mcts.py +++ b/metagpt/ext/sela/evaluation/visualize_mcts.py @@ -3,7 +3,7 @@ import textwrap import matplotlib.pyplot as plt import networkx as nx -from expo.MCTS import Node +from metagpt.ext.sela.search.tree_search import Node NODE_TEMPLATE = """\ [Node {id}] diff --git a/expo/experimenter/__init__.py b/metagpt/ext/sela/experimenter/__init__.py similarity index 100% rename from expo/experimenter/__init__.py rename to metagpt/ext/sela/experimenter/__init__.py diff --git a/expo/experimenter/aide.py 
b/metagpt/ext/sela/experimenter/aide.py similarity index 95% rename from expo/experimenter/aide.py rename to metagpt/ext/sela/experimenter/aide.py index fb71dbdab..50fae94c1 100644 --- a/expo/experimenter/aide.py +++ b/metagpt/ext/sela/experimenter/aide.py @@ -1,11 +1,15 @@ -import aide import os import time +import aide + os.environ["OPENAI_API_KEY"] = "sk-xxx" os.environ["OPENAI_BASE_URL"] = "your url" + start_time = time.time() + data_dir = "xxx/data/titanic" + goal = f""" # User requirement ({data_dir}, 'This is a 04_titanic dataset. Your goal is to predict the target column `Survived`.\nPerform data analysis, data preprocessing, feature engineering, and modeling to predict the target. \nReport f1 on the eval data. Do not plot or make any visualizations.\n') @@ -28,4 +32,4 @@ print(f"Best solution code: {best_solution.code}") end_time = time.time() execution_time = end_time - start_time -print(f"run time : {execution_time} seconds") \ No newline at end of file +print(f"run time : {execution_time} seconds") diff --git a/expo/experimenter/autogluon.py b/metagpt/ext/sela/experimenter/autogluon.py similarity index 95% rename from expo/experimenter/autogluon.py rename to metagpt/ext/sela/experimenter/autogluon.py index dabf0c138..f547ce4ba 100644 --- a/expo/experimenter/autogluon.py +++ b/metagpt/ext/sela/experimenter/autogluon.py @@ -1,8 +1,10 @@ -from datetime import datetime -from expo.experimenter.custom import CustomExperimenter import os +from datetime import datetime + import pandas as pd +from metagpt.ext.sela.experimenter.custom import CustomExperimenter + class AGRunner: def __init__(self, state=None): @@ -11,6 +13,7 @@ class AGRunner: def run(self): from autogluon.tabular import TabularDataset, TabularPredictor + train_path = self.datasets["train"] dev_path = self.datasets["dev"] dev_wo_target_path = self.datasets["dev_wo_target"] @@ -32,6 +35,7 @@ class AGRunner: def run_multimodal(self): from autogluon.multimodal import MultiModalPredictor + target_col 
= self.state["dataset_config"]["target_col"] train_path = self.datasets["train"] dev_path = self.datasets["dev"] @@ -56,10 +60,7 @@ class AGRunner: test_preds = predictor.predict(test_data) # Return predictions for dev and test datasets - return { - "dev_preds": dev_preds, - "test_preds": test_preds - } + return {"dev_preds": dev_preds, "test_preds": test_preds} def load_split_dataset(self, train_path, dev_path, dev_wo_target_path, test_wo_target_path): """ @@ -94,7 +95,8 @@ class AGRunner: train_data[image_column] = train_data[image_column].apply(lambda x: os.path.join(root_folder, x)) dev_data[image_column] = dev_data[image_column].apply(lambda x: os.path.join(root_folder, x)) dev_wo_target_data[image_column] = dev_wo_target_data[image_column].apply( - lambda x: os.path.join(root_folder, x)) + lambda x: os.path.join(root_folder, x) + ) test_data[image_column] = test_data[image_column].apply(lambda x: os.path.join(root_folder, x)) return train_data, dev_data, dev_wo_target_data, test_data @@ -106,7 +108,7 @@ class GluonExperimenter(CustomExperimenter): def __init__(self, args, **kwargs): super().__init__(args, **kwargs) self.framework = AGRunner(self.state) - self.is_multimodal = args.is_multimodal if hasattr(args, 'is_multimodal') else False + self.is_multimodal = args.is_multimodal if hasattr(args, "is_multimodal") else False async def run_experiment(self): if not self.is_multimodal: diff --git a/expo/experimenter/autosklearn.py b/metagpt/ext/sela/experimenter/autosklearn.py similarity index 92% rename from expo/experimenter/autosklearn.py rename to metagpt/ext/sela/experimenter/autosklearn.py index 02a3cc465..f6ff267e7 100644 --- a/expo/experimenter/autosklearn.py +++ b/metagpt/ext/sela/experimenter/autosklearn.py @@ -1,9 +1,11 @@ from datetime import datetime -import pandas as pd -from expo.experimenter.custom import CustomExperimenter -from expo.evaluation.evaluation import evaluate_score from functools import partial +import pandas as pd + +from 
metagpt.ext.sela.evaluation.evaluation import evaluate_score +from metagpt.ext.sela.experimenter.custom import CustomExperimenter + def custom_scorer(y_true, y_pred, metric_name): return evaluate_score(y_pred, y_true, metric_name) @@ -19,9 +21,7 @@ class ASRunner: def create_autosklearn_scorer(self, metric_name): from autosklearn.metrics import make_scorer - return make_scorer( - name=metric_name, score_func=partial(custom_scorer, metric_name=metric_name) - ) + return make_scorer(name=metric_name, score_func=partial(custom_scorer, metric_name=metric_name)) def run(self): import autosklearn.classification diff --git a/expo/experimenter/custom.py b/metagpt/ext/sela/experimenter/custom.py similarity index 91% rename from expo/experimenter/custom.py rename to metagpt/ext/sela/experimenter/custom.py index f245499ca..70df1a78e 100644 --- a/expo/experimenter/custom.py +++ b/metagpt/ext/sela/experimenter/custom.py @@ -2,9 +2,9 @@ import os import pandas as pd -from expo.evaluation.evaluation import evaluate_score -from expo.experimenter.experimenter import Experimenter -from expo.MCTS import create_initial_state +from metagpt.ext.sela.evaluation.evaluation import evaluate_score +from metagpt.ext.sela.experimenter.experimenter import Experimenter +from metagpt.ext.sela.search.tree_search import create_initial_state class CustomExperimenter(Experimenter): diff --git a/expo/experimenter/experimenter.py b/metagpt/ext/sela/experimenter/experimenter.py similarity index 94% rename from expo/experimenter/experimenter.py rename to metagpt/ext/sela/experimenter/experimenter.py index 4a0b8413e..3df46b74b 100644 --- a/expo/experimenter/experimenter.py +++ b/metagpt/ext/sela/experimenter/experimenter.py @@ -5,10 +5,10 @@ import os import numpy as np import pandas as pd -from expo.evaluation.evaluation import evaluate_score -from expo.MCTS import create_initial_state -from expo.research_assistant import ResearchAssistant -from expo.utils import DATA_CONFIG, save_notebook +from 
metagpt.ext.sela.evaluation.evaluation import evaluate_score +from metagpt.ext.sela.research_assistant import ResearchAssistant +from metagpt.ext.sela.search.tree_search import create_initial_state +from metagpt.ext.sela.utils import DATA_CONFIG, save_notebook class Experimenter: diff --git a/expo/experimenter/mcts.py b/metagpt/ext/sela/experimenter/mcts.py similarity index 92% rename from expo/experimenter/mcts.py rename to metagpt/ext/sela/experimenter/mcts.py index a42566366..9fd66121d 100644 --- a/expo/experimenter/mcts.py +++ b/metagpt/ext/sela/experimenter/mcts.py @@ -1,13 +1,12 @@ import shutil -from expo.evaluation.evaluation import ( +from metagpt.ext.sela.evaluation.evaluation import ( node_evaluate_score_mlebench, node_evaluate_score_sela, ) -from expo.evaluation.visualize_mcts import get_tree_text -from expo.experimenter.experimenter import Experimenter -from expo.Greedy import Greedy, Random -from expo.MCTS import MCTS +from metagpt.ext.sela.evaluation.visualize_mcts import get_tree_text +from metagpt.ext.sela.experimenter.experimenter import Experimenter +from metagpt.ext.sela.search.search_algorithm import MCTS, Greedy, Random class MCTSExperimenter(Experimenter): diff --git a/expo/experimenter/mle_bench/instructions.py b/metagpt/ext/sela/experimenter/mle_bench/instructions.py similarity index 98% rename from expo/experimenter/mle_bench/instructions.py rename to metagpt/ext/sela/experimenter/mle_bench/instructions.py index 631f410dc..5c9c7b70f 100644 --- a/expo/experimenter/mle_bench/instructions.py +++ b/metagpt/ext/sela/experimenter/mle_bench/instructions.py @@ -8,7 +8,7 @@ You will be given a machine learning task. You must solve the task by training a BENCHMARK INSTRUCTIONS ------ -First, please read the instructions below which are specific to our benchmark setup. Note, these instructions can be found again in `expo.experimenter.mle_bench.instructions.py`. +First, please read the instructions below which are specific to our benchmark setup. 
Note, these instructions can be found again in `experimenter/mle_bench/instructions.py`. - **Competition-specific instructions**: The instructions for the competition are available in `{dataset_dir}/description.md`. Read this file to understand the problem setup for the competition you are currently solving. - **Dataset**: The dataset files for the competition are available in `{dataset_dir}`. @@ -28,7 +28,7 @@ You will be given a machine learning task. You must solve the task by training a BENCHMARK INSTRUCTIONS ------ -First, please read the instructions below which are specific to our benchmark setup. Note, these instructions can be found again in `expo.experimenter.mle_bench.instructions.py`. +First, please read the instructions below which are specific to our benchmark setup. Note, these instructions can be found again in `experimenter/mle_bench/instructions.py`. - **Task-specific instructions**: The instructions for the task are available in `{dataset_dir}/description.md`. Read this file to understand the problem setup for the task you are currently solving. - **Dataset**: The dataset files for the task are available in `{dataset_dir}/`. 
diff --git a/expo/experimenter/aug.py b/metagpt/ext/sela/experimenter/random_search.py similarity index 74% rename from expo/experimenter/aug.py rename to metagpt/ext/sela/experimenter/random_search.py index bcfa5d4ad..5617ee601 100644 --- a/expo/experimenter/aug.py +++ b/metagpt/ext/sela/experimenter/random_search.py @@ -1,7 +1,7 @@ -from expo.experimenter.experimenter import Experimenter -from expo.insights.instruction_generator import InstructionGenerator -from expo.research_assistant import ResearchAssistant -from expo.utils import get_exp_pool_path +from metagpt.ext.sela.experimenter.experimenter import Experimenter +from metagpt.ext.sela.insights.instruction_generator import InstructionGenerator +from metagpt.ext.sela.research_assistant import ResearchAssistant +from metagpt.ext.sela.utils import get_exp_pool_path EXPS_PROMPT = """ When doing the tasks, you can refer to the insights below: @@ -10,27 +10,27 @@ When doing the tasks, you can refer to the insights below: """ -class AugExperimenter(Experimenter): - result_path: str = "results/aug" +class RandomSearchExperimenter(Experimenter): + result_path: str = "results/random_search" async def run_experiment(self): # state = create_initial_state(self.args.task, start_task_id=1, data_config=self.data_config, low_is_better=self.args.low_is_better, name="") user_requirement = self.state["requirement"] exp_pool_path = get_exp_pool_path(self.args.task, self.data_config, pool_name="ds_analysis_pool") - exp_pool = InstructionGenerator.load_analysis_pool( + exp_pool = InstructionGenerator.load_insight_pool( exp_pool_path, use_fixed_insights=self.args.use_fixed_insights ) - if self.args.aug_mode == "single": + if self.args.rs_mode == "single": exps = InstructionGenerator._random_sample(exp_pool, self.args.num_experiments) exps = [exp["Analysis"] for exp in exps] - elif self.args.aug_mode == "set": + elif self.args.rs_mode == "set": exps = [] for i in range(self.args.num_experiments): exp_set = 
InstructionGenerator.sample_instruction_set(exp_pool) exp_set_text = "\n".join([f"{exp['task_id']}: {exp['Analysis']}" for exp in exp_set]) exps.append(exp_set_text) else: - raise ValueError(f"Invalid mode: {self.args.aug_mode}") + raise ValueError(f"Invalid mode: {self.args.rs_mode}") results = [] for i in range(self.args.num_experiments): @@ -45,7 +45,7 @@ class AugExperimenter(Experimenter): { "idx": i, "score_dict": score_dict, - "aug_mode": self.args.aug_mode, + "rs_mode": self.args.rs_mode, "insights": exps[i], "user_requirement": requirement, "args": vars(self.args), diff --git a/expo/insights/fixed_insights.json b/metagpt/ext/sela/insights/fixed_insights.json similarity index 100% rename from expo/insights/fixed_insights.json rename to metagpt/ext/sela/insights/fixed_insights.json diff --git a/expo/insights/instruction_generator.py b/metagpt/ext/sela/insights/instruction_generator.py similarity index 97% rename from expo/insights/instruction_generator.py rename to metagpt/ext/sela/insights/instruction_generator.py index ab9b2cc67..d5d24c74d 100644 --- a/expo/insights/instruction_generator.py +++ b/metagpt/ext/sela/insights/instruction_generator.py @@ -3,8 +3,8 @@ import os import random from difflib import SequenceMatcher -from expo.insights.solution_designer import SolutionDesigner -from expo.utils import clean_json_from_rsp, load_data_config, mcts_logger +from metagpt.ext.sela.insights.solution_designer import SolutionDesigner +from metagpt.ext.sela.utils import clean_json_from_rsp, load_data_config, mcts_logger from metagpt.llm import LLM from metagpt.schema import Message diff --git a/expo/insights/solution_designer.py b/metagpt/ext/sela/insights/solution_designer.py similarity index 98% rename from expo/insights/solution_designer.py rename to metagpt/ext/sela/insights/solution_designer.py index 262caa0f6..1b61c2141 100644 --- a/expo/insights/solution_designer.py +++ b/metagpt/ext/sela/insights/solution_designer.py @@ -1,6 +1,6 @@ import json -from 
expo.utils import clean_json_from_rsp, load_data_config +from metagpt.ext.sela.utils import clean_json_from_rsp, load_data_config from metagpt.llm import LLM DATA_CONFIG = load_data_config() diff --git a/expo/requirements.txt b/metagpt/ext/sela/requirements.txt similarity index 100% rename from expo/requirements.txt rename to metagpt/ext/sela/requirements.txt diff --git a/expo/research_assistant.py b/metagpt/ext/sela/research_assistant.py similarity index 98% rename from expo/research_assistant.py rename to metagpt/ext/sela/research_assistant.py index d068dd4e5..2c698c1d2 100644 --- a/expo/research_assistant.py +++ b/metagpt/ext/sela/research_assistant.py @@ -6,9 +6,9 @@ import os from pydantic import model_validator -from expo.utils import mcts_logger, save_notebook from metagpt.actions.di.write_analysis_code import WriteAnalysisCode from metagpt.const import SERDESER_PATH +from metagpt.ext.sela.utils import mcts_logger, save_notebook from metagpt.roles.di.data_interpreter import DataInterpreter from metagpt.schema import Message, Task, TaskResult from metagpt.utils.common import CodeParser, write_json_file @@ -71,7 +71,7 @@ class ResearchAssistant(DataInterpreter): return f"Node-{self.node_id}" def get_next_instruction(self): - return self.planner.plan.tasks[self.start_task_id] + return self.planner.plan.tasks[self.start_task_id].instruction def change_next_instruction(self, new_instruction): if new_instruction is not None: diff --git a/expo/run_experiment.py b/metagpt/ext/sela/run_experiment.py similarity index 79% rename from expo/run_experiment.py rename to metagpt/ext/sela/run_experiment.py index 68c3b35d4..4cced19c3 100644 --- a/expo/run_experiment.py +++ b/metagpt/ext/sela/run_experiment.py @@ -1,13 +1,13 @@ import argparse import asyncio -from expo.data.custom_task import get_mle_is_lower_better, get_mle_task_id -from expo.experimenter.aug import AugExperimenter -from expo.experimenter.autogluon import GluonExperimenter -from expo.experimenter.autosklearn 
import AutoSklearnExperimenter -from expo.experimenter.custom import CustomExperimenter -from expo.experimenter.experimenter import Experimenter -from expo.experimenter.mcts import MCTSExperimenter +from metagpt.ext.sela.data.custom_task import get_mle_is_lower_better, get_mle_task_id +from metagpt.ext.sela.experimenter.autogluon import GluonExperimenter +from metagpt.ext.sela.experimenter.autosklearn import AutoSklearnExperimenter +from metagpt.ext.sela.experimenter.custom import CustomExperimenter +from metagpt.ext.sela.experimenter.experimenter import Experimenter +from metagpt.ext.sela.experimenter.mcts import MCTSExperimenter +from metagpt.ext.sela.experimenter.random_search import RandomSearchExperimenter def get_args(cmd=True): @@ -17,12 +17,12 @@ def get_args(cmd=True): "--exp_mode", type=str, default="mcts", - choices=["mcts", "aug", "base", "custom", "greedy", "autogluon", "random", "autosklearn"], + choices=["mcts", "rs", "base", "custom", "greedy", "autogluon", "random", "autosklearn"], ) parser.add_argument("--role_timeout", type=int, default=1000) get_di_args(parser) get_mcts_args(parser) - get_aug_exp_args(parser) + get_rs_exp_args(parser) if cmd: args = parser.parse_args() else: @@ -56,8 +56,8 @@ def get_mcts_args(parser): parser.add_argument("--max_depth", type=int, default=4) -def get_aug_exp_args(parser): - parser.add_argument("--aug_mode", type=str, default="single", choices=["single", "set"]) +def get_rs_exp_args(parser): + parser.add_argument("--rs_mode", type=str, default="single", choices=["single", "set"]) parser.add_argument("--is_multimodal", action="store_true", help="Specify if the model is multi-modal") @@ -79,8 +79,8 @@ async def main(args): experimenter = MCTSExperimenter(args, tree_mode="greedy") elif args.exp_mode == "random": experimenter = MCTSExperimenter(args, tree_mode="random") - elif args.exp_mode == "aug": - experimenter = AugExperimenter(args) + elif args.exp_mode == "rs": + experimenter = RandomSearchExperimenter(args) 
elif args.exp_mode == "base": experimenter = Experimenter(args) elif args.exp_mode == "autogluon": diff --git a/expo/scripts/run_cls.sh b/metagpt/ext/sela/scripts/run_cls.sh similarity index 100% rename from expo/scripts/run_cls.sh rename to metagpt/ext/sela/scripts/run_cls.sh diff --git a/expo/scripts/run_cls_mod.sh b/metagpt/ext/sela/scripts/run_cls_mod.sh similarity index 100% rename from expo/scripts/run_cls_mod.sh rename to metagpt/ext/sela/scripts/run_cls_mod.sh diff --git a/expo/scripts/run_reg.sh b/metagpt/ext/sela/scripts/run_reg.sh similarity index 100% rename from expo/scripts/run_reg.sh rename to metagpt/ext/sela/scripts/run_reg.sh diff --git a/expo/scripts/visualize_experiment.py b/metagpt/ext/sela/scripts/visualize_experiment.py similarity index 66% rename from expo/scripts/visualize_experiment.py rename to metagpt/ext/sela/scripts/visualize_experiment.py index 6cd84a0de..a6d980d11 100644 --- a/expo/scripts/visualize_experiment.py +++ b/metagpt/ext/sela/scripts/visualize_experiment.py @@ -1,9 +1,16 @@ import networkx as nx -from expo.evaluation.visualize_mcts import build_tree_recursive, visualize_tree -from expo.MCTS import MCTS, create_initial_state, initialize_di_root_node -from expo.run_experiment import get_args -from expo.utils import DATA_CONFIG +from metagpt.ext.sela.evaluation.visualize_mcts import ( + build_tree_recursive, + visualize_tree, +) +from metagpt.ext.sela.run_experiment import get_args +from metagpt.ext.sela.search.search_algorithm import MCTS +from metagpt.ext.sela.search.tree_search import ( + create_initial_state, + initialize_di_root_node, +) +from metagpt.ext.sela.utils import DATA_CONFIG if __name__ == "__main__": args = get_args() diff --git a/metagpt/ext/sela/search/search_algorithm.py b/metagpt/ext/sela/search/search_algorithm.py new file mode 100644 index 000000000..ca47d8cf6 --- /dev/null +++ b/metagpt/ext/sela/search/search_algorithm.py @@ -0,0 +1,32 @@ +import numpy as np + +from metagpt.ext.sela.search.tree_search import BaseTreeSearch, Node + + +class Greedy(BaseTreeSearch): + def 
best_child(self): + if len(self.children) == 0: + return self.root_node + all_children = [child for children in self.children.values() for child in children] + return max(all_children, key=lambda x: x.normalized_reward.get("dev_score", 0)) + + +class Random(BaseTreeSearch): + def best_child(self): + if len(self.children) == 0: + return self.root_node + all_children = [child for children in self.children.values() for child in children] + return np.random.choice(all_children) + + +class MCTS(BaseTreeSearch): + def best_child(self): + def uct(node: Node): + n_visits = node.visited if node.visited else self.c_unvisited + avg_value = node.avg_value() if node.visited else node.value / self.c_unvisited + return avg_value + self.c_explore * np.sqrt(np.log(node.parent.visited) / n_visits) + + if len(self.children) == 0: + return self.root_node + all_children = [child for children in self.children.values() for child in children] + return max(all_children, key=uct) diff --git a/expo/MCTS.py b/metagpt/ext/sela/search/tree_search.py similarity index 83% rename from expo/MCTS.py rename to metagpt/ext/sela/search/tree_search.py index 2ce559ae0..cde8dc82a 100644 --- a/expo/MCTS.py +++ b/metagpt/ext/sela/search/tree_search.py @@ -1,24 +1,51 @@ import json -import math import os import pickle -import random import shutil import numpy as np import pandas as pd -from expo.data.custom_task import get_mle_bench_requirements, get_mle_task_id -from expo.data.dataset import generate_task_requirement, get_split_dataset_path -from expo.evaluation.evaluation import evaluate_score -from expo.insights.instruction_generator import InstructionGenerator -from expo.research_assistant import ResearchAssistant, TimeoutException -from expo.utils import get_exp_pool_path, load_execute_notebook, mcts_logger +from metagpt.ext.sela.data.custom_task import ( + get_mle_bench_requirements, + get_mle_task_id, +) +from metagpt.ext.sela.data.dataset import ( + generate_task_requirement, + 
get_split_dataset_path, +) +from metagpt.ext.sela.evaluation.evaluation import evaluate_score +from metagpt.ext.sela.insights.instruction_generator import InstructionGenerator +from metagpt.ext.sela.research_assistant import ResearchAssistant, TimeoutException +from metagpt.ext.sela.utils import get_exp_pool_path, load_execute_notebook, mcts_logger from metagpt.tools.tool_recommend import ToolRecommender from metagpt.utils.common import read_json_file -def initialize_di_root_node(state, reflection: bool = True): +def initialize_di_root_node(state: dict, reflection: bool = True): + """ + Initialize the root node of the decision tree. + + Args: + state (dict): The initial state of the tree, containing: + - task (str): The task to be performed (e.g., "titanic"). + - work_dir (str): The working directory. + - node_dir (str): The directory for the node. + - dataset_config (dict): The configuration of the dataset. + - datasets_dir (str): The directory of the datasets. + - exp_pool_path (str): The path to the experiment pool. + - requirement (str): The requirement for the task. + - has_run (bool): Whether the task has run. + - start_task_id (int): The ID of the starting task. + - low_is_better (bool): Whether a lower score is better. + - role_timeout (int): The timeout for the role. + - external_eval (bool): Whether to use external evaluation. + - custom_dataset_dir (str): The directory of the custom dataset. + reflection (bool, optional): Whether to use reflection. Defaults to True. + + Returns: + tuple: A tuple containing the ResearchAssistant role and the root Node. 
+ """ role = ResearchAssistant( node_id="0", start_task_id=state["start_task_id"], @@ -29,7 +56,21 @@ def initialize_di_root_node(state, reflection: bool = True): return role, Node(parent=None, state=state, action=None, value=0) -def create_initial_state(task, start_task_id, data_config, args): +def create_initial_state(task: str, start_task_id: int, data_config: dict, args): + """ + Create the initial state of the tree. + + Args: + task (str): The task to be performed. + start_task_id (int): The ID of the starting task. + data_config (dict): The configuration of the data. + Expected keys: 'datasets', 'work_dir', 'role_dir'. + args (Namespace): The arguments passed to the program. + Expected attributes: 'external_eval', 'custom_dataset_dir', 'special_instruction', 'name', 'low_is_better', 'role_timeout'. + + Returns: + dict: The initial state of the tree. + """ external_eval = args.external_eval if args.custom_dataset_dir: @@ -79,7 +120,9 @@ class Node: normalized_reward: dict = {"train_score": 0, "dev_score": 0, "test_score": 0} parent = None - def __init__(self, parent=None, state=None, action=None, value=0, max_depth=4, **kwargs): + def __init__( + self, parent=None, state: dict = None, action: str = None, value: float = 0, max_depth: int = 4, **kwargs + ): self.state = state self.action = action self.value = value @@ -225,7 +268,7 @@ class Node: self.get_and_move_predictions("test") return score_dict - async def run_node(self, role=None): + async def run_node(self, role: ResearchAssistant = None): if self.is_terminal() and role is not None: if role.state_saved: return self.raw_reward @@ -272,7 +315,7 @@ class Node: return score_dict, result_dict -class MCTS: +class BaseTreeSearch: # data_path root_node: Node = None children: dict = {} @@ -283,7 +326,7 @@ class MCTS: # insight generator instruction_generator: InstructionGenerator = None - def __init__(self, root_node, max_depth, use_fixed_insights): + def __init__(self, root_node: Node, max_depth: int, 
use_fixed_insights: bool): self.root_node = root_node self.max_depth = max_depth self.use_fixed_insights = use_fixed_insights @@ -294,15 +337,7 @@ class MCTS: return node def best_child(self): - def uct(node: Node): - n_visits = node.visited if node.visited else self.c_unvisited - avg_value = node.avg_value() if node.visited else node.value / self.c_unvisited - return avg_value + self.c_explore * math.sqrt(math.log(node.parent.visited) / n_visits) - - if len(self.children) == 0: - return self.root_node - all_children = [child for children in self.children.values() for child in children] - return max(all_children, key=uct) + raise NotImplementedError async def expand(self, node: Node, max_children=5): await node.expand(max_children, self.instruction_generator) @@ -314,13 +349,13 @@ class MCTS: "Returns the reward for a random simulation (to completion) of `node`" mcts_logger.log("MCTS", f"Start simulating node {node.id}:") while node.children: - node = random.choice(node.children) + node = np.random.choice(node.children) reward, result_dict = await node.run_node(role) mcts_logger.log("MCTS", f"Simulated node's reward: {reward}") # TODO: add new insights return reward - def backpropagate(self, node: Node, reward): + def backpropagate(self, node: Node, reward: dict): child_node = node node.update(reward) node = node.parent @@ -333,7 +368,7 @@ class MCTS: global_best_score = root.normalized_reward["test_score"] dev_best_score = root.normalized_reward["dev_score"] - def bfs(node: Node, best_score, best_child: Node, split): + def bfs(node: Node, best_score: float, best_child: Node, split: str): assert split in ["test_score", "dev_score"] if node not in self.children: return best_score, best_child @@ -354,7 +389,7 @@ class MCTS: def get_num_simulations(self): return self.root_node.visited - def save_node_order(self, node_id): + def save_node_order(self, node_id: str): self.node_order.append(node_id) with open(os.path.join(self.root_node.state["node_dir"], 
"node_order.json"), "w") as f: json.dump(self.node_order, f) @@ -375,7 +410,7 @@ class MCTS: scores["test_raw"].append(node.raw_reward["test_score"]) return scores - async def search(self, state, args): + async def search(self, state: dict, args): reflection = args.reflection load_tree = args.load_tree rollouts = args.rollouts @@ -424,17 +459,17 @@ class MCTS: self.save_node_order(node.id) return self.best_path(root) - async def expand_and_simulate(self, node): + async def expand_and_simulate(self, node: Node): # Expand and randomly select a child node, then simulate it if node.visited > 0: children = await self.expand(node) - node = random.choice(children) + node = np.random.choice(children) reward = await self.simulate(node) self.backpropagate(node, reward) return node, reward def load_tree(self): - def load_children_node(node): + def load_children_node(node: Node): mcts_logger.log("MCTS", f"Load node {node.id}'s child: {node.children}") if node.is_terminal() or not node.children: return diff --git a/expo/utils.py b/metagpt/ext/sela/utils.py similarity index 100% rename from expo/utils.py rename to metagpt/ext/sela/utils.py