Merge pull request #1543 from cyzus/sela

renaming etc.
This commit is contained in:
garylin2099 2024-10-28 18:17:36 +08:00 committed by GitHub
commit cf03c5d26e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
35 changed files with 221 additions and 185 deletions

4
.gitignore vendored
View file

@ -29,7 +29,7 @@ share/python-wheels/
MANIFEST
metagpt/tools/schemas/
examples/data/search_kb/*.json
expo/AutogluonModels
metagpt/ext/sela/AutogluonModels
# PyInstaller
# Usually these files are written by a python scripts from a template
@ -189,4 +189,4 @@ cov.xml
*-structure.json
*.dot
.python-version
expo/results/*
metagpt/ext/sela/results/*

View file

@ -1,19 +0,0 @@
import random
from expo.MCTS import MCTS
class Greedy(MCTS):
    """Tree search variant that always continues from the best dev-score node."""

    def best_child(self):
        """Return the recorded child with the highest normalized ``dev_score``.

        The root node is returned while no children have been recorded yet.
        A node whose ``normalized_reward`` lacks ``dev_score`` is scored as 0.
        """
        if not self.children:
            return self.root_node

        # self.children maps to lists of child nodes; flatten them into one pool.
        candidates = []
        for siblings in self.children.values():
            candidates.extend(siblings)

        def dev_score(node):
            return node.normalized_reward.get("dev_score", 0)

        return max(candidates, key=dev_score)
class Random(MCTS):
    """Tree search variant that continues from a uniformly random recorded child."""

    def best_child(self):
        """Return a random node among all recorded children.

        The root node is returned while no children have been recorded yet.
        """
        if not self.children:
            return self.root_node

        # Flatten every per-parent child list into a single candidate pool.
        pool = []
        for siblings in self.children.values():
            pool.extend(siblings)
        return random.choice(pool)

View file

@ -1,3 +0,0 @@
datasets_dir: "D:/work/automl/datasets" # path to the datasets directory
work_dir: ../workspace # path to the workspace directory
role_dir: storage/SELA # path to the role directory

View file

View file

@ -2,16 +2,15 @@ # SELA: Tree-Search Enhanced LLM Agents for Automated Machine Learning
## 1. Data Preparation
- Download Datasets: https://deepwisdom.feishu.cn/drive/folder/RVyofv9cvlvtxKdddt2cyn3BnTc?from=from_copylink
- Download and prepare datasets from scratch:
```
cd expo/data
python dataset.py --save_analysis_pool
python hf_data.py --save_analysis_pool
```
```
cd data
python dataset.py --save_analysis_pool
python hf_data.py --save_analysis_pool
```
## 2. Configs
@ -28,7 +27,7 @@ ### LLM Config
llm:
api_type: 'openai'
model: deepseek-coder
base_url: "https://oneapi.deepwisdom.ai/v1"
base_url: "https://your_base_url"
api_key: sk-xxx
temperature: 0.5
```
@ -61,16 +60,36 @@ #### Setup
#### Run
- `python run_experiment.py --exp_mode mcts --task titanic --rollouts 10`
If the dataset has reg metric, remember to use `--low_is_better`:
- `python run_experiment.py --exp_mode mcts --task house-prices --rollouts 10 --low_is_better`
- Examples
```
python run_experiment.py --exp_mode mcts --task titanic --rollouts 10
python run_experiment.py --exp_mode mcts --task house-prices --rollouts 10 --low_is_better
```
In addition to the generated insights, include the fixed insights saved in `expo/insights/fixed_insights.json`
- `--use_fixed_insights`
- `--rollouts` - The number of rollouts
- `--use_fixed_insights` - In addition to the generated insights, include the fixed insights saved in `metagpt/ext/sela/insights/fixed_insights.json`
- `--low_is_better` - If the dataset is scored with a regression (`reg`) metric, i.e. a lower score is better, remember to use `--low_is_better`
- `--from_scratch` - Do not use pre-processed insight pool, generate new insight pool based on dataset before running MCTS, facilitating subsequent tuning to propose search space prompts
- `--role_timeout` - The timeout for the role
- This feature limits the duration of a single simulation, making the experiment duration more controllable (for example, if you do ten rollouts and set role_timeout to 1,000, the experiment will stop after 10,000s at the latest)
- `--max_depth` - The maximum depth of MCTS, default is 4 (nodes at this depth directly return the previous simulation result without further expansion)
- `--load_tree` - If MCTS was interrupted due to certain reasons but had already run multiple rollouts, you can use `--load_tree`.
- For example:
```
python run_experiment.py --exp_mode mcts --task titanic --rollouts 10
```
- If this was interrupted after running three rollouts, you can use `--load_tree`:
```
python run_experiment.py --exp_mode mcts --task titanic --rollouts 7 --load_tree
```
#### Ablation Study
@ -78,10 +97,10 @@ #### Ablation Study
**DI RandomSearch**
- Single insight
`python run_experiment.py --exp_mode aug --task titanic --aug_mode single`
`python run_experiment.py --exp_mode rs --task titanic --rs_mode single`
- Set insight
`python run_experiment.py --exp_mode aug --task titanic --aug_mode set`
`python run_experiment.py --exp_mode rs --task titanic --rs_mode set`
## 4. Evaluation
@ -109,52 +128,14 @@ #### MLE-Bench
## 5. Baselines
### DS Agent
```
git clone https://github.com/guosyjlu/DS-Agent.git
```
Modify the following lines in deployment/generate.py (lines 46-48) as shown below (the purpose is to use deepseek instead of OpenAI's API):
```python
messages = [{"role": "user", "content": prompt}]
if 'gpt' in llm:
response = openai.ChatCompletion.create(**{"messages": messages,**raw_request})
raw_completion = response["choices"][0]["message"]["content"]
elif llm == 'deepseek-coder':
from openai import OpenAI
client = OpenAI(
api_key="yours",
base_url="https://oneapi.deepwisdom.ai/v1"
)
response = client.chat.completions.create(
model="deepseek-coder",
messages=[
# {"role": "system", "content": "You are a helpful assistant"},
{"role": "user", "content": prompt},
],
temperature=temperature,
stream=False
)
raw_completion = response.choices[0].message.content
completion = raw_completion.split("```python")[1].split("```")[0]
```
After making the changes, create a new `deployment/test.sh` and run the following two lines separately, where `$TASK` is the name of the task you want to test
```
python -u generate.py --llm deepseek-coder --task $TASK --shot 1 --retrieval > "$TASK".txt 2>&1
python -u evaluation.py --path "deepseek-coder_True_1" --task $TASK --device 0 > "$TASK"_eval.txt 2>&1
```
### AIDE
#### Setup
The version of AIDE we use is dated September 30, 2024
```
git clone https://github.com/WecoAI/aideml.git
git checkout 77953247ea0a5dc1bd502dd10939dd6d7fdcc5cc
```
Modify `aideml/aide/utils/config.yaml` - change `k_fold_validation`, `code model`, and `feedback model` as follows:
@ -240,8 +221,7 @@ #### Setup
```
pip install -U pip
pip install -U setuptools wheel
pip install autogluon
pip install autogluon==1.1.1
```
For Tabular data:
@ -273,7 +253,7 @@ #### System requirements
#### Setup
```
pip install auto-sklearn
pip install auto-sklearn==0.15.0
```
#### Run

View file

@ -0,0 +1,3 @@
datasets_dir: "path/to/datasets" # path to the datasets directory
work_dir: ../../workspace # path to the workspace directory
role_dir: storage/SELA # path to the role directory

View file

@ -1,7 +1,7 @@
import os
from expo.data.dataset import SPECIAL_INSTRUCTIONS
from expo.experimenter.mle_bench.instructions import (
from metagpt.ext.sela.data.dataset import SPECIAL_INSTRUCTIONS
from metagpt.ext.sela.experimenter.mle_bench.instructions import (
ADDITIONAL_NOTES,
INSTRUCTIONS,
INSTRUCTIONS_OBFUSCATED,

View file

@ -9,8 +9,8 @@ import pandas as pd
import yaml
from sklearn.model_selection import train_test_split
from expo.insights.solution_designer import SolutionDesigner
from expo.utils import DATA_CONFIG
from metagpt.ext.sela.insights.solution_designer import SolutionDesigner
from metagpt.ext.sela.utils import DATA_CONFIG
BASE_USER_REQUIREMENT = """
This is a {datasetname} dataset. Your goal is to predict the target column `{target_col}`.

View file

@ -7,14 +7,14 @@ import pandas as pd
from datasets import load_dataset
from PIL import Image
from expo.data.dataset import (
from metagpt.ext.sela.data.dataset import (
ExpDataset,
parse_args,
process_dataset,
save_datasets_dict_to_yaml,
)
from expo.insights.solution_designer import SolutionDesigner
from expo.utils import DATA_CONFIG
from metagpt.ext.sela.insights.solution_designer import SolutionDesigner
from metagpt.ext.sela.utils import DATA_CONFIG
HFDATSETS = [
{"name": "sms_spam", "dataset_name": "ucirvine/sms_spam", "target_col": "label", "modality": "text"},

View file

@ -3,7 +3,7 @@ import textwrap
import matplotlib.pyplot as plt
import networkx as nx
from expo.MCTS import Node
from metagpt.ext.sela.search.tree_search import Node
NODE_TEMPLATE = """\
[Node {id}]

View file

@ -1,11 +1,15 @@
import aide
import os
import time
import aide
os.environ["OPENAI_API_KEY"] = "sk-xxx"
os.environ["OPENAI_BASE_URL"] = "your url"
start_time = time.time()
data_dir = "xxx/data/titanic"
goal = f"""
# User requirement
({data_dir}, 'This is a 04_titanic dataset. Your goal is to predict the target column `Survived`.\nPerform data analysis, data preprocessing, feature engineering, and modeling to predict the target. \nReport f1 on the eval data. Do not plot or make any visualizations.\n')
@ -28,4 +32,4 @@ print(f"Best solution code: {best_solution.code}")
end_time = time.time()
execution_time = end_time - start_time
print(f"run time : {execution_time} seconds")
print(f"run time : {execution_time} seconds")

View file

@ -1,8 +1,10 @@
from datetime import datetime
from expo.experimenter.custom import CustomExperimenter
import os
from datetime import datetime
import pandas as pd
from metagpt.ext.sela.experimenter.custom import CustomExperimenter
class AGRunner:
def __init__(self, state=None):
@ -11,6 +13,7 @@ class AGRunner:
def run(self):
from autogluon.tabular import TabularDataset, TabularPredictor
train_path = self.datasets["train"]
dev_path = self.datasets["dev"]
dev_wo_target_path = self.datasets["dev_wo_target"]
@ -32,6 +35,7 @@ class AGRunner:
def run_multimodal(self):
from autogluon.multimodal import MultiModalPredictor
target_col = self.state["dataset_config"]["target_col"]
train_path = self.datasets["train"]
dev_path = self.datasets["dev"]
@ -56,10 +60,7 @@ class AGRunner:
test_preds = predictor.predict(test_data)
# Return predictions for dev and test datasets
return {
"dev_preds": dev_preds,
"test_preds": test_preds
}
return {"dev_preds": dev_preds, "test_preds": test_preds}
def load_split_dataset(self, train_path, dev_path, dev_wo_target_path, test_wo_target_path):
"""
@ -94,7 +95,8 @@ class AGRunner:
train_data[image_column] = train_data[image_column].apply(lambda x: os.path.join(root_folder, x))
dev_data[image_column] = dev_data[image_column].apply(lambda x: os.path.join(root_folder, x))
dev_wo_target_data[image_column] = dev_wo_target_data[image_column].apply(
lambda x: os.path.join(root_folder, x))
lambda x: os.path.join(root_folder, x)
)
test_data[image_column] = test_data[image_column].apply(lambda x: os.path.join(root_folder, x))
return train_data, dev_data, dev_wo_target_data, test_data
@ -106,7 +108,7 @@ class GluonExperimenter(CustomExperimenter):
def __init__(self, args, **kwargs):
super().__init__(args, **kwargs)
self.framework = AGRunner(self.state)
self.is_multimodal = args.is_multimodal if hasattr(args, 'is_multimodal') else False
self.is_multimodal = args.is_multimodal if hasattr(args, "is_multimodal") else False
async def run_experiment(self):
if not self.is_multimodal:

View file

@ -1,9 +1,11 @@
from datetime import datetime
import pandas as pd
from expo.experimenter.custom import CustomExperimenter
from expo.evaluation.evaluation import evaluate_score
from functools import partial
import pandas as pd
from metagpt.ext.sela.evaluation.evaluation import evaluate_score
from metagpt.ext.sela.experimenter.custom import CustomExperimenter
def custom_scorer(y_true, y_pred, metric_name):
    """Score predictions via ``evaluate_score`` behind a ``(y_true, y_pred)`` signature.

    The arguments are forwarded as ``evaluate_score(y_pred, y_true, metric_name)``,
    i.e. with the predictions first.
    """
    score = evaluate_score(y_pred, y_true, metric_name)
    return score
@ -19,9 +21,7 @@ class ASRunner:
def create_autosklearn_scorer(self, metric_name):
from autosklearn.metrics import make_scorer
return make_scorer(
name=metric_name, score_func=partial(custom_scorer, metric_name=metric_name)
)
return make_scorer(name=metric_name, score_func=partial(custom_scorer, metric_name=metric_name))
def run(self):
import autosklearn.classification

View file

@ -2,9 +2,9 @@ import os
import pandas as pd
from expo.evaluation.evaluation import evaluate_score
from expo.experimenter.experimenter import Experimenter
from expo.MCTS import create_initial_state
from metagpt.ext.sela.evaluation.evaluation import evaluate_score
from metagpt.ext.sela.experimenter.experimenter import Experimenter
from metagpt.ext.sela.search.tree_search import create_initial_state
class CustomExperimenter(Experimenter):

View file

@ -5,10 +5,10 @@ import os
import numpy as np
import pandas as pd
from expo.evaluation.evaluation import evaluate_score
from expo.MCTS import create_initial_state
from expo.research_assistant import ResearchAssistant
from expo.utils import DATA_CONFIG, save_notebook
from metagpt.ext.sela.evaluation.evaluation import evaluate_score
from metagpt.ext.sela.research_assistant import ResearchAssistant
from metagpt.ext.sela.search.tree_search import create_initial_state
from metagpt.ext.sela.utils import DATA_CONFIG, save_notebook
class Experimenter:

View file

@ -1,13 +1,12 @@
import shutil
from expo.evaluation.evaluation import (
from metagpt.ext.sela.evaluation.evaluation import (
node_evaluate_score_mlebench,
node_evaluate_score_sela,
)
from expo.evaluation.visualize_mcts import get_tree_text
from expo.experimenter.experimenter import Experimenter
from expo.Greedy import Greedy, Random
from expo.MCTS import MCTS
from metagpt.ext.sela.evaluation.visualize_mcts import get_tree_text
from metagpt.ext.sela.experimenter.experimenter import Experimenter
from metagpt.ext.sela.search.search_algorithm import MCTS, Greedy, Random
class MCTSExperimenter(Experimenter):

View file

@ -8,7 +8,7 @@ You will be given a machine learning task. You must solve the task by training a
BENCHMARK INSTRUCTIONS
------
First, please read the instructions below which are specific to our benchmark setup. Note, these instructions can be found again in `expo.experimenter.mle_bench.instructions.py`.
First, please read the instructions below which are specific to our benchmark setup. Note, these instructions can be found again in `experimenter/mle_bench/instructions.py`.
- **Competition-specific instructions**: The instructions for the competition are available in `{dataset_dir}/description.md`. Read this file to understand the problem setup for the competition you are currently solving.
- **Dataset**: The dataset files for the competition are available in `{dataset_dir}`.
@ -28,7 +28,7 @@ You will be given a machine learning task. You must solve the task by training a
BENCHMARK INSTRUCTIONS
------
First, please read the instructions below which are specific to our benchmark setup. Note, these instructions can be found again in `expo.experimenter.mle_bench.instructions.py`.
First, please read the instructions below which are specific to our benchmark setup. Note, these instructions can be found again in `experimenter/mle_bench/instructions.py`.
- **Task-specific instructions**: The instructions for the task are available in `{dataset_dir}/description.md`. Read this file to understand the problem setup for the task you are currently solving.
- **Dataset**: The dataset files for the task are available in `{dataset_dir}/`.

View file

@ -1,7 +1,7 @@
from expo.experimenter.experimenter import Experimenter
from expo.insights.instruction_generator import InstructionGenerator
from expo.research_assistant import ResearchAssistant
from expo.utils import get_exp_pool_path
from metagpt.ext.sela.experimenter.experimenter import Experimenter
from metagpt.ext.sela.insights.instruction_generator import InstructionGenerator
from metagpt.ext.sela.research_assistant import ResearchAssistant
from metagpt.ext.sela.utils import get_exp_pool_path
EXPS_PROMPT = """
When doing the tasks, you can refer to the insights below:
@ -10,27 +10,27 @@ When doing the tasks, you can refer to the insights below:
"""
class AugExperimenter(Experimenter):
result_path: str = "results/aug"
class RandomSearchExperimenter(Experimenter):
result_path: str = "results/random_search"
async def run_experiment(self):
# state = create_initial_state(self.args.task, start_task_id=1, data_config=self.data_config, low_is_better=self.args.low_is_better, name="")
user_requirement = self.state["requirement"]
exp_pool_path = get_exp_pool_path(self.args.task, self.data_config, pool_name="ds_analysis_pool")
exp_pool = InstructionGenerator.load_analysis_pool(
exp_pool = InstructionGenerator.load_insight_pool(
exp_pool_path, use_fixed_insights=self.args.use_fixed_insights
)
if self.args.aug_mode == "single":
if self.args.rs_mode == "single":
exps = InstructionGenerator._random_sample(exp_pool, self.args.num_experiments)
exps = [exp["Analysis"] for exp in exps]
elif self.args.aug_mode == "set":
elif self.args.rs_mode == "set":
exps = []
for i in range(self.args.num_experiments):
exp_set = InstructionGenerator.sample_instruction_set(exp_pool)
exp_set_text = "\n".join([f"{exp['task_id']}: {exp['Analysis']}" for exp in exp_set])
exps.append(exp_set_text)
else:
raise ValueError(f"Invalid mode: {self.args.aug_mode}")
raise ValueError(f"Invalid mode: {self.args.rs_mode}")
results = []
for i in range(self.args.num_experiments):
@ -45,7 +45,7 @@ class AugExperimenter(Experimenter):
{
"idx": i,
"score_dict": score_dict,
"aug_mode": self.args.aug_mode,
"rs_mode": self.args.rs_mode,
"insights": exps[i],
"user_requirement": requirement,
"args": vars(self.args),

View file

@ -3,8 +3,8 @@ import os
import random
from difflib import SequenceMatcher
from expo.insights.solution_designer import SolutionDesigner
from expo.utils import clean_json_from_rsp, load_data_config, mcts_logger
from metagpt.ext.sela.insights.solution_designer import SolutionDesigner
from metagpt.ext.sela.utils import clean_json_from_rsp, load_data_config, mcts_logger
from metagpt.llm import LLM
from metagpt.schema import Message

View file

@ -1,6 +1,6 @@
import json
from expo.utils import clean_json_from_rsp, load_data_config
from metagpt.ext.sela.utils import clean_json_from_rsp, load_data_config
from metagpt.llm import LLM
DATA_CONFIG = load_data_config()

View file

@ -6,9 +6,9 @@ import os
from pydantic import model_validator
from expo.utils import mcts_logger, save_notebook
from metagpt.actions.di.write_analysis_code import WriteAnalysisCode
from metagpt.const import SERDESER_PATH
from metagpt.ext.sela.utils import mcts_logger, save_notebook
from metagpt.roles.di.data_interpreter import DataInterpreter
from metagpt.schema import Message, Task, TaskResult
from metagpt.utils.common import CodeParser, write_json_file
@ -71,7 +71,7 @@ class ResearchAssistant(DataInterpreter):
return f"Node-{self.node_id}"
def get_next_instruction(self):
return self.planner.plan.tasks[self.start_task_id]
return self.planner.plan.tasks[self.start_task_id].instruction
def change_next_instruction(self, new_instruction):
if new_instruction is not None:

View file

@ -1,13 +1,13 @@
import argparse
import asyncio
from expo.data.custom_task import get_mle_is_lower_better, get_mle_task_id
from expo.experimenter.aug import AugExperimenter
from expo.experimenter.autogluon import GluonExperimenter
from expo.experimenter.autosklearn import AutoSklearnExperimenter
from expo.experimenter.custom import CustomExperimenter
from expo.experimenter.experimenter import Experimenter
from expo.experimenter.mcts import MCTSExperimenter
from metagpt.ext.sela.data.custom_task import get_mle_is_lower_better, get_mle_task_id
from metagpt.ext.sela.experimenter.autogluon import GluonExperimenter
from metagpt.ext.sela.experimenter.autosklearn import AutoSklearnExperimenter
from metagpt.ext.sela.experimenter.custom import CustomExperimenter
from metagpt.ext.sela.experimenter.experimenter import Experimenter
from metagpt.ext.sela.experimenter.mcts import MCTSExperimenter
from metagpt.ext.sela.experimenter.random_search import RandomSearchExperimenter
def get_args(cmd=True):
@ -17,12 +17,12 @@ def get_args(cmd=True):
"--exp_mode",
type=str,
default="mcts",
choices=["mcts", "aug", "base", "custom", "greedy", "autogluon", "random", "autosklearn"],
choices=["mcts", "rs", "base", "custom", "greedy", "autogluon", "random", "autosklearn"],
)
parser.add_argument("--role_timeout", type=int, default=1000)
get_di_args(parser)
get_mcts_args(parser)
get_aug_exp_args(parser)
get_rs_exp_args(parser)
if cmd:
args = parser.parse_args()
else:
@ -56,8 +56,8 @@ def get_mcts_args(parser):
parser.add_argument("--max_depth", type=int, default=4)
def get_aug_exp_args(parser):
parser.add_argument("--aug_mode", type=str, default="single", choices=["single", "set"])
def get_rs_exp_args(parser):
    """Register the random-search experiment options on ``parser``.

    Adds ``--rs_mode`` (one of ``single`` or ``set``, default ``single``) and the
    boolean ``--is_multimodal`` flag.
    """
    rs_modes = ["single", "set"]
    parser.add_argument("--rs_mode", choices=rs_modes, default="single", type=str)
    parser.add_argument(
        "--is_multimodal",
        help="Specify if the model is multi-modal",
        action="store_true",
    )
@ -79,8 +79,8 @@ async def main(args):
experimenter = MCTSExperimenter(args, tree_mode="greedy")
elif args.exp_mode == "random":
experimenter = MCTSExperimenter(args, tree_mode="random")
elif args.exp_mode == "aug":
experimenter = AugExperimenter(args)
elif args.exp_mode == "rs":
experimenter = RandomSearchExperimenter(args)
elif args.exp_mode == "base":
experimenter = Experimenter(args)
elif args.exp_mode == "autogluon":

View file

@ -1,9 +1,12 @@
import networkx as nx
from expo.evaluation.visualize_mcts import build_tree_recursive, visualize_tree
from expo.MCTS import MCTS, create_initial_state, initialize_di_root_node
from expo.run_experiment import get_args
from expo.utils import DATA_CONFIG
from metagpt.ext.sela.evaluation.visualize_mcts import (
build_tree_recursive,
visualize_tree,
)
from metagpt.ext.sela.MCTS import MCTS, create_initial_state, initialize_di_root_node
from metagpt.ext.sela.run_experiment import get_args
from metagpt.ext.sela.utils import DATA_CONFIG
if __name__ == "__main__":
args = get_args()

View file

@ -0,0 +1,32 @@
import numpy as np
from metagpt.ext.sela.search.tree_search import BaseTreeSearch, Node
class Greedy(BaseTreeSearch):
    """Tree search that always continues from the node with the best dev score."""

    def best_child(self):
        """Return the recorded child with the highest normalized ``dev_score``.

        The root node is returned while no children have been recorded yet.
        A node whose ``normalized_reward`` lacks ``dev_score`` is scored as 0.
        """
        if not self.children:
            return self.root_node

        # self.children maps to lists of child nodes; flatten them into one pool.
        candidates = []
        for siblings in self.children.values():
            candidates.extend(siblings)

        def dev_score(node):
            return node.normalized_reward.get("dev_score", 0)

        return max(candidates, key=dev_score)
class Random(BaseTreeSearch):
    """Tree search that continues from a randomly chosen recorded child."""

    def best_child(self):
        """Return a node drawn with ``np.random.choice`` from all recorded children.

        The root node is returned while no children have been recorded yet.
        """
        if not self.children:
            return self.root_node

        # Flatten every per-parent child list into a single candidate pool.
        pool = []
        for siblings in self.children.values():
            pool.extend(siblings)
        return np.random.choice(pool)
class MCTS(BaseTreeSearch):
    """Tree search that selects the next node by its UCT score."""

    def best_child(self):
        """Return the recorded child that maximises the UCT score.

        The root node is returned while no children have been recorded yet.
        The score is the node's average value plus an exploration bonus scaled
        by ``self.c_explore``.
        """
        if not self.children:
            return self.root_node

        def uct(node: Node):
            if node.visited:
                n_visits = node.visited
                avg_value = node.avg_value()
            else:
                # Unvisited nodes use c_unvisited in place of a visit count.
                n_visits = self.c_unvisited
                avg_value = node.value / self.c_unvisited
            exploration = self.c_explore * np.sqrt(np.log(node.parent.visited) / n_visits)
            return avg_value + exploration

        # Flatten every per-parent child list into a single candidate pool.
        candidates = []
        for siblings in self.children.values():
            candidates.extend(siblings)
        return max(candidates, key=uct)

View file

@ -1,24 +1,51 @@
import json
import math
import os
import pickle
import random
import shutil
import numpy as np
import pandas as pd
from expo.data.custom_task import get_mle_bench_requirements, get_mle_task_id
from expo.data.dataset import generate_task_requirement, get_split_dataset_path
from expo.evaluation.evaluation import evaluate_score
from expo.insights.instruction_generator import InstructionGenerator
from expo.research_assistant import ResearchAssistant, TimeoutException
from expo.utils import get_exp_pool_path, load_execute_notebook, mcts_logger
from metagpt.ext.sela.data.custom_task import (
get_mle_bench_requirements,
get_mle_task_id,
)
from metagpt.ext.sela.data.dataset import (
generate_task_requirement,
get_split_dataset_path,
)
from metagpt.ext.sela.evaluation.evaluation import evaluate_score
from metagpt.ext.sela.insights.instruction_generator import InstructionGenerator
from metagpt.ext.sela.research_assistant import ResearchAssistant, TimeoutException
from metagpt.ext.sela.utils import get_exp_pool_path, load_execute_notebook, mcts_logger
from metagpt.tools.tool_recommend import ToolRecommender
from metagpt.utils.common import read_json_file
def initialize_di_root_node(state, reflection: bool = True):
def initialize_di_root_node(state: dict, reflection: bool = True):
"""
Initialize the root node of the decision tree.
Args:
state (dict): The initial state of the tree, containing:
- task (str): The task to be performed (e.g., "titanic").
- work_dir (str): The working directory.
- node_dir (str): The directory for the node.
- dataset_config (dict): The configuration of the dataset.
- datasets_dir (str): The directory of the datasets.
- exp_pool_path (str): The path to the experiment pool.
- requirement (str): The requirement for the task.
- has_run (bool): Whether the task has run.
- start_task_id (int): The ID of the starting task.
- low_is_better (bool): Whether a lower score is better.
- role_timeout (int): The timeout for the role.
- external_eval (bool): Whether to use external evaluation.
- custom_dataset_dir (str): The directory of the custom dataset.
reflection (bool, optional): Whether to use reflection. Defaults to True.
Returns:
tuple: A tuple containing the ResearchAssistant role and the root Node.
"""
role = ResearchAssistant(
node_id="0",
start_task_id=state["start_task_id"],
@ -29,7 +56,21 @@ def initialize_di_root_node(state, reflection: bool = True):
return role, Node(parent=None, state=state, action=None, value=0)
def create_initial_state(task, start_task_id, data_config, args):
def create_initial_state(task: str, start_task_id: int, data_config: dict, args):
"""
Create the initial state of the tree.
Args:
task (str): The task to be performed.
start_task_id (int): The ID of the starting task.
data_config (dict): The configuration of the data.
Expected keys: 'datasets', 'work_dir', 'role_dir'.
args (Namespace): The arguments passed to the program.
Expected attributes: 'external_eval', 'custom_dataset_dir', 'special_instruction', 'name', 'low_is_better', 'role_timeout'.
Returns:
dict: The initial state of the tree.
"""
external_eval = args.external_eval
if args.custom_dataset_dir:
@ -79,7 +120,9 @@ class Node:
normalized_reward: dict = {"train_score": 0, "dev_score": 0, "test_score": 0}
parent = None
def __init__(self, parent=None, state=None, action=None, value=0, max_depth=4, **kwargs):
def __init__(
self, parent=None, state: dict = None, action: str = None, value: float = 0, max_depth: int = 4, **kwargs
):
self.state = state
self.action = action
self.value = value
@ -225,7 +268,7 @@ class Node:
self.get_and_move_predictions("test")
return score_dict
async def run_node(self, role=None):
async def run_node(self, role: ResearchAssistant = None):
if self.is_terminal() and role is not None:
if role.state_saved:
return self.raw_reward
@ -272,7 +315,7 @@ class Node:
return score_dict, result_dict
class MCTS:
class BaseTreeSearch:
# data_path
root_node: Node = None
children: dict = {}
@ -283,7 +326,7 @@ class MCTS:
# insight generator
instruction_generator: InstructionGenerator = None
def __init__(self, root_node, max_depth, use_fixed_insights):
def __init__(self, root_node: Node, max_depth: int, use_fixed_insights: bool):
self.root_node = root_node
self.max_depth = max_depth
self.use_fixed_insights = use_fixed_insights
@ -294,15 +337,7 @@ class MCTS:
return node
def best_child(self):
def uct(node: Node):
n_visits = node.visited if node.visited else self.c_unvisited
avg_value = node.avg_value() if node.visited else node.value / self.c_unvisited
return avg_value + self.c_explore * math.sqrt(math.log(node.parent.visited) / n_visits)
if len(self.children) == 0:
return self.root_node
all_children = [child for children in self.children.values() for child in children]
return max(all_children, key=uct)
raise NotImplementedError
async def expand(self, node: Node, max_children=5):
await node.expand(max_children, self.instruction_generator)
@ -314,13 +349,13 @@ class MCTS:
"Returns the reward for a random simulation (to completion) of `node`"
mcts_logger.log("MCTS", f"Start simulating node {node.id}:")
while node.children:
node = random.choice(node.children)
node = np.random.choice(node.children)
reward, result_dict = await node.run_node(role)
mcts_logger.log("MCTS", f"Simulated node's reward: {reward}")
# TODO: add new insights
return reward
def backpropagate(self, node: Node, reward):
def backpropagate(self, node: Node, reward: dict):
child_node = node
node.update(reward)
node = node.parent
@ -333,7 +368,7 @@ class MCTS:
global_best_score = root.normalized_reward["test_score"]
dev_best_score = root.normalized_reward["dev_score"]
def bfs(node: Node, best_score, best_child: Node, split):
def bfs(node: Node, best_score: float, best_child: Node, split: str):
assert split in ["test_score", "dev_score"]
if node not in self.children:
return best_score, best_child
@ -354,7 +389,7 @@ class MCTS:
def get_num_simulations(self):
return self.root_node.visited
def save_node_order(self, node_id):
def save_node_order(self, node_id: str):
self.node_order.append(node_id)
with open(os.path.join(self.root_node.state["node_dir"], "node_order.json"), "w") as f:
json.dump(self.node_order, f)
@ -375,7 +410,7 @@ class MCTS:
scores["test_raw"].append(node.raw_reward["test_score"])
return scores
async def search(self, state, args):
async def search(self, state: dict, args):
reflection = args.reflection
load_tree = args.load_tree
rollouts = args.rollouts
@ -424,17 +459,17 @@ class MCTS:
self.save_node_order(node.id)
return self.best_path(root)
async def expand_and_simulate(self, node):
async def expand_and_simulate(self, node: Node):
# Expand and randomly select a child node, then simulate it
if node.visited > 0:
children = await self.expand(node)
node = random.choice(children)
node = np.random.choice(children)
reward = await self.simulate(node)
self.backpropagate(node, reward)
return node, reward
def load_tree(self):
def load_children_node(node):
def load_children_node(node: Node):
mcts_logger.log("MCTS", f"Load node {node.id}'s child: {node.children}")
if node.is_terminal() or not node.children:
return