Merge branch 'improve_mcts' into 'expo'

add fixed insights and special instructions

See merge request agents/exp_optimizer!15
This commit is contained in:
林义章 2024-09-14 13:44:28 +00:00
commit 3cba031c2c
12 changed files with 130 additions and 56 deletions

View file

@ -15,18 +15,17 @@ from metagpt.tools.tool_recommend import ToolRecommender
from metagpt.utils.common import read_json_file
def initialize_di_root_node(task, data_config, low_is_better=False, reflection=True, name=""):
start_task_id = 2
state = create_initial_state(
task, start_task_id=start_task_id, data_config=data_config, low_is_better=low_is_better, name=name
)
def initialize_di_root_node(state, reflection: bool = True):
# state = create_initial_state(
# task, start_task_id=start_task_id, data_config=data_config, low_is_better=low_is_better, name=name
# )
role = ResearchAssistant(
node_id="0", start_task_id=start_task_id, use_reflection=reflection, role_dir=state["node_dir"]
node_id="0", start_task_id=state["start_task_id"], use_reflection=reflection, role_dir=state["node_dir"]
)
return role, Node(parent=None, state=state, action=None, value=0)
def create_initial_state(task, start_task_id, data_config, low_is_better, name):
def create_initial_state(task, start_task_id, data_config, low_is_better: bool, name: str, special_instruction: str):
initial_state = {
"task": task,
"work_dir": data_config["work_dir"],
@ -34,11 +33,14 @@ def create_initial_state(task, start_task_id, data_config, low_is_better, name):
"dataset_config": data_config["datasets"][task],
"datasets_dir": get_split_dataset_path(task, data_config),
"exp_pool_path": get_exp_pool_path(task, data_config, pool_name="ds_analysis_pool"),
"requirement": generate_task_requirement(task, data_config),
"requirement": generate_task_requirement(
task, data_config, is_di=True, special_instruction=special_instruction
),
"has_run": False,
"start_task_id": start_task_id,
"low_is_better": low_is_better,
}
os.makedirs(initial_state["node_dir"], exist_ok=True)
return initial_state
@ -146,7 +148,7 @@ class Node:
role = role.model_copy()
role.save_state(static_save=True)
async def expand(self, max_children):
async def expand(self, max_children, use_fixed_insights):
if self.is_fully_expanded():
return
insight_geneartor = InstructionGenerator()
@ -157,6 +159,7 @@ class Node:
original_instruction=original_instruction,
max_num=max_children,
file_path=self.state["exp_pool_path"],
use_fixed_insights=use_fixed_insights,
)
new_state = self.state.copy()
new_state["start_task_id"] += 1
@ -205,6 +208,7 @@ class Node:
self.raw_reward = score_dict
run_finished = True
except Exception as e:
print(f"Error: {e}")
mcts_logger.log("MCTS", f"Error in running the role: {e}")
num_runs += 1
if not run_finished:
@ -234,9 +238,10 @@ class MCTS:
c_explore: float = 1.4
c_unvisited: float = 0.8
def __init__(self, root_node, max_depth):
def __init__(self, root_node, max_depth, use_fixed_insights):
self.root_node = root_node
self.max_depth = max_depth
self.use_fixed_insights = use_fixed_insights
def select(self, node: Node):
node = self.best_child()
@ -255,7 +260,7 @@ class MCTS:
return max(all_children, key=uct)
async def expand(self, node: Node, max_children=5):
await node.expand(max_children)
await node.expand(max_children, self.use_fixed_insights)
if node not in self.children or not self.children[node]:
self.children[node] = node.children
return node.children
@ -303,10 +308,8 @@ class MCTS:
def get_num_simulations(self):
return self.root_node.visited
async def search(self, task, data_config, name, rollouts, load_tree=False, low_is_better=False, reflection=False):
role, root = initialize_di_root_node(
task, data_config, low_is_better=low_is_better, reflection=reflection, name=name
)
async def search(self, state, rollouts, load_tree=False, reflection=False):
role, root = initialize_di_root_node(state, reflection=reflection)
self.root_node = root
tree_loaded = False
if load_tree:

View file

@ -223,16 +223,10 @@ ### Base DI
For setup, check 5.
- `python run_experiment.py --exp_mode base --task titanic --num_experiments 10`
- Ask DI to use AutoGluon: `--special_instruction ag`
- Ask DI to use the stacking ensemble method: `--special_instruction stacking`
### DI RandomSearch
For setup, check 5.
- Single insight
`python run_experiment.py --exp_mode aug --task titanic --aug_mode single`
- Set insight
`python run_experiment.py --exp_mode aug --task titanic --aug_mode set`
## 5. DI MCTS
@ -259,6 +253,20 @@ #### Run
- `python run_experiment.py --exp_mode mcts --task house_prices --rollout 10 --low_is_better`
In addition to the generated insights, include the fixed insights saved in `expo/insights/fixed_insights.json`
- `--use_fixed_insights`
#### Ablation Study
**DI RandomSearch**
- Single insight
`python run_experiment.py --exp_mode aug --task titanic --aug_mode single`
- Set insight
`python run_experiment.py --exp_mode aug --task titanic --aug_mode set`

View file

@ -10,26 +10,37 @@ from sklearn.model_selection import train_test_split
from expo.insights.solution_designer import SolutionDesigner
BASE_USER_REQUIREMENT = """\
BASE_USER_REQUIREMENT = """
This is a {datasetname} dataset. Your goal is to predict the target column `{target_col}`.
Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target.
Report {metric} on the eval data. Do not plot or make any visualizations.
"""
USE_AG = """
7. Please use autogluon for model training with presets='medium_quality', time_limit=None, give dev dataset to tuning_data, and use right eval_metric.
"""
DI_INSTRUCTION = """\
**Attention**
STACKING = """
7. To avoid overfitting, train a weighted ensemble model such as StackingClassifier or StackingRegressor.
8. You could do some quick model prototyping to see which models work best and then use them in the ensemble.
"""
SPECIAL_INSTRUCTIONS = {"ag": USE_AG, "stacking": STACKING}
DI_INSTRUCTION = """
## Attention
1. Please do not leak the target label in any form during training.
2. Test set does not have the target column.
3. You should perform transformations on train, dev, and test sets at the same time (it's a good idea to define functions for this and avoid code repetition).
4. If labels are transformed during training, they should be transformed back to the original format before saving the predictions.
5. You could utilize dev set to validate and improve model training.
6. Use techniques to avoid overfitting.
3. When conducting data exploration or analysis, print out the results of your findings.
4. You should perform transformations on train, dev, and test sets at the same time (it's a good idea to define functions for this and avoid code repetition).
5. When scaling or transforming features, make sure the target column is not included.
6. You could utilize dev set to validate and improve model training. {special_instruction}
## Saving Dev and Test Predictions
1. Save the prediction results of BOTH the dev set and test set in `dev_predictions.csv` and `test_predictions.csv` respectively in the output directory.
- Both files should contain a single column named `target` with the predicted values.
2. Make sure the prediction results are in the same format as the target column in the training set.
- For instance, if the target column is categorical, the prediction results should be categorical as well.
## Output Performance
Print the train and dev set performance in the last step.
@ -38,7 +49,7 @@ Print the train and dev set performance in the last step.
{output_dir}
"""
TASK_PROMPT = """\
TASK_PROMPT = """
# User requirement
{user_requirement}
{additional_instruction}
@ -134,12 +145,18 @@ def create_dataset_dict(dataset):
return dataset_dict
def generate_di_instruction(output_dir):
additional_instruction = DI_INSTRUCTION.format(output_dir=output_dir)
def generate_di_instruction(output_dir, special_instruction):
    """Compose the additional Data-Interpreter instruction block.

    Args:
        output_dir: Directory path substituted into the DI instruction template.
        special_instruction: Optional key into SPECIAL_INSTRUCTIONS (e.g. "ag",
            "stacking"); falsy values mean no special instruction is appended.

    Returns:
        The DI_INSTRUCTION template rendered with the output directory and the
        selected special-instruction text (empty string when none is given).
    """
    # Look up the extra prompt only when a key was supplied; an unknown key
    # raises KeyError, same as the original indexing behavior.
    extra_prompt = SPECIAL_INSTRUCTIONS[special_instruction] if special_instruction else ""
    return DI_INSTRUCTION.format(output_dir=output_dir, special_instruction=extra_prompt)
def generate_task_requirement(task_name, data_config, is_di=True):
def generate_task_requirement(task_name, data_config, is_di=True, special_instruction=None):
user_requirement = get_user_requirement(task_name, data_config)
split_dataset_path = get_split_dataset_path(task_name, data_config)
train_path = split_dataset_path["train"]
@ -150,7 +167,7 @@ def generate_task_requirement(task_name, data_config, is_di=True):
datasets_dir = data_config["datasets_dir"]
data_info_path = f"{datasets_dir}/{task_name}/dataset_info.json"
if is_di:
additional_instruction = generate_di_instruction(output_dir)
additional_instruction = generate_di_instruction(output_dir, special_instruction)
else:
additional_instruction = ""
user_requirement = TASK_PROMPT.format(

View file

@ -17,7 +17,9 @@ class AugExperimenter(Experimenter):
# state = create_initial_state(self.args.task, start_task_id=1, data_config=self.data_config, low_is_better=self.args.low_is_better, name="")
user_requirement = self.state["requirement"]
exp_pool_path = get_exp_pool_path(self.args.task, self.data_config, pool_name="ds_analysis_pool")
exp_pool = InstructionGenerator.load_analysis_pool(exp_pool_path)
exp_pool = InstructionGenerator.load_analysis_pool(
exp_pool_path, use_fixed_insights=self.args.use_fixed_insights
)
if self.args.aug_mode == "single":
exps = InstructionGenerator._random_sample(exp_pool, self.args.num_experiments)
exps = [exp["Analysis"] for exp in exps]

View file

@ -18,7 +18,12 @@ class CustomExperimenter(Experimenter):
self.name = kwargs.get("name", "")
self.result_path = f"results/custom_{self.name}"
self.state = create_initial_state(
self.task, start_task_id=1, data_config=self.data_config, low_is_better=self.low_is_better, name=self.name
self.task,
start_task_id=1,
data_config=self.data_config,
low_is_better=self.low_is_better,
name=self.name,
special_instruction=self.args.special_instruction,
)
def run_experiment(self):

View file

@ -13,6 +13,7 @@ from expo.utils import DATA_CONFIG, save_notebook
class Experimenter:
result_path: str = "results/base"
data_config = DATA_CONFIG
start_task_id = 1
def __init__(self, args, **kwargs):
self.args = args
@ -20,10 +21,11 @@ class Experimenter:
self.start_time = self.start_time_raw.strftime("%Y%m%d%H%M")
self.state = create_initial_state(
self.args.task,
start_task_id=1,
start_task_id=self.start_task_id,
data_config=self.data_config,
low_is_better=self.args.low_is_better,
name="",
name=self.args.name,
special_instruction=self.args.special_instruction,
)
async def run_di(self, di, user_requirement, run_idx):
@ -86,7 +88,7 @@ class Experimenter:
pred_node_path = os.path.join(state["node_dir"], f"{self.start_time}-{split}_predictions.csv")
gt_path = os.path.join(state["datasets_dir"][f"{split}_target"])
preds = pd.read_csv(pred_path)
preds = preds[preds.columns.tolist()[0]]
preds = preds[preds.columns.tolist()[-1]]
preds.to_csv(pred_node_path, index=False)
gt = pd.read_csv(gt_path)["target"]
metric = state["dataset_config"]["metric"]

View file

@ -6,6 +6,7 @@ from expo.MCTS import MCTS
class MCTSExperimenter(Experimenter):
result_path: str = "results/mcts"
start_task_id = 2
def __init__(self, args, tree_mode=None, **kwargs):
super().__init__(args, **kwargs)
@ -13,19 +14,16 @@ class MCTSExperimenter(Experimenter):
async def run_experiment(self):
if self.tree_mode == "greedy":
mcts = Greedy(root_node=None, max_depth=5)
mcts = Greedy(root_node=None, max_depth=5, use_fixed_insights=self.args.use_fixed_insights)
elif self.tree_mode == "random":
mcts = Random(root_node=None, max_depth=5)
mcts = Random(root_node=None, max_depth=5, use_fixed_insights=self.args.use_fixed_insights)
else:
mcts = MCTS(root_node=None, max_depth=5)
mcts = MCTS(root_node=None, max_depth=5, use_fixed_insights=self.args.use_fixed_insights)
best_nodes = await mcts.search(
self.args.task,
self.data_config,
low_is_better=self.args.low_is_better,
load_tree=self.args.load_tree,
state=self.state,
reflection=self.args.reflection,
rollouts=self.args.rollouts,
name=self.args.name,
load_tree=self.args.load_tree,
)
best_node = best_nodes["global_best"]
dev_best_node = best_nodes["dev_best"]

View file

@ -0,0 +1,22 @@
[
{
"Analysis": "Use early stopping, hyperparameter tuning, and cross-validation to avoid overfitting and improve robustness of the model.",
"Category": "Model Training",
"task_id": 4
},
{
"Analysis": "use k-fold bagging and early stopping",
"Category": "Model Training",
"task_id": 4
},
{
"Analysis": "To avoid overfitting, train a weighted ensemble model such as StackingClassifier or StackingRegressor; You could do some quick model prototyping to see which models work best and then use them in the ensemble.",
"Category": "Model Training",
"task_id": 4
},
{
"Analysis": "Please use autogluon for model training with presets='medium_quality', time_limit=None, give dev dataset to tuning_data, and use right eval_metric.",
"Category": "Model Training",
"task_id": 4
}
]

View file

@ -1,4 +1,5 @@
import json
import os
import random
from expo.utils import clean_json_from_rsp, load_data_config, mcts_logger
@ -68,8 +69,12 @@ class InstructionGenerator:
return new_data
@staticmethod
def load_analysis_pool(file_path, task_id=None):
def load_analysis_pool(file_path, use_fixed_insights, task_id=None):
data = InstructionGenerator.load_json_data(file_path)
if use_fixed_insights:
current_directory = os.path.dirname(__file__)
fixed_insights = InstructionGenerator.load_json_data(f"{current_directory}/fixed_insights.json")
data.extend(fixed_insights)
for item in data:
if "task_id" not in item:
raise ValueError("task_id is not found in the analysis pool")
@ -79,8 +84,12 @@ class InstructionGenerator:
return data
@staticmethod
async def generate_new_instructions(task_id, original_instruction, max_num, file_path):
data = InstructionGenerator.load_analysis_pool(file_path, task_id)
async def generate_new_instructions(
task_id, original_instruction, max_num, file_path, ext_info=None, use_fixed_insights=False
):
data = InstructionGenerator.load_analysis_pool(
file_path, task_id=task_id, use_fixed_insights=use_fixed_insights
)
new_instructions = []
if len(data) == 0:
mcts_logger.log("MCTS", f"No insights available for task {task_id}")
@ -91,12 +100,14 @@ class InstructionGenerator:
else:
item = data[i]
insights = item["Analysis"]
new_instruction = await InstructionGenerator.generate_new_instruction(original_instruction, insights)
new_instruction = await InstructionGenerator.generate_new_instruction(
original_instruction, insights, ext_info
)
new_instructions.append(new_instruction)
return new_instructions
@staticmethod
async def generate_new_instruction(original_instruction, insights):
async def generate_new_instruction(original_instruction, insights, ext_info):
prompt = CHANGE_INSTRUCTION.format(instruction=original_instruction, insights=insights)
llm = LLM()
context = llm.format_msg([Message(content=prompt, role="user")])

View file

@ -3,3 +3,4 @@ openml==0.14.2
# ml module to run in DI
xgboost
catboost
lightgbm

View file

@ -28,11 +28,11 @@ def get_mcts_args(parser):
parser.add_argument("--no_load_tree", dest="load_tree", action="store_false")
parser.set_defaults(load_tree=False)
parser.add_argument("--rollouts", type=int, default=5)
parser.add_argument("--use_fixed_insights", dest="use_fixed_insights", action="store_true")
def get_aug_exp_args(parser):
parser.add_argument("--aug_mode", type=str, default="single", choices=["single", "set"])
parser.add_argument("--num_experiments", type=int, default=1)
def get_di_args(parser):
@ -41,6 +41,8 @@ def get_di_args(parser):
parser.set_defaults(low_is_better=False)
parser.add_argument("--reflection", dest="reflection", action="store_true")
parser.add_argument("--no_reflection", dest="reflection", action="store_false")
parser.add_argument("--num_experiments", type=int, default=1)
parser.add_argument("--special_instruction", type=str, default=None, choices=["ag", "stacking"])
parser.set_defaults(reflection=True)

View file

@ -11,7 +11,7 @@ The current task is about data preprocessing, please note the following:
- Monitor data types per column, applying appropriate methods.
- Ensure operations are on existing dataset columns.
- Avoid writing processed data to files.
- Avoid any change to label column, such as standardization, etc.
- **ATTENTION** Do NOT make any changes to the label column, such as standardization, etc.
- Prefer alternatives to one-hot encoding for categorical data.
- Only encode or scale necessary columns to allow for potential feature-specific engineering tasks (like time_extract, binning, extraction, etc.) later.
- Each step do data preprocessing to train, must do same for test separately at the same time.
@ -26,7 +26,7 @@ The current task is about feature engineering. when performing it, please adhere
- Avoid creating redundant or excessively numerous features in one step.
- Exclude ID columns from feature generation and remove them.
- Each feature engineering operation performed on the train set must also applies to the dev/test separately at the same time.
- Avoid using the label column to create features, except for cat encoding.
- **ATTENTION** Do NOT use the label column to create features, except for cat encoding.
- Use the data from previous task result if exist, do not mock or reload data yourself.
- Always copy the DataFrame before processing it and use the copy to process.
"""
@ -34,6 +34,9 @@ The current task is about feature engineering. when performing it, please adhere
# Prompt for taking on "model_train" tasks
MODEL_TRAIN_PROMPT = """
The current task is about training a model, please ensure high performance:
- For tabular datasets - you have access to LightGBM, CatBoost, XGBoost, random forest, extremely randomized trees, k-nearest neighbors, linear regression, etc.
- For image datasets - you have access to ResNet, VGG, Inception, MobileNet, DenseNet, EfficientNet, etc.
- For text datasets - you have access to BERT, GPT-2, RoBERTa, DistilBERT, T5, etc.
- Keep in mind that your user prioritizes results and is highly focused on model performance. So, when needed, feel free to use models of any complexity to improve effectiveness, such as XGBoost, CatBoost, etc.
- If non-numeric columns exist, perform label encode together with all steps.
- Use the data from previous task result directly, do not mock or reload data yourself.