Merge branch 'improve_mcts' into 'expo'

add fixed insights and special instructions

See merge request agents/exp_optimizer!15
This commit is contained in:
林义章 2024-09-14 13:44:28 +00:00
commit 3cba031c2c
12 changed files with 130 additions and 56 deletions

View file

@ -15,18 +15,17 @@ from metagpt.tools.tool_recommend import ToolRecommender
from metagpt.utils.common import read_json_file
def initialize_di_root_node(task, data_config, low_is_better=False, reflection=True, name=""):
start_task_id = 2
state = create_initial_state(
task, start_task_id=start_task_id, data_config=data_config, low_is_better=low_is_better, name=name
)
def initialize_di_root_node(state, reflection: bool = True):
# state = create_initial_state(
# task, start_task_id=start_task_id, data_config=data_config, low_is_better=low_is_better, name=name
# )
role = ResearchAssistant(
node_id="0", start_task_id=start_task_id, use_reflection=reflection, role_dir=state["node_dir"]
node_id="0", start_task_id=state["start_task_id"], use_reflection=reflection, role_dir=state["node_dir"]
)
return role, Node(parent=None, state=state, action=None, value=0)
def create_initial_state(task, start_task_id, data_config, low_is_better, name):
def create_initial_state(task, start_task_id, data_config, low_is_better: bool, name: str, special_instruction: str):
initial_state = {
"task": task,
"work_dir": data_config["work_dir"],
@ -34,11 +33,14 @@ def create_initial_state(task, start_task_id, data_config, low_is_better, name):
"dataset_config": data_config["datasets"][task],
"datasets_dir": get_split_dataset_path(task, data_config),
"exp_pool_path": get_exp_pool_path(task, data_config, pool_name="ds_analysis_pool"),
"requirement": generate_task_requirement(task, data_config),
"requirement": generate_task_requirement(
task, data_config, is_di=True, special_instruction=special_instruction
),
"has_run": False,
"start_task_id": start_task_id,
"low_is_better": low_is_better,
}
os.makedirs(initial_state["node_dir"], exist_ok=True)
return initial_state
@ -146,7 +148,7 @@ class Node:
role = role.model_copy()
role.save_state(static_save=True)
async def expand(self, max_children):
async def expand(self, max_children, use_fixed_insights):
if self.is_fully_expanded():
return
insight_geneartor = InstructionGenerator()
@ -157,6 +159,7 @@ class Node:
original_instruction=original_instruction,
max_num=max_children,
file_path=self.state["exp_pool_path"],
use_fixed_insights=use_fixed_insights,
)
new_state = self.state.copy()
new_state["start_task_id"] += 1
@ -205,6 +208,7 @@ class Node:
self.raw_reward = score_dict
run_finished = True
except Exception as e:
print(f"Error: {e}")
mcts_logger.log("MCTS", f"Error in running the role: {e}")
num_runs += 1
if not run_finished:
@ -234,9 +238,10 @@ class MCTS:
c_explore: float = 1.4
c_unvisited: float = 0.8
def __init__(self, root_node, max_depth):
def __init__(self, root_node, max_depth, use_fixed_insights):
self.root_node = root_node
self.max_depth = max_depth
self.use_fixed_insights = use_fixed_insights
def select(self, node: Node):
node = self.best_child()
@ -255,7 +260,7 @@ class MCTS:
return max(all_children, key=uct)
async def expand(self, node: Node, max_children=5):
await node.expand(max_children)
await node.expand(max_children, self.use_fixed_insights)
if node not in self.children or not self.children[node]:
self.children[node] = node.children
return node.children
@ -303,10 +308,8 @@ class MCTS:
def get_num_simulations(self):
return self.root_node.visited
async def search(self, task, data_config, name, rollouts, load_tree=False, low_is_better=False, reflection=False):
role, root = initialize_di_root_node(
task, data_config, low_is_better=low_is_better, reflection=reflection, name=name
)
async def search(self, state, rollouts, load_tree=False, reflection=False):
role, root = initialize_di_root_node(state, reflection=reflection)
self.root_node = root
tree_loaded = False
if load_tree:

View file

@ -223,16 +223,10 @@ ### Base DI
For setup, check 5.
- `python run_experiment.py --exp_mode base --task titanic --num_experiments 10`
- Ask DI to use AutoGluon: `--special_instruction ag`
- Ask DI to use the stacking ensemble method: `--special_instruction stacking`
### DI RandomSearch
For setup, check 5.
- Single insight
`python run_experiment.py --exp_mode aug --task titanic --aug_mode single`
- Set insight
`python run_experiment.py --exp_mode aug --task titanic --aug_mode set`
## 5. DI MCTS
@ -259,6 +253,20 @@ #### Run
- `python run_experiment.py --exp_mode mcts --task house_prices --rollout 10 --low_is_better`
In addition to the generated insights, include the fixed insights saved in `expo/insights/fixed_insights.json`
- `--use_fixed_insights`
#### Ablation Study
**DI RandomSearch**
- Single insight
`python run_experiment.py --exp_mode aug --task titanic --aug_mode single`
- Set insight
`python run_experiment.py --exp_mode aug --task titanic --aug_mode set`

View file

@ -10,26 +10,37 @@ from sklearn.model_selection import train_test_split
from expo.insights.solution_designer import SolutionDesigner
BASE_USER_REQUIREMENT = """\
BASE_USER_REQUIREMENT = """
This is a {datasetname} dataset. Your goal is to predict the target column `{target_col}`.
Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target.
Report {metric} on the eval data. Do not plot or make any visualizations.
"""
USE_AG = """
7. Please use autogluon for model training with presets='medium_quality', time_limit=None, give dev dataset to tuning_data, and use right eval_metric.
"""
DI_INSTRUCTION = """\
**Attention**
STACKING = """
7. To avoid overfitting, train a weighted ensemble model such as StackingClassifier or StackingRegressor.
8. You could do some quick model prototyping to see which models work best and then use them in the ensemble.
"""
SPECIAL_INSTRUCTIONS = {"ag": USE_AG, "stacking": STACKING}
DI_INSTRUCTION = """
## Attention
1. Please do not leak the target label in any form during training.
2. Test set does not have the target column.
3. You should perform transformations on train, dev, and test sets at the same time (it's a good idea to define functions for this and avoid code repetition).
4. If labels are transformed during training, they should be transformed back to the original format before saving the predictions.
5. You could utilize dev set to validate and improve model training.
6. Use techniques to avoid overfitting.
3. When conducting data exploration or analysis, print out the results of your findings.
4. You should perform transformations on train, dev, and test sets at the same time (it's a good idea to define functions for this and avoid code repetition).
5. When scaling or transforming features, make sure the target column is not included.
6. You could utilize dev set to validate and improve model training. {special_instruction}
## Saving Dev and Test Predictions
1. Save the prediction results of BOTH the dev set and test set in `dev_predictions.csv` and `test_predictions.csv` respectively in the output directory.
- Both files should contain a single column named `target` with the predicted values.
2. Make sure the prediction results are in the same format as the target column in the training set.
- For instance, if the target column is categorical, the prediction results should be categorical as well.
## Output Performance
Print the train and dev set performance in the last step.
@ -38,7 +49,7 @@ Print the train and dev set performance in the last step.
{output_dir}
"""
TASK_PROMPT = """\
TASK_PROMPT = """
# User requirement
{user_requirement}
{additional_instruction}
@ -134,12 +145,18 @@ def create_dataset_dict(dataset):
return dataset_dict
def generate_di_instruction(output_dir):
additional_instruction = DI_INSTRUCTION.format(output_dir=output_dir)
def generate_di_instruction(output_dir, special_instruction):
    """Compose the additional Data-Interpreter instruction block.

    Args:
        output_dir: Directory path substituted into the DI instruction template.
        special_instruction: Optional key into SPECIAL_INSTRUCTIONS (e.g. "ag",
            "stacking"); falsy values mean no special instruction is appended.

    Returns:
        The DI_INSTRUCTION template rendered with the output directory and the
        selected special-instruction text (empty string when none is given).
    """
    # Look up the extra prompt only when a key was supplied; an unknown key
    # raises KeyError, same as the original indexing behavior.
    extra_prompt = SPECIAL_INSTRUCTIONS[special_instruction] if special_instruction else ""
    return DI_INSTRUCTION.format(output_dir=output_dir, special_instruction=extra_prompt)
def generate_task_requirement(task_name, data_config, is_di=True):
def generate_task_requirement(task_name, data_config, is_di=True, special_instruction=None):
user_requirement = get_user_requirement(task_name, data_config)
split_dataset_path = get_split_dataset_path(task_name, data_config)
train_path = split_dataset_path["train"]
@ -150,7 +167,7 @@ def generate_task_requirement(task_name, data_config, is_di=True):
datasets_dir = data_config["datasets_dir"]
data_info_path = f"{datasets_dir}/{task_name}/dataset_info.json"
if is_di:
additional_instruction = generate_di_instruction(output_dir)
additional_instruction = generate_di_instruction(output_dir, special_instruction)
else:
additional_instruction = ""
user_requirement = TASK_PROMPT.format(

View file

@ -17,7 +17,9 @@ class AugExperimenter(Experimenter):
# state = create_initial_state(self.args.task, start_task_id=1, data_config=self.data_config, low_is_better=self.args.low_is_better, name="")
user_requirement = self.state["requirement"]
exp_pool_path = get_exp_pool_path(self.args.task, self.data_config, pool_name="ds_analysis_pool")
exp_pool = InstructionGenerator.load_analysis_pool(exp_pool_path)
exp_pool = InstructionGenerator.load_analysis_pool(
exp_pool_path, use_fixed_insights=self.args.use_fixed_insights
)
if self.args.aug_mode == "single":
exps = InstructionGenerator._random_sample(exp_pool, self.args.num_experiments)
exps = [exp["Analysis"] for exp in exps]

View file

@ -18,7 +18,12 @@ class CustomExperimenter(Experimenter):
self.name = kwargs.get("name", "")
self.result_path = f"results/custom_{self.name}"
self.state = create_initial_state(
self.task, start_task_id=1, data_config=self.data_config, low_is_better=self.low_is_better, name=self.name
self.task,
start_task_id=1,
data_config=self.data_config,
low_is_better=self.low_is_better,
name=self.name,
special_instruction=self.args.special_instruction,
)
def run_experiment(self):

View file

@ -13,6 +13,7 @@ from expo.utils import DATA_CONFIG, save_notebook
class Experimenter:
result_path: str = "results/base"
data_config = DATA_CONFIG
start_task_id = 1
def __init__(self, args, **kwargs):
self.args = args
@ -20,10 +21,11 @@ class Experimenter:
self.start_time = self.start_time_raw.strftime("%Y%m%d%H%M")
self.state = create_initial_state(
self.args.task,
start_task_id=1,
start_task_id=self.start_task_id,
data_config=self.data_config,
low_is_better=self.args.low_is_better,
name="",
name=self.args.name,
special_instruction=self.args.special_instruction,
)
async def run_di(self, di, user_requirement, run_idx):
@ -86,7 +88,7 @@ class Experimenter:
pred_node_path = os.path.join(state["node_dir"], f"{self.start_time}-{split}_predictions.csv")
gt_path = os.path.join(state["datasets_dir"][f"{split}_target"])
preds = pd.read_csv(pred_path)
preds = preds[preds.columns.tolist()[0]]
preds = preds[preds.columns.tolist()[-1]]
preds.to_csv(pred_node_path, index=False)
gt = pd.read_csv(gt_path)["target"]
metric = state["dataset_config"]["metric"]

View file

@ -6,6 +6,7 @@ from expo.MCTS import MCTS
class MCTSExperimenter(Experimenter):
result_path: str = "results/mcts"
start_task_id = 2
def __init__(self, args, tree_mode=None, **kwargs):
super().__init__(args, **kwargs)
@ -13,19 +14,16 @@ class MCTSExperimenter(Experimenter):
async def run_experiment(self):
if self.tree_mode == "greedy":
mcts = Greedy(root_node=None, max_depth=5)
mcts = Greedy(root_node=None, max_depth=5, use_fixed_insights=self.args.use_fixed_insights)
elif self.tree_mode == "random":
mcts = Random(root_node=None, max_depth=5)
mcts = Random(root_node=None, max_depth=5, use_fixed_insights=self.args.use_fixed_insights)
else:
mcts = MCTS(root_node=None, max_depth=5)
mcts = MCTS(root_node=None, max_depth=5, use_fixed_insights=self.args.use_fixed_insights)
best_nodes = await mcts.search(
self.args.task,
self.data_config,
low_is_better=self.args.low_is_better,
load_tree=self.args.load_tree,
state=self.state,
reflection=self.args.reflection,
rollouts=self.args.rollouts,
name=self.args.name,
load_tree=self.args.load_tree,
)
best_node = best_nodes["global_best"]
dev_best_node = best_nodes["dev_best"]

View file

@ -0,0 +1,22 @@
[
{
"Analysis": "Use early stopping, hyperparameter tuning, and cross-validation to avoid overfitting and improve robustness of the model.",
"Category": "Model Training",
"task_id": 4
},
{
"Analysis": "use k-fold bagging and early stopping",
"Category": "Model Training",
"task_id": 4
},
{
"Analysis": "To avoid overfitting, train a weighted ensemble model such as StackingClassifier or StackingRegressor; You could do some quick model prototyping to see which models work best and then use them in the ensemble.",
"Category": "Model Training",
"task_id": 4
},
{
"Analysis": "Please use autogluon for model training with presets='medium_quality', time_limit=None, give dev dataset to tuning_data, and use right eval_metric.",
"Category": "Model Training",
"task_id": 4
}
]

View file

@ -1,4 +1,5 @@
import json
import os
import random
from expo.utils import clean_json_from_rsp, load_data_config, mcts_logger
@ -68,8 +69,12 @@ class InstructionGenerator:
return new_data
@staticmethod
def load_analysis_pool(file_path, task_id=None):
def load_analysis_pool(file_path, use_fixed_insights, task_id=None):
data = InstructionGenerator.load_json_data(file_path)
if use_fixed_insights:
current_directory = os.path.dirname(__file__)
fixed_insights = InstructionGenerator.load_json_data(f"{current_directory}/fixed_insights.json")
data.extend(fixed_insights)
for item in data:
if "task_id" not in item:
raise ValueError("task_id is not found in the analysis pool")
@ -79,8 +84,12 @@ class InstructionGenerator:
return data
@staticmethod
async def generate_new_instructions(task_id, original_instruction, max_num, file_path):
data = InstructionGenerator.load_analysis_pool(file_path, task_id)
async def generate_new_instructions(
task_id, original_instruction, max_num, file_path, ext_info=None, use_fixed_insights=False
):
data = InstructionGenerator.load_analysis_pool(
file_path, task_id=task_id, use_fixed_insights=use_fixed_insights
)
new_instructions = []
if len(data) == 0:
mcts_logger.log("MCTS", f"No insights available for task {task_id}")
@ -91,12 +100,14 @@ class InstructionGenerator:
else:
item = data[i]
insights = item["Analysis"]
new_instruction = await InstructionGenerator.generate_new_instruction(original_instruction, insights)
new_instruction = await InstructionGenerator.generate_new_instruction(
original_instruction, insights, ext_info
)
new_instructions.append(new_instruction)
return new_instructions
@staticmethod
async def generate_new_instruction(original_instruction, insights):
async def generate_new_instruction(original_instruction, insights, ext_info):
prompt = CHANGE_INSTRUCTION.format(instruction=original_instruction, insights=insights)
llm = LLM()
context = llm.format_msg([Message(content=prompt, role="user")])

View file

@ -3,3 +3,4 @@ openml==0.14.2
# ml module to run in DI
xgboost
catboost
lightgbm

View file

@ -28,11 +28,11 @@ def get_mcts_args(parser):
parser.add_argument("--no_load_tree", dest="load_tree", action="store_false")
parser.set_defaults(load_tree=False)
parser.add_argument("--rollouts", type=int, default=5)
parser.add_argument("--use_fixed_insights", dest="use_fixed_insights", action="store_true")
def get_aug_exp_args(parser):
parser.add_argument("--aug_mode", type=str, default="single", choices=["single", "set"])
parser.add_argument("--num_experiments", type=int, default=1)
def get_di_args(parser):
@ -41,6 +41,8 @@ def get_di_args(parser):
parser.set_defaults(low_is_better=False)
parser.add_argument("--reflection", dest="reflection", action="store_true")
parser.add_argument("--no_reflection", dest="reflection", action="store_false")
parser.add_argument("--num_experiments", type=int, default=1)
parser.add_argument("--special_instruction", type=str, default=None, choices=["ag", "stacking"])
parser.set_defaults(reflection=True)

View file

@ -11,7 +11,7 @@ The current task is about data preprocessing, please note the following:
- Monitor data types per column, applying appropriate methods.
- Ensure operations are on existing dataset columns.
- Avoid writing processed data to files.
- Avoid any change to label column, such as standardization, etc.
- **ATTENTION** Do NOT make any changes to the label column, such as standardization, etc.
- Prefer alternatives to one-hot encoding for categorical data.
- Only encode or scale necessary columns to allow for potential feature-specific engineering tasks (like time_extract, binning, extraction, etc.) later.
- Each step do data preprocessing to train, must do same for test separately at the same time.
@ -26,7 +26,7 @@ The current task is about feature engineering. when performing it, please adhere
- Avoid creating redundant or excessively numerous features in one step.
- Exclude ID columns from feature generation and remove them.
- Each feature engineering operation performed on the train set must also applies to the dev/test separately at the same time.
- Avoid using the label column to create features, except for cat encoding.
- **ATTENTION** Do NOT use the label column to create features, except for cat encoding.
- Use the data from previous task result if exist, do not mock or reload data yourself.
- Always copy the DataFrame before processing it and use the copy to process.
"""
@ -34,6 +34,9 @@ The current task is about feature engineering. when performing it, please adhere
# Prompt for taking on "model_train" tasks
MODEL_TRAIN_PROMPT = """
The current task is about training a model, please ensure high performance:
- For tabular datasets - you have access to LightGBM, CatBoost, XGBoost, random forest, extremely randomized trees, k-nearest neighbors, linear regression, etc.
- For image datasets - you have access to ResNet, VGG, Inception, MobileNet, DenseNet, EfficientNet, etc.
- For text datasets - you have access to BERT, GPT-2, RoBERTa, DistilBERT, T5, etc.
- Keep in mind that your user prioritizes results and is highly focused on model performance. So, when needed, feel free to use models of any complexity to improve effectiveness, such as XGBoost, CatBoost, etc.
- If non-numeric columns exist, perform label encode together with all steps.
- Use the data from previous task result directly, do not mock or reload data yourself.