Merge branch 'improve_mcts' into 'expo'

add fixed ss and special instructions See merge request agents/exp_optimizer!15
2026-06-14 15:25:17 +02:00 · 2024-09-14 13:44:28 +00:00 · 2024-09-14 13:44:28 +00:00 · 3cba031c2c
commit 3cba031c2c
parent c64153f7bb 3c50575ff7
12 changed files with 130 additions and 56 deletions
--- a/expo/MCTS.py
+++ b/expo/MCTS.py
@ -15,18 +15,17 @@ from metagpt.tools.tool_recommend import ToolRecommender
 from metagpt.utils.common import read_json_file


-def initialize_di_root_node(task, data_config, low_is_better=False, reflection=True, name=""):
-    start_task_id = 2
-    state = create_initial_state(
-        task, start_task_id=start_task_id, data_config=data_config, low_is_better=low_is_better, name=name
-    )
+def initialize_di_root_node(state, reflection: bool = True):
+    # state = create_initial_state(
+    #     task, start_task_id=start_task_id, data_config=data_config, low_is_better=low_is_better, name=name
+    # )
    role = ResearchAssistant(
-        node_id="0", start_task_id=start_task_id, use_reflection=reflection, role_dir=state["node_dir"]
+        node_id="0", start_task_id=state["start_task_id"], use_reflection=reflection, role_dir=state["node_dir"]
    )
    return role, Node(parent=None, state=state, action=None, value=0)


-def create_initial_state(task, start_task_id, data_config, low_is_better, name):
+def create_initial_state(task, start_task_id, data_config, low_is_better: bool, name: str, special_instruction: str):
    initial_state = {
        "task": task,
        "work_dir": data_config["work_dir"],
@ -34,11 +33,14 @@ def create_initial_state(task, start_task_id, data_config, low_is_better, name):
        "dataset_config": data_config["datasets"][task],
        "datasets_dir": get_split_dataset_path(task, data_config),
        "exp_pool_path": get_exp_pool_path(task, data_config, pool_name="ds_analysis_pool"),
-        "requirement": generate_task_requirement(task, data_config),
+        "requirement": generate_task_requirement(
+            task, data_config, is_di=True, special_instruction=special_instruction
+        ),
        "has_run": False,
        "start_task_id": start_task_id,
        "low_is_better": low_is_better,
    }
+    os.makedirs(initial_state["node_dir"], exist_ok=True)
    return initial_state


@ -146,7 +148,7 @@ class Node:
        role = role.model_copy()
        role.save_state(static_save=True)

-    async def expand(self, max_children):
+    async def expand(self, max_children, use_fixed_insights):
        if self.is_fully_expanded():
            return
        insight_geneartor = InstructionGenerator()
@ -157,6 +159,7 @@ class Node:
            original_instruction=original_instruction,
            max_num=max_children,
            file_path=self.state["exp_pool_path"],
+            use_fixed_insights=use_fixed_insights,
        )
        new_state = self.state.copy()
        new_state["start_task_id"] += 1
@ -205,6 +208,7 @@ class Node:
                self.raw_reward = score_dict
                run_finished = True
            except Exception as e:
+                print(f"Error: {e}")
                mcts_logger.log("MCTS", f"Error in running the role: {e}")
                num_runs += 1
        if not run_finished:
@ -234,9 +238,10 @@ class MCTS:
    c_explore: float = 1.4
    c_unvisited: float = 0.8

-    def __init__(self, root_node, max_depth):
+    def __init__(self, root_node, max_depth, use_fixed_insights):
        self.root_node = root_node
        self.max_depth = max_depth
+        self.use_fixed_insights = use_fixed_insights

    def select(self, node: Node):
        node = self.best_child()
@ -255,7 +260,7 @@ class MCTS:
        return max(all_children, key=uct)

    async def expand(self, node: Node, max_children=5):
-        await node.expand(max_children)
+        await node.expand(max_children, self.use_fixed_insights)
        if node not in self.children or not self.children[node]:
            self.children[node] = node.children
        return node.children
@ -303,10 +308,8 @@ class MCTS:
    def get_num_simulations(self):
        return self.root_node.visited

-    async def search(self, task, data_config, name, rollouts, load_tree=False, low_is_better=False, reflection=False):
-        role, root = initialize_di_root_node(
-            task, data_config, low_is_better=low_is_better, reflection=reflection, name=name
-        )
+    async def search(self, state, rollouts, load_tree=False, reflection=False):
+        role, root = initialize_di_root_node(state, reflection=reflection)
        self.root_node = root
        tree_loaded = False
        if load_tree:
--- a/expo/README.md
+++ b/expo/README.md
@ -223,16 +223,10 @@ ### Base DI
 For setup, check 5.

 - `python run_experiment.py --exp_mode base --task titanic --num_experiments 10`
+- Ask DI to use AutoGluon: `--special_instruction ag`
+- Ask DI to use the stacking ensemble method: `--special_instruction stacking`


-### DI RandomSearch
-For setup, check 5.
-
- Single insight
-`python run_experiment.py --exp_mode aug --task titanic --aug_mode single`
-
- Set insight
-`python run_experiment.py --exp_mode aug --task titanic --aug_mode set`


 ## 5. DI MCTS
@ -259,6 +253,20 @@ #### Run
 - `python run_experiment.py --exp_mode mcts --task househouse_prices --rollout 10 --low_is_better`


+To include the fixed insights saved in `expo/insights/fixed_insights.json` in addition to the generated insights, add the following flag:
+- `--use_fixed_insights`
+  
+
+
+#### Ablation Study
+
+**DI RandomSearch**
+
+- Single insight
+`python run_experiment.py --exp_mode aug --task titanic --aug_mode single`
+
+- Set insight
+`python run_experiment.py --exp_mode aug --task titanic --aug_mode set`



--- a/expo/data/dataset.py
+++ b/expo/data/dataset.py
@ -10,26 +10,37 @@ from sklearn.model_selection import train_test_split

 from expo.insights.solution_designer import SolutionDesigner

-BASE_USER_REQUIREMENT = """\
+BASE_USER_REQUIREMENT = """
 This is a {datasetname} dataset. Your goal is to predict the target column `{target_col}`.
 Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target. 
 Report {metric} on the eval data. Do not plot or make any visualizations.
 """

+USE_AG = """
+7. Please use autogluon for model training with presets='medium_quality', time_limit=None, give dev dataset to tuning_data, and use right eval_metric.
+"""

-DI_INSTRUCTION = """\
-**Attention** 
+STACKING = """
+7. To avoid overfitting, train a weighted ensemble model such as StackingClassifier or StackingRegressor.
+8. You could do some quick model prototyping to see which models work best and then use them in the ensemble. 
+"""
+
+SPECIAL_INSTRUCTIONS = {"ag": USE_AG, "stacking": STACKING}
+
+DI_INSTRUCTION = """
+## Attention
 1. Please do not leak the target label in any form during training.
 2. Test set does not have the target column.
-3. You should perform transformations on train, dev, and test sets at the same time (it's a good idea to define functions for this and avoid code repetition).
-4. If labels are transformed during training, they should be transformed back to the original format before saving the predictions.
-5. You could utilize dev set to validate and improve model training.
-6. Use techniques to avoid overfitting.
+3. When conducting data exploration or analysis, print out the results of your findings.
+4. You should perform transformations on train, dev, and test sets at the same time (it's a good idea to define functions for this and avoid code repetition).
+5. When scaling or transforming features, make sure the target column is not included.
+6. You could utilize dev set to validate and improve model training. {special_instruction}

 ## Saving Dev and Test Predictions
 1. Save the prediction results of BOTH the dev set and test set in `dev_predictions.csv` and `test_predictions.csv` respectively in the output directory. 
 - Both files should contain a single column named `target` with the predicted values.
 2. Make sure the prediction results are in the same format as the target column in the training set. 
+- For instance, if the target column is categorical, the prediction results should be categorical as well.

 ## Output Performance
 Print the train and dev set performance in the last step.
@ -38,7 +49,7 @@ Print the train and dev set performance in the last step.
 {output_dir}
 """

-TASK_PROMPT = """\
+TASK_PROMPT = """
 # User requirement
 {user_requirement}
 {additional_instruction}
@ -134,12 +145,18 @@ def create_dataset_dict(dataset):
    return dataset_dict


-def generate_di_instruction(output_dir):
-    additional_instruction = DI_INSTRUCTION.format(output_dir=output_dir)
+def generate_di_instruction(output_dir, special_instruction):
+    if special_instruction:
+        special_instruction_prompt = SPECIAL_INSTRUCTIONS[special_instruction]
+    else:
+        special_instruction_prompt = ""
+    additional_instruction = DI_INSTRUCTION.format(
+        output_dir=output_dir, special_instruction=special_instruction_prompt
+    )
    return additional_instruction


-def generate_task_requirement(task_name, data_config, is_di=True):
+def generate_task_requirement(task_name, data_config, is_di=True, special_instruction=None):
    user_requirement = get_user_requirement(task_name, data_config)
    split_dataset_path = get_split_dataset_path(task_name, data_config)
    train_path = split_dataset_path["train"]
@ -150,7 +167,7 @@ def generate_task_requirement(task_name, data_config, is_di=True):
    datasets_dir = data_config["datasets_dir"]
    data_info_path = f"{datasets_dir}/{task_name}/dataset_info.json"
    if is_di:
-        additional_instruction = generate_di_instruction(output_dir)
+        additional_instruction = generate_di_instruction(output_dir, special_instruction)
    else:
        additional_instruction = ""
    user_requirement = TASK_PROMPT.format(
--- a/expo/experimenter/aug.py
+++ b/expo/experimenter/aug.py
@ -17,7 +17,9 @@ class AugExperimenter(Experimenter):
        # state = create_initial_state(self.args.task, start_task_id=1, data_config=self.data_config, low_is_better=self.args.low_is_better, name="")
        user_requirement = self.state["requirement"]
        exp_pool_path = get_exp_pool_path(self.args.task, self.data_config, pool_name="ds_analysis_pool")
-        exp_pool = InstructionGenerator.load_analysis_pool(exp_pool_path)
+        exp_pool = InstructionGenerator.load_analysis_pool(
+            exp_pool_path, use_fixed_insights=self.args.use_fixed_insights
+        )
        if self.args.aug_mode == "single":
            exps = InstructionGenerator._random_sample(exp_pool, self.args.num_experiments)
            exps = [exp["Analysis"] for exp in exps]
--- a/expo/experimenter/custom.py
+++ b/expo/experimenter/custom.py
@ -18,7 +18,12 @@ class CustomExperimenter(Experimenter):
        self.name = kwargs.get("name", "")
        self.result_path = f"results/custom_{self.name}"
        self.state = create_initial_state(
-            self.task, start_task_id=1, data_config=self.data_config, low_is_better=self.low_is_better, name=self.name
+            self.task,
+            start_task_id=1,
+            data_config=self.data_config,
+            low_is_better=self.low_is_better,
+            name=self.name,
+            special_instruction=self.args.special_instruction,
        )

    def run_experiment(self):
--- a/expo/experimenter/experimenter.py
+++ b/expo/experimenter/experimenter.py
@ -13,6 +13,7 @@ from expo.utils import DATA_CONFIG, save_notebook
 class Experimenter:
    result_path: str = "results/base"
    data_config = DATA_CONFIG
+    start_task_id = 1

    def __init__(self, args, **kwargs):
        self.args = args
@ -20,10 +21,11 @@ class Experimenter:
        self.start_time = self.start_time_raw.strftime("%Y%m%d%H%M")
        self.state = create_initial_state(
            self.args.task,
-            start_task_id=1,
+            start_task_id=self.start_task_id,
            data_config=self.data_config,
            low_is_better=self.args.low_is_better,
-            name="",
+            name=self.args.name,
+            special_instruction=self.args.special_instruction,
        )

    async def run_di(self, di, user_requirement, run_idx):
@ -86,7 +88,7 @@ class Experimenter:
        pred_node_path = os.path.join(state["node_dir"], f"{self.start_time}-{split}_predictions.csv")
        gt_path = os.path.join(state["datasets_dir"][f"{split}_target"])
        preds = pd.read_csv(pred_path)
-        preds = preds[preds.columns.tolist()[0]]
+        preds = preds[preds.columns.tolist()[-1]]
        preds.to_csv(pred_node_path, index=False)
        gt = pd.read_csv(gt_path)["target"]
        metric = state["dataset_config"]["metric"]
--- a/expo/experimenter/mcts.py
+++ b/expo/experimenter/mcts.py
@ -6,6 +6,7 @@ from expo.MCTS import MCTS

 class MCTSExperimenter(Experimenter):
    result_path: str = "results/mcts"
+    start_task_id = 2

    def __init__(self, args, tree_mode=None, **kwargs):
        super().__init__(args, **kwargs)
@ -13,19 +14,16 @@ class MCTSExperimenter(Experimenter):

    async def run_experiment(self):
        if self.tree_mode == "greedy":
-            mcts = Greedy(root_node=None, max_depth=5)
+            mcts = Greedy(root_node=None, max_depth=5, use_fixed_insights=self.args.use_fixed_insights)
        elif self.tree_mode == "random":
-            mcts = Random(root_node=None, max_depth=5)
+            mcts = Random(root_node=None, max_depth=5, use_fixed_insights=self.args.use_fixed_insights)
        else:
-            mcts = MCTS(root_node=None, max_depth=5)
+            mcts = MCTS(root_node=None, max_depth=5, use_fixed_insights=self.args.use_fixed_insights)
        best_nodes = await mcts.search(
-            self.args.task,
-            self.data_config,
-            low_is_better=self.args.low_is_better,
-            load_tree=self.args.load_tree,
+            state=self.state,
            reflection=self.args.reflection,
            rollouts=self.args.rollouts,
-            name=self.args.name,
+            load_tree=self.args.load_tree,
        )
        best_node = best_nodes["global_best"]
        dev_best_node = best_nodes["dev_best"]
--- a/expo/insights/fixed_insights.json
+++ b/expo/insights/fixed_insights.json
@ -0,0 +1,22 @@
+[
+{
+    "Analysis": "Use early stopping, hyperparameter tuning, and cross-validation to avoid overfitting and improve robustness of the model.",
+    "Category": "Model Training",
+    "task_id": 4
+},
+{
+    "Analysis": "use k-fold bagging and early stopping",
+    "Category": "Model Training",
+    "task_id": 4
+},
+{
+    "Analysis": "To avoid overfitting, train a weighted ensemble model such as StackingClassifier or StackingRegressor; You could do some quick model prototyping to see which models work best and then use them in the ensemble.",
+    "Category": "Model Training",
+    "task_id": 4
+},
+{
+    "Analysis": "Please use autogluon for model training with presets='medium_quality', time_limit=None, give dev dataset to tuning_data, and use right eval_metric.",
+    "Category": "Model Training",
+    "task_id": 4
+}
+]
--- a/expo/insights/instruction_generator.py
+++ b/expo/insights/instruction_generator.py
@ -1,4 +1,5 @@
 import json
+import os
 import random

 from expo.utils import clean_json_from_rsp, load_data_config, mcts_logger
@ -68,8 +69,12 @@ class InstructionGenerator:
        return new_data

    @staticmethod
-    def load_analysis_pool(file_path, task_id=None):
+    def load_analysis_pool(file_path, use_fixed_insights, task_id=None):
        data = InstructionGenerator.load_json_data(file_path)
+        if use_fixed_insights:
+            current_directory = os.path.dirname(__file__)
+            fixed_insights = InstructionGenerator.load_json_data(f"{current_directory}/fixed_insights.json")
+            data.extend(fixed_insights)
        for item in data:
            if "task_id" not in item:
                raise ValueError("task_id is not found in the analysis pool")
@ -79,8 +84,12 @@ class InstructionGenerator:
        return data

    @staticmethod
-    async def generate_new_instructions(task_id, original_instruction, max_num, file_path):
-        data = InstructionGenerator.load_analysis_pool(file_path, task_id)
+    async def generate_new_instructions(
+        task_id, original_instruction, max_num, file_path, ext_info=None, use_fixed_insights=False
+    ):
+        data = InstructionGenerator.load_analysis_pool(
+            file_path, task_id=task_id, use_fixed_insights=use_fixed_insights
+        )
        new_instructions = []
        if len(data) == 0:
            mcts_logger.log("MCTS", f"No insights available for task {task_id}")
@ -91,12 +100,14 @@ class InstructionGenerator:
            else:
                item = data[i]
                insights = item["Analysis"]
-            new_instruction = await InstructionGenerator.generate_new_instruction(original_instruction, insights)
+            new_instruction = await InstructionGenerator.generate_new_instruction(
+                original_instruction, insights, ext_info
+            )
            new_instructions.append(new_instruction)
        return new_instructions

    @staticmethod
-    async def generate_new_instruction(original_instruction, insights):
+    async def generate_new_instruction(original_instruction, insights, ext_info):
        prompt = CHANGE_INSTRUCTION.format(instruction=original_instruction, insights=insights)
        llm = LLM()
        context = llm.format_msg([Message(content=prompt, role="user")])
--- a/expo/requirements.txt
+++ b/expo/requirements.txt
@ -3,3 +3,4 @@ openml==0.14.2
 # ml module to run in DI
 xgboost
 catboost
+lightgbm
--- a/expo/run_experiment.py
+++ b/expo/run_experiment.py
@ -28,11 +28,11 @@ def get_mcts_args(parser):
    parser.add_argument("--no_load_tree", dest="load_tree", action="store_false")
    parser.set_defaults(load_tree=False)
    parser.add_argument("--rollouts", type=int, default=5)
+    parser.add_argument("--use_fixed_insights", dest="use_fixed_insights", action="store_true")


 def get_aug_exp_args(parser):
    parser.add_argument("--aug_mode", type=str, default="single", choices=["single", "set"])
-    parser.add_argument("--num_experiments", type=int, default=1)


 def get_di_args(parser):
@ -41,6 +41,8 @@ def get_di_args(parser):
    parser.set_defaults(low_is_better=False)
    parser.add_argument("--reflection", dest="reflection", action="store_true")
    parser.add_argument("--no_reflection", dest="reflection", action="store_false")
+    parser.add_argument("--num_experiments", type=int, default=1)
+    parser.add_argument("--special_instruction", type=str, default=None, choices=["ag", "stacking"])
    parser.set_defaults(reflection=True)