From c54e4121c611473ef7ef874d19cfb5891280d091 Mon Sep 17 00:00:00 2001
From: Yizhou Chi <chiyizhou@fuzhi.ai>
Date: Fri, 13 Sep 2024 16:57:37 +0800
Subject: [PATCH 01/14] update di prompt

---
 expo/data/dataset.py                   | 16 ++++++++++++----
 expo/insights/instruction_generator.py |  8 +++++---
 2 files changed, 17 insertions(+), 7 deletions(-)

diff --git a/expo/data/dataset.py b/expo/data/dataset.py
index 3b2017d1a..d2ec48326 100644
--- a/expo/data/dataset.py
+++ b/expo/data/dataset.py
@@ -16,15 +16,22 @@ Perform data analysis, data preprocessing, feature engineering, and modeling to
 Report {metric} on the eval data. Do not plot or make any visualizations.
 """
 
+RECOMMENDATION = """\
+## Base Models and Ensemble
+You can consider using the following base models:
+’GBM’ (LightGBM) ‘CAT’ (CatBoost) ‘XGB’ (XGBoost) ‘RF’ (random forest) ‘XT’ (extremely randomized trees) ‘KNN’ (k-nearest neighbors) ‘LR’ (linear regression)
+"""
 
-DI_INSTRUCTION = """\
-**Attention** 
+DI_INSTRUCTION = (
+    RECOMMENDATION
+    + """**Attention** 
 1. Please do not leak the target label in any form during training.
 2. Test set does not have the target column.
 3. You should perform transformations on train, dev, and test sets at the same time (it's a good idea to define functions for this and avoid code repetition).
-4. If labels are transformed during training, they should be transformed back to the original format before saving the predictions.
+4. When scaling or transforming features, make sure the target column is not included.
 5. You could utilize dev set to validate and improve model training.
-6. Use techniques to avoid overfitting.
+6. To avoid overfitting, train a weighted ensemble model such as StackingClassifier or StackingRegressor using **dev set** after base models being trained
+7. Make sure the model prototyping is fast. 
 
 ## Saving Dev and Test Predictions
 1. Save the prediction results of BOTH the dev set and test set in `dev_predictions.csv` and `test_predictions.csv` respectively in the output directory. 
@@ -37,6 +44,7 @@ Print the train and dev set performance in the last step.
 # Output dir
 {output_dir}
 """
+)
 
 TASK_PROMPT = """\
 # User requirement
diff --git a/expo/insights/instruction_generator.py b/expo/insights/instruction_generator.py
index c9ff7ec6e..a800f4507 100644
--- a/expo/insights/instruction_generator.py
+++ b/expo/insights/instruction_generator.py
@@ -79,7 +79,7 @@ class InstructionGenerator:
         return data
 
     @staticmethod
-    async def generate_new_instructions(task_id, original_instruction, max_num, file_path):
+    async def generate_new_instructions(task_id, original_instruction, max_num, file_path, ext_info=None):
         data = InstructionGenerator.load_analysis_pool(file_path, task_id)
         new_instructions = []
         if len(data) == 0:
@@ -91,12 +91,14 @@ class InstructionGenerator:
             else:
                 item = data[i]
                 insights = item["Analysis"]
-            new_instruction = await InstructionGenerator.generate_new_instruction(original_instruction, insights)
+            new_instruction = await InstructionGenerator.generate_new_instruction(
+                original_instruction, insights, ext_info
+            )
             new_instructions.append(new_instruction)
         return new_instructions
 
     @staticmethod
-    async def generate_new_instruction(original_instruction, insights):
+    async def generate_new_instruction(original_instruction, insights, ext_info):
         prompt = CHANGE_INSTRUCTION.format(instruction=original_instruction, insights=insights)
         llm = LLM()
         context = llm.format_msg([Message(content=prompt, role="user")])

From 8beca0faddd33b981d23d875c1a59df0b71947f0 Mon Sep 17 00:00:00 2001
From: Yizhou Chi <chiyizhou@fuzhi.ai>
Date: Sat, 14 Sep 2024 15:17:42 +0800
Subject: [PATCH 02/14] add special instruction and fixed insights options

---
 expo/MCTS.py                           | 24 ++++++++-------
 expo/README.md                         | 24 ++++++++++-----
 expo/data/dataset.py                   | 42 ++++++++++++++++++--------
 expo/experimenter/aug.py               |  4 ++-
 expo/experimenter/custom.py            |  7 ++++-
 expo/experimenter/experimenter.py      |  3 +-
 expo/experimenter/mcts.py              | 12 +++-----
 expo/insights/fixed_insights.json      | 22 ++++++++++++++
 expo/insights/instruction_generator.py | 15 +++++++--
 expo/requirements.txt                  |  1 +
 expo/run_experiment.py                 |  4 ++-
 11 files changed, 111 insertions(+), 47 deletions(-)
 create mode 100644 expo/insights/fixed_insights.json

diff --git a/expo/MCTS.py b/expo/MCTS.py
index 360baac8d..265356f65 100644
--- a/expo/MCTS.py
+++ b/expo/MCTS.py
@@ -15,18 +15,18 @@ from metagpt.tools.tool_recommend import ToolRecommender
 from metagpt.utils.common import read_json_file
 
 
-def initialize_di_root_node(task, data_config, low_is_better=False, reflection=True, name=""):
+def initialize_di_root_node(state, reflection: bool = True):
     start_task_id = 2
-    state = create_initial_state(
-        task, start_task_id=start_task_id, data_config=data_config, low_is_better=low_is_better, name=name
-    )
+    # state = create_initial_state(
+    #     task, start_task_id=start_task_id, data_config=data_config, low_is_better=low_is_better, name=name
+    # )
     role = ResearchAssistant(
         node_id="0", start_task_id=start_task_id, use_reflection=reflection, role_dir=state["node_dir"]
     )
     return role, Node(parent=None, state=state, action=None, value=0)
 
 
-def create_initial_state(task, start_task_id, data_config, low_is_better, name):
+def create_initial_state(task, start_task_id, data_config, low_is_better: bool, name: str, special_instruction: str):
     initial_state = {
         "task": task,
         "work_dir": data_config["work_dir"],
@@ -34,7 +34,9 @@ def create_initial_state(task, start_task_id, data_config, low_is_better, name):
         "dataset_config": data_config["datasets"][task],
         "datasets_dir": get_split_dataset_path(task, data_config),
         "exp_pool_path": get_exp_pool_path(task, data_config, pool_name="ds_analysis_pool"),
-        "requirement": generate_task_requirement(task, data_config),
+        "requirement": generate_task_requirement(
+            task, data_config, is_di=True, special_instruction=special_instruction
+        ),
         "has_run": False,
         "start_task_id": start_task_id,
         "low_is_better": low_is_better,
@@ -157,6 +159,7 @@ class Node:
             original_instruction=original_instruction,
             max_num=max_children,
             file_path=self.state["exp_pool_path"],
+            use_fixed_insights=self.use_fixed_insights,
         )
         new_state = self.state.copy()
         new_state["start_task_id"] += 1
@@ -234,9 +237,10 @@ class MCTS:
     c_explore: float = 1.4
     c_unvisited: float = 0.8
 
-    def __init__(self, root_node, max_depth):
+    def __init__(self, root_node, max_depth, use_fixed_insights):
         self.root_node = root_node
         self.max_depth = max_depth
+        self.use_fixed_insights = use_fixed_insights
 
     def select(self, node: Node):
         node = self.best_child()
@@ -303,10 +307,8 @@ class MCTS:
     def get_num_simulations(self):
         return self.root_node.visited
 
-    async def search(self, task, data_config, name, rollouts, load_tree=False, low_is_better=False, reflection=False):
-        role, root = initialize_di_root_node(
-            task, data_config, low_is_better=low_is_better, reflection=reflection, name=name
-        )
+    async def search(self, state, rollouts, load_tree=False, reflection=False):
+        role, root = initialize_di_root_node(state, reflection=reflection)
         self.root_node = root
         tree_loaded = False
         if load_tree:
diff --git a/expo/README.md b/expo/README.md
index 55ea7eed4..00d1cae50 100644
--- a/expo/README.md
+++ b/expo/README.md
@@ -187,16 +187,10 @@ ### Base DI
 For setup, check 5.
 
 - `python run_experiment.py --exp_mode base --task titanic --num_experiments 10`
+- Ask DI to use AutoGluon: `--special_instruction ag`
+- Ask DI to use the stacking ensemble method: `--special_instruction stacking`
 
 
-### DI RandomSearch
-For setup, check 5.
-
-- Single insight
-`python run_experiment.py --exp_mode aug --task titanic --aug_mode single`
-
-- Set insight
-`python run_experiment.py --exp_mode aug --task titanic --aug_mode set`
 
 
 ## 5. DI MCTS
@@ -223,6 +217,20 @@ #### Run
 - `python run_experiment.py --exp_mode mcts --task househouse_prices --rollout 10 --low_is_better`
 
 
+In addition to the generated insights, include the fixed insights saved in `insights/fixed_insights.json`
+- `--use_fixed_insights`
+  
+
+
+#### Ablation Study
+
+**DI RandomSearch**
+
+- Single insight
+`python run_experiment.py --exp_mode aug --task titanic --aug_mode single`
+
+- Set insight
+`python run_experiment.py --exp_mode aug --task titanic --aug_mode set`
 
 
 
diff --git a/expo/data/dataset.py b/expo/data/dataset.py
index d2ec48326..03b80985a 100644
--- a/expo/data/dataset.py
+++ b/expo/data/dataset.py
@@ -10,16 +10,27 @@ from sklearn.model_selection import train_test_split
 
 from expo.insights.solution_designer import SolutionDesigner
 
-BASE_USER_REQUIREMENT = """\
+BASE_USER_REQUIREMENT = """
 This is a {datasetname} dataset. Your goal is to predict the target column `{target_col}`.
 Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target. 
 Report {metric} on the eval data. Do not plot or make any visualizations.
 """
 
-RECOMMENDATION = """\
+USE_AG = """
+7. Please use autogluon for model training with presets='medium_quality', time_limit=None, give dev dataset to tuning_data, and use right eval_metric.
+"""
+
+STACKING = """
+7. To avoid overfitting, train a weighted ensemble model such as StackingClassifier or StackingRegressor.
+8. You could do some quick model prototyping to see which models work best and then use them in the ensemble. 
+"""
+
+SPECIAL_INSTRUCTIONS = {"ag": USE_AG, "stacking": STACKING}
+
+RECOMMENDATION = """
 ## Base Models and Ensemble
 You can consider using the following base models:
-’GBM’ (LightGBM) ‘CAT’ (CatBoost) ‘XGB’ (XGBoost) ‘RF’ (random forest) ‘XT’ (extremely randomized trees) ‘KNN’ (k-nearest neighbors) ‘LR’ (linear regression)
+`GBM` (LightGBM) `CAT` (CatBoost) `XGB` (XGBoost) `RF` (random forest) `XT` (extremely randomized trees) `KNN` (k-nearest neighbors) ‘LR’ (linear regression)
 """
 
 DI_INSTRUCTION = (
@@ -27,11 +38,10 @@ DI_INSTRUCTION = (
     + """**Attention** 
 1. Please do not leak the target label in any form during training.
 2. Test set does not have the target column.
-3. You should perform transformations on train, dev, and test sets at the same time (it's a good idea to define functions for this and avoid code repetition).
-4. When scaling or transforming features, make sure the target column is not included.
-5. You could utilize dev set to validate and improve model training.
-6. To avoid overfitting, train a weighted ensemble model such as StackingClassifier or StackingRegressor using **dev set** after base models being trained
-7. Make sure the model prototyping is fast. 
+3. When conducting data exploration or analysis, print out the results of your findings.
+4. You should perform transformations on train, dev, and test sets at the same time (it's a good idea to define functions for this and avoid code repetition).
+5. When scaling or transforming features, make sure the target column is not included.
+6. You could utilize dev set to validate and improve model training. {special_instruction}
 
 ## Saving Dev and Test Predictions
 1. Save the prediction results of BOTH the dev set and test set in `dev_predictions.csv` and `test_predictions.csv` respectively in the output directory. 
@@ -46,7 +56,7 @@ Print the train and dev set performance in the last step.
 """
 )
 
-TASK_PROMPT = """\
+TASK_PROMPT = """
 # User requirement
 {user_requirement}
 {additional_instruction}
@@ -142,12 +152,18 @@ def create_dataset_dict(dataset):
     return dataset_dict
 
 
-def generate_di_instruction(output_dir):
-    additional_instruction = DI_INSTRUCTION.format(output_dir=output_dir)
+def generate_di_instruction(output_dir, special_instruction):
+    if special_instruction:
+        special_instruction_prompt = SPECIAL_INSTRUCTIONS[special_instruction]
+    else:
+        special_instruction_prompt = ""
+    additional_instruction = DI_INSTRUCTION.format(
+        output_dir=output_dir, special_instruction=special_instruction_prompt
+    )
     return additional_instruction
 
 
-def generate_task_requirement(task_name, data_config, is_di=True):
+def generate_task_requirement(task_name, data_config, is_di=True, special_instruction=None):
     user_requirement = get_user_requirement(task_name, data_config)
     split_dataset_path = get_split_dataset_path(task_name, data_config)
     train_path = split_dataset_path["train"]
@@ -158,7 +174,7 @@ def generate_task_requirement(task_name, data_config, is_di=True):
     datasets_dir = data_config["datasets_dir"]
     data_info_path = f"{datasets_dir}/{task_name}/dataset_info.json"
     if is_di:
-        additional_instruction = generate_di_instruction(output_dir)
+        additional_instruction = generate_di_instruction(output_dir, special_instruction)
     else:
         additional_instruction = ""
     user_requirement = TASK_PROMPT.format(
diff --git a/expo/experimenter/aug.py b/expo/experimenter/aug.py
index 8312f57fc..e57d024bd 100644
--- a/expo/experimenter/aug.py
+++ b/expo/experimenter/aug.py
@@ -17,7 +17,9 @@ class AugExperimenter(Experimenter):
         # state = create_initial_state(self.args.task, start_task_id=1, data_config=self.data_config, low_is_better=self.args.low_is_better, name="")
         user_requirement = self.state["requirement"]
         exp_pool_path = get_exp_pool_path(self.args.task, self.data_config, pool_name="ds_analysis_pool")
-        exp_pool = InstructionGenerator.load_analysis_pool(exp_pool_path)
+        exp_pool = InstructionGenerator.load_analysis_pool(
+            exp_pool_path, use_fixed_insights=self.args.use_fixed_insights
+        )
         if self.args.aug_mode == "single":
             exps = InstructionGenerator._random_sample(exp_pool, self.args.num_experiments)
             exps = [exp["Analysis"] for exp in exps]
diff --git a/expo/experimenter/custom.py b/expo/experimenter/custom.py
index df090fb58..92b7dafa2 100644
--- a/expo/experimenter/custom.py
+++ b/expo/experimenter/custom.py
@@ -18,7 +18,12 @@ class CustomExperimenter(Experimenter):
         self.name = kwargs.get("name", "")
         self.result_path = f"results/custom_{self.name}"
         self.state = create_initial_state(
-            self.task, start_task_id=1, data_config=self.data_config, low_is_better=self.low_is_better, name=self.name
+            self.task,
+            start_task_id=1,
+            data_config=self.data_config,
+            low_is_better=self.low_is_better,
+            name=self.name,
+            special_instruction=self.args.special_instruction,
         )
 
     def run_experiment(self):
diff --git a/expo/experimenter/experimenter.py b/expo/experimenter/experimenter.py
index 418e0089a..89d589d7d 100644
--- a/expo/experimenter/experimenter.py
+++ b/expo/experimenter/experimenter.py
@@ -23,7 +23,8 @@ class Experimenter:
             start_task_id=1,
             data_config=self.data_config,
             low_is_better=self.args.low_is_better,
-            name="",
+            name=self.args.name,
+            special_instruction=self.args.special_instruction,
         )
 
     async def run_di(self, di, user_requirement, run_idx):
diff --git a/expo/experimenter/mcts.py b/expo/experimenter/mcts.py
index fbe2f35f1..e06169a70 100644
--- a/expo/experimenter/mcts.py
+++ b/expo/experimenter/mcts.py
@@ -13,19 +13,15 @@ class MCTSExperimenter(Experimenter):
 
     async def run_experiment(self):
         if self.tree_mode == "greedy":
-            mcts = Greedy(root_node=None, max_depth=5)
+            mcts = Greedy(root_node=None, max_depth=5, use_fixed_insights=self.args.use_fixed_insights)
         elif self.tree_mode == "random":
-            mcts = Random(root_node=None, max_depth=5)
+            mcts = Random(root_node=None, max_depth=5, use_fixed_insights=self.args.use_fixed_insights)
         else:
-            mcts = MCTS(root_node=None, max_depth=5)
+            mcts = MCTS(root_node=None, max_depth=5, use_fixed_insights=self.args.use_fixed_insights)
         best_nodes = await mcts.search(
-            self.args.task,
-            self.data_config,
-            low_is_better=self.args.low_is_better,
-            load_tree=self.args.load_tree,
+            state=self.state,
             reflection=self.args.reflection,
             rollouts=self.args.rollouts,
-            name=self.args.name,
         )
         best_node = best_nodes["global_best"]
         dev_best_node = best_nodes["dev_best"]
diff --git a/expo/insights/fixed_insights.json b/expo/insights/fixed_insights.json
new file mode 100644
index 000000000..e52745707
--- /dev/null
+++ b/expo/insights/fixed_insights.json
@@ -0,0 +1,22 @@
+[
+{
+    "Analysis": "Use early stopping, hyperparameter tuning, and cross-validation to avoid overfitting and improve robustness of the model.",
+    "Category": "Model Training",
+    "task_id": 4
+},
+{
+    "Analysis": "use k-fold bagging and early stopping",
+    "Category": "Model Training",
+    "task_id": 4
+},
+{
+    "Analysis": "To avoid overfitting, train a weighted ensemble model such as StackingClassifier or StackingRegressor using **dev set** after base models being trained.",
+    "Category": "Model Training",
+    "task_id": 4
+},
+{
+    "Analysis": "Please use autogluon for model training with presets='medium_quality', time_limit=None, give dev dataset to tuning_data, and use right eval_metric.",
+    "Category": "Model Training",
+    "task_id": 4
+}
+]
\ No newline at end of file
diff --git a/expo/insights/instruction_generator.py b/expo/insights/instruction_generator.py
index a800f4507..07e5fb655 100644
--- a/expo/insights/instruction_generator.py
+++ b/expo/insights/instruction_generator.py
@@ -1,4 +1,5 @@
 import json
+import os
 import random
 
 from expo.utils import clean_json_from_rsp, load_data_config, mcts_logger
@@ -68,8 +69,12 @@ class InstructionGenerator:
         return new_data
 
     @staticmethod
-    def load_analysis_pool(file_path, task_id=None):
+    def load_analysis_pool(file_path, use_fixed_insights, task_id=None):
         data = InstructionGenerator.load_json_data(file_path)
+        if use_fixed_insights:
+            current_directory = os.path.dirname(__file__)
+            fixed_insights = InstructionGenerator.load_json_data(f"{current_directory}/fixed_insights.json")
+            data.extend(fixed_insights)
         for item in data:
             if "task_id" not in item:
                 raise ValueError("task_id is not found in the analysis pool")
@@ -79,8 +84,12 @@ class InstructionGenerator:
         return data
 
     @staticmethod
-    async def generate_new_instructions(task_id, original_instruction, max_num, file_path, ext_info=None):
-        data = InstructionGenerator.load_analysis_pool(file_path, task_id)
+    async def generate_new_instructions(
+        task_id, original_instruction, max_num, file_path, ext_info=None, use_fixed_insights=False
+    ):
+        data = InstructionGenerator.load_analysis_pool(
+            file_path, task_id=task_id, use_fixed_insights=use_fixed_insights
+        )
         new_instructions = []
         if len(data) == 0:
             mcts_logger.log("MCTS", f"No insights available for task {task_id}")
diff --git a/expo/requirements.txt b/expo/requirements.txt
index 04de1a8bb..e85818bbe 100644
--- a/expo/requirements.txt
+++ b/expo/requirements.txt
@@ -3,3 +3,4 @@ openml==0.14.2
 # ml module to run in DI
 xgboost
 catboost
+lightgbm
diff --git a/expo/run_experiment.py b/expo/run_experiment.py
index 2123fade3..f1b5b2d80 100644
--- a/expo/run_experiment.py
+++ b/expo/run_experiment.py
@@ -28,11 +28,11 @@ def get_mcts_args(parser):
     parser.add_argument("--no_load_tree", dest="load_tree", action="store_false")
     parser.set_defaults(load_tree=False)
     parser.add_argument("--rollouts", type=int, default=5)
+    parser.add_argument("--use_fixed_insights", dest="use_fixed_insights", action="store_true")
 
 
 def get_aug_exp_args(parser):
     parser.add_argument("--aug_mode", type=str, default="single", choices=["single", "set"])
-    parser.add_argument("--num_experiments", type=int, default=1)
 
 
 def get_di_args(parser):
@@ -41,6 +41,8 @@ def get_di_args(parser):
     parser.set_defaults(low_is_better=False)
     parser.add_argument("--reflection", dest="reflection", action="store_true")
     parser.add_argument("--no_reflection", dest="reflection", action="store_false")
+    parser.add_argument("--num_experiments", type=int, default=1)
+    parser.add_argument("--special_instruction", type=str, default=None, choices=["ag", "stacking"])
     parser.set_defaults(reflection=True)
 
 

From 9089ecf7d6f031d069623381006b14615788ecde Mon Sep 17 00:00:00 2001
From: Yizhou Chi <chiyizhou@fuzhi.ai>
Date: Sat, 14 Sep 2024 15:21:21 +0800
Subject: [PATCH 03/14] update readme

---
 expo/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/expo/README.md b/expo/README.md
index 00d1cae50..0af20388e 100644
--- a/expo/README.md
+++ b/expo/README.md
@@ -217,7 +217,7 @@ #### Run
 - `python run_experiment.py --exp_mode mcts --task househouse_prices --rollout 10 --low_is_better`
 
 
-In addition to the generated insights, include the fixed insights saved in `insights/fixed_insights.json`
+In addition to the generated insights, include the fixed insights saved in `expo/insights/fixed_insights.json`
 - `--use_fixed_insights`
   
 

From ed6ce14838861dec0385e5cdfb131ab3664948b9 Mon Sep 17 00:00:00 2001
From: Yizhou Chi <chiyizhou@fuzhi.ai>
Date: Sat, 14 Sep 2024 15:24:17 +0800
Subject: [PATCH 04/14] update fixed insights

---
 expo/insights/fixed_insights.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/expo/insights/fixed_insights.json b/expo/insights/fixed_insights.json
index e52745707..4f42b9db1 100644
--- a/expo/insights/fixed_insights.json
+++ b/expo/insights/fixed_insights.json
@@ -10,7 +10,7 @@
     "task_id": 4
 },
 {
-    "Analysis": "To avoid overfitting, train a weighted ensemble model such as StackingClassifier or StackingRegressor using **dev set** after base models being trained.",
+    "Analysis": "To avoid overfitting, train a weighted ensemble model such as StackingClassifier or StackingRegressor; You could do some quick model prototyping to see which models work best and then use them in the ensemble.",
     "Category": "Model Training",
     "task_id": 4
 },

From 8a5b6d6e7794c9b1fb795e654b9a1655c4dd83da Mon Sep 17 00:00:00 2001
From: Yizhou Chi <chiyizhou@fuzhi.ai>
Date: Sat, 14 Sep 2024 15:53:38 +0800
Subject: [PATCH 05/14] update recommendation prompt

---
 expo/data/dataset.py | 23 +++++++++++++++++++----
 1 file changed, 19 insertions(+), 4 deletions(-)

diff --git a/expo/data/dataset.py b/expo/data/dataset.py
index 03b80985a..8af0c485e 100644
--- a/expo/data/dataset.py
+++ b/expo/data/dataset.py
@@ -28,14 +28,29 @@ STACKING = """
 SPECIAL_INSTRUCTIONS = {"ag": USE_AG, "stacking": STACKING}
 
 RECOMMENDATION = """
-## Base Models and Ensemble
-You can consider using the following base models:
-`GBM` (LightGBM) `CAT` (CatBoost) `XGB` (XGBoost) `RF` (random forest) `XT` (extremely randomized trees) `KNN` (k-nearest neighbors) ‘LR’ (linear regression)
+## Base Models
+You have access to the following base models:
+Tabular:
+LightGBM, CatBoost, XGBoost, random forest, extremely randomized trees, k-nearest neighbors, linear regression
+
+Image:
+ResNet, DenseNet, VGG, Inception, MobileNet, EfficientNet
+
+Text:
+BERT, RoBERTa, DistilBERT, GPT-2
+"""
+
+# The RECOMMENDATION above is not tested but might be needed for multi-modal datasets
+
+RECOMMENDATION = """
+## Base Models
+You have access to the following base models:
+LightGBM, CatBoost, XGBoost, random forest, extremely randomized trees, k-nearest neighbors, linear regression
 """
 
 DI_INSTRUCTION = (
     RECOMMENDATION
-    + """**Attention** 
+    + """## Attention
 1. Please do not leak the target label in any form during training.
 2. Test set does not have the target column.
 3. When conducting data exploration or analysis, print out the results of your findings.

From 743c67aef8c6b0aed81d4334546a543cc2187832 Mon Sep 17 00:00:00 2001
From: Yizhou Chi <chiyizhou@fuzhi.ai>
Date: Sat, 14 Sep 2024 17:34:17 +0800
Subject: [PATCH 06/14] change task type prompt to prevent unwanted label
 transformation

---
 metagpt/prompts/task_type.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/metagpt/prompts/task_type.py b/metagpt/prompts/task_type.py
index 116756edc..ca0aae572 100644
--- a/metagpt/prompts/task_type.py
+++ b/metagpt/prompts/task_type.py
@@ -11,7 +11,7 @@ The current task is about data preprocessing, please note the following:
 - Monitor data types per column, applying appropriate methods.
 - Ensure operations are on existing dataset columns.
 - Avoid writing processed data to files.
-- Avoid any change to label column, such as standardization, etc.
+- **ATTENTION** Do NOT make any changes to the label column, such as standardization, etc.
 - Prefer alternatives to one-hot encoding for categorical data.
 - Only encode or scale necessary columns to allow for potential feature-specific engineering tasks (like time_extract, binning, extraction, etc.) later.
 - Each step do data preprocessing to train, must do same for test separately at the same time.
@@ -26,7 +26,7 @@ The current task is about feature engineering. when performing it, please adhere
 - Avoid creating redundant or excessively numerous features in one step.
 - Exclude ID columns from feature generation and remove them.
 - Each feature engineering operation performed on the train set must also applies to the dev/test separately at the same time.
-- Avoid using the label column to create features, except for cat encoding.
+- **ATTENTION** Do NOT use the label column to create features or make any changes to the label column, except for cat encoding.
 - Use the data from previous task result if exist, do not mock or reload data yourself.
 - Always copy the DataFrame before processing it and use the copy to process.
 """

From 5e7cac7e6e3f53a98cddc62c6453e19cfb7b3bf8 Mon Sep 17 00:00:00 2001
From: Yizhou Chi <chiyizhou@fuzhi.ai>
Date: Sat, 14 Sep 2024 17:51:32 +0800
Subject: [PATCH 07/14] fix fixed_insights bug: pass use_fixed_insights to
 Node.expand

---
 expo/MCTS.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/expo/MCTS.py b/expo/MCTS.py
index 265356f65..ef408b2dd 100644
--- a/expo/MCTS.py
+++ b/expo/MCTS.py
@@ -148,7 +148,7 @@ class Node:
         role = role.model_copy()
         role.save_state(static_save=True)
 
-    async def expand(self, max_children):
+    async def expand(self, max_children, use_fixed_insights):
         if self.is_fully_expanded():
             return
         insight_geneartor = InstructionGenerator()
@@ -159,7 +159,7 @@ class Node:
             original_instruction=original_instruction,
             max_num=max_children,
             file_path=self.state["exp_pool_path"],
-            use_fixed_insights=self.use_fixed_insights,
+            use_fixed_insights=use_fixed_insights,
         )
         new_state = self.state.copy()
         new_state["start_task_id"] += 1
@@ -259,7 +259,7 @@ class MCTS:
         return max(all_children, key=uct)
 
     async def expand(self, node: Node, max_children=5):
-        await node.expand(max_children)
+        await node.expand(max_children, self.use_fixed_insights)
         if node not in self.children or not self.children[node]:
             self.children[node] = node.children
         return node.children

From f856d768fe2630c1482cfb8b568f48e97978acc2 Mon Sep 17 00:00:00 2001
From: Yizhou Chi <chiyizhou@fuzhi.ai>
Date: Sat, 14 Sep 2024 18:05:02 +0800
Subject: [PATCH 08/14] remove recommendation from di initial prompt, add
 recommendation to task type prompt

---
 expo/data/dataset.py         | 27 ++-------------------------
 metagpt/prompts/task_type.py |  3 +++
 2 files changed, 5 insertions(+), 25 deletions(-)

diff --git a/expo/data/dataset.py b/expo/data/dataset.py
index 8af0c485e..9748cb8c2 100644
--- a/expo/data/dataset.py
+++ b/expo/data/dataset.py
@@ -27,30 +27,8 @@ STACKING = """
 
 SPECIAL_INSTRUCTIONS = {"ag": USE_AG, "stacking": STACKING}
 
-RECOMMENDATION = """
-## Base Models
-You have access to the following base models:
-Tabular:
-LightGBM, CatBoost, XGBoost, random forest, extremely randomized trees, k-nearest neighbors, linear regression
-
-Image:
-ResNet, DenseNet, VGG, Inception, MobileNet, EfficientNet
-
-Text:
-BERT, RoBERTa, DistilBERT, GPT-2
-"""
-
-# The RECOMMENDATION above is not tested but might be needed for multi-modal datasets
-
-RECOMMENDATION = """
-## Base Models
-You have access to the following base models:
-LightGBM, CatBoost, XGBoost, random forest, extremely randomized trees, k-nearest neighbors, linear regression
-"""
-
-DI_INSTRUCTION = (
-    RECOMMENDATION
-    + """## Attention
+DI_INSTRUCTION = """
+## Attention
 1. Please do not leak the target label in any form during training.
 2. Test set does not have the target column.
 3. When conducting data exploration or analysis, print out the results of your findings.
@@ -69,7 +47,6 @@ Print the train and dev set performance in the last step.
 # Output dir
 {output_dir}
 """
-)
 
 TASK_PROMPT = """
 # User requirement
diff --git a/metagpt/prompts/task_type.py b/metagpt/prompts/task_type.py
index ca0aae572..6b230fc9e 100644
--- a/metagpt/prompts/task_type.py
+++ b/metagpt/prompts/task_type.py
@@ -34,6 +34,9 @@ The current task is about feature engineering. when performing it, please adhere
 # Prompt for taking on "model_train" tasks
 MODEL_TRAIN_PROMPT = """
 The current task is about training a model, please ensure high performance:
+- For tabular datasets - you have access to LightGBM, CatBoost, XGBoost, random forest, extremely randomized trees, k-nearest neighbors, linear regression, etc.
+- For image datasets - you have access to ResNet, VGG, Inception, MobileNet, DenseNet, EfficientNet, etc.
+- For text datasets - you have access to BERT, GPT-2, RoBERTa, DistilBERT, T5, etc.
 - Keep in mind that your user prioritizes results and is highly focused on model performance. So, when needed, feel free to use models of any complexity to improve effectiveness, such as XGBoost, CatBoost, etc.
 - If non-numeric columns exist, perform label encode together with all steps.
 - Use the data from previous task result directly, do not mock or reload data yourself.

From 9d2c81a127de86b7ddcdceba6ad0f6db0eac6073 Mon Sep 17 00:00:00 2001
From: Yizhou Chi <chiyizhou@fuzhi.ai>
Date: Sat, 14 Sep 2024 20:04:09 +0800
Subject: [PATCH 09/14] fix evaluation bug: read predictions from the last
 CSV column

---
 expo/experimenter/experimenter.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/expo/experimenter/experimenter.py b/expo/experimenter/experimenter.py
index 89d589d7d..b7a0e0b2f 100644
--- a/expo/experimenter/experimenter.py
+++ b/expo/experimenter/experimenter.py
@@ -87,7 +87,7 @@ class Experimenter:
         pred_node_path = os.path.join(state["node_dir"], f"{self.start_time}-{split}_predictions.csv")
         gt_path = os.path.join(state["datasets_dir"][f"{split}_target"])
         preds = pd.read_csv(pred_path)
-        preds = preds[preds.columns.tolist()[0]]
+        preds = preds[preds.columns.tolist()[-1]]
         preds.to_csv(pred_node_path, index=False)
         gt = pd.read_csv(gt_path)["target"]
         metric = state["dataset_config"]["metric"]

From 9ff9d27ab0c388d1c37af00751181976358433d1 Mon Sep 17 00:00:00 2001
From: Yizhou Chi <chiyizhou@fuzhi.ai>
Date: Sat, 14 Sep 2024 20:08:02 +0800
Subject: [PATCH 10/14] pass args.load_tree through in MCTSExperimenter

---
 expo/experimenter/mcts.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/expo/experimenter/mcts.py b/expo/experimenter/mcts.py
index e06169a70..f0db72841 100644
--- a/expo/experimenter/mcts.py
+++ b/expo/experimenter/mcts.py
@@ -22,6 +22,7 @@ class MCTSExperimenter(Experimenter):
             state=self.state,
             reflection=self.args.reflection,
             rollouts=self.args.rollouts,
+            load_tree=self.args.load_tree,
         )
         best_node = best_nodes["global_best"]
         dev_best_node = best_nodes["dev_best"]

From 24db19fa13c70f43297de44330d8b7fe3f702ef7 Mon Sep 17 00:00:00 2001
From: Yizhou Chi <chiyizhou@fuzhi.ai>
Date: Sat, 14 Sep 2024 20:33:40 +0800
Subject: [PATCH 11/14] fix start task id consistency

---
 expo/MCTS.py                      | 7 +++++--
 expo/experimenter/experimenter.py | 3 ++-
 expo/experimenter/mcts.py         | 1 +
 3 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/expo/MCTS.py b/expo/MCTS.py
index ef408b2dd..c96c57b47 100644
--- a/expo/MCTS.py
+++ b/expo/MCTS.py
@@ -16,12 +16,11 @@ from metagpt.utils.common import read_json_file
 
 
 def initialize_di_root_node(state, reflection: bool = True):
-    start_task_id = 2
     # state = create_initial_state(
     #     task, start_task_id=start_task_id, data_config=data_config, low_is_better=low_is_better, name=name
     # )
     role = ResearchAssistant(
-        node_id="0", start_task_id=start_task_id, use_reflection=reflection, role_dir=state["node_dir"]
+        node_id="0", start_task_id=state["start_task_id"], use_reflection=reflection, role_dir=state["node_dir"]
     )
     return role, Node(parent=None, state=state, action=None, value=0)
 
@@ -208,6 +207,10 @@ class Node:
                 self.raw_reward = score_dict
                 run_finished = True
             except Exception as e:
+                print(f"Error: {e}")
+                import pdb
+
+                pdb.set_trace()
                 mcts_logger.log("MCTS", f"Error in running the role: {e}")
                 num_runs += 1
         if not run_finished:
diff --git a/expo/experimenter/experimenter.py b/expo/experimenter/experimenter.py
index b7a0e0b2f..155108f8d 100644
--- a/expo/experimenter/experimenter.py
+++ b/expo/experimenter/experimenter.py
@@ -13,6 +13,7 @@ from expo.utils import DATA_CONFIG, save_notebook
 class Experimenter:
     result_path: str = "results/base"
     data_config = DATA_CONFIG
+    start_task_id = 1
 
     def __init__(self, args, **kwargs):
         self.args = args
@@ -20,7 +21,7 @@ class Experimenter:
         self.start_time = self.start_time_raw.strftime("%Y%m%d%H%M")
         self.state = create_initial_state(
             self.args.task,
-            start_task_id=1,
+            start_task_id=self.start_task_id,
             data_config=self.data_config,
             low_is_better=self.args.low_is_better,
             name=self.args.name,
diff --git a/expo/experimenter/mcts.py b/expo/experimenter/mcts.py
index f0db72841..89f362b6b 100644
--- a/expo/experimenter/mcts.py
+++ b/expo/experimenter/mcts.py
@@ -6,6 +6,7 @@ from expo.MCTS import MCTS
 
 class MCTSExperimenter(Experimenter):
     result_path: str = "results/mcts"
+    start_task_id = 2
 
     def __init__(self, args, tree_mode=None, **kwargs):
         super().__init__(args, **kwargs)

From 1cdffc3d8550538195fce30cea4d913a24be9c04 Mon Sep 17 00:00:00 2001
From: Yizhou Chi <chiyizhou@fuzhi.ai>
Date: Sat, 14 Sep 2024 20:49:49 +0800
Subject: [PATCH 12/14] =?UTF-8?q?FE=20prompt:=20FE=E9=80=9A=E5=B8=B8?=
 =?UTF-8?q?=E4=B8=8D=E4=BC=9Amake=20changes=E8=80=8C=E6=98=AF=E5=8A=A0?=
 =?UTF-8?q?=E6=96=B0=E7=9A=84=E7=89=B9=E5=BE=81=20DI=20prompt:=20=E8=A6=81?=
 =?UTF-8?q?=E6=B1=82=E8=AE=A9predictions=E6=9C=80=E7=BB=88=E7=BB=93?=
 =?UTF-8?q?=E6=9E=9C=E4=B8=80=E8=87=B4=EF=BC=8C=E5=B9=B6=E6=8F=90=E4=BE=9B?=
 =?UTF-8?q?=E4=BE=8B=E5=AD=90?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 expo/data/dataset.py         | 1 +
 metagpt/prompts/task_type.py | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/expo/data/dataset.py b/expo/data/dataset.py
index 9748cb8c2..28bd26d2e 100644
--- a/expo/data/dataset.py
+++ b/expo/data/dataset.py
@@ -40,6 +40,7 @@ DI_INSTRUCTION = """
 1. Save the prediction results of BOTH the dev set and test set in `dev_predictions.csv` and `test_predictions.csv` respectively in the output directory. 
 - Both files should contain a single column named `target` with the predicted values.
 2. Make sure the prediction results are in the same format as the target column in the training set. 
+- For instance, if the target column is categorical, the prediction results should be categorical as well.
 
 ## Output Performance
 Print the train and dev set performance in the last step.
diff --git a/metagpt/prompts/task_type.py b/metagpt/prompts/task_type.py
index 6b230fc9e..599d437c5 100644
--- a/metagpt/prompts/task_type.py
+++ b/metagpt/prompts/task_type.py
@@ -26,7 +26,7 @@ The current task is about feature engineering. when performing it, please adhere
 - Avoid creating redundant or excessively numerous features in one step.
 - Exclude ID columns from feature generation and remove them.
 - Each feature engineering operation performed on the train set must also applies to the dev/test separately at the same time.
-- **ATTENTION** Do NOT use the label column to create features or make any changes to the label column, except for cat encoding.
+- **ATTENTION** Do NOT use the label column to create features, except for cat encoding.
 - Use the data from previous task result if exist, do not mock or reload data yourself.
 - Always copy the DataFrame before processing it and use the copy to process.
 """

From 8c6dd480dcddae7bb123f2a2e9dc833db11b7fe7 Mon Sep 17 00:00:00 2001
From: Yizhou Chi <chiyizhou@fuzhi.ai>
Date: Sat, 14 Sep 2024 21:10:37 +0800
Subject: [PATCH 13/14] remove pdb

---
 expo/MCTS.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/expo/MCTS.py b/expo/MCTS.py
index c96c57b47..5cd357989 100644
--- a/expo/MCTS.py
+++ b/expo/MCTS.py
@@ -208,9 +208,6 @@ class Node:
                 run_finished = True
             except Exception as e:
                 print(f"Error: {e}")
-                import pdb
-
-                pdb.set_trace()
                 mcts_logger.log("MCTS", f"Error in running the role: {e}")
                 num_runs += 1
         if not run_finished:

From 3c50575ff7902dbe79d144dee40d0383765fb060 Mon Sep 17 00:00:00 2001
From: Yizhou Chi <chiyizhou@fuzhi.ai>
Date: Sat, 14 Sep 2024 21:16:40 +0800
Subject: [PATCH 14/14] create node_dir in create_initial_state

---
 expo/MCTS.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/expo/MCTS.py b/expo/MCTS.py
index 5cd357989..228671e2c 100644
--- a/expo/MCTS.py
+++ b/expo/MCTS.py
@@ -40,6 +40,7 @@ def create_initial_state(task, start_task_id, data_config, low_is_better: bool,
         "start_task_id": start_task_id,
         "low_is_better": low_is_better,
     }
+    os.makedirs(initial_state["node_dir"], exist_ok=True)
     return initial_state