From 358a97e34cb388455f7d1dbfd5f3492174e10b88 Mon Sep 17 00:00:00 2001
From: Yizhou Chi <chiyizhou@fuzhi.ai>
Date: Fri, 18 Oct 2024 11:16:28 +0800
Subject: [PATCH 1/4] modify prompt

---
 expo/data/custom_task.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/expo/data/custom_task.py b/expo/data/custom_task.py
index f3cd433f5..032fcb8ac 100644
--- a/expo/data/custom_task.py
+++ b/expo/data/custom_task.py
@@ -21,11 +21,11 @@ COMPETITION INSTRUCTIONS
 {task_description}
 
 ## More Instructions
-- output_dir: {output_dir}
-- Besides `submission.csv`, you should also save your output in the output directory.
 - You should split the training data into train and dev set.
 - You should use the dev set to improve your model. Print the final dev set score after training.
-- Save the prediction results of BOTH the dev set and test set in `dev_predictions.csv` and `test_predictions.csv` respectively in the output directory. They should be in the same format as the `submission.csv`.
+- output_dir: {output_dir}
+- Besides `submission.csv`, you should also save your `test_predictions.csv` and `dev_predictions.csv` in the output directory.
+- Note that `test_predictions.csv` should be identical to `submission.csv`.
 - Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target. {special_instruction}
 **Do not make any plots or visualizations.**
 """

From 7a38165e6b20272d44b511c1f43d9e75425c1690 Mon Sep 17 00:00:00 2001
From: Yizhou Chi <chiyizhou@fuzhi.ai>
Date: Fri, 18 Oct 2024 14:04:57 +0800
Subject: [PATCH 2/4] add fixed seed to train/dev split instruction

---
 expo/data/custom_task.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/expo/data/custom_task.py b/expo/data/custom_task.py
index 032fcb8ac..c2bf5c710 100644
--- a/expo/data/custom_task.py
+++ b/expo/data/custom_task.py
@@ -21,7 +21,7 @@ COMPETITION INSTRUCTIONS
 {task_description}
 
 ## More Instructions
-- You should split the training data into train and dev set.
+- You should split the training data into train and dev set with a seed of 42.
 - You should use the dev set to improve your model. Print the final dev set score after training.
 - output_dir: {output_dir}
 - Besides `submission.csv`, you should also save your `test_predictions.csv` and `dev_predictions.csv` in the output directory.

From f97ad720b0ed9b78921cf57662ca7dbf9430ebb8 Mon Sep 17 00:00:00 2001
From: Yizhou Chi <chiyizhou@fuzhi.ai>
Date: Fri, 18 Oct 2024 14:29:36 +0800
Subject: [PATCH 3/4] include task name in tree visualization save path

---
 expo/scripts/visualize_experiment.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/expo/scripts/visualize_experiment.py b/expo/scripts/visualize_experiment.py
index 42b4490ec..e2443d0fd 100644
--- a/expo/scripts/visualize_experiment.py
+++ b/expo/scripts/visualize_experiment.py
@@ -20,4 +20,4 @@ if __name__ == "__main__":
     root = mcts.root_node
     G = nx.DiGraph()
     build_tree_recursive(G, "0", root)
-    visualize_tree(G, save_path="results/tree.png")
+    visualize_tree(G, save_path=f"results/{args.task}-tree.png")

From 5eaa072d8d3abd2e5a8f587179e2ae7e5c9bc023 Mon Sep 17 00:00:00 2001
From: Yizhou Chi <chiyizhou@fuzhi.ai>
Date: Fri, 18 Oct 2024 16:05:24 +0800
Subject: [PATCH 4/4] add an instruction to prevent the data-splitting
 instruction from being replaced

---
 expo/insights/instruction_generator.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/expo/insights/instruction_generator.py b/expo/insights/instruction_generator.py
index 78b32e45d..ab9b2cc67 100644
--- a/expo/insights/instruction_generator.py
+++ b/expo/insights/instruction_generator.py
@@ -8,7 +8,7 @@ from expo.utils import clean_json_from_rsp, load_data_config, mcts_logger
 from metagpt.llm import LLM
 from metagpt.schema import Message
 
-REFLECTION_SYSTEM_MSG = "As a Kaggle grandmaster participating in a competition, you need to analyze your experience and propose evolutionary points that are more likely to improve the performance of baseline code."
+REFLECTION_SYSTEM_MSG = "As a Kaggle Grandmaster competing in a challenge, your task is to suggest potential evolutionary improvements that could enhance the performance of the baseline code."
 
 CHANGE_INSTRUCTION = """
 # Original instruction
@@ -17,7 +17,9 @@ CHANGE_INSTRUCTION = """
 # Insights
 {insights}
 
-Rewrite the original instruction according to the insights
+Rewrite the original instruction according to the insights
+(If the original instruction involves splitting the data, ensure that your insights are integrated with the data split instructions,
+rather than replacing them.)
 
 # Expected Output Hard Format
 ```json