From 358a97e34cb388455f7d1dbfd5f3492174e10b88 Mon Sep 17 00:00:00 2001 From: Yizhou Chi Date: Fri, 18 Oct 2024 11:16:28 +0800 Subject: [PATCH 1/4] modify prompt --- expo/data/custom_task.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/expo/data/custom_task.py b/expo/data/custom_task.py index f3cd433f5..032fcb8ac 100644 --- a/expo/data/custom_task.py +++ b/expo/data/custom_task.py @@ -21,11 +21,11 @@ COMPETITION INSTRUCTIONS {task_description} ## More Instructions -- output_dir: {output_dir} -- Besides `submission.csv`, you should also save your output in the output directory. - You should split the training data into train and dev set. - You should use the dev set to improve your model. Print the final dev set score after training. -- Save the prediction results of BOTH the dev set and test set in `dev_predictions.csv` and `test_predictions.csv` respectively in the output directory. They should be in the same format as the `submission.csv`. +- output_dir: {output_dir} +- Besides `submission.csv`, you should also save your `test_predictions.csv` and `dev_predictions.csv` in the output directory. +- Note that `test_predictions.csv` should be identical to `submission.csv`. - Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target. {special_instruction} **Do not make any plots or visualizations.** """ From 7a38165e6b20272d44b511c1f43d9e75425c1690 Mon Sep 17 00:00:00 2001 From: Yizhou Chi Date: Fri, 18 Oct 2024 14:04:57 +0800 Subject: [PATCH 2/4] add seed --- expo/data/custom_task.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/expo/data/custom_task.py b/expo/data/custom_task.py index 032fcb8ac..c2bf5c710 100644 --- a/expo/data/custom_task.py +++ b/expo/data/custom_task.py @@ -21,7 +21,7 @@ COMPETITION INSTRUCTIONS {task_description} ## More Instructions -- You should split the training data into train and dev set. 
+- You should split the training data into train and dev set with a seed of 42. - You should use the dev set to improve your model. Print the final dev set score after training. - output_dir: {output_dir} - Besides `submission.csv`, you should also save your `test_predictions.csv` and `dev_predictions.csv` in the output directory. From f97ad720b0ed9b78921cf57662ca7dbf9430ebb8 Mon Sep 17 00:00:00 2001 From: Yizhou Chi Date: Fri, 18 Oct 2024 14:29:36 +0800 Subject: [PATCH 3/4] add task arg for tree visualization --- expo/scripts/visualize_experiment.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/expo/scripts/visualize_experiment.py b/expo/scripts/visualize_experiment.py index 42b4490ec..e2443d0fd 100644 --- a/expo/scripts/visualize_experiment.py +++ b/expo/scripts/visualize_experiment.py @@ -20,4 +20,4 @@ if __name__ == "__main__": root = mcts.root_node G = nx.DiGraph() build_tree_recursive(G, "0", root) - visualize_tree(G, save_path="results/tree.png") + visualize_tree(G, save_path=f"results/{args.task}-tree.png") From 5eaa072d8d3abd2e5a8f587179e2ae7e5c9bc023 Mon Sep 17 00:00:00 2001 From: Yizhou Chi Date: Fri, 18 Oct 2024 16:05:24 +0800 Subject: [PATCH 4/4] add an instruction to keep the data-split instruction from being replaced --- expo/insights/instruction_generator.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/expo/insights/instruction_generator.py b/expo/insights/instruction_generator.py index 78b32e45d..ab9b2cc67 100644 --- a/expo/insights/instruction_generator.py +++ b/expo/insights/instruction_generator.py @@ -8,7 +8,7 @@ from expo.utils import clean_json_from_rsp, load_data_config, mcts_logger from metagpt.llm import LLM from metagpt.schema import Message -REFLECTION_SYSTEM_MSG = "As a Kaggle grandmaster participating in a competition, you need to analyze your experience and propose evolutionary points that are more likely to improve the performance of baseline code." 
+REFLECTION_SYSTEM_MSG = "As a Kaggle Grandmaster competing in a challenge, your task is to suggest potential evolutionary improvements that could enhance the performance of the baseline code." CHANGE_INSTRUCTION = """ # Original instruction @@ -17,7 +17,9 @@ CHANGE_INSTRUCTION = """ # Insights {insights} -Rewrite the original instruction according to the insights +Rewrite the original instruction according to the insights +(If the original instruction involves splitting the data, ensure that your insights are integrated with the data split instructions, +rather than replacing them.) # Expected Output Hard Format ```json