Remove recommendation from DI initial prompt; add recommendation to task type prompt

2026-06-11 15:15:18 +02:00 · 2024-09-14 18:05:02 +08:00 · 2024-09-14 18:05:02 +08:00 · f856d768fe
commit f856d768fe
parent 5e7cac7e6e
2 changed files with 5 additions and 25 deletions
--- a/expo/data/dataset.py
+++ b/expo/data/dataset.py
@@ -27,30 +27,8 @@ STACKING = """

 SPECIAL_INSTRUCTIONS = {"ag": USE_AG, "stacking": STACKING}

-RECOMMENDATION = """
-## Base Models
-You have access to the following base models:
-Tabular:
-LightGBM, CatBoost, XGBoost, random forest, extremely randomized trees, k-nearest neighbors, linear regression
-
-Image:
-ResNet, DenseNet, VGG, Inception, MobileNet, EfficientNet
-
-Text:
-BERT, RoBERTa, DistilBERT, GPT-2
-"""
-
-# The RECOMMENDATION above is not tested but might be needed for multi-modal datasets
-
-RECOMMENDATION = """
-## Base Models
-You have access to the following base models:
-LightGBM, CatBoost, XGBoost, random forest, extremely randomized trees, k-nearest neighbors, linear regression
-"""
-
-DI_INSTRUCTION = (
-    RECOMMENDATION
-    + """## Attention
+DI_INSTRUCTION = """
+## Attention
 1. Please do not leak the target label in any form during training.
 2. Test set does not have the target column.
 3. When conducting data exploration or analysis, print out the results of your findings.
@@ -69,7 +47,6 @@ Print the train and dev set performance in the last step.
 # Output dir
 {output_dir}
 """
-)

 TASK_PROMPT = """
 # User requirement
--- a/metagpt/prompts/task_type.py
+++ b/metagpt/prompts/task_type.py
@@ -34,6 +34,9 @@ The current task is about feature engineering. when performing it, please adhere
 # Prompt for taking on "model_train" tasks
 MODEL_TRAIN_PROMPT = """
 The current task is about training a model, please ensure high performance:
+- For tabular datasets - you have access to LightGBM, CatBoost, XGBoost, random forest, extremely randomized trees, k-nearest neighbors, linear regression, etc.
+- For image datasets - you have access to ResNet, VGG, Inception, MobileNet, DenseNet, EfficientNet, etc.
+- For text datasets - you have access to BERT, GPT-2, RoBERTa, DistilBERT, T5, etc.
 - Keep in mind that your user prioritizes results and is highly focused on model performance. So, when needed, feel free to use models of any complexity to improve effectiveness, such as XGBoost, CatBoost, etc.
 - If non-numeric columns exist, perform label encode together with all steps.
 - Use the data from previous task result directly, do not mock or reload data yourself.