Remove the base-model recommendation from the DI initial prompt (DI_INSTRUCTION) and add it to the "model_train" task-type prompt (MODEL_TRAIN_PROMPT)

This commit is contained in:
Yizhou Chi 2024-09-14 18:05:02 +08:00
parent 5e7cac7e6e
commit f856d768fe
2 changed files with 5 additions and 25 deletions

View file

@ -27,30 +27,8 @@ STACKING = """
SPECIAL_INSTRUCTIONS = {"ag": USE_AG, "stacking": STACKING}
RECOMMENDATION = """
## Base Models
You have access to the following base models:
Tabular:
LightGBM, CatBoost, XGBoost, random forest, extremely randomized trees, k-nearest neighbors, linear regression
Image:
ResNet, DenseNet, VGG, Inception, MobileNet, EfficientNet
Text:
BERT, RoBERTa, DistilBERT, GPT-2
"""
# NOTE: The multi-modal RECOMMENDATION above is untested and is overridden by the
# tabular-only definition below; it might be needed for multi-modal datasets.
RECOMMENDATION = """
## Base Models
You have access to the following base models:
LightGBM, CatBoost, XGBoost, random forest, extremely randomized trees, k-nearest neighbors, linear regression
"""
DI_INSTRUCTION = (
RECOMMENDATION
+ """## Attention
DI_INSTRUCTION = """
## Attention
1. Please do not leak the target label in any form during training.
2. Test set does not have the target column.
3. When conducting data exploration or analysis, print out the results of your findings.
@ -69,7 +47,6 @@ Print the train and dev set performance in the last step.
# Output dir
{output_dir}
"""
)
TASK_PROMPT = """
# User requirement

View file

@ -34,6 +34,9 @@ The current task is about feature engineering. when performing it, please adhere
# Prompt for taking on "model_train" tasks
MODEL_TRAIN_PROMPT = """
The current task is about training a model, please ensure high performance:
- For tabular datasets - you have access to LightGBM, CatBoost, XGBoost, random forest, extremely randomized trees, k-nearest neighbors, linear regression, etc.
- For image datasets - you have access to ResNet, VGG, Inception, MobileNet, DenseNet, EfficientNet, etc.
- For text datasets - you have access to BERT, GPT-2, RoBERTa, DistilBERT, T5, etc.
- Keep in mind that your user prioritizes results and is highly focused on model performance. So, when needed, feel free to use models of any complexity to improve effectiveness, such as XGBoost, CatBoost, etc.
- If non-numeric columns exist, perform label encode together with all steps.
- Use the data from previous task result directly, do not mock or reload data yourself.