refine prompt

This commit is contained in:
lidanyang 2023-12-13 19:36:02 +08:00
parent 4423524734
commit e59bab73b0
2 changed files with 27 additions and 13 deletions

View file

@ -174,11 +174,29 @@ Write complete code for 'Current Task'. And avoid duplicating code from 'Done Ta
Specifically, {special_prompt}
# Code Steps:
Follow steps below when you writing code if it's convenient.
Strictly follow the steps below when writing code, if it's convenient.
{code_steps}
# Output Example:
When the current task is "train a lightgbm model on training data", and there are two steps in 'Code Steps', the code should look like:
```python
# Step 1: check data type and convert to numeric
obj_cols = train.select_dtypes(include='object').columns.tolist()
for col in obj_cols:
    encoder = LabelEncoder()
    train[col] = encoder.fit_transform(train[col])
    test[col] = test[col].apply(lambda x: x if x in encoder.classes_ else 'unknown')
    test[col] = encoder.transform(test[col])
# Step 2: train lightgbm model
model = LGBMClassifier()
model.fit(train, y_train)
```end
# Constraints:
- Ensure the new output code is executable in the same Jupyter notebook in which the code of previous tasks has already been executed.
- The output code should contain all steps implemented in 'Code Steps'.
"""
TOOL_USAGE_PROMPT = """
@ -202,7 +220,7 @@ Write complete code for 'Current Task'. And avoid duplicating code from 'Done Ta
Specifically, {special_prompt}
# Code Steps:
Follow steps below when you writing code if it's convenient.
Strictly follow the steps below when writing code, if it's convenient.
{code_steps}
# Capabilities
@ -214,8 +232,9 @@ Each Class tool is described in JSON format. When you call a tool, import the to
{tool_catalog}
# Output Example:
when current task is "fill missing value and handle outliers", and their are training data and test data, the output code be like:
When the current task is "do data preprocess, like fill missing value, handle outliers, etc.", and there are two steps in 'Code Steps', the code should look like:
```python
# Step 1: fill missing value
# Tools used: ['FillMissingValue']
from metagpt.tools.functions.libs.data_preprocess import FillMissingValue
@ -227,6 +246,7 @@ fill_missing_value.fit(train_processed)
train_processed = fill_missing_value.transform(train_processed)
test_processed = fill_missing_value.transform(test_processed)
# Step 2: handle outliers
for col in num_cols:
    low, high = train_processed[col].quantile([0.01, 0.99])
    train_processed[col] = train_processed[col].clip(low, high)
@ -235,8 +255,9 @@ for col in num_cols:
# Constraints:
- Ensure the new output code is executable in the same Jupyter notebook in which the code of previous tasks has already been executed.
- Prioritize using pre-defined tools for the same functionality.
- Always prioritize using pre-defined tools for the same functionality.
- Always copy the DataFrame before processing it and use the copy to process.
- The output code should contain all steps implemented correctly in 'Code Steps'.
"""
#- If 'Code Steps' contains step done in 'Done Tasks', such as reading data, don't repeat it.
@ -266,7 +287,7 @@ The current task is about training a model, please ensure high performance:
MODEL_EVALUATE_PROMPT = """
The current task is about evaluating a model, please note the following:
- Ensure that the evaluated data is same processed as the training data.
- Ensure that the evaluation data is processed in the same way as the training data. If not, remember to use the objects from 'Done Tasks' to transform the data.
- Use trained model from previous task result directly, do not mock or reload model yourself.
"""

View file

@ -32,13 +32,6 @@ STRUCTURAL_CONTEXT = """
{tasks}
## Current Task
{current_task}
## Packages Installed
scikit-learn
pandas
numpy
lightgbm
xgboost
catboost
"""
@ -212,7 +205,7 @@ class MLEngineer(Role):
result, success = await self.execute_code.run(code)
# truncated the result
print(truncate(result))
self.working_memory.add(
Message(content=truncate(remove_escape_and_color_codes(result)), role="user", cause_by=ExecutePyCode)
)