mirror of
https://github.com/FoundationAgents/MetaGPT.git
synced 2026-06-11 15:15:18 +02:00
add ml Class tool schema
This commit is contained in:
parent
4f0d55656e
commit
07771a7699
2 changed files with 735 additions and 0 deletions
306
metagpt/tools/functions/schemas/data_preprocess.yml
Normal file
306
metagpt/tools/functions/schemas/data_preprocess.yml
Normal file
|
|
@ -0,0 +1,306 @@
|
|||
FillMissingValue:
|
||||
type: class
|
||||
description: "Completing missing values with simple strategies"
|
||||
methods:
|
||||
__init__:
|
||||
description: "Initialize self."
|
||||
parameters:
|
||||
properties:
|
||||
features:
|
||||
type: list
|
||||
description: "columns to be processed"
|
||||
strategy:
|
||||
type: str
|
||||
description: "the imputation strategy"
|
||||
default: mean
|
||||
enum:
|
||||
- mean
|
||||
- median
|
||||
- most_frequent
|
||||
- constant
|
||||
fill_value:
|
||||
type: int
|
||||
description: "fill_value is used to replace all occurrences of missing_values"
|
||||
default: null
|
||||
required:
|
||||
- features
|
||||
fit:
|
||||
description: "Fit the FillMissingValue model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
transform:
|
||||
description: "Transform the input DataFrame with the fitted model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
fit_transform:
|
||||
description: "Fit and transform the input DataFrame."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
|
||||
MinMaxScale:
|
||||
type: class
|
||||
description: "Transform features by scaling each feature to a range, which is (0, 1)"
|
||||
methods:
|
||||
__init__:
|
||||
description: "Initialize self."
|
||||
parameters:
|
||||
properties:
|
||||
features:
|
||||
type: list
|
||||
description: "columns to be processed"
|
||||
required:
|
||||
- features
|
||||
fit:
|
||||
description: "Fit the MinMaxScale model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
transform:
|
||||
description: "Transform the input DataFrame with the fitted model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
fit_transform:
|
||||
description: "Fit and transform the input DataFrame."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
|
||||
StandardScale:
|
||||
type: class
|
||||
description: "Standardize features by removing the mean and scaling to unit variance"
|
||||
methods:
|
||||
__init__:
|
||||
description: "Initialize self."
|
||||
parameters:
|
||||
properties:
|
||||
features:
|
||||
type: list
|
||||
description: "columns to be processed"
|
||||
required:
|
||||
- features
|
||||
fit:
|
||||
description: "Fit the StandardScale model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
transform:
|
||||
description: "Transform the input DataFrame with the fitted model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
fit_transform:
|
||||
description: "Fit and transform the input DataFrame."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
|
||||
MaxAbsScale:
|
||||
type: class
|
||||
description: "Scale each feature by its maximum absolute value"
|
||||
methods:
|
||||
__init__:
|
||||
description: "Initialize self."
|
||||
parameters:
|
||||
properties:
|
||||
features:
|
||||
type: list
|
||||
description: "columns to be processed"
|
||||
required:
|
||||
- features
|
||||
fit:
|
||||
description: "Fit the MaxAbsScale model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
transform:
|
||||
description: "Transform the input DataFrame with the fitted model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
fit_transform:
|
||||
description: "Fit and transform the input DataFrame."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
|
||||
LabelEncode:
|
||||
type: class
|
||||
description: "Apply label encoding to specified categorical columns in-place."
|
||||
methods:
|
||||
__init__:
|
||||
description: "Initialize self."
|
||||
parameters:
|
||||
properties:
|
||||
features:
|
||||
type: list
|
||||
description: "Categorical columns to be label encoded"
|
||||
required:
|
||||
- features
|
||||
fit:
|
||||
description: "Fit the LabelEncode model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
transform:
|
||||
description: "Transform the input DataFrame with the fitted model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
fit_transform:
|
||||
description: "Fit and transform the input DataFrame."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
|
||||
OneHotEncode:
|
||||
type: class
|
||||
description: "Apply one-hot encoding to specified categorical columns, the original columns will be dropped."
|
||||
methods:
|
||||
__init__:
|
||||
description: "Initialize self."
|
||||
parameters:
|
||||
properties:
|
||||
features:
|
||||
type: list
|
||||
description: "Categorical columns to be one-hot encoded and dropped"
|
||||
required:
|
||||
- features
|
||||
fit:
|
||||
description: "Fit the OneHotEncode model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
transform:
|
||||
description: "Transform the input DataFrame with the fitted model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
fit_transform:
|
||||
description: "Fit and transform the input DataFrame."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
429
metagpt/tools/functions/schemas/feature_engineering.yml
Normal file
429
metagpt/tools/functions/schemas/feature_engineering.yml
Normal file
|
|
@ -0,0 +1,429 @@
|
|||
PolynomialExpansion:
|
||||
type: class
|
||||
description: "Add polynomial and interaction features from selected numeric columns, excluding the bias column."
|
||||
methods:
|
||||
__init__:
|
||||
description: "Initialize self."
|
||||
parameters:
|
||||
properties:
|
||||
cols:
|
||||
type: list
|
||||
description: "Columns for polynomial expansion."
|
||||
degree:
|
||||
type: int
|
||||
description: "The degree of the polynomial features."
|
||||
default: 2
|
||||
required:
|
||||
- cols
|
||||
fit:
|
||||
description: "Fit the PolynomialExpansion model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
transform:
|
||||
description: "Transform the input DataFrame with the fitted model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
fit_transform:
|
||||
description: "Fit and transform the input DataFrame."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
|
||||
CatCount:
|
||||
type: class
|
||||
description: "Add value counts of categorical columns as new features."
|
||||
methods:
|
||||
__init__:
|
||||
description: "Initialize self."
|
||||
parameters:
|
||||
properties:
|
||||
cols:
|
||||
type: list
|
||||
description: "Columns for value counts."
|
||||
required:
|
||||
- cols
|
||||
fit:
|
||||
description: "Fit the CatCount model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
transform:
|
||||
description: "Transform the input DataFrame with the fitted model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
fit_transform:
|
||||
description: "Fit and transform the input DataFrame."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
|
||||
TargetMeanEncoder:
|
||||
type: class
|
||||
description: "Encodes a categorical column by the mean of the label column, and adds the result as a new feature."
|
||||
methods:
|
||||
__init__:
|
||||
description: "Initialize self."
|
||||
parameters:
|
||||
properties:
|
||||
col:
|
||||
type: str
|
||||
description: "Column to be mean encoded."
|
||||
label:
|
||||
type: str
|
||||
description: "Predicted label column."
|
||||
required:
|
||||
- col
|
||||
- label
|
||||
fit:
|
||||
description: "Fit the TargetMeanEncoder model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
transform:
|
||||
description: "Transform the input DataFrame with the fitted model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
fit_transform:
|
||||
description: "Fit and transform the input DataFrame."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
|
||||
KFoldTargetMeanEncoder:
|
||||
type: class
|
||||
description: "Adds a new feature to the DataFrame by k-fold mean encoding of a categorical column using the label column."
|
||||
methods:
|
||||
__init__:
|
||||
description: "Initialize self."
|
||||
parameters:
|
||||
properties:
|
||||
col:
|
||||
type: str
|
||||
description: "Column to be k-fold mean encoded."
|
||||
label:
|
||||
type: str
|
||||
description: "Predicted label column."
|
||||
n_splits:
|
||||
type: int
|
||||
description: "Number of splits for K-fold."
|
||||
default: 5
|
||||
random_state:
|
||||
type: int
|
||||
description: "Random seed."
|
||||
default: 2021
|
||||
required:
|
||||
- col
|
||||
- label
|
||||
fit:
|
||||
description: "Fit the KFoldTargetMeanEncoder model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
transform:
|
||||
description: "Transform the input DataFrame with the fitted model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
fit_transform:
|
||||
description: "Fit and transform the input DataFrame."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
|
||||
CatCross:
|
||||
type: class
|
||||
description: "Add pairwise crossed features and convert them to numerical features."
|
||||
methods:
|
||||
__init__:
|
||||
description: "Initialize self."
|
||||
parameters:
|
||||
properties:
|
||||
cols:
|
||||
type: list
|
||||
description: "Columns to be pairwise crossed."
|
||||
max_cat_num:
|
||||
type: int
|
||||
description: "Maximum unique categories per crossed feature."
|
||||
default: 100
|
||||
required:
|
||||
- cols
|
||||
fit:
|
||||
description: "Fit the CatCross model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
transform:
|
||||
description: "Transform the input DataFrame with the fitted model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
fit_transform:
|
||||
description: "Fit and transform the input DataFrame."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
|
||||
GroupStat:
|
||||
type: class
|
||||
description: "Aggregate specified column in a DataFrame grouped by another column, adding new features named '<agg_col>_<agg_func>_by_<group_col>'."
|
||||
methods:
|
||||
__init__:
|
||||
description: "Initialize self."
|
||||
parameters:
|
||||
properties:
|
||||
group_col:
|
||||
type: str
|
||||
description: "Column used for grouping."
|
||||
agg_col:
|
||||
type: str
|
||||
description: "Column on which aggregation is performed."
|
||||
agg_funcs:
|
||||
type: list
|
||||
description: >-
|
||||
List of aggregation functions to apply, such as ['mean', 'std'].
|
||||
Each function must be supported by pandas.
|
||||
required:
|
||||
- group_col
|
||||
- agg_col
|
||||
- agg_funcs
|
||||
fit:
|
||||
description: "Fit the GroupStat model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
transform:
|
||||
description: "Transform the input DataFrame with the fitted model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
fit_transform:
|
||||
description: "Fit and transform the input DataFrame."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
|
||||
SplitBins:
|
||||
type: class
|
||||
description: "Bin continuous data into intervals and return the bin identifier encoded as an integer value"
|
||||
methods:
|
||||
__init__:
|
||||
description: "Initialize self."
|
||||
parameters:
|
||||
properties:
|
||||
cols:
|
||||
type: list
|
||||
description: "Columns to be binned."
|
||||
strategy:
|
||||
type: str
|
||||
description: "Strategy used to define the widths of the bins."
|
||||
default: quantile
|
||||
required:
|
||||
- cols
|
||||
fit:
|
||||
description: "Fit the SplitBins model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
transform:
|
||||
description: "Transform the input DataFrame with the fitted model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
fit_transform:
|
||||
description: "Fit and transform the input DataFrame."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
|
||||
GeneralSelection:
|
||||
type: class
|
||||
description: "Drop features whose values are all NaN and features with only one unique value."
|
||||
methods:
|
||||
__init__:
|
||||
description: "Initialize self."
|
||||
parameters:
|
||||
properties:
|
||||
label_col:
|
||||
type: str
|
||||
description: "Label column name."
|
||||
required:
|
||||
- label_col
|
||||
fit:
|
||||
description: "Fit the GeneralSelection model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
transform:
|
||||
description: "Transform the input DataFrame with the fitted model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
fit_transform:
|
||||
description: "Fit and transform the input DataFrame."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
Loading…
Add table
Add a link
Reference in a new issue