add ml Class tool schema

This commit is contained in:
lidanyang 2023-12-12 10:56:51 +08:00
parent 4f0d55656e
commit 07771a7699
2 changed files with 735 additions and 0 deletions

View file

@ -0,0 +1,306 @@
FillMissingValue:
type: class
description: "Completing missing values with simple strategies"
methods:
__init__:
description: "Initialize self."
parameters:
properties:
features:
type: list
description: "columns to be processed"
strategy:
type: str
description: "the imputation strategy"
default: mean
enum:
- mean
- median
- most_frequent
- constant
fill_value:
type: int
description: "fill_value is used to replace all occurrences of missing_values"
default: null
required:
- features
fit:
description: "Fit the FillMissingValue model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
transform:
description: "Transform the input DataFrame with the fitted model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame."
fit_transform:
description: "Fit and transform the input DataFrame."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame."
MinMaxScale:
type: class
description: "Transform features by scaling each feature to a range, witch is (0, 1)"
methods:
__init__:
description: "Initialize self."
parameters:
properties:
features:
type: list
description: "columns to be processed"
required:
- features
fit:
description: "Fit the MinMaxScale model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
transform:
description: "Transform the input DataFrame with the fitted model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame."
fit_transform:
description: "Fit and transform the input DataFrame."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame."
StandardScale:
type: class
description: "Standardize features by removing the mean and scaling to unit variance"
methods:
__init__:
description: "Initialize self."
parameters:
properties:
features:
type: list
description: "columns to be processed"
required:
- features
fit:
description: "Fit the StandardScale model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
transform:
description: "Transform the input DataFrame with the fitted model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame."
fit_transform:
description: "Fit and transform the input DataFrame."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame."
MaxAbsScale:
type: class
description: "cale each feature by its maximum absolute value"
methods:
__init__:
description: "Initialize self."
parameters:
properties:
features:
type: list
description: "columns to be processed"
required:
- features
fit:
description: "Fit the MaxAbsScale model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
transform:
description: "Transform the input DataFrame with the fitted model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame."
fit_transform:
description: "Fit and transform the input DataFrame."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame."
LabelEncode:
type: class
description: "Apply label encoding to specified categorical columns in-place."
methods:
__init__:
description: "Initialize self."
parameters:
properties:
features:
type: list
description: "Categorical columns to be label encoded"
required:
- features
fit:
description: "Fit the LabelEncode model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
transform:
description: "Transform the input DataFrame with the fitted model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame."
fit_transform:
description: "Fit and transform the input DataFrame."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame."
OneHotEncode:
type: class
description: "Apply one-hot encoding to specified categorical columns, the original columns will be dropped."
methods:
__init__:
description: "Initialize self."
parameters:
properties:
features:
type: list
description: "Categorical columns to be one-hot encoded and dropped"
required:
- features
fit:
description: "Fit the OneHotEncoding model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
transform:
description: "Transform the input DataFrame with the fitted model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame."
fit_transform:
description: "Fit and transform the input DataFrame."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame."

View file

@ -0,0 +1,429 @@
PolynomialExpansion:
type: class
description: "Add polynomial and interaction features from selected numeric columns, excluding the bias column."
methods:
__init__:
description: "Initialize self."
parameters:
properties:
cols:
type: list
description: "Columns for polynomial expansion."
degree:
type: int
description: "The degree of the polynomial features."
default: 2
required:
- cols
fit:
description: "Fit the PolynomialExpansion model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
transform:
description: "Transform the input DataFrame with the fitted model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame."
fit_transform:
description: "Fit and transform the input DataFrame."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame."
CatCount:
type: class
description: "Add value counts of categorical columns as new features."
methods:
__init__:
description: "Initialize self."
parameters:
properties:
cols:
type: list
description: "Columns for value counts."
required:
- cols
fit:
description: "Fit the CatCount model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
transform:
description: "Transform the input DataFrame with the fitted model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame."
fit_transform:
description: "Fit and transform the input DataFrame."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame."
TargetMeanEncoder:
type: class
description: "Encodes a categorical column by the mean of the label column, and adds the result as a new feature."
methods:
__init__:
description: "Initialize self."
parameters:
properties:
col:
type: str
description: "Column to be mean encoded."
label:
type: str
description: "Predicted label column."
required:
- col
- label
fit:
description: "Fit the TargetMeanEncoder model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
transform:
description: "Transform the input DataFrame with the fitted model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame."
fit_transform:
description: "Fit and transform the input DataFrame."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame."
KFoldTargetMeanEncoder:
type: class
description: "Adds a new feature to the DataFrame by k-fold mean encoding of a categorical column using the label column."
methods:
__init__:
description: "Initialize self."
parameters:
properties:
col:
type: str
description: "Column to be k-fold mean encoded."
label:
type: str
description: "Predicted label column."
n_splits:
type: int
description: "Number of splits for K-fold."
default: 5
random_state:
type: int
description: "Random seed."
default: 2021
required:
- col
- label
fit:
description: "Fit the KFoldTargetMeanEncoder model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
transform:
description: "Transform the input DataFrame with the fitted model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame."
fit_transform:
description: "Fit and transform the input DataFrame."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame."
CatCross:
type: class
description: "Add pairwise crossed features and convert them to numerical features."
methods:
__init__:
description: "Initialize self."
parameters:
properties:
cols:
type: list
description: "Columns to be pairwise crossed."
max_cat_num:
type: int
description: "Maximum unique categories per crossed feature."
default: 100
required:
- cols
fit:
description: "Fit the CatCross model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
transform:
description: "Transform the input DataFrame with the fitted model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame."
fit_transform:
description: "Fit and transform the input DataFrame."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame."
GroupStat:
type: class
description: "Aggregate specified column in a DataFrame grouped by another column, adding new features named '<agg_col>_<agg_func>_by_<group_col>'."
methods:
__init__:
description: "Initialize self."
parameters:
properties:
group_col:
type: str
description: "Column used for grouping."
agg_col:
type: str
description: "Column on which aggregation is performed."
agg_funcs:
type: list
description: >-
List of aggregation functions to apply, such as ['mean', 'std'].
Each function must be supported by pandas.
required:
- group_col
- agg_col
- agg_funcs
fit:
description: "Fit the GroupStat model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
transform:
description: "Transform the input DataFrame with the fitted model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame."
fit_transform:
description: "Fit and transform the input DataFrame."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame."
SplitBins:
type: class
description: "Bin continuous data into intervals and return the bin identifier encoded as an integer value"
methods:
__init__:
description: "Initialize self."
parameters:
properties:
cols:
type: list
description: "Columns to be binned."
strategy:
type: str
description: "Strategy used to define the widths of the bins."
default: quantile
required:
- cols
fit:
description: "Fit the SplitBins model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
transform:
description: "Transform the input DataFrame with the fitted model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame."
fit_transform:
description: "Fit and transform the input DataFrame."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame."
GeneralSelection:
type: class
description: "Drop all nan feats and feats with only one unique value."
methods:
__init__:
description: "Initialize self."
parameters:
properties:
label_col:
type: str
description: "Label column name."
required:
- label_col
fit:
description: "Fit the GeneralSelection model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
transform:
description: "Transform the input DataFrame with the fitted model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame."
fit_transform:
description: "Fit and transform the input DataFrame."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame."