From edd6987a1c4738f27fb1936fa701441145b96869 Mon Sep 17 00:00:00 2001
From: lidanyang <lidanyang@fuzhi.ai>
Date: Wed, 13 Dec 2023 20:41:32 +0800
Subject: [PATCH] drop old tool definition

---
 metagpt/tools/functions/__init__.py           |   3 -
 metagpt/tools/functions/libs/ml_model.py      | 196 ------------------
 metagpt/tools/functions/register/__init__.py  |   6 -
 metagpt/tools/functions/register/register.py  |  78 -------
 metagpt/tools/functions/schemas/base.py       | 100 ---------
 .../functions/schemas/data_preprocess.py      |  67 ------
 .../functions/schemas/feature_engineering.py  | 110 ----------
 metagpt/tools/functions/schemas/ml_model.py   |  55 -----
 8 files changed, 615 deletions(-)
 delete mode 100644 metagpt/tools/functions/libs/ml_model.py
 delete mode 100644 metagpt/tools/functions/register/__init__.py
 delete mode 100644 metagpt/tools/functions/register/register.py
 delete mode 100644 metagpt/tools/functions/schemas/base.py
 delete mode 100644 metagpt/tools/functions/schemas/data_preprocess.py
 delete mode 100644 metagpt/tools/functions/schemas/feature_engineering.py
 delete mode 100644 metagpt/tools/functions/schemas/ml_model.py

diff --git a/metagpt/tools/functions/__init__.py b/metagpt/tools/functions/__init__.py
index 30ee10827..a0a43f507 100644
--- a/metagpt/tools/functions/__init__.py
+++ b/metagpt/tools/functions/__init__.py
@@ -4,6 +4,3 @@
 # @Author  : lidanyang
 # @File    : __init__.py
 # @Desc    :
-from metagpt.tools.functions.register.register import registry
-import metagpt.tools.functions.libs.feature_engineering
-import metagpt.tools.functions.libs.data_preprocess
diff --git a/metagpt/tools/functions/libs/ml_model.py b/metagpt/tools/functions/libs/ml_model.py
deleted file mode 100644
index b669de2c1..000000000
--- a/metagpt/tools/functions/libs/ml_model.py
+++ /dev/null
@@ -1,196 +0,0 @@
-from sklearn.model_selection import train_test_split
-from sklearn.preprocessing import LabelEncoder
-
-from sklearn.linear_model import LogisticRegression
-from sklearn.ensemble import RandomForestClassifier
-from sklearn.ensemble import GradientBoostingClassifier
-
-
-from sklearn.linear_model import LinearRegression
-from sklearn.ensemble import RandomForestRegressor
-from sklearn.ensemble import GradientBoostingRegressor
-
-from metagpt.tools.functions import registry
-from metagpt.tools.functions.schemas.ml_model import *
-
-
-#########
-## 分类 ##
-#########
-
-
-@registry.register("classification_model", LogisticRegressionClassification)
-def logistic_regression_classification(df, label, test_size=0.2, penalty='l2', dual=False):
-    nonnumeric_columns = [col for col in df if df[col].dtype == 'object']
-    for col in nonnumeric_columns:
-        df[col] = LabelEncoder().fit_transform(df[col])
-    df = df.fillna(0)
-
-    features = [col for col in df if col != label]
-    x, y = df[features], df[label]
-    tr_x, te_x, tr_y, te_y = train_test_split(x, y, test_size=test_size, random_state=1)
-
-    model = LogisticRegression(penalty=penalty, dual=dual)
-    model.fit(tr_x, tr_y, )
-    te_pred_prob = model.predict_proba(te_x)
-
-    res = {
-        'te_pred_prob': te_pred_prob
-    }
-    return res
-
-
-@registry.register("classification_model", RandomForestClassification)
-def random_forest_classification(df, label, test_size=0.2, n_estimators=100, criterion='gini'):
-    nonnumeric_columns = [col for col in df if df[col].dtype == 'object']
-    for col in nonnumeric_columns:
-        df[col] = LabelEncoder().fit_transform(df[col])
-    df = df.fillna(0)
-
-    features = [col for col in df if col != label]
-    x, y = df[features], df[label]
-    tr_x, te_x, tr_y, te_y = train_test_split(x, y, test_size=test_size, random_state=1)
-    model = RandomForestClassifier(n_estimators=n_estimators, criterion=criterion)
-    model.fit(tr_x, tr_y, )
-    te_pred_prob = model.predict_proba(te_x)
-
-    res = {
-        'te_pred_prob': te_pred_prob
-    }
-    return res
-
-
-@registry.register("classification_model", GradientBoostingClassification)
-def gradient_boosting_classification(df, label, test_size=0.2, n_estimators=100, learning_rate=0.1):
-    nonnumeric_columns = [col for col in df if df[col].dtype == 'object']
-    for col in nonnumeric_columns:
-        df[col] = LabelEncoder().fit_transform(df[col])
-    df = df.fillna(0)
-
-    features = [col for col in df if col != label]
-    x, y = df[features], df[label]
-    tr_x, te_x, tr_y, te_y = train_test_split(x, y, test_size=test_size, random_state=1)
-    model = GradientBoostingClassifier(n_estimators=n_estimators, learning_rate=learning_rate)
-    model.fit(tr_x, tr_y, )
-    te_pred_prob = model.predict_proba(te_x)
-
-    res = {
-        'te_pred_prob': te_pred_prob
-    }
-    return res
-
-
-
-#########
-## 回归 ##
-#########
-
-
-@registry.register("regression_model", LinearRegressionRegression)
-def linear_regression(df, label, test_size=0.2, ):
-    nonnumeric_columns = [col for col in df if df[col].dtype == 'object']
-    for col in nonnumeric_columns:
-        df[col] = LabelEncoder().fit_transform(df[col])
-    df = df.fillna(0)
-
-    features = [col for col in df if col != label]
-    x, y = df[features], df[label]
-    tr_x, te_x, tr_y, te_y = train_test_split(x, y, test_size=test_size, random_state=1)
-
-    model = LinearRegression()
-    model.fit(tr_x, tr_y, )
-    te_pred_prob = model.predict(te_x)
-
-    res = {
-        'te_pred_prob': te_pred_prob
-    }
-    return res
-
-
-@registry.register("regression_model", RandomForestRegression)
-def random_forest_regression(df, label, test_size=0.2, n_estimators=100, criterion='squared_error'):
-    nonnumeric_columns = [col for col in df if df[col].dtype == 'object']
-    for col in nonnumeric_columns:
-        df[col] = LabelEncoder().fit_transform(df[col])
-    df = df.fillna(0)
-
-    features = [col for col in df if col != label]
-    x, y = df[features], df[label]
-    tr_x, te_x, tr_y, te_y = train_test_split(x, y, test_size=test_size, random_state=1)
-    model = RandomForestRegressor(n_estimators=n_estimators, criterion=criterion)
-    model.fit(tr_x, tr_y, )
-    te_pred_prob = model.predict(te_x)
-
-    res = {
-        'te_pred_prob': te_pred_prob
-    }
-    return res
-
-
-@registry.register("regression_model", GradientBoostingRegression)
-def gradient_boosting_regression(df, label, test_size=0.2, n_estimators=100, learning_rate=0.1):
-    nonnumeric_columns = [col for col in df if df[col].dtype == 'object']
-    for col in nonnumeric_columns:
-        df[col] = LabelEncoder().fit_transform(df[col])
-    df = df.fillna(0)
-
-    features = [col for col in df if col != label]
-    x, y = df[features], df[label]
-    tr_x, te_x, tr_y, te_y = train_test_split(x, y, test_size=test_size, random_state=1)
-    model = GradientBoostingRegressor(n_estimators=n_estimators, learning_rate=learning_rate)
-    model.fit(tr_x, tr_y, )
-    te_pred_prob = model.predict(te_x)
-
-    res = {
-        'te_pred_prob': te_pred_prob
-    }
-    return res
-
-
-if __name__ == '__main__':
-    def run():
-        from sklearn.datasets import load_iris
-        loader = load_iris(as_frame=True)
-        df = loader['data']
-        df['target'] = loader['target']
-
-        df[df.columns[0]] = df[df.columns[0]].astype(str)
-        df[df.columns[1]] = df[df.columns[1]].astype(int)
-        df['target'] = df['target'].astype(str)
-
-        print(df)
-        print('####'*5)
-        res = logistic_regression_classification(df, 'target', test_size=0.25, penalty='l2', dual=False)
-        print(res['te_pred_prob'])
-
-        print('####'*5)
-        res = random_forest_classification(df, 'target', test_size=0.25, n_estimators=100, criterion='gini')
-        print(res['te_pred_prob'])
-
-        print('####'*5)
-        res = gradient_boosting_classification(df, 'target', test_size=0.25, n_estimators=100, learning_rate=0.1)
-        print(res['te_pred_prob'])
-
-        from sklearn.datasets import make_regression
-        import pandas as pd
-        loader = make_regression()
-        df = pd.DataFrame(loader[0])
-        df['target'] = loader[1]
-
-        df[df.columns[0]] = df[df.columns[0]].astype(str)
-        df[df.columns[1]] = df[df.columns[1]].astype(int)
-        # df['target'] = df['target'].astype(str)
-
-        print(df)
-        print('####' * 5)
-        res = linear_regression(df, 'target', test_size=0.25, )
-        print(res['te_pred_prob'])
-
-        print('####' * 5)
-        res = random_forest_regression(df, 'target', test_size=0.25, n_estimators=100, criterion='squared_error')
-        print(res['te_pred_prob'])
-
-        print('####' * 5)
-        res = gradient_boosting_regression(df, 'target', test_size=0.25, n_estimators=100, learning_rate=0.1)
-        print(res['te_pred_prob'])
-    run()
\ No newline at end of file
diff --git a/metagpt/tools/functions/register/__init__.py b/metagpt/tools/functions/register/__init__.py
deleted file mode 100644
index c80872750..000000000
--- a/metagpt/tools/functions/register/__init__.py
+++ /dev/null
@@ -1,6 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-# @Time    : 2023/11/16 16:37
-# @Author  : lidanyang
-# @File    : __init__.py
-# @Desc    :
diff --git a/metagpt/tools/functions/register/register.py b/metagpt/tools/functions/register/register.py
deleted file mode 100644
index 0731e31c0..000000000
--- a/metagpt/tools/functions/register/register.py
+++ /dev/null
@@ -1,78 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-# @Time    : 2023/11/16 16:38
-# @Author  : lidanyang
-# @File    : register.py
-# @Desc    :
-import inspect
-from typing import Type, Optional, Callable, Dict, Union, List
-
-from metagpt.tools.functions.schemas.base import ToolSchema
-
-
-class FunctionRegistry:
-    def __init__(self):
-        self.functions: Dict[str, Dict[str, Dict]] = {}
-
-    @staticmethod
-    def _check_param_consistency(func_params, schema):
-        param_names = set(func_params.keys())
-        schema_names = set(schema["parameters"]["properties"].keys())
-
-        if param_names != schema_names:
-            raise ValueError("Function parameters do not match schema properties")
-
-    def register(self, module: str, tool_schema: Type[ToolSchema]) -> Callable:
-        def wrapper(func: Callable) -> Callable:
-            module_registry = self.functions.setdefault(module, {})
-
-            if func.__name__ in module_registry:
-                raise ValueError(f"Function {func.__name__} is already registered in {module}")
-
-            func_params = inspect.signature(func).parameters
-
-            schema = tool_schema.schema()
-            schema["name"] = func.__name__
-
-            self._check_param_consistency(func_params, schema)
-
-            module_registry[func.__name__] = {
-                "func": func,
-                "schema": schema,
-            }
-            return func
-
-        return wrapper
-
-    def get(self, module: str, name: str) -> Optional[Union[Callable, Dict]]:
-        """Get function by module and name"""
-        module_registry = self.functions.get(module, {})
-        return module_registry.get(name)
-
-    def get_by_name(self, name: str) -> Optional[Dict]:
-        """Get function by name"""
-        for module_registry in self.functions.values():
-            if name in module_registry:
-                return module_registry.get(name, {})
-
-    def get_all_by_module(self, module: str) -> Optional[Dict]:
-        """Get all functions by module"""
-        return self.functions.get(module, {})
-
-    def get_schema(self, module: str, name: str) -> Optional[Dict]:
-        """Get schema by module and name"""
-        module_registry = self.functions.get(module, {})
-        return module_registry.get(name, {}).get("schema")
-
-    def get_schemas(self, module: str, names: List[str]) -> List[Dict]:
-        """Get schemas by module and names"""
-        module_registry = self.functions.get(module, {})
-        return [module_registry.get(name, {}).get("schema") for name in names]
-
-    def get_all_schema_by_module(self, module: str) -> List[Dict]:
-        """Get all schemas by module"""
-        module_registry = self.functions.get(module, {})
-        return [v.get("schema") for v in module_registry.values()]
-
-
-registry = FunctionRegistry()
diff --git a/metagpt/tools/functions/schemas/base.py b/metagpt/tools/functions/schemas/base.py
deleted file mode 100644
index aef604c8d..000000000
--- a/metagpt/tools/functions/schemas/base.py
+++ /dev/null
@@ -1,100 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-# @Time    : 2023/11/16 16:34
-# @Author  : lidanyang
-# @File    : base.py
-# @Desc    : Build base class to generate schema for tool
-from typing import Any, List, Optional, get_type_hints
-
-
-class NoDefault:
-    """
-    A class to represent a missing default value.
-
-    This is used to distinguish between a default value of None and a missing default value.
-    """
-    pass
-
-
-def tool_field(
-    description: str, default: Any = NoDefault(), enum: Optional[List[Any]] = None, **kwargs
-):
-    """
-    Create a field for a tool parameter.
-
-    Args:
-        description (str): A description of the field.
-        default (Any, optional): The default value for the field. Defaults to None.
-        enum (Optional[List[Any]], optional): A list of possible values for the field. Defaults to None.
-        **kwargs: Additional keyword arguments.
-
-    Returns:
-        dict: A dictionary representing the field with provided attributes.
-    """
-    field_info = {
-        "description": description,
-        "default": default,
-        "enum": enum,
-    }
-    field_info.update(kwargs)
-    return field_info
-
-
-class ToolSchema:
-    @staticmethod
-    def format_type(type_hint):
-        """
-        Format a type hint into a string representation.
-
-        Args:
-            type_hint (type): The type hint to format.
-
-        Returns:
-            str: A string representation of the type hint.
-        """
-        if isinstance(type_hint, type):
-            # Handle built-in types separately
-            if type_hint.__module__ == "builtins":
-                return type_hint.__name__
-            else:
-                return f"{type_hint.__module__}.{type_hint.__name__}"
-        elif hasattr(type_hint, "__origin__") and hasattr(type_hint, "__args__"):
-            # Handle generic types (like List[int])
-            origin_type = ToolSchema.format_type(type_hint.__origin__)
-            args_type = ", ".join(
-                [ToolSchema.format_type(t) for t in type_hint.__args__]
-            )
-            return f"{origin_type}[{args_type}]"
-        else:
-            return str(type_hint)
-
-    @classmethod
-    def schema(cls):
-        """
-        Generate a schema dictionary for the class.
-
-        The schema includes the class name, description, and information about
-        each class parameter based on type hints and field definitions.
-
-        Returns:
-            dict: A dictionary representing the schema of the class.
-        """
-        schema = {
-            "name": cls.__name__,
-            "description": cls.__doc__,
-            "parameters": {"type": "object", "properties": {}, "required": []},
-        }
-        type_hints = get_type_hints(cls)
-        for attr, type_hint in type_hints.items():
-            value = getattr(cls, attr, None)
-            if isinstance(value, dict):
-                # Process each attribute that is defined using the field function
-                prop_info = {k: v for k, v in value.items() if v is not None or k == "default"}
-                if isinstance(prop_info["default"], NoDefault):
-                    del prop_info["default"]
-                prop_info["type"] = ToolSchema.format_type(type_hint)
-                schema["parameters"]["properties"][attr] = prop_info
-                # Check for required fields
-                if "default" not in prop_info:
-                    schema["parameters"]["required"].append(attr)
-        return schema
diff --git a/metagpt/tools/functions/schemas/data_preprocess.py b/metagpt/tools/functions/schemas/data_preprocess.py
deleted file mode 100644
index 16b97aeac..000000000
--- a/metagpt/tools/functions/schemas/data_preprocess.py
+++ /dev/null
@@ -1,67 +0,0 @@
-
-import pandas as pd
-
-from metagpt.tools.functions.schemas.base import tool_field, ToolSchema
-
-
-class FillMissingValue(ToolSchema):
-    """Completing missing values with simple strategies"""
-    df: pd.DataFrame = tool_field(description="input dataframe")
-    features: list = tool_field(description="columns to be processed")
-    strategy: str = tool_field(
-        description="the imputation strategy",
-        default='mean',
-        enum=['mean', 'median', 'most_frequent', 'constant']
-    )
-    fill_value: int = tool_field(
-        description="fill_value is used to replace all occurrences of missing_values", default=None)
-
-
-class SplitBins(ToolSchema):
-    """Bin continuous data into intervals and return the bin identifier encoded as an integer value"""
-    df: pd.DataFrame = tool_field(description="input dataframe")
-    features: list = tool_field(description="columns to be processed")
-    strategy: str = tool_field(description="Strategy used to define the widths of the bins", default='quantile')
-
-
-class MinMaxScale(ToolSchema):
-    """Transform features by scaling each feature to a range, witch is (0, 1)"""
-    df: pd.DataFrame = tool_field(description="input dataframe")
-    features: list = tool_field(description="columns to be processed")
-
-
-class StandardScale(ToolSchema):
-    """Standardize features by removing the mean and scaling to unit variance"""
-    df: pd.DataFrame = tool_field(description="input dataframe")
-    features: list = tool_field(description="columns to be processed")
-
-
-class LogTransform(ToolSchema):
-    """Performs a logarithmic transformation on the specified columns"""
-    df: pd.DataFrame = tool_field(description="input dataframe")
-    features: list = tool_field(description="columns to be processed")
-
-
-class MaxAbsScale(ToolSchema):
-    """Scale each feature by its maximum absolute value"""
-    df: pd.DataFrame = tool_field(description="input dataframe")
-    features: list = tool_field(description="columns to be processed")
-
-
-class RobustScale(ToolSchema):
-    """Scale features using statistics that are robust to outliers, the quantile_range is (25.0, 75.0)"""
-    df: pd.DataFrame = tool_field(description="input dataframe")
-    features: list = tool_field(description="columns to be processed")
-
-
-class OrdinalEncode(ToolSchema):
-    """Encode categorical features as an integer array"""
-    df: pd.DataFrame = tool_field(description="input dataframe")
-    features: list = tool_field(description="columns to be processed")
-
-
-class OneHotEncoding(ToolSchema):
-    """Apply one-hot encoding to specified categorical columns, the original columns will be dropped."""
-
-    df: pd.DataFrame = tool_field(description="DataFrame to process.")
-    cols: list = tool_field(description="Categorical columns to be one-hot encoded and dropped.")
diff --git a/metagpt/tools/functions/schemas/feature_engineering.py b/metagpt/tools/functions/schemas/feature_engineering.py
deleted file mode 100644
index 5c89d9b16..000000000
--- a/metagpt/tools/functions/schemas/feature_engineering.py
+++ /dev/null
@@ -1,110 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-# @Time    : 2023/11/17 10:34
-# @Author  : lidanyang
-# @File    : feature_engineering.py
-# @Desc    : Schema for feature engineering functions
-from typing import List
-
-import pandas as pd
-
-from metagpt.tools.functions.schemas.base import ToolSchema, tool_field
-
-
-class PolynomialExpansion(ToolSchema):
-    """Add polynomial and interaction features from selected numeric columns, excluding the bias column."""
-
-    df: pd.DataFrame = tool_field(description="DataFrame to process.")
-    cols: list = tool_field(description="Columns for polynomial expansion.")
-    degree: int = tool_field(description="Degree of polynomial features.", default=2)
-
-
-class FrequencyEncoding(ToolSchema):
-    """Add value counts of categorical columns as new features."""
-
-    df: pd.DataFrame = tool_field(description="DataFrame to process.")
-    cols: list = tool_field(description="Categorical columns to be frequency encoded.")
-
-
-class TargetMeanEncoder(ToolSchema):
-    """Encodes a categorical column by the mean of the label column, and adds the result as a new feature."""
-
-    df: pd.DataFrame = tool_field(description="DataFrame to process.")
-    col: str = tool_field(description="Column to be mean encoded.")
-    label: str = tool_field(description="Predicted label column.")
-
-
-class KFoldTargetMeanEncoder(ToolSchema):
-    """Adds a new feature to the DataFrame by k-fold mean encoding of a categorical column using the label column."""
-    df: pd.DataFrame = tool_field(description="DataFrame to process.")
-    col: str = tool_field(description="Column to be k-fold mean encoded.")
-    label: str = tool_field(description="Predicted label column.")
-    n_splits: int = tool_field(description="Number of splits for K-fold.", default=5)
-    random_state: int = tool_field(description="Random seed.", default=2021)
-
-
-class CatCross(ToolSchema):
-    """Add pairwise crossed features and convert them to numerical features."""
-
-    df: pd.DataFrame = tool_field(description="DataFrame to process.")
-    cols: list = tool_field(description="Columns to be pairwise crossed.")
-    max_cat_num: int = tool_field(
-        description="Maximum unique categories per crossed feature.", default=100
-    )
-
-
-class GroupStat(ToolSchema):
-    """Aggregate specified column in a DataFrame grouped by another column, adding new features named '<agg_col>_<agg_func>_by_<group_col>'."""
-
-    df: pd.DataFrame = tool_field(description="DataFrame to process.")
-    group_col: str = tool_field(description="Column used for grouping.")
-    agg_col: str = tool_field(description="Column on which aggregation is performed.")
-    agg_funcs: list = tool_field(
-        description="""List of aggregation functions to apply, such as ['mean', 'std'].
-                    Each function must be supported by pandas."""
-    )
-
-
-class ExtractTimeComps(ToolSchema):
-    """Extract and add specific time components as new features from a designated time column."""
-
-    df: pd.DataFrame = tool_field(description="DataFrame to process.")
-    time_col: str = tool_field(
-        description="The name of the column containing time data."
-    )
-    time_comps: List[str] = tool_field(
-        description="""List of time components to extract.
-        Each component must be in ['year', 'month', 'day', 'hour', 'dayofweek', 'is_weekend']."""
-    )
-
-
-class FeShiftByTime(ToolSchema):
-    """Shift column values based on specified time intervals and add the resulting new features to the DataFrame. New features are named in the format of '<group_col>_<shift_col>_lag_<period>_<freq>'."""
-
-    df: pd.DataFrame = tool_field(description="DataFrame to process.")
-    time_col: str = tool_field(description="Column for time-based shifting.")
-    group_col: str = tool_field(description="Column for grouping before shifting.")
-    shift_col: str = tool_field(description="Column to shift.")
-    periods: list = tool_field(description="Time intervals for shifting.")
-    freq: str = tool_field(
-        description="Frequency unit for time intervals (e.g., 'D', 'M').",
-        enum=["D", "M", "Y", "W", "H"],
-    )
-
-
-class FeRollingByTime(ToolSchema):
-    """Calculate rolling statistics for a DataFrame column over time intervals."""
-
-    df: pd.DataFrame = tool_field(description="DataFrame to process.")
-    time_col: str = tool_field(description="Column for time-based rolling.")
-    group_col: str = tool_field(description="Column for grouping before rolling.")
-    rolling_col: str = tool_field(description="Column for rolling calculations.")
-    periods: list = tool_field(description="Window sizes for rolling.")
-    freq: str = tool_field(
-        description="Frequency unit for time windows (e.g., 'D', 'M').",
-        enum=["D", "M", "Y", "W", "H"],
-    )
-    agg_funcs: list = tool_field(
-        description="""List of aggregation functions for rolling, like ['mean', 'std'].
-        Each function must be in ['mean', 'std', 'min', 'max', 'median', 'sum', 'count']."""
-    )
diff --git a/metagpt/tools/functions/schemas/ml_model.py b/metagpt/tools/functions/schemas/ml_model.py
deleted file mode 100644
index 9268156af..000000000
--- a/metagpt/tools/functions/schemas/ml_model.py
+++ /dev/null
@@ -1,55 +0,0 @@
-import pandas as pd
-
-from metagpt.tools.functions.schemas.base import tool_field, ToolSchema
-
-
-class LogisticRegressionClassification(ToolSchema):
-    """Logistic Regression (aka logit, MaxEnt) classifier"""
-    df: pd.DataFrame = tool_field(description="input dataframe")
-    label: str = tool_field(description="target name")
-    test_size: float = tool_field(description="The proportion of the test set to all the data", default=0.2)
-    penalty: str = tool_field(description="Specify the norm of the penalty", default="l2")
-    dual: bool = tool_field(description="Dual (constrained) or primal (regularized) formulation", default="l2")
-
-
-class RandomForestClassification(ToolSchema):
-    """random forest is a meta estimator that fits a number of decision tree classifiers on various sub-samples of the dataset and uses averaging to improve the predictive accuracy and control over-fitting"""
-    df: pd.DataFrame = tool_field(description="input dataframe")
-    label: str = tool_field(description="target name")
-    test_size: float = tool_field(description="The proportion of the test set to all the data", default=0.2)
-    n_estimators: int = tool_field(description="The number of trees in the forest", default=100)
-    criterion: str = tool_field(description="The function to measure the quality of a split", default="gini")
-
-
-class GradientBoostingClassification(ToolSchema):
-    """Gradient Boosting for classification.This algorithm builds an additive model in a forward stage-wise fashion"""
-    df: pd.DataFrame = tool_field(description="input dataframe")
-    label: str = tool_field(description="target name")
-    test_size: float = tool_field(description="The proportion of the test set to all the data", default=0.2)
-    n_estimators: int = tool_field(description="The number of boosting stages to perform", default=100)
-    learning_rate: float = tool_field(description="Learning rate shrinks the contribution of each tree by learning_rate", default=0.1)
-
-
-class LinearRegressionRegression(ToolSchema):
-    """Ordinary least squares Linear Regression."""
-    df: pd.DataFrame = tool_field(description="input dataframe")
-    label: str = tool_field(description="target name")
-    test_size: float = tool_field(description="The proportion of the test set to all the data", default=0.2)
-
-
-class RandomForestRegression(ToolSchema):
-    """random forest is a meta estimator that fits a number of decision tree on various sub-samples of the dataset and uses averaging to improve the predictive accuracy and control over-fitting"""
-    df: pd.DataFrame = tool_field(description="input dataframe")
-    label: str = tool_field(description="target name")
-    test_size: float = tool_field(description="The proportion of the test set to all the data", default=0.2)
-    n_estimators: int = tool_field(description="The number of trees in the forest", default=100)
-    criterion: str = tool_field(description="The function to measure the quality of a split", default="squared_error")
-
-
-class GradientBoostingRegression(ToolSchema):
-    """Gradient Boosting for regression.This estimator builds an additive model in a forward stage-wise fashion"""
-    df: pd.DataFrame = tool_field(description="input dataframe")
-    label: str = tool_field(description="target name")
-    test_size: float = tool_field(description="The proportion of the test set to all the data", default=0.2)
-    n_estimators: int = tool_field(description="The number of boosting stages to perform", default=100)
-    learning_rate: float = tool_field(description="Learning rate shrinks the contribution of each tree by learning_rate", default=0.1)