From edd6987a1c4738f27fb1936fa701441145b96869 Mon Sep 17 00:00:00 2001 From: lidanyang Date: Wed, 13 Dec 2023 20:41:32 +0800 Subject: [PATCH] drop old tool definition --- metagpt/tools/functions/__init__.py | 3 - metagpt/tools/functions/libs/ml_model.py | 196 ------------------ metagpt/tools/functions/register/__init__.py | 6 - metagpt/tools/functions/register/register.py | 78 ------- metagpt/tools/functions/schemas/base.py | 100 --------- .../functions/schemas/data_preprocess.py | 67 ------ .../functions/schemas/feature_engineering.py | 110 ---------- metagpt/tools/functions/schemas/ml_model.py | 55 ----- 8 files changed, 615 deletions(-) delete mode 100644 metagpt/tools/functions/libs/ml_model.py delete mode 100644 metagpt/tools/functions/register/__init__.py delete mode 100644 metagpt/tools/functions/register/register.py delete mode 100644 metagpt/tools/functions/schemas/base.py delete mode 100644 metagpt/tools/functions/schemas/data_preprocess.py delete mode 100644 metagpt/tools/functions/schemas/feature_engineering.py delete mode 100644 metagpt/tools/functions/schemas/ml_model.py diff --git a/metagpt/tools/functions/__init__.py b/metagpt/tools/functions/__init__.py index 30ee10827..a0a43f507 100644 --- a/metagpt/tools/functions/__init__.py +++ b/metagpt/tools/functions/__init__.py @@ -4,6 +4,3 @@ # @Author : lidanyang # @File : __init__.py # @Desc : -from metagpt.tools.functions.register.register import registry -import metagpt.tools.functions.libs.feature_engineering -import metagpt.tools.functions.libs.data_preprocess diff --git a/metagpt/tools/functions/libs/ml_model.py b/metagpt/tools/functions/libs/ml_model.py deleted file mode 100644 index b669de2c1..000000000 --- a/metagpt/tools/functions/libs/ml_model.py +++ /dev/null @@ -1,196 +0,0 @@ -from sklearn.model_selection import train_test_split -from sklearn.preprocessing import LabelEncoder - -from sklearn.linear_model import LogisticRegression -from sklearn.ensemble import RandomForestClassifier -from sklearn.ensemble import GradientBoostingClassifier - - -from sklearn.linear_model import LinearRegression -from sklearn.ensemble import RandomForestRegressor -from sklearn.ensemble import GradientBoostingRegressor - -from metagpt.tools.functions import registry -from metagpt.tools.functions.schemas.ml_model import * - - -######### -## 分类 ## -######### - - -@registry.register("classification_model", LogisticRegressionClassification) -def logistic_regression_classification(df, label, test_size=0.2, penalty='l2', dual=False): - nonnumeric_columns = [col for col in df if df[col].dtype == 'object'] - for col in nonnumeric_columns: - df[col] = LabelEncoder().fit_transform(df[col]) - df = df.fillna(0) - - features = [col for col in df if col != label] - x, y = df[features], df[label] - tr_x, te_x, tr_y, te_y = train_test_split(x, y, test_size=test_size, random_state=1) - - model = LogisticRegression(penalty=penalty, dual=dual) - model.fit(tr_x, tr_y, ) - te_pred_prob = model.predict_proba(te_x) - - res = { - 'te_pred_prob': te_pred_prob - } - return res - - -@registry.register("classification_model", RandomForestClassification) -def random_forest_classification(df, label, test_size=0.2, n_estimators=100, criterion='gini'): - nonnumeric_columns = [col for col in df if df[col].dtype == 'object'] - for col in nonnumeric_columns: - df[col] = LabelEncoder().fit_transform(df[col]) - df = df.fillna(0) - - features = [col for col in df if col != label] - x, y = df[features], df[label] - tr_x, te_x, tr_y, te_y = train_test_split(x, y, test_size=test_size, random_state=1) - model = RandomForestClassifier(n_estimators=n_estimators, criterion=criterion) - model.fit(tr_x, tr_y, ) - te_pred_prob = model.predict_proba(te_x) - - res = { - 'te_pred_prob': te_pred_prob - } - return res - - -@registry.register("classification_model", GradientBoostingClassification) -def gradient_boosting_classification(df, label, test_size=0.2, n_estimators=100, learning_rate=0.1): - nonnumeric_columns = [col for col in df if df[col].dtype == 'object'] - for col in nonnumeric_columns: - df[col] = LabelEncoder().fit_transform(df[col]) - df = df.fillna(0) - - features = [col for col in df if col != label] - x, y = df[features], df[label] - tr_x, te_x, tr_y, te_y = train_test_split(x, y, test_size=test_size, random_state=1) - model = GradientBoostingClassifier(n_estimators=n_estimators, learning_rate=learning_rate) - model.fit(tr_x, tr_y, ) - te_pred_prob = model.predict_proba(te_x) - - res = { - 'te_pred_prob': te_pred_prob - } - return res - - - -######### -## 回归 ## -######### - - -@registry.register("regression_model", LinearRegressionRegression) -def linear_regression(df, label, test_size=0.2, ): - nonnumeric_columns = [col for col in df if df[col].dtype == 'object'] - for col in nonnumeric_columns: - df[col] = LabelEncoder().fit_transform(df[col]) - df = df.fillna(0) - - features = [col for col in df if col != label] - x, y = df[features], df[label] - tr_x, te_x, tr_y, te_y = train_test_split(x, y, test_size=test_size, random_state=1) - - model = LinearRegression() - model.fit(tr_x, tr_y, ) - te_pred_prob = model.predict(te_x) - - res = { - 'te_pred_prob': te_pred_prob - } - return res - - -@registry.register("regression_model", RandomForestRegression) -def random_forest_regression(df, label, test_size=0.2, n_estimators=100, criterion='squared_error'): - nonnumeric_columns = [col for col in df if df[col].dtype == 'object'] - for col in nonnumeric_columns: - df[col] = LabelEncoder().fit_transform(df[col]) - df = df.fillna(0) - - features = [col for col in df if col != label] - x, y = df[features], df[label] - tr_x, te_x, tr_y, te_y = train_test_split(x, y, test_size=test_size, random_state=1) - model = RandomForestRegressor(n_estimators=n_estimators, criterion=criterion) - model.fit(tr_x, tr_y, ) - te_pred_prob = model.predict(te_x) - - res = { - 'te_pred_prob': te_pred_prob - } - return res - - -@registry.register("regression_model", GradientBoostingRegression) -def gradient_boosting_regression(df, label, test_size=0.2, n_estimators=100, learning_rate=0.1): - nonnumeric_columns = [col for col in df if df[col].dtype == 'object'] - for col in nonnumeric_columns: - df[col] = LabelEncoder().fit_transform(df[col]) - df = df.fillna(0) - - features = [col for col in df if col != label] - x, y = df[features], df[label] - tr_x, te_x, tr_y, te_y = train_test_split(x, y, test_size=test_size, random_state=1) - model = GradientBoostingRegressor(n_estimators=n_estimators, learning_rate=learning_rate) - model.fit(tr_x, tr_y, ) - te_pred_prob = model.predict(te_x) - - res = { - 'te_pred_prob': te_pred_prob - } - return res - - -if __name__ == '__main__': - def run(): - from sklearn.datasets import load_iris - loader = load_iris(as_frame=True) - df = loader['data'] - df['target'] = loader['target'] - - df[df.columns[0]] = df[df.columns[0]].astype(str) - df[df.columns[1]] = df[df.columns[1]].astype(int) - df['target'] = df['target'].astype(str) - - print(df) - print('####'*5) - res = logistic_regression_classification(df, 'target', test_size=0.25, penalty='l2', dual=False) - print(res['te_pred_prob']) - - print('####'*5) - res = random_forest_classification(df, 'target', test_size=0.25, n_estimators=100, criterion='gini') - print(res['te_pred_prob']) - - print('####'*5) - res = gradient_boosting_classification(df, 'target', test_size=0.25, n_estimators=100, learning_rate=0.1) - print(res['te_pred_prob']) - - from sklearn.datasets import make_regression - import pandas as pd - loader = make_regression() - df = pd.DataFrame(loader[0]) - df['target'] = loader[1] - - df[df.columns[0]] = df[df.columns[0]].astype(str) - df[df.columns[1]] = df[df.columns[1]].astype(int) - # df['target'] = df['target'].astype(str) - - print(df) - print('####' * 5) - res = linear_regression(df, 'target', test_size=0.25, ) - print(res['te_pred_prob']) - - print('####' * 5) - res = random_forest_regression(df, 'target', test_size=0.25, n_estimators=100, criterion='squared_error') - print(res['te_pred_prob']) - - print('####' * 5) - res = gradient_boosting_regression(df, 'target', test_size=0.25, n_estimators=100, learning_rate=0.1) - print(res['te_pred_prob']) - run() \ No newline at end of file diff --git a/metagpt/tools/functions/register/__init__.py b/metagpt/tools/functions/register/__init__.py deleted file mode 100644 index c80872750..000000000 --- a/metagpt/tools/functions/register/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# @Time : 2023/11/16 16:37 -# @Author : lidanyang -# @File : __init__.py -# @Desc : diff --git a/metagpt/tools/functions/register/register.py b/metagpt/tools/functions/register/register.py deleted file mode 100644 index 0731e31c0..000000000 --- a/metagpt/tools/functions/register/register.py +++ /dev/null @@ -1,78 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# @Time : 2023/11/16 16:38 -# @Author : lidanyang -# @File : register.py -# @Desc : -import inspect -from typing import Type, Optional, Callable, Dict, Union, List - -from metagpt.tools.functions.schemas.base import ToolSchema - - -class FunctionRegistry: - def __init__(self): - self.functions: Dict[str, Dict[str, Dict]] = {} - - @staticmethod - def _check_param_consistency(func_params, schema): - param_names = set(func_params.keys()) - schema_names = set(schema["parameters"]["properties"].keys()) - - if param_names != schema_names: - raise ValueError("Function parameters do not match schema properties") - - def register(self, module: str, tool_schema: Type[ToolSchema]) -> Callable: - def wrapper(func: Callable) -> Callable: - module_registry = self.functions.setdefault(module, {}) - - if func.__name__ in module_registry: - raise ValueError(f"Function {func.__name__} is already registered in {module}") - - func_params = inspect.signature(func).parameters - - schema = tool_schema.schema() - schema["name"] = func.__name__ - - self._check_param_consistency(func_params, schema) - - module_registry[func.__name__] = { - "func": func, - "schema": schema, - } - return func - - return wrapper - - def get(self, module: str, name: str) -> Optional[Union[Callable, Dict]]: - """Get function by module and name""" - module_registry = self.functions.get(module, {}) - return module_registry.get(name) - - def get_by_name(self, name: str) -> Optional[Dict]: - """Get function by name""" - for module_registry in self.functions.values(): - if name in module_registry: - return module_registry.get(name, {}) - - def get_all_by_module(self, module: str) -> Optional[Dict]: - """Get all functions by module""" - return self.functions.get(module, {}) - - def get_schema(self, module: str, name: str) -> Optional[Dict]: - """Get schema by module and name""" - module_registry = self.functions.get(module, {}) - return module_registry.get(name, {}).get("schema") - - def get_schemas(self, module: str, names: List[str]) -> List[Dict]: - """Get schemas by module and names""" - module_registry = self.functions.get(module, {}) - return [module_registry.get(name, {}).get("schema") for name in names] - - def get_all_schema_by_module(self, module: str) -> List[Dict]: - """Get all schemas by module""" - module_registry = self.functions.get(module, {}) - return [v.get("schema") for v in module_registry.values()] - - -registry = FunctionRegistry() diff --git a/metagpt/tools/functions/schemas/base.py b/metagpt/tools/functions/schemas/base.py deleted file mode 100644 index aef604c8d..000000000 --- a/metagpt/tools/functions/schemas/base.py +++ /dev/null @@ -1,100 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# @Time : 2023/11/16 16:34 -# @Author : lidanyang -# @File : base.py -# @Desc : Build base class to generate schema for tool -from typing import Any, List, Optional, get_type_hints - - -class NoDefault: - """ - A class to represent a missing default value. - - This is used to distinguish between a default value of None and a missing default value. - """ - pass - - -def tool_field( - description: str, default: Any = NoDefault(), enum: Optional[List[Any]] = None, **kwargs -): - """ - Create a field for a tool parameter. - - Args: - description (str): A description of the field. - default (Any, optional): The default value for the field. Defaults to None. - enum (Optional[List[Any]], optional): A list of possible values for the field. Defaults to None. - **kwargs: Additional keyword arguments. - - Returns: - dict: A dictionary representing the field with provided attributes. - """ - field_info = { - "description": description, - "default": default, - "enum": enum, - } - field_info.update(kwargs) - return field_info - - -class ToolSchema: - @staticmethod - def format_type(type_hint): - """ - Format a type hint into a string representation. - - Args: - type_hint (type): The type hint to format. - - Returns: - str: A string representation of the type hint. - """ - if isinstance(type_hint, type): - # Handle built-in types separately - if type_hint.__module__ == "builtins": - return type_hint.__name__ - else: - return f"{type_hint.__module__}.{type_hint.__name__}" - elif hasattr(type_hint, "__origin__") and hasattr(type_hint, "__args__"): - # Handle generic types (like List[int]) - origin_type = ToolSchema.format_type(type_hint.__origin__) - args_type = ", ".join( - [ToolSchema.format_type(t) for t in type_hint.__args__] - ) - return f"{origin_type}[{args_type}]" - else: - return str(type_hint) - - @classmethod - def schema(cls): - """ - Generate a schema dictionary for the class. - - The schema includes the class name, description, and information about - each class parameter based on type hints and field definitions. - - Returns: - dict: A dictionary representing the schema of the class. - """ - schema = { - "name": cls.__name__, - "description": cls.__doc__, - "parameters": {"type": "object", "properties": {}, "required": []}, - } - type_hints = get_type_hints(cls) - for attr, type_hint in type_hints.items(): - value = getattr(cls, attr, None) - if isinstance(value, dict): - # Process each attribute that is defined using the field function - prop_info = {k: v for k, v in value.items() if v is not None or k == "default"} - if isinstance(prop_info["default"], NoDefault): - del prop_info["default"] - prop_info["type"] = ToolSchema.format_type(type_hint) - schema["parameters"]["properties"][attr] = prop_info - # Check for required fields - if "default" not in prop_info: - schema["parameters"]["required"].append(attr) - return schema diff --git a/metagpt/tools/functions/schemas/data_preprocess.py b/metagpt/tools/functions/schemas/data_preprocess.py deleted file mode 100644 index 16b97aeac..000000000 --- a/metagpt/tools/functions/schemas/data_preprocess.py +++ /dev/null @@ -1,67 +0,0 @@ - -import pandas as pd - -from metagpt.tools.functions.schemas.base import tool_field, ToolSchema - - -class FillMissingValue(ToolSchema): - """Completing missing values with simple strategies""" - df: pd.DataFrame = tool_field(description="input dataframe") - features: list = tool_field(description="columns to be processed") - strategy: str = tool_field( - description="the imputation strategy", - default='mean', - enum=['mean', 'median', 'most_frequent', 'constant'] - ) - fill_value: int = tool_field( - description="fill_value is used to replace all occurrences of missing_values", default=None) - - -class SplitBins(ToolSchema): - """Bin continuous data into intervals and return the bin identifier encoded as an integer value""" - df: pd.DataFrame = tool_field(description="input dataframe") - features: list = tool_field(description="columns to be processed") - strategy: str = tool_field(description="Strategy used to define the widths of the bins", default='quantile') - - -class MinMaxScale(ToolSchema): - """Transform features by scaling each feature to a range, witch is (0, 1)""" - df: pd.DataFrame = tool_field(description="input dataframe") - features: list = tool_field(description="columns to be processed") - - -class StandardScale(ToolSchema): - """Standardize features by removing the mean and scaling to unit variance""" - df: pd.DataFrame = tool_field(description="input dataframe") - features: list = tool_field(description="columns to be processed") - - -class LogTransform(ToolSchema): - """Performs a logarithmic transformation on the specified columns""" - df: pd.DataFrame = tool_field(description="input dataframe") - features: list = tool_field(description="columns to be processed") - - -class MaxAbsScale(ToolSchema): - """Scale each feature by its maximum absolute value""" - df: pd.DataFrame = tool_field(description="input dataframe") - features: list = tool_field(description="columns to be processed") - - -class RobustScale(ToolSchema): - """Scale features using statistics that are robust to outliers, the quantile_range is (25.0, 75.0)""" - df: pd.DataFrame = tool_field(description="input dataframe") - features: list = tool_field(description="columns to be processed") - - -class OrdinalEncode(ToolSchema): - """Encode categorical features as an integer array""" - df: pd.DataFrame = tool_field(description="input dataframe") - features: list = tool_field(description="columns to be processed") - - -class OneHotEncoding(ToolSchema): - """Apply one-hot encoding to specified categorical columns, the original columns will be dropped.""" - - df: pd.DataFrame = tool_field(description="DataFrame to process.") - cols: list = tool_field(description="Categorical columns to be one-hot encoded and dropped.") diff --git a/metagpt/tools/functions/schemas/feature_engineering.py b/metagpt/tools/functions/schemas/feature_engineering.py deleted file mode 100644 index 5c89d9b16..000000000 --- a/metagpt/tools/functions/schemas/feature_engineering.py +++ /dev/null @@ -1,110 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# @Time : 2023/11/17 10:34 -# @Author : lidanyang -# @File : feature_engineering.py -# @Desc : Schema for feature engineering functions -from typing import List - -import pandas as pd - -from metagpt.tools.functions.schemas.base import ToolSchema, tool_field - - -class PolynomialExpansion(ToolSchema): - """Add polynomial and interaction features from selected numeric columns, excluding the bias column.""" - - df: pd.DataFrame = tool_field(description="DataFrame to process.") - cols: list = tool_field(description="Columns for polynomial expansion.") - degree: int = tool_field(description="Degree of polynomial features.", default=2) - - -class FrequencyEncoding(ToolSchema): - """Add value counts of categorical columns as new features.""" - - df: pd.DataFrame = tool_field(description="DataFrame to process.") - cols: list = tool_field(description="Categorical columns to be frequency encoded.") - - -class TargetMeanEncoder(ToolSchema): - """Encodes a categorical column by the mean of the label column, and adds the result as a new feature.""" - - df: pd.DataFrame = tool_field(description="DataFrame to process.") - col: str = tool_field(description="Column to be mean encoded.") - label: str = tool_field(description="Predicted label column.") - - -class KFoldTargetMeanEncoder(ToolSchema): - """Adds a new feature to the DataFrame by k-fold mean encoding of a categorical column using the label column.""" - df: pd.DataFrame = tool_field(description="DataFrame to process.") - col: str = tool_field(description="Column to be k-fold mean encoded.") - label: str = tool_field(description="Predicted label column.") - n_splits: int = tool_field(description="Number of splits for K-fold.", default=5) - random_state: int = tool_field(description="Random seed.", default=2021) - - -class CatCross(ToolSchema): - """Add pairwise crossed features and convert them to numerical features.""" - - df: pd.DataFrame = tool_field(description="DataFrame to process.") - cols: list = tool_field(description="Columns to be pairwise crossed.") - max_cat_num: int = tool_field( - description="Maximum unique categories per crossed feature.", default=100 - ) - - -class GroupStat(ToolSchema): - """Aggregate specified column in a DataFrame grouped by another column, adding new features named '__by_'.""" - - df: pd.DataFrame = tool_field(description="DataFrame to process.") - group_col: str = tool_field(description="Column used for grouping.") - agg_col: str = tool_field(description="Column on which aggregation is performed.") - agg_funcs: list = tool_field( - description="""List of aggregation functions to apply, such as ['mean', 'std']. - Each function must be supported by pandas.""" - ) - - -class ExtractTimeComps(ToolSchema): - """Extract and add specific time components as new features from a designated time column.""" - - df: pd.DataFrame = tool_field(description="DataFrame to process.") - time_col: str = tool_field( - description="The name of the column containing time data." - ) - time_comps: List[str] = tool_field( - description="""List of time components to extract. - Each component must be in ['year', 'month', 'day', 'hour', 'dayofweek', 'is_weekend'].""" - ) - - -class FeShiftByTime(ToolSchema): - """Shift column values based on specified time intervals and add the resulting new features to the DataFrame. New features are named in the format of '__lag__'.""" - - df: pd.DataFrame = tool_field(description="DataFrame to process.") - time_col: str = tool_field(description="Column for time-based shifting.") - group_col: str = tool_field(description="Column for grouping before shifting.") - shift_col: str = tool_field(description="Column to shift.") - periods: list = tool_field(description="Time intervals for shifting.") - freq: str = tool_field( - description="Frequency unit for time intervals (e.g., 'D', 'M').", - enum=["D", "M", "Y", "W", "H"], - ) - - -class FeRollingByTime(ToolSchema): - """Calculate rolling statistics for a DataFrame column over time intervals.""" - - df: pd.DataFrame = tool_field(description="DataFrame to process.") - time_col: str = tool_field(description="Column for time-based rolling.") - group_col: str = tool_field(description="Column for grouping before rolling.") - rolling_col: str = tool_field(description="Column for rolling calculations.") - periods: list = tool_field(description="Window sizes for rolling.") - freq: str = tool_field( - description="Frequency unit for time windows (e.g., 'D', 'M').", - enum=["D", "M", "Y", "W", "H"], - ) - agg_funcs: list = tool_field( - description="""List of aggregation functions for rolling, like ['mean', 'std']. - Each function must be in ['mean', 'std', 'min', 'max', 'median', 'sum', 'count'].""" - ) diff --git a/metagpt/tools/functions/schemas/ml_model.py b/metagpt/tools/functions/schemas/ml_model.py deleted file mode 100644 index 9268156af..000000000 --- a/metagpt/tools/functions/schemas/ml_model.py +++ /dev/null @@ -1,55 +0,0 @@ -import pandas as pd - -from metagpt.tools.functions.schemas.base import tool_field, ToolSchema - - -class LogisticRegressionClassification(ToolSchema): - """Logistic Regression (aka logit, MaxEnt) classifier""" - df: pd.DataFrame = tool_field(description="input dataframe") - label: str = tool_field(description="target name") - test_size: float = tool_field(description="The proportion of the test set to all the data", default=0.2) - penalty: str = tool_field(description="Specify the norm of the penalty", default="l2") - dual: bool = tool_field(description="Dual (constrained) or primal (regularized) formulation", default="l2") - - -class RandomForestClassification(ToolSchema): - """random forest is a meta estimator that fits a number of decision tree classifiers on various sub-samples of the dataset and uses averaging to improve the predictive accuracy and control over-fitting""" - df: pd.DataFrame = tool_field(description="input dataframe") - label: str = tool_field(description="target name") - test_size: float = tool_field(description="The proportion of the test set to all the data", default=0.2) - n_estimators: int = tool_field(description="The number of trees in the forest", default=100) - criterion: str = tool_field(description="The function to measure the quality of a split", default="gini") - - -class GradientBoostingClassification(ToolSchema): - """Gradient Boosting for classification.This algorithm builds an additive model in a forward stage-wise fashion""" - df: pd.DataFrame = tool_field(description="input dataframe") - label: str = tool_field(description="target name") - test_size: float = tool_field(description="The proportion of the test set to all the data", default=0.2) - n_estimators: int = tool_field(description="The number of boosting stages to perform", default=100) - learning_rate: float = tool_field(description="Learning rate shrinks the contribution of each tree by learning_rate", default=0.1) - - -class LinearRegressionRegression(ToolSchema): - """Ordinary least squares Linear Regression.""" - df: pd.DataFrame = tool_field(description="input dataframe") - label: str = tool_field(description="target name") - test_size: float = tool_field(description="The proportion of the test set to all the data", default=0.2) - - -class RandomForestRegression(ToolSchema): - """random forest is a meta estimator that fits a number of decision tree on various sub-samples of the dataset and uses averaging to improve the predictive accuracy and control over-fitting""" - df: pd.DataFrame = tool_field(description="input dataframe") - label: str = tool_field(description="target name") - test_size: float = tool_field(description="The proportion of the test set to all the data", default=0.2) - n_estimators: int = tool_field(description="The number of trees in the forest", default=100) - criterion: str = tool_field(description="The function to measure the quality of a split", default="squared_error") - - -class GradientBoostingRegression(ToolSchema): - """Gradient Boosting for regression.This estimator builds an additive model in a forward stage-wise fashion""" - df: pd.DataFrame = tool_field(description="input dataframe") - label: str = tool_field(description="target name") - test_size: float = tool_field(description="The proportion of the test set to all the data", default=0.2) - n_estimators: int = tool_field(description="The number of boosting stages to perform", default=100) - learning_rate: float = tool_field(description="Learning rate shrinks the contribution of each tree by learning_rate", default=0.1)