mirror of
https://github.com/FoundationAgents/MetaGPT.git
synced 2026-05-21 14:05:17 +02:00
add tool registry
This commit is contained in:
parent
224bf820b2
commit
46cd219e81
25 changed files with 1582 additions and 59 deletions
|
|
@ -8,17 +8,6 @@
|
|||
|
||||
from enum import Enum
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
from metagpt.const import TOOL_LIBS_PATH
|
||||
from metagpt.prompts.tool_type import (
|
||||
DATA_PREPROCESS_PROMPT,
|
||||
FEATURE_ENGINEERING_PROMPT,
|
||||
MODEL_TRAIN_PROMPT,
|
||||
MODEL_EVALUATE_PROMPT,
|
||||
VISION_PROMPT,
|
||||
)
|
||||
|
||||
|
||||
class SearchEngineType(Enum):
|
||||
SERPAPI_GOOGLE = "serpapi"
|
||||
|
|
|
|||
|
|
@ -14,8 +14,13 @@ from sklearn.preprocessing import (
|
|||
)
|
||||
|
||||
from metagpt.tools.functions.libs.base import MLProcess
|
||||
from metagpt.tools.tool_registry import register_tool
|
||||
from metagpt.tools.tool_schema import ToolTypeEnum
|
||||
|
||||
TOOL_TYPE = ToolTypeEnum.DATA_PREPROCESS.value
|
||||
|
||||
|
||||
@register_tool(tool_type_name=TOOL_TYPE)
|
||||
class FillMissingValue(MLProcess):
|
||||
def __init__(
|
||||
self,
|
||||
|
|
@ -42,6 +47,7 @@ class FillMissingValue(MLProcess):
|
|||
return new_df
|
||||
|
||||
|
||||
@register_tool(tool_type_name=TOOL_TYPE)
|
||||
class MinMaxScale(MLProcess):
|
||||
def __init__(
|
||||
self,
|
||||
|
|
@ -60,6 +66,7 @@ class MinMaxScale(MLProcess):
|
|||
return new_df
|
||||
|
||||
|
||||
@register_tool(tool_type_name=TOOL_TYPE)
|
||||
class StandardScale(MLProcess):
|
||||
def __init__(
|
||||
self,
|
||||
|
|
@ -78,6 +85,7 @@ class StandardScale(MLProcess):
|
|||
return new_df
|
||||
|
||||
|
||||
@register_tool(tool_type_name=TOOL_TYPE)
|
||||
class MaxAbsScale(MLProcess):
|
||||
def __init__(
|
||||
self,
|
||||
|
|
@ -96,6 +104,7 @@ class MaxAbsScale(MLProcess):
|
|||
return new_df
|
||||
|
||||
|
||||
@register_tool(tool_type_name=TOOL_TYPE)
|
||||
class RobustScale(MLProcess):
|
||||
def __init__(
|
||||
self,
|
||||
|
|
@ -114,6 +123,7 @@ class RobustScale(MLProcess):
|
|||
return new_df
|
||||
|
||||
|
||||
@register_tool(tool_type_name=TOOL_TYPE)
|
||||
class OrdinalEncode(MLProcess):
|
||||
def __init__(
|
||||
self,
|
||||
|
|
@ -132,6 +142,7 @@ class OrdinalEncode(MLProcess):
|
|||
return new_df
|
||||
|
||||
|
||||
@register_tool(tool_type_name=TOOL_TYPE)
|
||||
class OneHotEncode(MLProcess):
|
||||
def __init__(
|
||||
self,
|
||||
|
|
@ -153,6 +164,7 @@ class OneHotEncode(MLProcess):
|
|||
return new_df
|
||||
|
||||
|
||||
@register_tool(tool_type_name=TOOL_TYPE)
|
||||
class LabelEncode(MLProcess):
|
||||
def __init__(
|
||||
self,
|
||||
|
|
@ -181,6 +193,7 @@ class LabelEncode(MLProcess):
|
|||
return new_df
|
||||
|
||||
|
||||
@register_tool(tool_type_name=TOOL_TYPE)
|
||||
def get_column_info(df: pd.DataFrame) -> dict:
|
||||
column_info = {
|
||||
"Category": [],
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@
|
|||
# @Desc : Feature Engineering Tools
|
||||
import itertools
|
||||
|
||||
import lightgbm as lgb
|
||||
# import lightgbm as lgb
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from joblib import Parallel, delayed
|
||||
|
|
@ -16,8 +16,13 @@ from sklearn.model_selection import KFold
|
|||
from sklearn.preprocessing import KBinsDiscretizer, PolynomialFeatures
|
||||
|
||||
from metagpt.tools.functions.libs.base import MLProcess
|
||||
from metagpt.tools.tool_registry import register_tool
|
||||
from metagpt.tools.tool_schema import ToolTypeEnum
|
||||
|
||||
TOOL_TYPE = ToolTypeEnum.FEATURE_ENGINEERING.value
|
||||
|
||||
|
||||
@register_tool(tool_type_name=TOOL_TYPE)
|
||||
class PolynomialExpansion(MLProcess):
|
||||
def __init__(self, cols: list, degree: int = 2, label_col: str = None):
|
||||
self.cols = cols
|
||||
|
|
@ -48,6 +53,7 @@ class PolynomialExpansion(MLProcess):
|
|||
return new_df
|
||||
|
||||
|
||||
@register_tool(tool_type_name=TOOL_TYPE)
|
||||
class CatCount(MLProcess):
|
||||
def __init__(self, col: str):
|
||||
self.col = col
|
||||
|
|
@ -62,6 +68,7 @@ class CatCount(MLProcess):
|
|||
return new_df
|
||||
|
||||
|
||||
@register_tool(tool_type_name=TOOL_TYPE)
|
||||
class TargetMeanEncoder(MLProcess):
|
||||
def __init__(self, col: str, label: str):
|
||||
self.col = col
|
||||
|
|
@ -77,6 +84,7 @@ class TargetMeanEncoder(MLProcess):
|
|||
return new_df
|
||||
|
||||
|
||||
@register_tool(tool_type_name=TOOL_TYPE)
|
||||
class KFoldTargetMeanEncoder(MLProcess):
|
||||
def __init__(self, col: str, label: str, n_splits: int = 5, random_state: int = 2021):
|
||||
self.col = col
|
||||
|
|
@ -103,6 +111,7 @@ class KFoldTargetMeanEncoder(MLProcess):
|
|||
return new_df
|
||||
|
||||
|
||||
@register_tool(tool_type_name=TOOL_TYPE)
|
||||
class CatCross(MLProcess):
|
||||
def __init__(self, cols: list, max_cat_num: int = 100):
|
||||
self.cols = cols
|
||||
|
|
@ -138,6 +147,7 @@ class CatCross(MLProcess):
|
|||
return new_df
|
||||
|
||||
|
||||
@register_tool(tool_type_name=TOOL_TYPE)
|
||||
class GroupStat(MLProcess):
|
||||
def __init__(self, group_col: str, agg_col: str, agg_funcs: list):
|
||||
self.group_col = group_col
|
||||
|
|
@ -157,6 +167,7 @@ class GroupStat(MLProcess):
|
|||
return new_df
|
||||
|
||||
|
||||
@register_tool(tool_type_name=TOOL_TYPE)
|
||||
class SplitBins(MLProcess):
|
||||
def __init__(self, cols: list, strategy: str = "quantile"):
|
||||
self.cols = cols
|
||||
|
|
@ -173,6 +184,7 @@ class SplitBins(MLProcess):
|
|||
return new_df
|
||||
|
||||
|
||||
@register_tool(tool_type_name=TOOL_TYPE)
|
||||
class ExtractTimeComps(MLProcess):
|
||||
def __init__(self, time_col: str, time_comps: list):
|
||||
self.time_col = time_col
|
||||
|
|
@ -201,6 +213,7 @@ class ExtractTimeComps(MLProcess):
|
|||
return new_df
|
||||
|
||||
|
||||
@register_tool(tool_type_name=TOOL_TYPE)
|
||||
class GeneralSelection(MLProcess):
|
||||
def __init__(self, label_col: str):
|
||||
self.label_col = label_col
|
||||
|
|
@ -228,6 +241,7 @@ class GeneralSelection(MLProcess):
|
|||
return new_df
|
||||
|
||||
|
||||
# skip for now because lgb is needed
|
||||
class TreeBasedSelection(MLProcess):
|
||||
def __init__(self, label_col: str, task_type: str):
|
||||
self.label_col = label_col
|
||||
|
|
@ -270,6 +284,7 @@ class TreeBasedSelection(MLProcess):
|
|||
return new_df
|
||||
|
||||
|
||||
@register_tool(tool_type_name=TOOL_TYPE)
|
||||
class VarianceBasedSelection(MLProcess):
|
||||
def __init__(self, label_col: str, threshold: float = 0):
|
||||
self.label_col = label_col
|
||||
|
|
|
|||
|
|
@ -0,0 +1,61 @@
|
|||
FillMissingValue:
|
||||
type: class
|
||||
description: "Completing missing values with simple strategies"
|
||||
methods:
|
||||
__init__:
|
||||
description: "Initialize self."
|
||||
parameters:
|
||||
properties:
|
||||
features:
|
||||
type: list
|
||||
description: "columns to be processed"
|
||||
strategy:
|
||||
type: str
|
||||
description: "the imputation strategy, notice mean/median can only be used for numeric features"
|
||||
default: mean
|
||||
enum:
|
||||
- mean
|
||||
- median
|
||||
- most_frequent
|
||||
- constant
|
||||
fill_value:
|
||||
type: int
|
||||
description: "fill_value is used to replace all occurrences of missing_values"
|
||||
default: null
|
||||
required:
|
||||
- features
|
||||
fit:
|
||||
description: "Fit the FillMissingValue model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
transform:
|
||||
description: "Transform the input DataFrame with the fitted model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
fit_transform:
|
||||
description: "Fit and transform the input DataFrame."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
|
|
@ -0,0 +1,48 @@
|
|||
LabelEncode:
|
||||
type: class
|
||||
description: "Apply label encoding to specified categorical columns in-place."
|
||||
methods:
|
||||
__init__:
|
||||
description: "Initialize self."
|
||||
parameters:
|
||||
properties:
|
||||
features:
|
||||
type: list
|
||||
description: "Categorical columns to be label encoded"
|
||||
required:
|
||||
- features
|
||||
fit:
|
||||
description: "Fit the LabelEncode model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
transform:
|
||||
description: "Transform the input DataFrame with the fitted model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
fit_transform:
|
||||
description: "Fit and transform the input DataFrame."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
|
|
@ -0,0 +1,48 @@
|
|||
MaxAbsScale:
|
||||
type: class
|
||||
description: "Scale each feature by its maximum absolute value"
|
||||
methods:
|
||||
__init__:
|
||||
description: "Initialize self."
|
||||
parameters:
|
||||
properties:
|
||||
features:
|
||||
type: list
|
||||
description: "columns to be processed"
|
||||
required:
|
||||
- features
|
||||
fit:
|
||||
description: "Fit the MaxAbsScale model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
transform:
|
||||
description: "Transform the input DataFrame with the fitted model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
fit_transform:
|
||||
description: "Fit and transform the input DataFrame."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
|
|
@ -0,0 +1,48 @@
|
|||
MinMaxScale:
|
||||
type: class
|
||||
description: "Transform features by scaling each feature to a range, which is (0, 1)"
|
||||
methods:
|
||||
__init__:
|
||||
description: "Initialize self."
|
||||
parameters:
|
||||
properties:
|
||||
features:
|
||||
type: list
|
||||
description: "columns to be processed"
|
||||
required:
|
||||
- features
|
||||
fit:
|
||||
description: "Fit the MinMaxScale model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
transform:
|
||||
description: "Transform the input DataFrame with the fitted model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
fit_transform:
|
||||
description: "Fit and transform the input DataFrame."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
|
|
@ -0,0 +1,48 @@
|
|||
OneHotEncode:
|
||||
type: class
|
||||
description: "Apply one-hot encoding to specified categorical columns, the original columns will be dropped."
|
||||
methods:
|
||||
__init__:
|
||||
description: "Initialize self."
|
||||
parameters:
|
||||
properties:
|
||||
features:
|
||||
type: list
|
||||
description: "Categorical columns to be one-hot encoded and dropped"
|
||||
required:
|
||||
- features
|
||||
fit:
|
||||
description: "Fit the OneHotEncode model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
transform:
|
||||
description: "Transform the input DataFrame with the fitted model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
fit_transform:
|
||||
description: "Fit and transform the input DataFrame."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
|
|
@ -0,0 +1,48 @@
|
|||
StandardScale:
|
||||
type: class
|
||||
description: "Standardize features by removing the mean and scaling to unit variance"
|
||||
methods:
|
||||
__init__:
|
||||
description: "Initialize self."
|
||||
parameters:
|
||||
properties:
|
||||
features:
|
||||
type: list
|
||||
description: "columns to be processed"
|
||||
required:
|
||||
- features
|
||||
fit:
|
||||
description: "Fit the StandardScale model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
transform:
|
||||
description: "Transform the input DataFrame with the fitted model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
fit_transform:
|
||||
description: "Fit and transform the input DataFrame."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
|
|
@ -0,0 +1,48 @@
|
|||
CatCount:
|
||||
type: class
|
||||
description: "Add value counts of a categorical column as new feature."
|
||||
methods:
|
||||
__init__:
|
||||
description: "Initialize self."
|
||||
parameters:
|
||||
properties:
|
||||
col:
|
||||
type: str
|
||||
description: "Column for value counts."
|
||||
required:
|
||||
- col
|
||||
fit:
|
||||
description: "Fit the CatCount model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
transform:
|
||||
description: "Transform the input DataFrame with the fitted model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
fit_transform:
|
||||
description: "Fit and transform the input DataFrame."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
|
|
@ -0,0 +1,52 @@
|
|||
CatCross:
|
||||
type: class
|
||||
description: "Add pairwise crossed features and convert them to numerical features."
|
||||
methods:
|
||||
__init__:
|
||||
description: "Initialize self."
|
||||
parameters:
|
||||
properties:
|
||||
cols:
|
||||
type: list
|
||||
description: "Columns to be pairwise crossed, at least 2 columns."
|
||||
max_cat_num:
|
||||
type: int
|
||||
description: "Maximum unique categories per crossed feature."
|
||||
default: 100
|
||||
required:
|
||||
- cols
|
||||
fit:
|
||||
description: "Fit the CatCross model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
transform:
|
||||
description: "Transform the input DataFrame with the fitted model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
fit_transform:
|
||||
description: "Fit and transform the input DataFrame."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
|
|
@ -0,0 +1,48 @@
|
|||
GeneralSelection:
|
||||
type: class
|
||||
description: "Drop all nan feats and feats with only one unique value."
|
||||
methods:
|
||||
__init__:
|
||||
description: "Initialize self."
|
||||
parameters:
|
||||
properties:
|
||||
label_col:
|
||||
type: str
|
||||
description: "Label column name."
|
||||
required:
|
||||
- label_col
|
||||
fit:
|
||||
description: "Fit the GeneralSelection model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
transform:
|
||||
description: "Transform the input DataFrame with the fitted model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
fit_transform:
|
||||
description: "Fit and transform the input DataFrame."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
|
|
@ -0,0 +1,58 @@
|
|||
GroupStat:
|
||||
type: class
|
||||
description: "Aggregate specified column in a DataFrame grouped by another column, adding new features named '<agg_col>_<agg_func>_by_<group_col>'."
|
||||
methods:
|
||||
__init__:
|
||||
description: "Initialize self."
|
||||
parameters:
|
||||
properties:
|
||||
group_col:
|
||||
type: str
|
||||
description: "Column used for grouping."
|
||||
agg_col:
|
||||
type: str
|
||||
description: "Column on which aggregation is performed."
|
||||
agg_funcs:
|
||||
type: list
|
||||
description: >-
|
||||
List of aggregation functions to apply, such as ['mean', 'std'].
|
||||
Each function must be supported by pandas.
|
||||
required:
|
||||
- group_col
|
||||
- agg_col
|
||||
- agg_funcs
|
||||
fit:
|
||||
description: "Fit the GroupStat model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
transform:
|
||||
description: "Transform the input DataFrame with the fitted model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
fit_transform:
|
||||
description: "Fit and transform the input DataFrame."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
|
|
@ -0,0 +1,60 @@
|
|||
KFoldTargetMeanEncoder:
|
||||
type: class
|
||||
description: "Adds a new feature to the DataFrame by k-fold mean encoding of a categorical column using the label column."
|
||||
methods:
|
||||
__init__:
|
||||
description: "Initialize self."
|
||||
parameters:
|
||||
properties:
|
||||
col:
|
||||
type: str
|
||||
description: "Column to be k-fold mean encoded."
|
||||
label:
|
||||
type: str
|
||||
description: "Predicted label column."
|
||||
n_splits:
|
||||
type: int
|
||||
description: "Number of splits for K-fold."
|
||||
default: 5
|
||||
random_state:
|
||||
type: int
|
||||
description: "Random seed."
|
||||
default: 2021
|
||||
required:
|
||||
- col
|
||||
- label
|
||||
fit:
|
||||
description: "Fit the KFoldTargetMeanEncoder model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
transform:
|
||||
description: "Transform the input DataFrame with the fitted model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
fit_transform:
|
||||
description: "Fit and transform the input DataFrame."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
|
|
@ -0,0 +1,548 @@
|
|||
PolynomialExpansion:
|
||||
type: class
|
||||
description: "Add polynomial and interaction features from selected numeric columns to input DataFrame."
|
||||
methods:
|
||||
__init__:
|
||||
description: "Initialize self."
|
||||
parameters:
|
||||
properties:
|
||||
cols:
|
||||
type: list
|
||||
description: "Columns for polynomial expansion."
|
||||
label_col:
|
||||
type: str
|
||||
description: "Label column name."
|
||||
degree:
|
||||
type: int
|
||||
description: "The degree of the polynomial features."
|
||||
default: 2
|
||||
required:
|
||||
- cols
|
||||
- label_col
|
||||
fit:
|
||||
description: "Fit the PolynomialExpansion model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
transform:
|
||||
description: "Transform the input DataFrame with the fitted model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame without duplicated columns."
|
||||
fit_transform:
|
||||
description: "Fit and transform the input DataFrame."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame without duplicated columns."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
|
||||
CatCount:
|
||||
type: class
|
||||
description: "Add value counts of a categorical column as new feature."
|
||||
methods:
|
||||
__init__:
|
||||
description: "Initialize self."
|
||||
parameters:
|
||||
properties:
|
||||
col:
|
||||
type: str
|
||||
description: "Column for value counts."
|
||||
required:
|
||||
- col
|
||||
fit:
|
||||
description: "Fit the CatCount model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
transform:
|
||||
description: "Transform the input DataFrame with the fitted model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
fit_transform:
|
||||
description: "Fit and transform the input DataFrame."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
|
||||
TargetMeanEncoder:
|
||||
type: class
|
||||
description: "Encodes a categorical column by the mean of the label column, and adds the result as a new feature."
|
||||
methods:
|
||||
__init__:
|
||||
description: "Initialize self."
|
||||
parameters:
|
||||
properties:
|
||||
col:
|
||||
type: str
|
||||
description: "Column to be mean encoded."
|
||||
label:
|
||||
type: str
|
||||
description: "Predicted label column."
|
||||
required:
|
||||
- col
|
||||
- label
|
||||
fit:
|
||||
description: "Fit the TargetMeanEncoder model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
transform:
|
||||
description: "Transform the input DataFrame with the fitted model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
fit_transform:
|
||||
description: "Fit and transform the input DataFrame."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
|
||||
KFoldTargetMeanEncoder:
|
||||
type: class
|
||||
description: "Adds a new feature to the DataFrame by k-fold mean encoding of a categorical column using the label column."
|
||||
methods:
|
||||
__init__:
|
||||
description: "Initialize self."
|
||||
parameters:
|
||||
properties:
|
||||
col:
|
||||
type: str
|
||||
description: "Column to be k-fold mean encoded."
|
||||
label:
|
||||
type: str
|
||||
description: "Predicted label column."
|
||||
n_splits:
|
||||
type: int
|
||||
description: "Number of splits for K-fold."
|
||||
default: 5
|
||||
random_state:
|
||||
type: int
|
||||
description: "Random seed."
|
||||
default: 2021
|
||||
required:
|
||||
- col
|
||||
- label
|
||||
fit:
|
||||
description: "Fit the KFoldTargetMeanEncoder model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
transform:
|
||||
description: "Transform the input DataFrame with the fitted model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
fit_transform:
|
||||
description: "Fit and transform the input DataFrame."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
|
||||
CatCross:
|
||||
type: class
|
||||
description: "Add pairwise crossed features and convert them to numerical features."
|
||||
methods:
|
||||
__init__:
|
||||
description: "Initialize self."
|
||||
parameters:
|
||||
properties:
|
||||
cols:
|
||||
type: list
|
||||
description: "Columns to be pairwise crossed, at least 2 columns."
|
||||
max_cat_num:
|
||||
type: int
|
||||
description: "Maximum unique categories per crossed feature."
|
||||
default: 100
|
||||
required:
|
||||
- cols
|
||||
fit:
|
||||
description: "Fit the CatCross model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
transform:
|
||||
description: "Transform the input DataFrame with the fitted model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
fit_transform:
|
||||
description: "Fit and transform the input DataFrame."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
|
||||
GroupStat:
|
||||
type: class
|
||||
description: "Aggregate specified column in a DataFrame grouped by another column, adding new features named '<agg_col>_<agg_func>_by_<group_col>'."
|
||||
methods:
|
||||
__init__:
|
||||
description: "Initialize self."
|
||||
parameters:
|
||||
properties:
|
||||
group_col:
|
||||
type: str
|
||||
description: "Column used for grouping."
|
||||
agg_col:
|
||||
type: str
|
||||
description: "Column on which aggregation is performed."
|
||||
agg_funcs:
|
||||
type: list
|
||||
description: >-
|
||||
List of aggregation functions to apply, such as ['mean', 'std'].
|
||||
Each function must be supported by pandas.
|
||||
required:
|
||||
- group_col
|
||||
- agg_col
|
||||
- agg_funcs
|
||||
fit:
|
||||
description: "Fit the GroupStat model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
transform:
|
||||
description: "Transform the input DataFrame with the fitted model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
fit_transform:
|
||||
description: "Fit and transform the input DataFrame."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
|
||||
SplitBins:
|
||||
type: class
|
||||
description: "Inplace binning of continuous data into intervals, returning integer-encoded bin identifiers directly."
|
||||
methods:
|
||||
__init__:
|
||||
description: "Initialize self."
|
||||
parameters:
|
||||
properties:
|
||||
cols:
|
||||
type: list
|
||||
description: "Columns to be binned inplace."
|
||||
strategy:
|
||||
type: str
|
||||
description: "Strategy used to define the widths of the bins."
|
||||
default: quantile
|
||||
enum:
|
||||
- quantile
|
||||
- uniform
|
||||
- kmeans
|
||||
required:
|
||||
- cols
|
||||
fit:
|
||||
description: "Fit the SplitBins model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
transform:
|
||||
description: "Transform the input DataFrame with the fitted model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
fit_transform:
|
||||
description: "Fit and transform the input DataFrame."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
|
||||
GeneralSelection:
|
||||
type: class
|
||||
description: "Drop all nan feats and feats with only one unique value."
|
||||
methods:
|
||||
__init__:
|
||||
description: "Initialize self."
|
||||
parameters:
|
||||
properties:
|
||||
label_col:
|
||||
type: str
|
||||
description: "Label column name."
|
||||
required:
|
||||
- label_col
|
||||
fit:
|
||||
description: "Fit the GeneralSelection model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
transform:
|
||||
description: "Transform the input DataFrame with the fitted model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
fit_transform:
|
||||
description: "Fit and transform the input DataFrame."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
|
||||
|
||||
TreeBasedSelection:
|
||||
type: class
|
||||
description: "Select features based on tree-based model and remove features with low importance."
|
||||
methods:
|
||||
__init__:
|
||||
description: "Initialize self."
|
||||
parameters:
|
||||
properties:
|
||||
label_col:
|
||||
type: str
|
||||
description: "Label column name."
|
||||
task_type:
|
||||
type: str
|
||||
description: "Task type, 'cls' for classification, 'mcls' for multi-class classification, 'reg' for regression."
|
||||
enum:
|
||||
- cls
|
||||
- mcls
|
||||
- reg
|
||||
required:
|
||||
- label_col
|
||||
- task_type
|
||||
fit:
|
||||
description: "Fit the TreeBasedSelection model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
transform:
|
||||
description: "Transform the input DataFrame with the fitted model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame containing label_col."
|
||||
fit_transform:
|
||||
description: "Fit and transform the input DataFrame."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame containing label_col."
|
||||
|
||||
VarianceBasedSelection:
|
||||
type: class
|
||||
description: "Select features based on variance and remove features with low variance."
|
||||
methods:
|
||||
__init__:
|
||||
description: "Initialize self."
|
||||
parameters:
|
||||
properties:
|
||||
label_col:
|
||||
type: str
|
||||
description: "Label column name."
|
||||
threshold:
|
||||
type: float
|
||||
description: "Threshold for variance."
|
||||
default: 0.0
|
||||
required:
|
||||
- label_col
|
||||
fit:
|
||||
description: "Fit the VarianceBasedSelection model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
transform:
|
||||
description: "Transform the input DataFrame with the fitted model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame containing label_col."
|
||||
fit_transform:
|
||||
description: "Fit and transform the input DataFrame."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame containing label_col."
|
||||
|
|
@ -0,0 +1,56 @@
|
|||
SplitBins:
|
||||
type: class
|
||||
description: "Inplace binning of continuous data into intervals, returning integer-encoded bin identifiers directly."
|
||||
methods:
|
||||
__init__:
|
||||
description: "Initialize self."
|
||||
parameters:
|
||||
properties:
|
||||
cols:
|
||||
type: list
|
||||
description: "Columns to be binned inplace."
|
||||
strategy:
|
||||
type: str
|
||||
description: "Strategy used to define the widths of the bins."
|
||||
default: quantile
|
||||
enum:
|
||||
- quantile
|
||||
- uniform
|
||||
- kmeans
|
||||
required:
|
||||
- cols
|
||||
fit:
|
||||
description: "Fit the SplitBins model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
transform:
|
||||
description: "Transform the input DataFrame with the fitted model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
fit_transform:
|
||||
description: "Fit and transform the input DataFrame."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
|
|
@ -0,0 +1,52 @@
|
|||
TargetMeanEncoder:
|
||||
type: class
|
||||
description: "Encodes a categorical column by the mean of the label column, and adds the result as a new feature."
|
||||
methods:
|
||||
__init__:
|
||||
description: "Initialize self."
|
||||
parameters:
|
||||
properties:
|
||||
col:
|
||||
type: str
|
||||
description: "Column to be mean encoded."
|
||||
label:
|
||||
type: str
|
||||
description: "Predicted label column."
|
||||
required:
|
||||
- col
|
||||
- label
|
||||
fit:
|
||||
description: "Fit the TargetMeanEncoder model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
transform:
|
||||
description: "Transform the input DataFrame with the fitted model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
fit_transform:
|
||||
description: "Fit and transform the input DataFrame."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
|
|
@ -0,0 +1,56 @@
|
|||
TreeBasedSelection:
|
||||
type: class
|
||||
description: "Select features based on tree-based model and remove features with low importance."
|
||||
methods:
|
||||
__init__:
|
||||
description: "Initialize self."
|
||||
parameters:
|
||||
properties:
|
||||
label_col:
|
||||
type: str
|
||||
description: "Label column name."
|
||||
task_type:
|
||||
type: str
|
||||
description: "Task type, 'cls' for classification, 'mcls' for multi-class classification, 'reg' for regression."
|
||||
enum:
|
||||
- cls
|
||||
- mcls
|
||||
- reg
|
||||
required:
|
||||
- label_col
|
||||
- task_type
|
||||
fit:
|
||||
description: "Fit the TreeBasedSelection model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
transform:
|
||||
description: "Transform the input DataFrame with the fitted model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame containing label_col."
|
||||
fit_transform:
|
||||
description: "Fit and transform the input DataFrame."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame containing label_col."
|
||||
|
|
@ -0,0 +1,52 @@
|
|||
VarianceBasedSelection:
|
||||
type: class
|
||||
description: "Select features based on variance and remove features with low variance."
|
||||
methods:
|
||||
__init__:
|
||||
description: "Initialize self."
|
||||
parameters:
|
||||
properties:
|
||||
label_col:
|
||||
type: str
|
||||
description: "Label column name."
|
||||
threshold:
|
||||
type: float
|
||||
description: "Threshold for variance."
|
||||
default: 0.0
|
||||
required:
|
||||
- label_col
|
||||
fit:
|
||||
description: "Fit the VarianceBasedSelection model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
transform:
|
||||
description: "Transform the input DataFrame with the fitted model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame containing label_col."
|
||||
fit_transform:
|
||||
description: "Fit and transform the input DataFrame."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame containing label_col."
|
||||
128
metagpt/tools/tool_registry.py
Normal file
128
metagpt/tools/tool_registry.py
Normal file
|
|
@ -0,0 +1,128 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
@Time : 2023/01/12 17:07
|
||||
@Author : garylin2099
|
||||
@File : tool_registry.py
|
||||
"""
|
||||
import os
|
||||
from collections import defaultdict
|
||||
import inspect
|
||||
import re
|
||||
|
||||
import yaml
|
||||
|
||||
from metagpt.tools.tool_schema import ToolType, ToolSchema, Tool
|
||||
from metagpt.logs import logger
|
||||
from metagpt.const import TOOL_SCHEMA_PATH
|
||||
|
||||
|
||||
class ToolRegistry:
    """In-memory registry of tools and tool types.

    Attributes:
        tools: maps a tool name to its ``Tool`` record.
        tool_types: maps a tool type name to the registered tool type object.
        tools_by_types: two-layer mapping,
            ``{tool_type_name: {tool_name: Tool, ...}, ...}``.
    """

    def __init__(self):
        self.tools = {}
        self.tool_types = {}
        # two-layer k-v, {tool_type_name: {tool_name: {...}, ...}, ...}
        self.tools_by_types = defaultdict(dict)

    def register_tool_type(self, tool_type: "ToolType"):
        """Store ``tool_type`` under ``tool_type.name``, replacing any previous entry."""
        self.tool_types[tool_type.name] = tool_type

    def register_tool(
        self,
        tool_name,
        tool_path,
        schema_path=None,
        tool_code="",
        tool_type_name="other",
        make_schema_if_not_exists=False,
    ):
        """Register a tool from its YAML schema file.

        Does nothing if ``tool_name`` is already registered.

        Args:
            tool_name: name of the tool; also the top-level key looked up in the schema file.
            tool_path: path of the code file that defines the tool; stored in the schema
                under ``"tool_path"``.
            schema_path: location of the schema file. Defaults to
                ``TOOL_SCHEMA_PATH / tool_type_name / f"{tool_name}.yml"``.
            tool_code: source code of the tool, stored on the ``Tool`` record.
            tool_type_name: category under which the tool is indexed.
            make_schema_if_not_exists: when the schema file is missing, create one with
                ``make_schema`` instead of skipping the registration.
        """
        if self.has_tool(tool_name):
            return

        schema_path = schema_path or TOOL_SCHEMA_PATH / tool_type_name / f"{tool_name}.yml"

        if not os.path.exists(schema_path):
            if not make_schema_if_not_exists:
                logger.warning(f"no schema found at assumed schema_path {schema_path}, skip registering {tool_name}")
                return
            logger.warning(f"no schema found, will make schema at {schema_path}")
            make_schema(tool_code, schema_path)

        with open(schema_path, "r", encoding="utf-8") as f:
            loaded = yaml.safe_load(f)

        schema = self._select_schema(loaded, tool_name)
        if schema is None:
            # A freshly generated schema file is empty, and a hand-written one may lack
            # this tool's entry; indexing blindly would raise at import time.
            logger.warning(f"no schema entry for {tool_name} in {schema_path}, registering it with an empty schema")
            schema = {}
        schema["tool_path"] = tool_path  # corresponding code file path of the tool

        try:
            ToolSchema(**schema)  # validation only, the result is discarded
        except Exception as e:
            # Best effort: a non-conforming schema is still used, but the mismatch is
            # recorded instead of being dropped silently.
            logger.debug(f"{tool_name} schema does not conform to the required format, but will be used anyway. Mismatch: {e}")

        tool = Tool(name=tool_name, path=tool_path, schema=schema, code=tool_code)
        self.tools[tool_name] = tool
        self.tools_by_types[tool_type_name][tool_name] = tool
        logger.info(f"{tool_name} registered")

    @staticmethod
    def _select_schema(loaded, tool_name):
        """Return the dict stored under ``tool_name`` in parsed YAML content, or None.

        None is returned when ``loaded`` is not a mapping (for example an empty file
        parses to None), when the key is absent, or when its value is not a dict.
        """
        if not isinstance(loaded, dict):
            return None
        entry = loaded.get(tool_name)
        return entry if isinstance(entry, dict) else None

    def has_tool(self, key):
        """Return True if a tool named ``key`` is registered."""
        return key in self.tools

    def get_tool(self, key):
        """Return the tool named ``key``, or None if it is not registered."""
        return self.tools.get(key)

    def get_tools_by_type(self, key):
        """Return the ``{tool_name: Tool}`` mapping for tool type ``key``, or None if there is none."""
        # .get() is used on purpose: it does not create an empty entry in the defaultdict.
        return self.tools_by_types.get(key)

    def has_tool_type(self, key):
        """Return True if a tool type named ``key`` is registered."""
        return key in self.tool_types

    def get_tool_type(self, key):
        """Return the tool type named ``key``, or None if it is not registered."""
        return self.tool_types.get(key)

    def get_tool_types(self):
        """Return the mapping of all registered tool types (the live dict, not a copy)."""
        return self.tool_types
|
||||
|
||||
|
||||
# Registry instance
|
||||
TOOL_REGISTRY = ToolRegistry()
|
||||
|
||||
|
||||
def register_tool_type(cls):
    """Class decorator that registers a tool type.

    Instantiates ``cls`` with no arguments, hands the instance to
    ``TOOL_REGISTRY.register_tool_type`` and returns ``cls`` unchanged.
    """
    instance = cls()
    TOOL_REGISTRY.register_tool_type(tool_type=instance)
    return cls
|
||||
|
||||
|
||||
def register_tool(tool_name="", tool_type_name="other", schema_path=None):
    """Build a decorator that registers the decorated class or function as a tool.

    Args:
        tool_name: name to register under; falls back to the decorated object's
            ``__name__`` when empty.
        tool_type_name: category passed on to the registry.
        schema_path: explicit schema file location, passed on to the registry as is.

    Returns:
        A decorator that registers its argument with ``TOOL_REGISTRY`` and returns it unchanged.
    """

    def decorator(cls, tool_name=tool_name):
        registered_as = tool_name if tool_name else cls.__name__

        # File where the decorated object is defined; when the path contains "metagpt",
        # keep only the part starting at its first occurrence.
        location = inspect.getfile(cls)
        if "metagpt" in location:
            location = re.search("metagpt.+", location).group(0)

        TOOL_REGISTRY.register_tool(
            tool_name=registered_as,
            tool_path=location,
            schema_path=schema_path,
            tool_code=inspect.getsource(cls),
            tool_type_name=tool_type_name,
        )
        return cls

    return decorator
|
||||
|
||||
|
||||
def make_schema(tool_code, path):
    """Write a placeholder (empty) schema file at ``path`` and return ``path``.

    Args:
        tool_code: source code of the tool; currently unused because the generated
            schema is empty.
        path: destination of the YAML schema file. Missing parent directories are created.

    Returns:
        The ``path`` that was written.
    """
    parent_dir = os.path.dirname(path)
    if parent_dir:
        # A bare file name has no directory part, and os.makedirs("") raises FileNotFoundError.
        os.makedirs(parent_dir, exist_ok=True)
    schema = {}  # an empty schema for now
    with open(path, "w", encoding="utf-8") as f:
        yaml.dump(schema, f)
    return path
|
||||
31
metagpt/tools/tool_schema.py
Normal file
31
metagpt/tools/tool_schema.py
Normal file
|
|
@ -0,0 +1,31 @@
|
|||
from enum import Enum
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
|
||||
class ToolTypeEnum(Enum):
    """Names of the built-in tool categories.

    Looking up a value that matches no member, e.g. ``ToolTypeEnum("unknown")``,
    yields ``ToolTypeEnum.OTHER`` instead of raising ``ValueError``.
    """

    DATA_PREPROCESS = "data_preprocess"
    FEATURE_ENGINEERING = "feature_engineering"
    MODEL_TRAIN = "model_train"
    MODEL_EVALUATE = "model_evaluate"
    OTHER = "other"

    @classmethod
    def _missing_(cls, value):
        # Enum calls the classmethod `_missing_` when a by-value lookup finds no member;
        # `__missing__` is a dict hook that Enum never invokes, so the fallback was dead code.
        return cls.OTHER
|
||||
|
||||
|
||||
# Description of one tool category.
class ToolType(BaseModel):
    # Category name; ToolRegistry.register_tool_type uses it as the key in its tool_types mapping.
    name: str
    # Short description of what the category is for.
    desc: str
    # Prompt text associated with the category; empty by default.
    usage_prompt: str = ""
|
||||
|
||||
|
||||
# Model used to validate a loaded tool schema; the only declared field is `name`.
class ToolSchema(BaseModel):
    name: str
|
||||
|
||||
|
||||
# Record of one registered tool.
class Tool(BaseModel):
    # Name the tool is registered under.
    name: str
    # Path of the code file that defines the tool.
    path: str
    # Parsed schema of the tool; empty by default.
    # NOTE(review): `schema` is also the name of a pydantic BaseModel method; confirm the shadowing is intended.
    schema: dict = {}
    # Source code of the tool; empty by default.
    code: str = ""
|
||||
43
metagpt/tools/tool_types.py
Normal file
43
metagpt/tools/tool_types.py
Normal file
|
|
@ -0,0 +1,43 @@
|
|||
from metagpt.prompts.tool_type import (
|
||||
DATA_PREPROCESS_PROMPT,
|
||||
FEATURE_ENGINEERING_PROMPT,
|
||||
MODEL_TRAIN_PROMPT,
|
||||
MODEL_EVALUATE_PROMPT,
|
||||
)
|
||||
from metagpt.tools.tool_schema import ToolTypeEnum, ToolType
|
||||
from metagpt.tools.tool_registry import register_tool_type
|
||||
|
||||
|
||||
# Tool type for the data-preprocessing category; the decorator registers an instance of it.
@register_tool_type
class DataPreprocess(ToolType):
    name: str = ToolTypeEnum.DATA_PREPROCESS.value
    desc: str = "Only for changing value inplace."
    usage_prompt: str = DATA_PREPROCESS_PROMPT
|
||||
|
||||
|
||||
# Tool type for the feature-engineering category; the decorator registers an instance of it.
@register_tool_type
class FeatureEngineer(ToolType):
    name: str = ToolTypeEnum.FEATURE_ENGINEERING.value
    desc: str = "Only for creating new columns for input data."
    usage_prompt: str = FEATURE_ENGINEERING_PROMPT
|
||||
|
||||
|
||||
# Tool type for the model-training category; the decorator registers an instance of it.
@register_tool_type
class ModelTrain(ToolType):
    name: str = ToolTypeEnum.MODEL_TRAIN.value
    desc: str = "Only for training model."
    usage_prompt: str = MODEL_TRAIN_PROMPT
|
||||
|
||||
|
||||
# Tool type for the model-evaluation category; the decorator registers an instance of it.
@register_tool_type
class ModelEvaluate(ToolType):
    name: str = ToolTypeEnum.MODEL_EVALUATE.value
    desc: str = "Only for evaluating model."
    usage_prompt: str = MODEL_EVALUATE_PROMPT
|
||||
|
||||
|
||||
# Catch-all tool type for tools outside the defined categories; it carries no usage prompt.
@register_tool_type
class Other(ToolType):
    name: str = ToolTypeEnum.OTHER.value
    desc: str = "Any tools not in the defined categories"
    usage_prompt: str = ""
|
||||
Loading…
Add table
Add a link
Reference in a new issue