mirror of
https://github.com/FoundationAgents/MetaGPT.git
synced 2026-04-27 09:46:24 +02:00
update ml tools
This commit is contained in:
parent
49779d8615
commit
f614fbfa7c
2 changed files with 8 additions and 13 deletions
|
|
@@ -1,6 +1,6 @@
|
|||
import numpy as np
|
||||
from sklearn.impute import SimpleImputer
|
||||
from sklearn.preprocessing import KBinsDiscretizer, LabelEncoder
|
||||
from sklearn.preprocessing import LabelEncoder
|
||||
from sklearn.preprocessing import MaxAbsScaler
|
||||
from sklearn.preprocessing import MinMaxScaler
|
||||
from sklearn.preprocessing import OneHotEncoder
|
||||
|
|
@@ -8,7 +8,6 @@ from sklearn.preprocessing import OrdinalEncoder
|
|||
from sklearn.preprocessing import RobustScaler
|
||||
from sklearn.preprocessing import StandardScaler
|
||||
|
||||
from metagpt.tools.functions import registry
|
||||
from metagpt.tools.functions.libs.base import MLProcess
|
||||
from metagpt.tools.functions.schemas.data_preprocess import *
|
||||
|
||||
|
|
@@ -57,15 +56,6 @@ class StandardScale(MLProcess):
|
|||
return df
|
||||
|
||||
|
||||
@registry.register("data_preprocess", LogTransform)
def log_transform(df: pd.DataFrame, features: list, ):
    """Replace each listed column of ``df`` with its natural logarithm.

    A column whose minimum is zero or negative is first shifted so that its
    minimum becomes 2, which keeps every value inside the domain of the
    logarithm.

    Args:
        df: Frame holding the columns to transform; it is modified in place.
        features: Names of the columns to transform.

    Returns:
        The same ``df`` object, with the listed columns log-transformed.
    """
    for name in features:
        lowest = df[name].min()
        # Only shift when the column contains non-positive values.
        shifted = df[name] - lowest + 2 if lowest <= 0 else df[name]
        df[name] = np.log(shifted)
    return df
|
||||
|
||||
|
||||
class MaxAbsScale(MLProcess):
|
||||
def __init__(self, features: list,):
|
||||
self.features = features
|
||||
|
|
@@ -146,7 +136,7 @@ class LabelEncode(MLProcess):
|
|||
return df
|
||||
|
||||
|
||||
def get_column_info(df: pd.DataFrame) -> str:
|
||||
def get_column_info(df: pd.DataFrame) -> dict:
|
||||
data = []
|
||||
for i in df.columns:
|
||||
nan_freq = float("%.2g" % (df[i].isna().mean() * 100))
|
||||
|
|
@@ -157,7 +147,7 @@ def get_column_info(df: pd.DataFrame) -> str:
|
|||
data,
|
||||
columns=["Column_name", "Data_type", "NaN_Frequency(%)", "N_unique"],
|
||||
)
|
||||
return samples.to_string(index=False)
|
||||
return samples.to_dict(orient='list')
|
||||
#
|
||||
#
|
||||
# if __name__ == '__main__':
|
||||
|
|
|
|||
|
|
@@ -10,6 +10,7 @@ import numpy as np
|
|||
from dateutil.relativedelta import relativedelta
|
||||
from joblib import Parallel, delayed
|
||||
from pandas.api.types import is_numeric_dtype
|
||||
from pandas.core.dtypes.common import is_object_dtype
|
||||
from sklearn.model_selection import KFold
|
||||
from sklearn.preprocessing import PolynomialFeatures, KBinsDiscretizer
|
||||
|
||||
|
|
@@ -280,6 +281,10 @@ class GeneralSelection(MLProcess):
|
|||
or df.loc[df[col] == np.inf].shape[0] != 0
|
||||
):
|
||||
feats.remove(col)
|
||||
|
||||
if is_object_dtype(df[col]) and df[col].nunique() == df.shape[0]:
|
||||
feats.remove(col)
|
||||
|
||||
self.feats = feats
|
||||
|
||||
def transform(self, df: pd.DataFrame) -> pd.DataFrame:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue