diff --git a/metagpt/tools/functions/libs/data_preprocess.py b/metagpt/tools/functions/libs/data_preprocess.py index fa70bf8fc..ec3580889 100644 --- a/metagpt/tools/functions/libs/data_preprocess.py +++ b/metagpt/tools/functions/libs/data_preprocess.py @@ -1,4 +1,5 @@ import numpy as np +import pandas as pd from sklearn.impute import SimpleImputer from sklearn.preprocessing import LabelEncoder from sklearn.preprocessing import MaxAbsScaler @@ -9,7 +10,6 @@ from sklearn.preprocessing import RobustScaler from sklearn.preprocessing import StandardScaler from metagpt.tools.functions.libs.base import MLProcess -from metagpt.tools.functions.schemas.data_preprocess import * class FillMissingValue(MLProcess): @@ -141,7 +141,10 @@ def get_column_info(df: pd.DataFrame) -> dict: for i in df.columns: nan_freq = float("%.2g" % (df[i].isna().mean() * 100)) n_unique = df[i].nunique() - data.append([i, df[i].dtype, nan_freq, n_unique]) + data_type = str(df[i].dtype).replace("dtype('", "").replace("')", "") + if data_type == "O": + data_type = "object" + data.append([i, data_type, nan_freq, n_unique]) samples = pd.DataFrame( data, diff --git a/metagpt/tools/functions/libs/feature_engineering.py b/metagpt/tools/functions/libs/feature_engineering.py index de54e4db0..1ec2b9675 100644 --- a/metagpt/tools/functions/libs/feature_engineering.py +++ b/metagpt/tools/functions/libs/feature_engineering.py @@ -7,6 +7,7 @@ import itertools import numpy as np +import pandas as pd from dateutil.relativedelta import relativedelta from joblib import Parallel, delayed from pandas.api.types import is_numeric_dtype @@ -15,7 +16,6 @@ from sklearn.model_selection import KFold from sklearn.preprocessing import PolynomialFeatures, KBinsDiscretizer from metagpt.tools.functions.libs.base import MLProcess -from metagpt.tools.functions.schemas.feature_engineering import * class PolynomialExpansion(MLProcess):