diff --git a/metagpt/tools/functions/libs/data_preprocess.py b/metagpt/tools/functions/libs/data_preprocess.py index ec3580889..8c70462ee 100644 --- a/metagpt/tools/functions/libs/data_preprocess.py +++ b/metagpt/tools/functions/libs/data_preprocess.py @@ -151,53 +151,3 @@ def get_column_info(df: pd.DataFrame) -> dict: columns=["Column_name", "Data_type", "NaN_Frequency(%)", "N_unique"], ) return samples.to_dict(orient='list') -# -# -# if __name__ == '__main__': -# def run(): -# V = { -# 'a': [-1, 2, 3, 6, 5, 4], -# 'b': [1.1, 2.2, 3.3, 6.6, 5.5, 4.4], -# 'c': ['aa', 'bb', 'cc', 'dd', 'ee', 'ff'], -# 'd': [1, None, 3, None, 5, 4], -# 'e': [1.1, np.NAN, 3.3, None, 5.5, 4.4], -# 'f': ['aa', np.NAN, 'cc', None, '', 'ff'], -# -# } -# -# df = pd.DataFrame(V) -# print(df.dtypes) -# -# numeric_features = ['a', 'b', 'd', 'e'] -# numeric_features_wo_miss = ['a', 'b', ] -# categorial_features = ['c', 'f'] -# -# df_ = fill_missing_value(df.copy(), numeric_features) -# print(df_) -# df_ = fill_missing_value(df.copy(), categorial_features, strategy='constant', fill_value='hehe') -# print(df_) -# -# df_ = fill_missing_value(df.copy(), numeric_features, strategy='constant', fill_value=999) -# print(df_) -# -# # df_ = label_encode(df.copy(), numeric_features + categorial_features, ) -# # print(df_) -# -# df_ = split_bins(df.copy(), numeric_features_wo_miss, strategy='quantile') -# print(df_) -# -# df_ = min_max_scale(df.copy(), numeric_features, ) -# print(df_) -# -# df_ = standard_scale(df.copy(), numeric_features, ) -# print(df_) -# -# df_ = log_transform(df.copy(), numeric_features, ) -# print(df_) -# -# df_ = max_abs_scale(df.copy(), numeric_features, ) -# print(df_) -# -# df_ = robust_scale(df.copy(), numeric_features, ) -# print(df_) -# run() \ No newline at end of file