diff --git a/apt/minimization/minimizer.py b/apt/minimization/minimizer.py index 90c26b6..75d7208 100644 --- a/apt/minimization/minimizer.py +++ b/apt/minimization/minimizer.py @@ -878,7 +878,6 @@ class GeneralizeToRepresentative(BaseEstimator, MetaEstimatorMixin, TransformerM rows[feature] = generalizations['range_representatives'][feature][r_index] return original_data_generalized - def _generalize_from_tree(self, original_data, prepared_data, level_nodes, cells, cells_by_id): mapping_to_cells = self._map_to_cells(prepared_data, level_nodes, cells_by_id) all_indexes = [] @@ -1199,6 +1198,7 @@ class GeneralizeToRepresentative(BaseEstimator, MetaEstimatorMixin, TransformerM else: ranges[feature] = list(set(ranges[feature])) ranges[feature].sort() + prev_value = 0 for index, value in enumerate(ranges[feature]): if index == 0: # for first range, use min value diff --git a/apt/utils/datasets/datasets.py b/apt/utils/datasets/datasets.py index 17e6e6b..b3278f4 100644 --- a/apt/utils/datasets/datasets.py +++ b/apt/utils/datasets/datasets.py @@ -20,7 +20,7 @@ from scipy.sparse import csr_matrix logger = logging.getLogger(__name__) -INPUT_DATA_ARRAY_TYPE = Union[np.ndarray, pd.DataFrame, List, Tensor] +INPUT_DATA_ARRAY_TYPE = Union[np.ndarray, pd.DataFrame, List, Tensor, csr_matrix] OUTPUT_DATA_ARRAY_TYPE = np.ndarray DATA_PANDAS_NUMPY_TYPE = Union[np.ndarray, pd.DataFrame] @@ -30,15 +30,15 @@ def array2numpy(arr: INPUT_DATA_ARRAY_TYPE) -> OUTPUT_DATA_ARRAY_TYPE: """ converts from INPUT_DATA_ARRAY_TYPE to numpy array """ - if type(arr) == np.ndarray: + if isinstance(arr, np.ndarray): return arr - if type(arr) == pd.DataFrame or type(arr) == pd.Series: + if isinstance(arr, pd.DataFrame) or isinstance(arr, pd.Series): return arr.to_numpy() if isinstance(arr, list): return np.array(arr) - if type(arr) == Tensor: + if isinstance(arr, Tensor): return arr.detach().cpu().numpy() - if type(arr) == csr_matrix: + if isinstance(arr, csr_matrix): return arr.toarray() raise ValueError("Non supported type: ", type(arr).__name__) @@ -48,15 +48,15 @@ def array2torch_tensor(arr: INPUT_DATA_ARRAY_TYPE) -> Tensor: """ converts from INPUT_DATA_ARRAY_TYPE to torch tensor array """ - if type(arr) == np.ndarray: + if isinstance(arr, np.ndarray): return torch.from_numpy(arr) - if type(arr) == pd.DataFrame or type(arr) == pd.Series: + if isinstance(arr, pd.DataFrame) or isinstance(arr, pd.Series): return torch.from_numpy(arr.to_numpy()) if isinstance(arr, list): return torch.tensor(arr) - if type(arr) == Tensor: + if isinstance(arr, Tensor): return arr - if type(arr) == csr_matrix: + if isinstance(arr, csr_matrix): return torch.from_numpy(arr.toarray()) raise ValueError("Non supported type: ", type(arr).__name__) @@ -222,7 +222,7 @@ class ArrayDataset(Dataset): def __init__(self, x: INPUT_DATA_ARRAY_TYPE, y: Optional[INPUT_DATA_ARRAY_TYPE] = None, features_names: Optional[list] = None, **kwargs): - self.is_pandas = self.is_pandas = type(x) == pd.DataFrame or type(x) == pd.Series + self.is_pandas = self.is_pandas = isinstance(x, pd.DataFrame) or isinstance(x, pd.Series) self.features_names = features_names self._y = array2numpy(y) if y is not None else None @@ -330,7 +330,7 @@ class PytorchData(Dataset): self._y = array2torch_tensor(y) if y is not None else None self._x = array2torch_tensor(x) - self.is_pandas = type(x) == pd.DataFrame or type(x) == pd.Series + self.is_pandas = isinstance(x, pd.DataFrame) or isinstance(x, pd.Series) if self.is_pandas: self.features_names = x.columns diff --git a/apt/utils/models/model.py b/apt/utils/models/model.py index ebf0464..157158e 100644 --- a/apt/utils/models/model.py +++ b/apt/utils/models/model.py @@ -43,7 +43,7 @@ def get_nb_classes(y: OUTPUT_DATA_ARRAY_TYPE) -> int: if y is None: return 0 - if type(y) != np.ndarray: + if not isinstance(y, np.ndarray): raise ValueError("Input should be numpy array") if is_one_hot(y): @@ -339,8 +339,8 @@ class BlackboxClassifierPredictions(BlackboxClassifier): y_test_pred = check_and_transform_label_format(y_test_pred, nb_classes=self._nb_classes) if x_train_pred is not None and y_train_pred is not None and x_test_pred is not None and y_test_pred is not None: - if type(y_train_pred) != np.ndarray or type(y_test_pred) != np.ndarray \ - or type(y_train_pred) != np.ndarray or type(y_test_pred) != np.ndarray: + if not isinstance(y_train_pred, np.ndarray) or not isinstance(y_test_pred, np.ndarray) \ + or not isinstance(y_train_pred, np.ndarray) or not isinstance(y_test_pred, np.ndarray): raise NotImplementedError("X/Y Data should be numpy array") x_pred = np.vstack((x_train_pred, x_test_pred)) y_pred = np.vstack((y_train_pred, y_test_pred)) diff --git a/tests/test_minimizer.py b/tests/test_minimizer.py index ae35f81..62b42a2 100644 --- a/tests/test_minimizer.py +++ b/tests/test_minimizer.py @@ -246,6 +246,7 @@ def create_encoder(numeric_features, categorical_features, x): return preprocessor, encoded + def test_minimizer_params_not_transform(get_cells): # Assume two features, age and height, and boolean label cells, features, x, y = get_cells @@ -385,7 +386,7 @@ def test_minimizer_fit_not_transform(get_data_two_features): gen.fit(dataset=train_dataset, generalize_using_transform=False) gener = gen.generalizations - expected_generalizations = {'ranges': {'age': [], 'height':[157.0]}, 'categories': {}, 'untouched': []} + expected_generalizations = {'ranges': {'age': [], 'height': [157.0]}, 'categories': {}, 'untouched': []} compare_generalizations(gener, expected_generalizations)