diff --git a/apt/minimization/minimizer.py b/apt/minimization/minimizer.py index 2d3acae..1d53932 100644 --- a/apt/minimization/minimizer.py +++ b/apt/minimization/minimizer.py @@ -16,7 +16,8 @@ from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor from sklearn.model_selection import train_test_split from apt.utils.datasets import ArrayDataset, DATA_PANDAS_NUMPY_TYPE -from apt.utils.models import Model, SklearnRegressor, ModelOutputType, SklearnClassifier +from apt.utils.models import Model, SklearnRegressor, ModelOutputType, SklearnClassifier, \ + CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES @dataclass @@ -96,7 +97,7 @@ class GeneralizeToRepresentative(BaseEstimator, MetaEstimatorMixin, TransformerM # model output type is not critical as it only affects computation of nb_classes, which is in any case # the same currently for single and multi output probabilities. self.estimator = SklearnClassifier(estimator, - ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES) + CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES) self.target_accuracy = target_accuracy self.cells = cells self.categorical_features = [] diff --git a/apt/utils/models/__init__.py b/apt/utils/models/__init__.py index 9c1e40b..6472c45 100644 --- a/apt/utils/models/__init__.py +++ b/apt/utils/models/__init__.py @@ -1,6 +1,11 @@ from apt.utils.models.model import Model, BlackboxClassifier, ModelOutputType, ScoringMethod, \ BlackboxClassifierPredictions, BlackboxClassifierPredictFunction, get_nb_classes, is_one_hot, \ - check_correct_model_output, is_multi_label, is_multi_label_binary, is_logits + check_correct_model_output, is_multi_label, is_multi_label_binary, is_logits, is_binary, \ + CLASSIFIER_SINGLE_OUTPUT_CATEGORICAL, CLASSIFIER_SINGLE_OUTPUT_BINARY_PROBABILITIES, \ + CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES, CLASSIFIER_SINGLE_OUTPUT_BINARY_LOGITS, \ + CLASSIFIER_SINGLE_OUTPUT_CLASS_LOGITS, CLASSIFIER_MULTI_OUTPUT_CATEGORICAL, \ + 
CLASSIFIER_MULTI_OUTPUT_BINARY_PROBABILITIES, CLASSIFIER_MULTI_OUTPUT_CLASS_PROBABILITIES, \ + CLASSIFIER_MULTI_OUTPUT_BINARY_LOGITS, CLASSIFIER_MULTI_OUTPUT_CLASS_LOGITS from apt.utils.models.sklearn_model import SklearnModel, SklearnClassifier, SklearnRegressor from apt.utils.models.keras_model import KerasClassifier, KerasRegressor from apt.utils.models.xgboost_model import XGBoostClassifier diff --git a/apt/utils/models/keras_model.py b/apt/utils/models/keras_model.py index b077d3a..ac890cb 100644 --- a/apt/utils/models/keras_model.py +++ b/apt/utils/models/keras_model.py @@ -101,7 +101,7 @@ class KerasRegressor(KerasModel): """ def __init__(self, model: "keras.models.Model", black_box_access: Optional[bool] = True, unlimited_queries: Optional[bool] = True, **kwargs): - super().__init__(model, ModelOutputType.REGRESSOR_SCALAR, black_box_access, unlimited_queries, **kwargs) + super().__init__(model, ModelOutputType.REGRESSION, black_box_access, unlimited_queries, **kwargs) self._art_model = ArtKerasRegressor(model) def fit(self, train_data: Dataset, **kwargs) -> None: diff --git a/apt/utils/models/model.py b/apt/utils/models/model.py index 010b0f6..3e9ca33 100644 --- a/apt/utils/models/model.py +++ b/apt/utils/models/model.py @@ -1,6 +1,6 @@ from abc import ABCMeta, abstractmethod from typing import Any, Optional, Callable, Tuple, Union, TYPE_CHECKING -from enum import Enum, auto +from enum import Enum, Flag, auto import numpy as np from scipy.special import expit @@ -12,18 +12,40 @@ if TYPE_CHECKING: import torch -class ModelOutputType(Enum): - CLASSIFIER_SINGLE_OUTPUT_CATEGORICAL = auto() # class labels - CLASSIFIER_SINGLE_OUTPUT_BINARY_PROBABILITIES = auto() # single binary probability - CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES = auto() # vector of class probabilities - CLASSIFIER_SINGLE_OUTPUT_BINARY_LOGITS = auto() # single binary logit - CLASSIFIER_SINGLE_OUTPUT_CLASS_LOGITS = auto() # vector of logits - CLASSIFIER_MULTI_OUTPUT_CATEGORICAL = auto() # 
vector of class labels - CLASSIFIER_MULTI_OUTPUT_BINARY_PROBABILITIES = auto() # vector of binary probabilities, 1 per output - CLASSIFIER_MULTI_OUTPUT_CLASS_PROBABILITIES = auto() # vector of class probabilities for multiple outputs - CLASSIFIER_MULTI_OUTPUT_BINARY_LOGITS = auto() # vector of binary logits - CLASSIFIER_MULTI_OUTPUT_CLASS_LOGITS = auto() # vector of logits for multiple outputs - REGRESSOR_SCALAR = auto() # value +class ModelOutputType(Flag): + CLASSIFIER = auto() + MULTI_OUTPUT = auto() + BINARY = auto() + LOGITS = auto() + PROBABILITIES = auto() + REGRESSION = auto() + + +# class labels +CLASSIFIER_SINGLE_OUTPUT_CATEGORICAL = ModelOutputType.CLASSIFIER +# single binary probability +CLASSIFIER_SINGLE_OUTPUT_BINARY_PROBABILITIES = ModelOutputType.CLASSIFIER | ModelOutputType.BINARY | \ + ModelOutputType.PROBABILITIES +# vector of class probabilities +CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES = ModelOutputType.CLASSIFIER | ModelOutputType.PROBABILITIES +# single binary logit +CLASSIFIER_SINGLE_OUTPUT_BINARY_LOGITS = ModelOutputType.CLASSIFIER | ModelOutputType.BINARY | ModelOutputType.LOGITS +# vector of logits +CLASSIFIER_SINGLE_OUTPUT_CLASS_LOGITS = ModelOutputType.CLASSIFIER | ModelOutputType.LOGITS +# vector of class labels +CLASSIFIER_MULTI_OUTPUT_CATEGORICAL = ModelOutputType.MULTI_OUTPUT | ModelOutputType.CLASSIFIER +# vector of binary probabilities, 1 per output +CLASSIFIER_MULTI_OUTPUT_BINARY_PROBABILITIES = ModelOutputType.MULTI_OUTPUT | ModelOutputType.CLASSIFIER | \ + ModelOutputType.BINARY | ModelOutputType.PROBABILITIES +# vector of class probabilities for multiple outputs +CLASSIFIER_MULTI_OUTPUT_CLASS_PROBABILITIES = ModelOutputType.MULTI_OUTPUT | ModelOutputType.CLASSIFIER | \ + ModelOutputType.PROBABILITIES +# vector of binary logits +CLASSIFIER_MULTI_OUTPUT_BINARY_LOGITS = ModelOutputType.MULTI_OUTPUT | ModelOutputType.CLASSIFIER | \ + ModelOutputType.BINARY | ModelOutputType.LOGITS +# vector of logits for multiple outputs 
+CLASSIFIER_MULTI_OUTPUT_CLASS_LOGITS = ModelOutputType.MULTI_OUTPUT | ModelOutputType.CLASSIFIER | \ + ModelOutputType.LOGITS class ModelType(Enum): @@ -43,40 +65,35 @@ def is_one_hot(y: OUTPUT_DATA_ARRAY_TYPE) -> bool: def is_multi_label(output_type: ModelOutputType) -> bool: - return (output_type == ModelOutputType.CLASSIFIER_MULTI_OUTPUT_CATEGORICAL - or output_type == ModelOutputType.CLASSIFIER_MULTI_OUTPUT_BINARY_PROBABILITIES - or output_type == ModelOutputType.CLASSIFIER_MULTI_OUTPUT_CLASS_PROBABILITIES - or output_type == ModelOutputType.CLASSIFIER_MULTI_OUTPUT_BINARY_LOGITS - or output_type == ModelOutputType.CLASSIFIER_MULTI_OUTPUT_CLASS_LOGITS) + return ModelOutputType.MULTI_OUTPUT in output_type def is_multi_label_binary(output_type: ModelOutputType) -> bool: - return (output_type == ModelOutputType.CLASSIFIER_MULTI_OUTPUT_BINARY_PROBABILITIES - or output_type == ModelOutputType.CLASSIFIER_MULTI_OUTPUT_BINARY_LOGITS) + return (ModelOutputType.MULTI_OUTPUT in output_type + and ModelOutputType.BINARY in output_type) def is_binary(output_type: ModelOutputType) -> bool: - return (output_type == ModelOutputType.CLASSIFIER_MULTI_OUTPUT_BINARY_PROBABILITIES - or output_type == ModelOutputType.CLASSIFIER_MULTI_OUTPUT_BINARY_LOGITS - or output_type == ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_BINARY_PROBABILITIES - or output_type == ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_BINARY_LOGITS) + return ModelOutputType.BINARY in output_type def is_categorical(output_type: ModelOutputType) -> bool: - return (output_type == ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CATEGORICAL - or output_type == ModelOutputType.CLASSIFIER_MULTI_OUTPUT_CATEGORICAL) + return (ModelOutputType.CLASSIFIER in output_type + and ModelOutputType.BINARY not in output_type + and ModelOutputType.PROBABILITIES not in output_type + and ModelOutputType.LOGITS not in output_type) def is_probabilities(output_type: ModelOutputType) -> bool: - return (output_type == 
ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES - or output_type == ModelOutputType.CLASSIFIER_MULTI_OUTPUT_CLASS_PROBABILITIES) + return ModelOutputType.PROBABILITIES in output_type def is_logits(output_type: ModelOutputType) -> bool: - return (output_type == ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CLASS_LOGITS - or output_type == ModelOutputType.CLASSIFIER_MULTI_OUTPUT_CLASS_LOGITS - or output_type == ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_BINARY_LOGITS - or output_type == ModelOutputType.CLASSIFIER_MULTI_OUTPUT_BINARY_LOGITS) + return ModelOutputType.LOGITS in output_type + + +def is_logits_or_probabilities(output_type: ModelOutputType) -> bool: + return is_probabilities(output_type) or is_logits(output_type) def get_nb_classes(y: OUTPUT_DATA_ARRAY_TYPE, output_type: ModelOutputType) -> int: @@ -115,8 +132,7 @@ def check_correct_model_output(y: OUTPUT_DATA_ARRAY_TYPE, output_type: ModelOutp :type output_type: ModelOutputType :raises: ValueError (in case of mismatch) """ - if not is_one_hot(y) and (output_type == ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES - or output_type == ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CLASS_LOGITS): + if not is_one_hot(y) and not is_multi_label(output_type) and not is_binary(output_type) and is_logits_or_probabilities(output_type): raise ValueError("Incompatible model output types. 
Model outputs 1D array of categorical scalars while " "output type is set to ", output_type) @@ -208,15 +224,13 @@ class Model(metaclass=ABCMeta): y = array2numpy(test_data.get_labels()) if scoring_method == ScoringMethod.ACCURACY: - if not is_multi_label(self.output_type) and not is_binary(self.output_type) and nb_classes is not None: - y = check_and_transform_label_format(y, nb_classes=nb_classes) - if (self.output_type == ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES - or self.output_type == ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CLASS_LOGITS - or self.output_type == ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CATEGORICAL): + if not is_multi_label(self.output_type) and not is_binary(self.output_type): + if nb_classes is not None: + y = check_and_transform_label_format(y, nb_classes=nb_classes) # categorical has been 1-hot encoded by check_and_transform_label_format return np.count_nonzero(np.argmax(y, axis=1) == np.argmax(predicted, axis=1)) / predicted.shape[0] - elif (self.output_type == ModelOutputType.CLASSIFIER_MULTI_OUTPUT_CLASS_LOGITS - or self.output_type == ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES): + elif (is_multi_label(self.output_type) and not is_binary(self.output_type) + and is_logits_or_probabilities(self.output_type)): if predicted.shape != y.shape: raise ValueError('Do not know how to compare arrays with different shapes') elif len(predicted.shape) < 3: @@ -228,7 +242,7 @@ class Model(metaclass=ABCMeta): count += np.count_nonzero(np.argmax(y[:, i], axis=1) == np.argmax(predicted[:, i], axis=1)) sum += predicted.shape[0] * predicted.shape[-1] return count / sum - elif self.output_type == ModelOutputType.CLASSIFIER_MULTI_OUTPUT_CATEGORICAL: + elif is_multi_label(self.output_type) and is_categorical(self.output_type): return np.count_nonzero(y == predicted) / (predicted.shape[0] * y.shape[1]) elif is_binary(self.output_type): if is_logits(self.output_type): @@ -392,7 +406,6 @@ class BlackboxClassifier(Model): 
:return: Predictions from the model as numpy array. """ predictions = self._art_model.predict(x.get_samples()) - # check_correct_model_output(predictions, self.output_type) return predictions @abstractmethod diff --git a/apt/utils/models/pytorch_model.py b/apt/utils/models/pytorch_model.py index c91df72..0257a5b 100644 --- a/apt/utils/models/pytorch_model.py +++ b/apt/utils/models/pytorch_model.py @@ -10,7 +10,7 @@ from torch.utils.data import DataLoader, TensorDataset from art.utils import check_and_transform_label_format from apt.utils.datasets.datasets import PytorchData, DatasetWithPredictions, ArrayDataset -from apt.utils.models import Model, ModelOutputType, is_multi_label, is_multi_label_binary +from apt.utils.models import Model, ModelOutputType, is_multi_label, is_multi_label_binary, is_binary from apt.utils.datasets import OUTPUT_DATA_ARRAY_TYPE, array2numpy from art.estimators.classification.pytorch import PyTorchClassifier as ArtPyTorchClassifier @@ -56,8 +56,7 @@ class PyTorchClassifierWrapper(ArtPyTorchClassifier): super().__init__(model, loss, input_shape, nb_classes, optimizer, use_amp, opt_level, loss_scale, channels_first, clip_values, preprocessing_defences, postprocessing_defences, preprocessing, device_type) - self._is_single_binary = (output_type == ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_BINARY_PROBABILITIES - or output_type == ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_BINARY_LOGITS) + self._is_single_binary = not is_multi_label(output_type) and is_binary(output_type) self._is_multi_label = is_multi_label(output_type) self._is_multi_label_binary = is_multi_label_binary(output_type) diff --git a/apt/utils/models/sklearn_model.py b/apt/utils/models/sklearn_model.py index 8f09887..2515b26 100644 --- a/apt/utils/models/sklearn_model.py +++ b/apt/utils/models/sklearn_model.py @@ -93,7 +93,7 @@ class SklearnRegressor(SklearnModel): """ def __init__(self, model: BaseEstimator, black_box_access: Optional[bool] = True, unlimited_queries: 
Optional[bool] = True, **kwargs): - super().__init__(model, ModelOutputType.REGRESSOR_SCALAR, black_box_access, unlimited_queries, **kwargs) + super().__init__(model, ModelOutputType.REGRESSION, black_box_access, unlimited_queries, **kwargs) self._art_model = ScikitlearnRegressor(model) def fit(self, train_data: Dataset, **kwargs) -> None: diff --git a/tests/test_anonymizer.py b/tests/test_anonymizer.py index dd753a9..884a9b6 100644 --- a/tests/test_anonymizer.py +++ b/tests/test_anonymizer.py @@ -13,7 +13,7 @@ from torch.nn import functional from scipy.special import expit from apt.utils.datasets.datasets import PytorchData -from apt.utils.models import ModelOutputType +from apt.utils.models import CLASSIFIER_MULTI_OUTPUT_BINARY_LOGITS from apt.utils.models.pytorch_model import PyTorchClassifier from apt.anonymization import Anonymize from apt.utils.dataset_utils import get_iris_dataset_np, get_adult_dataset_pd, get_nursery_dataset_pd @@ -237,7 +237,7 @@ def test_anonymize_pytorch_multi_label_binary(): optimizer = optim.RMSprop(model.parameters(), lr=0.01) art_model = PyTorchClassifier(model=model, - output_type=ModelOutputType.CLASSIFIER_MULTI_OUTPUT_BINARY_LOGITS, + output_type=CLASSIFIER_MULTI_OUTPUT_BINARY_LOGITS, loss=criterion, optimizer=optimizer, input_shape=(24,), diff --git a/tests/test_minimizer.py b/tests/test_minimizer.py index c9e3d62..be1294b 100644 --- a/tests/test_minimizer.py +++ b/tests/test_minimizer.py @@ -24,8 +24,9 @@ from apt.utils.models.pytorch_model import PyTorchClassifier from apt.minimization import GeneralizeToRepresentative from apt.utils.dataset_utils import get_iris_dataset_np, get_adult_dataset_pd, get_german_credit_dataset_pd from apt.utils.datasets import ArrayDataset -from apt.utils.models import SklearnClassifier, ModelOutputType, SklearnRegressor, KerasClassifier - +from apt.utils.models import SklearnClassifier, SklearnRegressor, KerasClassifier, \ + CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES, 
CLASSIFIER_SINGLE_OUTPUT_CATEGORICAL, \ + CLASSIFIER_SINGLE_OUTPUT_CLASS_LOGITS, CLASSIFIER_MULTI_OUTPUT_BINARY_LOGITS tf.compat.v1.disable_eager_execution() @@ -219,7 +220,7 @@ def test_minimizer_params(cells): base_est = DecisionTreeClassifier(random_state=0, min_samples_split=2, min_samples_leaf=1) - model = SklearnClassifier(base_est, ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES) + model = SklearnClassifier(base_est, CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES) model.fit(ArrayDataset(x, y)) expected_generalizations = {'categories': {}, 'category_representatives': {}, @@ -261,7 +262,7 @@ def test_minimizer_params_not_transform(cells): samples = ArrayDataset(x, y, features) base_est = DecisionTreeClassifier(random_state=0, min_samples_split=2, min_samples_leaf=1) - model = SklearnClassifier(base_est, ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES) + model = SklearnClassifier(base_est, CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES) model.fit(ArrayDataset(x, y)) gen = GeneralizeToRepresentative(model, cells=cells, generalize_using_transform=False) @@ -273,7 +274,7 @@ def test_minimizer_fit(data_two_features): x, y, features, _ = data_two_features base_est = DecisionTreeClassifier(random_state=0, min_samples_split=2, min_samples_leaf=1) - model = SklearnClassifier(base_est, ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES) + model = SklearnClassifier(base_est, CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES) model.fit(ArrayDataset(x, y)) ad = ArrayDataset(x) predictions = model.predict(ad) @@ -303,7 +304,7 @@ def test_minimizer_ncp(data_two_features): base_est = DecisionTreeClassifier(random_state=0, min_samples_split=2, min_samples_leaf=1) - model = SklearnClassifier(base_est, ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES) + model = SklearnClassifier(base_est, CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES) model.fit(ArrayDataset(x, y)) ad = ArrayDataset(x) ad1 = ArrayDataset(x1, features_names=features) @@ -346,7 
+347,7 @@ def test_minimizer_ncp_categorical(data_four_features): base_est = DecisionTreeClassifier(random_state=0, min_samples_split=2, min_samples_leaf=1) - model = SklearnClassifier(base_est, ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES) + model = SklearnClassifier(base_est, CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES) model.fit(ArrayDataset(encoded, y)) ad = ArrayDataset(x) ad1 = ArrayDataset(x1) @@ -386,7 +387,7 @@ def test_minimizer_fit_not_transform(data_two_features): x, y, features, x1 = data_two_features base_est = DecisionTreeClassifier(random_state=0, min_samples_split=2, min_samples_leaf=1) - model = SklearnClassifier(base_est, ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES) + model = SklearnClassifier(base_est, CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES) model.fit(ArrayDataset(x, y)) ad = ArrayDataset(x) predictions = model.predict(ad) @@ -416,7 +417,7 @@ def test_minimizer_fit_pandas(data_four_features): base_est = DecisionTreeClassifier(random_state=0, min_samples_split=2, min_samples_leaf=1) - model = SklearnClassifier(base_est, ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES) + model = SklearnClassifier(base_est, CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES) model.fit(ArrayDataset(encoded, y)) predictions = model.predict(ArrayDataset(encoded)) if predictions.shape[1] > 1: @@ -454,7 +455,7 @@ def test_minimizer_params_categorical(cells_categorical): preprocessor, encoded = create_encoder(numeric_features, categorical_features, x) base_est = DecisionTreeClassifier(random_state=0, min_samples_split=2, min_samples_leaf=1) - model = SklearnClassifier(base_est, ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES) + model = SklearnClassifier(base_est, CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES) model.fit(ArrayDataset(encoded, y)) predictions = model.predict(ArrayDataset(encoded)) if predictions.shape[1] > 1: @@ -478,7 +479,7 @@ def test_minimizer_fit_qi(data_three_features): qi = ['age', 'weight'] 
base_est = DecisionTreeClassifier(random_state=0, min_samples_split=2, min_samples_leaf=1) - model = SklearnClassifier(base_est, ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES) + model = SklearnClassifier(base_est, CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES) model.fit(ArrayDataset(x, y)) ad = ArrayDataset(x) predictions = model.predict(ad) @@ -512,7 +513,7 @@ def test_minimizer_fit_pandas_qi(data_five_features): base_est = DecisionTreeClassifier(random_state=0, min_samples_split=2, min_samples_leaf=1) - model = SklearnClassifier(base_est, ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES) + model = SklearnClassifier(base_est, CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES) model.fit(ArrayDataset(encoded, y)) predictions = model.predict(ArrayDataset(encoded)) if predictions.shape[1] > 1: @@ -547,7 +548,7 @@ def test_minimize_ndarray_iris(): qi = ['sepal length (cm)', 'petal length (cm)'] base_est = DecisionTreeClassifier(random_state=0, min_samples_split=2, min_samples_leaf=1) - model = SklearnClassifier(base_est, ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CATEGORICAL) + model = SklearnClassifier(base_est, CLASSIFIER_SINGLE_OUTPUT_CATEGORICAL) model.fit(ArrayDataset(x_train, y_train)) predictions = model.predict(ArrayDataset(x_train)) if predictions.shape[1] > 1: @@ -590,7 +591,7 @@ def test_minimize_pandas_adult(): base_est = DecisionTreeClassifier(random_state=0, min_samples_split=2, min_samples_leaf=1) - model = SklearnClassifier(base_est, ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES) + model = SklearnClassifier(base_est, CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES) model.fit(ArrayDataset(encoded, y_train)) predictions = model.predict(ArrayDataset(encoded)) if predictions.shape[1] > 1: @@ -646,7 +647,7 @@ def test_german_credit_pandas(): base_est = DecisionTreeClassifier(random_state=0, min_samples_split=2, min_samples_leaf=1) - model = SklearnClassifier(base_est, 
ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES) + model = SklearnClassifier(base_est, CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES) model.fit(ArrayDataset(encoded, y_train)) predictions = model.predict(ArrayDataset(encoded)) if predictions.shape[1] > 1: @@ -764,7 +765,7 @@ def test_x_y(): qi = [0, 2] base_est = DecisionTreeClassifier(random_state=0, min_samples_split=2, min_samples_leaf=1) - model = SklearnClassifier(base_est, ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES) + model = SklearnClassifier(base_est, CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES) model.fit(ArrayDataset(x, y)) ad = ArrayDataset(x) predictions = model.predict(ad) @@ -804,7 +805,7 @@ def test_x_y_features_names(): qi = ['age', 'weight'] base_est = DecisionTreeClassifier(random_state=0, min_samples_split=2, min_samples_leaf=1) - model = SklearnClassifier(base_est, ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES) + model = SklearnClassifier(base_est, CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES) model.fit(ArrayDataset(x, y)) ad = ArrayDataset(x) predictions = model.predict(ad) @@ -1206,7 +1207,7 @@ def test_keras_model(): base_est.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"]) - model = KerasClassifier(base_est, ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES) + model = KerasClassifier(base_est, CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES) model.fit(ArrayDataset(x, y)) ad = ArrayDataset(x_test) predictions = model.predict(ad) @@ -1274,7 +1275,7 @@ def test_minimizer_pytorch(data_three_features): optimizer = optim.Adam(base_est.parameters(), lr=0.01) model = PyTorchClassifier(model=base_est, - output_type=ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CLASS_LOGITS, + output_type=CLASSIFIER_SINGLE_OUTPUT_CLASS_LOGITS, loss=criterion, optimizer=optimizer, input_shape=(3,), @@ -1316,7 +1317,7 @@ def test_minimizer_pytorch_iris(): optimizer = optim.Adam(base_est.parameters(), lr=0.01) model = 
PyTorchClassifier(model=base_est, - output_type=ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CLASS_LOGITS, + output_type=CLASSIFIER_SINGLE_OUTPUT_CLASS_LOGITS, loss=criterion, optimizer=optimizer, input_shape=(4,), @@ -1387,7 +1388,7 @@ def test_minimizer_pytorch_multi_label_binary(): optimizer = optim.RMSprop(orig_model.parameters(), lr=0.01) model = PyTorchClassifier(model=orig_model, - output_type=ModelOutputType.CLASSIFIER_MULTI_OUTPUT_BINARY_LOGITS, + output_type=CLASSIFIER_MULTI_OUTPUT_BINARY_LOGITS, loss=criterion, optimizer=optimizer, input_shape=(24,), @@ -1444,7 +1445,7 @@ def test_errors(): y = np.array([1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0]) base_est = DecisionTreeClassifier(random_state=0, min_samples_split=2, min_samples_leaf=1) - model = SklearnClassifier(base_est, ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES) + model = SklearnClassifier(base_est, CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES) model.fit(ArrayDataset(X, y)) ad = ArrayDataset(X) predictions = model.predict(ad) diff --git a/tests/test_model.py b/tests/test_model.py index de36237..0667b7a 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -2,7 +2,12 @@ import pytest import numpy as np from apt.utils.models import SklearnClassifier, SklearnRegressor, ModelOutputType, KerasClassifier, KerasRegressor, \ - BlackboxClassifierPredictions, BlackboxClassifierPredictFunction, is_one_hot, get_nb_classes, XGBoostClassifier + BlackboxClassifierPredictions, BlackboxClassifierPredictFunction, is_one_hot, get_nb_classes, XGBoostClassifier, \ + CLASSIFIER_SINGLE_OUTPUT_CATEGORICAL, CLASSIFIER_SINGLE_OUTPUT_BINARY_PROBABILITIES, \ + CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES, CLASSIFIER_SINGLE_OUTPUT_BINARY_LOGITS, \ + CLASSIFIER_SINGLE_OUTPUT_CLASS_LOGITS, CLASSIFIER_MULTI_OUTPUT_CATEGORICAL, \ + CLASSIFIER_MULTI_OUTPUT_BINARY_PROBABILITIES, CLASSIFIER_MULTI_OUTPUT_CLASS_PROBABILITIES, \ + CLASSIFIER_MULTI_OUTPUT_BINARY_LOGITS, CLASSIFIER_MULTI_OUTPUT_CLASS_LOGITS from 
apt.utils.datasets import ArrayDataset, Data, DatasetWithPredictions from apt.utils import dataset_utils @@ -24,7 +29,7 @@ tf.compat.v1.disable_eager_execution() def test_sklearn_classifier(): (x_train, y_train), (x_test, y_test) = dataset_utils.get_iris_dataset_np() underlying_model = RandomForestClassifier() - model = SklearnClassifier(underlying_model, ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CATEGORICAL) + model = SklearnClassifier(underlying_model, CLASSIFIER_SINGLE_OUTPUT_CATEGORICAL) train = ArrayDataset(x_train, y_train) test = ArrayDataset(x_test, y_test) model.fit(train) @@ -81,7 +86,7 @@ def test_keras_classifier(): underlying_model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"]) - model = KerasClassifier(underlying_model, ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CATEGORICAL) + model = KerasClassifier(underlying_model, CLASSIFIER_SINGLE_OUTPUT_CATEGORICAL) train = ArrayDataset(x_train, y_train) test = ArrayDataset(x_test, y_test) @@ -119,7 +124,7 @@ def test_xgboost_classifier(): (x_train, y_train), (x_test, y_test) = dataset_utils.get_iris_dataset_np() underlying_model = XGBClassifier() underlying_model.fit(x_train, y_train) - model = XGBoostClassifier(underlying_model, ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CATEGORICAL, + model = XGBoostClassifier(underlying_model, CLASSIFIER_SINGLE_OUTPUT_CATEGORICAL, input_shape=(4,), nb_classes=3) train = ArrayDataset(x_train, y_train) test = ArrayDataset(x_test, y_test) @@ -138,7 +143,7 @@ def test_blackbox_classifier(): train = ArrayDataset(x_train, y_train) test = ArrayDataset(x_test, y_test) data = Data(train, test) - model = BlackboxClassifierPredictions(data, ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CATEGORICAL) + model = BlackboxClassifierPredictions(data, CLASSIFIER_SINGLE_OUTPUT_CATEGORICAL) pred = model.predict(test) assert (pred.shape[0] == x_test.shape[0]) @@ -154,7 +159,7 @@ def test_blackbox_classifier_predictions(): train = DatasetWithPredictions(y_train, x_train) 
test = DatasetWithPredictions(y_test, x_test) data = Data(train, test) - model = BlackboxClassifierPredictions(data, ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CATEGORICAL) + model = BlackboxClassifierPredictions(data, CLASSIFIER_SINGLE_OUTPUT_CATEGORICAL) pred = model.predict(test) assert (pred.shape[0] == x_test.shape[0]) assert model.model_type is None @@ -169,7 +174,7 @@ def test_blackbox_classifier_predictions_y(): train = DatasetWithPredictions(y_train, x_train, y_train) test = DatasetWithPredictions(y_test, x_test, y_test) data = Data(train, test) - model = BlackboxClassifierPredictions(data, ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CATEGORICAL) + model = BlackboxClassifierPredictions(data, CLASSIFIER_SINGLE_OUTPUT_CATEGORICAL) pred = model.predict(test) assert (pred.shape[0] == x_test.shape[0]) @@ -189,7 +194,7 @@ def test_blackbox_classifier_predictions_multi_label_cat(): train = DatasetWithPredictions(y_train, x_train, y_train) test = DatasetWithPredictions(y_test, x_test, y_test) data = Data(train, test) - model = BlackboxClassifierPredictions(data, ModelOutputType.CLASSIFIER_MULTI_OUTPUT_CATEGORICAL) + model = BlackboxClassifierPredictions(data, CLASSIFIER_MULTI_OUTPUT_CATEGORICAL) pred = model.predict(test) assert (pred.shape[0] == x_test.shape[0]) @@ -217,7 +222,7 @@ def test_blackbox_classifier_predictions_multi_label_binary(): train = DatasetWithPredictions(pred_train, x_train, y_train) test = DatasetWithPredictions(pred_test, x_test, y_test) data = Data(train, test) - model = BlackboxClassifierPredictions(data, ModelOutputType.CLASSIFIER_MULTI_OUTPUT_BINARY_PROBABILITIES) + model = BlackboxClassifierPredictions(data, CLASSIFIER_MULTI_OUTPUT_BINARY_PROBABILITIES) pred = model.predict(test) assert (pred.shape[0] == x_test.shape[0]) @@ -243,7 +248,7 @@ def test_blackbox_classifier_no_test(): train = ArrayDataset(x_train, y_train) data = Data(train) - model = BlackboxClassifierPredictions(data, ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CATEGORICAL) + 
model = BlackboxClassifierPredictions(data, CLASSIFIER_SINGLE_OUTPUT_CATEGORICAL) pred = model.predict(train) assert (pred.shape[0] == x_train.shape[0]) @@ -260,7 +265,7 @@ def test_blackbox_classifier_no_train(): test = ArrayDataset(x_test, y_test) data = Data(test=test) - model = BlackboxClassifierPredictions(data, ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CATEGORICAL) + model = BlackboxClassifierPredictions(data, CLASSIFIER_SINGLE_OUTPUT_CATEGORICAL) pred = model.predict(test) assert (pred.shape[0] == x_test.shape[0]) @@ -278,7 +283,7 @@ def test_blackbox_classifier_no_test_y(): train = ArrayDataset(x_train, y_train) test = ArrayDataset(x_test) data = Data(train, test) - model = BlackboxClassifierPredictions(data, ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CATEGORICAL) + model = BlackboxClassifierPredictions(data, CLASSIFIER_SINGLE_OUTPUT_CATEGORICAL) pred = model.predict(train) assert (pred.shape[0] == x_train.shape[0]) @@ -301,7 +306,7 @@ def test_blackbox_classifier_no_train_y(): train = ArrayDataset(x_train) test = ArrayDataset(x_test, y_test) data = Data(train, test) - model = BlackboxClassifierPredictions(data, ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CATEGORICAL) + model = BlackboxClassifierPredictions(data, CLASSIFIER_SINGLE_OUTPUT_CATEGORICAL) pred = model.predict(test) assert (pred.shape[0] == x_test.shape[0]) @@ -325,7 +330,7 @@ def test_blackbox_classifier_probabilities(): train = ArrayDataset(x_train, y_train) data = Data(train) - model = BlackboxClassifierPredictions(data, ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES) + model = BlackboxClassifierPredictions(data, CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES) pred = model.predict(train) assert (pred.shape[0] == x_train.shape[0]) assert (0.0 < pred).all() @@ -345,7 +350,7 @@ def test_blackbox_classifier_multi_label_probabilities(): train = ArrayDataset(x_train, y_train) data = Data(train) - model = BlackboxClassifierPredictions(data, 
ModelOutputType.CLASSIFIER_MULTI_OUTPUT_CLASS_PROBABILITIES) + model = BlackboxClassifierPredictions(data, CLASSIFIER_MULTI_OUTPUT_CLASS_PROBABILITIES) pred = model.predict(train) assert (pred.shape[0] == x_train.shape[0]) assert (0.0 < pred).all() @@ -361,7 +366,7 @@ def test_blackbox_classifier_predict(): train = ArrayDataset(x_train, y_train) - model = BlackboxClassifierPredictFunction(predict, ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES, + model = BlackboxClassifierPredictFunction(predict, CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES, (4,), 3) pred = model.predict(train) assert (pred.shape[0] == x_train.shape[0]) @@ -381,7 +386,7 @@ def test_blackbox_classifier_predict_scalar(): train = ArrayDataset(x_train, y_train) - model = BlackboxClassifierPredictFunction(predict, ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES, + model = BlackboxClassifierPredictFunction(predict, CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES, (4,), 3) pred = model.predict(train) assert (pred.shape[0] == x_train.shape[0]) @@ -400,7 +405,7 @@ def test_is_one_hot(): def test_get_nb_classes(): (_, y_train), (_, y_test) = dataset_utils.get_iris_dataset_np() - output_type = ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CATEGORICAL + output_type = CLASSIFIER_SINGLE_OUTPUT_CATEGORICAL # shape: (x,) - not 1-hot nb_classes_test = get_nb_classes(y_test, output_type) nb_classes_train = get_nb_classes(y_train, output_type) diff --git a/tests/test_pytorch.py b/tests/test_pytorch.py index 2346bbc..bd546f2 100644 --- a/tests/test_pytorch.py +++ b/tests/test_pytorch.py @@ -6,7 +6,9 @@ from scipy.special import expit from art.utils import check_and_transform_label_format from apt.utils.datasets.datasets import PytorchData -from apt.utils.models import ModelOutputType +from apt.utils.models import CLASSIFIER_SINGLE_OUTPUT_CLASS_LOGITS, CLASSIFIER_SINGLE_OUTPUT_BINARY_LOGITS, \ + CLASSIFIER_SINGLE_OUTPUT_BINARY_PROBABILITIES, CLASSIFIER_MULTI_OUTPUT_CLASS_LOGITS, \ + 
CLASSIFIER_MULTI_OUTPUT_BINARY_LOGITS from apt.utils.models.pytorch_model import PyTorchClassifier from art.utils import load_nursery from apt.utils import dataset_utils @@ -128,7 +130,7 @@ def test_pytorch_nursery_state_dict(): optimizer = optim.Adam(inner_model.parameters(), lr=0.01) model = PyTorchClassifier(model=inner_model, - output_type=ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CLASS_LOGITS, + output_type=CLASSIFIER_SINGLE_OUTPUT_CLASS_LOGITS, loss=criterion, optimizer=optimizer, input_shape=(24,), @@ -161,7 +163,7 @@ def test_pytorch_nursery_save_entire_model(): optimizer = optim.Adam(inner_model.parameters(), lr=0.01) model = PyTorchClassifier(model=inner_model, - output_type=ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CLASS_LOGITS, + output_type=CLASSIFIER_SINGLE_OUTPUT_CLASS_LOGITS, loss=criterion, optimizer=optimizer, input_shape=(24,), @@ -201,7 +203,7 @@ def test_pytorch_predictions_single_label_binary(): criterion = nn.BCEWithLogitsLoss() optimizer = optim.Adam(inner_model.parameters(), lr=0.01) - model = PyTorchClassifier(model=inner_model, output_type=ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_BINARY_LOGITS, + model = PyTorchClassifier(model=inner_model, output_type=CLASSIFIER_SINGLE_OUTPUT_BINARY_LOGITS, loss=criterion, optimizer=optimizer, input_shape=(4,), nb_classes=2) @@ -238,7 +240,7 @@ def test_pytorch_predictions_single_label_binary_prob(): optimizer = optim.Adam(inner_model.parameters(), lr=0.01) model = PyTorchClassifier(model=inner_model, - output_type=ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_BINARY_PROBABILITIES, + output_type=CLASSIFIER_SINGLE_OUTPUT_BINARY_PROBABILITIES, loss=criterion, optimizer=optimizer, input_shape=(4,), nb_classes=2) @@ -307,7 +309,7 @@ def test_pytorch_predictions_multi_label_cat(): optimizer.step() model = PyTorchClassifier(model=inner_model, - output_type=ModelOutputType.CLASSIFIER_MULTI_OUTPUT_CLASS_LOGITS, + output_type=CLASSIFIER_MULTI_OUTPUT_CLASS_LOGITS, loss=criterion, optimizer=optimizer, input_shape=(24,), 
@@ -348,7 +350,7 @@ def test_pytorch_predictions_multi_label_binary(): optimizer = optim.RMSprop(inner_model.parameters(), lr=0.01) model = PyTorchClassifier(model=inner_model, - output_type=ModelOutputType.CLASSIFIER_MULTI_OUTPUT_BINARY_LOGITS, + output_type=CLASSIFIER_MULTI_OUTPUT_BINARY_LOGITS, loss=criterion, optimizer=optimizer, input_shape=(24,),