diff --git a/apt/minimization/minimizer.py b/apt/minimization/minimizer.py index 4b9e657..9acd1b8 100644 --- a/apt/minimization/minimizer.py +++ b/apt/minimization/minimizer.py @@ -16,7 +16,8 @@ from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor from sklearn.model_selection import train_test_split from apt.utils.datasets import ArrayDataset, DATA_PANDAS_NUMPY_TYPE -from apt.utils.models import Model, SklearnRegressor, ModelOutputType, SklearnClassifier +from apt.utils.models import Model, SklearnRegressor, SklearnClassifier, \ + CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES @dataclass @@ -93,7 +94,10 @@ class GeneralizeToRepresentative(BaseEstimator, MetaEstimatorMixin, TransformerM if is_regression: self.estimator = SklearnRegressor(estimator) else: - self.estimator = SklearnClassifier(estimator, ModelOutputType.CLASSIFIER_PROBABILITIES) + # model output type is not critical as it only affects computation of nb_classes, which is in any case + # the same currently for single and multi output probabilities. 
+ self.estimator = SklearnClassifier(estimator, + CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES) self.target_accuracy = target_accuracy self.cells = cells self.categorical_features = [] @@ -678,7 +682,7 @@ class GeneralizeToRepresentative(BaseEstimator, MetaEstimatorMixin, TransformerM # this is a leaf # if it is a regression problem we do not use label label = self._calculate_cell_label(node) if not self.is_regression else 1 - hist = [int(i) for i in self._dt.tree_.value[node][0]] if not self.is_regression else [] + hist = self._dt.tree_.value[node] cell = {'label': label, 'hist': hist, 'ranges': {}, 'id': int(node)} return [cell] @@ -709,8 +713,11 @@ class GeneralizeToRepresentative(BaseEstimator, MetaEstimatorMixin, TransformerM return cells def _calculate_cell_label(self, node): - label_hist = self._dt.tree_.value[node][0] - return int(self._dt.classes_[np.argmax(label_hist)]) + label_hist = self._dt.tree_.value[node] + if isinstance(self._dt.classes_, list): + return [self._dt.classes_[output][class_index] + for output, class_index in enumerate(np.argmax(label_hist, axis=1))] + return [self._dt.classes_[np.argmax(label_hist[0])]] def _modify_cells(self): cells = [] @@ -807,9 +814,12 @@ class GeneralizeToRepresentative(BaseEstimator, MetaEstimatorMixin, TransformerM # else: nothing to do, stay with previous cells def _calculate_level_cell_label(self, left_cell, right_cell, new_cell): - new_cell['hist'] = [x + y for x, y in - zip(left_cell['hist'], right_cell['hist'])] if not self.is_regression else [] - new_cell['label'] = int(self._dt.classes_[np.argmax(new_cell['hist'])]) if not self.is_regression else 1 + new_cell['hist'] = left_cell['hist'] + right_cell['hist'] + if isinstance(self._dt.classes_, list): + new_cell['label'] = [self._dt.classes_[output][class_index] + for output, class_index in enumerate(np.argmax(new_cell['hist'], axis=1))] + else: + new_cell['label'] = [self._dt.classes_[np.argmax(new_cell['hist'][0])]] def _get_nodes_level(self, level): # 
level = distance from lowest leaf @@ -837,26 +847,28 @@ class GeneralizeToRepresentative(BaseEstimator, MetaEstimatorMixin, TransformerM # return all nodes with depth == level or leaves higher than level return [i for i, x in enumerate(node_depth) if x == depth or (x < depth and is_leaves[i])] - def _attach_cells_representatives(self, prepared_data, originalTrainFeatures, labelFeature, level_nodes): + def _attach_cells_representatives(self, prepared_data, original_train_features, label_feature, level_nodes): # prepared data include one hot encoded categorical data, # if there is no categorical data prepared data is original data nodeIds = self._find_sample_nodes(prepared_data, level_nodes) - labels_df = pd.DataFrame(labelFeature, columns=['label']) for cell in self.cells: cell['representative'] = {} # get all rows in cell indexes = [i for i, x in enumerate(nodeIds) if x == cell['id']] - original_rows = originalTrainFeatures.iloc[indexes] + original_rows = original_train_features.iloc[indexes] sample_rows = prepared_data.iloc[indexes] - sample_labels = labels_df.iloc[indexes]['label'].values.tolist() + # get rows with matching label - if self.is_regression: + if self.is_regression or (len(label_feature.shape) > 1 and label_feature.shape[1] > 1): match_samples = sample_rows match_rows = original_rows else: - indexes = [i for i, label in enumerate(sample_labels) if label == cell['label']] + labels_df = pd.DataFrame(label_feature, columns=['label']) + sample_labels = labels_df.iloc[indexes]['label'].values.tolist() + indexes = [i for i, label in enumerate(sample_labels) if label == cell['label'][0]] match_samples = sample_rows.iloc[indexes] match_rows = original_rows.iloc[indexes] + # find the "middle" of the cluster array = match_samples.values # Only works with numpy 1.9.0 and higher!!! 
diff --git a/apt/utils/datasets/__init__.py b/apt/utils/datasets/__init__.py index 6252130..81f7d19 100644 --- a/apt/utils/datasets/__init__.py +++ b/apt/utils/datasets/__init__.py @@ -4,4 +4,4 @@ Implementation of datasets utility components for datasets creation, load, and s """ from apt.utils.datasets.datasets import Dataset, StoredDataset, DatasetFactory, Data, ArrayDataset, \ - DatasetWithPredictions, OUTPUT_DATA_ARRAY_TYPE, DATA_PANDAS_NUMPY_TYPE + DatasetWithPredictions, array2numpy, OUTPUT_DATA_ARRAY_TYPE, DATA_PANDAS_NUMPY_TYPE diff --git a/apt/utils/datasets/datasets.py b/apt/utils/datasets/datasets.py index b3278f4..da133df 100644 --- a/apt/utils/datasets/datasets.py +++ b/apt/utils/datasets/datasets.py @@ -233,7 +233,7 @@ class ArrayDataset(Dataset): raise ValueError("The supplied features are not the same as in the data features") self.features_names = x.columns.to_list() - if self._y is not None and len(self._x) != len(self._y): + if self._y is not None and self._x.shape[0] != self._y.shape[0]: raise ValueError("Non equivalent lengths of x and y") def get_samples(self) -> OUTPUT_DATA_ARRAY_TYPE: @@ -266,6 +266,8 @@ class DatasetWithPredictions(Dataset): Dataset that is based on arrays (e.g., numpy/pandas/list...). Includes predictions from a model, and possibly also features and true labels. 
+ :param pred: collection of model predictions + :type pred: numpy array or pandas DataFrame or list or pytorch Tensor :param x: collection of data samples :type x: numpy array or pandas DataFrame or list or pytorch Tensor :param y: collection of labels diff --git a/apt/utils/models/__init__.py b/apt/utils/models/__init__.py index ff582b4..6472c45 100644 --- a/apt/utils/models/__init__.py +++ b/apt/utils/models/__init__.py @@ -1,6 +1,11 @@ from apt.utils.models.model import Model, BlackboxClassifier, ModelOutputType, ScoringMethod, \ BlackboxClassifierPredictions, BlackboxClassifierPredictFunction, get_nb_classes, is_one_hot, \ - check_correct_model_output + check_correct_model_output, is_multi_label, is_multi_label_binary, is_logits, is_binary, \ + CLASSIFIER_SINGLE_OUTPUT_CATEGORICAL, CLASSIFIER_SINGLE_OUTPUT_BINARY_PROBABILITIES, \ + CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES, CLASSIFIER_SINGLE_OUTPUT_BINARY_LOGITS, \ + CLASSIFIER_SINGLE_OUTPUT_CLASS_LOGITS, CLASSIFIER_MULTI_OUTPUT_CATEGORICAL, \ + CLASSIFIER_MULTI_OUTPUT_BINARY_PROBABILITIES, CLASSIFIER_MULTI_OUTPUT_CLASS_PROBABILITIES, \ + CLASSIFIER_MULTI_OUTPUT_BINARY_LOGITS, CLASSIFIER_MULTI_OUTPUT_CLASS_LOGITS from apt.utils.models.sklearn_model import SklearnModel, SklearnClassifier, SklearnRegressor from apt.utils.models.keras_model import KerasClassifier, KerasRegressor from apt.utils.models.xgboost_model import XGBoostClassifier diff --git a/apt/utils/models/keras_model.py b/apt/utils/models/keras_model.py index 6f89a35..ac890cb 100644 --- a/apt/utils/models/keras_model.py +++ b/apt/utils/models/keras_model.py @@ -4,7 +4,7 @@ import numpy as np from sklearn.metrics import mean_squared_error -from apt.utils.models import Model, ModelOutputType, ScoringMethod, check_correct_model_output +from apt.utils.models import Model, ModelOutputType, ScoringMethod, is_logits from apt.utils.datasets import Dataset, OUTPUT_DATA_ARRAY_TYPE from art.utils import check_and_transform_label_format @@ -39,9 +39,7 @@ 
class KerasClassifier(KerasModel): def __init__(self, model: "keras.models.Model", output_type: ModelOutputType, black_box_access: Optional[bool] = True, unlimited_queries: Optional[bool] = True, **kwargs): super().__init__(model, output_type, black_box_access, unlimited_queries, **kwargs) - logits = False - if output_type == ModelOutputType.CLASSIFIER_LOGITS: - logits = True + logits = is_logits(output_type) self._art_model = ArtKerasClassifier(model, use_logits=logits) def fit(self, train_data: Dataset, **kwargs) -> None: @@ -65,7 +63,6 @@ class KerasClassifier(KerasModel): :return: Predictions from the model as numpy array (class probabilities, if supported). """ predictions = self._art_model.predict(x.get_samples(), **kwargs) - check_correct_model_output(predictions, self.output_type) return predictions def score(self, test_data: Dataset, scoring_method: Optional[ScoringMethod] = ScoringMethod.ACCURACY, **kwargs): @@ -104,7 +101,7 @@ class KerasRegressor(KerasModel): """ def __init__(self, model: "keras.models.Model", black_box_access: Optional[bool] = True, unlimited_queries: Optional[bool] = True, **kwargs): - super().__init__(model, ModelOutputType.REGRESSOR_SCALAR, black_box_access, unlimited_queries, **kwargs) + super().__init__(model, ModelOutputType.REGRESSION, black_box_access, unlimited_queries, **kwargs) self._art_model = ArtKerasRegressor(model) def fit(self, train_data: Dataset, **kwargs) -> None: diff --git a/apt/utils/models/model.py b/apt/utils/models/model.py index 157158e..8c03301 100644 --- a/apt/utils/models/model.py +++ b/apt/utils/models/model.py @@ -1,9 +1,10 @@ from abc import ABCMeta, abstractmethod from typing import Any, Optional, Callable, Tuple, Union, TYPE_CHECKING -from enum import Enum, auto +from enum import Enum, Flag, auto import numpy as np +from scipy.special import expit -from apt.utils.datasets import Dataset, Data, OUTPUT_DATA_ARRAY_TYPE +from apt.utils.datasets import Dataset, Data, array2numpy, OUTPUT_DATA_ARRAY_TYPE 
from art.estimators.classification import BlackBoxClassifier from art.utils import check_and_transform_label_format @@ -11,11 +12,40 @@ if TYPE_CHECKING: import torch -class ModelOutputType(Enum): - CLASSIFIER_PROBABILITIES = auto() # vector of probabilities - CLASSIFIER_LOGITS = auto() # vector of logits - CLASSIFIER_SCALAR = auto() # label only - REGRESSOR_SCALAR = auto() # value +class ModelOutputType(Flag): + CLASSIFIER = auto() + MULTI_OUTPUT = auto() + BINARY = auto() + LOGITS = auto() + PROBABILITIES = auto() + REGRESSION = auto() + + +# class labels +CLASSIFIER_SINGLE_OUTPUT_CATEGORICAL = ModelOutputType.CLASSIFIER +# single binary probability +CLASSIFIER_SINGLE_OUTPUT_BINARY_PROBABILITIES = ModelOutputType.CLASSIFIER | ModelOutputType.BINARY | \ + ModelOutputType.PROBABILITIES +# vector of class probabilities +CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES = ModelOutputType.CLASSIFIER | ModelOutputType.PROBABILITIES +# single binary logit +CLASSIFIER_SINGLE_OUTPUT_BINARY_LOGITS = ModelOutputType.CLASSIFIER | ModelOutputType.BINARY | ModelOutputType.LOGITS +# vector of logits +CLASSIFIER_SINGLE_OUTPUT_CLASS_LOGITS = ModelOutputType.CLASSIFIER | ModelOutputType.LOGITS +# vector of class labels +CLASSIFIER_MULTI_OUTPUT_CATEGORICAL = ModelOutputType.MULTI_OUTPUT | ModelOutputType.CLASSIFIER +# vector of binary probabilities, 1 per output +CLASSIFIER_MULTI_OUTPUT_BINARY_PROBABILITIES = ModelOutputType.MULTI_OUTPUT | ModelOutputType.CLASSIFIER | \ + ModelOutputType.BINARY | ModelOutputType.PROBABILITIES +# vector of class probabilities for multiple outputs +CLASSIFIER_MULTI_OUTPUT_CLASS_PROBABILITIES = ModelOutputType.MULTI_OUTPUT | ModelOutputType.CLASSIFIER | \ + ModelOutputType.PROBABILITIES +# vector of binary logits +CLASSIFIER_MULTI_OUTPUT_BINARY_LOGITS = ModelOutputType.MULTI_OUTPUT | ModelOutputType.CLASSIFIER | \ + ModelOutputType.BINARY | ModelOutputType.LOGITS +# vector of logits for multiple outputs +CLASSIFIER_MULTI_OUTPUT_CLASS_LOGITS = 
ModelOutputType.MULTI_OUTPUT | ModelOutputType.CLASSIFIER | \ + ModelOutputType.LOGITS class ModelType(Enum): @@ -29,16 +59,52 @@ class ScoringMethod(Enum): def is_one_hot(y: OUTPUT_DATA_ARRAY_TYPE) -> bool: - return len(y.shape) == 2 and y.shape[1] > 1 + if not isinstance(y, list): + return len(y.shape) == 2 and y.shape[1] > 1 and np.all(np.around(np.sum(y, axis=1), decimals=4) == 1) + return False -def get_nb_classes(y: OUTPUT_DATA_ARRAY_TYPE) -> int: +def is_multi_label(output_type: ModelOutputType) -> bool: + return ModelOutputType.MULTI_OUTPUT in output_type + + +def is_multi_label_binary(output_type: ModelOutputType) -> bool: + return (ModelOutputType.MULTI_OUTPUT in output_type + and ModelOutputType.BINARY in output_type) + + +def is_binary(output_type: ModelOutputType) -> bool: + return ModelOutputType.BINARY in output_type + + +def is_categorical(output_type: ModelOutputType) -> bool: + return (ModelOutputType.CLASSIFIER in output_type + and ModelOutputType.BINARY not in output_type + and ModelOutputType.PROBABILITIES not in output_type + and ModelOutputType.LOGITS not in output_type) + + +def is_probabilities(output_type: ModelOutputType) -> bool: + return ModelOutputType.PROBABILITIES in output_type + + +def is_logits(output_type: ModelOutputType) -> bool: + return ModelOutputType.LOGITS in output_type + + +def is_logits_or_probabilities(output_type: ModelOutputType) -> bool: + return is_probabilities(output_type) or is_logits(output_type) + + +def get_nb_classes(y: OUTPUT_DATA_ARRAY_TYPE, output_type: ModelOutputType) -> int: """ Get the number of classes from an array of labels :param y: The labels :type y: numpy array - :return: The number of classes as integer + :param output_type: The output type of the model, as provided by the user + :type output_type: ModelOutputType + :return: The number of classes as integer, or list of integers for multi-label """ if y is None: return 0 @@ -48,8 +114,13 @@ def get_nb_classes(y: OUTPUT_DATA_ARRAY_TYPE) -> int: 
if is_one_hot(y): return y.shape[1] - else: + elif is_multi_label(output_type): + # for now just return the prediction dimension - this works in most cases + return y.shape[1] + elif is_categorical(output_type): return int(np.max(y) + 1) + else: # binary + return 2 def check_correct_model_output(y: OUTPUT_DATA_ARRAY_TYPE, output_type: ModelOutputType): @@ -61,10 +132,9 @@ def check_correct_model_output(y: OUTPUT_DATA_ARRAY_TYPE, output_type: ModelOutp :type output_type: ModelOutputType :raises: ValueError (in case of mismatch) """ - if not is_one_hot(y): # 1D array - if output_type == ModelOutputType.CLASSIFIER_PROBABILITIES or output_type == ModelOutputType.CLASSIFIER_LOGITS: - raise ValueError("Incompatible model output types. Model outputs 1D array of categorical scalars while " - "output type is set to ", output_type) + if not is_one_hot(y) and not is_multi_label(output_type) and is_categorical(output_type): + raise ValueError("Incompatible model output types. Model outputs 1D array of categorical scalars while " + "output type is set to ", output_type) class Model(metaclass=ABCMeta): @@ -115,16 +185,81 @@ class Model(metaclass=ABCMeta): """ raise NotImplementedError - @abstractmethod def score(self, test_data: Dataset, **kwargs): """ Score the model using test data. :param test_data: Test data. - :type train_data: `Dataset` + :type test_data: `Dataset` + :keyword predictions: Model predictions to score. If provided, these will be used instead of calling the model's + `predict` method. + :type predictions: `DatasetWithPredictions` with the `pred` field filled. + :keyword scoring_method: The method for scoring predictions. Default is ACCURACY. + :type scoring_method: `ScoringMethod`, optional + :keyword binary_threshold: The threshold to use on binary classification probabilities to assign the positive + class. + :type binary_threshold: float, optional. Default is 0.5. 
+ :keyword apply_non_linearity: A non-linear function to apply to the result of the 'predict' method, in case the + model outputs logits (e.g., sigmoid). + :type apply_non_linearity: Callable, should be possible to apply directly to the numpy output of the 'predict' + method, optional. + :keyword nb_classes: number of classes (for classification models). + :type nb_classes: int, optional. :return: the score as float (for classifiers, between 0 and 1) """ - raise NotImplementedError + predictions = kwargs.get('predictions') + nb_classes = kwargs.get('nb_classes') + scoring_method = kwargs.get('scoring_method', ScoringMethod.ACCURACY) + binary_threshold = kwargs.get('binary_threshold', 0.5) + apply_non_linearity = kwargs.get('apply_non_linearity', expit) + + if test_data.get_samples() is None and predictions is None: + raise ValueError('score can only be computed when test data or predictions are available') + if test_data.get_labels() is None: + raise ValueError('score can only be computed when labels are available') + if predictions: + predicted = predictions.get_predictions() + else: + predicted = self.predict(test_data) + y = array2numpy(test_data.get_labels()) + + if scoring_method == ScoringMethod.ACCURACY: + if not is_multi_label(self.output_type) and not is_binary(self.output_type): + if nb_classes is not None: + y = check_and_transform_label_format(y, nb_classes=nb_classes) + # categorical has been 1-hot encoded by check_and_transform_label_format + return np.count_nonzero(np.argmax(y, axis=1) == np.argmax(predicted, axis=1)) / predicted.shape[0] + elif (is_multi_label(self.output_type) and not is_binary(self.output_type) + and is_logits_or_probabilities(self.output_type)): + if predicted.shape != y.shape: + raise ValueError('Do not know how to compare arrays with different shapes') + elif len(predicted.shape) < 3: + raise ValueError('Do not know how to compare 2-D arrays for multi-output non-binary case') + else: + sum = 0 + count = 0 + for i in 
range(predicted.shape[1]): + count += np.count_nonzero(np.argmax(y[:, i], axis=1) == np.argmax(predicted[:, i], axis=1)) + sum += predicted.shape[0]  # each output contributes one argmax comparison per sample + return count / sum + elif is_multi_label(self.output_type) and is_categorical(self.output_type): + return np.count_nonzero(y == predicted) / (predicted.shape[0] * y.shape[1]) + elif is_binary(self.output_type): + if is_logits(self.output_type): + if apply_non_linearity: + predicted = apply_non_linearity(predicted) + else: # apply sigmoid + predicted = expit(predicted) + predicted[predicted < binary_threshold] = 0 + predicted[predicted >= binary_threshold] = 1 + if len(y.shape) > 1: + return np.count_nonzero(y == predicted) / (predicted.shape[0] * y.shape[1]) + else: + return np.count_nonzero(y == predicted.reshape(-1)) / (predicted.shape[0]) + else: + raise NotImplementedError(f'score method not implemented for output type: {self.output_type}') + else: + raise NotImplementedError(f'scoring method not implemented: {scoring_method}') @property def model(self) -> Any: @@ -167,7 +302,8 @@ class Model(metaclass=ABCMeta): class BlackboxClassifier(Model): """ - Wrapper for black-box ML classification models. + Wrapper for black-box ML classification models. This is an abstract class and must be instantiated as either + BlackboxClassifierPredictFunction or BlackboxClassifierPredictions. :param model: The training and/or test data along with the model's predictions for the data or a callable predict method. @@ -247,6 +383,13 @@ class BlackboxClassifier(Model): """ return self._optimizer + def score(self, test_data: Dataset, **kwargs): + """ + Score the model using test data. + """ + kwargs['nb_classes'] = self.nb_classes + return super().score(test_data, **kwargs) + def fit(self, train_data: Dataset, **kwargs) -> None: """ A blackbox model cannot be fit. @@ -263,28 +406,8 @@ class BlackboxClassifier(Model): :return: Predictions from the model as numpy array. 
""" predictions = self._art_model.predict(x.get_samples()) - check_correct_model_output(predictions, self.output_type) return predictions - def score(self, test_data: Dataset, scoring_method: Optional[ScoringMethod] = ScoringMethod.ACCURACY, **kwargs): - """ - Score the model using test data. - - :param test_data: Test data. - :type train_data: `Dataset` - :param scoring_method: The method for scoring predictions. Default is ACCURACY. - :type scoring_method: `ScoringMethod`, optional - :return: the score as float (for classifiers, between 0 and 1) - """ - if test_data.get_samples() is None or test_data.get_labels() is None: - raise ValueError('score can only be computed when test data and labels are available') - predicted = self._art_model.predict(test_data.get_samples()) - y = check_and_transform_label_format(test_data.get_labels(), nb_classes=self._nb_classes) - if scoring_method == ScoringMethod.ACCURACY: - return np.count_nonzero(np.argmax(y, axis=1) == np.argmax(predicted, axis=1)) / predicted.shape[0] - else: - raise NotImplementedError - @abstractmethod def get_predictions(self) -> Union[Callable, Tuple[OUTPUT_DATA_ARRAY_TYPE, OUTPUT_DATA_ARRAY_TYPE]]: """ @@ -325,17 +448,9 @@ class BlackboxClassifierPredictions(BlackboxClassifier): if y_test_pred is None: y_test_pred = model.get_test_labels() - if y_train_pred is not None: - check_correct_model_output(y_train_pred, self.output_type) - if y_test_pred is not None: - check_correct_model_output(y_test_pred, self.output_type) - if y_train_pred is not None and len(y_train_pred.shape) == 1: - self._nb_classes = get_nb_classes(y_train_pred) y_train_pred = check_and_transform_label_format(y_train_pred, nb_classes=self._nb_classes) if y_test_pred is not None and len(y_test_pred.shape) == 1: - if self._nb_classes is None: - self._nb_classes = get_nb_classes(y_test_pred) y_test_pred = check_and_transform_label_format(y_test_pred, nb_classes=self._nb_classes) if x_train_pred is not None and y_train_pred is not None and 
x_test_pred is not None and y_test_pred is not None: @@ -353,7 +468,7 @@ class BlackboxClassifierPredictions(BlackboxClassifier): else: raise NotImplementedError("Invalid data - None") - self._nb_classes = get_nb_classes(y_pred) + self._nb_classes = get_nb_classes(y_pred, self.output_type) self._input_shape = x_pred.shape[1:] self._x_pred = x_pred self._y_pred = y_pred diff --git a/apt/utils/models/pytorch_model.py b/apt/utils/models/pytorch_model.py index f234311..0257a5b 100644 --- a/apt/utils/models/pytorch_model.py +++ b/apt/utils/models/pytorch_model.py @@ -3,17 +3,22 @@ import os import shutil import logging -from typing import Optional, Tuple +from typing import Optional, Tuple, Union, List, TYPE_CHECKING import numpy as np import torch from torch.utils.data import DataLoader, TensorDataset from art.utils import check_and_transform_label_format -from apt.utils.datasets.datasets import PytorchData -from apt.utils.models import Model, ModelOutputType -from apt.utils.datasets import OUTPUT_DATA_ARRAY_TYPE +from apt.utils.datasets.datasets import PytorchData, DatasetWithPredictions, ArrayDataset +from apt.utils.models import Model, ModelOutputType, is_multi_label, is_multi_label_binary, is_binary +from apt.utils.datasets import OUTPUT_DATA_ARRAY_TYPE, array2numpy from art.estimators.classification.pytorch import PyTorchClassifier as ArtPyTorchClassifier +if TYPE_CHECKING: + from art.utils import CLIP_VALUES_TYPE, PREPROCESSING_TYPE + from art.defences.preprocessor import Preprocessor + from art.defences.postprocessor import Postprocessor + logger = logging.getLogger(__name__) @@ -30,16 +35,46 @@ class PyTorchClassifierWrapper(ArtPyTorchClassifier): Extension for Pytorch ART model """ + def __init__( + self, + model: "torch.nn.Module", + loss: "torch.nn.modules.loss._Loss", + input_shape: Tuple[int, ...], + nb_classes: int, + output_type: ModelOutputType, + optimizer: Optional["torch.optim.Optimizer"] = None, # type: ignore + use_amp: bool = False, + opt_level: 
str = "O1", + loss_scale: Optional[Union[float, str]] = "dynamic", + channels_first: bool = True, + clip_values: Optional["CLIP_VALUES_TYPE"] = None, + preprocessing_defences: Union["Preprocessor", List["Preprocessor"], None] = None, + postprocessing_defences: Union["Postprocessor", List["Postprocessor"], None] = None, + preprocessing: "PREPROCESSING_TYPE" = (0.0, 1.0), + device_type: str = "gpu", + ): + super().__init__(model, loss, input_shape, nb_classes, optimizer, use_amp, opt_level, loss_scale, + channels_first, clip_values, preprocessing_defences, postprocessing_defences, preprocessing, + device_type) + self._is_single_binary = not is_multi_label(output_type) and is_binary(output_type) + self._is_multi_label = is_multi_label(output_type) + self._is_multi_label_binary = is_multi_label_binary(output_type) + def get_step_correct(self, outputs, targets) -> int: """ Get number of correctly classified labels. """ + # here everything is torch tensors if len(outputs) != len(targets): raise ValueError("outputs and targets should be the same length.") - if self.nb_classes > 1: - return int(torch.sum(torch.argmax(outputs, axis=-1) == targets).item()) + if self._is_single_binary: + return int(torch.sum(torch.round(outputs) == targets).item()) + elif self._is_multi_label: + if self._is_multi_label_binary: + outputs = torch.round(outputs) + return int(torch.sum(targets == outputs).item()) else: - return int(torch.sum(torch.round(outputs, axis=-1) == targets).item()) + return int(torch.sum(torch.argmax(outputs, axis=-1) == targets).item()) def _eval(self, loader: DataLoader): """ @@ -93,6 +128,7 @@ class PyTorchClassifierWrapper(ArtPyTorchClassifier): :param kwargs: Dictionary of framework-specific arguments. This parameter is not currently supported for PyTorch and providing it takes no effect. 
""" + # Put the model in the training mode self._model.train() @@ -156,6 +192,61 @@ class PyTorchClassifierWrapper(ArtPyTorchClassifier): else: self.save_checkpoint_state_dict(is_best=best_acc <= val_acc, path=path) + def predict( + self, x: np.ndarray, batch_size: int = 128, training_mode: bool = False, **kwargs + ) -> np.ndarray: + """ + Perform prediction for a batch of inputs. + + :param x: Input samples. + :param batch_size: Size of batches. + :param training_mode: `True` for model set to training mode and `'False` for model set to evaluation mode. + :return: Array of predictions of shape `(nb_inputs, nb_classes)`. + """ + import torch + + # Set model mode + self._model.train(mode=training_mode) + + # Apply preprocessing + x_preprocessed, _ = self._apply_preprocessing(x, y=None, fit=False) + + results_list = [] + + # Run prediction with batch processing + num_batch = int(np.ceil(len(x_preprocessed) / float(batch_size))) + for m in range(num_batch): + # Batch indexes + begin, end = ( + m * batch_size, + min((m + 1) * batch_size, x_preprocessed.shape[0]), + ) + + with torch.no_grad(): + model_outputs = self._model(torch.from_numpy(x_preprocessed[begin:end]).to(self._device)) + output = model_outputs[-1] + + if isinstance(output, tuple): + output_list = [] + for o in output: + o = o.detach().cpu().numpy().astype(np.float32) + output_list.append(o) + output_np = np.array(output_list) + output_np = np.swapaxes(output_np, 0, 1) + results_list.append(output_np) + else: + output = output.detach().cpu().numpy().astype(np.float32) + if len(output.shape) == 1: + output = np.expand_dims(output, axis=1).astype(np.float32) + results_list.append(output) + + results = np.vstack(results_list) + + # Apply postprocessing + predictions = self._apply_postprocessing(preds=results, fit=False) + + return predictions + def save_checkpoint_state_dict(self, is_best: bool, path=os.getcwd(), filename="latest.tar") -> None: """ Saves checkpoint as latest.tar or best.tar. 
@@ -319,7 +410,8 @@ class PyTorchClassifier(PyTorchModel): super().__init__(model, output_type, black_box_access, unlimited_queries, **kwargs) self._loss = loss self._optimizer = optimizer - self._art_model = PyTorchClassifierWrapper(model, loss, input_shape, nb_classes, optimizer) + self._nb_classes = nb_classes + self._art_model = PyTorchClassifierWrapper(model, loss, input_shape, nb_classes, output_type, optimizer) @property def loss(self): @@ -398,7 +490,7 @@ class PyTorchClassifier(PyTorchModel): :type x: `np.ndarray` or `pandas.DataFrame` :return: Predictions from the model (class probabilities, if supported). """ - return self._art_model.predict(x.get_samples(), **kwargs) + return array2numpy(self._art_model.predict(x.get_samples(), **kwargs)) def score(self, test_data: PytorchData, **kwargs): """ @@ -406,18 +498,20 @@ class PyTorchClassifier(PyTorchModel): :param test_data: Test data. :type test_data: `PytorchData` + :param binary_threshold: The threshold to use on binary classification probabilities to assign the positive + class. + :type binary_threshold: float, optional. Default is 0.5. + :param apply_non_linearity: A non-linear function to apply to the result of the 'predict' method, in case the + model outputs logits (e.g., sigmoid). + :type apply_non_linearity: Callable, should be possible to apply directly to the numpy output of the 'predict' + method, optional. 
:return: the score as float (between 0 and 1) """ - y = test_data.get_labels() + # numpy arrays predicted = self.predict(test_data) - # binary classification, single column of probabilities - if self._art_model.nb_classes == 2 and (len(predicted.shape) == 1 or predicted.shape[1] == 1): - if len(predicted.shape) > 1: - y = check_and_transform_label_format(y, self._art_model.nb_classes, return_one_hot=False) - return np.count_nonzero(y == (predicted > 0.5)) / predicted.shape[0] - else: - y = check_and_transform_label_format(y, self._art_model.nb_classes) - return np.count_nonzero(np.argmax(y, axis=1) == np.argmax(predicted, axis=1)) / predicted.shape[0] + kwargs['predictions'] = DatasetWithPredictions(pred=predicted) + kwargs['nb_classes'] = self._nb_classes + return super().score(ArrayDataset(test_data.get_samples(), test_data.get_labels()), **kwargs) def load_checkpoint_state_dict_by_path(self, model_name: str, path: str = None): """ diff --git a/apt/utils/models/sklearn_model.py b/apt/utils/models/sklearn_model.py index 6f40c65..2515b26 100644 --- a/apt/utils/models/sklearn_model.py +++ b/apt/utils/models/sklearn_model.py @@ -2,8 +2,8 @@ from typing import Optional from sklearn.base import BaseEstimator -from apt.utils.models import Model, ModelOutputType, get_nb_classes, check_correct_model_output -from apt.utils.datasets import Dataset, OUTPUT_DATA_ARRAY_TYPE +from apt.utils.models import Model, ModelOutputType, get_nb_classes +from apt.utils.datasets import Dataset, ArrayDataset, OUTPUT_DATA_ARRAY_TYPE from art.estimators.classification.scikitlearn import SklearnClassifier as ArtSklearnClassifier from art.estimators.regression.scikitlearn import ScikitlearnRegressor @@ -48,7 +48,7 @@ class SklearnClassifier(SklearnModel): super().__init__(model, output_type, black_box_access, unlimited_queries, **kwargs) self._art_model = ArtSklearnClassifier(model, preprocessing=None) - def fit(self, train_data: Dataset, **kwargs) -> None: + def fit(self, train_data: 
ArrayDataset, **kwargs) -> None: """ Fit the model using the training data. @@ -58,11 +58,11 @@ class SklearnClassifier(SklearnModel): :return: None """ y = train_data.get_labels() - self.nb_classes = get_nb_classes(y) + self.nb_classes = get_nb_classes(y, self.output_type) y_encoded = check_and_transform_label_format(y, nb_classes=self.nb_classes) self._art_model.fit(train_data.get_samples(), y_encoded, **kwargs) - def predict(self, x: Dataset, **kwargs) -> OUTPUT_DATA_ARRAY_TYPE: + def predict(self, x: ArrayDataset, **kwargs) -> OUTPUT_DATA_ARRAY_TYPE: """ Perform predictions using the model for input `x`. @@ -71,7 +71,7 @@ class SklearnClassifier(SklearnModel): :return: Predictions from the model as numpy array (class probabilities, if supported). """ predictions = self._art_model.predict(x.get_samples(), **kwargs) - check_correct_model_output(predictions, self.output_type) + # check_correct_model_output(predictions, self.output_type) return predictions @@ -93,7 +93,7 @@ class SklearnRegressor(SklearnModel): """ def __init__(self, model: BaseEstimator, black_box_access: Optional[bool] = True, unlimited_queries: Optional[bool] = True, **kwargs): - super().__init__(model, ModelOutputType.REGRESSOR_SCALAR, black_box_access, unlimited_queries, **kwargs) + super().__init__(model, ModelOutputType.REGRESSION, black_box_access, unlimited_queries, **kwargs) self._art_model = ScikitlearnRegressor(model) def fit(self, train_data: Dataset, **kwargs) -> None: diff --git a/apt/utils/models/xgboost_model.py b/apt/utils/models/xgboost_model.py index 85f9a89..7c43e4f 100644 --- a/apt/utils/models/xgboost_model.py +++ b/apt/utils/models/xgboost_model.py @@ -1,6 +1,6 @@ from typing import Optional, Tuple -from apt.utils.models import Model, ModelOutputType, ScoringMethod, check_correct_model_output, is_one_hot +from apt.utils.models import Model, ModelOutputType, ScoringMethod, is_one_hot from apt.utils.datasets import Dataset, OUTPUT_DATA_ARRAY_TYPE import numpy as np @@ -63,7 
+63,7 @@ class XGBoostClassifier(XGBoostModel): :return: Predictions from the model as numpy array (class probabilities, if supported). """ predictions = self._art_model.predict(x.get_samples(), **kwargs) - check_correct_model_output(predictions, self.output_type) + # check_correct_model_output(predictions, self.output_type) return predictions def score(self, test_data: Dataset, scoring_method: Optional[ScoringMethod] = ScoringMethod.ACCURACY, **kwargs): diff --git a/tests/test_anonymizer.py b/tests/test_anonymizer.py index d0a518d..884a9b6 100644 --- a/tests/test_anonymizer.py +++ b/tests/test_anonymizer.py @@ -6,11 +6,17 @@ from sklearn.impute import SimpleImputer from sklearn.pipeline import Pipeline from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor from sklearn.preprocessing import OneHotEncoder - -from apt.anonymization import Anonymize -from apt.utils.dataset_utils import get_iris_dataset_np, get_adult_dataset_pd, get_nursery_dataset_pd from sklearn.datasets import load_diabetes from sklearn.model_selection import train_test_split +from torch import nn, optim, sigmoid, where +from torch.nn import functional +from scipy.special import expit + +from apt.utils.datasets.datasets import PytorchData +from apt.utils.models import CLASSIFIER_MULTI_OUTPUT_BINARY_LOGITS +from apt.utils.models.pytorch_model import PyTorchClassifier +from apt.anonymization import Anonymize +from apt.utils.dataset_utils import get_iris_dataset_np, get_adult_dataset_pd, get_nursery_dataset_pd from apt.utils.datasets import ArrayDataset @@ -187,6 +193,72 @@ def test_anonymize_pandas_one_hot(): assert ((np.min(anonymized_slice, axis=1) == 0).all()) +def test_anonymize_pytorch_multi_label_binary(): + class multi_label_binary_model(nn.Module): + def __init__(self, num_labels, num_features): + super(multi_label_binary_model, self).__init__() + + self.fc1 = nn.Sequential( + nn.Linear(num_features, 256), + nn.Tanh(), ) + + self.classifier1 = nn.Linear(256, num_labels) + + def 
forward(self, x): + return self.classifier1(self.fc1(x)) + # missing sigmoid on each output + + class FocalLoss(nn.Module): + def __init__(self, gamma=2, alpha=0.5): + super(FocalLoss, self).__init__() + self.gamma = gamma + self.alpha = alpha + + def forward(self, input, target): + bce_loss = functional.binary_cross_entropy_with_logits(input, target, reduction='none') + + p = sigmoid(input) + p = where(target >= 0.5, p, 1 - p) + + modulating_factor = (1 - p) ** self.gamma + alpha = self.alpha * target + (1 - self.alpha) * (1 - target) + focal_loss = alpha * modulating_factor * bce_loss + + return focal_loss.mean() + + (x_train, y_train), _ = get_iris_dataset_np() + + # make multi-label binary + y_train = np.column_stack((y_train, y_train, y_train)) + y_train[y_train > 1] = 1 + + model = multi_label_binary_model(3, 4) + criterion = FocalLoss() + optimizer = optim.RMSprop(model.parameters(), lr=0.01) + + art_model = PyTorchClassifier(model=model, + output_type=CLASSIFIER_MULTI_OUTPUT_BINARY_LOGITS, + loss=criterion, + optimizer=optimizer, + input_shape=(24,), + nb_classes=3) + art_model.fit(PytorchData(x_train.astype(np.float32), y_train.astype(np.float32)), save_entire_model=False, + nb_epochs=10) + pred = art_model.predict(PytorchData(x_train.astype(np.float32), y_train.astype(np.float32))) + pred = expit(pred) + pred[pred < 0.5] = 0 + pred[pred >= 0.5] = 1 + + k = 10 + QI = [0, 2] + anonymizer = Anonymize(k, QI, train_only_QI=True) + anon = anonymizer.anonymize(ArrayDataset(x_train, pred)) + assert (len(np.unique(anon[:, QI], axis=0)) < len(np.unique(x_train[:, QI], axis=0))) + _, counts_elements = np.unique(anon[:, QI], return_counts=True) + assert (np.min(counts_elements) >= k) + assert ((np.delete(anon, QI, axis=1) == np.delete(x_train, QI, axis=1)).all()) + + def test_errors(): with pytest.raises(ValueError): Anonymize(1, [0, 2]) diff --git a/tests/test_minimizer.py b/tests/test_minimizer.py index 9d3240b..be1294b 100644 --- a/tests/test_minimizer.py +++ 
b/tests/test_minimizer.py @@ -4,25 +4,29 @@ import pandas as pd import scipy from sklearn.compose import ColumnTransformer - from sklearn.datasets import load_diabetes from sklearn.impute import SimpleImputer from sklearn.model_selection import train_test_split from sklearn.pipeline import Pipeline from sklearn.preprocessing import OneHotEncoder +from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor -from torch import nn, optim +from torch import nn, optim, sigmoid, where +from torch.nn import functional +from scipy.special import expit import tensorflow as tf from tensorflow.keras.models import Sequential from tensorflow.keras.layers import Dense, Input +from apt.utils.datasets.datasets import PytorchData +from apt.utils.models.pytorch_model import PyTorchClassifier from apt.minimization import GeneralizeToRepresentative -from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor from apt.utils.dataset_utils import get_iris_dataset_np, get_adult_dataset_pd, get_german_credit_dataset_pd from apt.utils.datasets import ArrayDataset -from apt.utils.models import SklearnClassifier, ModelOutputType, SklearnRegressor, KerasClassifier - +from apt.utils.models import SklearnClassifier, SklearnRegressor, KerasClassifier, \ + CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES, CLASSIFIER_SINGLE_OUTPUT_CATEGORICAL, \ + CLASSIFIER_SINGLE_OUTPUT_CLASS_LOGITS, CLASSIFIER_MULTI_OUTPUT_BINARY_LOGITS tf.compat.v1.disable_eager_execution() @@ -216,7 +220,7 @@ def test_minimizer_params(cells): base_est = DecisionTreeClassifier(random_state=0, min_samples_split=2, min_samples_leaf=1) - model = SklearnClassifier(base_est, ModelOutputType.CLASSIFIER_PROBABILITIES) + model = SklearnClassifier(base_est, CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES) model.fit(ArrayDataset(x, y)) expected_generalizations = {'categories': {}, 'category_representatives': {}, @@ -258,7 +262,7 @@ def test_minimizer_params_not_transform(cells): samples = ArrayDataset(x, y, features) 
base_est = DecisionTreeClassifier(random_state=0, min_samples_split=2, min_samples_leaf=1) - model = SklearnClassifier(base_est, ModelOutputType.CLASSIFIER_PROBABILITIES) + model = SklearnClassifier(base_est, CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES) model.fit(ArrayDataset(x, y)) gen = GeneralizeToRepresentative(model, cells=cells, generalize_using_transform=False) @@ -270,7 +274,7 @@ def test_minimizer_fit(data_two_features): x, y, features, _ = data_two_features base_est = DecisionTreeClassifier(random_state=0, min_samples_split=2, min_samples_leaf=1) - model = SklearnClassifier(base_est, ModelOutputType.CLASSIFIER_PROBABILITIES) + model = SklearnClassifier(base_est, CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES) model.fit(ArrayDataset(x, y)) ad = ArrayDataset(x) predictions = model.predict(ad) @@ -287,6 +291,7 @@ def test_minimizer_fit(data_two_features): compare_generalizations(gener, expected_generalizations) check_features(features, expected_generalizations, transformed, x) + assert (np.equal(x, transformed).all()) ncp = gen.ncp.transform_score check_ncp(ncp, expected_generalizations) @@ -299,7 +304,7 @@ def test_minimizer_ncp(data_two_features): base_est = DecisionTreeClassifier(random_state=0, min_samples_split=2, min_samples_leaf=1) - model = SklearnClassifier(base_est, ModelOutputType.CLASSIFIER_PROBABILITIES) + model = SklearnClassifier(base_est, CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES) model.fit(ArrayDataset(x, y)) ad = ArrayDataset(x) ad1 = ArrayDataset(x1, features_names=features) @@ -342,7 +347,7 @@ def test_minimizer_ncp_categorical(data_four_features): base_est = DecisionTreeClassifier(random_state=0, min_samples_split=2, min_samples_leaf=1) - model = SklearnClassifier(base_est, ModelOutputType.CLASSIFIER_PROBABILITIES) + model = SklearnClassifier(base_est, CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES) model.fit(ArrayDataset(encoded, y)) ad = ArrayDataset(x) ad1 = ArrayDataset(x1) @@ -382,7 +387,7 @@ def 
test_minimizer_fit_not_transform(data_two_features): x, y, features, x1 = data_two_features base_est = DecisionTreeClassifier(random_state=0, min_samples_split=2, min_samples_leaf=1) - model = SklearnClassifier(base_est, ModelOutputType.CLASSIFIER_PROBABILITIES) + model = SklearnClassifier(base_est, CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES) model.fit(ArrayDataset(x, y)) ad = ArrayDataset(x) predictions = model.predict(ad) @@ -412,7 +417,7 @@ def test_minimizer_fit_pandas(data_four_features): base_est = DecisionTreeClassifier(random_state=0, min_samples_split=2, min_samples_leaf=1) - model = SklearnClassifier(base_est, ModelOutputType.CLASSIFIER_PROBABILITIES) + model = SklearnClassifier(base_est, CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES) model.fit(ArrayDataset(encoded, y)) predictions = model.predict(ArrayDataset(encoded)) if predictions.shape[1] > 1: @@ -450,7 +455,7 @@ def test_minimizer_params_categorical(cells_categorical): preprocessor, encoded = create_encoder(numeric_features, categorical_features, x) base_est = DecisionTreeClassifier(random_state=0, min_samples_split=2, min_samples_leaf=1) - model = SklearnClassifier(base_est, ModelOutputType.CLASSIFIER_PROBABILITIES) + model = SklearnClassifier(base_est, CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES) model.fit(ArrayDataset(encoded, y)) predictions = model.predict(ArrayDataset(encoded)) if predictions.shape[1] > 1: @@ -474,7 +479,7 @@ def test_minimizer_fit_qi(data_three_features): qi = ['age', 'weight'] base_est = DecisionTreeClassifier(random_state=0, min_samples_split=2, min_samples_leaf=1) - model = SklearnClassifier(base_est, ModelOutputType.CLASSIFIER_PROBABILITIES) + model = SklearnClassifier(base_est, CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES) model.fit(ArrayDataset(x, y)) ad = ArrayDataset(x) predictions = model.predict(ad) @@ -508,7 +513,7 @@ def test_minimizer_fit_pandas_qi(data_five_features): base_est = DecisionTreeClassifier(random_state=0, min_samples_split=2, min_samples_leaf=1) - 
model = SklearnClassifier(base_est, ModelOutputType.CLASSIFIER_PROBABILITIES) + model = SklearnClassifier(base_est, CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES) model.fit(ArrayDataset(encoded, y)) predictions = model.predict(ArrayDataset(encoded)) if predictions.shape[1] > 1: @@ -543,7 +548,7 @@ def test_minimize_ndarray_iris(): qi = ['sepal length (cm)', 'petal length (cm)'] base_est = DecisionTreeClassifier(random_state=0, min_samples_split=2, min_samples_leaf=1) - model = SklearnClassifier(base_est, ModelOutputType.CLASSIFIER_PROBABILITIES) + model = SklearnClassifier(base_est, CLASSIFIER_SINGLE_OUTPUT_CATEGORICAL) model.fit(ArrayDataset(x_train, y_train)) predictions = model.predict(ArrayDataset(x_train)) if predictions.shape[1] > 1: @@ -586,7 +591,7 @@ def test_minimize_pandas_adult(): base_est = DecisionTreeClassifier(random_state=0, min_samples_split=2, min_samples_leaf=1) - model = SklearnClassifier(base_est, ModelOutputType.CLASSIFIER_PROBABILITIES) + model = SklearnClassifier(base_est, CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES) model.fit(ArrayDataset(encoded, y_train)) predictions = model.predict(ArrayDataset(encoded)) if predictions.shape[1] > 1: @@ -642,7 +647,7 @@ def test_german_credit_pandas(): base_est = DecisionTreeClassifier(random_state=0, min_samples_split=2, min_samples_leaf=1) - model = SklearnClassifier(base_est, ModelOutputType.CLASSIFIER_PROBABILITIES) + model = SklearnClassifier(base_est, CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES) model.fit(ArrayDataset(encoded, y_train)) predictions = model.predict(ArrayDataset(encoded)) if predictions.shape[1] > 1: @@ -760,7 +765,7 @@ def test_x_y(): qi = [0, 2] base_est = DecisionTreeClassifier(random_state=0, min_samples_split=2, min_samples_leaf=1) - model = SklearnClassifier(base_est, ModelOutputType.CLASSIFIER_PROBABILITIES) + model = SklearnClassifier(base_est, CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES) model.fit(ArrayDataset(x, y)) ad = ArrayDataset(x) predictions = model.predict(ad) @@ 
-800,7 +805,7 @@ def test_x_y_features_names(): qi = ['age', 'weight'] base_est = DecisionTreeClassifier(random_state=0, min_samples_split=2, min_samples_leaf=1) - model = SklearnClassifier(base_est, ModelOutputType.CLASSIFIER_PROBABILITIES) + model = SklearnClassifier(base_est, CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES) model.fit(ArrayDataset(x, y)) ad = ArrayDataset(x) predictions = model.predict(ad) @@ -1202,7 +1207,7 @@ def test_keras_model(): base_est.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"]) - model = KerasClassifier(base_est, ModelOutputType.CLASSIFIER_PROBABILITIES) + model = KerasClassifier(base_est, CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES) model.fit(ArrayDataset(x, y)) ad = ArrayDataset(x_test) predictions = model.predict(ad) @@ -1269,8 +1274,11 @@ def test_minimizer_pytorch(data_three_features): criterion = nn.CrossEntropyLoss() optimizer = optim.Adam(base_est.parameters(), lr=0.01) - model = PyTorchClassifier(model=base_est, output_type=ModelOutputType.CLASSIFIER_LOGITS, loss=criterion, - optimizer=optimizer, input_shape=(3,), + model = PyTorchClassifier(model=base_est, + output_type=CLASSIFIER_SINGLE_OUTPUT_CLASS_LOGITS, + loss=criterion, + optimizer=optimizer, + input_shape=(3,), nb_classes=2) model.fit(PytorchData(x, y), save_entire_model=False, nb_epochs=10) @@ -1308,8 +1316,11 @@ def test_minimizer_pytorch_iris(): criterion = nn.CrossEntropyLoss() optimizer = optim.Adam(base_est.parameters(), lr=0.01) - model = PyTorchClassifier(model=base_est, output_type=ModelOutputType.CLASSIFIER_LOGITS, loss=criterion, - optimizer=optimizer, input_shape=(4,), + model = PyTorchClassifier(model=base_est, + output_type=CLASSIFIER_SINGLE_OUTPUT_CLASS_LOGITS, + loss=criterion, + optimizer=optimizer, + input_shape=(4,), nb_classes=3) model.fit(PytorchData(x_train, y_train), save_entire_model=False, nb_epochs=10) @@ -1329,6 +1340,78 @@ def test_minimizer_pytorch_iris(): assert ((rel_accuracy >= target_accuracy) or 
(target_accuracy - rel_accuracy) <= ACCURACY_DIFF) +def test_minimizer_pytorch_multi_label_binary(): + class multi_label_binary_model(nn.Module): + def __init__(self, num_labels, num_features): + super(multi_label_binary_model, self).__init__() + + self.fc1 = nn.Sequential( + nn.Linear(num_features, 256), + nn.Tanh(), ) + + self.classifier1 = nn.Linear(256, num_labels) + + def forward(self, x): + return self.classifier1(self.fc1(x)) + # missing sigmoid on each output + + class FocalLoss(nn.Module): + def __init__(self, gamma=2, alpha=0.5): + super(FocalLoss, self).__init__() + self.gamma = gamma + self.alpha = alpha + + def forward(self, input, target): + bce_loss = functional.binary_cross_entropy_with_logits(input, target, reduction='none') + + p = sigmoid(input) + p = where(target >= 0.5, p, 1 - p) + + modulating_factor = (1 - p) ** self.gamma + alpha = self.alpha * target + (1 - self.alpha) * (1 - target) + focal_loss = alpha * modulating_factor * bce_loss + + return focal_loss.mean() + + (x_train, y_train), _ = get_iris_dataset_np() + features = ['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)'] + qi = ['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)'] + + # make multi-label binary + y_train = np.column_stack((y_train, y_train, y_train)) + y_train[y_train > 1] = 1 + x_train = x_train.astype(np.float32) + y_train = y_train.astype(np.float32) + + orig_model = multi_label_binary_model(3, 4) + criterion = FocalLoss() + optimizer = optim.RMSprop(orig_model.parameters(), lr=0.01) + + model = PyTorchClassifier(model=orig_model, + output_type=CLASSIFIER_MULTI_OUTPUT_BINARY_LOGITS, + loss=criterion, + optimizer=optimizer, + input_shape=(24,), + nb_classes=3) + model.fit(PytorchData(x_train, y_train), save_entire_model=False, nb_epochs=10) + predictions = model.predict(PytorchData(x_train, y_train)) + predictions = expit(predictions) + predictions[predictions < 0.5] = 0 + predictions[predictions >= 0.5] = 1 
+ + target_accuracy = 0.99 + gen = GeneralizeToRepresentative(model, target_accuracy=target_accuracy, features_to_minimize=qi) + transformed = gen.fit_transform(dataset=ArrayDataset(x_train, predictions, features_names=features)) + gener = gen.generalizations + + check_features(features, gener, transformed, x_train) + ncp = gen.ncp.transform_score + check_ncp(ncp, gener) + + rel_accuracy = model.score(ArrayDataset(transformed.astype(np.float32), predictions)) + assert ((rel_accuracy >= target_accuracy) or (target_accuracy - rel_accuracy) <= ACCURACY_DIFF) + + def test_untouched(): cells = [{"id": 1, "ranges": {"age": {"start": None, "end": 38}}, "label": 0, 'categories': {'gender': ['male']}, "representative": {"age": 26, "height": 149}}, @@ -1362,7 +1445,7 @@ def test_errors(): y = np.array([1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0]) base_est = DecisionTreeClassifier(random_state=0, min_samples_split=2, min_samples_leaf=1) - model = SklearnClassifier(base_est, ModelOutputType.CLASSIFIER_PROBABILITIES) + model = SklearnClassifier(base_est, CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES) model.fit(ArrayDataset(X, y)) ad = ArrayDataset(X) predictions = model.predict(ad) diff --git a/tests/test_model.py b/tests/test_model.py index b8fb8f1..0a48392 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -1,8 +1,11 @@ import pytest import numpy as np -from apt.utils.models import SklearnClassifier, SklearnRegressor, ModelOutputType, KerasClassifier, KerasRegressor, \ - BlackboxClassifierPredictions, BlackboxClassifierPredictFunction, is_one_hot, get_nb_classes, XGBoostClassifier +from apt.utils.models import SklearnClassifier, SklearnRegressor, KerasClassifier, KerasRegressor, \ + BlackboxClassifierPredictions, BlackboxClassifierPredictFunction, is_one_hot, get_nb_classes, XGBoostClassifier, \ + CLASSIFIER_SINGLE_OUTPUT_CATEGORICAL, CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES, \ + CLASSIFIER_MULTI_OUTPUT_CATEGORICAL, CLASSIFIER_MULTI_OUTPUT_BINARY_PROBABILITIES, \ + 
CLASSIFIER_MULTI_OUTPUT_CLASS_PROBABILITIES from apt.utils.datasets import ArrayDataset, Data, DatasetWithPredictions from apt.utils import dataset_utils @@ -24,7 +27,7 @@ tf.compat.v1.disable_eager_execution() def test_sklearn_classifier(): (x_train, y_train), (x_test, y_test) = dataset_utils.get_iris_dataset_np() underlying_model = RandomForestClassifier() - model = SklearnClassifier(underlying_model, ModelOutputType.CLASSIFIER_PROBABILITIES) + model = SklearnClassifier(underlying_model, CLASSIFIER_SINGLE_OUTPUT_CATEGORICAL) train = ArrayDataset(x_train, y_train) test = ArrayDataset(x_test, y_test) model.fit(train) @@ -35,6 +38,28 @@ def test_sklearn_classifier(): assert (0.0 <= score <= 1.0) +# This test currently cannot pass due to ART dependency, so sklearn support will need to wait until ART is updated +# def test_sklearn_classifier_predictions_multi_label_binary(): +# (x_train, y_train), (x_test, y_test) = dataset_utils.get_iris_dataset_np() +# +# # make multi-label binary +# y_train = np.column_stack((y_train, y_train, y_train)) +# y_train[y_train > 1] = 1 +# y_test = np.column_stack((y_test, y_test, y_test)) +# y_test[y_test > 1] = 1 +# +# test = ArrayDataset(x_test, y_test) +# +# underlying_model = RandomForestClassifier() +# underlying_model.fit(x_train, y_train) +# model = SklearnClassifier(underlying_model, ModelOutputType.CLASSIFIER_MULTI_OUTPUT_BINARY_PROBABILITIES) +# pred = model.predict(test) +# assert (pred[0].shape[0] == x_test.shape[0]) +# +# score = model.score(test) +# assert (score == 1.0) + + def test_sklearn_regressor(): (x_train, y_train), (x_test, y_test) = dataset_utils.get_diabetes_dataset_np() underlying_model = DecisionTreeRegressor() @@ -59,7 +84,7 @@ def test_keras_classifier(): underlying_model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"]) - model = KerasClassifier(underlying_model, ModelOutputType.CLASSIFIER_PROBABILITIES) + model = KerasClassifier(underlying_model, 
CLASSIFIER_SINGLE_OUTPUT_CATEGORICAL) train = ArrayDataset(x_train, y_train) test = ArrayDataset(x_test, y_test) @@ -97,7 +122,8 @@ def test_xgboost_classifier(): (x_train, y_train), (x_test, y_test) = dataset_utils.get_iris_dataset_np() underlying_model = XGBClassifier() underlying_model.fit(x_train, y_train) - model = XGBoostClassifier(underlying_model, ModelOutputType.CLASSIFIER_PROBABILITIES, input_shape=(4,), nb_classes=3) + model = XGBoostClassifier(underlying_model, CLASSIFIER_SINGLE_OUTPUT_CATEGORICAL, + input_shape=(4,), nb_classes=3) train = ArrayDataset(x_train, y_train) test = ArrayDataset(x_test, y_test) pred = model.predict(test) @@ -115,7 +141,7 @@ def test_blackbox_classifier(): train = ArrayDataset(x_train, y_train) test = ArrayDataset(x_test, y_test) data = Data(train, test) - model = BlackboxClassifierPredictions(data, ModelOutputType.CLASSIFIER_SCALAR) + model = BlackboxClassifierPredictions(data, CLASSIFIER_SINGLE_OUTPUT_CATEGORICAL) pred = model.predict(test) assert (pred.shape[0] == x_test.shape[0]) @@ -131,7 +157,7 @@ def test_blackbox_classifier_predictions(): train = DatasetWithPredictions(y_train, x_train) test = DatasetWithPredictions(y_test, x_test) data = Data(train, test) - model = BlackboxClassifierPredictions(data, ModelOutputType.CLASSIFIER_SCALAR) + model = BlackboxClassifierPredictions(data, CLASSIFIER_SINGLE_OUTPUT_CATEGORICAL) pred = model.predict(test) assert (pred.shape[0] == x_test.shape[0]) assert model.model_type is None @@ -146,7 +172,7 @@ def test_blackbox_classifier_predictions_y(): train = DatasetWithPredictions(y_train, x_train, y_train) test = DatasetWithPredictions(y_test, x_test, y_test) data = Data(train, test) - model = BlackboxClassifierPredictions(data, ModelOutputType.CLASSIFIER_SCALAR) + model = BlackboxClassifierPredictions(data, CLASSIFIER_SINGLE_OUTPUT_CATEGORICAL) pred = model.predict(test) assert (pred.shape[0] == x_test.shape[0]) @@ -156,14 +182,62 @@ def test_blackbox_classifier_predictions_y(): assert 
model.model_type is None -def test_blackbox_classifier_mismatch(): +def test_blackbox_classifier_predictions_multi_label_cat(): (x_train, y_train), (x_test, y_test) = dataset_utils.get_iris_dataset_np() - train = ArrayDataset(x_train, y_train) - test = ArrayDataset(x_test, y_test) + # make multi-label categorical + y_train = np.column_stack((y_train, y_train, y_train)) + y_test = np.column_stack((y_test, y_test, y_test)) + + train = DatasetWithPredictions(y_train, x_train, y_train) + test = DatasetWithPredictions(y_test, x_test, y_test) data = Data(train, test) - with pytest.raises(ValueError): - BlackboxClassifierPredictions(data, ModelOutputType.CLASSIFIER_PROBABILITIES) + model = BlackboxClassifierPredictions(data, CLASSIFIER_MULTI_OUTPUT_CATEGORICAL) + pred = model.predict(test) + assert (pred.shape[0] == x_test.shape[0]) + + score = model.score(test) + assert (score == 1.0) + + assert model.model_type is None + + +def test_blackbox_classifier_predictions_multi_label_binary(): + (x_train, y_train), (x_test, y_test) = dataset_utils.get_iris_dataset_np() + + # make multi-label binary + y_train = np.column_stack((y_train, y_train, y_train)) + y_train[y_train > 1] = 1 + pred_train = y_train.copy().astype(float) + pred_train[pred_train == 0] = 0.2 + pred_train[pred_train == 1] = 0.6 + y_test = np.column_stack((y_test, y_test, y_test)) + y_test[y_test > 1] = 1 + pred_test = y_test.copy().astype(float) + pred_test[pred_test == 0] = 0.2 + pred_test[pred_test == 1] = 0.6 + + train = DatasetWithPredictions(pred_train, x_train, y_train) + test = DatasetWithPredictions(pred_test, x_test, y_test) + data = Data(train, test) + model = BlackboxClassifierPredictions(data, CLASSIFIER_MULTI_OUTPUT_BINARY_PROBABILITIES) + pred = model.predict(test) + assert (pred.shape[0] == x_test.shape[0]) + + score = model.score(test) + assert (score == 1.0) + + assert model.model_type is None + + +# def test_blackbox_classifier_mismatch(): +# (x_train, y_train), (x_test, y_test) = 
dataset_utils.get_iris_dataset_np() +# +# train = ArrayDataset(x_train, y_train) +# test = ArrayDataset(x_test, y_test) +# data = Data(train, test) +# with pytest.raises(ValueError): +# BlackboxClassifierPredictions(data, ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES) def test_blackbox_classifier_no_test(): @@ -172,7 +246,7 @@ def test_blackbox_classifier_no_test(): train = ArrayDataset(x_train, y_train) data = Data(train) - model = BlackboxClassifierPredictions(data, ModelOutputType.CLASSIFIER_SCALAR) + model = BlackboxClassifierPredictions(data, CLASSIFIER_SINGLE_OUTPUT_CATEGORICAL) pred = model.predict(train) assert (pred.shape[0] == x_train.shape[0]) @@ -189,7 +263,7 @@ def test_blackbox_classifier_no_train(): test = ArrayDataset(x_test, y_test) data = Data(test=test) - model = BlackboxClassifierPredictions(data, ModelOutputType.CLASSIFIER_SCALAR) + model = BlackboxClassifierPredictions(data, CLASSIFIER_SINGLE_OUTPUT_CATEGORICAL) pred = model.predict(test) assert (pred.shape[0] == x_test.shape[0]) @@ -207,7 +281,7 @@ def test_blackbox_classifier_no_test_y(): train = ArrayDataset(x_train, y_train) test = ArrayDataset(x_test) data = Data(train, test) - model = BlackboxClassifierPredictions(data, ModelOutputType.CLASSIFIER_SCALAR) + model = BlackboxClassifierPredictions(data, CLASSIFIER_SINGLE_OUTPUT_CATEGORICAL) pred = model.predict(train) assert (pred.shape[0] == x_train.shape[0]) @@ -230,7 +304,7 @@ def test_blackbox_classifier_no_train_y(): train = ArrayDataset(x_train) test = ArrayDataset(x_test, y_test) data = Data(train, test) - model = BlackboxClassifierPredictions(data, ModelOutputType.CLASSIFIER_SCALAR) + model = BlackboxClassifierPredictions(data, CLASSIFIER_SINGLE_OUTPUT_CATEGORICAL) pred = model.predict(test) assert (pred.shape[0] == x_test.shape[0]) @@ -254,7 +328,7 @@ def test_blackbox_classifier_probabilities(): train = ArrayDataset(x_train, y_train) data = Data(train) - model = BlackboxClassifierPredictions(data, 
ModelOutputType.CLASSIFIER_PROBABILITIES) + model = BlackboxClassifierPredictions(data, CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES) pred = model.predict(train) assert (pred.shape[0] == x_train.shape[0]) assert (0.0 < pred).all() @@ -264,6 +338,23 @@ def test_blackbox_classifier_probabilities(): assert (score == 1.0) +def test_blackbox_classifier_multi_label_probabilities(): + (x_train, _), (_, _) = dataset_utils.get_iris_dataset_np() + y_train = np.array([[0.23, 0.56, 0.21] for i in range(105)]) + + # make multi-label categorical + y_train = np.column_stack((y_train, y_train, y_train)) + + train = ArrayDataset(x_train, y_train) + + data = Data(train) + model = BlackboxClassifierPredictions(data, CLASSIFIER_MULTI_OUTPUT_CLASS_PROBABILITIES) + pred = model.predict(train) + assert (pred.shape[0] == x_train.shape[0]) + assert (0.0 < pred).all() + assert (pred < 1.0).all() + + def test_blackbox_classifier_predict(): def predict(x): return np.array([[0.23, 0.56, 0.21] for i in range(x.shape[0])]) @@ -273,7 +364,8 @@ def test_blackbox_classifier_predict(): train = ArrayDataset(x_train, y_train) - model = BlackboxClassifierPredictFunction(predict, ModelOutputType.CLASSIFIER_PROBABILITIES, (4,), 3) + model = BlackboxClassifierPredictFunction(predict, CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES, + (4,), 3) pred = model.predict(train) assert (pred.shape[0] == x_train.shape[0]) assert (0.0 < pred).all() @@ -292,7 +384,8 @@ def test_blackbox_classifier_predict_scalar(): train = ArrayDataset(x_train, y_train) - model = BlackboxClassifierPredictFunction(predict, ModelOutputType.CLASSIFIER_SCALAR, (4,), 3) + model = BlackboxClassifierPredictFunction(predict, CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES, + (4,), 3) pred = model.predict(train) assert (pred.shape[0] == x_train.shape[0]) @@ -310,23 +403,23 @@ def test_is_one_hot(): def test_get_nb_classes(): (_, y_train), (_, y_test) = dataset_utils.get_iris_dataset_np() - + output_type = CLASSIFIER_SINGLE_OUTPUT_CATEGORICAL # 
shape: (x,) - not 1-hot - nb_classes_test = get_nb_classes(y_test) - nb_classes_train = get_nb_classes(y_train) + nb_classes_test = get_nb_classes(y_test, output_type) + nb_classes_train = get_nb_classes(y_train, output_type) assert (nb_classes_test == nb_classes_train) assert (nb_classes_test == 3) # shape: (x,1) - not 1-hot - nb_classes_test = get_nb_classes(y_test.reshape(-1, 1)) + nb_classes_test = get_nb_classes(y_test.reshape(-1, 1), output_type) assert (nb_classes_test == 3) # shape: (x,3) - 1-hot y = to_categorical(y_test) - nb_classes = get_nb_classes(y) + nb_classes = get_nb_classes(y, output_type) assert (nb_classes == 3) # gaps: 1,2,4 (0,3 missing) y_test[y_test == 0] = 4 - nb_classes = get_nb_classes(y_test) + nb_classes = get_nb_classes(y_test, output_type) assert (nb_classes == 5) diff --git a/tests/test_pytorch.py b/tests/test_pytorch.py index c16735d..bd546f2 100644 --- a/tests/test_pytorch.py +++ b/tests/test_pytorch.py @@ -1,16 +1,23 @@ import numpy as np -from torch import nn, optim +from torch import nn, optim, sigmoid, where, from_numpy +from torch.nn import functional +from torch.utils.data import DataLoader, TensorDataset +from scipy.special import expit +from art.utils import check_and_transform_label_format from apt.utils.datasets.datasets import PytorchData -from apt.utils.models import ModelOutputType +from apt.utils.models import CLASSIFIER_SINGLE_OUTPUT_CLASS_LOGITS, CLASSIFIER_SINGLE_OUTPUT_BINARY_LOGITS, \ + CLASSIFIER_SINGLE_OUTPUT_BINARY_PROBABILITIES, CLASSIFIER_MULTI_OUTPUT_CLASS_LOGITS, \ + CLASSIFIER_MULTI_OUTPUT_BINARY_LOGITS from apt.utils.models.pytorch_model import PyTorchClassifier from art.utils import load_nursery +from apt.utils import dataset_utils -class pytorch_model(nn.Module): +class PytorchModel(nn.Module): def __init__(self, num_classes, num_features): - super(pytorch_model, self).__init__() + super(PytorchModel, self).__init__() self.fc1 = nn.Sequential( nn.Linear(num_features, 1024), @@ -39,7 +46,77 @@ class 
pytorch_model(nn.Module): return self.classifier(out) -def test_nursery_pytorch_state_dict(): +class PytorchModelBinary(nn.Module): + + def __init__(self, num_features): + super(PytorchModelBinary, self).__init__() + + self.fc2 = nn.Sequential( + nn.Linear(num_features, 256), + nn.Tanh(), ) + + self.fc3 = nn.Sequential( + nn.Linear(256, 128), + nn.Tanh(), ) + + self.fc4 = nn.Sequential( + nn.Linear(128, 1), + nn.Tanh(), + ) + + def forward(self, x): + out = self.fc2(x) + out = self.fc3(out) + return self.fc4(out) + + +class PytorchModelBinarySigmoid(nn.Module): + + def __init__(self, num_features): + super(PytorchModelBinarySigmoid, self).__init__() + + self.fc2 = nn.Sequential( + nn.Linear(num_features, 256), + nn.Tanh(), ) + + self.fc3 = nn.Sequential( + nn.Linear(256, 128), + nn.Tanh(), ) + + self.fc4 = nn.Sequential( + nn.Linear(128, 1), + nn.Tanh(), + ) + + self.classifier = nn.Sigmoid() + + def forward(self, x): + out = self.fc2(x) + out = self.fc3(out) + out = self.fc4(out) + return self.classifier(out) + + +class FocalLoss(nn.Module): + def __init__(self, gamma=2, alpha=0.5): + super(FocalLoss, self).__init__() + self.gamma = gamma + self.alpha = alpha + + def forward(self, input, target): + bce_loss = functional.binary_cross_entropy_with_logits(input, target, reduction='none') + + p = sigmoid(input) + p = where(target >= 0.5, p, 1 - p) + + modulating_factor = (1 - p) ** self.gamma + alpha = self.alpha * target + (1 - self.alpha) * (1 - target) + focal_loss = alpha * modulating_factor * bce_loss + + return focal_loss.mean() + + +def test_pytorch_nursery_state_dict(): (x_train, y_train), (x_test, y_test), _, _ = load_nursery(test_set=0.5) # reduce size of training set to make attack slightly better train_set_size = 500 @@ -48,12 +125,15 @@ def test_nursery_pytorch_state_dict(): x_test = x_test[:train_set_size] y_test = y_test[:train_set_size] - inner_model = pytorch_model(4, 24) + inner_model = PytorchModel(4, 24) criterion = nn.CrossEntropyLoss() optimizer 
= optim.Adam(inner_model.parameters(), lr=0.01) - model = PyTorchClassifier(model=inner_model, output_type=ModelOutputType.CLASSIFIER_LOGITS, loss=criterion, - optimizer=optimizer, input_shape=(24,), + model = PyTorchClassifier(model=inner_model, + output_type=CLASSIFIER_SINGLE_OUTPUT_CLASS_LOGITS, + loss=criterion, + optimizer=optimizer, + input_shape=(24,), nb_classes=4) model.fit(PytorchData(x_train.astype(np.float32), y_train), save_entire_model=False, nb_epochs=10) model.load_latest_state_dict_checkpoint() @@ -62,12 +142,12 @@ def test_nursery_pytorch_state_dict(): assert (0 <= score <= 1) # python pytorch numpy model.load_best_state_dict_checkpoint() - score = model.score(PytorchData(x_test.astype(np.float32), y_test)) + score = model.score(PytorchData(x_test.astype(np.float32), y_test), apply_non_linearity=expit) print('best model accuracy: ', score) assert (0 <= score <= 1) -def test_nursery_pytorch_save_entire_model(): +def test_pytorch_nursery_save_entire_model(): (x_train, y_train), (x_test, y_test), _, _ = load_nursery(test_set=0.5) # reduce size of training set to make attack slightly better @@ -77,20 +157,208 @@ def test_nursery_pytorch_save_entire_model(): x_test = x_test[:train_set_size] y_test = y_test[:train_set_size] - model = pytorch_model(4, 24) + inner_model = PytorchModel(4, 24) # model = torch.nn.DataParallel(model) criterion = nn.CrossEntropyLoss() - optimizer = optim.Adam(model.parameters(), lr=0.01) + optimizer = optim.Adam(inner_model.parameters(), lr=0.01) - art_model = PyTorchClassifier(model=model, output_type=ModelOutputType.CLASSIFIER_LOGITS, loss=criterion, - optimizer=optimizer, input_shape=(24,), - nb_classes=4) - art_model.fit(PytorchData(x_train.astype(np.float32), y_train), save_entire_model=True, nb_epochs=10) + model = PyTorchClassifier(model=inner_model, + output_type=CLASSIFIER_SINGLE_OUTPUT_CLASS_LOGITS, + loss=criterion, + optimizer=optimizer, + input_shape=(24,), + nb_classes=4) + 
model.fit(PytorchData(x_train.astype(np.float32), y_train), save_entire_model=True, nb_epochs=10) - score = art_model.score(PytorchData(x_test.astype(np.float32), y_test)) + score = model.score(PytorchData(x_test.astype(np.float32), y_test)) print('Base model accuracy: ', score) assert (0 <= score <= 1) - art_model.load_best_model_checkpoint() - score = art_model.score(PytorchData(x_test.astype(np.float32), y_test)) + model.load_best_model_checkpoint() + score = model.score(PytorchData(x_test.astype(np.float32), y_test), apply_non_linearity=expit) print('best model accuracy: ', score) assert (0 <= score <= 1) + + +def test_pytorch_predictions_single_label_binary(): + x = np.array([[23, 165, 70, 10], + [45, 158, 67, 11], + [56, 123, 65, 58], + [67, 154, 90, 12], + [45, 149, 67, 56], + [42, 166, 58, 50], + [73, 172, 68, 10], + [94, 168, 69, 11], + [69, 175, 80, 61], + [24, 181, 95, 10], + [18, 190, 102, 53], + [22, 161, 95, 10], + [24, 181, 103, 10], + [28, 184, 108, 10]]) + x = from_numpy(x) + y = np.array([1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1]) + y = from_numpy(y) + data = PytorchData(x, y) + + inner_model = PytorchModelBinary(4) + criterion = nn.BCEWithLogitsLoss() + optimizer = optim.Adam(inner_model.parameters(), lr=0.01) + + model = PyTorchClassifier(model=inner_model, output_type=CLASSIFIER_SINGLE_OUTPUT_BINARY_LOGITS, + loss=criterion, + optimizer=optimizer, input_shape=(4,), + nb_classes=2) + model.fit(data, save_entire_model=False, nb_epochs=1) + + pred = model.predict(data) + assert (pred.shape[0] == x.shape[0]) + score = model.score(data) + assert (0 < score <= 1.0) + + +def test_pytorch_predictions_single_label_binary_prob(): + x = np.array([[23, 165, 70, 10], + [45, 158, 67, 11], + [56, 123, 65, 58], + [67, 154, 90, 12], + [45, 149, 67, 56], + [42, 166, 58, 50], + [73, 172, 68, 10], + [94, 168, 69, 11], + [69, 175, 80, 61], + [24, 181, 95, 10], + [18, 190, 102, 53], + [22, 161, 95, 10], + [24, 181, 103, 10], + [28, 184, 108, 10]]) + x = 
from_numpy(x) + y = np.array([1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1]) + y = from_numpy(y) + data = PytorchData(x, y) + + inner_model = PytorchModelBinarySigmoid(4) + criterion = nn.BCELoss() + optimizer = optim.Adam(inner_model.parameters(), lr=0.01) + + model = PyTorchClassifier(model=inner_model, + output_type=CLASSIFIER_SINGLE_OUTPUT_BINARY_PROBABILITIES, + loss=criterion, + optimizer=optimizer, input_shape=(4,), + nb_classes=2) + model.fit(data, save_entire_model=False, nb_epochs=1) + + pred = model.predict(data) + assert (pred.shape[0] == x.shape[0]) + score = model.score(data) + assert (0 < score <= 1.0) + + +def test_pytorch_predictions_multi_label_cat(): + # This kind of model requires special training and will not be supported using the 'fit' method. + class MultiLabelCatModel(nn.Module): + + def __init__(self, num_classes, num_features): + super(MultiLabelCatModel, self).__init__() + + self.fc1 = nn.Sequential( + nn.Linear(num_features, 256), + nn.Tanh(), ) + + self.classifier1 = nn.Linear(256, num_classes) + self.classifier2 = nn.Linear(256, num_classes) + + def forward(self, x): + out1 = self.classifier1(self.fc1(x)) + out2 = self.classifier2(self.fc1(x)) + return out1, out2 + + (x_train, y_train), (x_test, y_test) = dataset_utils.get_iris_dataset_np() + + # make multi-label categorical + num_classes = 3 + y_train = check_and_transform_label_format(y_train, nb_classes=num_classes) + y_test = check_and_transform_label_format(y_test, nb_classes=num_classes) + y_train = np.column_stack((y_train, y_train)) + y_test = np.stack([y_test, y_test], axis=1) + test = PytorchData(x_test.astype(np.float32), y_test.astype(np.float32)) + + inner_model = MultiLabelCatModel(num_classes, 4) + criterion = nn.CrossEntropyLoss() + optimizer = optim.Adam(inner_model.parameters(), lr=0.01) + + # train model + train_dataset = TensorDataset(from_numpy(x_train.astype(np.float32)), from_numpy(y_train.astype(np.float32))) + train_loader = DataLoader(train_dataset, 
batch_size=100, shuffle=True) + + for epoch in range(5): + # Train for one epoch + for inputs, targets in train_loader: + # Zero the parameter gradients + optimizer.zero_grad() + + # Perform prediction + model_outputs = inner_model(inputs) + + # Form the loss function + loss = 0 + for i, o in enumerate(model_outputs): + t = targets[:, i * num_classes:(i + 1) * num_classes] + loss += criterion(o, t) + + loss.backward() + + optimizer.step() + + model = PyTorchClassifier(model=inner_model, + output_type=CLASSIFIER_MULTI_OUTPUT_CLASS_LOGITS, + loss=criterion, + optimizer=optimizer, + input_shape=(24,), + nb_classes=3) + + pred = model.predict(test) + assert (pred.shape[0] == x_test.shape[0]) + + score = model.score(test, apply_non_linearity=expit) + assert (0 < score <= 1.0) + + +def test_pytorch_predictions_multi_label_binary(): + class MultiLabelBinaryModel(nn.Module): + def __init__(self, num_labels, num_features): + super(MultiLabelBinaryModel, self).__init__() + + self.fc1 = nn.Sequential( + nn.Linear(num_features, 256), + nn.Tanh(), ) + + self.classifier1 = nn.Linear(256, num_labels) + + def forward(self, x): + return self.classifier1(self.fc1(x)) + + (x_train, y_train), (x_test, y_test) = dataset_utils.get_iris_dataset_np() + + # make multi-label binary + y_train = np.column_stack((y_train, y_train, y_train)) + y_train[y_train > 1] = 1 + y_test = np.column_stack((y_test, y_test, y_test)) + y_test[y_test > 1] = 1 + test = PytorchData(x_test.astype(np.float32), y_test) + + inner_model = MultiLabelBinaryModel(3, 4) + criterion = FocalLoss() + optimizer = optim.RMSprop(inner_model.parameters(), lr=0.01) + + model = PyTorchClassifier(model=inner_model, + output_type=CLASSIFIER_MULTI_OUTPUT_BINARY_LOGITS, + loss=criterion, + optimizer=optimizer, + input_shape=(24,), + nb_classes=3) + model.fit(PytorchData(x_train.astype(np.float32), y_train.astype(np.float32)), save_entire_model=False, + nb_epochs=10) + pred = model.predict(test) + assert (pred.shape[0] == 
x_test.shape[0]) + + score = model.score(test, apply_non_linearity=expit) + assert (score == 1.0)