From a4816878f9fc93c64e2455cb8658949a854be055 Mon Sep 17 00:00:00 2001 From: abigailt Date: Thu, 2 May 2024 17:04:34 +0300 Subject: [PATCH] Formatting Signed-off-by: abigailt --- apt/minimization/minimizer.py | 5 +-- apt/utils/models/model.py | 60 +++++++++++++++---------------- apt/utils/models/pytorch_model.py | 30 +++++----------- tests/test_anonymizer.py | 4 +-- tests/test_minimizer.py | 18 +++++----- tests/test_pytorch.py | 39 ++++++++++---------- 6 files changed, 68 insertions(+), 88 deletions(-) diff --git a/apt/minimization/minimizer.py b/apt/minimization/minimizer.py index 15c771b..fed20bd 100644 --- a/apt/minimization/minimizer.py +++ b/apt/minimization/minimizer.py @@ -93,7 +93,7 @@ class GeneralizeToRepresentative(BaseEstimator, MetaEstimatorMixin, TransformerM if is_regression: self.estimator = SklearnRegressor(estimator) else: - #TODO: maybe we should get model output type from user in this case + # TODO: maybe we should get model output type from user in this case self.estimator = SklearnClassifier(estimator, ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES) self.target_accuracy = target_accuracy @@ -813,15 +813,12 @@ class GeneralizeToRepresentative(BaseEstimator, MetaEstimatorMixin, TransformerM def _calculate_level_cell_label(self, left_cell, right_cell, new_cell): new_cell['hist'] = left_cell['hist'] + right_cell['hist'] - # [x + y for x, y in - # zip(left_cell['hist'], right_cell['hist'])] if not self.is_regression else [] if isinstance(self._dt.classes_, list): new_cell['label'] = [self._dt.classes_[output][class_index] for output, class_index in enumerate(np.argmax(new_cell['hist'], axis=1))] else: new_cell['label'] = [self._dt.classes_[np.argmax(new_cell['hist'][0])]] - def _get_nodes_level(self, level): # level = distance from lowest leaf node_depth = np.zeros(shape=self._dt.tree_.node_count, dtype=np.int64) diff --git a/apt/utils/models/model.py b/apt/utils/models/model.py index 1b1deea..82dd9b9 100644 --- a/apt/utils/models/model.py +++ b/apt/utils/models/model.py @@ -4,7 +4,7 @@ from enum import Enum, auto import numpy as np from scipy.special import expit -from apt.utils.datasets import Dataset, Data, DatasetWithPredictions, array2numpy, OUTPUT_DATA_ARRAY_TYPE +from apt.utils.datasets import Dataset, Data, array2numpy, OUTPUT_DATA_ARRAY_TYPE from art.estimators.classification import BlackBoxClassifier from art.utils import check_and_transform_label_format @@ -43,40 +43,40 @@ def is_one_hot(y: OUTPUT_DATA_ARRAY_TYPE) -> bool: def is_multi_label(output_type: ModelOutputType) -> bool: - return (output_type == ModelOutputType.CLASSIFIER_MULTI_OUTPUT_CATEGORICAL or - output_type == ModelOutputType.CLASSIFIER_MULTI_OUTPUT_BINARY_PROBABILITIES or - output_type == ModelOutputType.CLASSIFIER_MULTI_OUTPUT_CLASS_PROBABILITIES or - output_type == ModelOutputType.CLASSIFIER_MULTI_OUTPUT_BINARY_LOGITS or - output_type == ModelOutputType.CLASSIFIER_MULTI_OUTPUT_CLASS_LOGITS) + return (output_type == ModelOutputType.CLASSIFIER_MULTI_OUTPUT_CATEGORICAL + or output_type == ModelOutputType.CLASSIFIER_MULTI_OUTPUT_BINARY_PROBABILITIES + or output_type == ModelOutputType.CLASSIFIER_MULTI_OUTPUT_CLASS_PROBABILITIES + or output_type == ModelOutputType.CLASSIFIER_MULTI_OUTPUT_BINARY_LOGITS + or output_type == ModelOutputType.CLASSIFIER_MULTI_OUTPUT_CLASS_LOGITS) def is_multi_label_binary(output_type: ModelOutputType) -> bool: - return (output_type == ModelOutputType.CLASSIFIER_MULTI_OUTPUT_BINARY_PROBABILITIES or - output_type == ModelOutputType.CLASSIFIER_MULTI_OUTPUT_BINARY_LOGITS) + return (output_type == ModelOutputType.CLASSIFIER_MULTI_OUTPUT_BINARY_PROBABILITIES + or output_type == ModelOutputType.CLASSIFIER_MULTI_OUTPUT_BINARY_LOGITS) def is_binary(output_type: ModelOutputType) -> bool: - return (output_type == ModelOutputType.CLASSIFIER_MULTI_OUTPUT_BINARY_PROBABILITIES or - output_type == ModelOutputType.CLASSIFIER_MULTI_OUTPUT_BINARY_LOGITS or - output_type == ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_BINARY_PROBABILITIES or - output_type == ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_BINARY_LOGITS) + return (output_type == ModelOutputType.CLASSIFIER_MULTI_OUTPUT_BINARY_PROBABILITIES + or output_type == ModelOutputType.CLASSIFIER_MULTI_OUTPUT_BINARY_LOGITS + or output_type == ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_BINARY_PROBABILITIES + or output_type == ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_BINARY_LOGITS) def is_categorical(output_type: ModelOutputType) -> bool: - return (output_type == ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CATEGORICAL or - output_type == ModelOutputType.CLASSIFIER_MULTI_OUTPUT_CATEGORICAL) + return (output_type == ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CATEGORICAL + or output_type == ModelOutputType.CLASSIFIER_MULTI_OUTPUT_CATEGORICAL) def is_probabilities(output_type: ModelOutputType) -> bool: - return (output_type == ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES or - output_type == ModelOutputType.CLASSIFIER_MULTI_OUTPUT_CLASS_PROBABILITIES) + return (output_type == ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES + or output_type == ModelOutputType.CLASSIFIER_MULTI_OUTPUT_CLASS_PROBABILITIES) def is_logits(output_type: ModelOutputType) -> bool: - return (output_type == ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CLASS_LOGITS or - output_type == ModelOutputType.CLASSIFIER_MULTI_OUTPUT_CLASS_LOGITS or - output_type == ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_BINARY_LOGITS or - output_type == ModelOutputType.CLASSIFIER_MULTI_OUTPUT_BINARY_LOGITS) + return (output_type == ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CLASS_LOGITS + or output_type == ModelOutputType.CLASSIFIER_MULTI_OUTPUT_CLASS_LOGITS + or output_type == ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_BINARY_LOGITS + or output_type == ModelOutputType.CLASSIFIER_MULTI_OUTPUT_BINARY_LOGITS) def get_nb_classes(y: OUTPUT_DATA_ARRAY_TYPE, output_type: ModelOutputType) -> int: @@ -114,10 +114,10 @@ def check_correct_model_output(y: OUTPUT_DATA_ARRAY_TYPE, output_type: ModelOutp :type output_type: ModelOutputType :raises: ValueError (in case of mismatch) """ - if not is_one_hot(y) and (output_type == ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES or - output_type == ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CLASS_LOGITS): - raise ValueError("Incompatible model output types. Model outputs 1D array of categorical scalars while " - "output type is set to ", output_type) + if not is_one_hot(y) and (output_type == ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES + or output_type == ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CLASS_LOGITS): + raise ValueError("Incompatible model output types. Model outputs 1D array of categorical scalars while " + "output type is set to ", output_type) class Model(metaclass=ABCMeta): @@ -209,13 +209,13 @@ class Model(metaclass=ABCMeta): if scoring_method == ScoringMethod.ACCURACY: if not is_multi_label(self.output_type) and not is_binary(self.output_type) and nb_classes is not None: y = check_and_transform_label_format(y, nb_classes=nb_classes) - if (self.output_type == ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES or - self.output_type == ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CLASS_LOGITS or - self.output_type == ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CATEGORICAL): + if (self.output_type == ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES + or self.output_type == ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CLASS_LOGITS + or self.output_type == ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CATEGORICAL): # categorical has been 1-hot encoded by check_and_transform_label_format return np.count_nonzero(np.argmax(y, axis=1) == np.argmax(predicted, axis=1)) / predicted.shape[0] - elif (self.output_type == ModelOutputType.CLASSIFIER_MULTI_OUTPUT_CLASS_LOGITS or - self.output_type == ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES): + elif (self.output_type == ModelOutputType.CLASSIFIER_MULTI_OUTPUT_CLASS_LOGITS + or self.output_type == ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES): if predicted.shape != y.shape: raise ValueError('Do not know how to compare arrays with different shapes') elif len(predicted.shape) < 3: @@ -372,7 +372,7 @@ class BlackboxClassifier(Model): """ Score the model using test data. """ - kwargs ['nb_classes'] = self.nb_classes + kwargs['nb_classes'] = self.nb_classes return super().score(test_data, **kwargs) def fit(self, train_data: Dataset, **kwargs) -> None: diff --git a/apt/utils/models/pytorch_model.py b/apt/utils/models/pytorch_model.py index 723619b..c91df72 100644 --- a/apt/utils/models/pytorch_model.py +++ b/apt/utils/models/pytorch_model.py @@ -3,7 +3,7 @@ import os import shutil import logging -from typing import Optional, Tuple, Union, List +from typing import Optional, Tuple, Union, List, TYPE_CHECKING import numpy as np import torch from torch.utils.data import DataLoader, TensorDataset @@ -14,6 +14,11 @@ from apt.utils.models import Model, ModelOutputType, is_multi_label, is_multi_la from apt.utils.datasets import OUTPUT_DATA_ARRAY_TYPE, array2numpy from art.estimators.classification.pytorch import PyTorchClassifier as ArtPyTorchClassifier +if TYPE_CHECKING: + from art.utils import CLIP_VALUES_TYPE, PREPROCESSING_TYPE + from art.defences.preprocessor import Preprocessor + from art.defences.postprocessor import Postprocessor + logger = logging.getLogger(__name__) @@ -51,8 +56,8 @@ class PyTorchClassifierWrapper(ArtPyTorchClassifier): super().__init__(model, loss, input_shape, nb_classes, optimizer, use_amp, opt_level, loss_scale, channels_first, clip_values, preprocessing_defences, postprocessing_defences, preprocessing, device_type) - self._is_single_binary = (output_type == ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_BINARY_PROBABILITIES or - output_type == ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_BINARY_LOGITS) + self._is_single_binary = (output_type == ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_BINARY_PROBABILITIES + or output_type == ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_BINARY_LOGITS) self._is_multi_label = is_multi_label(output_type) self._is_multi_label_binary = is_multi_label_binary(output_type) @@ -504,29 +509,10 @@ class PyTorchClassifier(PyTorchModel): :return: the score as float (between 0 and 1) """ # numpy arrays - y = test_data.get_labels() predicted = self.predict(test_data) kwargs['predictions'] = DatasetWithPredictions(pred=predicted) kwargs['nb_classes'] = self._nb_classes return super().score(ArrayDataset(test_data.get_samples(), test_data.get_labels()), **kwargs) - # if apply_non_linearity: - # predicted = apply_non_linearity(predicted) - # # binary classification, single column of probabilities - # if self._art_model.nb_classes == 2 and (len(predicted.shape) == 1 or predicted.shape[1] == 1): - # if len(predicted.shape) > 1: - # y = check_and_transform_label_format(y, self._art_model.nb_classes, return_one_hot=False) - # return np.count_nonzero(y == (predicted > binary_threshold)) / predicted.shape[0] - # # multi column - # else: - # if not is_multi_label(y): - # y = check_and_transform_label_format(y, self._art_model.nb_classes) - # return np.count_nonzero(np.argmax(y, axis=1) == np.argmax(predicted, axis=1)) / predicted.shape[0] - # else: - # if is_multi_label_binary(y): - # predicted[predicted < binary_threshold] = 0 - # predicted[predicted >= binary_threshold] = 1 - # return np.count_nonzero(y == predicted) / (predicted.shape[0] * y.shape[1]) - def load_checkpoint_state_dict_by_path(self, model_name: str, path: str = None): """ diff --git a/tests/test_anonymizer.py b/tests/test_anonymizer.py index 83ba2dd..dd753a9 100644 --- a/tests/test_anonymizer.py +++ b/tests/test_anonymizer.py @@ -218,9 +218,9 @@ def test_anonymize_pytorch_multi_label_binary(): bce_loss = functional.binary_cross_entropy_with_logits(input, target, reduction='none') p = sigmoid(input) - p = where(target >= 0.5, p, 1-p) + p = where(target >= 0.5, p, 1 - p) - modulating_factor = (1 - p)**self.gamma + modulating_factor = (1 - p) ** self.gamma alpha = self.alpha * target + (1 - self.alpha) * (1 - target) focal_loss = alpha * modulating_factor * bce_loss diff --git a/tests/test_minimizer.py b/tests/test_minimizer.py index 43c6543..c9e3d62 100644 --- a/tests/test_minimizer.py +++ b/tests/test_minimizer.py @@ -20,7 +20,6 @@ from tensorflow.keras.models import Sequential from tensorflow.keras.layers import Dense, Input from apt.utils.datasets.datasets import PytorchData -from apt.utils.models import ModelOutputType from apt.utils.models.pytorch_model import PyTorchClassifier from apt.minimization import GeneralizeToRepresentative from apt.utils.dataset_utils import get_iris_dataset_np, get_adult_dataset_pd, get_german_credit_dataset_pd @@ -1365,9 +1364,9 @@ def test_minimizer_pytorch_multi_label_binary(): bce_loss = functional.binary_cross_entropy_with_logits(input, target, reduction='none') p = sigmoid(input) - p = where(target >= 0.5, p, 1-p) + p = where(target >= 0.5, p, 1 - p) - modulating_factor = (1 - p)**self.gamma + modulating_factor = (1 - p) ** self.gamma alpha = self.alpha * target + (1 - self.alpha) * (1 - target) focal_loss = alpha * modulating_factor * bce_loss @@ -1388,13 +1387,12 @@ def test_minimizer_pytorch_multi_label_binary(): optimizer = optim.RMSprop(orig_model.parameters(), lr=0.01) model = PyTorchClassifier(model=orig_model, - output_type=ModelOutputType.CLASSIFIER_MULTI_OUTPUT_BINARY_LOGITS, - loss=criterion, - optimizer=optimizer, - input_shape=(24,), - nb_classes=3) - model.fit(PytorchData(x_train, y_train), save_entire_model=False, - nb_epochs=10) + output_type=ModelOutputType.CLASSIFIER_MULTI_OUTPUT_BINARY_LOGITS, + loss=criterion, + optimizer=optimizer, + input_shape=(24,), + nb_classes=3) + model.fit(PytorchData(x_train, y_train), save_entire_model=False, nb_epochs=10) predictions = model.predict(PytorchData(x_train, y_train)) predictions = expit(predictions) predictions[predictions < 0.5] = 0 diff --git a/tests/test_pytorch.py b/tests/test_pytorch.py index 05cf8bc..cf1dc91 100644 --- a/tests/test_pytorch.py +++ b/tests/test_pytorch.py @@ -105,9 +105,9 @@ class FocalLoss(nn.Module): bce_loss = functional.binary_cross_entropy_with_logits(input, target, reduction='none') p = sigmoid(input) - p = where(target >= 0.5, p, 1-p) + p = where(target >= 0.5, p, 1 - p) - modulating_factor = (1 - p)**self.gamma + modulating_factor = (1 - p) ** self.gamma alpha = self.alpha * target + (1 - self.alpha) * (1 - target) focal_loss = alpha * modulating_factor * bce_loss @@ -161,11 +161,11 @@ def test_pytorch_nursery_save_entire_model(): optimizer = optim.Adam(inner_model.parameters(), lr=0.01) model = PyTorchClassifier(model=inner_model, - output_type=ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CLASS_LOGITS, - loss=criterion, - optimizer=optimizer, - input_shape=(24,), - nb_classes=4) + output_type=ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CLASS_LOGITS, + loss=criterion, + optimizer=optimizer, + input_shape=(24,), + nb_classes=4) model.fit(PytorchData(x_train.astype(np.float32), y_train), save_entire_model=True, nb_epochs=10) score = model.score(PytorchData(x_test.astype(np.float32), y_test)) @@ -250,7 +250,6 @@ def test_pytorch_predictions_single_label_binary_prob(): assert (0 < score <= 1.0) - def test_pytorch_predictions_multi_label_cat(): # This kind of model requires special training and will not be supported using the 'fit' method. class multi_label_cat_model(nn.Module): @@ -300,7 +299,7 @@ def test_pytorch_predictions_multi_label_cat(): # Form the loss function loss = 0 for i, o in enumerate(model_outputs): - t = targets[:, i*num_classes:(i+1)*num_classes] + t = targets[:, i * num_classes:(i + 1) * num_classes] loss += criterion(o, t) loss.backward() @@ -308,11 +307,11 @@ def test_pytorch_predictions_multi_label_cat(): optimizer.step() model = PyTorchClassifier(model=inner_model, - output_type=ModelOutputType.CLASSIFIER_MULTI_OUTPUT_CLASS_LOGITS, - loss=criterion, - optimizer=optimizer, - input_shape=(24,), - nb_classes=3) + output_type=ModelOutputType.CLASSIFIER_MULTI_OUTPUT_CLASS_LOGITS, + loss=criterion, + optimizer=optimizer, + input_shape=(24,), + nb_classes=3) pred = model.predict(test) assert (pred.shape[0] == x_test.shape[0]) @@ -349,13 +348,13 @@ def test_pytorch_predictions_multi_label_binary(): optimizer = optim.RMSprop(inner_model.parameters(), lr=0.01) model = PyTorchClassifier(model=inner_model, - output_type=ModelOutputType.CLASSIFIER_MULTI_OUTPUT_BINARY_LOGITS, - loss=criterion, - optimizer=optimizer, - input_shape=(24,), - nb_classes=3) + output_type=ModelOutputType.CLASSIFIER_MULTI_OUTPUT_BINARY_LOGITS, + loss=criterion, + optimizer=optimizer, + input_shape=(24,), + nb_classes=3) model.fit(PytorchData(x_train.astype(np.float32), y_train.astype(np.float32)), save_entire_model=False, - nb_epochs=10) + nb_epochs=10) pred = model.predict(test) assert (pred.shape[0] == x_test.shape[0])