Formatting

Signed-off-by: abigailt <abigailt@il.ibm.com>
This commit is contained in:
abigailt 2024-05-02 17:04:34 +03:00
parent 0f5a1bcaa0
commit a4816878f9
6 changed files with 68 additions and 88 deletions

View file

@ -93,7 +93,7 @@ class GeneralizeToRepresentative(BaseEstimator, MetaEstimatorMixin, TransformerM
if is_regression:
self.estimator = SklearnRegressor(estimator)
else:
#TODO: maybe we should get model output type from user in this case
# TODO: maybe we should get model output type from user in this case
self.estimator = SklearnClassifier(estimator,
ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES)
self.target_accuracy = target_accuracy
@ -813,15 +813,12 @@ class GeneralizeToRepresentative(BaseEstimator, MetaEstimatorMixin, TransformerM
def _calculate_level_cell_label(self, left_cell, right_cell, new_cell):
    """
    Fill in the histogram and label of a cell created by merging two other cells.

    The merged histogram is ``left_cell['hist'] + right_cell['hist']`` and the label is taken from the
    position(s) holding the largest value in that merged histogram. ``new_cell`` is updated in place and
    nothing is returned.

    :param left_cell: First of the two cells being merged; only its 'hist' entry is read.
    :param right_cell: Second of the two cells being merged; only its 'hist' entry is read.
    :param new_cell: The merged cell. Its 'hist' and 'label' entries are (over)written.
    """
    merged_hist = left_cell['hist'] + right_cell['hist']
    new_cell['hist'] = merged_hist
    classes = self._dt.classes_
    if isinstance(classes, list):
        # classes_ is a list with one sequence of class labels per output: for every histogram row pick
        # the label at the position of that row's maximum.
        new_cell['label'] = [classes[output][class_index]
                             for output, class_index in enumerate(np.argmax(merged_hist, axis=1))]
    else:
        # classes_ is a single sequence of labels: only the first histogram row is consulted.
        new_cell['label'] = [classes[np.argmax(merged_hist[0])]]
def _get_nodes_level(self, level):
# level = distance from lowest leaf
node_depth = np.zeros(shape=self._dt.tree_.node_count, dtype=np.int64)

View file

@ -4,7 +4,7 @@ from enum import Enum, auto
import numpy as np
from scipy.special import expit
from apt.utils.datasets import Dataset, Data, DatasetWithPredictions, array2numpy, OUTPUT_DATA_ARRAY_TYPE
from apt.utils.datasets import Dataset, Data, array2numpy, OUTPUT_DATA_ARRAY_TYPE
from art.estimators.classification import BlackBoxClassifier
from art.utils import check_and_transform_label_format
@ -43,40 +43,40 @@ def is_one_hot(y: OUTPUT_DATA_ARRAY_TYPE) -> bool:
def is_multi_label(output_type: ModelOutputType) -> bool:
    """
    Check whether a model output type is one of the multi-output classifier types.

    :param output_type: The model output type to check.
    :type output_type: ModelOutputType
    :return: True if ``output_type`` equals any of the CLASSIFIER_MULTI_OUTPUT_* values listed in the body,
             False otherwise.
    """
    return output_type in (ModelOutputType.CLASSIFIER_MULTI_OUTPUT_CATEGORICAL,
                           ModelOutputType.CLASSIFIER_MULTI_OUTPUT_BINARY_PROBABILITIES,
                           ModelOutputType.CLASSIFIER_MULTI_OUTPUT_CLASS_PROBABILITIES,
                           ModelOutputType.CLASSIFIER_MULTI_OUTPUT_BINARY_LOGITS,
                           ModelOutputType.CLASSIFIER_MULTI_OUTPUT_CLASS_LOGITS)
def is_multi_label_binary(output_type: ModelOutputType) -> bool:
    """
    Check whether a model output type is one of the multi-output binary classifier types.

    :param output_type: The model output type to check.
    :type output_type: ModelOutputType
    :return: True if ``output_type`` is CLASSIFIER_MULTI_OUTPUT_BINARY_PROBABILITIES or
             CLASSIFIER_MULTI_OUTPUT_BINARY_LOGITS, False otherwise.
    """
    return output_type in (ModelOutputType.CLASSIFIER_MULTI_OUTPUT_BINARY_PROBABILITIES,
                           ModelOutputType.CLASSIFIER_MULTI_OUTPUT_BINARY_LOGITS)
def is_binary(output_type: ModelOutputType) -> bool:
    """
    Check whether a model output type is one of the binary classifier types.

    Both the single-output and the multi-output binary variants (probabilities or logits) count as binary.

    :param output_type: The model output type to check.
    :type output_type: ModelOutputType
    :return: True if ``output_type`` equals any of the *_BINARY_PROBABILITIES / *_BINARY_LOGITS values listed
             in the body, False otherwise.
    """
    return output_type in (ModelOutputType.CLASSIFIER_MULTI_OUTPUT_BINARY_PROBABILITIES,
                           ModelOutputType.CLASSIFIER_MULTI_OUTPUT_BINARY_LOGITS,
                           ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_BINARY_PROBABILITIES,
                           ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_BINARY_LOGITS)
def is_categorical(output_type: ModelOutputType) -> bool:
    """
    Check whether a model output type is one of the categorical classifier types.

    :param output_type: The model output type to check.
    :type output_type: ModelOutputType
    :return: True if ``output_type`` is CLASSIFIER_SINGLE_OUTPUT_CATEGORICAL or
             CLASSIFIER_MULTI_OUTPUT_CATEGORICAL, False otherwise.
    """
    return output_type in (ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CATEGORICAL,
                           ModelOutputType.CLASSIFIER_MULTI_OUTPUT_CATEGORICAL)
def is_probabilities(output_type: ModelOutputType) -> bool:
    """
    Check whether a model output type is one of the class-probabilities classifier types.

    Only the *_CLASS_PROBABILITIES values are tested here; the *_BINARY_PROBABILITIES values are not.

    :param output_type: The model output type to check.
    :type output_type: ModelOutputType
    :return: True if ``output_type`` is CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES or
             CLASSIFIER_MULTI_OUTPUT_CLASS_PROBABILITIES, False otherwise.
    """
    return output_type in (ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES,
                           ModelOutputType.CLASSIFIER_MULTI_OUTPUT_CLASS_PROBABILITIES)
def is_logits(output_type: ModelOutputType) -> bool:
    """
    Check whether a model output type is one of the logits classifier types.

    Both class-logits and binary-logits values, single- or multi-output, count as logits.

    :param output_type: The model output type to check.
    :type output_type: ModelOutputType
    :return: True if ``output_type`` equals any of the *_CLASS_LOGITS / *_BINARY_LOGITS values listed in the
             body, False otherwise.
    """
    return output_type in (ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CLASS_LOGITS,
                           ModelOutputType.CLASSIFIER_MULTI_OUTPUT_CLASS_LOGITS,
                           ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_BINARY_LOGITS,
                           ModelOutputType.CLASSIFIER_MULTI_OUTPUT_BINARY_LOGITS)
def get_nb_classes(y: OUTPUT_DATA_ARRAY_TYPE, output_type: ModelOutputType) -> int:
@ -114,10 +114,10 @@ def check_correct_model_output(y: OUTPUT_DATA_ARRAY_TYPE, output_type: ModelOutp
:type output_type: ModelOutputType
:raises: ValueError (in case of mismatch)
"""
if not is_one_hot(y) and (output_type == ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES or
output_type == ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CLASS_LOGITS):
raise ValueError("Incompatible model output types. Model outputs 1D array of categorical scalars while "
"output type is set to ", output_type)
if not is_one_hot(y) and (output_type == ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES
or output_type == ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CLASS_LOGITS):
raise ValueError("Incompatible model output types. Model outputs 1D array of categorical scalars while "
"output type is set to ", output_type)
class Model(metaclass=ABCMeta):
@ -209,13 +209,13 @@ class Model(metaclass=ABCMeta):
if scoring_method == ScoringMethod.ACCURACY:
if not is_multi_label(self.output_type) and not is_binary(self.output_type) and nb_classes is not None:
y = check_and_transform_label_format(y, nb_classes=nb_classes)
if (self.output_type == ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES or
self.output_type == ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CLASS_LOGITS or
self.output_type == ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CATEGORICAL):
if (self.output_type == ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES
or self.output_type == ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CLASS_LOGITS
or self.output_type == ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CATEGORICAL):
# categorical has been 1-hot encoded by check_and_transform_label_format
return np.count_nonzero(np.argmax(y, axis=1) == np.argmax(predicted, axis=1)) / predicted.shape[0]
elif (self.output_type == ModelOutputType.CLASSIFIER_MULTI_OUTPUT_CLASS_LOGITS or
self.output_type == ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES):
elif (self.output_type == ModelOutputType.CLASSIFIER_MULTI_OUTPUT_CLASS_LOGITS
or self.output_type == ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES):
if predicted.shape != y.shape:
raise ValueError('Do not know how to compare arrays with different shapes')
elif len(predicted.shape) < 3:
@ -372,7 +372,7 @@ class BlackboxClassifier(Model):
"""
Score the model using test data.
"""
kwargs ['nb_classes'] = self.nb_classes
kwargs['nb_classes'] = self.nb_classes
return super().score(test_data, **kwargs)
def fit(self, train_data: Dataset, **kwargs) -> None:

View file

@ -3,7 +3,7 @@ import os
import shutil
import logging
from typing import Optional, Tuple, Union, List
from typing import Optional, Tuple, Union, List, TYPE_CHECKING
import numpy as np
import torch
from torch.utils.data import DataLoader, TensorDataset
@ -14,6 +14,11 @@ from apt.utils.models import Model, ModelOutputType, is_multi_label, is_multi_la
from apt.utils.datasets import OUTPUT_DATA_ARRAY_TYPE, array2numpy
from art.estimators.classification.pytorch import PyTorchClassifier as ArtPyTorchClassifier
if TYPE_CHECKING:
from art.utils import CLIP_VALUES_TYPE, PREPROCESSING_TYPE
from art.defences.preprocessor import Preprocessor
from art.defences.postprocessor import Postprocessor
logger = logging.getLogger(__name__)
@ -51,8 +56,8 @@ class PyTorchClassifierWrapper(ArtPyTorchClassifier):
super().__init__(model, loss, input_shape, nb_classes, optimizer, use_amp, opt_level, loss_scale,
channels_first, clip_values, preprocessing_defences, postprocessing_defences, preprocessing,
device_type)
self._is_single_binary = (output_type == ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_BINARY_PROBABILITIES or
output_type == ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_BINARY_LOGITS)
self._is_single_binary = (output_type == ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_BINARY_PROBABILITIES
or output_type == ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_BINARY_LOGITS)
self._is_multi_label = is_multi_label(output_type)
self._is_multi_label_binary = is_multi_label_binary(output_type)
@ -504,29 +509,10 @@ class PyTorchClassifier(PyTorchModel):
:return: the score as float (between 0 and 1)
"""
# numpy arrays
y = test_data.get_labels()
predicted = self.predict(test_data)
kwargs['predictions'] = DatasetWithPredictions(pred=predicted)
kwargs['nb_classes'] = self._nb_classes
return super().score(ArrayDataset(test_data.get_samples(), test_data.get_labels()), **kwargs)
# if apply_non_linearity:
# predicted = apply_non_linearity(predicted)
# # binary classification, single column of probabilities
# if self._art_model.nb_classes == 2 and (len(predicted.shape) == 1 or predicted.shape[1] == 1):
# if len(predicted.shape) > 1:
# y = check_and_transform_label_format(y, self._art_model.nb_classes, return_one_hot=False)
# return np.count_nonzero(y == (predicted > binary_threshold)) / predicted.shape[0]
# # multi column
# else:
# if not is_multi_label(y):
# y = check_and_transform_label_format(y, self._art_model.nb_classes)
# return np.count_nonzero(np.argmax(y, axis=1) == np.argmax(predicted, axis=1)) / predicted.shape[0]
# else:
# if is_multi_label_binary(y):
# predicted[predicted < binary_threshold] = 0
# predicted[predicted >= binary_threshold] = 1
# return np.count_nonzero(y == predicted) / (predicted.shape[0] * y.shape[1])
def load_checkpoint_state_dict_by_path(self, model_name: str, path: str = None):
"""

View file

@ -218,9 +218,9 @@ def test_anonymize_pytorch_multi_label_binary():
bce_loss = functional.binary_cross_entropy_with_logits(input, target, reduction='none')
p = sigmoid(input)
p = where(target >= 0.5, p, 1-p)
p = where(target >= 0.5, p, 1 - p)
modulating_factor = (1 - p)**self.gamma
modulating_factor = (1 - p) ** self.gamma
alpha = self.alpha * target + (1 - self.alpha) * (1 - target)
focal_loss = alpha * modulating_factor * bce_loss

View file

@ -20,7 +20,6 @@ from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input
from apt.utils.datasets.datasets import PytorchData
from apt.utils.models import ModelOutputType
from apt.utils.models.pytorch_model import PyTorchClassifier
from apt.minimization import GeneralizeToRepresentative
from apt.utils.dataset_utils import get_iris_dataset_np, get_adult_dataset_pd, get_german_credit_dataset_pd
@ -1365,9 +1364,9 @@ def test_minimizer_pytorch_multi_label_binary():
bce_loss = functional.binary_cross_entropy_with_logits(input, target, reduction='none')
p = sigmoid(input)
p = where(target >= 0.5, p, 1-p)
p = where(target >= 0.5, p, 1 - p)
modulating_factor = (1 - p)**self.gamma
modulating_factor = (1 - p) ** self.gamma
alpha = self.alpha * target + (1 - self.alpha) * (1 - target)
focal_loss = alpha * modulating_factor * bce_loss
@ -1388,13 +1387,12 @@ def test_minimizer_pytorch_multi_label_binary():
optimizer = optim.RMSprop(orig_model.parameters(), lr=0.01)
model = PyTorchClassifier(model=orig_model,
output_type=ModelOutputType.CLASSIFIER_MULTI_OUTPUT_BINARY_LOGITS,
loss=criterion,
optimizer=optimizer,
input_shape=(24,),
nb_classes=3)
model.fit(PytorchData(x_train, y_train), save_entire_model=False,
nb_epochs=10)
output_type=ModelOutputType.CLASSIFIER_MULTI_OUTPUT_BINARY_LOGITS,
loss=criterion,
optimizer=optimizer,
input_shape=(24,),
nb_classes=3)
model.fit(PytorchData(x_train, y_train), save_entire_model=False, nb_epochs=10)
predictions = model.predict(PytorchData(x_train, y_train))
predictions = expit(predictions)
predictions[predictions < 0.5] = 0

View file

@ -105,9 +105,9 @@ class FocalLoss(nn.Module):
bce_loss = functional.binary_cross_entropy_with_logits(input, target, reduction='none')
p = sigmoid(input)
p = where(target >= 0.5, p, 1-p)
p = where(target >= 0.5, p, 1 - p)
modulating_factor = (1 - p)**self.gamma
modulating_factor = (1 - p) ** self.gamma
alpha = self.alpha * target + (1 - self.alpha) * (1 - target)
focal_loss = alpha * modulating_factor * bce_loss
@ -161,11 +161,11 @@ def test_pytorch_nursery_save_entire_model():
optimizer = optim.Adam(inner_model.parameters(), lr=0.01)
model = PyTorchClassifier(model=inner_model,
output_type=ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CLASS_LOGITS,
loss=criterion,
optimizer=optimizer,
input_shape=(24,),
nb_classes=4)
output_type=ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CLASS_LOGITS,
loss=criterion,
optimizer=optimizer,
input_shape=(24,),
nb_classes=4)
model.fit(PytorchData(x_train.astype(np.float32), y_train), save_entire_model=True, nb_epochs=10)
score = model.score(PytorchData(x_test.astype(np.float32), y_test))
@ -250,7 +250,6 @@ def test_pytorch_predictions_single_label_binary_prob():
assert (0 < score <= 1.0)
def test_pytorch_predictions_multi_label_cat():
# This kind of model requires special training and will not be supported using the 'fit' method.
class multi_label_cat_model(nn.Module):
@ -300,7 +299,7 @@ def test_pytorch_predictions_multi_label_cat():
# Form the loss function
loss = 0
for i, o in enumerate(model_outputs):
t = targets[:, i*num_classes:(i+1)*num_classes]
t = targets[:, i * num_classes:(i + 1) * num_classes]
loss += criterion(o, t)
loss.backward()
@ -308,11 +307,11 @@ def test_pytorch_predictions_multi_label_cat():
optimizer.step()
model = PyTorchClassifier(model=inner_model,
output_type=ModelOutputType.CLASSIFIER_MULTI_OUTPUT_CLASS_LOGITS,
loss=criterion,
optimizer=optimizer,
input_shape=(24,),
nb_classes=3)
output_type=ModelOutputType.CLASSIFIER_MULTI_OUTPUT_CLASS_LOGITS,
loss=criterion,
optimizer=optimizer,
input_shape=(24,),
nb_classes=3)
pred = model.predict(test)
assert (pred.shape[0] == x_test.shape[0])
@ -349,13 +348,13 @@ def test_pytorch_predictions_multi_label_binary():
optimizer = optim.RMSprop(inner_model.parameters(), lr=0.01)
model = PyTorchClassifier(model=inner_model,
output_type=ModelOutputType.CLASSIFIER_MULTI_OUTPUT_BINARY_LOGITS,
loss=criterion,
optimizer=optimizer,
input_shape=(24,),
nb_classes=3)
output_type=ModelOutputType.CLASSIFIER_MULTI_OUTPUT_BINARY_LOGITS,
loss=criterion,
optimizer=optimizer,
input_shape=(24,),
nb_classes=3)
model.fit(PytorchData(x_train.astype(np.float32), y_train.astype(np.float32)), save_entire_model=False,
nb_epochs=10)
nb_epochs=10)
pred = model.predict(test)
assert (pred.shape[0] == x_test.shape[0])