mirror of
https://github.com/IBM/ai-privacy-toolkit.git
synced 2026-06-08 15:05:13 +02:00
Formatting
Signed-off-by: abigailt <abigailt@il.ibm.com>
This commit is contained in:
parent
0f5a1bcaa0
commit
a4816878f9
6 changed files with 68 additions and 88 deletions
|
|
@@ -93,7 +93,7 @@ class GeneralizeToRepresentative(BaseEstimator, MetaEstimatorMixin, TransformerM
|
|||
if is_regression:
|
||||
self.estimator = SklearnRegressor(estimator)
|
||||
else:
|
||||
#TODO: maybe we should get model output type from user in this case
|
||||
# TODO: maybe we should get model output type from user in this case
|
||||
self.estimator = SklearnClassifier(estimator,
|
||||
ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES)
|
||||
self.target_accuracy = target_accuracy
|
||||
|
|
@@ -813,15 +813,12 @@ class GeneralizeToRepresentative(BaseEstimator, MetaEstimatorMixin, TransformerM
|
|||
|
||||
def _calculate_level_cell_label(self, left_cell, right_cell, new_cell):
|
||||
new_cell['hist'] = left_cell['hist'] + right_cell['hist']
|
||||
# [x + y for x, y in
|
||||
# zip(left_cell['hist'], right_cell['hist'])] if not self.is_regression else []
|
||||
if isinstance(self._dt.classes_, list):
|
||||
new_cell['label'] = [self._dt.classes_[output][class_index]
|
||||
for output, class_index in enumerate(np.argmax(new_cell['hist'], axis=1))]
|
||||
else:
|
||||
new_cell['label'] = [self._dt.classes_[np.argmax(new_cell['hist'][0])]]
|
||||
|
||||
|
||||
def _get_nodes_level(self, level):
|
||||
# level = distance from lowest leaf
|
||||
node_depth = np.zeros(shape=self._dt.tree_.node_count, dtype=np.int64)
|
||||
|
|
|
|||
|
|
@@ -4,7 +4,7 @@ from enum import Enum, auto
|
|||
import numpy as np
|
||||
from scipy.special import expit
|
||||
|
||||
from apt.utils.datasets import Dataset, Data, DatasetWithPredictions, array2numpy, OUTPUT_DATA_ARRAY_TYPE
|
||||
from apt.utils.datasets import Dataset, Data, array2numpy, OUTPUT_DATA_ARRAY_TYPE
|
||||
from art.estimators.classification import BlackBoxClassifier
|
||||
from art.utils import check_and_transform_label_format
|
||||
|
||||
|
|
@@ -43,40 +43,40 @@ def is_one_hot(y: OUTPUT_DATA_ARRAY_TYPE) -> bool:
|
|||
|
||||
|
||||
def is_multi_label(output_type: ModelOutputType) -> bool:
|
||||
return (output_type == ModelOutputType.CLASSIFIER_MULTI_OUTPUT_CATEGORICAL or
|
||||
output_type == ModelOutputType.CLASSIFIER_MULTI_OUTPUT_BINARY_PROBABILITIES or
|
||||
output_type == ModelOutputType.CLASSIFIER_MULTI_OUTPUT_CLASS_PROBABILITIES or
|
||||
output_type == ModelOutputType.CLASSIFIER_MULTI_OUTPUT_BINARY_LOGITS or
|
||||
output_type == ModelOutputType.CLASSIFIER_MULTI_OUTPUT_CLASS_LOGITS)
|
||||
return (output_type == ModelOutputType.CLASSIFIER_MULTI_OUTPUT_CATEGORICAL
|
||||
or output_type == ModelOutputType.CLASSIFIER_MULTI_OUTPUT_BINARY_PROBABILITIES
|
||||
or output_type == ModelOutputType.CLASSIFIER_MULTI_OUTPUT_CLASS_PROBABILITIES
|
||||
or output_type == ModelOutputType.CLASSIFIER_MULTI_OUTPUT_BINARY_LOGITS
|
||||
or output_type == ModelOutputType.CLASSIFIER_MULTI_OUTPUT_CLASS_LOGITS)
|
||||
|
||||
|
||||
def is_multi_label_binary(output_type: ModelOutputType) -> bool:
|
||||
return (output_type == ModelOutputType.CLASSIFIER_MULTI_OUTPUT_BINARY_PROBABILITIES or
|
||||
output_type == ModelOutputType.CLASSIFIER_MULTI_OUTPUT_BINARY_LOGITS)
|
||||
return (output_type == ModelOutputType.CLASSIFIER_MULTI_OUTPUT_BINARY_PROBABILITIES
|
||||
or output_type == ModelOutputType.CLASSIFIER_MULTI_OUTPUT_BINARY_LOGITS)
|
||||
|
||||
|
||||
def is_binary(output_type: ModelOutputType) -> bool:
|
||||
return (output_type == ModelOutputType.CLASSIFIER_MULTI_OUTPUT_BINARY_PROBABILITIES or
|
||||
output_type == ModelOutputType.CLASSIFIER_MULTI_OUTPUT_BINARY_LOGITS or
|
||||
output_type == ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_BINARY_PROBABILITIES or
|
||||
output_type == ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_BINARY_LOGITS)
|
||||
return (output_type == ModelOutputType.CLASSIFIER_MULTI_OUTPUT_BINARY_PROBABILITIES
|
||||
or output_type == ModelOutputType.CLASSIFIER_MULTI_OUTPUT_BINARY_LOGITS
|
||||
or output_type == ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_BINARY_PROBABILITIES
|
||||
or output_type == ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_BINARY_LOGITS)
|
||||
|
||||
|
||||
def is_categorical(output_type: ModelOutputType) -> bool:
|
||||
return (output_type == ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CATEGORICAL or
|
||||
output_type == ModelOutputType.CLASSIFIER_MULTI_OUTPUT_CATEGORICAL)
|
||||
return (output_type == ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CATEGORICAL
|
||||
or output_type == ModelOutputType.CLASSIFIER_MULTI_OUTPUT_CATEGORICAL)
|
||||
|
||||
|
||||
def is_probabilities(output_type: ModelOutputType) -> bool:
|
||||
return (output_type == ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES or
|
||||
output_type == ModelOutputType.CLASSIFIER_MULTI_OUTPUT_CLASS_PROBABILITIES)
|
||||
return (output_type == ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES
|
||||
or output_type == ModelOutputType.CLASSIFIER_MULTI_OUTPUT_CLASS_PROBABILITIES)
|
||||
|
||||
|
||||
def is_logits(output_type: ModelOutputType) -> bool:
|
||||
return (output_type == ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CLASS_LOGITS or
|
||||
output_type == ModelOutputType.CLASSIFIER_MULTI_OUTPUT_CLASS_LOGITS or
|
||||
output_type == ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_BINARY_LOGITS or
|
||||
output_type == ModelOutputType.CLASSIFIER_MULTI_OUTPUT_BINARY_LOGITS)
|
||||
return (output_type == ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CLASS_LOGITS
|
||||
or output_type == ModelOutputType.CLASSIFIER_MULTI_OUTPUT_CLASS_LOGITS
|
||||
or output_type == ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_BINARY_LOGITS
|
||||
or output_type == ModelOutputType.CLASSIFIER_MULTI_OUTPUT_BINARY_LOGITS)
|
||||
|
||||
|
||||
def get_nb_classes(y: OUTPUT_DATA_ARRAY_TYPE, output_type: ModelOutputType) -> int:
|
||||
|
|
@@ -114,10 +114,10 @@ def check_correct_model_output(y: OUTPUT_DATA_ARRAY_TYPE, output_type: ModelOutp
|
|||
:type output_type: ModelOutputType
|
||||
:raises: ValueError (in case of mismatch)
|
||||
"""
|
||||
if not is_one_hot(y) and (output_type == ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES or
|
||||
output_type == ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CLASS_LOGITS):
|
||||
raise ValueError("Incompatible model output types. Model outputs 1D array of categorical scalars while "
|
||||
"output type is set to ", output_type)
|
||||
if not is_one_hot(y) and (output_type == ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES
|
||||
or output_type == ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CLASS_LOGITS):
|
||||
raise ValueError("Incompatible model output types. Model outputs 1D array of categorical scalars while "
|
||||
"output type is set to ", output_type)
|
||||
|
||||
|
||||
class Model(metaclass=ABCMeta):
|
||||
|
|
@@ -209,13 +209,13 @@ class Model(metaclass=ABCMeta):
|
|||
if scoring_method == ScoringMethod.ACCURACY:
|
||||
if not is_multi_label(self.output_type) and not is_binary(self.output_type) and nb_classes is not None:
|
||||
y = check_and_transform_label_format(y, nb_classes=nb_classes)
|
||||
if (self.output_type == ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES or
|
||||
self.output_type == ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CLASS_LOGITS or
|
||||
self.output_type == ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CATEGORICAL):
|
||||
if (self.output_type == ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES
|
||||
or self.output_type == ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CLASS_LOGITS
|
||||
or self.output_type == ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CATEGORICAL):
|
||||
# categorical has been 1-hot encoded by check_and_transform_label_format
|
||||
return np.count_nonzero(np.argmax(y, axis=1) == np.argmax(predicted, axis=1)) / predicted.shape[0]
|
||||
elif (self.output_type == ModelOutputType.CLASSIFIER_MULTI_OUTPUT_CLASS_LOGITS or
|
||||
self.output_type == ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES):
|
||||
elif (self.output_type == ModelOutputType.CLASSIFIER_MULTI_OUTPUT_CLASS_LOGITS
|
||||
or self.output_type == ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES):
|
||||
if predicted.shape != y.shape:
|
||||
raise ValueError('Do not know how to compare arrays with different shapes')
|
||||
elif len(predicted.shape) < 3:
|
||||
|
|
@@ -372,7 +372,7 @@ class BlackboxClassifier(Model):
|
|||
"""
|
||||
Score the model using test data.
|
||||
"""
|
||||
kwargs ['nb_classes'] = self.nb_classes
|
||||
kwargs['nb_classes'] = self.nb_classes
|
||||
return super().score(test_data, **kwargs)
|
||||
|
||||
def fit(self, train_data: Dataset, **kwargs) -> None:
|
||||
|
|
|
|||
|
|
@@ -3,7 +3,7 @@ import os
|
|||
import shutil
|
||||
import logging
|
||||
|
||||
from typing import Optional, Tuple, Union, List
|
||||
from typing import Optional, Tuple, Union, List, TYPE_CHECKING
|
||||
import numpy as np
|
||||
import torch
|
||||
from torch.utils.data import DataLoader, TensorDataset
|
||||
|
|
@@ -14,6 +14,11 @@ from apt.utils.models import Model, ModelOutputType, is_multi_label, is_multi_la
|
|||
from apt.utils.datasets import OUTPUT_DATA_ARRAY_TYPE, array2numpy
|
||||
from art.estimators.classification.pytorch import PyTorchClassifier as ArtPyTorchClassifier
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from art.utils import CLIP_VALUES_TYPE, PREPROCESSING_TYPE
|
||||
from art.defences.preprocessor import Preprocessor
|
||||
from art.defences.postprocessor import Postprocessor
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
@@ -51,8 +56,8 @@ class PyTorchClassifierWrapper(ArtPyTorchClassifier):
|
|||
super().__init__(model, loss, input_shape, nb_classes, optimizer, use_amp, opt_level, loss_scale,
|
||||
channels_first, clip_values, preprocessing_defences, postprocessing_defences, preprocessing,
|
||||
device_type)
|
||||
self._is_single_binary = (output_type == ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_BINARY_PROBABILITIES or
|
||||
output_type == ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_BINARY_LOGITS)
|
||||
self._is_single_binary = (output_type == ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_BINARY_PROBABILITIES
|
||||
or output_type == ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_BINARY_LOGITS)
|
||||
self._is_multi_label = is_multi_label(output_type)
|
||||
self._is_multi_label_binary = is_multi_label_binary(output_type)
|
||||
|
||||
|
|
@@ -504,29 +509,10 @@ class PyTorchClassifier(PyTorchModel):
|
|||
:return: the score as float (between 0 and 1)
|
||||
"""
|
||||
# numpy arrays
|
||||
y = test_data.get_labels()
|
||||
predicted = self.predict(test_data)
|
||||
kwargs['predictions'] = DatasetWithPredictions(pred=predicted)
|
||||
kwargs['nb_classes'] = self._nb_classes
|
||||
return super().score(ArrayDataset(test_data.get_samples(), test_data.get_labels()), **kwargs)
|
||||
# if apply_non_linearity:
|
||||
# predicted = apply_non_linearity(predicted)
|
||||
# # binary classification, single column of probabilities
|
||||
# if self._art_model.nb_classes == 2 and (len(predicted.shape) == 1 or predicted.shape[1] == 1):
|
||||
# if len(predicted.shape) > 1:
|
||||
# y = check_and_transform_label_format(y, self._art_model.nb_classes, return_one_hot=False)
|
||||
# return np.count_nonzero(y == (predicted > binary_threshold)) / predicted.shape[0]
|
||||
# # multi column
|
||||
# else:
|
||||
# if not is_multi_label(y):
|
||||
# y = check_and_transform_label_format(y, self._art_model.nb_classes)
|
||||
# return np.count_nonzero(np.argmax(y, axis=1) == np.argmax(predicted, axis=1)) / predicted.shape[0]
|
||||
# else:
|
||||
# if is_multi_label_binary(y):
|
||||
# predicted[predicted < binary_threshold] = 0
|
||||
# predicted[predicted >= binary_threshold] = 1
|
||||
# return np.count_nonzero(y == predicted) / (predicted.shape[0] * y.shape[1])
|
||||
|
||||
|
||||
def load_checkpoint_state_dict_by_path(self, model_name: str, path: str = None):
|
||||
"""
|
||||
|
|
|
|||
|
|
@@ -218,9 +218,9 @@ def test_anonymize_pytorch_multi_label_binary():
|
|||
bce_loss = functional.binary_cross_entropy_with_logits(input, target, reduction='none')
|
||||
|
||||
p = sigmoid(input)
|
||||
p = where(target >= 0.5, p, 1-p)
|
||||
p = where(target >= 0.5, p, 1 - p)
|
||||
|
||||
modulating_factor = (1 - p)**self.gamma
|
||||
modulating_factor = (1 - p) ** self.gamma
|
||||
alpha = self.alpha * target + (1 - self.alpha) * (1 - target)
|
||||
focal_loss = alpha * modulating_factor * bce_loss
|
||||
|
||||
|
|
|
|||
|
|
@@ -20,7 +20,6 @@ from tensorflow.keras.models import Sequential
|
|||
from tensorflow.keras.layers import Dense, Input
|
||||
|
||||
from apt.utils.datasets.datasets import PytorchData
|
||||
from apt.utils.models import ModelOutputType
|
||||
from apt.utils.models.pytorch_model import PyTorchClassifier
|
||||
from apt.minimization import GeneralizeToRepresentative
|
||||
from apt.utils.dataset_utils import get_iris_dataset_np, get_adult_dataset_pd, get_german_credit_dataset_pd
|
||||
|
|
@@ -1365,9 +1364,9 @@ def test_minimizer_pytorch_multi_label_binary():
|
|||
bce_loss = functional.binary_cross_entropy_with_logits(input, target, reduction='none')
|
||||
|
||||
p = sigmoid(input)
|
||||
p = where(target >= 0.5, p, 1-p)
|
||||
p = where(target >= 0.5, p, 1 - p)
|
||||
|
||||
modulating_factor = (1 - p)**self.gamma
|
||||
modulating_factor = (1 - p) ** self.gamma
|
||||
alpha = self.alpha * target + (1 - self.alpha) * (1 - target)
|
||||
focal_loss = alpha * modulating_factor * bce_loss
|
||||
|
||||
|
|
@@ -1388,13 +1387,12 @@ def test_minimizer_pytorch_multi_label_binary():
|
|||
optimizer = optim.RMSprop(orig_model.parameters(), lr=0.01)
|
||||
|
||||
model = PyTorchClassifier(model=orig_model,
|
||||
output_type=ModelOutputType.CLASSIFIER_MULTI_OUTPUT_BINARY_LOGITS,
|
||||
loss=criterion,
|
||||
optimizer=optimizer,
|
||||
input_shape=(24,),
|
||||
nb_classes=3)
|
||||
model.fit(PytorchData(x_train, y_train), save_entire_model=False,
|
||||
nb_epochs=10)
|
||||
output_type=ModelOutputType.CLASSIFIER_MULTI_OUTPUT_BINARY_LOGITS,
|
||||
loss=criterion,
|
||||
optimizer=optimizer,
|
||||
input_shape=(24,),
|
||||
nb_classes=3)
|
||||
model.fit(PytorchData(x_train, y_train), save_entire_model=False, nb_epochs=10)
|
||||
predictions = model.predict(PytorchData(x_train, y_train))
|
||||
predictions = expit(predictions)
|
||||
predictions[predictions < 0.5] = 0
|
||||
|
|
|
|||
|
|
@@ -105,9 +105,9 @@ class FocalLoss(nn.Module):
|
|||
bce_loss = functional.binary_cross_entropy_with_logits(input, target, reduction='none')
|
||||
|
||||
p = sigmoid(input)
|
||||
p = where(target >= 0.5, p, 1-p)
|
||||
p = where(target >= 0.5, p, 1 - p)
|
||||
|
||||
modulating_factor = (1 - p)**self.gamma
|
||||
modulating_factor = (1 - p) ** self.gamma
|
||||
alpha = self.alpha * target + (1 - self.alpha) * (1 - target)
|
||||
focal_loss = alpha * modulating_factor * bce_loss
|
||||
|
||||
|
|
@@ -161,11 +161,11 @@ def test_pytorch_nursery_save_entire_model():
|
|||
optimizer = optim.Adam(inner_model.parameters(), lr=0.01)
|
||||
|
||||
model = PyTorchClassifier(model=inner_model,
|
||||
output_type=ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CLASS_LOGITS,
|
||||
loss=criterion,
|
||||
optimizer=optimizer,
|
||||
input_shape=(24,),
|
||||
nb_classes=4)
|
||||
output_type=ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CLASS_LOGITS,
|
||||
loss=criterion,
|
||||
optimizer=optimizer,
|
||||
input_shape=(24,),
|
||||
nb_classes=4)
|
||||
model.fit(PytorchData(x_train.astype(np.float32), y_train), save_entire_model=True, nb_epochs=10)
|
||||
|
||||
score = model.score(PytorchData(x_test.astype(np.float32), y_test))
|
||||
|
|
@@ -250,7 +250,6 @@ def test_pytorch_predictions_single_label_binary_prob():
|
|||
assert (0 < score <= 1.0)
|
||||
|
||||
|
||||
|
||||
def test_pytorch_predictions_multi_label_cat():
|
||||
# This kind of model requires special training and will not be supported using the 'fit' method.
|
||||
class multi_label_cat_model(nn.Module):
|
||||
|
|
@@ -300,7 +299,7 @@ def test_pytorch_predictions_multi_label_cat():
|
|||
# Form the loss function
|
||||
loss = 0
|
||||
for i, o in enumerate(model_outputs):
|
||||
t = targets[:, i*num_classes:(i+1)*num_classes]
|
||||
t = targets[:, i * num_classes:(i + 1) * num_classes]
|
||||
loss += criterion(o, t)
|
||||
|
||||
loss.backward()
|
||||
|
|
@@ -308,11 +307,11 @@ def test_pytorch_predictions_multi_label_cat():
|
|||
optimizer.step()
|
||||
|
||||
model = PyTorchClassifier(model=inner_model,
|
||||
output_type=ModelOutputType.CLASSIFIER_MULTI_OUTPUT_CLASS_LOGITS,
|
||||
loss=criterion,
|
||||
optimizer=optimizer,
|
||||
input_shape=(24,),
|
||||
nb_classes=3)
|
||||
output_type=ModelOutputType.CLASSIFIER_MULTI_OUTPUT_CLASS_LOGITS,
|
||||
loss=criterion,
|
||||
optimizer=optimizer,
|
||||
input_shape=(24,),
|
||||
nb_classes=3)
|
||||
|
||||
pred = model.predict(test)
|
||||
assert (pred.shape[0] == x_test.shape[0])
|
||||
|
|
@@ -349,13 +348,13 @@ def test_pytorch_predictions_multi_label_binary():
|
|||
optimizer = optim.RMSprop(inner_model.parameters(), lr=0.01)
|
||||
|
||||
model = PyTorchClassifier(model=inner_model,
|
||||
output_type=ModelOutputType.CLASSIFIER_MULTI_OUTPUT_BINARY_LOGITS,
|
||||
loss=criterion,
|
||||
optimizer=optimizer,
|
||||
input_shape=(24,),
|
||||
nb_classes=3)
|
||||
output_type=ModelOutputType.CLASSIFIER_MULTI_OUTPUT_BINARY_LOGITS,
|
||||
loss=criterion,
|
||||
optimizer=optimizer,
|
||||
input_shape=(24,),
|
||||
nb_classes=3)
|
||||
model.fit(PytorchData(x_train.astype(np.float32), y_train.astype(np.float32)), save_entire_model=False,
|
||||
nb_epochs=10)
|
||||
nb_epochs=10)
|
||||
pred = model.predict(test)
|
||||
assert (pred.shape[0] == x_test.shape[0])
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue