Support for many new model output types (#93)

* General model wrappers and methods supporting multi-label classifiers
* Support for pytorch multi-label binary classifier
* New model output types + single implementation of score method that supports multiple output types. 
* Anonymization with pytorch multi-output binary model
* Support for multi-label binary models in minimizer. 
* Support for multi-label logits/probabilities
---------
Signed-off-by: abigailt <abigailt@il.ibm.com>
This commit is contained in:
abigailgold 2024-07-03 09:04:59 -04:00 committed by GitHub
parent e00535d120
commit 57e38ea4fa
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
13 changed files with 913 additions and 172 deletions

View file

@ -16,7 +16,8 @@ from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.model_selection import train_test_split
from apt.utils.datasets import ArrayDataset, DATA_PANDAS_NUMPY_TYPE
from apt.utils.models import Model, SklearnRegressor, ModelOutputType, SklearnClassifier
from apt.utils.models import Model, SklearnRegressor, SklearnClassifier, \
CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES
@dataclass
@ -93,7 +94,10 @@ class GeneralizeToRepresentative(BaseEstimator, MetaEstimatorMixin, TransformerM
if is_regression:
self.estimator = SklearnRegressor(estimator)
else:
self.estimator = SklearnClassifier(estimator, ModelOutputType.CLASSIFIER_PROBABILITIES)
# model output type is not critical as it only affects computation of nb_classes, which is in any case
# the same currently for single and multi output probabilities.
self.estimator = SklearnClassifier(estimator,
CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES)
self.target_accuracy = target_accuracy
self.cells = cells
self.categorical_features = []
@ -678,7 +682,7 @@ class GeneralizeToRepresentative(BaseEstimator, MetaEstimatorMixin, TransformerM
# this is a leaf
# if it is a regression problem we do not use label
label = self._calculate_cell_label(node) if not self.is_regression else 1
hist = [int(i) for i in self._dt.tree_.value[node][0]] if not self.is_regression else []
hist = self._dt.tree_.value[node]
cell = {'label': label, 'hist': hist, 'ranges': {}, 'id': int(node)}
return [cell]
@ -709,8 +713,11 @@ class GeneralizeToRepresentative(BaseEstimator, MetaEstimatorMixin, TransformerM
return cells
def _calculate_cell_label(self, node):
    """
    Compute the representative label(s) for a decision-tree leaf node.

    Uses the class-count histogram stored in the fitted tree for this node
    and returns, for each output, the class with the highest count.

    :param node: index of the node within the fitted decision tree
    :return: list of labels, one per output (a single-element list for
             single-output models)
    """
    label_hist = self._dt.tree_.value[node]
    if isinstance(self._dt.classes_, list):
        # multi-output tree: classes_ is a list of per-output class arrays,
        # label_hist has shape (n_outputs, n_classes)
        return [self._dt.classes_[output][class_index]
                for output, class_index in enumerate(np.argmax(label_hist, axis=1))]
    # single-output tree: histogram has shape (1, n_classes)
    return [self._dt.classes_[np.argmax(label_hist[0])]]
def _modify_cells(self):
cells = []
@ -807,9 +814,12 @@ class GeneralizeToRepresentative(BaseEstimator, MetaEstimatorMixin, TransformerM
# else: nothing to do, stay with previous cells
def _calculate_level_cell_label(self, left_cell, right_cell, new_cell):
    """
    Derive the histogram and label of a merged (parent) cell from its two
    child cells.

    :param left_cell: cell dict of the left child (must contain 'hist')
    :param right_cell: cell dict of the right child (must contain 'hist')
    :param new_cell: cell dict to fill in place with 'hist' and 'label'
    """
    # hist values originate from tree_.value and are numpy arrays,
    # so '+' sums the class counts element-wise
    new_cell['hist'] = left_cell['hist'] + right_cell['hist']
    if isinstance(self._dt.classes_, list):
        # multi-output tree: one label per output, argmax over each row
        new_cell['label'] = [self._dt.classes_[output][class_index]
                             for output, class_index in enumerate(np.argmax(new_cell['hist'], axis=1))]
    else:
        # single-output tree: histogram row 0 holds the class counts
        new_cell['label'] = [self._dt.classes_[np.argmax(new_cell['hist'][0])]]
def _get_nodes_level(self, level):
# level = distance from lowest leaf
@ -837,26 +847,28 @@ class GeneralizeToRepresentative(BaseEstimator, MetaEstimatorMixin, TransformerM
# return all nodes with depth == level or leaves higher than level
return [i for i, x in enumerate(node_depth) if x == depth or (x < depth and is_leaves[i])]
def _attach_cells_representatives(self, prepared_data, originalTrainFeatures, labelFeature, level_nodes):
def _attach_cells_representatives(self, prepared_data, original_train_features, label_feature, level_nodes):
# prepared data include one hot encoded categorical data,
# if there is no categorical data prepared data is original data
nodeIds = self._find_sample_nodes(prepared_data, level_nodes)
labels_df = pd.DataFrame(labelFeature, columns=['label'])
for cell in self.cells:
cell['representative'] = {}
# get all rows in cell
indexes = [i for i, x in enumerate(nodeIds) if x == cell['id']]
original_rows = originalTrainFeatures.iloc[indexes]
original_rows = original_train_features.iloc[indexes]
sample_rows = prepared_data.iloc[indexes]
sample_labels = labels_df.iloc[indexes]['label'].values.tolist()
# get rows with matching label
if self.is_regression:
if self.is_regression or (len(label_feature.shape) > 1 and label_feature.shape[1] > 1):
match_samples = sample_rows
match_rows = original_rows
else:
indexes = [i for i, label in enumerate(sample_labels) if label == cell['label']]
labels_df = pd.DataFrame(label_feature, columns=['label'])
sample_labels = labels_df.iloc[indexes]['label'].values.tolist()
indexes = [i for i, label in enumerate(sample_labels) if label == cell['label'][0]]
match_samples = sample_rows.iloc[indexes]
match_rows = original_rows.iloc[indexes]
# find the "middle" of the cluster
array = match_samples.values
# Only works with numpy 1.9.0 and higher!!!

View file

@ -4,4 +4,4 @@ Implementation of datasets utility components for datasets creation, load, and s
"""
from apt.utils.datasets.datasets import Dataset, StoredDataset, DatasetFactory, Data, ArrayDataset, \
DatasetWithPredictions, OUTPUT_DATA_ARRAY_TYPE, DATA_PANDAS_NUMPY_TYPE
DatasetWithPredictions, array2numpy, OUTPUT_DATA_ARRAY_TYPE, DATA_PANDAS_NUMPY_TYPE

View file

@ -233,7 +233,7 @@ class ArrayDataset(Dataset):
raise ValueError("The supplied features are not the same as in the data features")
self.features_names = x.columns.to_list()
if self._y is not None and len(self._x) != len(self._y):
if self._y is not None and self._x.shape[0] != self._y.shape[0]:
raise ValueError("Non equivalent lengths of x and y")
def get_samples(self) -> OUTPUT_DATA_ARRAY_TYPE:
@ -266,6 +266,8 @@ class DatasetWithPredictions(Dataset):
Dataset that is based on arrays (e.g., numpy/pandas/list...). Includes predictions from a model, and possibly also
features and true labels.
:param pred: collection of model predictions
:type pred: numpy array or pandas DataFrame or list or pytorch Tensor
:param x: collection of data samples
:type x: numpy array or pandas DataFrame or list or pytorch Tensor
:param y: collection of labels

View file

@ -1,6 +1,11 @@
from apt.utils.models.model import Model, BlackboxClassifier, ModelOutputType, ScoringMethod, \
BlackboxClassifierPredictions, BlackboxClassifierPredictFunction, get_nb_classes, is_one_hot, \
check_correct_model_output
check_correct_model_output, is_multi_label, is_multi_label_binary, is_logits, is_binary, \
CLASSIFIER_SINGLE_OUTPUT_CATEGORICAL, CLASSIFIER_SINGLE_OUTPUT_BINARY_PROBABILITIES, \
CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES, CLASSIFIER_SINGLE_OUTPUT_BINARY_LOGITS, \
CLASSIFIER_SINGLE_OUTPUT_CLASS_LOGITS, CLASSIFIER_MULTI_OUTPUT_CATEGORICAL, \
CLASSIFIER_MULTI_OUTPUT_BINARY_PROBABILITIES, CLASSIFIER_MULTI_OUTPUT_CLASS_PROBABILITIES, \
CLASSIFIER_MULTI_OUTPUT_BINARY_LOGITS, CLASSIFIER_MULTI_OUTPUT_CLASS_LOGITS
from apt.utils.models.sklearn_model import SklearnModel, SklearnClassifier, SklearnRegressor
from apt.utils.models.keras_model import KerasClassifier, KerasRegressor
from apt.utils.models.xgboost_model import XGBoostClassifier

View file

@ -4,7 +4,7 @@ import numpy as np
from sklearn.metrics import mean_squared_error
from apt.utils.models import Model, ModelOutputType, ScoringMethod, check_correct_model_output
from apt.utils.models import Model, ModelOutputType, ScoringMethod, is_logits
from apt.utils.datasets import Dataset, OUTPUT_DATA_ARRAY_TYPE
from art.utils import check_and_transform_label_format
@ -39,9 +39,7 @@ class KerasClassifier(KerasModel):
def __init__(self, model: "keras.models.Model", output_type: ModelOutputType, black_box_access: Optional[bool] = True,
unlimited_queries: Optional[bool] = True, **kwargs):
super().__init__(model, output_type, black_box_access, unlimited_queries, **kwargs)
logits = False
if output_type == ModelOutputType.CLASSIFIER_LOGITS:
logits = True
logits = is_logits(output_type)
self._art_model = ArtKerasClassifier(model, use_logits=logits)
def fit(self, train_data: Dataset, **kwargs) -> None:
@ -65,7 +63,6 @@ class KerasClassifier(KerasModel):
:return: Predictions from the model as numpy array (class probabilities, if supported).
"""
predictions = self._art_model.predict(x.get_samples(), **kwargs)
check_correct_model_output(predictions, self.output_type)
return predictions
def score(self, test_data: Dataset, scoring_method: Optional[ScoringMethod] = ScoringMethod.ACCURACY, **kwargs):
@ -104,7 +101,7 @@ class KerasRegressor(KerasModel):
"""
def __init__(self, model: "keras.models.Model", black_box_access: Optional[bool] = True,
unlimited_queries: Optional[bool] = True, **kwargs):
super().__init__(model, ModelOutputType.REGRESSOR_SCALAR, black_box_access, unlimited_queries, **kwargs)
super().__init__(model, ModelOutputType.REGRESSION, black_box_access, unlimited_queries, **kwargs)
self._art_model = ArtKerasRegressor(model)
def fit(self, train_data: Dataset, **kwargs) -> None:

View file

@ -1,9 +1,10 @@
from abc import ABCMeta, abstractmethod
from typing import Any, Optional, Callable, Tuple, Union, TYPE_CHECKING
from enum import Enum, auto
from enum import Enum, Flag, auto
import numpy as np
from scipy.special import expit
from apt.utils.datasets import Dataset, Data, OUTPUT_DATA_ARRAY_TYPE
from apt.utils.datasets import Dataset, Data, array2numpy, OUTPUT_DATA_ARRAY_TYPE
from art.estimators.classification import BlackBoxClassifier
from art.utils import check_and_transform_label_format
@ -11,11 +12,40 @@ if TYPE_CHECKING:
import torch
class ModelOutputType(Flag):
    """
    Bit flags describing the shape and semantics of a model's output.

    The composite module-level constants below combine these flags into the
    concrete output types supported by the library (e.g.
    CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES). Use the `is_*` helper
    predicates to query a composite value rather than testing flags directly.
    """
    CLASSIFIER = auto()     # classification model
    MULTI_OUTPUT = auto()   # multiple labels predicted per sample
    BINARY = auto()         # binary decision(s) rather than multi-class
    LOGITS = auto()         # raw, pre-activation scores
    PROBABILITIES = auto()  # normalized probabilities
    REGRESSION = auto()     # regression model (scalar value)


# class labels
CLASSIFIER_SINGLE_OUTPUT_CATEGORICAL = ModelOutputType.CLASSIFIER
# single binary probability
CLASSIFIER_SINGLE_OUTPUT_BINARY_PROBABILITIES = ModelOutputType.CLASSIFIER | ModelOutputType.BINARY | \
                                                ModelOutputType.PROBABILITIES
# vector of class probabilities
CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES = ModelOutputType.CLASSIFIER | ModelOutputType.PROBABILITIES
# single binary logit
CLASSIFIER_SINGLE_OUTPUT_BINARY_LOGITS = ModelOutputType.CLASSIFIER | ModelOutputType.BINARY | ModelOutputType.LOGITS
# vector of logits
CLASSIFIER_SINGLE_OUTPUT_CLASS_LOGITS = ModelOutputType.CLASSIFIER | ModelOutputType.LOGITS
# vector of class labels
CLASSIFIER_MULTI_OUTPUT_CATEGORICAL = ModelOutputType.MULTI_OUTPUT | ModelOutputType.CLASSIFIER
# vector of binary probabilities, 1 per output
CLASSIFIER_MULTI_OUTPUT_BINARY_PROBABILITIES = ModelOutputType.MULTI_OUTPUT | ModelOutputType.CLASSIFIER | \
                                               ModelOutputType.BINARY | ModelOutputType.PROBABILITIES
# vector of class probabilities for multiple outputs
CLASSIFIER_MULTI_OUTPUT_CLASS_PROBABILITIES = ModelOutputType.MULTI_OUTPUT | ModelOutputType.CLASSIFIER | \
                                              ModelOutputType.PROBABILITIES
# vector of binary logits
CLASSIFIER_MULTI_OUTPUT_BINARY_LOGITS = ModelOutputType.MULTI_OUTPUT | ModelOutputType.CLASSIFIER | \
                                        ModelOutputType.BINARY | ModelOutputType.LOGITS
# vector of logits for multiple outputs
CLASSIFIER_MULTI_OUTPUT_CLASS_LOGITS = ModelOutputType.MULTI_OUTPUT | ModelOutputType.CLASSIFIER | \
                                       ModelOutputType.LOGITS
class ModelType(Enum):
@ -29,16 +59,52 @@ class ScoringMethod(Enum):
def is_one_hot(y: OUTPUT_DATA_ARRAY_TYPE) -> bool:
    """
    Check whether `y` looks one-hot / probability encoded: a 2-D array with
    more than one column whose rows each (approximately) sum to 1.
    Plain Python lists are never considered one-hot.
    """
    if not isinstance(y, list):
        # rounding to 4 decimals tolerates float noise in probability rows
        return len(y.shape) == 2 and y.shape[1] > 1 and np.all(np.around(np.sum(y, axis=1), decimals=4) == 1)
    return False


def is_multi_label(output_type: ModelOutputType) -> bool:
    """Return True if the model predicts multiple labels per sample."""
    return ModelOutputType.MULTI_OUTPUT in output_type


def is_multi_label_binary(output_type: ModelOutputType) -> bool:
    """Return True for multi-label models whose per-output decision is binary."""
    return (ModelOutputType.MULTI_OUTPUT in output_type
            and ModelOutputType.BINARY in output_type)


def is_binary(output_type: ModelOutputType) -> bool:
    """Return True if the model outputs binary decisions/scores."""
    return ModelOutputType.BINARY in output_type


def is_categorical(output_type: ModelOutputType) -> bool:
    """
    Return True for classifiers that output class labels directly
    (neither probabilities nor logits nor binary scores).
    """
    return (ModelOutputType.CLASSIFIER in output_type
            and ModelOutputType.BINARY not in output_type
            and ModelOutputType.PROBABILITIES not in output_type
            and ModelOutputType.LOGITS not in output_type)


def is_probabilities(output_type: ModelOutputType) -> bool:
    """Return True if the model outputs probabilities."""
    return ModelOutputType.PROBABILITIES in output_type


def is_logits(output_type: ModelOutputType) -> bool:
    """Return True if the model outputs raw logits."""
    return ModelOutputType.LOGITS in output_type


def is_logits_or_probabilities(output_type: ModelOutputType) -> bool:
    """Return True if the model outputs either logits or probabilities."""
    return is_probabilities(output_type) or is_logits(output_type)
def get_nb_classes(y: OUTPUT_DATA_ARRAY_TYPE, output_type: ModelOutputType) -> int:
"""
Get the number of classes from an array of labels
:param y: The labels
:type y: numpy array
:return: The number of classes as integer
:param output_type: The output type of the model, as provided by the user
:type output_type: ModelOutputType
:return: The number of classes as integer, or list of integers for multi-label
"""
if y is None:
return 0
@ -48,8 +114,13 @@ def get_nb_classes(y: OUTPUT_DATA_ARRAY_TYPE) -> int:
if is_one_hot(y):
return y.shape[1]
else:
elif is_multi_label(output_type):
# for now just return the prediction dimension - this works in most cases
return y.shape[1]
elif is_categorical(output_type):
return int(np.max(y) + 1)
else: # binary
return 2
def check_correct_model_output(y: OUTPUT_DATA_ARRAY_TYPE, output_type: ModelOutputType):
@ -61,10 +132,9 @@ def check_correct_model_output(y: OUTPUT_DATA_ARRAY_TYPE, output_type: ModelOutp
:type output_type: ModelOutputType
:raises: ValueError (in case of mismatch)
"""
if not is_one_hot(y): # 1D array
if output_type == ModelOutputType.CLASSIFIER_PROBABILITIES or output_type == ModelOutputType.CLASSIFIER_LOGITS:
raise ValueError("Incompatible model output types. Model outputs 1D array of categorical scalars while "
"output type is set to ", output_type)
if not is_one_hot(y) and not is_multi_label(output_type) and is_categorical(output_type):
raise ValueError("Incompatible model output types. Model outputs 1D array of categorical scalars while "
"output type is set to ", output_type)
class Model(metaclass=ABCMeta):
@ -115,16 +185,81 @@ class Model(metaclass=ABCMeta):
"""
raise NotImplementedError
def score(self, test_data: Dataset, **kwargs):
    """
    Score the model using test data.

    Default implementation shared by subclasses (which may pre-fill keyword
    arguments such as `nb_classes` or `predictions` before delegating here).

    :param test_data: Test data.
    :type test_data: `Dataset`
    :keyword predictions: Model predictions to score. If provided, these will be used instead of calling the
        model's `predict` method.
    :type predictions: `DatasetWithPredictions` with the `pred` field filled.
    :keyword scoring_method: The method for scoring predictions. Default is ACCURACY.
    :type scoring_method: `ScoringMethod`, optional
    :keyword binary_threshold: The threshold to use on binary classification probabilities to assign the positive
        class.
    :type binary_threshold: float, optional. Default is 0.5.
    :keyword apply_non_linearity: A non-linear function to apply to the result of the 'predict' method, in case the
        model outputs logits (e.g., sigmoid).
    :type apply_non_linearity: Callable, should be possible to apply directly to the numpy output of the 'predict'
        method, optional.
    :keyword nb_classes: number of classes (for classification models).
    :type nb_classes: int, optional.
    :return: the score as float (for classifiers, between 0 and 1)
    :raises ValueError: if neither samples nor predictions are available, or labels are missing
    :raises NotImplementedError: for unsupported output types or scoring methods
    """
    predictions = kwargs.get('predictions')
    nb_classes = kwargs.get('nb_classes')
    scoring_method = kwargs.get('scoring_method', ScoringMethod.ACCURACY)
    binary_threshold = kwargs.get('binary_threshold', 0.5)
    apply_non_linearity = kwargs.get('apply_non_linearity', expit)

    if test_data.get_samples() is None and predictions is None:
        raise ValueError('score can only be computed when test data or predictions are available')
    if test_data.get_labels() is None:
        raise ValueError('score can only be computed when labels are available')

    if predictions:
        predicted = predictions.get_predictions()
    else:
        predicted = self.predict(test_data)
    y = array2numpy(test_data.get_labels())

    if scoring_method == ScoringMethod.ACCURACY:
        if not is_multi_label(self.output_type) and not is_binary(self.output_type):
            if nb_classes is not None:
                y = check_and_transform_label_format(y, nb_classes=nb_classes)
            # categorical labels have been 1-hot encoded by check_and_transform_label_format
            return np.count_nonzero(np.argmax(y, axis=1) == np.argmax(predicted, axis=1)) / predicted.shape[0]
        elif (is_multi_label(self.output_type) and not is_binary(self.output_type)
                and is_logits_or_probabilities(self.output_type)):
            if predicted.shape != y.shape:
                raise ValueError('Do not know how to compare arrays with different shapes')
            elif len(predicted.shape) < 3:
                raise ValueError('Do not know how to compare 2-D arrays for multi-output non-binary case')
            else:
                # expected shape: (n_samples, n_outputs, n_classes); one decision per sample per output
                correct = 0
                total = 0
                for output in range(predicted.shape[1]):
                    correct += np.count_nonzero(np.argmax(y[:, output], axis=1)
                                                == np.argmax(predicted[:, output], axis=1))
                    # FIX: the denominator previously accumulated
                    # n_samples * n_classes per output, which caps a perfect
                    # score at 1/n_classes; each output contributes exactly
                    # n_samples decisions. (Also renamed local 'sum', which
                    # shadowed the builtin.)
                    total += predicted.shape[0]
                return correct / total
        elif is_multi_label(self.output_type) and is_categorical(self.output_type):
            # direct label comparison, averaged over samples and outputs
            return np.count_nonzero(y == predicted) / (predicted.shape[0] * y.shape[1])
        elif is_binary(self.output_type):
            if is_logits(self.output_type):
                # map logits to probabilities before thresholding
                if apply_non_linearity:
                    predicted = apply_non_linearity(predicted)
                else:  # apply sigmoid
                    predicted = expit(predicted)
            predicted[predicted < binary_threshold] = 0
            predicted[predicted >= binary_threshold] = 1
            if len(y.shape) > 1:
                # multi-label binary: one decision per sample per output
                return np.count_nonzero(y == predicted) / (predicted.shape[0] * y.shape[1])
            else:
                # single binary output: flatten the (n, 1) prediction column
                return np.count_nonzero(y == predicted.reshape(-1)) / (predicted.shape[0])
        else:
            raise NotImplementedError('score method not implemented for output type: ', self.output_type)
    else:
        raise NotImplementedError('scoring method not implemented: ', scoring_method)
@property
def model(self) -> Any:
@ -167,7 +302,8 @@ class Model(metaclass=ABCMeta):
class BlackboxClassifier(Model):
"""
Wrapper for black-box ML classification models.
Wrapper for black-box ML classification models. This is an abstract class and must be instantiated as either
BlackboxClassifierPredictFunction or BlackboxClassifierPredictions.
:param model: The training and/or test data along with the model's predictions for the data or a callable predict
method.
@ -247,6 +383,13 @@ class BlackboxClassifier(Model):
"""
return self._optimizer
def score(self, test_data: Dataset, **kwargs):
    """
    Score the model using test data.

    Delegates to the shared scoring logic in the parent class, supplying
    this blackbox wrapper's known number of classes so that labels can be
    encoded consistently with the stored predictions.

    :param test_data: Test data.
    :type test_data: `Dataset`
    :return: the score as float (between 0 and 1)
    """
    # NOTE(review): this overwrites any caller-supplied 'nb_classes' kwarg
    # with the wrapper's own value — confirm that is intended.
    kwargs['nb_classes'] = self.nb_classes
    return super().score(test_data, **kwargs)
def fit(self, train_data: Dataset, **kwargs) -> None:
"""
A blackbox model cannot be fit.
@ -263,28 +406,8 @@ class BlackboxClassifier(Model):
:return: Predictions from the model as numpy array.
"""
predictions = self._art_model.predict(x.get_samples())
check_correct_model_output(predictions, self.output_type)
return predictions
def score(self, test_data: Dataset, scoring_method: Optional[ScoringMethod] = ScoringMethod.ACCURACY, **kwargs):
"""
Score the model using test data.
:param test_data: Test data.
:type train_data: `Dataset`
:param scoring_method: The method for scoring predictions. Default is ACCURACY.
:type scoring_method: `ScoringMethod`, optional
:return: the score as float (for classifiers, between 0 and 1)
"""
if test_data.get_samples() is None or test_data.get_labels() is None:
raise ValueError('score can only be computed when test data and labels are available')
predicted = self._art_model.predict(test_data.get_samples())
y = check_and_transform_label_format(test_data.get_labels(), nb_classes=self._nb_classes)
if scoring_method == ScoringMethod.ACCURACY:
return np.count_nonzero(np.argmax(y, axis=1) == np.argmax(predicted, axis=1)) / predicted.shape[0]
else:
raise NotImplementedError
@abstractmethod
def get_predictions(self) -> Union[Callable, Tuple[OUTPUT_DATA_ARRAY_TYPE, OUTPUT_DATA_ARRAY_TYPE]]:
"""
@ -325,17 +448,9 @@ class BlackboxClassifierPredictions(BlackboxClassifier):
if y_test_pred is None:
y_test_pred = model.get_test_labels()
if y_train_pred is not None:
check_correct_model_output(y_train_pred, self.output_type)
if y_test_pred is not None:
check_correct_model_output(y_test_pred, self.output_type)
if y_train_pred is not None and len(y_train_pred.shape) == 1:
self._nb_classes = get_nb_classes(y_train_pred)
y_train_pred = check_and_transform_label_format(y_train_pred, nb_classes=self._nb_classes)
if y_test_pred is not None and len(y_test_pred.shape) == 1:
if self._nb_classes is None:
self._nb_classes = get_nb_classes(y_test_pred)
y_test_pred = check_and_transform_label_format(y_test_pred, nb_classes=self._nb_classes)
if x_train_pred is not None and y_train_pred is not None and x_test_pred is not None and y_test_pred is not None:
@ -353,7 +468,7 @@ class BlackboxClassifierPredictions(BlackboxClassifier):
else:
raise NotImplementedError("Invalid data - None")
self._nb_classes = get_nb_classes(y_pred)
self._nb_classes = get_nb_classes(y_pred, self.output_type)
self._input_shape = x_pred.shape[1:]
self._x_pred = x_pred
self._y_pred = y_pred

View file

@ -3,17 +3,22 @@ import os
import shutil
import logging
from typing import Optional, Tuple
from typing import Optional, Tuple, Union, List, TYPE_CHECKING
import numpy as np
import torch
from torch.utils.data import DataLoader, TensorDataset
from art.utils import check_and_transform_label_format
from apt.utils.datasets.datasets import PytorchData
from apt.utils.models import Model, ModelOutputType
from apt.utils.datasets import OUTPUT_DATA_ARRAY_TYPE
from apt.utils.datasets.datasets import PytorchData, DatasetWithPredictions, ArrayDataset
from apt.utils.models import Model, ModelOutputType, is_multi_label, is_multi_label_binary, is_binary
from apt.utils.datasets import OUTPUT_DATA_ARRAY_TYPE, array2numpy
from art.estimators.classification.pytorch import PyTorchClassifier as ArtPyTorchClassifier
if TYPE_CHECKING:
from art.utils import CLIP_VALUES_TYPE, PREPROCESSING_TYPE
from art.defences.preprocessor import Preprocessor
from art.defences.postprocessor import Postprocessor
logger = logging.getLogger(__name__)
@ -30,16 +35,46 @@ class PyTorchClassifierWrapper(ArtPyTorchClassifier):
Extension for Pytorch ART model
"""
def __init__(
        self,
        model: "torch.nn.Module",
        loss: "torch.nn.modules.loss._Loss",
        input_shape: Tuple[int, ...],
        nb_classes: int,
        output_type: ModelOutputType,
        optimizer: Optional["torch.optim.Optimizer"] = None,  # type: ignore
        use_amp: bool = False,
        opt_level: str = "O1",
        loss_scale: Optional[Union[float, str]] = "dynamic",
        channels_first: bool = True,
        clip_values: Optional["CLIP_VALUES_TYPE"] = None,
        preprocessing_defences: Union["Preprocessor", List["Preprocessor"], None] = None,
        postprocessing_defences: Union["Postprocessor", List["Postprocessor"], None] = None,
        preprocessing: "PREPROCESSING_TYPE" = (0.0, 1.0),
        device_type: str = "gpu",
):
    """
    Create an extended ART PyTorch classifier.

    All parameters except `output_type` are forwarded unchanged to the ART
    `PyTorchClassifier` base class.

    :param output_type: the model's declared output type; used to precompute
        flags that control how correct predictions are counted for binary and
        multi-label models (see `get_step_correct`).
    """
    super().__init__(model, loss, input_shape, nb_classes, optimizer, use_amp, opt_level, loss_scale,
                     channels_first, clip_values, preprocessing_defences, postprocessing_defences, preprocessing,
                     device_type)
    # cache output-type predicates once at construction time
    self._is_single_binary = not is_multi_label(output_type) and is_binary(output_type)
    self._is_multi_label = is_multi_label(output_type)
    self._is_multi_label_binary = is_multi_label_binary(output_type)
def get_step_correct(self, outputs, targets) -> int:
    """
    Get the number of correctly classified labels in a batch.

    :param outputs: model outputs for the batch (torch tensor)
    :param targets: ground-truth targets for the batch (torch tensor)
    :return: number of correct label predictions; for multi-label models,
             each output of each sample counts separately
    :raises ValueError: if outputs and targets differ in length
    """
    # here everything is torch tensors
    if len(outputs) != len(targets):
        raise ValueError("outputs and targets should be the same length.")
    if self._is_single_binary:
        # single binary probability per sample: threshold at 0.5 via rounding
        return int(torch.sum(torch.round(outputs) == targets).item())
    elif self._is_multi_label:
        if self._is_multi_label_binary:
            # one binary probability per output
            outputs = torch.round(outputs)
            return int(torch.sum(targets == outputs).item())
        else:
            # one class-score vector per output: pick the highest-scoring class.
            # FIX: was `torch.round(outputs, axis=-1)` — torch.round accepts no
            # axis argument, so that call raises a TypeError; argmax over the
            # class dimension is the intended operation.
            return int(torch.sum(torch.argmax(outputs, axis=-1) == targets).item())
    # single-output multi-class
    return int(torch.sum(torch.argmax(outputs, axis=-1) == targets).item())
def _eval(self, loader: DataLoader):
"""
@ -93,6 +128,7 @@ class PyTorchClassifierWrapper(ArtPyTorchClassifier):
:param kwargs: Dictionary of framework-specific arguments. This parameter is not currently
supported for PyTorch and providing it takes no effect.
"""
# Put the model in the training mode
self._model.train()
@ -156,6 +192,61 @@ class PyTorchClassifierWrapper(ArtPyTorchClassifier):
else:
self.save_checkpoint_state_dict(is_best=best_acc <= val_acc, path=path)
def predict(
        self, x: np.ndarray, batch_size: int = 128, training_mode: bool = False, **kwargs
) -> np.ndarray:
    """
    Perform prediction for a batch of inputs.

    :param x: Input samples.
    :param batch_size: Size of batches.
    :param training_mode: `True` for model set to training mode and `'False` for model set to evaluation mode.
    :return: Array of predictions of shape `(nb_inputs, nb_classes)`.
    """
    import torch

    # Set model mode
    self._model.train(mode=training_mode)

    # Apply preprocessing
    preprocessed, _ = self._apply_preprocessing(x, y=None, fit=False)

    n_samples = preprocessed.shape[0]
    collected = []

    # Run prediction one batch at a time
    for start in range(0, n_samples, batch_size):
        stop = min(start + batch_size, n_samples)
        with torch.no_grad():
            raw_outputs = self._model(torch.from_numpy(preprocessed[start:stop]).to(self._device))
        # last element of the model's output is the prediction tensor
        head = raw_outputs[-1]
        if isinstance(head, tuple):
            # multi-output model: stack the per-output arrays and move the
            # batch dimension first -> (batch, n_outputs, ...)
            per_output = [t.detach().cpu().numpy().astype(np.float32) for t in head]
            collected.append(np.swapaxes(np.array(per_output), 0, 1))
        else:
            batch_preds = head.detach().cpu().numpy().astype(np.float32)
            if len(batch_preds.shape) == 1:
                # single-score models: keep a 2-D (batch, 1) shape
                batch_preds = np.expand_dims(batch_preds, axis=1).astype(np.float32)
            collected.append(batch_preds)

    results = np.vstack(collected)

    # Apply postprocessing
    predictions = self._apply_postprocessing(preds=results, fit=False)

    return predictions
def save_checkpoint_state_dict(self, is_best: bool, path=os.getcwd(), filename="latest.tar") -> None:
"""
Saves checkpoint as latest.tar or best.tar.
@ -319,7 +410,8 @@ class PyTorchClassifier(PyTorchModel):
super().__init__(model, output_type, black_box_access, unlimited_queries, **kwargs)
self._loss = loss
self._optimizer = optimizer
self._art_model = PyTorchClassifierWrapper(model, loss, input_shape, nb_classes, optimizer)
self._nb_classes = nb_classes
self._art_model = PyTorchClassifierWrapper(model, loss, input_shape, nb_classes, output_type, optimizer)
@property
def loss(self):
@ -398,7 +490,7 @@ class PyTorchClassifier(PyTorchModel):
:type x: `np.ndarray` or `pandas.DataFrame`
:return: Predictions from the model (class probabilities, if supported).
"""
return self._art_model.predict(x.get_samples(), **kwargs)
return array2numpy(self._art_model.predict(x.get_samples(), **kwargs))
def score(self, test_data: PytorchData, **kwargs):
"""
@ -406,18 +498,20 @@ class PyTorchClassifier(PyTorchModel):
:param test_data: Test data.
:type test_data: `PytorchData`
:param binary_threshold: The threshold to use on binary classification probabilities to assign the positive
class.
:type binary_threshold: float, optional. Default is 0.5.
:param apply_non_linearity: A non-linear function to apply to the result of the 'predict' method, in case the
model outputs logits (e.g., sigmoid).
:type apply_non_linearity: Callable, should be possible to apply directly to the numpy output of the 'predict'
method, optional.
:return: the score as float (between 0 and 1)
"""
y = test_data.get_labels()
# numpy arrays
predicted = self.predict(test_data)
# binary classification, single column of probabilities
if self._art_model.nb_classes == 2 and (len(predicted.shape) == 1 or predicted.shape[1] == 1):
if len(predicted.shape) > 1:
y = check_and_transform_label_format(y, self._art_model.nb_classes, return_one_hot=False)
return np.count_nonzero(y == (predicted > 0.5)) / predicted.shape[0]
else:
y = check_and_transform_label_format(y, self._art_model.nb_classes)
return np.count_nonzero(np.argmax(y, axis=1) == np.argmax(predicted, axis=1)) / predicted.shape[0]
kwargs['predictions'] = DatasetWithPredictions(pred=predicted)
kwargs['nb_classes'] = self._nb_classes
return super().score(ArrayDataset(test_data.get_samples(), test_data.get_labels()), **kwargs)
def load_checkpoint_state_dict_by_path(self, model_name: str, path: str = None):
"""

View file

@ -2,8 +2,8 @@ from typing import Optional
from sklearn.base import BaseEstimator
from apt.utils.models import Model, ModelOutputType, get_nb_classes, check_correct_model_output
from apt.utils.datasets import Dataset, OUTPUT_DATA_ARRAY_TYPE
from apt.utils.models import Model, ModelOutputType, get_nb_classes
from apt.utils.datasets import Dataset, ArrayDataset, OUTPUT_DATA_ARRAY_TYPE
from art.estimators.classification.scikitlearn import SklearnClassifier as ArtSklearnClassifier
from art.estimators.regression.scikitlearn import ScikitlearnRegressor
@ -48,7 +48,7 @@ class SklearnClassifier(SklearnModel):
super().__init__(model, output_type, black_box_access, unlimited_queries, **kwargs)
self._art_model = ArtSklearnClassifier(model, preprocessing=None)
def fit(self, train_data: Dataset, **kwargs) -> None:
def fit(self, train_data: ArrayDataset, **kwargs) -> None:
"""
Fit the model using the training data.
@ -58,11 +58,11 @@ class SklearnClassifier(SklearnModel):
:return: None
"""
y = train_data.get_labels()
self.nb_classes = get_nb_classes(y)
self.nb_classes = get_nb_classes(y, self.output_type)
y_encoded = check_and_transform_label_format(y, nb_classes=self.nb_classes)
self._art_model.fit(train_data.get_samples(), y_encoded, **kwargs)
def predict(self, x: Dataset, **kwargs) -> OUTPUT_DATA_ARRAY_TYPE:
def predict(self, x: ArrayDataset, **kwargs) -> OUTPUT_DATA_ARRAY_TYPE:
"""
Perform predictions using the model for input `x`.
@ -71,7 +71,7 @@ class SklearnClassifier(SklearnModel):
:return: Predictions from the model as numpy array (class probabilities, if supported).
"""
predictions = self._art_model.predict(x.get_samples(), **kwargs)
check_correct_model_output(predictions, self.output_type)
# check_correct_model_output(predictions, self.output_type)
return predictions
@ -93,7 +93,7 @@ class SklearnRegressor(SklearnModel):
"""
def __init__(self, model: BaseEstimator, black_box_access: Optional[bool] = True,
unlimited_queries: Optional[bool] = True, **kwargs):
super().__init__(model, ModelOutputType.REGRESSOR_SCALAR, black_box_access, unlimited_queries, **kwargs)
super().__init__(model, ModelOutputType.REGRESSION, black_box_access, unlimited_queries, **kwargs)
self._art_model = ScikitlearnRegressor(model)
def fit(self, train_data: Dataset, **kwargs) -> None:

View file

@ -1,6 +1,6 @@
from typing import Optional, Tuple
from apt.utils.models import Model, ModelOutputType, ScoringMethod, check_correct_model_output, is_one_hot
from apt.utils.models import Model, ModelOutputType, ScoringMethod, is_one_hot
from apt.utils.datasets import Dataset, OUTPUT_DATA_ARRAY_TYPE
import numpy as np
@ -63,7 +63,7 @@ class XGBoostClassifier(XGBoostModel):
:return: Predictions from the model as numpy array (class probabilities, if supported).
"""
predictions = self._art_model.predict(x.get_samples(), **kwargs)
check_correct_model_output(predictions, self.output_type)
# check_correct_model_output(predictions, self.output_type)
return predictions
def score(self, test_data: Dataset, scoring_method: Optional[ScoringMethod] = ScoringMethod.ACCURACY, **kwargs):

View file

@ -6,11 +6,17 @@ from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.preprocessing import OneHotEncoder
from apt.anonymization import Anonymize
from apt.utils.dataset_utils import get_iris_dataset_np, get_adult_dataset_pd, get_nursery_dataset_pd
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from torch import nn, optim, sigmoid, where
from torch.nn import functional
from scipy.special import expit
from apt.utils.datasets.datasets import PytorchData
from apt.utils.models import CLASSIFIER_MULTI_OUTPUT_BINARY_LOGITS
from apt.utils.models.pytorch_model import PyTorchClassifier
from apt.anonymization import Anonymize
from apt.utils.dataset_utils import get_iris_dataset_np, get_adult_dataset_pd, get_nursery_dataset_pd
from apt.utils.datasets import ArrayDataset
@ -187,6 +193,72 @@ def test_anonymize_pandas_one_hot():
assert ((np.min(anonymized_slice, axis=1) == 0).all())
def test_anonymize_pytorch_multi_label_binary():
    """Anonymize iris samples using predictions of a multi-label binary pytorch model.

    The single iris class column is tripled into three binary output columns, a small
    logits-emitting model is trained with a focal loss, and its thresholded predictions
    are used as the labels for k-anonymization of the quasi-identifier features.
    """
    class multi_label_binary_model(nn.Module):
        # One hidden layer; emits a raw logit per label (no sigmoid on the
        # outputs, matching CLASSIFIER_MULTI_OUTPUT_BINARY_LOGITS).
        def __init__(self, num_labels, num_features):
            super(multi_label_binary_model, self).__init__()
            self.fc1 = nn.Sequential(
                nn.Linear(num_features, 256),
                nn.Tanh(), )
            self.classifier1 = nn.Linear(256, num_labels)

        def forward(self, x):
            return self.classifier1(self.fc1(x))

    class FocalLoss(nn.Module):
        # Binary focal loss over logits: down-weights easy examples by
        # (1 - p) ** gamma and balances classes via alpha.
        def __init__(self, gamma=2, alpha=0.5):
            super(FocalLoss, self).__init__()
            self.gamma = gamma
            self.alpha = alpha

        def forward(self, input, target):
            bce_loss = functional.binary_cross_entropy_with_logits(input, target, reduction='none')
            p = sigmoid(input)
            # p is the probability assigned to the true class of each label
            p = where(target >= 0.5, p, 1 - p)
            modulating_factor = (1 - p) ** self.gamma
            alpha = self.alpha * target + (1 - self.alpha) * (1 - target)
            focal_loss = alpha * modulating_factor * bce_loss
            return focal_loss.mean()

    (x_train, y_train), _ = get_iris_dataset_np()
    # make multi-label binary: replicate the class column 3 times and clip to {0, 1}
    y_train = np.column_stack((y_train, y_train, y_train))
    y_train[y_train > 1] = 1
    model = multi_label_binary_model(3, 4)
    criterion = FocalLoss()
    optimizer = optim.RMSprop(model.parameters(), lr=0.01)
    art_model = PyTorchClassifier(model=model,
                                  output_type=CLASSIFIER_MULTI_OUTPUT_BINARY_LOGITS,
                                  loss=criterion,
                                  optimizer=optimizer,
                                  # iris has 4 features; (24,) was erroneously copied
                                  # from the 24-feature nursery tests
                                  input_shape=(4,),
                                  nb_classes=3)
    art_model.fit(PytorchData(x_train.astype(np.float32), y_train.astype(np.float32)), save_entire_model=False,
                  nb_epochs=10)
    pred = art_model.predict(PytorchData(x_train.astype(np.float32), y_train.astype(np.float32)))
    # threshold the sigmoid probabilities at 0.5 to obtain hard binary labels
    pred = expit(pred)
    pred[pred < 0.5] = 0
    pred[pred >= 0.5] = 1

    k = 10
    QI = [0, 2]
    anonymizer = Anonymize(k, QI, train_only_QI=True)
    anon = anonymizer.anonymize(ArrayDataset(x_train, pred))
    # anonymization must reduce QI diversity, every QI combination must appear
    # at least k times, and the non-QI columns must be untouched
    assert (len(np.unique(anon[:, QI], axis=0)) < len(np.unique(x_train[:, QI], axis=0)))
    _, counts_elements = np.unique(anon[:, QI], return_counts=True)
    assert (np.min(counts_elements) >= k)
    assert ((np.delete(anon, QI, axis=1) == np.delete(x_train, QI, axis=1)).all())
def test_errors():
    """An anonymity parameter k below 2 is invalid and must raise ValueError."""
    invalid_k = 1
    quasi_identifiers = [0, 2]
    with pytest.raises(ValueError):
        Anonymize(invalid_k, quasi_identifiers)

View file

@ -4,25 +4,29 @@ import pandas as pd
import scipy
from sklearn.compose import ColumnTransformer
from sklearn.datasets import load_diabetes
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from torch import nn, optim
from torch import nn, optim, sigmoid, where
from torch.nn import functional
from scipy.special import expit
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input
from apt.utils.datasets.datasets import PytorchData
from apt.utils.models.pytorch_model import PyTorchClassifier
from apt.minimization import GeneralizeToRepresentative
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from apt.utils.dataset_utils import get_iris_dataset_np, get_adult_dataset_pd, get_german_credit_dataset_pd
from apt.utils.datasets import ArrayDataset
from apt.utils.models import SklearnClassifier, ModelOutputType, SklearnRegressor, KerasClassifier
from apt.utils.models import SklearnClassifier, SklearnRegressor, KerasClassifier, \
CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES, CLASSIFIER_SINGLE_OUTPUT_CATEGORICAL, \
CLASSIFIER_SINGLE_OUTPUT_CLASS_LOGITS, CLASSIFIER_MULTI_OUTPUT_BINARY_LOGITS
tf.compat.v1.disable_eager_execution()
@ -216,7 +220,7 @@ def test_minimizer_params(cells):
base_est = DecisionTreeClassifier(random_state=0, min_samples_split=2,
min_samples_leaf=1)
model = SklearnClassifier(base_est, ModelOutputType.CLASSIFIER_PROBABILITIES)
model = SklearnClassifier(base_est, CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES)
model.fit(ArrayDataset(x, y))
expected_generalizations = {'categories': {}, 'category_representatives': {},
@ -258,7 +262,7 @@ def test_minimizer_params_not_transform(cells):
samples = ArrayDataset(x, y, features)
base_est = DecisionTreeClassifier(random_state=0, min_samples_split=2,
min_samples_leaf=1)
model = SklearnClassifier(base_est, ModelOutputType.CLASSIFIER_PROBABILITIES)
model = SklearnClassifier(base_est, CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES)
model.fit(ArrayDataset(x, y))
gen = GeneralizeToRepresentative(model, cells=cells, generalize_using_transform=False)
@ -270,7 +274,7 @@ def test_minimizer_fit(data_two_features):
x, y, features, _ = data_two_features
base_est = DecisionTreeClassifier(random_state=0, min_samples_split=2,
min_samples_leaf=1)
model = SklearnClassifier(base_est, ModelOutputType.CLASSIFIER_PROBABILITIES)
model = SklearnClassifier(base_est, CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES)
model.fit(ArrayDataset(x, y))
ad = ArrayDataset(x)
predictions = model.predict(ad)
@ -287,6 +291,7 @@ def test_minimizer_fit(data_two_features):
compare_generalizations(gener, expected_generalizations)
check_features(features, expected_generalizations, transformed, x)
assert (np.equal(x, transformed).all())
ncp = gen.ncp.transform_score
check_ncp(ncp, expected_generalizations)
@ -299,7 +304,7 @@ def test_minimizer_ncp(data_two_features):
base_est = DecisionTreeClassifier(random_state=0, min_samples_split=2,
min_samples_leaf=1)
model = SklearnClassifier(base_est, ModelOutputType.CLASSIFIER_PROBABILITIES)
model = SklearnClassifier(base_est, CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES)
model.fit(ArrayDataset(x, y))
ad = ArrayDataset(x)
ad1 = ArrayDataset(x1, features_names=features)
@ -342,7 +347,7 @@ def test_minimizer_ncp_categorical(data_four_features):
base_est = DecisionTreeClassifier(random_state=0, min_samples_split=2,
min_samples_leaf=1)
model = SklearnClassifier(base_est, ModelOutputType.CLASSIFIER_PROBABILITIES)
model = SklearnClassifier(base_est, CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES)
model.fit(ArrayDataset(encoded, y))
ad = ArrayDataset(x)
ad1 = ArrayDataset(x1)
@ -382,7 +387,7 @@ def test_minimizer_fit_not_transform(data_two_features):
x, y, features, x1 = data_two_features
base_est = DecisionTreeClassifier(random_state=0, min_samples_split=2,
min_samples_leaf=1)
model = SklearnClassifier(base_est, ModelOutputType.CLASSIFIER_PROBABILITIES)
model = SklearnClassifier(base_est, CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES)
model.fit(ArrayDataset(x, y))
ad = ArrayDataset(x)
predictions = model.predict(ad)
@ -412,7 +417,7 @@ def test_minimizer_fit_pandas(data_four_features):
base_est = DecisionTreeClassifier(random_state=0, min_samples_split=2,
min_samples_leaf=1)
model = SklearnClassifier(base_est, ModelOutputType.CLASSIFIER_PROBABILITIES)
model = SklearnClassifier(base_est, CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES)
model.fit(ArrayDataset(encoded, y))
predictions = model.predict(ArrayDataset(encoded))
if predictions.shape[1] > 1:
@ -450,7 +455,7 @@ def test_minimizer_params_categorical(cells_categorical):
preprocessor, encoded = create_encoder(numeric_features, categorical_features, x)
base_est = DecisionTreeClassifier(random_state=0, min_samples_split=2,
min_samples_leaf=1)
model = SklearnClassifier(base_est, ModelOutputType.CLASSIFIER_PROBABILITIES)
model = SklearnClassifier(base_est, CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES)
model.fit(ArrayDataset(encoded, y))
predictions = model.predict(ArrayDataset(encoded))
if predictions.shape[1] > 1:
@ -474,7 +479,7 @@ def test_minimizer_fit_qi(data_three_features):
qi = ['age', 'weight']
base_est = DecisionTreeClassifier(random_state=0, min_samples_split=2,
min_samples_leaf=1)
model = SklearnClassifier(base_est, ModelOutputType.CLASSIFIER_PROBABILITIES)
model = SklearnClassifier(base_est, CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES)
model.fit(ArrayDataset(x, y))
ad = ArrayDataset(x)
predictions = model.predict(ad)
@ -508,7 +513,7 @@ def test_minimizer_fit_pandas_qi(data_five_features):
base_est = DecisionTreeClassifier(random_state=0, min_samples_split=2,
min_samples_leaf=1)
model = SklearnClassifier(base_est, ModelOutputType.CLASSIFIER_PROBABILITIES)
model = SklearnClassifier(base_est, CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES)
model.fit(ArrayDataset(encoded, y))
predictions = model.predict(ArrayDataset(encoded))
if predictions.shape[1] > 1:
@ -543,7 +548,7 @@ def test_minimize_ndarray_iris():
qi = ['sepal length (cm)', 'petal length (cm)']
base_est = DecisionTreeClassifier(random_state=0, min_samples_split=2,
min_samples_leaf=1)
model = SklearnClassifier(base_est, ModelOutputType.CLASSIFIER_PROBABILITIES)
model = SklearnClassifier(base_est, CLASSIFIER_SINGLE_OUTPUT_CATEGORICAL)
model.fit(ArrayDataset(x_train, y_train))
predictions = model.predict(ArrayDataset(x_train))
if predictions.shape[1] > 1:
@ -586,7 +591,7 @@ def test_minimize_pandas_adult():
base_est = DecisionTreeClassifier(random_state=0, min_samples_split=2,
min_samples_leaf=1)
model = SklearnClassifier(base_est, ModelOutputType.CLASSIFIER_PROBABILITIES)
model = SklearnClassifier(base_est, CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES)
model.fit(ArrayDataset(encoded, y_train))
predictions = model.predict(ArrayDataset(encoded))
if predictions.shape[1] > 1:
@ -642,7 +647,7 @@ def test_german_credit_pandas():
base_est = DecisionTreeClassifier(random_state=0, min_samples_split=2,
min_samples_leaf=1)
model = SklearnClassifier(base_est, ModelOutputType.CLASSIFIER_PROBABILITIES)
model = SklearnClassifier(base_est, CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES)
model.fit(ArrayDataset(encoded, y_train))
predictions = model.predict(ArrayDataset(encoded))
if predictions.shape[1] > 1:
@ -760,7 +765,7 @@ def test_x_y():
qi = [0, 2]
base_est = DecisionTreeClassifier(random_state=0, min_samples_split=2,
min_samples_leaf=1)
model = SklearnClassifier(base_est, ModelOutputType.CLASSIFIER_PROBABILITIES)
model = SklearnClassifier(base_est, CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES)
model.fit(ArrayDataset(x, y))
ad = ArrayDataset(x)
predictions = model.predict(ad)
@ -800,7 +805,7 @@ def test_x_y_features_names():
qi = ['age', 'weight']
base_est = DecisionTreeClassifier(random_state=0, min_samples_split=2,
min_samples_leaf=1)
model = SklearnClassifier(base_est, ModelOutputType.CLASSIFIER_PROBABILITIES)
model = SklearnClassifier(base_est, CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES)
model.fit(ArrayDataset(x, y))
ad = ArrayDataset(x)
predictions = model.predict(ad)
@ -1202,7 +1207,7 @@ def test_keras_model():
base_est.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])
model = KerasClassifier(base_est, ModelOutputType.CLASSIFIER_PROBABILITIES)
model = KerasClassifier(base_est, CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES)
model.fit(ArrayDataset(x, y))
ad = ArrayDataset(x_test)
predictions = model.predict(ad)
@ -1269,8 +1274,11 @@ def test_minimizer_pytorch(data_three_features):
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(base_est.parameters(), lr=0.01)
model = PyTorchClassifier(model=base_est, output_type=ModelOutputType.CLASSIFIER_LOGITS, loss=criterion,
optimizer=optimizer, input_shape=(3,),
model = PyTorchClassifier(model=base_est,
output_type=CLASSIFIER_SINGLE_OUTPUT_CLASS_LOGITS,
loss=criterion,
optimizer=optimizer,
input_shape=(3,),
nb_classes=2)
model.fit(PytorchData(x, y), save_entire_model=False, nb_epochs=10)
@ -1308,8 +1316,11 @@ def test_minimizer_pytorch_iris():
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(base_est.parameters(), lr=0.01)
model = PyTorchClassifier(model=base_est, output_type=ModelOutputType.CLASSIFIER_LOGITS, loss=criterion,
optimizer=optimizer, input_shape=(4,),
model = PyTorchClassifier(model=base_est,
output_type=CLASSIFIER_SINGLE_OUTPUT_CLASS_LOGITS,
loss=criterion,
optimizer=optimizer,
input_shape=(4,),
nb_classes=3)
model.fit(PytorchData(x_train, y_train), save_entire_model=False, nb_epochs=10)
@ -1329,6 +1340,78 @@ def test_minimizer_pytorch_iris():
assert ((rel_accuracy >= target_accuracy) or (target_accuracy - rel_accuracy) <= ACCURACY_DIFF)
def test_minimizer_pytorch_multi_label_binary():
    """Minimize iris features against a multi-label binary pytorch model.

    Trains a small logits-emitting model on tripled binary labels, derives hard
    predictions, fits GeneralizeToRepresentative on them, and checks that the
    generalized data keeps the model's relative accuracy near the target.
    """
    class multi_label_binary_model(nn.Module):
        # One hidden layer; emits a raw logit per label (no sigmoid on the
        # outputs, matching CLASSIFIER_MULTI_OUTPUT_BINARY_LOGITS).
        def __init__(self, num_labels, num_features):
            super(multi_label_binary_model, self).__init__()
            self.fc1 = nn.Sequential(
                nn.Linear(num_features, 256),
                nn.Tanh(), )
            self.classifier1 = nn.Linear(256, num_labels)

        def forward(self, x):
            return self.classifier1(self.fc1(x))

    class FocalLoss(nn.Module):
        # Binary focal loss over logits: down-weights easy examples by
        # (1 - p) ** gamma and balances classes via alpha.
        def __init__(self, gamma=2, alpha=0.5):
            super(FocalLoss, self).__init__()
            self.gamma = gamma
            self.alpha = alpha

        def forward(self, input, target):
            bce_loss = functional.binary_cross_entropy_with_logits(input, target, reduction='none')
            p = sigmoid(input)
            # p is the probability assigned to the true class of each label
            p = where(target >= 0.5, p, 1 - p)
            modulating_factor = (1 - p) ** self.gamma
            alpha = self.alpha * target + (1 - self.alpha) * (1 - target)
            focal_loss = alpha * modulating_factor * bce_loss
            return focal_loss.mean()

    (x_train, y_train), _ = get_iris_dataset_np()
    features = ['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']
    qi = ['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']
    # make multi-label binary: replicate the class column 3 times and clip to {0, 1}
    y_train = np.column_stack((y_train, y_train, y_train))
    y_train[y_train > 1] = 1
    x_train = x_train.astype(np.float32)
    y_train = y_train.astype(np.float32)
    orig_model = multi_label_binary_model(3, 4)
    criterion = FocalLoss()
    optimizer = optim.RMSprop(orig_model.parameters(), lr=0.01)
    model = PyTorchClassifier(model=orig_model,
                              output_type=CLASSIFIER_MULTI_OUTPUT_BINARY_LOGITS,
                              loss=criterion,
                              optimizer=optimizer,
                              # iris has 4 features; (24,) was erroneously copied
                              # from the 24-feature nursery tests
                              input_shape=(4,),
                              nb_classes=3)
    model.fit(PytorchData(x_train, y_train), save_entire_model=False, nb_epochs=10)
    predictions = model.predict(PytorchData(x_train, y_train))
    # threshold the sigmoid probabilities at 0.5 to obtain hard binary labels
    predictions = expit(predictions)
    predictions[predictions < 0.5] = 0
    predictions[predictions >= 0.5] = 1

    target_accuracy = 0.99
    gen = GeneralizeToRepresentative(model, target_accuracy=target_accuracy, features_to_minimize=qi)
    transformed = gen.fit_transform(dataset=ArrayDataset(x_train, predictions, features_names=features))
    gener = gen.generalizations
    check_features(features, gener, transformed, x_train)
    ncp = gen.ncp.transform_score
    check_ncp(ncp, gener)
    # the minimized (generalized) data must preserve model accuracy up to ACCURACY_DIFF
    rel_accuracy = model.score(ArrayDataset(transformed.astype(np.float32), predictions))
    assert ((rel_accuracy >= target_accuracy) or (target_accuracy - rel_accuracy) <= ACCURACY_DIFF)
def test_untouched():
cells = [{"id": 1, "ranges": {"age": {"start": None, "end": 38}}, "label": 0,
'categories': {'gender': ['male']}, "representative": {"age": 26, "height": 149}},
@ -1362,7 +1445,7 @@ def test_errors():
y = np.array([1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0])
base_est = DecisionTreeClassifier(random_state=0, min_samples_split=2,
min_samples_leaf=1)
model = SklearnClassifier(base_est, ModelOutputType.CLASSIFIER_PROBABILITIES)
model = SklearnClassifier(base_est, CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES)
model.fit(ArrayDataset(X, y))
ad = ArrayDataset(X)
predictions = model.predict(ad)

View file

@ -1,8 +1,11 @@
import pytest
import numpy as np
from apt.utils.models import SklearnClassifier, SklearnRegressor, ModelOutputType, KerasClassifier, KerasRegressor, \
BlackboxClassifierPredictions, BlackboxClassifierPredictFunction, is_one_hot, get_nb_classes, XGBoostClassifier
from apt.utils.models import SklearnClassifier, SklearnRegressor, KerasClassifier, KerasRegressor, \
BlackboxClassifierPredictions, BlackboxClassifierPredictFunction, is_one_hot, get_nb_classes, XGBoostClassifier, \
CLASSIFIER_SINGLE_OUTPUT_CATEGORICAL, CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES, \
CLASSIFIER_MULTI_OUTPUT_CATEGORICAL, CLASSIFIER_MULTI_OUTPUT_BINARY_PROBABILITIES, \
CLASSIFIER_MULTI_OUTPUT_CLASS_PROBABILITIES
from apt.utils.datasets import ArrayDataset, Data, DatasetWithPredictions
from apt.utils import dataset_utils
@ -24,7 +27,7 @@ tf.compat.v1.disable_eager_execution()
def test_sklearn_classifier():
(x_train, y_train), (x_test, y_test) = dataset_utils.get_iris_dataset_np()
underlying_model = RandomForestClassifier()
model = SklearnClassifier(underlying_model, ModelOutputType.CLASSIFIER_PROBABILITIES)
model = SklearnClassifier(underlying_model, CLASSIFIER_SINGLE_OUTPUT_CATEGORICAL)
train = ArrayDataset(x_train, y_train)
test = ArrayDataset(x_test, y_test)
model.fit(train)
@ -35,6 +38,28 @@ def test_sklearn_classifier():
assert (0.0 <= score <= 1.0)
# This test currently cannot pass due to ART dependency, so sklearn support will need to wait until ART is updated
# def test_sklearn_classifier_predictions_multi_label_binary():
# (x_train, y_train), (x_test, y_test) = dataset_utils.get_iris_dataset_np()
#
# # make multi-label binary
# y_train = np.column_stack((y_train, y_train, y_train))
# y_train[y_train > 1] = 1
# y_test = np.column_stack((y_test, y_test, y_test))
# y_test[y_test > 1] = 1
#
# test = ArrayDataset(x_test, y_test)
#
# underlying_model = RandomForestClassifier()
# underlying_model.fit(x_train, y_train)
# model = SklearnClassifier(underlying_model, CLASSIFIER_MULTI_OUTPUT_BINARY_PROBABILITIES)
# pred = model.predict(test)
# assert (pred[0].shape[0] == x_test.shape[0])
#
# score = model.score(test)
# assert (score == 1.0)
def test_sklearn_regressor():
(x_train, y_train), (x_test, y_test) = dataset_utils.get_diabetes_dataset_np()
underlying_model = DecisionTreeRegressor()
@ -59,7 +84,7 @@ def test_keras_classifier():
underlying_model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])
model = KerasClassifier(underlying_model, ModelOutputType.CLASSIFIER_PROBABILITIES)
model = KerasClassifier(underlying_model, CLASSIFIER_SINGLE_OUTPUT_CATEGORICAL)
train = ArrayDataset(x_train, y_train)
test = ArrayDataset(x_test, y_test)
@ -97,7 +122,8 @@ def test_xgboost_classifier():
(x_train, y_train), (x_test, y_test) = dataset_utils.get_iris_dataset_np()
underlying_model = XGBClassifier()
underlying_model.fit(x_train, y_train)
model = XGBoostClassifier(underlying_model, ModelOutputType.CLASSIFIER_PROBABILITIES, input_shape=(4,), nb_classes=3)
model = XGBoostClassifier(underlying_model, CLASSIFIER_SINGLE_OUTPUT_CATEGORICAL,
input_shape=(4,), nb_classes=3)
train = ArrayDataset(x_train, y_train)
test = ArrayDataset(x_test, y_test)
pred = model.predict(test)
@ -115,7 +141,7 @@ def test_blackbox_classifier():
train = ArrayDataset(x_train, y_train)
test = ArrayDataset(x_test, y_test)
data = Data(train, test)
model = BlackboxClassifierPredictions(data, ModelOutputType.CLASSIFIER_SCALAR)
model = BlackboxClassifierPredictions(data, CLASSIFIER_SINGLE_OUTPUT_CATEGORICAL)
pred = model.predict(test)
assert (pred.shape[0] == x_test.shape[0])
@ -131,7 +157,7 @@ def test_blackbox_classifier_predictions():
train = DatasetWithPredictions(y_train, x_train)
test = DatasetWithPredictions(y_test, x_test)
data = Data(train, test)
model = BlackboxClassifierPredictions(data, ModelOutputType.CLASSIFIER_SCALAR)
model = BlackboxClassifierPredictions(data, CLASSIFIER_SINGLE_OUTPUT_CATEGORICAL)
pred = model.predict(test)
assert (pred.shape[0] == x_test.shape[0])
assert model.model_type is None
@ -146,7 +172,7 @@ def test_blackbox_classifier_predictions_y():
train = DatasetWithPredictions(y_train, x_train, y_train)
test = DatasetWithPredictions(y_test, x_test, y_test)
data = Data(train, test)
model = BlackboxClassifierPredictions(data, ModelOutputType.CLASSIFIER_SCALAR)
model = BlackboxClassifierPredictions(data, CLASSIFIER_SINGLE_OUTPUT_CATEGORICAL)
pred = model.predict(test)
assert (pred.shape[0] == x_test.shape[0])
@ -156,14 +182,62 @@ def test_blackbox_classifier_predictions_y():
assert model.model_type is None
def test_blackbox_classifier_mismatch():
def test_blackbox_classifier_predictions_multi_label_cat():
(x_train, y_train), (x_test, y_test) = dataset_utils.get_iris_dataset_np()
train = ArrayDataset(x_train, y_train)
test = ArrayDataset(x_test, y_test)
# make multi-label categorical
y_train = np.column_stack((y_train, y_train, y_train))
y_test = np.column_stack((y_test, y_test, y_test))
train = DatasetWithPredictions(y_train, x_train, y_train)
test = DatasetWithPredictions(y_test, x_test, y_test)
data = Data(train, test)
with pytest.raises(ValueError):
BlackboxClassifierPredictions(data, ModelOutputType.CLASSIFIER_PROBABILITIES)
model = BlackboxClassifierPredictions(data, CLASSIFIER_MULTI_OUTPUT_CATEGORICAL)
pred = model.predict(test)
assert (pred.shape[0] == x_test.shape[0])
score = model.score(test)
assert (score == 1.0)
assert model.model_type is None
def test_blackbox_classifier_predictions_multi_label_binary():
    """Blackbox classifier wrapping precomputed multi-output binary probabilities."""
    def _binarize_with_fake_probs(labels):
        # Triple the label column into three binary outputs, then map
        # 0 -> 0.2 and 1 -> 0.6 as stand-in per-label probabilities.
        multi = np.column_stack((labels, labels, labels))
        multi[multi > 1] = 1
        probs = multi.copy().astype(float)
        probs[probs == 0] = 0.2
        probs[probs == 1] = 0.6
        return multi, probs

    (x_train, y_train), (x_test, y_test) = dataset_utils.get_iris_dataset_np()
    y_train, pred_train = _binarize_with_fake_probs(y_train)
    y_test, pred_test = _binarize_with_fake_probs(y_test)

    train = DatasetWithPredictions(pred_train, x_train, y_train)
    test = DatasetWithPredictions(pred_test, x_test, y_test)
    model = BlackboxClassifierPredictions(Data(train, test),
                                          CLASSIFIER_MULTI_OUTPUT_BINARY_PROBABILITIES)

    pred = model.predict(test)
    assert pred.shape[0] == x_test.shape[0]
    # predictions match labels exactly (0.6 >= 0.5 iff label == 1)
    assert model.score(test) == 1.0
    assert model.model_type is None
# def test_blackbox_classifier_mismatch():
# (x_train, y_train), (x_test, y_test) = dataset_utils.get_iris_dataset_np()
#
# train = ArrayDataset(x_train, y_train)
# test = ArrayDataset(x_test, y_test)
# data = Data(train, test)
# with pytest.raises(ValueError):
# BlackboxClassifierPredictions(data, CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES)
def test_blackbox_classifier_no_test():
@ -172,7 +246,7 @@ def test_blackbox_classifier_no_test():
train = ArrayDataset(x_train, y_train)
data = Data(train)
model = BlackboxClassifierPredictions(data, ModelOutputType.CLASSIFIER_SCALAR)
model = BlackboxClassifierPredictions(data, CLASSIFIER_SINGLE_OUTPUT_CATEGORICAL)
pred = model.predict(train)
assert (pred.shape[0] == x_train.shape[0])
@ -189,7 +263,7 @@ def test_blackbox_classifier_no_train():
test = ArrayDataset(x_test, y_test)
data = Data(test=test)
model = BlackboxClassifierPredictions(data, ModelOutputType.CLASSIFIER_SCALAR)
model = BlackboxClassifierPredictions(data, CLASSIFIER_SINGLE_OUTPUT_CATEGORICAL)
pred = model.predict(test)
assert (pred.shape[0] == x_test.shape[0])
@ -207,7 +281,7 @@ def test_blackbox_classifier_no_test_y():
train = ArrayDataset(x_train, y_train)
test = ArrayDataset(x_test)
data = Data(train, test)
model = BlackboxClassifierPredictions(data, ModelOutputType.CLASSIFIER_SCALAR)
model = BlackboxClassifierPredictions(data, CLASSIFIER_SINGLE_OUTPUT_CATEGORICAL)
pred = model.predict(train)
assert (pred.shape[0] == x_train.shape[0])
@ -230,7 +304,7 @@ def test_blackbox_classifier_no_train_y():
train = ArrayDataset(x_train)
test = ArrayDataset(x_test, y_test)
data = Data(train, test)
model = BlackboxClassifierPredictions(data, ModelOutputType.CLASSIFIER_SCALAR)
model = BlackboxClassifierPredictions(data, CLASSIFIER_SINGLE_OUTPUT_CATEGORICAL)
pred = model.predict(test)
assert (pred.shape[0] == x_test.shape[0])
@ -254,7 +328,7 @@ def test_blackbox_classifier_probabilities():
train = ArrayDataset(x_train, y_train)
data = Data(train)
model = BlackboxClassifierPredictions(data, ModelOutputType.CLASSIFIER_PROBABILITIES)
model = BlackboxClassifierPredictions(data, CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES)
pred = model.predict(train)
assert (pred.shape[0] == x_train.shape[0])
assert (0.0 < pred).all()
@ -264,6 +338,23 @@ def test_blackbox_classifier_probabilities():
assert (score == 1.0)
def test_blackbox_classifier_multi_label_probabilities():
    """Blackbox classifier over multi-output class-probability predictions."""
    (x_train, _), (_, _) = dataset_utils.get_iris_dataset_np()
    # 105 rows, each holding the same 3-class distribution repeated for
    # 3 label columns (multi-label categorical probabilities).
    y_train = np.tile(np.array([0.23, 0.56, 0.21]), (105, 3))
    train = ArrayDataset(x_train, y_train)
    model = BlackboxClassifierPredictions(Data(train),
                                          CLASSIFIER_MULTI_OUTPUT_CLASS_PROBABILITIES)
    pred = model.predict(train)
    assert pred.shape[0] == x_train.shape[0]
    # returned values are genuine probabilities, strictly inside (0, 1)
    assert ((0.0 < pred) & (pred < 1.0)).all()
def test_blackbox_classifier_predict():
def predict(x):
return np.array([[0.23, 0.56, 0.21] for i in range(x.shape[0])])
@ -273,7 +364,8 @@ def test_blackbox_classifier_predict():
train = ArrayDataset(x_train, y_train)
model = BlackboxClassifierPredictFunction(predict, ModelOutputType.CLASSIFIER_PROBABILITIES, (4,), 3)
model = BlackboxClassifierPredictFunction(predict, CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES,
(4,), 3)
pred = model.predict(train)
assert (pred.shape[0] == x_train.shape[0])
assert (0.0 < pred).all()
@ -292,7 +384,8 @@ def test_blackbox_classifier_predict_scalar():
train = ArrayDataset(x_train, y_train)
model = BlackboxClassifierPredictFunction(predict, ModelOutputType.CLASSIFIER_SCALAR, (4,), 3)
model = BlackboxClassifierPredictFunction(predict, CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES,
(4,), 3)
pred = model.predict(train)
assert (pred.shape[0] == x_train.shape[0])
@ -310,23 +403,23 @@ def test_is_one_hot():
def test_get_nb_classes():
(_, y_train), (_, y_test) = dataset_utils.get_iris_dataset_np()
output_type = CLASSIFIER_SINGLE_OUTPUT_CATEGORICAL
# shape: (x,) - not 1-hot
nb_classes_test = get_nb_classes(y_test)
nb_classes_train = get_nb_classes(y_train)
nb_classes_test = get_nb_classes(y_test, output_type)
nb_classes_train = get_nb_classes(y_train, output_type)
assert (nb_classes_test == nb_classes_train)
assert (nb_classes_test == 3)
# shape: (x,1) - not 1-hot
nb_classes_test = get_nb_classes(y_test.reshape(-1, 1))
nb_classes_test = get_nb_classes(y_test.reshape(-1, 1), output_type)
assert (nb_classes_test == 3)
# shape: (x,3) - 1-hot
y = to_categorical(y_test)
nb_classes = get_nb_classes(y)
nb_classes = get_nb_classes(y, output_type)
assert (nb_classes == 3)
# gaps: 1,2,4 (0,3 missing)
y_test[y_test == 0] = 4
nb_classes = get_nb_classes(y_test)
nb_classes = get_nb_classes(y_test, output_type)
assert (nb_classes == 5)

View file

@ -1,16 +1,23 @@
import numpy as np
from torch import nn, optim
from torch import nn, optim, sigmoid, where, from_numpy
from torch.nn import functional
from torch.utils.data import DataLoader, TensorDataset
from scipy.special import expit
from art.utils import check_and_transform_label_format
from apt.utils.datasets.datasets import PytorchData
from apt.utils.models import ModelOutputType
from apt.utils.models import CLASSIFIER_SINGLE_OUTPUT_CLASS_LOGITS, CLASSIFIER_SINGLE_OUTPUT_BINARY_LOGITS, \
CLASSIFIER_SINGLE_OUTPUT_BINARY_PROBABILITIES, CLASSIFIER_MULTI_OUTPUT_CLASS_LOGITS, \
CLASSIFIER_MULTI_OUTPUT_BINARY_LOGITS
from apt.utils.models.pytorch_model import PyTorchClassifier
from art.utils import load_nursery
from apt.utils import dataset_utils
class pytorch_model(nn.Module):
class PytorchModel(nn.Module):
def __init__(self, num_classes, num_features):
super(pytorch_model, self).__init__()
super(PytorchModel, self).__init__()
self.fc1 = nn.Sequential(
nn.Linear(num_features, 1024),
@ -39,7 +46,77 @@ class pytorch_model(nn.Module):
return self.classifier(out)
def test_nursery_pytorch_state_dict():
class PytorchModelBinary(nn.Module):
    """Small fully-connected network for single-output binary classification.

    Produces a single value per sample. NOTE(review): the final layer ends in
    Tanh, so the "logit" is squashed to (-1, 1) rather than being an unbounded
    linear output; the tests pair it with CLASSIFIER_SINGLE_OUTPUT_BINARY_LOGITS
    and BCEWithLogitsLoss - confirm the squashing is intentional.
    """

    def __init__(self, num_features):
        super(PytorchModelBinary, self).__init__()
        # num_features -> 256 -> 128 -> 1, Tanh activation after every layer
        self.fc2 = nn.Sequential(
            nn.Linear(num_features, 256),
            nn.Tanh(), )
        self.fc3 = nn.Sequential(
            nn.Linear(256, 128),
            nn.Tanh(), )
        self.fc4 = nn.Sequential(
            nn.Linear(128, 1),
            nn.Tanh(),
        )

    def forward(self, x):
        """Return one tanh-squashed score per input sample (shape (batch, 1))."""
        out = self.fc2(x)
        out = self.fc3(out)
        return self.fc4(out)
class PytorchModelBinarySigmoid(nn.Module):
    """Same architecture as PytorchModelBinary with a Sigmoid output stage.

    Emits one probability per sample, for use with BCELoss and
    CLASSIFIER_SINGLE_OUTPUT_BINARY_PROBABILITIES. NOTE(review): the Sigmoid is
    applied on top of a Tanh output, which confines the probabilities to roughly
    (0.27, 0.73) rather than the full (0, 1) range - confirm this is intentional.
    """

    def __init__(self, num_features):
        super(PytorchModelBinarySigmoid, self).__init__()
        # num_features -> 256 -> 128 -> 1, Tanh activation after every layer
        self.fc2 = nn.Sequential(
            nn.Linear(num_features, 256),
            nn.Tanh(), )
        self.fc3 = nn.Sequential(
            nn.Linear(256, 128),
            nn.Tanh(), )
        self.fc4 = nn.Sequential(
            nn.Linear(128, 1),
            nn.Tanh(),
        )
        # final non-linearity converting the tanh score into a probability
        self.classifier = nn.Sigmoid()

    def forward(self, x):
        """Return a per-sample probability (sigmoid of the tanh-squashed score)."""
        out = self.fc2(x)
        out = self.fc3(out)
        out = self.fc4(out)
        return self.classifier(out)
class FocalLoss(nn.Module):
def __init__(self, gamma=2, alpha=0.5):
super(FocalLoss, self).__init__()
self.gamma = gamma
self.alpha = alpha
def forward(self, input, target):
bce_loss = functional.binary_cross_entropy_with_logits(input, target, reduction='none')
p = sigmoid(input)
p = where(target >= 0.5, p, 1 - p)
modulating_factor = (1 - p) ** self.gamma
alpha = self.alpha * target + (1 - self.alpha) * (1 - target)
focal_loss = alpha * modulating_factor * bce_loss
return focal_loss.mean()
def test_pytorch_nursery_state_dict():
(x_train, y_train), (x_test, y_test), _, _ = load_nursery(test_set=0.5)
# reduce size of training set to make attack slightly better
train_set_size = 500
@ -48,12 +125,15 @@ def test_nursery_pytorch_state_dict():
x_test = x_test[:train_set_size]
y_test = y_test[:train_set_size]
inner_model = pytorch_model(4, 24)
inner_model = PytorchModel(4, 24)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(inner_model.parameters(), lr=0.01)
model = PyTorchClassifier(model=inner_model, output_type=ModelOutputType.CLASSIFIER_LOGITS, loss=criterion,
optimizer=optimizer, input_shape=(24,),
model = PyTorchClassifier(model=inner_model,
output_type=CLASSIFIER_SINGLE_OUTPUT_CLASS_LOGITS,
loss=criterion,
optimizer=optimizer,
input_shape=(24,),
nb_classes=4)
model.fit(PytorchData(x_train.astype(np.float32), y_train), save_entire_model=False, nb_epochs=10)
model.load_latest_state_dict_checkpoint()
@ -62,12 +142,12 @@ def test_nursery_pytorch_state_dict():
assert (0 <= score <= 1)
# python pytorch numpy
model.load_best_state_dict_checkpoint()
score = model.score(PytorchData(x_test.astype(np.float32), y_test))
score = model.score(PytorchData(x_test.astype(np.float32), y_test), apply_non_linearity=expit)
print('best model accuracy: ', score)
assert (0 <= score <= 1)
def test_nursery_pytorch_save_entire_model():
def test_pytorch_nursery_save_entire_model():
(x_train, y_train), (x_test, y_test), _, _ = load_nursery(test_set=0.5)
# reduce size of training set to make attack slightly better
@ -77,20 +157,208 @@ def test_nursery_pytorch_save_entire_model():
x_test = x_test[:train_set_size]
y_test = y_test[:train_set_size]
model = pytorch_model(4, 24)
inner_model = PytorchModel(4, 24)
# model = torch.nn.DataParallel(model)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)
optimizer = optim.Adam(inner_model.parameters(), lr=0.01)
art_model = PyTorchClassifier(model=model, output_type=ModelOutputType.CLASSIFIER_LOGITS, loss=criterion,
optimizer=optimizer, input_shape=(24,),
nb_classes=4)
art_model.fit(PytorchData(x_train.astype(np.float32), y_train), save_entire_model=True, nb_epochs=10)
model = PyTorchClassifier(model=inner_model,
output_type=CLASSIFIER_SINGLE_OUTPUT_CLASS_LOGITS,
loss=criterion,
optimizer=optimizer,
input_shape=(24,),
nb_classes=4)
model.fit(PytorchData(x_train.astype(np.float32), y_train), save_entire_model=True, nb_epochs=10)
score = art_model.score(PytorchData(x_test.astype(np.float32), y_test))
score = model.score(PytorchData(x_test.astype(np.float32), y_test))
print('Base model accuracy: ', score)
assert (0 <= score <= 1)
art_model.load_best_model_checkpoint()
score = art_model.score(PytorchData(x_test.astype(np.float32), y_test))
model.load_best_model_checkpoint()
score = model.score(PytorchData(x_test.astype(np.float32), y_test), apply_non_linearity=expit)
print('best model accuracy: ', score)
assert (0 <= score <= 1)
def test_pytorch_predictions_single_label_binary():
    """Single-output binary model emitting logits: fit, predict and score on a toy dataset."""
    features = np.array([[23, 165, 70, 10],
                         [45, 158, 67, 11],
                         [56, 123, 65, 58],
                         [67, 154, 90, 12],
                         [45, 149, 67, 56],
                         [42, 166, 58, 50],
                         [73, 172, 68, 10],
                         [94, 168, 69, 11],
                         [69, 175, 80, 61],
                         [24, 181, 95, 10],
                         [18, 190, 102, 53],
                         [22, 161, 95, 10],
                         [24, 181, 103, 10],
                         [28, 184, 108, 10]])
    labels = np.array([1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1])
    feature_tensor = from_numpy(features)
    data = PytorchData(feature_tensor, from_numpy(labels))

    base_model = PytorchModelBinary(4)
    # logits output -> BCEWithLogitsLoss (sigmoid folded into the loss)
    model = PyTorchClassifier(model=base_model,
                              output_type=CLASSIFIER_SINGLE_OUTPUT_BINARY_LOGITS,
                              loss=nn.BCEWithLogitsLoss(),
                              optimizer=optim.Adam(base_model.parameters(), lr=0.01),
                              input_shape=(4,),
                              nb_classes=2)
    model.fit(data, save_entire_model=False, nb_epochs=1)

    predictions = model.predict(data)
    assert (predictions.shape[0] == feature_tensor.shape[0])
    accuracy = model.score(data)
    assert (0 < accuracy <= 1.0)
def test_pytorch_predictions_single_label_binary_prob():
    """Single-output binary model emitting probabilities (sigmoid head): fit, predict and score."""
    features = np.array([[23, 165, 70, 10],
                         [45, 158, 67, 11],
                         [56, 123, 65, 58],
                         [67, 154, 90, 12],
                         [45, 149, 67, 56],
                         [42, 166, 58, 50],
                         [73, 172, 68, 10],
                         [94, 168, 69, 11],
                         [69, 175, 80, 61],
                         [24, 181, 95, 10],
                         [18, 190, 102, 53],
                         [22, 161, 95, 10],
                         [24, 181, 103, 10],
                         [28, 184, 108, 10]])
    labels = np.array([1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1])
    feature_tensor = from_numpy(features)
    data = PytorchData(feature_tensor, from_numpy(labels))

    base_model = PytorchModelBinarySigmoid(4)
    # probability output -> plain BCELoss (model already applies sigmoid)
    model = PyTorchClassifier(model=base_model,
                              output_type=CLASSIFIER_SINGLE_OUTPUT_BINARY_PROBABILITIES,
                              loss=nn.BCELoss(),
                              optimizer=optim.Adam(base_model.parameters(), lr=0.01),
                              input_shape=(4,),
                              nb_classes=2)
    model.fit(data, save_entire_model=False, nb_epochs=1)

    predictions = model.predict(data)
    assert (predictions.shape[0] == feature_tensor.shape[0])
    accuracy = model.score(data)
    assert (0 < accuracy <= 1.0)
def test_pytorch_predictions_multi_label_cat():
    """Multi-label categorical model (two heads, each a 3-class softmax problem over iris).

    This kind of model requires special training and will not be supported using the
    'fit' method, so the training loop is written by hand here; the wrapped model is
    then used only for predict/score.
    """
    class MultiLabelCatModel(nn.Module):
        """Shared trunk with two independent classification heads; returns a tuple of logits."""
        def __init__(self, num_classes, num_features):
            super(MultiLabelCatModel, self).__init__()
            self.fc1 = nn.Sequential(
                nn.Linear(num_features, 256),
                nn.Tanh(), )
            self.classifier1 = nn.Linear(256, num_classes)
            self.classifier2 = nn.Linear(256, num_classes)

        def forward(self, x):
            # compute the shared trunk once and feed both heads
            hidden = self.fc1(x)
            return self.classifier1(hidden), self.classifier2(hidden)

    (x_train, y_train), (x_test, y_test) = dataset_utils.get_iris_dataset_np()
    # make multi-label categorical
    num_classes = 3
    y_train = check_and_transform_label_format(y_train, nb_classes=num_classes)
    y_test = check_and_transform_label_format(y_test, nb_classes=num_classes)
    # NOTE(review): train labels are flattened to (n, 2*num_classes) while test labels
    # are stacked to (n, 2, num_classes) — presumably intentional to exercise both
    # accepted label layouts; confirm against the score() implementation.
    y_train = np.column_stack((y_train, y_train))
    y_test = np.stack([y_test, y_test], axis=1)
    test = PytorchData(x_test.astype(np.float32), y_test.astype(np.float32))

    inner_model = MultiLabelCatModel(num_classes, 4)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(inner_model.parameters(), lr=0.01)

    # train model
    train_dataset = TensorDataset(from_numpy(x_train.astype(np.float32)), from_numpy(y_train.astype(np.float32)))
    train_loader = DataLoader(train_dataset, batch_size=100, shuffle=True)
    for epoch in range(5):
        # Train for one epoch
        for inputs, targets in train_loader:
            # Zero the parameter gradients
            optimizer.zero_grad()
            # Perform prediction
            model_outputs = inner_model(inputs)
            # Form the loss function: sum per-head CE over the matching label slice
            loss = 0
            for i, o in enumerate(model_outputs):
                t = targets[:, i * num_classes:(i + 1) * num_classes]
                loss += criterion(o, t)
            loss.backward()
            optimizer.step()

    model = PyTorchClassifier(model=inner_model,
                              output_type=CLASSIFIER_MULTI_OUTPUT_CLASS_LOGITS,
                              loss=criterion,
                              optimizer=optimizer,
                              # iris has 4 features; (24,) was a copy-paste from the nursery tests
                              input_shape=(4,),
                              nb_classes=3)
    pred = model.predict(test)
    assert (pred.shape[0] == x_test.shape[0])
    # logits output: the sigmoid non-linearity must be supplied for scoring
    score = model.score(test, apply_non_linearity=expit)
    assert (0 < score <= 1.0)
def test_pytorch_predictions_multi_label_binary():
    """Multi-label binary model (single head emitting 3 independent binary logits) over iris."""
    class MultiLabelBinaryModel(nn.Module):
        """One linear head over a tanh trunk; output is num_labels binary logits."""
        def __init__(self, num_labels, num_features):
            super(MultiLabelBinaryModel, self).__init__()
            self.fc1 = nn.Sequential(
                nn.Linear(num_features, 256),
                nn.Tanh(), )
            self.classifier1 = nn.Linear(256, num_labels)

        def forward(self, x):
            return self.classifier1(self.fc1(x))

    (x_train, y_train), (x_test, y_test) = dataset_utils.get_iris_dataset_np()
    # make multi-label binary: duplicate the label column and binarize (class 2 -> 1)
    y_train = np.column_stack((y_train, y_train, y_train))
    y_train[y_train > 1] = 1
    y_test = np.column_stack((y_test, y_test, y_test))
    y_test[y_test > 1] = 1
    test = PytorchData(x_test.astype(np.float32), y_test)

    inner_model = MultiLabelBinaryModel(3, 4)
    criterion = FocalLoss()
    optimizer = optim.RMSprop(inner_model.parameters(), lr=0.01)
    model = PyTorchClassifier(model=inner_model,
                              output_type=CLASSIFIER_MULTI_OUTPUT_BINARY_LOGITS,
                              loss=criterion,
                              optimizer=optimizer,
                              # iris has 4 features; (24,) was a copy-paste from the nursery tests
                              input_shape=(4,),
                              nb_classes=3)
    model.fit(PytorchData(x_train.astype(np.float32), y_train.astype(np.float32)), save_entire_model=False,
              nb_epochs=10)
    pred = model.predict(test)
    assert (pred.shape[0] == x_test.shape[0])
    # logits output: the sigmoid non-linearity must be supplied for scoring
    score = model.score(test, apply_non_linearity=expit)
    # exact-accuracy (== 1.0) after stochastic training is flaky; use the same
    # range check as the other prediction tests
    assert (0 < score <= 1.0)