ModelOutputType is now a Flag instead of a regular Enum. Combinations of the base flags are provided for all of the previous output types for convenience. All checks in the code now use the basic flags and not the complex types.

Signed-off-by: abigailt <abigailt@il.ibm.com>
This commit is contained in:
abigailt 2024-07-03 13:29:37 +03:00
parent 2895b40f05
commit 367cae679b
10 changed files with 126 additions and 100 deletions

View file

@ -1,6 +1,11 @@
from apt.utils.models.model import Model, BlackboxClassifier, ModelOutputType, ScoringMethod, \
BlackboxClassifierPredictions, BlackboxClassifierPredictFunction, get_nb_classes, is_one_hot, \
check_correct_model_output, is_multi_label, is_multi_label_binary, is_logits
check_correct_model_output, is_multi_label, is_multi_label_binary, is_logits, is_binary, \
CLASSIFIER_SINGLE_OUTPUT_CATEGORICAL, CLASSIFIER_SINGLE_OUTPUT_BINARY_PROBABILITIES, \
CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES, CLASSIFIER_SINGLE_OUTPUT_BINARY_LOGITS, \
CLASSIFIER_SINGLE_OUTPUT_CLASS_LOGITS, CLASSIFIER_MULTI_OUTPUT_CATEGORICAL, \
CLASSIFIER_MULTI_OUTPUT_BINARY_PROBABILITIES, CLASSIFIER_MULTI_OUTPUT_CLASS_PROBABILITIES, \
CLASSIFIER_MULTI_OUTPUT_BINARY_LOGITS, CLASSIFIER_MULTI_OUTPUT_CLASS_LOGITS
from apt.utils.models.sklearn_model import SklearnModel, SklearnClassifier, SklearnRegressor
from apt.utils.models.keras_model import KerasClassifier, KerasRegressor
from apt.utils.models.xgboost_model import XGBoostClassifier

View file

@ -101,7 +101,7 @@ class KerasRegressor(KerasModel):
"""
def __init__(self, model: "keras.models.Model", black_box_access: Optional[bool] = True,
             unlimited_queries: Optional[bool] = True, **kwargs):
    """
    Wrap a Keras regression model.

    :param model: The Keras model to wrap.
    :type model: `keras.models.Model`
    :param black_box_access: Forwarded unchanged to the base-class initializer.
    :type black_box_access: boolean, optional
    :param unlimited_queries: Forwarded unchanged to the base-class initializer.
    :type unlimited_queries: boolean, optional
    :param kwargs: Additional keyword arguments forwarded to the base-class initializer.
    """
    # The base initializer is called exactly once, with the REGRESSION flag as the output type.
    super().__init__(model, ModelOutputType.REGRESSION, black_box_access, unlimited_queries, **kwargs)
    self._art_model = ArtKerasRegressor(model)
def fit(self, train_data: Dataset, **kwargs) -> None:

View file

@ -1,6 +1,6 @@
from abc import ABCMeta, abstractmethod
from typing import Any, Optional, Callable, Tuple, Union, TYPE_CHECKING
from enum import Enum, auto
from enum import Enum, Flag, auto
import numpy as np
from scipy.special import expit
@ -12,18 +12,40 @@ if TYPE_CHECKING:
import torch
class ModelOutputType(Flag):
    """
    Bit flags describing the kind of output a model produces.

    The members are basic properties that are combined with ``|`` into a full output type.
    A property is tested with ``in``, e.g. ``ModelOutputType.BINARY in output_type``.
    """
    CLASSIFIER = auto()  # classification output
    MULTI_OUTPUT = auto()  # multiple outputs
    BINARY = auto()  # binary output (a single value per output)
    LOGITS = auto()  # output values are logits
    PROBABILITIES = auto()  # output values are probabilities
    REGRESSION = auto()  # regression output


# Convenience combinations, one for each of the previously available output types.

# class labels
CLASSIFIER_SINGLE_OUTPUT_CATEGORICAL = ModelOutputType.CLASSIFIER
# single binary probability
CLASSIFIER_SINGLE_OUTPUT_BINARY_PROBABILITIES = (ModelOutputType.CLASSIFIER | ModelOutputType.BINARY
                                                 | ModelOutputType.PROBABILITIES)
# vector of class probabilities
CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES = ModelOutputType.CLASSIFIER | ModelOutputType.PROBABILITIES
# single binary logit
CLASSIFIER_SINGLE_OUTPUT_BINARY_LOGITS = (ModelOutputType.CLASSIFIER | ModelOutputType.BINARY
                                          | ModelOutputType.LOGITS)
# vector of logits
CLASSIFIER_SINGLE_OUTPUT_CLASS_LOGITS = ModelOutputType.CLASSIFIER | ModelOutputType.LOGITS
# vector of class labels
CLASSIFIER_MULTI_OUTPUT_CATEGORICAL = ModelOutputType.MULTI_OUTPUT | ModelOutputType.CLASSIFIER
# vector of binary probabilities, 1 per output
CLASSIFIER_MULTI_OUTPUT_BINARY_PROBABILITIES = (ModelOutputType.MULTI_OUTPUT | ModelOutputType.CLASSIFIER
                                                | ModelOutputType.BINARY | ModelOutputType.PROBABILITIES)
# vector of class probabilities for multiple outputs
CLASSIFIER_MULTI_OUTPUT_CLASS_PROBABILITIES = (ModelOutputType.MULTI_OUTPUT | ModelOutputType.CLASSIFIER
                                               | ModelOutputType.PROBABILITIES)
# vector of binary logits
CLASSIFIER_MULTI_OUTPUT_BINARY_LOGITS = (ModelOutputType.MULTI_OUTPUT | ModelOutputType.CLASSIFIER
                                         | ModelOutputType.BINARY | ModelOutputType.LOGITS)
# vector of logits for multiple outputs
CLASSIFIER_MULTI_OUTPUT_CLASS_LOGITS = (ModelOutputType.MULTI_OUTPUT | ModelOutputType.CLASSIFIER
                                        | ModelOutputType.LOGITS)
class ModelType(Enum):
@ -43,40 +65,35 @@ def is_one_hot(y: OUTPUT_DATA_ARRAY_TYPE) -> bool:
def is_multi_label(output_type: ModelOutputType) -> bool:
    """
    Check whether an output type describes a model with multiple outputs.

    :param output_type: The model output type to inspect.
    :type output_type: ModelOutputType
    :return: True if the MULTI_OUTPUT flag is set in `output_type`, False otherwise.
    """
    return ModelOutputType.MULTI_OUTPUT in output_type
def is_multi_label_binary(output_type: ModelOutputType) -> bool:
    """
    Check whether an output type describes multiple binary outputs.

    :param output_type: The model output type to inspect.
    :type output_type: ModelOutputType
    :return: True if both the MULTI_OUTPUT and BINARY flags are set in `output_type`, False otherwise.
    """
    return (ModelOutputType.MULTI_OUTPUT in output_type
            and ModelOutputType.BINARY in output_type)
def is_binary(output_type: ModelOutputType) -> bool:
    """
    Check whether an output type describes binary output, for single or multiple outputs.

    :param output_type: The model output type to inspect.
    :type output_type: ModelOutputType
    :return: True if the BINARY flag is set in `output_type`, False otherwise.
    """
    return ModelOutputType.BINARY in output_type
def is_categorical(output_type: ModelOutputType) -> bool:
    """
    Check whether an output type describes class-label (categorical) output.

    An output type is categorical when it is a classifier output that is neither binary, nor
    probabilities, nor logits. The MULTI_OUTPUT flag does not affect the result.

    :param output_type: The model output type to inspect.
    :type output_type: ModelOutputType
    :return: True if CLASSIFIER is set and none of BINARY, PROBABILITIES and LOGITS is set.
    """
    return (ModelOutputType.CLASSIFIER in output_type
            and ModelOutputType.BINARY not in output_type
            and ModelOutputType.PROBABILITIES not in output_type
            and ModelOutputType.LOGITS not in output_type)
def is_probabilities(output_type: ModelOutputType) -> bool:
    """
    Check whether an output type describes probability output.

    The check only looks at the PROBABILITIES flag, so it holds for binary probabilities as well
    as for vectors of class probabilities.

    :param output_type: The model output type to inspect.
    :type output_type: ModelOutputType
    :return: True if the PROBABILITIES flag is set in `output_type`, False otherwise.
    """
    return ModelOutputType.PROBABILITIES in output_type
def is_logits(output_type: ModelOutputType) -> bool:
    """
    Check whether an output type describes logit output (binary or per-class).

    :param output_type: The model output type to inspect.
    :type output_type: ModelOutputType
    :return: True if the LOGITS flag is set in `output_type`, False otherwise.
    """
    return ModelOutputType.LOGITS in output_type
def is_logits_or_probabilities(output_type: ModelOutputType) -> bool:
    """
    Check whether an output type describes either probability or logit output.

    :param output_type: The model output type to inspect.
    :type output_type: ModelOutputType
    :return: True if `is_probabilities` or `is_logits` holds for `output_type`, False otherwise.
    """
    if is_probabilities(output_type):
        return True
    return is_logits(output_type)
def get_nb_classes(y: OUTPUT_DATA_ARRAY_TYPE, output_type: ModelOutputType) -> int:
@ -115,8 +132,7 @@ def check_correct_model_output(y: OUTPUT_DATA_ARRAY_TYPE, output_type: ModelOutp
:type output_type: ModelOutputType
:raises: ValueError (in case of mismatch)
"""
if not is_one_hot(y) and (output_type == ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES
or output_type == ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CLASS_LOGITS):
if not is_one_hot(y) and not is_multi_label(output_type) and is_categorical(output_type):
raise ValueError("Incompatible model output types. Model outputs 1D array of categorical scalars while "
"output type is set to ", output_type)
@ -208,15 +224,13 @@ class Model(metaclass=ABCMeta):
y = array2numpy(test_data.get_labels())
if scoring_method == ScoringMethod.ACCURACY:
if not is_multi_label(self.output_type) and not is_binary(self.output_type) and nb_classes is not None:
y = check_and_transform_label_format(y, nb_classes=nb_classes)
if (self.output_type == ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES
or self.output_type == ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CLASS_LOGITS
or self.output_type == ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CATEGORICAL):
if not is_multi_label(self.output_type) and not is_binary(self.output_type):
if nb_classes is not None:
y = check_and_transform_label_format(y, nb_classes=nb_classes)
# categorical has been 1-hot encoded by check_and_transform_label_format
return np.count_nonzero(np.argmax(y, axis=1) == np.argmax(predicted, axis=1)) / predicted.shape[0]
elif (self.output_type == ModelOutputType.CLASSIFIER_MULTI_OUTPUT_CLASS_LOGITS
or self.output_type == ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES):
elif (is_multi_label(self.output_type) and not is_binary(self.output_type)
and is_logits_or_probabilities(self.output_type)):
if predicted.shape != y.shape:
raise ValueError('Do not know how to compare arrays with different shapes')
elif len(predicted.shape) < 3:
@ -228,7 +242,7 @@ class Model(metaclass=ABCMeta):
count += np.count_nonzero(np.argmax(y[:, i], axis=1) == np.argmax(predicted[:, i], axis=1))
sum += predicted.shape[0] * predicted.shape[-1]
return count / sum
elif self.output_type == ModelOutputType.CLASSIFIER_MULTI_OUTPUT_CATEGORICAL:
elif is_multi_label(self.output_type) and is_categorical(self.output_type):
return np.count_nonzero(y == predicted) / (predicted.shape[0] * y.shape[1])
elif is_binary(self.output_type):
if is_logits(self.output_type):
@ -392,7 +406,6 @@ class BlackboxClassifier(Model):
:return: Predictions from the model as numpy array.
"""
predictions = self._art_model.predict(x.get_samples())
# check_correct_model_output(predictions, self.output_type)
return predictions
@abstractmethod

View file

@ -10,7 +10,7 @@ from torch.utils.data import DataLoader, TensorDataset
from art.utils import check_and_transform_label_format
from apt.utils.datasets.datasets import PytorchData, DatasetWithPredictions, ArrayDataset
from apt.utils.models import Model, ModelOutputType, is_multi_label, is_multi_label_binary
from apt.utils.models import Model, ModelOutputType, is_multi_label, is_multi_label_binary, is_binary
from apt.utils.datasets import OUTPUT_DATA_ARRAY_TYPE, array2numpy
from art.estimators.classification.pytorch import PyTorchClassifier as ArtPyTorchClassifier
@ -56,8 +56,7 @@ class PyTorchClassifierWrapper(ArtPyTorchClassifier):
super().__init__(model, loss, input_shape, nb_classes, optimizer, use_amp, opt_level, loss_scale,
channels_first, clip_values, preprocessing_defences, postprocessing_defences, preprocessing,
device_type)
self._is_single_binary = (output_type == ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_BINARY_PROBABILITIES
or output_type == ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_BINARY_LOGITS)
self._is_single_binary = not is_multi_label(output_type) and is_binary(output_type)
self._is_multi_label = is_multi_label(output_type)
self._is_multi_label_binary = is_multi_label_binary(output_type)

View file

@ -93,7 +93,7 @@ class SklearnRegressor(SklearnModel):
"""
def __init__(self, model: BaseEstimator, black_box_access: Optional[bool] = True,
             unlimited_queries: Optional[bool] = True, **kwargs):
    """
    Wrap a scikit-learn regression model.

    :param model: The scikit-learn estimator to wrap.
    :type model: `BaseEstimator`
    :param black_box_access: Forwarded unchanged to the base-class initializer.
    :type black_box_access: boolean, optional
    :param unlimited_queries: Forwarded unchanged to the base-class initializer.
    :type unlimited_queries: boolean, optional
    :param kwargs: Additional keyword arguments forwarded to the base-class initializer.
    """
    # The base initializer is called exactly once, with the REGRESSION flag as the output type.
    super().__init__(model, ModelOutputType.REGRESSION, black_box_access, unlimited_queries, **kwargs)
    self._art_model = ScikitlearnRegressor(model)
def fit(self, train_data: Dataset, **kwargs) -> None: