New model wrappers (#32)

* keras wrapper + blackbox classifier wrapper (fix #7)

* fix error in NCP calculation

* Update notebooks

* Fix #25 (incorrect attack_feature indexes for social feature in notebook)

* Consistent naming of internal parameters
This commit is contained in:
abigailgold 2022-05-12 15:44:29 +03:00 committed by GitHub
parent fd6be8e778
commit fe676fa426
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
15 changed files with 1407 additions and 656 deletions

View file

@ -1,16 +1,25 @@
from abc import ABCMeta, abstractmethod
from typing import Any, Optional
from enum import Enum, auto
import numpy as np
from apt.utils.datasets import Dataset, OUTPUT_DATA_ARRAY_TYPE
from apt.utils.datasets import Dataset, Data, OUTPUT_DATA_ARRAY_TYPE
from art.estimators.classification import BlackBoxClassifier
from art.utils import check_and_transform_label_format
class ModelOutputType(Enum):
    """
    Kinds of output that a wrapped model can yield.
    """
    # Classifier emitting a vector that holds either probabilities or logits.
    CLASSIFIER_VECTOR = auto()
    # Classifier emitting a vector of probabilities.
    CLASSIFIER_PROBABILITIES = auto()
    # Classifier emitting a vector of logits.
    CLASSIFIER_LOGITS = auto()
    # Classifier emitting the label only.
    CLASSIFIER_SCALAR = auto()
    # Regressor emitting a value.
    REGRESSOR_SCALAR = auto()
class ScoringMethod(Enum):
    """
    Supported ways of scoring a model's predictions against the true labels.
    """
    # Number of correct predictions divided by the number of samples.
    ACCURACY = auto()
    # Mean squared error between the predictions and the true labels.
    MEAN_SQUARED_ERROR = auto()
class Model(metaclass=ABCMeta):
"""
Abstract base class for ML model wrappers.
@ -54,7 +63,7 @@ class Model(metaclass=ABCMeta):
Perform predictions using the model for input `x`.
:param x: Input samples.
:type x: `np.ndarray` or `pandas.DataFrame`
:type x: `Dataset`
:return: Predictions from the model as numpy array.
"""
raise NotImplementedError
@ -107,3 +116,87 @@ class Model(metaclass=ABCMeta):
:return: True if a user can perform unlimited queries to the model API, otherwise False.
"""
return self._unlimited_queries
def get_nb_classes(self, y: OUTPUT_DATA_ARRAY_TYPE) -> int:
    """
    Get the number of classes from an array of labels.

    A 1D array, or a 2D array with a single column, is treated as categorical labels and the number of
    distinct values is returned. Any other array is treated as one-hot encoded and the size of its second
    dimension is returned.

    :param y: The labels.
    :type y: numpy array
    :return: The number of classes as integer.
    """
    # A column vector of shape (n, 1) holds index labels, not a one-hot encoding of a single class, so it
    # is counted the same way as a flat 1D label array.
    if y.ndim == 1 or (y.ndim == 2 and y.shape[1] == 1):
        return len(np.unique(y))
    return y.shape[1]
class BlackboxClassifier(Model):
    """
    Wrapper for black-box ML classification models.

    :param model: The training and/or test data along with the model's predictions for the data. Assumes that the data
                  is represented as numpy arrays. Labels are expected to either be one-hot encoded or
                  a 1D-array of categorical labels (consecutive integers starting at 0).
    :type model: `Data` object
    :param output_type: The type of output the model yields (vector/label only for classifiers,
                        value for regressors)
    :type output_type: `ModelOutputType`
    :param black_box_access: Boolean describing the type of deployment of the model (when in production).
                             Always assumed to be True for this wrapper; the passed value is ignored.
    :type black_box_access: boolean, optional
    :param unlimited_queries: Boolean indicating whether a user can perform unlimited queries to the model API.
                              Always assumed to be False for this wrapper; the passed value is ignored.
    :type unlimited_queries: boolean, optional
    """

    def __init__(self, model: Data, output_type: ModelOutputType, black_box_access: Optional[bool] = True,
                 unlimited_queries: Optional[bool] = True, **kwargs):
        # The access flags are fixed for this wrapper regardless of what the caller passed.
        super().__init__(model, output_type, black_box_access=True, unlimited_queries=False, **kwargs)
        x = model.get_train_samples()
        y = model.get_train_labels()
        self.nb_classes = self.get_nb_classes(y)
        # Bring the labels to the 2D per-class form that `score` compares with argmax.
        y = check_and_transform_label_format(y, nb_classes=self.nb_classes)

        # Append the test portion (when present) so that lookups cover both train and test data.
        test_x = model.get_test_samples()
        if test_x is not None and isinstance(x, np.ndarray):
            x = np.vstack((x, test_x))
        test_y = model.get_test_labels()
        if test_y is not None and isinstance(y, np.ndarray):
            y = np.vstack((y, check_and_transform_label_format(test_y, nb_classes=self.nb_classes)))

        # The ART classifier is given the (samples, predictions) pair in place of a prediction function.
        predict_fn = (x, y)
        self._art_model = BlackBoxClassifier(predict_fn, x.shape[1:], self.nb_classes, fuzzy_float_compare=True)

    def fit(self, train_data: Dataset, **kwargs) -> None:
        """
        A blackbox model cannot be fit.

        :param train_data: Training data (unused).
        :type train_data: `Dataset`
        :raises NotImplementedError: Always.
        """
        raise NotImplementedError("A blackbox model cannot be fit")

    def predict(self, x: Dataset, **kwargs) -> OUTPUT_DATA_ARRAY_TYPE:
        """
        Get predictions from the model for input `x`. `x` must be a subset of the data provided in the `model` data in
        `__init__()`.

        :param x: Input samples.
        :type x: `Dataset`
        :return: Predictions from the model as numpy array.
        """
        return self._art_model.predict(x.get_samples())

    def score(self, test_data: Dataset, scoring_method: Optional[ScoringMethod] = ScoringMethod.ACCURACY, **kwargs):
        """
        Score the model using test data.

        :param test_data: Test data.
        :type test_data: `Dataset`
        :param scoring_method: The method for scoring predictions. Default is ACCURACY.
        :type scoring_method: `ScoringMethod`, optional
        :return: the score as float (for classifiers, between 0 and 1)
        :raises NotImplementedError: If `scoring_method` is anything other than ACCURACY.
        """
        predicted = self._art_model.predict(test_data.get_samples())
        y = check_and_transform_label_format(test_data.get_labels(), nb_classes=self.nb_classes)
        if scoring_method == ScoringMethod.ACCURACY:
            # Fraction of samples whose highest-scoring class matches the true class.
            return np.count_nonzero(np.argmax(y, axis=1) == np.argmax(predicted, axis=1)) / predicted.shape[0]
        raise NotImplementedError("Only ScoringMethod.ACCURACY is supported for blackbox classifiers")