New model wrappers (#32)

* keras wrapper + blackbox classifier wrapper (fix #7)

* fix error in NCP calculation

* Update notebooks

* Fix #25 (incorrect attack_feature indexes for social feature in notebook)

* Consistent naming of internal parameters
This commit is contained in:
abigailgold 2022-05-12 15:44:29 +03:00 committed by GitHub
parent fd6be8e778
commit fe676fa426
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
15 changed files with 1407 additions and 656 deletions

View file

@ -1,2 +1,3 @@
from apt.utils.models.model import Model, ModelOutputType
from apt.utils.models.model import Model, BlackboxClassifier, ModelOutputType, ScoringMethod
from apt.utils.models.sklearn_model import SklearnModel, SklearnClassifier, SklearnRegressor
from apt.utils.models.keras_model import KerasClassifier

View file

@ -0,0 +1,149 @@
from typing import Optional
import numpy as np
from sklearn.preprocessing import OneHotEncoder
import tensorflow as tf
from tensorflow import keras
tf.compat.v1.disable_eager_execution()
from apt.utils.models import Model, ModelOutputType, ScoringMethod
from apt.utils.datasets import Dataset, OUTPUT_DATA_ARRAY_TYPE
from art.utils import check_and_transform_label_format
from art.estimators.classification.keras import KerasClassifier as ArtKerasClassifier
# from art.estimators.regression.keras import KerasRegressor as ArtKerasRegressor
class KerasModel(Model):
    """
    Wrapper class for keras models.

    Common base for the keras wrappers defined in this module. It declares no members of its own; everything
    is inherited from `Model`.
    """
class KerasClassifier(KerasModel):
    """
    Wrapper class for keras classification models.

    Predictions, fitting and scoring are delegated to an ART `KerasClassifier` built around the supplied model.

    :param model: The original keras model object.
    :type model: `keras.models.Model`
    :param output_type: The type of output the model yields (vector/label only)
    :type output_type: `ModelOutputType`
    :param black_box_access: Boolean describing the type of deployment of the model (when in production).
                             Set to True if the model is only available via query (API) access, i.e.,
                             only the outputs of the model are exposed, and False if the model internals
                             are also available. Default is True.
    :type black_box_access: boolean, optional
    :param unlimited_queries: If black_box_access is True, this boolean indicates whether a user can perform
                              unlimited queries to the model API or whether there is a limit to the number of
                              queries that can be submitted. Default is True.
    :type unlimited_queries: boolean, optional
    """

    def __init__(self, model: keras.models.Model, output_type: ModelOutputType,
                 black_box_access: Optional[bool] = True, unlimited_queries: Optional[bool] = True, **kwargs):
        super().__init__(model, output_type, black_box_access, unlimited_queries, **kwargs)
        # The ART wrapper must be told whether the network emits raw logits rather than probabilities.
        use_logits = output_type == ModelOutputType.CLASSIFIER_LOGITS
        self._art_model = ArtKerasClassifier(model, use_logits=use_logits)

    def fit(self, train_data: Dataset, **kwargs) -> None:
        """
        Fit the model using the training data.

        :param train_data: Training data. Labels are expected to either be one-hot encoded or a 1D-array of
                           categorical labels (consecutive integers starting at 0).
        :type train_data: `Dataset`
        :param kwargs: Additional arguments forwarded to the underlying ART model's `fit`.
        :return: None
        """
        # Pass the class count explicitly (as `score` does) so the one-hot width always matches the model's
        # output, even when the highest class is missing from the training labels.
        y_encoded = check_and_transform_label_format(train_data.get_labels(),
                                                     nb_classes=self._art_model.nb_classes)
        self._art_model.fit(train_data.get_samples(), y_encoded, **kwargs)

    def predict(self, x: Dataset, **kwargs) -> OUTPUT_DATA_ARRAY_TYPE:
        """
        Perform predictions using the model for input `x`.

        :param x: Input samples.
        :type x: `Dataset`
        :param kwargs: Additional arguments forwarded to the underlying ART model's `predict`.
        :return: Predictions from the model as numpy array (class probabilities, if supported).
        """
        return self._art_model.predict(x.get_samples(), **kwargs)

    def score(self, test_data: Dataset, scoring_method: Optional[ScoringMethod] = ScoringMethod.ACCURACY, **kwargs):
        """
        Score the model using test data.

        :param test_data: Test data.
        :type test_data: `Dataset`
        :param scoring_method: The method for scoring predictions. Default is ACCURACY.
        :type scoring_method: `ScoringMethod`, optional
        :return: the score as float (between 0 and 1)
        :raises NotImplementedError: if `scoring_method` is anything other than ACCURACY.
        """
        y = check_and_transform_label_format(test_data.get_labels(), self._art_model.nb_classes)
        predicted = self.predict(test_data)
        if scoring_method == ScoringMethod.ACCURACY:
            # Compare the arg-max class of the one-hot labels with that of the predicted vectors.
            return np.count_nonzero(np.argmax(y, axis=1) == np.argmax(predicted, axis=1)) / predicted.shape[0]
        raise NotImplementedError('Only ACCURACY supported as scoring method')
# class KerasRegressor(KerasModel):
# """
# Wrapper class for keras regression models.
#
# :param model: The original keras model object.
# :type model: `keras.models.Model`
# :param black_box_access: Boolean describing the type of deployment of the model (when in production).
# Set to True if the model is only available via query (API) access, i.e.,
# only the outputs of the model are exposed, and False if the model internals
# are also available. Default is True.
# :type black_box_access: boolean, optional
# :param unlimited_queries: If black_box_access is True, this boolean indicates whether a user can perform
# unlimited queries to the model API or whether there is a limit to the number of
# queries that can be submitted. Default is True.
# :type unlimited_queries: boolean, optional
# """
# def __init__(self, model: keras.models.Model, black_box_access: Optional[bool] = True,
# unlimited_queries: Optional[bool] = True, **kwargs):
# super().__init__(model, ModelOutputType.REGRESSOR_SCALAR, black_box_access, unlimited_queries, **kwargs)
# self._art_model = ArtKerasRegressor(model)
#
# def fit(self, train_data: Dataset, **kwargs) -> None:
# """
# Fit the model using the training data.
#
# :param train_data: Training data.
# :type train_data: `Dataset`
# :return: None
# """
# self._art_model.fit(train_data.get_samples(), train_data.get_labels(), **kwargs)
#
# def predict(self, x: Dataset, **kwargs) -> OUTPUT_DATA_ARRAY_TYPE:
# """
# Perform predictions using the model for input `x`.
#
# :param x: Input samples.
# :type x: `Dataset`
# :return: Predictions from the model as numpy array.
# """
# return self._art_model.predict(x.get_samples(), **kwargs)
#
# def score(self, test_data: Dataset, scoring_method: Optional[ScoringMethod] = ScoringMethod.MEAN_SQUARED_ERROR,
# **kwargs):
# """
# Score the model using test data.
#
# :param test_data: Test data.
# :type test_data: `Dataset`
# :param scoring_method: The method for scoring predictions. Default is MEAN_SQUARED_ERROR.
# :type scoring_method: `ScoringMethod`, optional
# :return: the score as float
# """
# y = check_and_transform_label_format(test_data.get_labels(), self._art_model.nb_classes)
# predicted = self.predict(test_data)
# if scoring_method == ScoringMethod.MEAN_SQUARED_ERROR:
# mse = keras.losses.MeanSquaredError(reduction=keras.losses.Reduction.SUM)
# return mse(y, predicted).numpy()
# else:
# raise NotImplementedError('Only MEAN_SQUARED_ERROR supported as scoring method')

View file

@ -1,16 +1,25 @@
from abc import ABCMeta, abstractmethod
from typing import Any, Optional
from enum import Enum, auto
import numpy as np
from apt.utils.datasets import Dataset, OUTPUT_DATA_ARRAY_TYPE
from apt.utils.datasets import Dataset, Data, OUTPUT_DATA_ARRAY_TYPE
from art.estimators.classification import BlackBoxClassifier
from art.utils import check_and_transform_label_format
class ModelOutputType(Enum):
    """
    Enumeration of the kinds of output a wrapped model can yield.
    """
    CLASSIFIER_VECTOR = auto() # probabilities or logits
    CLASSIFIER_PROBABILITIES = auto() # vector of probabilities
    CLASSIFIER_LOGITS = auto() # vector of logits
    CLASSIFIER_SCALAR = auto() # label only
    REGRESSOR_SCALAR = auto() # value
class ScoringMethod(Enum):
    """
    Enumeration of the methods that can be requested when scoring a model's predictions.
    """
    ACCURACY = auto() # number of correct predictions divided by the number of samples
    MEAN_SQUARED_ERROR = auto() # mean squared error between the predictions and true labels
class Model(metaclass=ABCMeta):
"""
Abstract base class for ML model wrappers.
@ -54,7 +63,7 @@ class Model(metaclass=ABCMeta):
Perform predictions using the model for input `x`.
:param x: Input samples.
:type x: `np.ndarray` or `pandas.DataFrame`
:type x: `Dataset`
:return: Predictions from the model as numpy array.
"""
raise NotImplementedError
@ -107,3 +116,87 @@ class Model(metaclass=ABCMeta):
:return: True if a user can perform unlimited queries to the model API, otherwise False.
"""
return self._unlimited_queries
def get_nb_classes(self, y: OUTPUT_DATA_ARRAY_TYPE) -> int:
    """
    Get the number of classes from an array of labels

    :param y: the labels, either one categorical label per sample (1D array, or 2D array with a single
              column) or one-hot encoded (2D array with one column per class)
    :type y: numpy array
    :return: the number of classes as integer
    """
    # A single-column 2D array holds one label per sample, just like a 1D array; treating it as one-hot
    # would wrongly report exactly one class.
    holds_categorical_labels = len(y.shape) == 1 or (len(y.shape) == 2 and y.shape[1] == 1)
    if holds_categorical_labels:
        return len(np.unique(y))
    # One-hot encoded: one column per class.
    return y.shape[1]
class BlackboxClassifier(Model):
    """
    Wrapper for black-box ML classification models.

    :param model: The training and/or test data along with the model's predictions for the data. Assumes that the data
                  is represented as numpy arrays. Labels are expected to either be one-hot encoded or
                  a 1D-array of categorical labels (consecutive integers starting at 0).
    :type model: `Data` object
    :param output_type: The type of output the model yields (vector/label only for classifiers,
                        value for regressors)
    :type output_type: `ModelOutputType`
    :param black_box_access: Boolean describing the type of deployment of the model (when in production).
                             Always assumed to be True for this wrapper; the passed value is ignored.
    :type black_box_access: boolean, optional
    :param unlimited_queries: Boolean indicating whether a user can perform unlimited queries to the model API.
                              Always assumed to be False for this wrapper; the passed value is ignored.
    :type unlimited_queries: boolean, optional
    """

    def __init__(self, model: Data, output_type: ModelOutputType, black_box_access: Optional[bool] = True,
                 unlimited_queries: Optional[bool] = True, **kwargs):
        # The deployment flags are fixed for this wrapper regardless of the arguments received.
        super().__init__(model, output_type, black_box_access=True, unlimited_queries=False, **kwargs)
        x = model.get_train_samples()
        y = model.get_train_labels()
        self.nb_classes = self.get_nb_classes(y)
        y = check_and_transform_label_format(y, nb_classes=self.nb_classes)

        # Append the test portion (when present) so that it can be looked up as well.
        # NOTE(review): samples and labels are appended independently; if only one of the test samples/labels
        # is provided, x and y end up with different lengths - confirm callers always supply both.
        test_x = model.get_test_samples()
        if test_x is not None and isinstance(x, np.ndarray):
            x = np.vstack((x, test_x))
        test_y = model.get_test_labels()
        if test_y is not None and isinstance(y, np.ndarray):
            y = np.vstack((y, check_and_transform_label_format(test_y, nb_classes=self.nb_classes)))

        # The (samples, predictions) pair is handed to ART in place of a prediction function.
        predict_fn = (x, y)
        self._art_model = BlackBoxClassifier(predict_fn, x.shape[1:], self.nb_classes, fuzzy_float_compare=True)

    def fit(self, train_data: Dataset, **kwargs) -> None:
        """
        A blackbox model cannot be fit.

        :raises NotImplementedError: always.
        """
        raise NotImplementedError('A blackbox model cannot be fit')

    def predict(self, x: Dataset, **kwargs) -> OUTPUT_DATA_ARRAY_TYPE:
        """
        Get predictions from the model for input `x`. `x` must be a subset of the data provided in the `model` data in
        `__init__()`.

        :param x: Input samples.
        :type x: `Dataset`
        :return: Predictions from the model as numpy array.
        """
        return self._art_model.predict(x.get_samples())

    def score(self, test_data: Dataset, scoring_method: Optional[ScoringMethod] = ScoringMethod.ACCURACY, **kwargs):
        """
        Score the model using test data.

        :param test_data: Test data.
        :type test_data: `Dataset`
        :param scoring_method: The method for scoring predictions. Default is ACCURACY.
        :type scoring_method: `ScoringMethod`, optional
        :return: the score as float (for classifiers, between 0 and 1)
        :raises NotImplementedError: if `scoring_method` is anything other than ACCURACY.
        """
        predicted = self._art_model.predict(test_data.get_samples())
        y = check_and_transform_label_format(test_data.get_labels(), nb_classes=self.nb_classes)
        if scoring_method == ScoringMethod.ACCURACY:
            # Compare the arg-max class of the one-hot labels with that of the predicted vectors.
            return np.count_nonzero(np.argmax(y, axis=1) == np.argmax(predicted, axis=1)) / predicted.shape[0]
        raise NotImplementedError('Only ACCURACY supported as scoring method')

View file

@ -1,7 +1,5 @@
from typing import Optional
import numpy as np
from sklearn.preprocessing import OneHotEncoder
from sklearn.base import BaseEstimator
@ -10,6 +8,7 @@ from apt.utils.datasets import Dataset, OUTPUT_DATA_ARRAY_TYPE
from art.estimators.classification.scikitlearn import SklearnClassifier as ArtSklearnClassifier
from art.estimators.regression.scikitlearn import ScikitlearnRegressor
from art.utils import check_and_transform_label_format
class SklearnModel(Model):
@ -54,12 +53,14 @@ class SklearnClassifier(SklearnModel):
"""
Fit the model using the training data.
:param train_data: Training data.
:param train_data: Training data. Labels are expected to either be one-hot encoded or a 1D-array of categorical
labels (consecutive integers starting at 0).
:type train_data: `Dataset`
:return: None
"""
encoder = OneHotEncoder(sparse=False)
y_encoded = encoder.fit_transform(train_data.get_labels().reshape(-1, 1))
y = train_data.get_labels()
self.nb_classes = self.get_nb_classes(y)
y_encoded = check_and_transform_label_format(y, nb_classes=self.nb_classes)
self._art_model.fit(train_data.get_samples(), y_encoded, **kwargs)
def predict(self, x: Dataset, **kwargs) -> OUTPUT_DATA_ARRAY_TYPE:
@ -70,7 +71,7 @@ class SklearnClassifier(SklearnModel):
:type x: `Dataset`
:return: Predictions from the model as numpy array (class probabilities, if supported).
"""
return self._art_model.predict(x, **kwargs)
return self._art_model.predict(x.get_samples(), **kwargs)
class SklearnRegressor(SklearnModel):
@ -112,4 +113,4 @@ class SklearnRegressor(SklearnModel):
:type x: `Dataset`
:return: Predictions from the model as numpy array.
"""
return self._art_model.predict(x, **kwargs)
return self._art_model.predict(x.get_samples(), **kwargs)