Blackbox predict method (#43)

* Support output probabilities
* Support black box classifier with predict method
* Update requirements (security alert #1)
This commit is contained in:
abigailgold 2022-06-30 18:23:53 +03:00 committed by GitHub
parent bb224cd3dd
commit c6eb553a9f
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 159 additions and 50 deletions

View file

@ -1,3 +1,4 @@
from apt.utils.models.model import Model, BlackboxClassifier, ModelOutputType, ScoringMethod
from apt.utils.models.model import Model, BlackboxClassifier, ModelOutputType, ScoringMethod, \
BlackboxClassifierPredictions, BlackboxClassifierPredictFunction
from apt.utils.models.sklearn_model import SklearnModel, SklearnClassifier, SklearnRegressor
from apt.utils.models.keras_model import KerasClassifier

View file

@ -1,5 +1,5 @@
from abc import ABCMeta, abstractmethod
from typing import Any, Optional
from typing import Any, Optional, Callable, Tuple
from enum import Enum, auto
import numpy as np
@ -126,7 +126,7 @@ class Model(metaclass=ABCMeta):
:return: the number of classes as integer
"""
if len(y.shape) == 1:
return len(np.unique(y))
return np.max(y) + 1
else:
return y.shape[1]
@ -135,54 +135,41 @@ class BlackboxClassifier(Model):
"""
Wrapper for black-box ML classification models.
:param model: The training and/or test data along with the model's predictions for the data. Assumes that the data
is represented as numpy arrays. Labels are expected to either be one-hot encoded or
a 1D-array of categorical labels (consecutive integers starting at 0).
:type model: `Data` object
:param output_type: The type of output the model yields (vector/label only for classifiers,
value for regressors)
:param model: The training and/or test data along with the model's predictions for the data or a callable predict
method.
:type model: `Data` object or Callable
:param output_type: The type of output the model yields (vector/label only)
:type output_type: `ModelOutputType`
:param black_box_access: Boolean describing the type of deployment of the model (when in production).
Always assumed to be True for this wrapper.
Always assumed to be True (black box) for this wrapper.
:type black_box_access: boolean, optional
:param unlimited_queries: Boolean indicating whether a user can perform unlimited queries to the model API.
Always assumed to be False for this wrapper.
:type unlimited_queries: boolean, optional
"""
def __init__(self, model: Data, output_type: ModelOutputType, black_box_access: Optional[bool] = True,
def __init__(self, model: Any, output_type: ModelOutputType, black_box_access: Optional[bool] = True,
unlimited_queries: Optional[bool] = True, **kwargs):
super().__init__(model, output_type, black_box_access=True, unlimited_queries=False, **kwargs)
x_train_pred = model.get_train_samples()
y_train_pred = model.get_train_labels()
x_test_pred = model.get_test_samples()
y_test_pred = model.get_test_labels()
super().__init__(model, output_type, black_box_access=True, unlimited_queries=unlimited_queries, **kwargs)
self._nb_classes = None
self._input_shape = None
if x_train_pred is not None and y_train_pred is not None and x_test_pred is not None and y_test_pred is not None:
if type(y_train_pred) != np.ndarray or type(y_test_pred) != np.ndarray \
or type(y_train_pred) != np.ndarray or type(y_test_pred) != np.ndarray:
raise NotImplementedError("X/Y Data should be np ndarray")
@property
def nb_classes(self) -> int:
"""
Return the number of prediction classes of the model.
self.nb_classes = self.get_nb_classes(y_train_pred)
y_train_pred = check_and_transform_label_format(y_train_pred, nb_classes=self.nb_classes)
y_test_pred = check_and_transform_label_format(y_test_pred, nb_classes=self.nb_classes)
x_pred = np.vstack((x_train_pred, x_test_pred))
y_pred = np.vstack((y_train_pred, y_test_pred))
elif x_test_pred is not None and y_test_pred is not None:
self.nb_classes = self.get_nb_classes(y_test_pred)
y_test_pred = check_and_transform_label_format(y_test_pred, nb_classes=self.nb_classes)
x_pred = x_test_pred
y_pred = y_test_pred
elif x_train_pred is not None and y_train_pred is not None:
self.nb_classes = self.get_nb_classes(y_train_pred)
y_train_pred = check_and_transform_label_format(y_train_pred, nb_classes=self.nb_classes)
x_pred = x_train_pred
y_pred = y_train_pred
else:
raise NotImplementedError("Invalid data - None")
:return: Number of prediction classes of the model.
"""
return self._nb_classes
predict_fn = (x_pred, y_pred)
self._art_model = BlackBoxClassifier(predict_fn, x_pred.shape[1:], self.nb_classes, fuzzy_float_compare=True)
@property
def input_shape(self) -> Tuple[int, ...]:
"""
Return the shape of input to the model.
:return: Shape of input to the model.
"""
return self._input_shape
def fit(self, train_data: Dataset, **kwargs) -> None:
"""
@ -212,8 +199,91 @@ class BlackboxClassifier(Model):
:return: the score as float (for classifiers, between 0 and 1)
"""
predicted = self._art_model.predict(test_data.get_samples())
y = check_and_transform_label_format(test_data.get_labels(), nb_classes=self.nb_classes)
y = check_and_transform_label_format(test_data.get_labels(), nb_classes=self._nb_classes)
if scoring_method == ScoringMethod.ACCURACY:
return np.count_nonzero(np.argmax(y, axis=1) == np.argmax(predicted, axis=1)) / predicted.shape[0]
else:
raise NotImplementedError
class BlackboxClassifierPredictions(BlackboxClassifier):
    """
    Wrapper for black-box ML classification models using data and predictions.

    The wrapped "model" is a set of samples together with the predictions the real model produced for them.
    Whatever train and/or test pairs are available are collected into one (samples, predictions) pair, which is
    handed to the underlying `BlackBoxClassifier`.

    :param model: The training and/or test data along with the model's predictions for the data. Assumes that the data
                  is represented as numpy arrays. Labels are expected to either be class probabilities (multi-column)
                  or a 1D-array of categorical labels (consecutive integers starting at 0).
    :type model: `Data` object
    :param output_type: The type of output the model yields (vector/label only)
    :type output_type: `ModelOutputType`
    :param black_box_access: Boolean describing the type of deployment of the model (when in production).
                             Always assumed to be True for this wrapper (the passed value is ignored).
    :type black_box_access: boolean, optional
    :param unlimited_queries: Boolean indicating whether a user can perform unlimited queries to the model API.
                              Always assumed to be False for this wrapper (the passed value is ignored).
    :type unlimited_queries: boolean, optional
    :raises NotImplementedError: if neither a complete train pair nor a complete test pair is available, or if both
                                 pairs are available but any of the four arrays is not a numpy array.
    """

    def __init__(self, model: Data, output_type: ModelOutputType, black_box_access: Optional[bool] = True,
                 unlimited_queries: Optional[bool] = True, **kwargs):
        super().__init__(model, output_type, black_box_access=True, unlimited_queries=False, **kwargs)
        x_train_pred = model.get_train_samples()
        y_train_pred = model.get_train_labels()
        x_test_pred = model.get_test_samples()
        y_test_pred = model.get_test_labels()

        # 1D label arrays are converted so that train and test predictions share one format.
        # NOTE(review): presumably check_and_transform_label_format expands labels to one-hot rows - confirm.
        if y_train_pred is not None and len(y_train_pred.shape) == 1:
            self._nb_classes = self.get_nb_classes(y_train_pred)
            y_train_pred = check_and_transform_label_format(y_train_pred, nb_classes=self._nb_classes)
        if y_test_pred is not None and len(y_test_pred.shape) == 1:
            # Reuse the class count derived from the train labels when there is one
            if self._nb_classes is None:
                self._nb_classes = self.get_nb_classes(y_test_pred)
            y_test_pred = check_and_transform_label_format(y_test_pred, nb_classes=self._nb_classes)

        if x_train_pred is not None and y_train_pred is not None and x_test_pred is not None \
                and y_test_pred is not None:
            # Validate the samples as well as the predictions before stacking them
            # (the previous check tested the two label arrays twice and never looked at X).
            arrays = (x_train_pred, y_train_pred, x_test_pred, y_test_pred)
            if not all(isinstance(array, np.ndarray) for array in arrays):
                raise NotImplementedError("X/Y Data should be numpy array")
            x_pred = np.vstack((x_train_pred, x_test_pred))
            y_pred = np.vstack((y_train_pred, y_test_pred))
        elif x_test_pred is not None and y_test_pred is not None:
            x_pred = x_test_pred
            y_pred = y_test_pred
        elif x_train_pred is not None and y_train_pred is not None:
            x_pred = x_train_pred
            y_pred = y_train_pred
        else:
            raise NotImplementedError("Invalid data - None")

        # The class count is recomputed from the final prediction array
        self._nb_classes = self.get_nb_classes(y_pred)
        self._input_shape = x_pred.shape[1:]
        # A (samples, predictions) tuple is passed in place of a callable predict function
        predict_fn = (x_pred, y_pred)
        self._art_model = BlackBoxClassifier(predict_fn, self._input_shape, self._nb_classes, fuzzy_float_compare=True)
class BlackboxClassifierPredictFunction(BlackboxClassifier):
    """
    Black-box classification model wrapper that is driven by a predict callable.

    :param model: Callable that receives an `np.ndarray` of input data and returns the predictions, either as class
                  probabilities (multi-column) or as a 1D-array of categorical labels (consecutive integers
                  starting at 0).
    :type model: Callable
    :param output_type: The type of output the model yields (vector/label only)
    :type output_type: `ModelOutputType`
    :param input_shape: Shape of input to the model.
    :type input_shape: Tuple[int, ...]
    :param nb_classes: Number of prediction classes of the model.
    :type nb_classes: int
    :param black_box_access: Boolean describing the type of deployment of the model (when in production).
                             Always assumed to be True for this wrapper (the passed value is ignored).
    :type black_box_access: boolean, optional
    :param unlimited_queries: Boolean indicating whether a user can perform unlimited queries to the model API.
    :type unlimited_queries: boolean, optional
    """

    def __init__(self, model: Callable, output_type: ModelOutputType, input_shape: Tuple[int, ...], nb_classes: int,
                 black_box_access: Optional[bool] = True, unlimited_queries: Optional[bool] = True, **kwargs):
        # Black-box access is forced on; the caller's unlimited_queries choice is forwarded as given
        super().__init__(model, output_type, black_box_access=True, unlimited_queries=unlimited_queries, **kwargs)
        # Caller-supplied metadata is stored as-is, nothing is inferred from data here
        self._input_shape = input_shape
        self._nb_classes = nb_classes
        self._art_model = BlackBoxClassifier(model, input_shape, nb_classes)

View file

@ -1,4 +1,4 @@
numpy==1.21.0
numpy>=1.22
pandas==1.1.0
scipy==1.4.1
scikit-learn==0.22.2

View file

@ -18,7 +18,8 @@ from apt.minimization import GeneralizeToRepresentative
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from apt.utils.dataset_utils import get_iris_dataset_np, get_adult_dataset_pd, get_german_credit_dataset_pd
from apt.utils.datasets import ArrayDataset, Data
from apt.utils.models import SklearnClassifier, ModelOutputType, SklearnRegressor, KerasClassifier, BlackboxClassifier
from apt.utils.models import SklearnClassifier, ModelOutputType, SklearnRegressor, KerasClassifier, \
BlackboxClassifierPredictions
@pytest.fixture
@ -949,7 +950,7 @@ def test_blackbox_model():
test_data = ArrayDataset(x_test, y_test)
data = Data(train_data, test_data)
model = BlackboxClassifier(data, ModelOutputType.CLASSIFIER_PROBABILITIES)
model = BlackboxClassifierPredictions(data, ModelOutputType.CLASSIFIER_PROBABILITIES)
ad = ArrayDataset(x_test)
predictions = model.predict(ad)
if predictions.shape[1] > 1:

View file

@ -1,6 +1,8 @@
import pytest
import numpy as np
from apt.utils.models import SklearnClassifier, SklearnRegressor, ModelOutputType, KerasClassifier, BlackboxClassifier
from apt.utils.models import SklearnClassifier, SklearnRegressor, ModelOutputType, KerasClassifier, \
BlackboxClassifierPredictions, BlackboxClassifierPredictFunction
from apt.utils.datasets import ArrayDataset, Data
from apt.utils import dataset_utils
@ -67,7 +69,7 @@ def test_blackbox_classifier():
train = ArrayDataset(x_train, y_train)
test = ArrayDataset(x_test, y_test)
data = Data(train, test)
model = BlackboxClassifier(data, ModelOutputType.CLASSIFIER_PROBABILITIES)
model = BlackboxClassifierPredictions(data, ModelOutputType.CLASSIFIER_PROBABILITIES)
pred = model.predict(test)
assert(pred.shape[0] == x_test.shape[0])
@ -80,7 +82,7 @@ def test_blackbox_classifier_no_test():
train = ArrayDataset(x_train, y_train)
data = Data(train)
model = BlackboxClassifier(data, ModelOutputType.CLASSIFIER_PROBABILITIES)
model = BlackboxClassifierPredictions(data, ModelOutputType.CLASSIFIER_PROBABILITIES)
pred = model.predict(train)
assert(pred.shape[0] == x_train.shape[0])
@ -93,7 +95,7 @@ def test_blackbox_classifier_no_train():
test = ArrayDataset(x_test, y_test)
data = Data(test=test)
model = BlackboxClassifier(data, ModelOutputType.CLASSIFIER_PROBABILITIES)
model = BlackboxClassifierPredictions(data, ModelOutputType.CLASSIFIER_PROBABILITIES)
pred = model.predict(test)
assert(pred.shape[0] == x_test.shape[0])
@ -107,7 +109,7 @@ def test_blackbox_classifier_no_test_y():
train = ArrayDataset(x_train, y_train)
test = ArrayDataset(x_test)
data = Data(train, test)
model = BlackboxClassifier(data, ModelOutputType.CLASSIFIER_PROBABILITIES)
model = BlackboxClassifierPredictions(data, ModelOutputType.CLASSIFIER_PROBABILITIES)
pred = model.predict(train)
assert(pred.shape[0] == x_train.shape[0])
@ -129,7 +131,7 @@ def test_blackbox_classifier_no_train_y():
train = ArrayDataset(x_train)
test = ArrayDataset(x_test, y_test)
data = Data(train, test)
model = BlackboxClassifier(data, ModelOutputType.CLASSIFIER_PROBABILITIES)
model = BlackboxClassifierPredictions(data, ModelOutputType.CLASSIFIER_PROBABILITIES)
pred = model.predict(test)
assert (pred.shape[0] == x_test.shape[0])
@ -145,3 +147,38 @@ def test_blackbox_classifier_no_train_y():
assert(unable_to_predict_train,True)
def test_blackbox_classifier_probabilities():
    """Check that a predictions-based black-box wrapper accepts class-probability outputs."""
    (x_train, _), (_, _) = dataset_utils.get_iris_dataset_np()
    # One identical probability row per training sample; sized from the data instead of a hard-coded 105
    # so the test does not silently depend on the dataset split size.
    y_train = np.tile([0.23, 0.56, 0.21], (x_train.shape[0], 1))
    train = ArrayDataset(x_train, y_train)
    data = Data(train)
    model = BlackboxClassifierPredictions(data, ModelOutputType.CLASSIFIER_PROBABILITIES)

    pred = model.predict(train)
    assert pred.shape[0] == x_train.shape[0]
    # Every returned value must be a strict probability
    assert (0.0 < pred).all()
    assert (pred < 1.0).all()

    score = model.score(train)
    assert 0.0 <= score <= 1.0
def test_blackbox_classifier_predict():
    """Check that a black-box wrapper built from a predict callable can predict and score."""
    def predict(x):
        # Return one probability row per input sample rather than a single flat list,
        # so the stub does not depend on the caller broadcasting its output.
        return np.tile([0.23, 0.56, 0.21], (len(x), 1))

    (x_train, y_train), (_, _) = dataset_utils.get_iris_dataset_np()
    train = ArrayDataset(x_train, y_train)
    # Input shape (4,) and 3 classes are passed explicitly; they are not inferred from the callable
    model = BlackboxClassifierPredictFunction(predict, ModelOutputType.CLASSIFIER_PROBABILITIES, (4,), 3)

    pred = model.predict(train)
    assert pred.shape[0] == x_train.shape[0]
    # Every returned value must be a strict probability
    assert (0.0 < pred).all()
    assert (pred < 1.0).all()

    score = model.score(train)
    assert 0.0 <= score <= 1.0