diff --git a/apt/utils/models/__init__.py b/apt/utils/models/__init__.py index f6a89fc..a48a1a3 100644 --- a/apt/utils/models/__init__.py +++ b/apt/utils/models/__init__.py @@ -1,3 +1,4 @@ -from apt.utils.models.model import Model, BlackboxClassifier, ModelOutputType, ScoringMethod +from apt.utils.models.model import Model, BlackboxClassifier, ModelOutputType, ScoringMethod, \ + BlackboxClassifierPredictions, BlackboxClassifierPredictFunction from apt.utils.models.sklearn_model import SklearnModel, SklearnClassifier, SklearnRegressor from apt.utils.models.keras_model import KerasClassifier diff --git a/apt/utils/models/model.py b/apt/utils/models/model.py index bba743b..eff9271 100644 --- a/apt/utils/models/model.py +++ b/apt/utils/models/model.py @@ -1,5 +1,5 @@ from abc import ABCMeta, abstractmethod -from typing import Any, Optional +from typing import Any, Optional, Callable, Tuple from enum import Enum, auto import numpy as np @@ -126,7 +126,7 @@ class Model(metaclass=ABCMeta): :return: the number of classes as integer """ if len(y.shape) == 1: - return len(np.unique(y)) + return np.max(y) + 1 else: return y.shape[1] @@ -135,54 +135,41 @@ class BlackboxClassifier(Model): """ Wrapper for black-box ML classification models. - :param model: The training and/or test data along with the model's predictions for the data. Assumes that the data - is represented as numpy arrays. Labels are expected to either be one-hot encoded or - a 1D-array of categorical labels (consecutive integers starting at 0). - :type model: `Data` object - :param output_type: The type of output the model yields (vector/label only for classifiers, - value for regressors) + :param model: The training and/or test data along with the model's predictions for the data or a callable predict + method. + :type model: `Data` object or Callable + :param output_type: The type of output the model yields (vector/label only) :type output_type: `ModelOutputType` :param black_box_access: Boolean describing the type of deployment of the model (when in production). - Always assumed to be True for this wrapper. + Always assumed to be True (black box) for this wrapper. :type black_box_access: boolean, optional :param unlimited_queries: Boolean indicating whether a user can perform unlimited queries to the model API. - Always assumed to be False for this wrapper. :type unlimited_queries: boolean, optional """ - def __init__(self, model: Data, output_type: ModelOutputType, black_box_access: Optional[bool] = True, + def __init__(self, model: Any, output_type: ModelOutputType, black_box_access: Optional[bool] = True, unlimited_queries: Optional[bool] = True, **kwargs): - super().__init__(model, output_type, black_box_access=True, unlimited_queries=False, **kwargs) - x_train_pred = model.get_train_samples() - y_train_pred = model.get_train_labels() - x_test_pred = model.get_test_samples() - y_test_pred = model.get_test_labels() + super().__init__(model, output_type, black_box_access=True, unlimited_queries=unlimited_queries, **kwargs) + self._nb_classes = None + self._input_shape = None - if x_train_pred is not None and y_train_pred is not None and x_test_pred is not None and y_test_pred is not None: - if type(y_train_pred) != np.ndarray or type(y_test_pred) != np.ndarray \ - or type(y_train_pred) != np.ndarray or type(y_test_pred) != np.ndarray: - raise NotImplementedError("X/Y Data should be np ndarray") + @property + def nb_classes(self) -> int: + """ + Return the number of prediction classes of the model. - self.nb_classes = self.get_nb_classes(y_train_pred) - y_train_pred = check_and_transform_label_format(y_train_pred, nb_classes=self.nb_classes) - y_test_pred = check_and_transform_label_format(y_test_pred, nb_classes=self.nb_classes) - x_pred = np.vstack((x_train_pred, x_test_pred)) - y_pred = np.vstack((y_train_pred, y_test_pred)) - elif x_test_pred is not None and y_test_pred is not None: - self.nb_classes = self.get_nb_classes(y_test_pred) - y_test_pred = check_and_transform_label_format(y_test_pred, nb_classes=self.nb_classes) - x_pred = x_test_pred - y_pred = y_test_pred - elif x_train_pred is not None and y_train_pred is not None: - self.nb_classes = self.get_nb_classes(y_train_pred) - y_train_pred = check_and_transform_label_format(y_train_pred, nb_classes=self.nb_classes) - x_pred = x_train_pred - y_pred = y_train_pred - else: - raise NotImplementedError("Invalid data - None") + :return: Number of prediction classes of the model. + """ + return self._nb_classes - predict_fn = (x_pred, y_pred) - self._art_model = BlackBoxClassifier(predict_fn, x_pred.shape[1:], self.nb_classes, fuzzy_float_compare=True) + @property + def input_shape(self) -> Tuple[int, ...]: + """ + Return the shape of input to the model. + + :return: Shape of input to the model. + """ + return self._input_shape def fit(self, train_data: Dataset, **kwargs) -> None: """ @@ -212,8 +199,91 @@ class BlackboxClassifier(Model): :return: the score as float (for classifiers, between 0 and 1) """ predicted = self._art_model.predict(test_data.get_samples()) - y = check_and_transform_label_format(test_data.get_labels(), nb_classes=self.nb_classes) + y = check_and_transform_label_format(test_data.get_labels(), nb_classes=self._nb_classes) if scoring_method == ScoringMethod.ACCURACY: return np.count_nonzero(np.argmax(y, axis=1) == np.argmax(predicted, axis=1)) / predicted.shape[0] else: raise NotImplementedError + + +class BlackboxClassifierPredictions(BlackboxClassifier): + """ + Wrapper for black-box ML classification models using data and predictions. + + :param model: The training and/or test data along with the model's predictions for the data. Assumes that the data + is represented as numpy arrays. Labels are expected to either be class probabilities (multi-column) or + a 1D-array of categorical labels (consecutive integers starting at 0). + :type model: `Data` object + :param output_type: The type of output the model yields (vector/label only) + :type output_type: `ModelOutputType` + :param black_box_access: Boolean describing the type of deployment of the model (when in production). + Always assumed to be True for this wrapper. + :type black_box_access: boolean, optional + :param unlimited_queries: Boolean indicating whether a user can perform unlimited queries to the model API. + Always assumed to be False for this wrapper. + :type unlimited_queries: boolean, optional + """ + + def __init__(self, model: Data, output_type: ModelOutputType, black_box_access: Optional[bool] = True, + unlimited_queries: Optional[bool] = True, **kwargs): + super().__init__(model, output_type, black_box_access=True, unlimited_queries=False, **kwargs) + x_train_pred = model.get_train_samples() + y_train_pred = model.get_train_labels() + x_test_pred = model.get_test_samples() + y_test_pred = model.get_test_labels() + + if y_train_pred is not None and len(y_train_pred.shape) == 1: + self._nb_classes = self.get_nb_classes(y_train_pred) + y_train_pred = check_and_transform_label_format(y_train_pred, nb_classes=self._nb_classes) + if y_test_pred is not None and len(y_test_pred.shape) == 1: + if self._nb_classes is None: + self._nb_classes = self.get_nb_classes(y_test_pred) + y_test_pred = check_and_transform_label_format(y_test_pred, nb_classes=self._nb_classes) + + if x_train_pred is not None and y_train_pred is not None and x_test_pred is not None and y_test_pred is not None: + if type(y_train_pred) != np.ndarray or type(y_test_pred) != np.ndarray \ + or type(y_train_pred) != np.ndarray or type(y_test_pred) != np.ndarray: + raise NotImplementedError("X/Y Data should be numpy array") + x_pred = np.vstack((x_train_pred, x_test_pred)) + y_pred = np.vstack((y_train_pred, y_test_pred)) + elif x_test_pred is not None and y_test_pred is not None: + x_pred = x_test_pred + y_pred = y_test_pred + elif x_train_pred is not None and y_train_pred is not None: + x_pred = x_train_pred + y_pred = y_train_pred + else: + raise NotImplementedError("Invalid data - None") + + self._nb_classes = self.get_nb_classes(y_pred) + self._input_shape = x_pred.shape[1:] + predict_fn = (x_pred, y_pred) + self._art_model = BlackBoxClassifier(predict_fn, self._input_shape, self._nb_classes, fuzzy_float_compare=True) + + +class BlackboxClassifierPredictFunction(BlackboxClassifier): + """ + Wrapper for black-box ML classification models using a predict function. + + :param model: Function that takes in an `np.ndarray` of input data and returns predictions either as class + probabilities (multi-column) or a 1D-array of categorical labels (consecutive integers starting at 0). + :type model: Callable + :param output_type: The type of output the model yields (vector/label only) + :type output_type: `ModelOutputType` + :param input_shape: Shape of input to the model. + :type input_shape: Tuple[int, ...] + :param nb_classes: Number of prediction classes of the model. + :type nb_classes: int + :param black_box_access: Boolean describing the type of deployment of the model (when in production). + Always assumed to be True for this wrapper. + :type black_box_access: boolean, optional + :param unlimited_queries: Boolean indicating whether a user can perform unlimited queries to the model API. + :type unlimited_queries: boolean, optional + """ + + def __init__(self, model: Callable, output_type: ModelOutputType, input_shape: Tuple[int, ...], nb_classes: int, + black_box_access: Optional[bool] = True, unlimited_queries: Optional[bool] = True, **kwargs): + super().__init__(model, output_type, black_box_access=True, unlimited_queries=unlimited_queries, **kwargs) + self._nb_classes = nb_classes + self._input_shape = input_shape + self._art_model = BlackBoxClassifier(model, self._input_shape, self._nb_classes) diff --git a/requirements.txt b/requirements.txt index f841d7c..4ce8d46 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -numpy==1.21.0 +numpy>=1.22 pandas==1.1.0 scipy==1.4.1 scikit-learn==0.22.2 diff --git a/tests/test_minimizer.py b/tests/test_minimizer.py index 42e5cb2..2d888bc 100644 --- a/tests/test_minimizer.py +++ b/tests/test_minimizer.py @@ -18,7 +18,8 @@ from apt.minimization import GeneralizeToRepresentative from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor from apt.utils.dataset_utils import get_iris_dataset_np, get_adult_dataset_pd, get_german_credit_dataset_pd from apt.utils.datasets import ArrayDataset, Data -from apt.utils.models import SklearnClassifier, ModelOutputType, SklearnRegressor, KerasClassifier, BlackboxClassifier +from apt.utils.models import SklearnClassifier, ModelOutputType, SklearnRegressor, KerasClassifier, \ + BlackboxClassifierPredictions @pytest.fixture @@ -949,7 +950,7 @@ def test_blackbox_model(): test_data = ArrayDataset(x_test, y_test) data = Data(train_data, test_data) - model = BlackboxClassifier(data, ModelOutputType.CLASSIFIER_PROBABILITIES) + model = BlackboxClassifierPredictions(data, ModelOutputType.CLASSIFIER_PROBABILITIES) ad = ArrayDataset(x_test) predictions = model.predict(ad) if predictions.shape[1] > 1: diff --git a/tests/test_model.py b/tests/test_model.py index 3a5e45c..2acc0d9 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -1,6 +1,8 @@ import pytest +import numpy as np -from apt.utils.models import SklearnClassifier, SklearnRegressor, ModelOutputType, KerasClassifier, BlackboxClassifier +from apt.utils.models import SklearnClassifier, SklearnRegressor, ModelOutputType, KerasClassifier, \ + BlackboxClassifierPredictions, BlackboxClassifierPredictFunction from apt.utils.datasets import ArrayDataset, Data from apt.utils import dataset_utils @@ -67,7 +69,7 @@ def test_blackbox_classifier(): train = ArrayDataset(x_train, y_train) test = ArrayDataset(x_test, y_test) data = Data(train, test) - model = BlackboxClassifier(data, ModelOutputType.CLASSIFIER_PROBABILITIES) + model = BlackboxClassifierPredictions(data, ModelOutputType.CLASSIFIER_PROBABILITIES) pred = model.predict(test) assert(pred.shape[0] == x_test.shape[0]) @@ -80,7 +82,7 @@ def test_blackbox_classifier_no_test(): train = ArrayDataset(x_train, y_train) data = Data(train) - model = BlackboxClassifier(data, ModelOutputType.CLASSIFIER_PROBABILITIES) + model = BlackboxClassifierPredictions(data, ModelOutputType.CLASSIFIER_PROBABILITIES) pred = model.predict(train) assert(pred.shape[0] == x_train.shape[0]) @@ -93,7 +95,7 @@ def test_blackbox_classifier_no_train(): test = ArrayDataset(x_test, y_test) data = Data(test=test) - model = BlackboxClassifier(data, ModelOutputType.CLASSIFIER_PROBABILITIES) + model = BlackboxClassifierPredictions(data, ModelOutputType.CLASSIFIER_PROBABILITIES) pred = model.predict(test) assert(pred.shape[0] == x_test.shape[0]) @@ -107,7 +109,7 @@ def test_blackbox_classifier_no_test_y(): train = ArrayDataset(x_train, y_train) test = ArrayDataset(x_test) data = Data(train, test) - model = BlackboxClassifier(data, ModelOutputType.CLASSIFIER_PROBABILITIES) + model = BlackboxClassifierPredictions(data, ModelOutputType.CLASSIFIER_PROBABILITIES) pred = model.predict(train) assert(pred.shape[0] == x_train.shape[0]) @@ -129,7 +131,7 @@ def test_blackbox_classifier_no_train_y(): train = ArrayDataset(x_train) test = ArrayDataset(x_test, y_test) data = Data(train, test) - model = BlackboxClassifier(data, ModelOutputType.CLASSIFIER_PROBABILITIES) + model = BlackboxClassifierPredictions(data, ModelOutputType.CLASSIFIER_PROBABILITIES) pred = model.predict(test) assert (pred.shape[0] == x_test.shape[0]) @@ -145,3 +147,38 @@ def test_blackbox_classifier_no_train_y(): assert(unable_to_predict_train,True) +def test_blackbox_classifier_probabilities(): + (x_train, _), (_, _) = dataset_utils.get_iris_dataset_np() + y_train = np.array([[0.23, 0.56, 0.21] for i in range(105)]) + + train = ArrayDataset(x_train, y_train) + + data = Data(train) + model = BlackboxClassifierPredictions(data, ModelOutputType.CLASSIFIER_PROBABILITIES) + pred = model.predict(train) + assert (pred.shape[0] == x_train.shape[0]) + assert (0.0 < pred).all() + assert (pred < 1.0).all() + + score = model.score(train) + assert (0.0 <= score <= 1.0) + + +def test_blackbox_classifier_predict(): + def predict(x): + return [0.23, 0.56, 0.21] + + (x_train, y_train), (_, _) = dataset_utils.get_iris_dataset_np() + + train = ArrayDataset(x_train, y_train) + + model = BlackboxClassifierPredictFunction(predict, ModelOutputType.CLASSIFIER_PROBABILITIES, (4,), 3) + pred = model.predict(train) + assert (pred.shape[0] == x_train.shape[0]) + assert (0.0 < pred).all() + assert (pred < 1.0).all() + + score = model.score(train) + assert (0.0 <= score <= 1.0) + +