Blackbox predict method (#43)

* Support output probabilities * Support black box classifier with predict method * Update requirements (security alert #1)
2026-07-14 16:32:12 +02:00 · 2022-06-30 18:23:53 +03:00 · 2022-06-30 18:23:53 +03:00 · c6eb553a9f
commit c6eb553a9f
parent bb224cd3dd
5 changed files with 159 additions and 50 deletions
--- a/apt/utils/models/model.py
+++ b/apt/utils/models/model.py
@ -1,5 +1,5 @@
 from abc import ABCMeta, abstractmethod
-from typing import Any, Optional
+from typing import Any, Optional, Callable, Tuple
 from enum import Enum, auto
 import numpy as np

@ -126,7 +126,7 @@ class Model(metaclass=ABCMeta):
        :return: the number of classes as integer
        """
        if len(y.shape) == 1:
-            return len(np.unique(y))
+            return np.max(y) + 1
        else:
            return y.shape[1]

@ -135,54 +135,41 @@ class BlackboxClassifier(Model):
    """
    Wrapper for black-box ML classification models.

-    :param model: The training and/or test data along with the model's predictions for the data. Assumes that the data
-                  is represented as numpy arrays. Labels are expected to either be one-hot encoded or
-                  a 1D-array of categorical labels (consecutive integers starting at 0).
-    :type model: `Data` object
-    :param output_type: The type of output the model yields (vector/label only for classifiers,
-                        value for regressors)
+    :param model: The training and/or test data along with the model's predictions for the data or a callable predict
+                  method.
+    :type model: `Data` object or Callable
+    :param output_type: The type of output the model yields (vector/label only)
    :type output_type: `ModelOutputType`
    :param black_box_access: Boolean describing the type of deployment of the model (when in production).
-                             Always assumed to be True for this wrapper.
+                             Always assumed to be True (black box) for this wrapper.
    :type black_box_access: boolean, optional
    :param unlimited_queries: Boolean indicating whether a user can perform unlimited queries to the model API.
-                              Always assumed to be False for this wrapper.
    :type unlimited_queries: boolean, optional
    """

-    def __init__(self, model: Data, output_type: ModelOutputType, black_box_access: Optional[bool] = True,
+    def __init__(self, model: Any, output_type: ModelOutputType, black_box_access: Optional[bool] = True,
                 unlimited_queries: Optional[bool] = True, **kwargs):
-        super().__init__(model, output_type, black_box_access=True, unlimited_queries=False, **kwargs)
-        x_train_pred = model.get_train_samples()
-        y_train_pred = model.get_train_labels()
-        x_test_pred = model.get_test_samples()
-        y_test_pred = model.get_test_labels()
+        super().__init__(model, output_type, black_box_access=True, unlimited_queries=unlimited_queries, **kwargs)
+        self._nb_classes = None
+        self._input_shape = None

-        if x_train_pred is not None and y_train_pred is not None and x_test_pred is not None and y_test_pred is not None:
-            if type(y_train_pred) != np.ndarray or type(y_test_pred) != np.ndarray \
-               or type(y_train_pred) != np.ndarray or type(y_test_pred) != np.ndarray:
-                raise NotImplementedError("X/Y Data should be np ndarray")
+    @property
+    def nb_classes(self) -> int:
+        """
+        Return the number of prediction classes of the model.

-            self.nb_classes = self.get_nb_classes(y_train_pred)
-            y_train_pred = check_and_transform_label_format(y_train_pred, nb_classes=self.nb_classes)
-            y_test_pred = check_and_transform_label_format(y_test_pred, nb_classes=self.nb_classes)
-            x_pred = np.vstack((x_train_pred, x_test_pred))
-            y_pred = np.vstack((y_train_pred, y_test_pred))
-        elif x_test_pred is not None and y_test_pred is not None:
-            self.nb_classes = self.get_nb_classes(y_test_pred)
-            y_test_pred = check_and_transform_label_format(y_test_pred, nb_classes=self.nb_classes)
-            x_pred = x_test_pred
-            y_pred = y_test_pred
-        elif x_train_pred is not None and y_train_pred is not None:
-            self.nb_classes = self.get_nb_classes(y_train_pred)
-            y_train_pred = check_and_transform_label_format(y_train_pred, nb_classes=self.nb_classes)
-            x_pred = x_train_pred
-            y_pred = y_train_pred
-        else:
-            raise NotImplementedError("Invalid data - None")
+        :return: Number of prediction classes of the model.
+        """
+        return self._nb_classes

-        predict_fn = (x_pred, y_pred)
-        self._art_model = BlackBoxClassifier(predict_fn, x_pred.shape[1:], self.nb_classes, fuzzy_float_compare=True)
+    @property
+    def input_shape(self) -> Tuple[int, ...]:
+        """
+        Return the shape of input to the model.
+
+        :return: Shape of input to the model.
+        """
+        return self._input_shape

    def fit(self, train_data: Dataset, **kwargs) -> None:
        """
@ -212,8 +199,91 @@ class BlackboxClassifier(Model):
        :return: the score as float (for classifiers, between 0 and 1)
        """
        predicted = self._art_model.predict(test_data.get_samples())
-        y = check_and_transform_label_format(test_data.get_labels(), nb_classes=self.nb_classes)
+        y = check_and_transform_label_format(test_data.get_labels(), nb_classes=self._nb_classes)
        if scoring_method == ScoringMethod.ACCURACY:
            return np.count_nonzero(np.argmax(y, axis=1) == np.argmax(predicted, axis=1)) / predicted.shape[0]
        else:
            raise NotImplementedError
+
+
+class BlackboxClassifierPredictions(BlackboxClassifier):
+    """
+    Wrapper for black-box ML classification models using data and predictions.
+
+    :param model: The training and/or test data along with the model's predictions for the data. Assumes that the data
+                  is represented as numpy arrays. Labels are expected to either be class probabilities (multi-column) or
+                  a 1D-array of categorical labels (consecutive integers starting at 0).
+    :type model: `Data` object
+    :param output_type: The type of output the model yields (vector/label only)
+    :type output_type: `ModelOutputType`
+    :param black_box_access: Boolean describing the type of deployment of the model (when in production).
+                             Always assumed to be True for this wrapper.
+    :type black_box_access: boolean, optional
+    :param unlimited_queries: Boolean indicating whether a user can perform unlimited queries to the model API.
+                              Always assumed to be False for this wrapper.
+    :type unlimited_queries: boolean, optional
+    """
+
+    def __init__(self, model: Data, output_type: ModelOutputType, black_box_access: Optional[bool] = True,
+                 unlimited_queries: Optional[bool] = True, **kwargs):
+        super().__init__(model, output_type, black_box_access=True, unlimited_queries=False, **kwargs)
+        x_train_pred = model.get_train_samples()
+        y_train_pred = model.get_train_labels()
+        x_test_pred = model.get_test_samples()
+        y_test_pred = model.get_test_labels()
+
+        if y_train_pred is not None and len(y_train_pred.shape) == 1:
+            self._nb_classes = self.get_nb_classes(y_train_pred)
+            y_train_pred = check_and_transform_label_format(y_train_pred, nb_classes=self._nb_classes)
+        if y_test_pred is not None and len(y_test_pred.shape) == 1:
+            if self._nb_classes is None:
+                self._nb_classes = self.get_nb_classes(y_test_pred)
+            y_test_pred = check_and_transform_label_format(y_test_pred, nb_classes=self._nb_classes)
+
+        if x_train_pred is not None and y_train_pred is not None and x_test_pred is not None and y_test_pred is not None:
+            if type(y_train_pred) != np.ndarray or type(y_test_pred) != np.ndarray \
+               or type(y_train_pred) != np.ndarray or type(y_test_pred) != np.ndarray:
+                raise NotImplementedError("X/Y Data should be numpy array")
+            x_pred = np.vstack((x_train_pred, x_test_pred))
+            y_pred = np.vstack((y_train_pred, y_test_pred))
+        elif x_test_pred is not None and y_test_pred is not None:
+            x_pred = x_test_pred
+            y_pred = y_test_pred
+        elif x_train_pred is not None and y_train_pred is not None:
+            x_pred = x_train_pred
+            y_pred = y_train_pred
+        else:
+            raise NotImplementedError("Invalid data - None")
+
+        self._nb_classes = self.get_nb_classes(y_pred)
+        self._input_shape = x_pred.shape[1:]
+        predict_fn = (x_pred, y_pred)
+        self._art_model = BlackBoxClassifier(predict_fn, self._input_shape, self._nb_classes, fuzzy_float_compare=True)
+
+
+class BlackboxClassifierPredictFunction(BlackboxClassifier):
+    """
+    Wrapper for black-box ML classification models using a predict function.
+
+    :param model: Function that takes in an `np.ndarray` of input data and returns predictions either as class
+                  probabilities (multi-column) or a 1D-array of categorical labels (consecutive integers starting at 0).
+    :type model: Callable
+    :param output_type: The type of output the model yields (vector/label only)
+    :type output_type: `ModelOutputType`
+    :param input_shape: Shape of input to the model.
+    :type input_shape: Tuple[int, ...]
+    :param nb_classes: Number of prediction classes of the model.
+    :type  nb_classes: int
+    :param black_box_access: Boolean describing the type of deployment of the model (when in production).
+                             Always assumed to be True for this wrapper.
+    :type black_box_access: boolean, optional
+    :param unlimited_queries: Boolean indicating whether a user can perform unlimited queries to the model API.
+    :type unlimited_queries: boolean, optional
+    """
+
+    def __init__(self, model: Callable, output_type: ModelOutputType, input_shape: Tuple[int, ...], nb_classes: int,
+                 black_box_access: Optional[bool] = True, unlimited_queries: Optional[bool] = True, **kwargs):
+        super().__init__(model, output_type, black_box_access=True, unlimited_queries=unlimited_queries, **kwargs)
+        self._nb_classes = nb_classes
+        self._input_shape = input_shape
+        self._art_model = BlackBoxClassifier(model, self._input_shape, self._nb_classes)