mirror of
https://github.com/IBM/ai-privacy-toolkit.git
synced 2026-05-07 11:02:37 +02:00
Create initial version of wrappers for models (#1)
* New wrapper classes for models
This commit is contained in:
parent
9de078f937
commit
b0c6c4d28e
8 changed files with 325 additions and 4 deletions
0
apt/utils/__init__.py
Normal file
0
apt/utils/__init__.py
Normal file
|
|
@ -13,8 +13,7 @@ def _load_iris(test_set_size: float = 0.3):
|
|||
|
||||
# Split training and test sets
|
||||
x_train, x_test, y_train, y_test = model_selection.train_test_split(data, labels, test_size=test_set_size,
|
||||
random_state=18, stratify=labels,
|
||||
shuffle=True)
|
||||
random_state=18, stratify=labels)
|
||||
|
||||
return (x_train, y_train), (x_test, y_test)
|
||||
|
||||
|
|
@ -29,6 +28,28 @@ def get_iris_dataset():
|
|||
return _load_iris()
|
||||
|
||||
|
||||
def _load_diabetes(test_set_size: float = 0.3):
    """
    Load the scikit-learn diabetes regression dataset and split it into train/test sets.

    :param test_set_size: Proportion of the data held out for testing (value between 0 and 1).
    :return: Tuple of ((x_train, y_train), (x_test, y_test)) as numpy arrays.
    """
    dataset = datasets.load_diabetes()
    features, targets = dataset.data, dataset.target

    # Fixed random_state keeps the split reproducible across runs.
    x_train, x_test, y_train, y_test = model_selection.train_test_split(
        features, targets, test_size=test_set_size, random_state=18)

    return (x_train, y_train), (x_test, y_test)
|
||||
|
||||
|
||||
def get_diabetes_dataset():
    """
    Loads the diabetes regression dataset from scikit-learn.

    :return: Training and test sets, as ((x_train, y_train), (x_test, y_test)) numpy arrays.
    """
    # Uses the default test split size defined by _load_diabetes.
    return _load_diabetes()
|
||||
|
||||
|
||||
def get_german_credit_dataset(test_set: float = 0.3):
|
||||
"""
|
||||
Loads the UCI German_credit dataset from `tests/datasets/german` or downloads it if necessary.
|
||||
2
apt/utils/models/__init__.py
Normal file
2
apt/utils/models/__init__.py
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
from apt.utils.models.model import Model, ModelWithLoss, SingleOutputModel, MultipleOutputModel
|
||||
from apt.utils.models.sklearn_model import SklearnModel, SklearnClassifier, SklearnRegressor
|
||||
112
apt/utils/models/model.py
Normal file
112
apt/utils/models/model.py
Normal file
|
|
@ -0,0 +1,112 @@
|
|||
from abc import ABC, abstractmethod
|
||||
from typing import Union, List, Any, Optional
|
||||
import numpy as np
|
||||
|
||||
class Model(ABC):
    """
    Abstract base class wrapping an ML model of an arbitrary underlying framework.
    """

    def __init__(self, model: Any, **kwargs):
        """
        Create the wrapper around an existing framework model.

        :param model: The original model object (of the underlying ML framework)
        """
        self._model = model

    @abstractmethod
    def fit(self, x: np.ndarray, y: np.ndarray, **kwargs) -> None:
        """
        Train the wrapped model on the data `(x, y)`.

        :param x: Training data.
        :type x: `np.ndarray` or `pandas.DataFrame`
        :param y: True labels.
        :type y: `np.ndarray` or `pandas.DataFrame`
        """
        raise NotImplementedError

    @abstractmethod
    def predict(self, x: np.ndarray, **kwargs) -> np.ndarray:
        """
        Run inference on the input samples `x`.

        :param x: Input samples.
        :type x: `np.ndarray` or `pandas.DataFrame`
        :return: Predictions from the model.
        """
        raise NotImplementedError

    @property
    def model(self):
        """
        The wrapped framework model object.

        :return: The model.
        """
        return self._model
|
||||
|
||||
|
||||
class SingleOutputModel(Model):
    """
    Wrapper for ML models that emit a single value per sample
    (e.g., a bare class label or a regression output).
    """
|
||||
|
||||
|
||||
class MultipleOutputModel(Model):
    """
    Wrapper for ML models that emit a vector per sample
    (e.g., class probabilities or logits).
    """
|
||||
|
||||
|
||||
class ModelWithLoss(Model):
    """
    Wrapper for ML models that can report loss values for their predictions.
    """

    def __init__(self, model: Any, loss: Optional[Any] = None, **kwargs):
        """
        Create a `ModelWithLoss` wrapper.

        :param model: The original model object (of the underlying ML framework)
        :param loss: The loss function/object of the model (of the underlying ML framework)
        """
        super().__init__(model, **kwargs)
        # Loss is optional; subclasses may compute loss through their own framework instead.
        self._loss = loss
|
||||
|
||||
|
||||
# Probably not needed for now, as we will not be using these wrappers directly in ART.
|
||||
# @abstractmethod
|
||||
# def loss(self, x: np.ndarray, y: np.ndarray, **kwargs) -> np.ndarray:
|
||||
# """
|
||||
# Compute the loss of the model for samples `x`.
|
||||
#
|
||||
# :param x: Input samples.
|
||||
# :type x: `np.ndarray` or `pandas.DataFrame`
|
||||
# :param y: True labels.
|
||||
# :type y: `np.ndarray` or `pandas.DataFrame`
|
||||
# :return: Loss values.
|
||||
# """
|
||||
# raise NotImplementedError
|
||||
|
||||
|
||||
# Probably not needed for now, as we will not be using these wrappers directly in ART.
|
||||
# class ModelWithGradients(Model):
|
||||
# """
|
||||
# Wrapper class for ML models that support computing gradients.
|
||||
# """
|
||||
# @abstractmethod
|
||||
# def class_gradient(self, x: np.ndarray, label: Union[int, List[int], None] = None, **kwargs) -> np.ndarray:
|
||||
# """
|
||||
# Compute per-class derivatives w.r.t. input `x`.
|
||||
#
|
||||
# :param x: Input samples.
|
||||
# :type x: `np.ndarray` or `pandas.DataFrame`
|
||||
# :param label: Index of a specific class. If provided, the gradient of the specified class
|
||||
# is computed for all samples. Otherwise, gradients for all classes are computed for all samples.
|
||||
# :param label: int
|
||||
# :return: Gradients of input features w.r.t. each class in the form `(batch_size, nb_classes, input_shape)` when
|
||||
# computing for all classes, or `(batch_size, 1, input_shape)` when `label` is specified.
|
||||
# """
|
||||
# raise NotImplementedError
|
||||
142
apt/utils/models/sklearn_model.py
Normal file
142
apt/utils/models/sklearn_model.py
Normal file
|
|
@ -0,0 +1,142 @@
|
|||
import numpy as np
|
||||
from sklearn.preprocessing import OneHotEncoder
|
||||
|
||||
from apt.utils.models import Model, ModelWithLoss, SingleOutputModel
|
||||
|
||||
from art.estimators.classification.scikitlearn import SklearnClassifier as ArtSklearnClassifier
|
||||
from art.estimators.regression.scikitlearn import ScikitlearnRegressor
|
||||
|
||||
|
||||
class SklearnModel(Model):
    """
    Common wrapper for scikit-learn estimators.
    """

    def score(self, x: np.ndarray, y: np.ndarray, **kwargs):
        """
        Evaluate the wrapped estimator on held-out data `(x, y)`.

        Delegates to the estimator's own ``score`` method (e.g., accuracy for
        classifiers, R^2 for regressors).

        :param x: Test data.
        :type x: `np.ndarray` or `pandas.DataFrame`
        :param y: True labels.
        :type y: `np.ndarray` or `pandas.DataFrame`
        :return: The estimator's score for `(x, y)`.
        """
        return self.model.score(x, y, **kwargs)
|
||||
|
||||
|
||||
class SklearnClassifier(SklearnModel):
    """
    Wrapper class for scikitlearn classification models.
    """
    def __init__(self, model, **kwargs):
        """
        Initialize a `SklearnClassifier` wrapper object.

        :param model: The original sklearn model object
        """
        super().__init__(model, **kwargs)
        # ART wrapper is used for fitting/prediction so attacks can reuse it later.
        self._art_model = ArtSklearnClassifier(model)

    def fit(self, x: np.ndarray, y: np.ndarray, **kwargs) -> None:
        """
        Fit the model using the training data `(x, y)`.

        The ART classifier expects one-hot encoded labels, so label vectors are
        encoded before delegating.

        :param x: Training data.
        :type x: `np.ndarray` or `pandas.DataFrame`
        :param y: True labels.
        :type y: `np.ndarray` or `pandas.DataFrame`
        """
        # `sparse` was renamed to `sparse_output` in scikit-learn 1.2 and removed
        # in 1.4; try the new keyword first to stay compatible across versions.
        try:
            encoder = OneHotEncoder(sparse_output=False)
        except TypeError:
            encoder = OneHotEncoder(sparse=False)
        # np.asarray allows pandas Series/DataFrame labels, which have no reshape
        # with the same semantics as ndarrays.
        y_encoded = encoder.fit_transform(np.asarray(y).reshape(-1, 1))
        self._art_model.fit(x, y_encoded, **kwargs)

    def predict(self, x: np.ndarray, **kwargs) -> np.ndarray:
        """
        Perform predictions using the model for input `x`.

        :param x: Input samples.
        :type x: `np.ndarray` or `pandas.DataFrame`
        :return: Predictions from the model.
        """
        return self._art_model.predict(x, **kwargs)
|
||||
|
||||
|
||||
class SklearnRegressor(SklearnModel, SingleOutputModel, ModelWithLoss):
    """
    Wrapper class for scikitlearn regression models.
    """
    def __init__(self, model, **kwargs):
        """
        Initialize a `SklearnRegressor` wrapper object.

        :param model: The original sklearn model object
        """
        super().__init__(model, **kwargs)
        # ART's regressor wrapper handles fitting, prediction and loss computation.
        self._art_model = ScikitlearnRegressor(model)

    def fit(self, x: np.ndarray, y: np.ndarray, **kwargs) -> None:
        """
        Train the wrapped regressor on `(x, y)` by delegating to ART.

        :param x: Training data.
        :type x: `np.ndarray` or `pandas.DataFrame`
        :param y: True labels.
        :type y: `np.ndarray` or `pandas.DataFrame`
        """
        self._art_model.fit(x, y, **kwargs)

    def predict(self, x: np.ndarray, **kwargs) -> np.ndarray:
        """
        Produce regression predictions for the input samples `x`.

        :param x: Input samples.
        :type x: `np.ndarray` or `pandas.DataFrame`
        :return: Predictions from the model.
        """
        return self._art_model.predict(x, **kwargs)

    def loss(self, x: np.ndarray, y: np.ndarray, **kwargs) -> np.ndarray:
        """
        Compute the model's loss for samples `x` against labels `y`.

        :param x: Input samples.
        :type x: `np.ndarray` or `pandas.DataFrame`
        :param y: True labels.
        :type y: `np.ndarray` or `pandas.DataFrame`
        :return: Loss values.
        """
        return self._art_model.compute_loss(x, y, **kwargs)
|
||||
|
||||
|
||||
# Probably not needed for now, as we will not be using these wrappers directly in ART.
|
||||
# class SklearnDecisionTreeClassifier(SklearnClassifier, MultipleOutputModel):
|
||||
# """
|
||||
# Wrapper class for scikitlearn decision tree classifier models.
|
||||
# """
|
||||
# def __init__(self, model):
|
||||
# """
|
||||
# Initialize a `DecisionTreeClassifier` wrapper object.
|
||||
#
|
||||
# :param model: The original sklearn decision tree model object
|
||||
# """
|
||||
# super().__init__(model)
|
||||
# self._art_model = ScikitlearnDecisionTreeClassifier(model)
|
||||
#
|
||||
# def get_decision_path(self, x: np.ndarray) -> np.ndarray:
|
||||
# """
|
||||
# Returns the nodes along the path taken in the tree when classifying x. Last node is the leaf, first node is the
|
||||
# root node.
|
||||
#
|
||||
# :param x: Input samples.
|
||||
# :type x: `np.ndarray` or `pandas.DataFrame`
|
||||
# :return: The indices of the nodes in the array structure of the tree.
|
||||
# """
|
||||
# return self._art_model.get_decision_path(x)
|
||||
#
|
||||
# def get_samples_at_node(self, node_id: int) -> int:
|
||||
# """
|
||||
# Returns the number of training samples mapped to a node.
|
||||
#
|
||||
# :param node_id: The ID of the node.
|
||||
# :return: Number of samples mapped this node.
|
||||
# """
|
||||
# return self._art_model.get_samples_at_node(node_id)
|
||||
Loading…
Add table
Add a link
Reference in a new issue