Remove redundant code.

Use data wrappers in model wrapper APIs.
More typing.
This commit is contained in:
abigailt 2022-03-06 21:15:07 +02:00
parent 9f4d649934
commit 3d82db80c4
5 changed files with 57 additions and 166 deletions

View file

@@ -3,4 +3,4 @@ The AI Privacy Toolbox (datasets).
Implementation of datasets utility components for datasets creation, load, and store
"""
from apt.utils.datasets.datasets import DatasetABC, StoredDatasetABC, DatasetFactory, Data, BaseDataset
from apt.utils.datasets.datasets import DatasetABC, StoredDatasetABC, DatasetFactory, Data, BaseDataset, DATA_ARRAY_TYPE

View file

@@ -5,17 +5,21 @@ Implementation of utility classes for dataset handling
"""
from abc import ABCMeta, abstractmethod
from typing import Callable, Collection, Any
from typing import Callable, Collection, Any, Union
import tarfile
import os
import urllib.request
import numpy as np
import pandas as pd
import logging
logger = logging.getLogger(__name__)
DATA_ARRAY_TYPE = Union[np.ndarray, pd.DataFrame]
class DatasetABC(metaclass=ABCMeta):
"""Base Abstract Class for Dataset"""
@@ -122,7 +126,7 @@ class StoredDatasetABC(DatasetABC):
class BaseDataset(DatasetABC):
"""Base Class for Dataset"""
def __init__(self, x, y, **kwargs):
def __init__(self, x: DATA_ARRAY_TYPE, y: DATA_ARRAY_TYPE, **kwargs):
"""
BaseDataset constructor.
:param x: collection of data samples
@@ -135,11 +139,11 @@ class BaseDataset(DatasetABC):
if len(self.x) != len(self.y):
raise ValueError('Non equivalent lengths of x and y')
def get_samples(self) -> Collection[Any]:
def get_samples(self) -> DATA_ARRAY_TYPE:
"""Return data samples"""
return self.x
def get_labels(self) -> Collection[Any]:
def get_labels(self) -> DATA_ARRAY_TYPE:
"""Return labels"""
return self.y
@@ -192,7 +196,7 @@ class Data:
The class stores train and test datasets.
If neither of the datasets was provided,
both train and test datasets will be created using
Factory command to create dataset instance
DatasetFactory to create a dataset instance
"""
if train or test:
self.train = train
@@ -209,18 +213,18 @@ class Data:
"""Return test DatasetBase"""
return self.test
def get_train_samples(self):
def get_train_samples(self) -> Collection[Any]:
"""Return train set samples"""
return self.train.get_samples()
def get_train_labels(self):
def get_train_labels(self) -> Collection[Any]:
"""Return train set labels"""
return self.train.get_labels()
def get_test_samples(self):
def get_test_samples(self) -> Collection[Any]:
"""Return test set samples"""
return self.test.get_samples()
def get_test_labels(self):
def get_test_labels(self) -> Collection[Any]:
"""Return test set labels"""
return self.test.get_labels()
return self.test.get_labels()

View file

@@ -1,34 +1,34 @@
from abc import ABC, abstractmethod
from typing import Union, List, Any, Optional
import numpy as np
from abc import ABCMeta, abstractmethod
from typing import Any
class Model(ABC):
from apt.utils.datasets import BaseDataset, DATA_ARRAY_TYPE
class Model(metaclass=ABCMeta):
"""
Base class for ML model wrappers.
Abstract base class for ML model wrappers.
"""
def __init__(self, model: Any, **kwargs):
"""
Initialize a `Model` wrapper object.
Initialize a `Model` wrapper object.
:param model: The original model object (of the underlying ML framework)
:param model: The original model object (of the underlying ML framework)
"""
self._model = model
@abstractmethod
def fit(self, x: np.ndarray, y: np.ndarray, **kwargs) -> None:
def fit(self, train_data: BaseDataset, **kwargs) -> None:
"""
Fit the model using the training data `(x, y)`.
Fit the model using the training data.
:param x: Training data.
:type x: `np.ndarray` or `pandas.DataFrame`
:param y: True labels.
:type y: `np.ndarray` or `pandas.DataFrame`
:param train_data: Training data.
:type train_data: `BaseDataset`
"""
raise NotImplementedError
@abstractmethod
def predict(self, x: np.ndarray, **kwargs) -> np.ndarray:
def predict(self, x: DATA_ARRAY_TYPE, **kwargs) -> DATA_ARRAY_TYPE:
"""
Perform predictions using the model for input `x`.
@@ -46,67 +46,3 @@ class Model(ABC):
:return: The model.
"""
return self._model
class SingleOutputModel(Model):
"""
Wrapper class for ML models whose output is a single value (e.g., classification with label only output, regression).
"""
class MultipleOutputModel(Model):
"""
Wrapper class for ML models whose output is a vector (e.g., class probabilities or logits).
"""
class ModelWithLoss(Model):
"""
Wrapper class for ML models that support computing loss values for predictions.
"""
def __init__(self, model: Any, loss: Optional[Any] = None, **kwargs):
"""
Initialize a `ModelWithLoss` wrapper object.
:param model: The original model object (of the underlying ML framework)
:param loss: The loss function/object of the model (of the underlying ML framework)
"""
super().__init__(model, **kwargs)
self._loss = loss
# Probably not needed for now, as we will not be using these wrappers directly in ART.
# @abstractmethod
# def loss(self, x: np.ndarray, y: np.ndarray, **kwargs) -> np.ndarray:
# """
# Compute the loss of the model for samples `x`.
#
# :param x: Input samples.
# :type x: `np.ndarray` or `pandas.DataFrame`
# :param y: True labels.
# :type y: `np.ndarray` or `pandas.DataFrame`
# :return: Loss values.
# """
# raise NotImplementedError
# Probably not needed for now, as we will not be using these wrappers directly in ART.
# class ModelWithGradients(Model):
# """
# Wrapper class for ML models that support computing gradients.
# """
# @abstractmethod
# def class_gradient(self, x: np.ndarray, label: Union[int, List[int], None] = None, **kwargs) -> np.ndarray:
# """
# Compute per-class derivatives w.r.t. input `x`.
#
# :param x: Input samples.
# :type x: `np.ndarray` or `pandas.DataFrame`
# :param label: Index of a specific class. If provided, the gradient of the specified class
# is computed for all samples. Otherwise, gradients for all classes are computed for all samples.
# :param label: int
# :return: Gradients of input features w.r.t. each class in the form `(batch_size, nb_classes, input_shape)` when
# computing for all classes, or `(batch_size, 1, input_shape)` when `label` is specified.
# """
# raise NotImplementedError

View file

@@ -1,7 +1,10 @@
import numpy as np
from sklearn.preprocessing import OneHotEncoder
from apt.utils.models import Model, ModelWithLoss, SingleOutputModel
from sklearn.preprocessing import OneHotEncoder
from sklearn.base import BaseEstimator
from apt.utils.models import Model
from apt.utils.datasets import BaseDataset, DATA_ARRAY_TYPE
from art.estimators.classification.scikitlearn import SklearnClassifier as ArtSklearnClassifier
from art.estimators.regression.scikitlearn import ScikitlearnRegressor
@@ -11,23 +14,21 @@ class SklearnModel(Model):
"""
Wrapper class for scikitlearn models.
"""
def score(self, x: np.ndarray, y: np.ndarray, **kwargs):
def score(self, test_data: BaseDataset, **kwargs):
"""
Score the model using test data `(x, y)`.
Score the model using test data.
:param x: Test data.
:type x: `np.ndarray` or `pandas.DataFrame`
:param y: True labels.
:type y: `np.ndarray` or `pandas.DataFrame`
:param test_data: Test data.
:type test_data: `BaseDataset`
"""
return self.model.score(x, y, **kwargs)
return self.model.score(test_data.get_samples(), test_data.get_labels(), **kwargs)
class SklearnClassifier(SklearnModel):
"""
Wrapper class for scikitlearn classification models.
"""
def __init__(self, model, **kwargs):
def __init__(self, model: BaseEstimator, **kwargs):
"""
Initialize a `SklearnClassifier` wrapper object.
@@ -36,35 +37,33 @@ class SklearnClassifier(SklearnModel):
super().__init__(model, **kwargs)
self._art_model = ArtSklearnClassifier(model)
def fit(self, x: np.ndarray, y: np.ndarray, **kwargs) -> None:
def fit(self, train_data: BaseDataset, **kwargs) -> None:
"""
Fit the model using the training data `(x, y)`.
Fit the model using the training data.
:param x: Training data.
:type x: `np.ndarray` or `pandas.DataFrame`
:param y: True labels.
:type y: `np.ndarray` or `pandas.DataFrame`
:param train_data: Training data.
:type train_data: `BaseDataset`
"""
encoder = OneHotEncoder(sparse=False)
y_encoded = encoder.fit_transform(y.reshape(-1, 1))
self._art_model.fit(x, y_encoded, **kwargs)
y_encoded = encoder.fit_transform(train_data.get_labels().reshape(-1, 1))
self._art_model.fit(train_data.get_samples(), y_encoded, **kwargs)
def predict(self, x: np.ndarray, **kwargs) -> np.ndarray:
def predict(self, x: DATA_ARRAY_TYPE, **kwargs) -> DATA_ARRAY_TYPE:
"""
Perform predictions using the model for input `x`.
:param x: Input samples.
:type x: `np.ndarray` or `pandas.DataFrame`
:return: Predictions from the model.
:return: Predictions from the model (class probabilities, if supported).
"""
return self._art_model.predict(x, **kwargs)
class SklearnRegressor(SklearnModel, SingleOutputModel, ModelWithLoss):
class SklearnRegressor(SklearnModel):
"""
Wrapper class for scikitlearn regression models.
"""
def __init__(self, model, **kwargs):
def __init__(self, model: BaseEstimator, **kwargs):
"""
Initialize a `SklearnRegressor` wrapper object.
@@ -73,18 +72,16 @@ class SklearnRegressor(SklearnModel, SingleOutputModel, ModelWithLoss):
super().__init__(model, **kwargs)
self._art_model = ScikitlearnRegressor(model)
def fit(self, x: np.ndarray, y: np.ndarray, **kwargs) -> None:
def fit(self, train_data: BaseDataset, **kwargs) -> None:
"""
Fit the model using the training data `(x, y)`.
Fit the model using the training data.
:param x: Training data.
:type x: `np.ndarray` or `pandas.DataFrame`
:param y: True labels.
:type y: `np.ndarray` or `pandas.DataFrame`
:param train_data: Training data.
:type train_data: `BaseDataset`
"""
self._art_model.fit(x, y, **kwargs)
self._art_model.fit(train_data.get_samples(), train_data.get_labels(), **kwargs)
def predict(self, x: np.ndarray, **kwargs) -> np.ndarray:
def predict(self, x: DATA_ARRAY_TYPE, **kwargs) -> DATA_ARRAY_TYPE:
"""
Perform predictions using the model for input `x`.
@@ -93,50 +90,3 @@ class SklearnRegressor(SklearnModel, SingleOutputModel, ModelWithLoss):
:return: Predictions from the model.
"""
return self._art_model.predict(x, **kwargs)
def loss(self, x: np.ndarray, y: np.ndarray, **kwargs) -> np.ndarray:
"""
Compute the loss of the model for samples `x`.
:param x: Input samples.
:type x: `np.ndarray` or `pandas.DataFrame`
:param y: True labels.
:type y: `np.ndarray` or `pandas.DataFrame`
:return: Loss values.
"""
return self._art_model.compute_loss(x, y, **kwargs)
# Probably not needed for now, as we will not be using these wrappers directly in ART.
# class SklearnDecisionTreeClassifier(SklearnClassifier, MultipleOutputModel):
# """
# Wrapper class for scikitlearn decision tree classifier models.
# """
# def __init__(self, model):
# """
# Initialize a `DecisionTreeClassifier` wrapper object.
#
# :param model: The original sklearn decision tree model object
# """
# super().__init__(model)
# self._art_model = ScikitlearnDecisionTreeClassifier(model)
#
# def get_decision_path(self, x: np.ndarray) -> np.ndarray:
# """
# Returns the nodes along the path taken in the tree when classifying x. Last node is the leaf, first node is the
# root node.
#
# :param x: Input samples.
# :type x: `np.ndarray` or `pandas.DataFrame`
# :return: The indices of the nodes in the array structure of the tree.
# """
# return self._art_model.get_decision_path(x)
#
# def get_samples_at_node(self, node_id: int) -> int:
# """
# Returns the number of training samples mapped to a node.
#
# :param node_id: The ID of the node.
# :return: Number of samples mapped to this node.
# """
# return self._art_model.get_samples_at_node(node_id)