Create initial version of wrappers for models (#1)

* New wrapper classes for models
This commit is contained in:
ABIGAIL GOLDSTEEN 2022-02-10 15:36:41 +02:00 committed by GitHub Enterprise
parent 9de078f937
commit b0c6c4d28e
8 changed files with 325 additions and 4 deletions

0
apt/utils/__init__.py Normal file
View file

View file

@ -13,8 +13,7 @@ def _load_iris(test_set_size: float = 0.3):
# Split training and test sets
x_train, x_test, y_train, y_test = model_selection.train_test_split(data, labels, test_size=test_set_size,
random_state=18, stratify=labels,
shuffle=True)
random_state=18, stratify=labels)
return (x_train, y_train), (x_test, y_test)
@ -29,6 +28,28 @@ def get_iris_dataset():
return _load_iris()
def _load_diabetes(test_set_size: float = 0.3):
    """
    Load the diabetes dataset and split it into a training part and a test part.

    :param test_set_size: Value passed on as `test_size` to `train_test_split`.
    :return: Two tuples, `(x_train, y_train), (x_test, y_test)`.
    """
    bunch = datasets.load_diabetes()
    features, targets = bunch.data, bunch.target

    # A fixed random_state keeps the split identical from one run to the next.
    split = model_selection.train_test_split(features, targets, test_size=test_set_size,
                                             random_state=18)
    x_train, x_test, y_train, y_test = split
    return (x_train, y_train), (x_test, y_test)
def get_diabetes_dataset(test_set: float = 0.3):
    """
    Loads the Diabetes dataset from scikit-learn.

    :param test_set: Proportion of the data to use as validation split (value between 0 and 1).
                     Defaults to 0.3, the same default `_load_diabetes` uses, so calling this
                     function without arguments behaves as before.
    :return: Training and test splits in the form `(x_train, y_train), (x_test, y_test)`.
    """
    return _load_diabetes(test_set)
def get_german_credit_dataset(test_set: float = 0.3):
"""
Loads the UCI German_credit dataset from `tests/datasets/german` or downloads it if necessary.

View file

@ -0,0 +1,2 @@
from apt.utils.models.model import Model, ModelWithLoss, SingleOutputModel, MultipleOutputModel
from apt.utils.models.sklearn_model import SklearnModel, SklearnClassifier, SklearnRegressor

112
apt/utils/models/model.py Normal file
View file

@ -0,0 +1,112 @@
from abc import ABC, abstractmethod
from typing import Union, List, Any, Optional
import numpy as np
class Model(ABC):
    """
    Abstract base for wrappers around models of an underlying ML framework.

    Concrete subclasses must implement `fit` and `predict`.
    """

    def __init__(self, model: Any, **kwargs):
        """
        Create a `Model` wrapper that holds on to `model`.

        :param model: The original model object (of the underlying ML framework)
        :param kwargs: Accepted so subclasses can pass extra arguments up the chain; not used here.
        """
        self._model = model

    @property
    def model(self):
        """
        The wrapped model object, exactly as it was handed to the constructor.

        :return: The model.
        """
        return self._model

    @abstractmethod
    def fit(self, x: np.ndarray, y: np.ndarray, **kwargs) -> None:
        """
        Train the model on the samples `x` with targets `y`.

        :param x: Training data.
        :type x: `np.ndarray` or `pandas.DataFrame`
        :param y: True labels.
        :type y: `np.ndarray` or `pandas.DataFrame`
        """
        raise NotImplementedError

    @abstractmethod
    def predict(self, x: np.ndarray, **kwargs) -> np.ndarray:
        """
        Run the model on the samples `x` and return what it predicts.

        :param x: Input samples.
        :type x: `np.ndarray` or `pandas.DataFrame`
        :return: Predictions from the model.
        """
        raise NotImplementedError
class SingleOutputModel(Model):
    """
    Marker wrapper for ML models that produce one value per sample (e.g., a classifier that outputs
    only the label, or a regressor).
    """
class MultipleOutputModel(Model):
    """
    Marker wrapper for ML models that produce a vector per sample (e.g., class probabilities or logits).
    """
class ModelWithLoss(Model):
    """
    Wrapper for ML models for which a loss value can be computed on predictions.
    """

    def __init__(self, model: Any, loss: Optional[Any] = None, **kwargs):
        """
        Create a `ModelWithLoss` wrapper.

        :param model: The original model object (of the underlying ML framework)
        :param loss: The loss function/object of the model (of the underlying ML framework).
                     Kept on the instance as `_loss`; may be left as None.
        """
        # Let the rest of the initializer chain store the wrapped model first.
        super().__init__(model, **kwargs)
        self._loss = loss
# Probably not needed for now, as we will not be using these wrappers directly in ART.
# @abstractmethod
# def loss(self, x: np.ndarray, y: np.ndarray, **kwargs) -> np.ndarray:
# """
# Compute the loss of the model for samples `x`.
#
# :param x: Input samples.
# :type x: `np.ndarray` or `pandas.DataFrame`
# :param y: True labels.
# :type y: `np.ndarray` or `pandas.DataFrame`
# :return: Loss values.
# """
# raise NotImplementedError
# Probably not needed for now, as we will not be using these wrappers directly in ART.
# class ModelWithGradients(Model):
# """
# Wrapper class for ML models that support computing gradients.
# """
# @abstractmethod
# def class_gradient(self, x: np.ndarray, label: Union[int, List[int], None] = None, **kwargs) -> np.ndarray:
# """
# Compute per-class derivatives w.r.t. input `x`.
#
# :param x: Input samples.
# :type x: `np.ndarray` or `pandas.DataFrame`
# :param label: Index of a specific class. If provided, the gradient of the specified class
# is computed for all samples. Otherwise, gradients for all classes are computed for all samples.
# :param label: int
# :return: Gradients of input features w.r.t. each class in the form `(batch_size, nb_classes, input_shape)` when
# computing for all classes, or `(batch_size, 1, input_shape)` when `label` is specified.
# """
# raise NotImplementedError

View file

@ -0,0 +1,142 @@
import numpy as np
from sklearn.preprocessing import OneHotEncoder
from apt.utils.models import Model, ModelWithLoss, SingleOutputModel
from art.estimators.classification.scikitlearn import SklearnClassifier as ArtSklearnClassifier
from art.estimators.regression.scikitlearn import ScikitlearnRegressor
class SklearnModel(Model):
    """
    Common base for wrappers around scikit-learn models.
    """

    def score(self, x: np.ndarray, y: np.ndarray, **kwargs):
        """
        Evaluate the wrapped model on the test data `(x, y)`.

        Delegates to the `score` method of the wrapped model; any extra keyword
        arguments are passed through unchanged.

        :param x: Test data.
        :type x: `np.ndarray` or `pandas.DataFrame`
        :param y: True labels.
        :type y: `np.ndarray` or `pandas.DataFrame`
        :return: Whatever the wrapped model's `score` returns.
        """
        wrapped = self.model
        return wrapped.score(x, y, **kwargs)
class SklearnClassifier(SklearnModel):
    """
    Wrapper class for scikitlearn classification models.
    """

    def __init__(self, model, **kwargs):
        """
        Initialize a `SklearnClassifier` wrapper object.

        :param model: The original sklearn model object
        """
        super().__init__(model, **kwargs)
        # Fitting and prediction are delegated to the ART wrapper built around the same model.
        self._art_model = ArtSklearnClassifier(model)

    def fit(self, x: np.ndarray, y: np.ndarray, **kwargs) -> None:
        """
        Fit the model using the training data `(x, y)`.

        The labels are one-hot encoded before being handed to the ART wrapper.

        :param x: Training data.
        :type x: `np.ndarray` or `pandas.DataFrame`
        :param y: True labels.
        :type y: `np.ndarray` or `pandas.DataFrame`
        """
        # Convert through numpy first: pandas objects are documented as valid input
        # but do not offer `reshape`, so calling it on `y` directly would fail for them.
        labels = np.asarray(y).reshape(-1, 1)
        try:
            encoder = OneHotEncoder(sparse_output=False)
        except TypeError:
            # Older scikit-learn releases only know this option under the name `sparse`.
            encoder = OneHotEncoder(sparse=False)
        y_encoded = encoder.fit_transform(labels)
        self._art_model.fit(x, y_encoded, **kwargs)

    def predict(self, x: np.ndarray, **kwargs) -> np.ndarray:
        """
        Perform predictions using the model for input `x`.

        :param x: Input samples.
        :type x: `np.ndarray` or `pandas.DataFrame`
        :return: Predictions from the model, as returned by the ART wrapper's `predict`.
        """
        return self._art_model.predict(x, **kwargs)
class SklearnRegressor(SklearnModel, SingleOutputModel, ModelWithLoss):
    """
    Wrapper for scikit-learn regression models; all work is delegated to an ART regressor wrapper.
    """

    def __init__(self, model, **kwargs):
        """
        Create a `SklearnRegressor` wrapper.

        :param model: The original sklearn model object
        """
        super().__init__(model, **kwargs)
        # The ART estimator built around the same model does the actual fitting and predicting.
        self._art_model = ScikitlearnRegressor(model)

    def fit(self, x: np.ndarray, y: np.ndarray, **kwargs) -> None:
        """
        Train the model on the samples `x` with targets `y`.

        :param x: Training data.
        :type x: `np.ndarray` or `pandas.DataFrame`
        :param y: True labels.
        :type y: `np.ndarray` or `pandas.DataFrame`
        """
        art_estimator = self._art_model
        art_estimator.fit(x, y, **kwargs)

    def predict(self, x: np.ndarray, **kwargs) -> np.ndarray:
        """
        Run the model on the samples `x` and return what it predicts.

        :param x: Input samples.
        :type x: `np.ndarray` or `pandas.DataFrame`
        :return: Predictions from the model.
        """
        predictions = self._art_model.predict(x, **kwargs)
        return predictions

    def loss(self, x: np.ndarray, y: np.ndarray, **kwargs) -> np.ndarray:
        """
        Return the loss of the model on the samples `x` given the true values `y`.

        The computation is delegated to the ART wrapper's `compute_loss`.

        :param x: Input samples.
        :type x: `np.ndarray` or `pandas.DataFrame`
        :param y: True labels.
        :type y: `np.ndarray` or `pandas.DataFrame`
        :return: Loss values.
        """
        loss_values = self._art_model.compute_loss(x, y, **kwargs)
        return loss_values
# Probably not needed for now, as we will not be using these wrappers directly in ART.
# class SklearnDecisionTreeClassifier(SklearnClassifier, MultipleOutputModel):
# """
# Wrapper class for scikitlearn decision tree classifier models.
# """
# def __init__(self, model):
# """
# Initialize a `DecisionTreeClassifier` wrapper object.
#
# :param model: The original sklearn decision tree model object
# """
# super().__init__(model)
# self._art_model = ScikitlearnDecisionTreeClassifier(model)
#
# def get_decision_path(self, x: np.ndarray) -> np.ndarray:
# """
# Returns the nodes along the path taken in the tree when classifying x. Last node is the leaf, first node is the
# root node.
#
# :param x: Input samples.
# :type x: `np.ndarray` or `pandas.DataFrame`
# :return: The indices of the nodes in the array structure of the tree.
# """
# return self._art_model.get_decision_path(x)
#
# def get_samples_at_node(self, node_id: int) -> int:
# """
# Returns the number of training samples mapped to a node.
#
# :param node_id: The ID of the node.
# :return: Number of samples mapped this node.
# """
# return self._art_model.get_samples_at_node(node_id)