mirror of
https://github.com/IBM/ai-privacy-toolkit.git
synced 2026-05-07 11:02:37 +02:00
Create initial version of wrappers for models (#1)
* New wrapper classes for models
This commit is contained in:
parent
9de078f937
commit
b0c6c4d28e
8 changed files with 325 additions and 4 deletions
0
apt/utils/__init__.py
Normal file
0
apt/utils/__init__.py
Normal file
|
|
@ -13,8 +13,7 @@ def _load_iris(test_set_size: float = 0.3):
|
|||
|
||||
# Split training and test sets
|
||||
x_train, x_test, y_train, y_test = model_selection.train_test_split(data, labels, test_size=test_set_size,
|
||||
random_state=18, stratify=labels,
|
||||
shuffle=True)
|
||||
random_state=18, stratify=labels)
|
||||
|
||||
return (x_train, y_train), (x_test, y_test)
|
||||
|
||||
|
|
@ -29,6 +28,28 @@ def get_iris_dataset():
|
|||
return _load_iris()
|
||||
|
||||
|
||||
def _load_diabetes(test_set_size: float = 0.3):
    """
    Load the scikit-learn diabetes regression dataset and split it into train/test sets.

    :param test_set_size: Proportion of the data held out for testing (value between 0 and 1).
    :return: Tuple of ((x_train, y_train), (x_test, y_test)) as numpy arrays.
    """
    dataset = datasets.load_diabetes()
    features, targets = dataset.data, dataset.target

    # Fixed random_state keeps the split reproducible across runs.
    x_train, x_test, y_train, y_test = model_selection.train_test_split(
        features, targets, test_size=test_set_size, random_state=18)

    return (x_train, y_train), (x_test, y_test)
|
||||
|
||||
|
||||
def get_diabetes_dataset():
    """
    Loads the diabetes regression dataset from scikit-learn.

    :return: Training and test sets, as ((x_train, y_train), (x_test, y_test)) numpy arrays.
    """
    # Uses the default test split size defined by _load_diabetes.
    return _load_diabetes()
|
||||
|
||||
|
||||
def get_german_credit_dataset(test_set: float = 0.3):
|
||||
"""
|
||||
Loads the UCI German_credit dataset from `tests/datasets/german` or downloads it if necessary.
|
||||
2
apt/utils/models/__init__.py
Normal file
2
apt/utils/models/__init__.py
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
from apt.utils.models.model import Model, ModelWithLoss, SingleOutputModel, MultipleOutputModel
|
||||
from apt.utils.models.sklearn_model import SklearnModel, SklearnClassifier, SklearnRegressor
|
||||
112
apt/utils/models/model.py
Normal file
112
apt/utils/models/model.py
Normal file
|
|
@ -0,0 +1,112 @@
|
|||
from abc import ABC, abstractmethod
|
||||
from typing import Union, List, Any, Optional
|
||||
import numpy as np
|
||||
|
||||
class Model(ABC):
    """
    Abstract base class wrapping an ML model of an arbitrary underlying framework.
    """

    def __init__(self, model: Any, **kwargs):
        """
        Create the wrapper around an existing framework model.

        :param model: The original model object (of the underlying ML framework)
        """
        self._model = model

    @abstractmethod
    def fit(self, x: np.ndarray, y: np.ndarray, **kwargs) -> None:
        """
        Train the wrapped model on the data `(x, y)`.

        :param x: Training data.
        :type x: `np.ndarray` or `pandas.DataFrame`
        :param y: True labels.
        :type y: `np.ndarray` or `pandas.DataFrame`
        """
        raise NotImplementedError

    @abstractmethod
    def predict(self, x: np.ndarray, **kwargs) -> np.ndarray:
        """
        Run inference on the input samples `x`.

        :param x: Input samples.
        :type x: `np.ndarray` or `pandas.DataFrame`
        :return: Predictions from the model.
        """
        raise NotImplementedError

    @property
    def model(self):
        """
        The wrapped framework model object.

        :return: The model.
        """
        return self._model
|
||||
|
||||
|
||||
class SingleOutputModel(Model):
    """
    Wrapper for ML models that emit a single value per sample
    (e.g., a bare class label or a regression output).
    """
|
||||
|
||||
|
||||
class MultipleOutputModel(Model):
    """
    Wrapper for ML models that emit a vector per sample
    (e.g., class probabilities or logits).
    """
|
||||
|
||||
|
||||
class ModelWithLoss(Model):
    """
    Wrapper for ML models that can report loss values for their predictions.
    """

    def __init__(self, model: Any, loss: Optional[Any] = None, **kwargs):
        """
        Create a `ModelWithLoss` wrapper.

        :param model: The original model object (of the underlying ML framework)
        :param loss: The loss function/object of the model (of the underlying ML framework)
        """
        super().__init__(model, **kwargs)
        # Loss is optional; subclasses may compute loss through their own framework instead.
        self._loss = loss
|
||||
|
||||
|
||||
# Probably not needed for now, as we will not be using these wrappers directly in ART.
|
||||
# @abstractmethod
|
||||
# def loss(self, x: np.ndarray, y: np.ndarray, **kwargs) -> np.ndarray:
|
||||
# """
|
||||
# Compute the loss of the model for samples `x`.
|
||||
#
|
||||
# :param x: Input samples.
|
||||
# :type x: `np.ndarray` or `pandas.DataFrame`
|
||||
# :param y: True labels.
|
||||
# :type y: `np.ndarray` or `pandas.DataFrame`
|
||||
# :return: Loss values.
|
||||
# """
|
||||
# raise NotImplementedError
|
||||
|
||||
|
||||
# Probably not needed for now, as we will not be using these wrappers directly in ART.
|
||||
# class ModelWithGradients(Model):
|
||||
# """
|
||||
# Wrapper class for ML models that support computing gradients.
|
||||
# """
|
||||
# @abstractmethod
|
||||
# def class_gradient(self, x: np.ndarray, label: Union[int, List[int], None] = None, **kwargs) -> np.ndarray:
|
||||
# """
|
||||
# Compute per-class derivatives w.r.t. input `x`.
|
||||
#
|
||||
# :param x: Input samples.
|
||||
# :type x: `np.ndarray` or `pandas.DataFrame`
|
||||
# :param label: Index of a specific class. If provided, the gradient of the specified class
|
||||
# is computed for all samples. Otherwise, gradients for all classes are computed for all samples.
|
||||
# :param label: int
|
||||
# :return: Gradients of input features w.r.t. each class in the form `(batch_size, nb_classes, input_shape)` when
|
||||
# computing for all classes, or `(batch_size, 1, input_shape)` when `label` is specified.
|
||||
# """
|
||||
# raise NotImplementedError
|
||||
142
apt/utils/models/sklearn_model.py
Normal file
142
apt/utils/models/sklearn_model.py
Normal file
|
|
@ -0,0 +1,142 @@
|
|||
import numpy as np
|
||||
from sklearn.preprocessing import OneHotEncoder
|
||||
|
||||
from apt.utils.models import Model, ModelWithLoss, SingleOutputModel
|
||||
|
||||
from art.estimators.classification.scikitlearn import SklearnClassifier as ArtSklearnClassifier
|
||||
from art.estimators.regression.scikitlearn import ScikitlearnRegressor
|
||||
|
||||
|
||||
class SklearnModel(Model):
    """
    Common wrapper for scikit-learn estimators.
    """

    def score(self, x: np.ndarray, y: np.ndarray, **kwargs):
        """
        Evaluate the wrapped estimator on held-out data `(x, y)`.

        Delegates to the estimator's own ``score`` method (e.g., accuracy for
        classifiers, R^2 for regressors).

        :param x: Test data.
        :type x: `np.ndarray` or `pandas.DataFrame`
        :param y: True labels.
        :type y: `np.ndarray` or `pandas.DataFrame`
        :return: The estimator's score for `(x, y)`.
        """
        return self.model.score(x, y, **kwargs)
|
||||
|
||||
|
||||
class SklearnClassifier(SklearnModel):
    """
    Wrapper class for scikitlearn classification models.
    """
    def __init__(self, model, **kwargs):
        """
        Initialize a `SklearnClassifier` wrapper object.

        :param model: The original sklearn model object
        """
        super().__init__(model, **kwargs)
        # ART wrapper is used for fitting/prediction so attacks can reuse it later.
        self._art_model = ArtSklearnClassifier(model)

    def fit(self, x: np.ndarray, y: np.ndarray, **kwargs) -> None:
        """
        Fit the model using the training data `(x, y)`.

        The ART classifier expects one-hot encoded labels, so label vectors are
        encoded before delegating.

        :param x: Training data.
        :type x: `np.ndarray` or `pandas.DataFrame`
        :param y: True labels.
        :type y: `np.ndarray` or `pandas.DataFrame`
        """
        # `sparse` was renamed to `sparse_output` in scikit-learn 1.2 and removed
        # in 1.4; try the new keyword first to stay compatible across versions.
        try:
            encoder = OneHotEncoder(sparse_output=False)
        except TypeError:
            encoder = OneHotEncoder(sparse=False)
        # np.asarray allows pandas Series/DataFrame labels, which have no reshape
        # with the same semantics as ndarrays.
        y_encoded = encoder.fit_transform(np.asarray(y).reshape(-1, 1))
        self._art_model.fit(x, y_encoded, **kwargs)

    def predict(self, x: np.ndarray, **kwargs) -> np.ndarray:
        """
        Perform predictions using the model for input `x`.

        :param x: Input samples.
        :type x: `np.ndarray` or `pandas.DataFrame`
        :return: Predictions from the model.
        """
        return self._art_model.predict(x, **kwargs)
|
||||
|
||||
|
||||
class SklearnRegressor(SklearnModel, SingleOutputModel, ModelWithLoss):
    """
    Wrapper class for scikitlearn regression models.
    """
    def __init__(self, model, **kwargs):
        """
        Initialize a `SklearnRegressor` wrapper object.

        :param model: The original sklearn model object
        """
        super().__init__(model, **kwargs)
        # ART's regressor wrapper handles fitting, prediction and loss computation.
        self._art_model = ScikitlearnRegressor(model)

    def fit(self, x: np.ndarray, y: np.ndarray, **kwargs) -> None:
        """
        Train the wrapped regressor on `(x, y)` by delegating to ART.

        :param x: Training data.
        :type x: `np.ndarray` or `pandas.DataFrame`
        :param y: True labels.
        :type y: `np.ndarray` or `pandas.DataFrame`
        """
        self._art_model.fit(x, y, **kwargs)

    def predict(self, x: np.ndarray, **kwargs) -> np.ndarray:
        """
        Produce regression predictions for the input samples `x`.

        :param x: Input samples.
        :type x: `np.ndarray` or `pandas.DataFrame`
        :return: Predictions from the model.
        """
        return self._art_model.predict(x, **kwargs)

    def loss(self, x: np.ndarray, y: np.ndarray, **kwargs) -> np.ndarray:
        """
        Compute the model's loss for samples `x` against labels `y`.

        :param x: Input samples.
        :type x: `np.ndarray` or `pandas.DataFrame`
        :param y: True labels.
        :type y: `np.ndarray` or `pandas.DataFrame`
        :return: Loss values.
        """
        return self._art_model.compute_loss(x, y, **kwargs)
|
||||
|
||||
|
||||
# Probably not needed for now, as we will not be using these wrappers directly in ART.
|
||||
# class SklearnDecisionTreeClassifier(SklearnClassifier, MultipleOutputModel):
|
||||
# """
|
||||
# Wrapper class for scikitlearn decision tree classifier models.
|
||||
# """
|
||||
# def __init__(self, model):
|
||||
# """
|
||||
# Initialize a `DecisionTreeClassifier` wrapper object.
|
||||
#
|
||||
# :param model: The original sklearn decision tree model object
|
||||
# """
|
||||
# super().__init__(model)
|
||||
# self._art_model = ScikitlearnDecisionTreeClassifier(model)
|
||||
#
|
||||
# def get_decision_path(self, x: np.ndarray) -> np.ndarray:
|
||||
# """
|
||||
# Returns the nodes along the path taken in the tree when classifying x. Last node is the leaf, first node is the
|
||||
# root node.
|
||||
#
|
||||
# :param x: Input samples.
|
||||
# :type x: `np.ndarray` or `pandas.DataFrame`
|
||||
# :return: The indices of the nodes in the array structure of the tree.
|
||||
# """
|
||||
# return self._art_model.get_decision_path(x)
|
||||
#
|
||||
# def get_samples_at_node(self, node_id: int) -> int:
|
||||
# """
|
||||
# Returns the number of training samples mapped to a node.
|
||||
#
|
||||
# :param node_id: The ID of the node.
|
||||
# :return: Number of samples mapped this node.
|
||||
# """
|
||||
# return self._art_model.get_samples_at_node(node_id)
|
||||
Loading…
Add table
Add a link
Reference in a new issue