mirror of
https://github.com/IBM/ai-privacy-toolkit.git
synced 2026-05-07 11:02:37 +02:00
Remove redundant code.
Use data wrappers in model wrapper APIs. More typing.
This commit is contained in:
parent
9f4d649934
commit
3d82db80c4
5 changed files with 57 additions and 166 deletions
|
|
@ -3,4 +3,4 @@ The AI Privacy Toolbox (datasets).
|
|||
Implementation of datasets utility components for datasets creation, load, and store
|
||||
"""
|
||||
|
||||
from apt.utils.datasets.datasets import DatasetABC, StoredDatasetABC, DatasetFactory, Data, BaseDataset
|
||||
from apt.utils.datasets.datasets import DatasetABC, StoredDatasetABC, DatasetFactory, Data, BaseDataset, DATA_ARRAY_TYPE
|
||||
|
|
|
|||
|
|
@ -5,17 +5,21 @@ Implementation of utility classes for dataset handling
|
|||
"""
|
||||
|
||||
from abc import ABCMeta, abstractmethod
|
||||
from typing import Callable, Collection, Any
|
||||
from typing import Callable, Collection, Any, Union
|
||||
|
||||
import tarfile
|
||||
import os
|
||||
import urllib.request
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
DATA_ARRAY_TYPE = Union[np.ndarray, pd.DataFrame]
|
||||
|
||||
|
||||
class DatasetABC(metaclass=ABCMeta):
|
||||
"""Base Abstract Class for Dataset"""
|
||||
|
||||
|
|
@ -122,7 +126,7 @@ class StoredDatasetABC(DatasetABC):
|
|||
class BaseDataset(DatasetABC):
|
||||
"""Base Class for Dataset"""
|
||||
|
||||
def __init__(self, x, y, **kwargs):
|
||||
def __init__(self, x: DATA_ARRAY_TYPE, y: DATA_ARRAY_TYPE, **kwargs):
|
||||
"""
|
||||
BaseDataset constructor.
|
||||
:param x: collection of data samples
|
||||
|
|
@ -135,11 +139,11 @@ class BaseDataset(DatasetABC):
|
|||
if len(self.x) != len(self.y):
|
||||
raise ValueError('Non equivalent lengths of x and y')
|
||||
|
||||
def get_samples(self) -> Collection[Any]:
|
||||
def get_samples(self) -> DATA_ARRAY_TYPE:
|
||||
"""Return data samples"""
|
||||
return self.x
|
||||
|
||||
def get_labels(self) -> Collection[Any]:
|
||||
def get_labels(self) -> DATA_ARRAY_TYPE:
|
||||
"""Return labels"""
|
||||
return self.y
|
||||
|
||||
|
|
@ -192,7 +196,7 @@ class Data:
|
|||
The class stores train and test datasets.
|
||||
If neither of the datasets was provided,
|
||||
Both train and test datasets will be created using
|
||||
Factory command to create dataset instance
|
||||
DatasetFactory to create a dataset instance
|
||||
"""
|
||||
if train or test:
|
||||
self.train = train
|
||||
|
|
@ -209,18 +213,18 @@ class Data:
|
|||
"""Return test DatasetBase"""
|
||||
return self.test
|
||||
|
||||
def get_train_samples(self):
|
||||
def get_train_samples(self) -> Collection[Any]:
|
||||
"""Return train set samples"""
|
||||
return self.train.get_samples()
|
||||
|
||||
def get_train_labels(self):
|
||||
def get_train_labels(self) -> Collection[Any]:
|
||||
"""Return train set labels"""
|
||||
return self.train.get_labels()
|
||||
|
||||
def get_test_samples(self):
|
||||
def get_test_samples(self) -> Collection[Any]:
|
||||
"""Return test set samples"""
|
||||
return self.test.get_samples()
|
||||
|
||||
def get_test_labels(self):
|
||||
def get_test_labels(self) -> Collection[Any]:
|
||||
"""Return test set labels"""
|
||||
return self.test.get_labels()
|
||||
return self.test.get_labels()
|
||||
|
|
|
|||
|
|
@ -1,34 +1,34 @@
|
|||
from abc import ABC, abstractmethod
|
||||
from typing import Union, List, Any, Optional
|
||||
import numpy as np
|
||||
from abc import ABCMeta, abstractmethod
|
||||
from typing import Any
|
||||
|
||||
class Model(ABC):
|
||||
from apt.utils.datasets import BaseDataset, DATA_ARRAY_TYPE
|
||||
|
||||
|
||||
class Model(metaclass=ABCMeta):
|
||||
"""
|
||||
Base class for ML model wrappers.
|
||||
Abstract base class for ML model wrappers.
|
||||
"""
|
||||
|
||||
def __init__(self, model: Any, **kwargs):
|
||||
"""
|
||||
Initialize a `Model` wrapper object.
|
||||
Initialize a `Model` wrapper object.
|
||||
|
||||
:param model: The original model object (of the underlying ML framework)
|
||||
:param model: The original model object (of the underlying ML framework)
|
||||
"""
|
||||
self._model = model
|
||||
|
||||
@abstractmethod
|
||||
def fit(self, x: np.ndarray, y: np.ndarray, **kwargs) -> None:
|
||||
def fit(self, train_data: BaseDataset, **kwargs) -> None:
|
||||
"""
|
||||
Fit the model using the training data `(x, y)`.
|
||||
Fit the model using the training data.
|
||||
|
||||
:param x: Training data.
|
||||
:type x: `np.ndarray` or `pandas.DataFrame`
|
||||
:param y: True labels.
|
||||
:type y: `np.ndarray` or `pandas.DataFrame`
|
||||
:param train_data: Training data.
|
||||
:type train_data: `BaseDataset`
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
@abstractmethod
|
||||
def predict(self, x: np.ndarray, **kwargs) -> np.ndarray:
|
||||
def predict(self, x: DATA_ARRAY_TYPE, **kwargs) -> DATA_ARRAY_TYPE:
|
||||
"""
|
||||
Perform predictions using the model for input `x`.
|
||||
|
||||
|
|
@ -46,67 +46,3 @@ class Model(ABC):
|
|||
:return: The model.
|
||||
"""
|
||||
return self._model
|
||||
|
||||
|
||||
class SingleOutputModel(Model):
|
||||
"""
|
||||
Wrapper class for ML models whose output is a single value (e.g., classification with label only output, regression).
|
||||
"""
|
||||
|
||||
|
||||
class MultipleOutputModel(Model):
|
||||
"""
|
||||
Wrapper class for ML models whose output is a vector (e.g., class probabilities or logits).
|
||||
"""
|
||||
|
||||
|
||||
class ModelWithLoss(Model):
|
||||
"""
|
||||
Wrapper class for ML models that support computing loss values for predictions.
|
||||
"""
|
||||
|
||||
def __init__(self, model: Any, loss: Optional[Any] = None, **kwargs):
|
||||
"""
|
||||
Initialize a `ModelWithLoss` wrapper object.
|
||||
|
||||
:param model: The original model object (of the underlying ML framework)
|
||||
:param loss: The loss function/object of the model (of the underlying ML framework)
|
||||
"""
|
||||
super().__init__(model, **kwargs)
|
||||
self._loss = loss
|
||||
|
||||
|
||||
# Probably not needed for now, as we will not be using these wrappers directly in ART.
|
||||
# @abstractmethod
|
||||
# def loss(self, x: np.ndarray, y: np.ndarray, **kwargs) -> np.ndarray:
|
||||
# """
|
||||
# Compute the loss of the model for samples `x`.
|
||||
#
|
||||
# :param x: Input samples.
|
||||
# :type x: `np.ndarray` or `pandas.DataFrame`
|
||||
# :param y: True labels.
|
||||
# :type y: `np.ndarray` or `pandas.DataFrame`
|
||||
# :return: Loss values.
|
||||
# """
|
||||
# raise NotImplementedError
|
||||
|
||||
|
||||
# Probably not needed for now, as we will not be using these wrappers directly in ART.
|
||||
# class ModelWithGradients(Model):
|
||||
# """
|
||||
# Wrapper class for ML models that support computing gradients.
|
||||
# """
|
||||
# @abstractmethod
|
||||
# def class_gradient(self, x: np.ndarray, label: Union[int, List[int], None] = None, **kwargs) -> np.ndarray:
|
||||
# """
|
||||
# Compute per-class derivatives w.r.t. input `x`.
|
||||
#
|
||||
# :param x: Input samples.
|
||||
# :type x: `np.ndarray` or `pandas.DataFrame`
|
||||
# :param label: Index of a specific class. If provided, the gradient of the specified class
|
||||
# is computed for all samples. Otherwise, gradients for all classes are computed for all samples.
|
||||
# :type label: int
|
||||
# :return: Gradients of input features w.r.t. each class in the form `(batch_size, nb_classes, input_shape)` when
|
||||
# computing for all classes, or `(batch_size, 1, input_shape)` when `label` is specified.
|
||||
# """
|
||||
# raise NotImplementedError
|
||||
|
|
|
|||
|
|
@ -1,7 +1,10 @@
|
|||
import numpy as np
|
||||
from sklearn.preprocessing import OneHotEncoder
|
||||
|
||||
from apt.utils.models import Model, ModelWithLoss, SingleOutputModel
|
||||
from sklearn.preprocessing import OneHotEncoder
|
||||
from sklearn.base import BaseEstimator
|
||||
|
||||
from apt.utils.models import Model
|
||||
from apt.utils.datasets import BaseDataset, DATA_ARRAY_TYPE
|
||||
|
||||
from art.estimators.classification.scikitlearn import SklearnClassifier as ArtSklearnClassifier
|
||||
from art.estimators.regression.scikitlearn import ScikitlearnRegressor
|
||||
|
|
@ -11,23 +14,21 @@ class SklearnModel(Model):
|
|||
"""
|
||||
Wrapper class for scikitlearn models.
|
||||
"""
|
||||
def score(self, x: np.ndarray, y: np.ndarray, **kwargs):
|
||||
def score(self, test_data: BaseDataset, **kwargs):
|
||||
"""
|
||||
Score the model using test data `(x, y)`.
|
||||
Score the model using test data.
|
||||
|
||||
:param x: Test data.
|
||||
:type x: `np.ndarray` or `pandas.DataFrame`
|
||||
:param y: True labels.
|
||||
:type y: `np.ndarray` or `pandas.DataFrame`
|
||||
:param test_data: Test data.
|
||||
:type test_data: `BaseDataset`
|
||||
"""
|
||||
return self.model.score(x, y, **kwargs)
|
||||
return self.model.score(test_data.get_samples(), test_data.get_labels(), **kwargs)
|
||||
|
||||
|
||||
class SklearnClassifier(SklearnModel):
|
||||
"""
|
||||
Wrapper class for scikitlearn classification models.
|
||||
"""
|
||||
def __init__(self, model, **kwargs):
|
||||
def __init__(self, model: BaseEstimator, **kwargs):
|
||||
"""
|
||||
Initialize a `SklearnClassifier` wrapper object.
|
||||
|
||||
|
|
@ -36,35 +37,33 @@ class SklearnClassifier(SklearnModel):
|
|||
super().__init__(model, **kwargs)
|
||||
self._art_model = ArtSklearnClassifier(model)
|
||||
|
||||
def fit(self, x: np.ndarray, y: np.ndarray, **kwargs) -> None:
|
||||
def fit(self, train_data: BaseDataset, **kwargs) -> None:
|
||||
"""
|
||||
Fit the model using the training data `(x, y)`.
|
||||
Fit the model using the training data.
|
||||
|
||||
:param x: Training data.
|
||||
:type x: `np.ndarray` or `pandas.DataFrame`
|
||||
:param y: True labels.
|
||||
:type y: `np.ndarray` or `pandas.DataFrame`
|
||||
:param train_data: Training data.
|
||||
:type train_data: `BaseDataset`
|
||||
"""
|
||||
encoder = OneHotEncoder(sparse=False)
|
||||
y_encoded = encoder.fit_transform(y.reshape(-1, 1))
|
||||
self._art_model.fit(x, y_encoded, **kwargs)
|
||||
y_encoded = encoder.fit_transform(train_data.get_labels().reshape(-1, 1))
|
||||
self._art_model.fit(train_data.get_samples(), y_encoded, **kwargs)
|
||||
|
||||
def predict(self, x: np.ndarray, **kwargs) -> np.ndarray:
|
||||
def predict(self, x: DATA_ARRAY_TYPE, **kwargs) -> DATA_ARRAY_TYPE:
|
||||
"""
|
||||
Perform predictions using the model for input `x`.
|
||||
|
||||
:param x: Input samples.
|
||||
:type x: `np.ndarray` or `pandas.DataFrame`
|
||||
:return: Predictions from the model.
|
||||
:return: Predictions from the model (class probabilities, if supported).
|
||||
"""
|
||||
return self._art_model.predict(x, **kwargs)
|
||||
|
||||
|
||||
class SklearnRegressor(SklearnModel, SingleOutputModel, ModelWithLoss):
|
||||
class SklearnRegressor(SklearnModel):
|
||||
"""
|
||||
Wrapper class for scikitlearn regression models.
|
||||
"""
|
||||
def __init__(self, model, **kwargs):
|
||||
def __init__(self, model: BaseEstimator, **kwargs):
|
||||
"""
|
||||
Initialize a `SklearnRegressor` wrapper object.
|
||||
|
||||
|
|
@ -73,18 +72,16 @@ class SklearnRegressor(SklearnModel, SingleOutputModel, ModelWithLoss):
|
|||
super().__init__(model, **kwargs)
|
||||
self._art_model = ScikitlearnRegressor(model)
|
||||
|
||||
def fit(self, x: np.ndarray, y: np.ndarray, **kwargs) -> None:
|
||||
def fit(self, train_data: BaseDataset, **kwargs) -> None:
|
||||
"""
|
||||
Fit the model using the training data `(x, y)`.
|
||||
Fit the model using the training data.
|
||||
|
||||
:param x: Training data.
|
||||
:type x: `np.ndarray` or `pandas.DataFrame`
|
||||
:param y: True labels.
|
||||
:type y: `np.ndarray` or `pandas.DataFrame`
|
||||
:param train_data: Training data.
|
||||
:type train_data: `BaseDataset`
|
||||
"""
|
||||
self._art_model.fit(x, y, **kwargs)
|
||||
self._art_model.fit(train_data.get_samples(), train_data.get_labels(), **kwargs)
|
||||
|
||||
def predict(self, x: np.ndarray, **kwargs) -> np.ndarray:
|
||||
def predict(self, x: DATA_ARRAY_TYPE, **kwargs) -> DATA_ARRAY_TYPE:
|
||||
"""
|
||||
Perform predictions using the model for input `x`.
|
||||
|
||||
|
|
@ -93,50 +90,3 @@ class SklearnRegressor(SklearnModel, SingleOutputModel, ModelWithLoss):
|
|||
:return: Predictions from the model.
|
||||
"""
|
||||
return self._art_model.predict(x, **kwargs)
|
||||
|
||||
def loss(self, x: np.ndarray, y: np.ndarray, **kwargs) -> np.ndarray:
|
||||
"""
|
||||
Compute the loss of the model for samples `x`.
|
||||
|
||||
:param x: Input samples.
|
||||
:type x: `np.ndarray` or `pandas.DataFrame`
|
||||
:param y: True labels.
|
||||
:type y: `np.ndarray` or `pandas.DataFrame`
|
||||
:return: Loss values.
|
||||
"""
|
||||
return self._art_model.compute_loss(x, y, **kwargs)
|
||||
|
||||
|
||||
# Probably not needed for now, as we will not be using these wrappers directly in ART.
|
||||
# class SklearnDecisionTreeClassifier(SklearnClassifier, MultipleOutputModel):
|
||||
# """
|
||||
# Wrapper class for scikitlearn decision tree classifier models.
|
||||
# """
|
||||
# def __init__(self, model):
|
||||
# """
|
||||
# Initialize a `DecisionTreeClassifier` wrapper object.
|
||||
#
|
||||
# :param model: The original sklearn decision tree model object
|
||||
# """
|
||||
# super().__init__(model)
|
||||
# self._art_model = ScikitlearnDecisionTreeClassifier(model)
|
||||
#
|
||||
# def get_decision_path(self, x: np.ndarray) -> np.ndarray:
|
||||
# """
|
||||
# Returns the nodes along the path taken in the tree when classifying x. Last node is the leaf, first node is the
|
||||
# root node.
|
||||
#
|
||||
# :param x: Input samples.
|
||||
# :type x: `np.ndarray` or `pandas.DataFrame`
|
||||
# :return: The indices of the nodes in the array structure of the tree.
|
||||
# """
|
||||
# return self._art_model.get_decision_path(x)
|
||||
#
|
||||
# def get_samples_at_node(self, node_id: int) -> int:
|
||||
# """
|
||||
# Returns the number of training samples mapped to a node.
|
||||
#
|
||||
# :param node_id: The ID of the node.
|
||||
# :return: Number of samples mapped to this node.
|
||||
# """
|
||||
# return self._art_model.get_samples_at_node(node_id)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue