Remove redundant code.

Use data wrappers in model wrapper APIs.
More typing.
This commit is contained in:
abigailt 2022-03-06 21:15:07 +02:00
parent 9f4d649934
commit 3d82db80c4
5 changed files with 57 additions and 166 deletions

View file

@@ -3,4 +3,4 @@ The AI Privacy Toolbox (datasets).
Implementation of datasets utility components for datasets creation, load, and store
"""
from apt.utils.datasets.datasets import DatasetABC, StoredDatasetABC, DatasetFactory, Data, BaseDataset
from apt.utils.datasets.datasets import DatasetABC, StoredDatasetABC, DatasetFactory, Data, BaseDataset, DATA_ARRAY_TYPE

View file

@@ -5,17 +5,21 @@ Implementation of utility classes for dataset handling
"""
from abc import ABCMeta, abstractmethod
from typing import Callable, Collection, Any
from typing import Callable, Collection, Any, Union
import tarfile
import os
import urllib.request
import numpy as np
import pandas as pd
import logging
logger = logging.getLogger(__name__)
DATA_ARRAY_TYPE = Union[np.ndarray, pd.DataFrame]
class DatasetABC(metaclass=ABCMeta):
"""Base Abstract Class for Dataset"""
@@ -122,7 +126,7 @@ class StoredDatasetABC(DatasetABC):
class BaseDataset(DatasetABC):
"""Base Class for Dataset"""
def __init__(self, x, y, **kwargs):
def __init__(self, x: DATA_ARRAY_TYPE, y: DATA_ARRAY_TYPE, **kwargs):
"""
BaseDataset constructor.
:param x: collection of data samples
@@ -135,11 +139,11 @@ class BaseDataset(DatasetABC):
if len(self.x) != len(self.y):
raise ValueError('Non equivalent lengths of x and y')
def get_samples(self) -> Collection[Any]:
def get_samples(self) -> DATA_ARRAY_TYPE:
"""Return data samples"""
return self.x
def get_labels(self) -> Collection[Any]:
def get_labels(self) -> DATA_ARRAY_TYPE:
"""Return labels"""
return self.y
@@ -192,7 +196,7 @@ class Data:
The class stores train and test datasets.
If neither of the datasets was provided,
both train and test datasets will be created using
Factory command to create dataset instance
DatasetFactory to create a dataset instance
"""
if train or test:
self.train = train
@@ -209,18 +213,18 @@ class Data:
"""Return test DatasetBase"""
return self.test
def get_train_samples(self):
def get_train_samples(self) -> Collection[Any]:
"""Return train set samples"""
return self.train.get_samples()
def get_train_labels(self):
def get_train_labels(self) -> Collection[Any]:
"""Return train set labels"""
return self.train.get_labels()
def get_test_samples(self):
def get_test_samples(self) -> Collection[Any]:
"""Return test set samples"""
return self.test.get_samples()
def get_test_labels(self):
def get_test_labels(self) -> Collection[Any]:
"""Return test set labels"""
return self.test.get_labels()
return self.test.get_labels()

View file

@@ -1,34 +1,34 @@
from abc import ABC, abstractmethod
from typing import Union, List, Any, Optional
import numpy as np
from abc import ABCMeta, abstractmethod
from typing import Any
class Model(ABC):
from apt.utils.datasets import BaseDataset, DATA_ARRAY_TYPE
class Model(metaclass=ABCMeta):
"""
Base class for ML model wrappers.
Abstract base class for ML model wrappers.
"""
def __init__(self, model: Any, **kwargs):
"""
Initialize a `Model` wrapper object.
Initialize a `Model` wrapper object.
:param model: The original model object (of the underlying ML framework)
:param model: The original model object (of the underlying ML framework)
"""
self._model = model
@abstractmethod
def fit(self, x: np.ndarray, y: np.ndarray, **kwargs) -> None:
def fit(self, train_data: BaseDataset, **kwargs) -> None:
"""
Fit the model using the training data `(x, y)`.
Fit the model using the training data.
:param x: Training data.
:type x: `np.ndarray` or `pandas.DataFrame`
:param y: True labels.
:type y: `np.ndarray` or `pandas.DataFrame`
:param train_data: Training data.
:type train_data: `BaseDataset`
"""
raise NotImplementedError
@abstractmethod
def predict(self, x: np.ndarray, **kwargs) -> np.ndarray:
def predict(self, x: DATA_ARRAY_TYPE, **kwargs) -> DATA_ARRAY_TYPE:
"""
Perform predictions using the model for input `x`.
@@ -46,67 +46,3 @@ class Model(ABC):
:return: The model.
"""
return self._model
class SingleOutputModel(Model):
"""
Wrapper class for ML models whose output is a single value (e.g., classification with label only output, regression).
"""
class MultipleOutputModel(Model):
"""
Wrapper class for ML models whose output is a vector (e.g., class probabilities or logits).
"""
class ModelWithLoss(Model):
"""
Wrapper class for ML models that support computing loss values for predictions.
"""
def __init__(self, model: Any, loss: Optional[Any] = None, **kwargs):
"""
Initialize a `ModelWithLoss` wrapper object.
:param model: The original model object (of the underlying ML framework)
:param loss: The loss function/object of the model (of the underlying ML framework)
"""
super().__init__(model, **kwargs)
self._loss = loss
# Probably not needed for now, as we will not be using these wrappers directly in ART.
# @abstractmethod
# def loss(self, x: np.ndarray, y: np.ndarray, **kwargs) -> np.ndarray:
# """
# Compute the loss of the model for samples `x`.
#
# :param x: Input samples.
# :type x: `np.ndarray` or `pandas.DataFrame`
# :param y: True labels.
# :type y: `np.ndarray` or `pandas.DataFrame`
# :return: Loss values.
# """
# raise NotImplementedError
# Probably not needed for now, as we will not be using these wrappers directly in ART.
# class ModelWithGradients(Model):
# """
# Wrapper class for ML models that support computing gradients.
# """
# @abstractmethod
# def class_gradient(self, x: np.ndarray, label: Union[int, List[int], None] = None, **kwargs) -> np.ndarray:
# """
# Compute per-class derivatives w.r.t. input `x`.
#
# :param x: Input samples.
# :type x: `np.ndarray` or `pandas.DataFrame`
# :param label: Index of a specific class. If provided, the gradient of the specified class
# is computed for all samples. Otherwise, gradients for all classes are computed for all samples.
# :param label: int
# :return: Gradients of input features w.r.t. each class in the form `(batch_size, nb_classes, input_shape)` when
# computing for all classes, or `(batch_size, 1, input_shape)` when `label` is specified.
# """
# raise NotImplementedError

View file

@@ -1,7 +1,10 @@
import numpy as np
from sklearn.preprocessing import OneHotEncoder
from apt.utils.models import Model, ModelWithLoss, SingleOutputModel
from sklearn.preprocessing import OneHotEncoder
from sklearn.base import BaseEstimator
from apt.utils.models import Model
from apt.utils.datasets import BaseDataset, DATA_ARRAY_TYPE
from art.estimators.classification.scikitlearn import SklearnClassifier as ArtSklearnClassifier
from art.estimators.regression.scikitlearn import ScikitlearnRegressor
@@ -11,23 +14,21 @@ class SklearnModel(Model):
"""
Wrapper class for scikitlearn models.
"""
def score(self, x: np.ndarray, y: np.ndarray, **kwargs):
def score(self, test_data: BaseDataset, **kwargs):
"""
Score the model using test data `(x, y)`.
Score the model using test data.
:param x: Test data.
:type x: `np.ndarray` or `pandas.DataFrame`
:param y: True labels.
:type y: `np.ndarray` or `pandas.DataFrame`
:param test_data: Test data.
:type test_data: `BaseDataset`
"""
return self.model.score(x, y, **kwargs)
return self.model.score(test_data.get_samples(), test_data.get_labels(), **kwargs)
class SklearnClassifier(SklearnModel):
"""
Wrapper class for scikitlearn classification models.
"""
def __init__(self, model, **kwargs):
def __init__(self, model: BaseEstimator, **kwargs):
"""
Initialize a `SklearnClassifier` wrapper object.
@@ -36,35 +37,33 @@ class SklearnClassifier(SklearnModel):
super().__init__(model, **kwargs)
self._art_model = ArtSklearnClassifier(model)
def fit(self, x: np.ndarray, y: np.ndarray, **kwargs) -> None:
def fit(self, train_data: BaseDataset, **kwargs) -> None:
"""
Fit the model using the training data `(x, y)`.
Fit the model using the training data.
:param x: Training data.
:type x: `np.ndarray` or `pandas.DataFrame`
:param y: True labels.
:type y: `np.ndarray` or `pandas.DataFrame`
:param train_data: Training data.
:type train_data: `BaseDataset`
"""
encoder = OneHotEncoder(sparse=False)
y_encoded = encoder.fit_transform(y.reshape(-1, 1))
self._art_model.fit(x, y_encoded, **kwargs)
y_encoded = encoder.fit_transform(train_data.get_labels().reshape(-1, 1))
self._art_model.fit(train_data.get_samples(), y_encoded, **kwargs)
def predict(self, x: np.ndarray, **kwargs) -> np.ndarray:
def predict(self, x: DATA_ARRAY_TYPE, **kwargs) -> DATA_ARRAY_TYPE:
"""
Perform predictions using the model for input `x`.
:param x: Input samples.
:type x: `np.ndarray` or `pandas.DataFrame`
:return: Predictions from the model.
:return: Predictions from the model (class probabilities, if supported).
"""
return self._art_model.predict(x, **kwargs)
class SklearnRegressor(SklearnModel, SingleOutputModel, ModelWithLoss):
class SklearnRegressor(SklearnModel):
"""
Wrapper class for scikitlearn regression models.
"""
def __init__(self, model, **kwargs):
def __init__(self, model: BaseEstimator, **kwargs):
"""
Initialize a `SklearnRegressor` wrapper object.
@@ -73,18 +72,16 @@ class SklearnRegressor(SklearnModel, SingleOutputModel, ModelWithLoss):
super().__init__(model, **kwargs)
self._art_model = ScikitlearnRegressor(model)
def fit(self, x: np.ndarray, y: np.ndarray, **kwargs) -> None:
def fit(self, train_data: BaseDataset, **kwargs) -> None:
"""
Fit the model using the training data `(x, y)`.
Fit the model using the training data.
:param x: Training data.
:type x: `np.ndarray` or `pandas.DataFrame`
:param y: True labels.
:type y: `np.ndarray` or `pandas.DataFrame`
:param train_data: Training data.
:type train_data: `BaseDataset`
"""
self._art_model.fit(x, y, **kwargs)
self._art_model.fit(train_data.get_samples(), train_data.get_labels(), **kwargs)
def predict(self, x: np.ndarray, **kwargs) -> np.ndarray:
def predict(self, x: DATA_ARRAY_TYPE, **kwargs) -> DATA_ARRAY_TYPE:
"""
Perform predictions using the model for input `x`.
@@ -93,50 +90,3 @@ class SklearnRegressor(SklearnModel, SingleOutputModel, ModelWithLoss):
:return: Predictions from the model.
"""
return self._art_model.predict(x, **kwargs)
def loss(self, x: np.ndarray, y: np.ndarray, **kwargs) -> np.ndarray:
"""
Compute the loss of the model for samples `x`.
:param x: Input samples.
:type x: `np.ndarray` or `pandas.DataFrame`
:param y: True labels.
:type y: `np.ndarray` or `pandas.DataFrame`
:return: Loss values.
"""
return self._art_model.compute_loss(x, y, **kwargs)
# Probably not needed for now, as we will not be using these wrappers directly in ART.
# class SklearnDecisionTreeClassifier(SklearnClassifier, MultipleOutputModel):
# """
# Wrapper class for scikitlearn decision tree classifier models.
# """
# def __init__(self, model):
# """
# Initialize a `DecisionTreeClassifier` wrapper object.
#
# :param model: The original sklearn decision tree model object
# """
# super().__init__(model)
# self._art_model = ScikitlearnDecisionTreeClassifier(model)
#
# def get_decision_path(self, x: np.ndarray) -> np.ndarray:
# """
# Returns the nodes along the path taken in the tree when classifying x. Last node is the leaf, first node is the
# root node.
#
# :param x: Input samples.
# :type x: `np.ndarray` or `pandas.DataFrame`
# :return: The indices of the nodes in the array structure of the tree.
# """
# return self._art_model.get_decision_path(x)
#
# def get_samples_at_node(self, node_id: int) -> int:
# """
# Returns the number of training samples mapped to a node.
#
# :param node_id: The ID of the node.
# :return: Number of samples mapped to this node.
# """
# return self._art_model.get_samples_at_node(node_id)