Add more to wrappers

This commit is contained in:
abigailt 2022-03-15 11:42:57 +02:00
parent f2df2fcc8c
commit 45cc9180b8
6 changed files with 74 additions and 30 deletions

View file

@@ -3,4 +3,5 @@ The AI Privacy Toolbox (datasets).
Implementation of datasets utility components for datasets creation, load, and store
"""
from apt.utils.datasets.datasets import Dataset, StoredDataset, DatasetFactory, Data, ArrayDataset, DATA_ARRAY_TYPE
from apt.utils.datasets.datasets import Dataset, StoredDataset, DatasetFactory, Data, ArrayDataset, \
OUTPUT_DATA_ARRAY_TYPE, DATA_PANDAS_NUMPY_TYPE

View file

@@ -5,7 +5,7 @@ Implementation of utility classes for dataset handling
"""
from abc import ABCMeta, abstractmethod
from typing import Callable, Collection, Any, Union
from typing import Callable, Collection, Any, Union, List, Optional
import tarfile
import os
@@ -13,11 +13,14 @@ import urllib.request
import numpy as np
import pandas as pd
import logging
from torch import Tensor
logger = logging.getLogger(__name__)
DATA_ARRAY_TYPE = Union[np.ndarray, pd.DataFrame]
INPUT_DATA_ARRAY_TYPE = Union[np.ndarray, pd.DataFrame, List, Tensor]
OUTPUT_DATA_ARRAY_TYPE = np.ndarray
DATA_PANDAS_NUMPY_TYPE = Union[np.ndarray, pd.DataFrame]
class Dataset(metaclass=ABCMeta):
@@ -124,28 +127,50 @@ class StoredDataset(Dataset):
class ArrayDataset(Dataset):
"""Dataset that is based on x and y arrays (e.g., numpy/pandas)"""
"""Dataset that is based on x and y arrays (e.g., numpy/pandas/list...)"""
def __init__(self, x: DATA_ARRAY_TYPE, y: DATA_ARRAY_TYPE, **kwargs):
def __init__(self, x: INPUT_DATA_ARRAY_TYPE, y: Optional[INPUT_DATA_ARRAY_TYPE] = None, **kwargs):
"""
ArrayDataset constructor.
:param x: collection of data samples
:param y: collection of labels
:param y: collection of labels (optional)
:param kwargs: dataset parameters
"""
self.x = x
self.y = y
# convert to numpy
if type(x) == np.ndarray:
self._x = x
elif type(x) == pd.DataFrame:
self._x = x.to_numpy()
elif isinstance(x, list):
self._x = np.array(x)
elif type(x) == Tensor:
self._x = x.numpy()
else:
raise ValueError('Non supported type for x: ', type(x).__name__)
if len(self.x) != len(self.y):
self._y = None
if y is not None:
if type(y) == np.ndarray:
self._y = y
elif type(y) == pd.DataFrame:
self._y = y.to_numpy()
elif isinstance(y, list):
self._y = np.array(y)
elif type(y) == Tensor:
self._y = y.numpy()
else:
raise ValueError('Non supported type for y: ', type(y).__name__)
if y is not None and len(self._x) != len(self._y):
raise ValueError('Non equivalent lengths of x and y')
def get_samples(self) -> DATA_ARRAY_TYPE:
"""Return data samples"""
return self.x
def get_samples(self) -> OUTPUT_DATA_ARRAY_TYPE:
"""Return data samples as numpy array"""
return self._x
def get_labels(self) -> DATA_ARRAY_TYPE:
"""Return labels"""
return self.y
def get_labels(self) -> OUTPUT_DATA_ARRAY_TYPE:
"""Return labels as numpy array"""
return self._y
class DatasetFactory:
@@ -189,7 +214,6 @@ class DatasetFactory:
class Data:
def __init__(self, train: Dataset = None, test: Dataset = None, **kwargs):
"""
Data class constructor.

View file

@@ -1,2 +1,2 @@
from apt.utils.models.model import Model
from apt.utils.models.model import Model, ModelOutputType
from apt.utils.models.sklearn_model import SklearnModel, SklearnClassifier, SklearnRegressor

View file

@@ -1,7 +1,14 @@
from abc import ABCMeta, abstractmethod
from typing import Any
from enum import Enum, auto
from apt.utils.datasets import Dataset, DATA_ARRAY_TYPE
from apt.utils.datasets import Dataset, OUTPUT_DATA_ARRAY_TYPE
class ModelOutputType(Enum):
CLASSIFIER_VECTOR = auto() # probabilities or logits
CLASSIFIER_SCALAR = auto() # label only
REGRESSOR_SCALAR = auto() # value
class Model(metaclass=ABCMeta):
@@ -9,13 +16,16 @@ class Model(metaclass=ABCMeta):
Abstract base class for ML model wrappers.
"""
def __init__(self, model: Any, **kwargs):
def __init__(self, model: Any, output_type: ModelOutputType, **kwargs):
"""
Initialize a `Model` wrapper object.
:param model: The original model object (of the underlying ML framework)
:param output_type: The type of output the model yields (vector/label only for classifiers,
value for regressors)
"""
self._model = model
self._output_type = output_type
@abstractmethod
def fit(self, train_data: Dataset, **kwargs) -> None:
@@ -28,7 +38,7 @@ class Model(metaclass=ABCMeta):
raise NotImplementedError
@abstractmethod
def predict(self, x: DATA_ARRAY_TYPE, **kwargs) -> DATA_ARRAY_TYPE:
def predict(self, x: Dataset, **kwargs) -> OUTPUT_DATA_ARRAY_TYPE:
"""
Perform predictions using the model for input `x`.
@@ -39,10 +49,19 @@
raise NotImplementedError
@property
def model(self):
def model(self) -> Any:
"""
Return the model.
:return: The model.
"""
return self._model
@property
def output_type(self) -> ModelOutputType:
"""
Return the model's output type.
:return: The model's output type.
"""
return self._output_type

View file

@@ -3,8 +3,8 @@ import numpy as np
from sklearn.preprocessing import OneHotEncoder
from sklearn.base import BaseEstimator
from apt.utils.models import Model
from apt.utils.datasets import Dataset, DATA_ARRAY_TYPE
from apt.utils.models import Model, ModelOutputType
from apt.utils.datasets import Dataset, OUTPUT_DATA_ARRAY_TYPE
from art.estimators.classification.scikitlearn import SklearnClassifier as ArtSklearnClassifier
from art.estimators.regression.scikitlearn import ScikitlearnRegressor
@@ -28,13 +28,13 @@ class SklearnClassifier(SklearnModel):
"""
Wrapper class for scikitlearn classification models.
"""
def __init__(self, model: BaseEstimator, **kwargs):
def __init__(self, model: BaseEstimator, output_type: ModelOutputType, **kwargs):
"""
Initialize a `SklearnClassifier` wrapper object.
:param model: The original sklearn model object
"""
super().__init__(model, **kwargs)
super().__init__(model, output_type, **kwargs)
self._art_model = ArtSklearnClassifier(model)
def fit(self, train_data: Dataset, **kwargs) -> None:
@@ -48,7 +48,7 @@ class SklearnClassifier(SklearnModel):
y_encoded = encoder.fit_transform(train_data.get_labels().reshape(-1, 1))
self._art_model.fit(train_data.get_samples(), y_encoded, **kwargs)
def predict(self, x: DATA_ARRAY_TYPE, **kwargs) -> DATA_ARRAY_TYPE:
def predict(self, x: Dataset, **kwargs) -> OUTPUT_DATA_ARRAY_TYPE:
"""
Perform predictions using the model for input `x`.
@@ -69,7 +69,7 @@ class SklearnRegressor(SklearnModel):
:param model: The original sklearn model object
"""
super().__init__(model, **kwargs)
super().__init__(model, ModelOutputType.REGRESSOR_SCALAR, **kwargs)
self._art_model = ScikitlearnRegressor(model)
def fit(self, train_data: Dataset, **kwargs) -> None:
@ -81,7 +81,7 @@ class SklearnRegressor(SklearnModel):
"""
self._art_model.fit(train_data.get_samples(), train_data.get_labels(), **kwargs)
def predict(self, x: DATA_ARRAY_TYPE, **kwargs) -> DATA_ARRAY_TYPE:
def predict(self, x: Dataset, **kwargs) -> OUTPUT_DATA_ARRAY_TYPE:
"""
Perform predictions using the model for input `x`.

View file

@@ -1,6 +1,6 @@
import pytest
from apt.utils.models import SklearnClassifier, SklearnRegressor
from apt.utils.models import SklearnClassifier, SklearnRegressor, ModelOutputType
from apt.utils.datasets import ArrayDataset
from apt.utils import dataset_utils
@@ -11,7 +11,7 @@ from sklearn.ensemble import RandomForestClassifier
def test_sklearn_classifier():
(x_train, y_train), (x_test, y_test) = dataset_utils.get_iris_dataset()
underlying_model = RandomForestClassifier()
model = SklearnClassifier(underlying_model)
model = SklearnClassifier(underlying_model, ModelOutputType.CLASSIFIER_VECTOR)
train = ArrayDataset(x_train, y_train)
test = ArrayDataset(x_test, y_test)
model.fit(train)