mirror of
https://github.com/IBM/ai-privacy-toolkit.git
synced 2026-04-24 20:36:21 +02:00
Add more to wrappers
This commit is contained in:
parent
f2df2fcc8c
commit
45cc9180b8
6 changed files with 74 additions and 30 deletions
|
|
@ -3,4 +3,5 @@ The AI Privacy Toolbox (datasets).
|
|||
Implementation of datasets utility components for datasets creation, load, and store
|
||||
"""
|
||||
|
||||
from apt.utils.datasets.datasets import Dataset, StoredDataset, DatasetFactory, Data, ArrayDataset, DATA_ARRAY_TYPE
|
||||
from apt.utils.datasets.datasets import Dataset, StoredDataset, DatasetFactory, Data, ArrayDataset, \
|
||||
OUTPUT_DATA_ARRAY_TYPE, DATA_PANDAS_NUMPY_TYPE
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@ Implementation of utility classes for dataset handling
|
|||
"""
|
||||
|
||||
from abc import ABCMeta, abstractmethod
|
||||
from typing import Callable, Collection, Any, Union
|
||||
from typing import Callable, Collection, Any, Union, List, Optional
|
||||
|
||||
import tarfile
|
||||
import os
|
||||
|
|
@ -13,11 +13,14 @@ import urllib.request
|
|||
import numpy as np
|
||||
import pandas as pd
|
||||
import logging
|
||||
from torch import Tensor
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
DATA_ARRAY_TYPE = Union[np.ndarray, pd.DataFrame]
|
||||
INPUT_DATA_ARRAY_TYPE = Union[np.ndarray, pd.DataFrame, List, Tensor]
|
||||
OUTPUT_DATA_ARRAY_TYPE = np.ndarray
|
||||
DATA_PANDAS_NUMPY_TYPE = Union[np.ndarray, pd.DataFrame]
|
||||
|
||||
|
||||
class Dataset(metaclass=ABCMeta):
|
||||
|
|
@ -124,28 +127,50 @@ class StoredDataset(Dataset):
|
|||
|
||||
|
||||
class ArrayDataset(Dataset):
|
||||
"""Dataset that is based on x and y arrays (e.g., numpy/pandas)"""
|
||||
"""Dataset that is based on x and y arrays (e.g., numpy/pandas/list...)"""
|
||||
|
||||
def __init__(self, x: DATA_ARRAY_TYPE, y: DATA_ARRAY_TYPE, **kwargs):
|
||||
def __init__(self, x: INPUT_DATA_ARRAY_TYPE, y: Optional[INPUT_DATA_ARRAY_TYPE] = None, **kwargs):
|
||||
"""
|
||||
ArrayDataset constructor.
|
||||
:param x: collection of data samples
|
||||
:param y: collection of labels
|
||||
:param y: collection of labels (optional)
|
||||
:param kwargs: dataset parameters
|
||||
"""
|
||||
self.x = x
|
||||
self.y = y
|
||||
# convert to numpy
|
||||
if type(x) == np.ndarray:
|
||||
self._x = x
|
||||
elif type(x) == pd.DataFrame:
|
||||
self._x = x.to_numpy()
|
||||
elif isinstance(x, list):
|
||||
self._x = np.array(x)
|
||||
elif type(x) == Tensor:
|
||||
self._x = x.numpy()
|
||||
else:
|
||||
raise ValueError('Non supported type for x: ', type(x).__name__)
|
||||
|
||||
if len(self.x) != len(self.y):
|
||||
self._y = None
|
||||
if y is not None:
|
||||
if type(y) == np.ndarray:
|
||||
self._y = y
|
||||
elif type(y) == pd.DataFrame:
|
||||
self._y = y.to_numpy()
|
||||
elif isinstance(y, list):
|
||||
self._y = np.array(y)
|
||||
elif type(y) == Tensor:
|
||||
self._y = y.numpy()
|
||||
else:
|
||||
raise ValueError('Non supported type for y: ', type(y).__name__)
|
||||
|
||||
if y is not None and len(self._x) != len(self._y):
|
||||
raise ValueError('Non equivalent lengths of x and y')
|
||||
|
||||
def get_samples(self) -> DATA_ARRAY_TYPE:
|
||||
"""Return data samples"""
|
||||
return self.x
|
||||
def get_samples(self) -> OUTPUT_DATA_ARRAY_TYPE:
|
||||
"""Return data samples as numpy array"""
|
||||
return self._x
|
||||
|
||||
def get_labels(self) -> DATA_ARRAY_TYPE:
|
||||
"""Return labels"""
|
||||
return self.y
|
||||
def get_labels(self) -> OUTPUT_DATA_ARRAY_TYPE:
|
||||
"""Return labels as numpy array"""
|
||||
return self._y
|
||||
|
||||
|
||||
class DatasetFactory:
|
||||
|
|
@ -189,7 +214,6 @@ class DatasetFactory:
|
|||
|
||||
|
||||
class Data:
|
||||
|
||||
def __init__(self, train: Dataset = None, test: Dataset = None, **kwargs):
|
||||
"""
|
||||
Data class constructor.
|
||||
|
|
|
|||
|
|
@ -1,2 +1,2 @@
|
|||
from apt.utils.models.model import Model
|
||||
from apt.utils.models.model import Model, ModelOutputType
|
||||
from apt.utils.models.sklearn_model import SklearnModel, SklearnClassifier, SklearnRegressor
|
||||
|
|
|
|||
|
|
@ -1,7 +1,14 @@
|
|||
from abc import ABCMeta, abstractmethod
|
||||
from typing import Any
|
||||
from enum import Enum, auto
|
||||
|
||||
from apt.utils.datasets import Dataset, DATA_ARRAY_TYPE
|
||||
from apt.utils.datasets import Dataset, OUTPUT_DATA_ARRAY_TYPE
|
||||
|
||||
|
||||
class ModelOutputType(Enum):
|
||||
CLASSIFIER_VECTOR = auto() # probabilities or logits
|
||||
CLASSIFIER_SCALAR = auto() # label only
|
||||
REGRESSOR_SCALAR = auto() # value
|
||||
|
||||
|
||||
class Model(metaclass=ABCMeta):
|
||||
|
|
@ -9,13 +16,16 @@ class Model(metaclass=ABCMeta):
|
|||
Abstract base class for ML model wrappers.
|
||||
"""
|
||||
|
||||
def __init__(self, model: Any, **kwargs):
|
||||
def __init__(self, model: Any, output_type: ModelOutputType, **kwargs):
|
||||
"""
|
||||
Initialize a `Model` wrapper object.
|
||||
|
||||
:param model: The original model object (of the underlying ML framework)
|
||||
:param output_type: The type of output the model yields (vector/label only for classifiers,
|
||||
value for regressors)
|
||||
"""
|
||||
self._model = model
|
||||
self._output_type = output_type
|
||||
|
||||
@abstractmethod
|
||||
def fit(self, train_data: Dataset, **kwargs) -> None:
|
||||
|
|
@ -28,7 +38,7 @@ class Model(metaclass=ABCMeta):
|
|||
raise NotImplementedError
|
||||
|
||||
@abstractmethod
|
||||
def predict(self, x: DATA_ARRAY_TYPE, **kwargs) -> DATA_ARRAY_TYPE:
|
||||
def predict(self, x: Dataset, **kwargs) -> OUTPUT_DATA_ARRAY_TYPE:
|
||||
"""
|
||||
Perform predictions using the model for input `x`.
|
||||
|
||||
|
|
@ -39,10 +49,19 @@ class Model(metaclass=ABCMeta):
|
|||
raise NotImplementedError
|
||||
|
||||
@property
|
||||
def model(self):
|
||||
def model(self) -> Any:
|
||||
"""
|
||||
Return the model.
|
||||
|
||||
:return: The model.
|
||||
"""
|
||||
return self._model
|
||||
|
||||
@property
|
||||
def output_type(self) -> ModelOutputType:
|
||||
"""
|
||||
Return the model's output type.
|
||||
|
||||
:return: The model's output type.
|
||||
"""
|
||||
return self._output_type
|
||||
|
|
|
|||
|
|
@ -3,8 +3,8 @@ import numpy as np
|
|||
from sklearn.preprocessing import OneHotEncoder
|
||||
from sklearn.base import BaseEstimator
|
||||
|
||||
from apt.utils.models import Model
|
||||
from apt.utils.datasets import Dataset, DATA_ARRAY_TYPE
|
||||
from apt.utils.models import Model, ModelOutputType
|
||||
from apt.utils.datasets import Dataset, OUTPUT_DATA_ARRAY_TYPE
|
||||
|
||||
from art.estimators.classification.scikitlearn import SklearnClassifier as ArtSklearnClassifier
|
||||
from art.estimators.regression.scikitlearn import ScikitlearnRegressor
|
||||
|
|
@ -28,13 +28,13 @@ class SklearnClassifier(SklearnModel):
|
|||
"""
|
||||
Wrapper class for scikitlearn classification models.
|
||||
"""
|
||||
def __init__(self, model: BaseEstimator, **kwargs):
|
||||
def __init__(self, model: BaseEstimator, output_type: ModelOutputType, **kwargs):
|
||||
"""
|
||||
Initialize a `SklearnClassifier` wrapper object.
|
||||
|
||||
:param model: The original sklearn model object
|
||||
"""
|
||||
super().__init__(model, **kwargs)
|
||||
super().__init__(model, output_type, **kwargs)
|
||||
self._art_model = ArtSklearnClassifier(model)
|
||||
|
||||
def fit(self, train_data: Dataset, **kwargs) -> None:
|
||||
|
|
@ -48,7 +48,7 @@ class SklearnClassifier(SklearnModel):
|
|||
y_encoded = encoder.fit_transform(train_data.get_labels().reshape(-1, 1))
|
||||
self._art_model.fit(train_data.get_samples(), y_encoded, **kwargs)
|
||||
|
||||
def predict(self, x: DATA_ARRAY_TYPE, **kwargs) -> DATA_ARRAY_TYPE:
|
||||
def predict(self, x: Dataset, **kwargs) -> OUTPUT_DATA_ARRAY_TYPE:
|
||||
"""
|
||||
Perform predictions using the model for input `x`.
|
||||
|
||||
|
|
@ -69,7 +69,7 @@ class SklearnRegressor(SklearnModel):
|
|||
|
||||
:param model: The original sklearn model object
|
||||
"""
|
||||
super().__init__(model, **kwargs)
|
||||
super().__init__(model, ModelOutputType.REGRESSOR_SCALAR, **kwargs)
|
||||
self._art_model = ScikitlearnRegressor(model)
|
||||
|
||||
def fit(self, train_data: Dataset, **kwargs) -> None:
|
||||
|
|
@ -81,7 +81,7 @@ class SklearnRegressor(SklearnModel):
|
|||
"""
|
||||
self._art_model.fit(train_data.get_samples(), train_data.get_labels(), **kwargs)
|
||||
|
||||
def predict(self, x: DATA_ARRAY_TYPE, **kwargs) -> DATA_ARRAY_TYPE:
|
||||
def predict(self, x: Dataset, **kwargs) -> OUTPUT_DATA_ARRAY_TYPE:
|
||||
"""
|
||||
Perform predictions using the model for input `x`.
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
import pytest
|
||||
|
||||
from apt.utils.models import SklearnClassifier, SklearnRegressor
|
||||
from apt.utils.models import SklearnClassifier, SklearnRegressor, ModelOutputType
|
||||
from apt.utils.datasets import ArrayDataset
|
||||
from apt.utils import dataset_utils
|
||||
|
||||
|
|
@ -11,7 +11,7 @@ from sklearn.ensemble import RandomForestClassifier
|
|||
def test_sklearn_classifier():
|
||||
(x_train, y_train), (x_test, y_test) = dataset_utils.get_iris_dataset()
|
||||
underlying_model = RandomForestClassifier()
|
||||
model = SklearnClassifier(underlying_model)
|
||||
model = SklearnClassifier(underlying_model, ModelOutputType.CLASSIFIER_VECTOR)
|
||||
train = ArrayDataset(x_train, y_train)
|
||||
test = ArrayDataset(x_test, y_test)
|
||||
model.fit(train)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue