mirror of
https://github.com/IBM/ai-privacy-toolkit.git
synced 2026-06-29 15:59:38 +02:00
Support additional use cases for data (#46)
* Make ART black box classifier not apply preprocessing to data * Add option to store predictions (in addition to x,y) in Dataset and Data classes
This commit is contained in:
parent
e25e58b253
commit
00f9c16863
6 changed files with 139 additions and 62 deletions
|
|
@ -4,4 +4,4 @@ Implementation of datasets utility components for datasets creation, load, and s
|
|||
"""
|
||||
|
||||
from apt.utils.datasets.datasets import Dataset, StoredDataset, DatasetFactory, Data, ArrayDataset, \
|
||||
OUTPUT_DATA_ARRAY_TYPE, DATA_PANDAS_NUMPY_TYPE
|
||||
DatasetWithPredictions, OUTPUT_DATA_ARRAY_TYPE, DATA_PANDAS_NUMPY_TYPE
|
||||
|
|
|
|||
|
|
@ -38,7 +38,7 @@ class Dataset(metaclass=ABCMeta):
|
|||
|
||||
:return: the data samples
|
||||
"""
|
||||
pass
|
||||
raise NotImplementedError
|
||||
|
||||
@abstractmethod
|
||||
def get_labels(self) -> Collection[Any]:
|
||||
|
|
@ -47,7 +47,16 @@ class Dataset(metaclass=ABCMeta):
|
|||
|
||||
:return: the labels
|
||||
"""
|
||||
pass
|
||||
raise NotImplementedError
|
||||
|
||||
@abstractmethod
|
||||
def get_predictions(self) -> OUTPUT_DATA_ARRAY_TYPE:
|
||||
"""
|
||||
Get predictions
|
||||
|
||||
:return: predictions as numpy array
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
def _array2numpy(self, arr: INPUT_DATA_ARRAY_TYPE) -> OUTPUT_DATA_ARRAY_TYPE:
|
||||
"""
|
||||
|
|
@ -102,7 +111,7 @@ class StoredDataset(Dataset):
|
|||
:type path: string
|
||||
:return: None
|
||||
"""
|
||||
pass
|
||||
raise NotImplementedError
|
||||
|
||||
@abstractmethod
|
||||
def load(self, **kwargs):
|
||||
|
|
@ -111,7 +120,7 @@ class StoredDataset(Dataset):
|
|||
|
||||
:return: None
|
||||
"""
|
||||
pass
|
||||
raise NotImplementedError
|
||||
|
||||
@staticmethod
|
||||
def download(url: str, dest_path: str, filename: str, unzip: Optional[bool] = False) -> None:
|
||||
|
|
@ -224,7 +233,7 @@ class ArrayDataset(Dataset):
|
|||
raise ValueError("The supplied features are not the same as in the data features")
|
||||
self.features_names = x.columns.to_list()
|
||||
|
||||
if y is not None and len(self._x) != len(self._y):
|
||||
if self._y is not None and len(self._x) != len(self._y):
|
||||
raise ValueError('Non equivalent lengths of x and y')
|
||||
|
||||
def get_samples(self) -> OUTPUT_DATA_ARRAY_TYPE:
|
||||
|
|
@ -243,6 +252,70 @@ class ArrayDataset(Dataset):
|
|||
"""
|
||||
return self._y
|
||||
|
||||
def get_predictions(self) -> OUTPUT_DATA_ARRAY_TYPE:
|
||||
"""
|
||||
Get predictions
|
||||
|
||||
:return: predictions as numpy array
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
class DatasetWithPredictions(Dataset):
|
||||
"""
|
||||
Dataset that is based on arrays (e.g., numpy/pandas/list...). Includes predictions from a model, and possibly also
|
||||
features and true labels.
|
||||
|
||||
:param x: collection of data samples
|
||||
:type x: numpy array or pandas DataFrame or list or pytorch Tensor
|
||||
:param y: collection of labels
|
||||
:type y: numpy array or pandas DataFrame or list or pytorch Tensor, optional
|
||||
:param feature_names: The feature names, in the order that they appear in the data
|
||||
:type feature_names: list of strings, optional
|
||||
"""
|
||||
|
||||
def __init__(self, pred: INPUT_DATA_ARRAY_TYPE, x: Optional[INPUT_DATA_ARRAY_TYPE] = None,
|
||||
y: Optional[INPUT_DATA_ARRAY_TYPE] = None, features_names: Optional[list] = None, **kwargs):
|
||||
self.is_pandas = False
|
||||
self.features_names = features_names
|
||||
self._pred = self._array2numpy(pred)
|
||||
self._y = self._array2numpy(y) if y is not None else None
|
||||
self._x = self._array2numpy(x) if x is not None else None
|
||||
if self.is_pandas and x is not None:
|
||||
if features_names and not np.array_equal(features_names, x.columns):
|
||||
raise ValueError("The supplied features are not the same as in the data features")
|
||||
self.features_names = x.columns.to_list()
|
||||
|
||||
if self._y is not None and len(self._pred) != len(self._y):
|
||||
raise ValueError('Non equivalent lengths of pred and y')
|
||||
|
||||
if self._x is not None and len(self._x) != len(self._pred):
|
||||
raise ValueError('Non equivalent lengths of x and pred')
|
||||
|
||||
def get_samples(self) -> OUTPUT_DATA_ARRAY_TYPE:
|
||||
"""
|
||||
Get data samples
|
||||
|
||||
:return: data samples as numpy array
|
||||
"""
|
||||
return self._x
|
||||
|
||||
def get_labels(self) -> OUTPUT_DATA_ARRAY_TYPE:
|
||||
"""
|
||||
Get labels
|
||||
|
||||
:return: labels as numpy array
|
||||
"""
|
||||
return self._y
|
||||
|
||||
def get_predictions(self) -> OUTPUT_DATA_ARRAY_TYPE:
|
||||
"""
|
||||
Get predictions
|
||||
|
||||
:return: predictions as numpy array
|
||||
"""
|
||||
return self._pred
|
||||
|
||||
|
||||
class PytorchData(Dataset):
|
||||
"""
|
||||
|
|
@ -284,6 +357,14 @@ class PytorchData(Dataset):
|
|||
"""
|
||||
return self._array2numpy(self._y) if self._y is not None else None
|
||||
|
||||
def get_predictions(self) -> OUTPUT_DATA_ARRAY_TYPE:
|
||||
"""
|
||||
Get predictions
|
||||
|
||||
:return: predictions as numpy array
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
def get_sample_item(self, idx: int) -> Tensor:
|
||||
"""
|
||||
Get the sample according to the given index
|
||||
|
|
|
|||
|
|
@ -270,7 +270,8 @@ class BlackboxClassifierPredictions(BlackboxClassifier):
|
|||
self._nb_classes = get_nb_classes(y_pred)
|
||||
self._input_shape = x_pred.shape[1:]
|
||||
predict_fn = (x_pred, y_pred)
|
||||
self._art_model = BlackBoxClassifier(predict_fn, self._input_shape, self._nb_classes, fuzzy_float_compare=True)
|
||||
self._art_model = BlackBoxClassifier(predict_fn, self._input_shape, self._nb_classes, fuzzy_float_compare=True,
|
||||
preprocessing=None)
|
||||
|
||||
|
||||
class BlackboxClassifierPredictFunction(BlackboxClassifier):
|
||||
|
|
@ -298,4 +299,4 @@ class BlackboxClassifierPredictFunction(BlackboxClassifier):
|
|||
super().__init__(model, output_type, black_box_access=True, unlimited_queries=unlimited_queries, **kwargs)
|
||||
self._nb_classes = nb_classes
|
||||
self._input_shape = input_shape
|
||||
self._art_model = BlackBoxClassifier(model, self._input_shape, self._nb_classes)
|
||||
self._art_model = BlackBoxClassifier(model, self._input_shape, self._nb_classes, preprocessing=None)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue