mirror of
https://github.com/IBM/ai-privacy-toolkit.git
synced 2026-06-23 15:48:06 +02:00
Renaming
This commit is contained in:
parent
3d82db80c4
commit
f2df2fcc8c
6 changed files with 35 additions and 43 deletions
|
|
@ -3,4 +3,4 @@ The AI Privacy Toolbox (datasets).
|
||||||
Implementation of datasets utility components for datasets creation, load, and store
|
Implementation of datasets utility components for datasets creation, load, and store
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from apt.utils.datasets.datasets import DatasetABC, StoredDatasetABC, DatasetFactory, Data, BaseDataset, DATA_ARRAY_TYPE
|
from apt.utils.datasets.datasets import Dataset, StoredDataset, DatasetFactory, Data, ArrayDataset, DATA_ARRAY_TYPE
|
||||||
|
|
|
||||||
|
|
@ -20,7 +20,7 @@ logger = logging.getLogger(__name__)
|
||||||
DATA_ARRAY_TYPE = Union[np.ndarray, pd.DataFrame]
|
DATA_ARRAY_TYPE = Union[np.ndarray, pd.DataFrame]
|
||||||
|
|
||||||
|
|
||||||
class DatasetABC(metaclass=ABCMeta):
|
class Dataset(metaclass=ABCMeta):
|
||||||
"""Base Abstract Class for Dataset"""
|
"""Base Abstract Class for Dataset"""
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
|
|
@ -38,7 +38,7 @@ class DatasetABC(metaclass=ABCMeta):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
class StoredDatasetABC(DatasetABC):
|
class StoredDataset(Dataset):
|
||||||
"""Abstract Class for Storable Dataset"""
|
"""Abstract Class for Storable Dataset"""
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
|
|
@ -73,7 +73,7 @@ class StoredDatasetABC(DatasetABC):
|
||||||
logger.info('Dataset Downloaded')
|
logger.info('Dataset Downloaded')
|
||||||
|
|
||||||
if unzip:
|
if unzip:
|
||||||
StoredDatasetABC.extract_archive(zip_path=file_path, dest_path=dest_path, remove_archive=False)
|
StoredDataset.extract_archive(zip_path=file_path, dest_path=dest_path, remove_archive=False)
|
||||||
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
|
@ -123,12 +123,12 @@ class StoredDatasetABC(DatasetABC):
|
||||||
np.savetxt(dest_datafile, debug_data, delimiter=delimiter, fmt=fmt)
|
np.savetxt(dest_datafile, debug_data, delimiter=delimiter, fmt=fmt)
|
||||||
|
|
||||||
|
|
||||||
class BaseDataset(DatasetABC):
|
class ArrayDataset(Dataset):
|
||||||
"""Base Class for Dataset"""
|
"""Dataset that is based on x and y arrays (e.g., numpy/pandas)"""
|
||||||
|
|
||||||
def __init__(self, x: DATA_ARRAY_TYPE, y: DATA_ARRAY_TYPE, **kwargs):
|
def __init__(self, x: DATA_ARRAY_TYPE, y: DATA_ARRAY_TYPE, **kwargs):
|
||||||
"""
|
"""
|
||||||
BaseDataset constructor.
|
ArrayDataset constructor.
|
||||||
:param x: collection of data samples
|
:param x: collection of data samples
|
||||||
:param y: collection of labels
|
:param y: collection of labels
|
||||||
:param kwargs: dataset parameters
|
:param kwargs: dataset parameters
|
||||||
|
|
@ -159,7 +159,7 @@ class DatasetFactory:
|
||||||
:param name: dataset name
|
:param name: dataset name
|
||||||
:return:
|
:return:
|
||||||
"""
|
"""
|
||||||
def inner_wrapper(wrapped_class: DatasetABC) -> Any:
|
def inner_wrapper(wrapped_class: Dataset) -> Any:
|
||||||
if name in cls.registry:
|
if name in cls.registry:
|
||||||
logger.warning('Dataset %s already exists. Will replace it', name)
|
logger.warning('Dataset %s already exists. Will replace it', name)
|
||||||
cls.registry[name] = wrapped_class
|
cls.registry[name] = wrapped_class
|
||||||
|
|
@ -168,7 +168,7 @@ class DatasetFactory:
|
||||||
return inner_wrapper
|
return inner_wrapper
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def create_dataset(cls, name: str, **kwargs) -> DatasetABC:
|
def create_dataset(cls, name: str, **kwargs) -> Dataset:
|
||||||
"""
|
"""
|
||||||
Factory command to create dataset instance.
|
Factory command to create dataset instance.
|
||||||
This method gets the appropriate Dataset class from the registry
|
This method gets the appropriate Dataset class from the registry
|
||||||
|
|
@ -190,7 +190,7 @@ class DatasetFactory:
|
||||||
|
|
||||||
class Data:
|
class Data:
|
||||||
|
|
||||||
def __init__(self, train: DatasetABC = None, test: DatasetABC = None, **kwargs):
|
def __init__(self, train: Dataset = None, test: Dataset = None, **kwargs):
|
||||||
"""
|
"""
|
||||||
Data class constructor.
|
Data class constructor.
|
||||||
The class stores train and test datasets.
|
The class stores train and test datasets.
|
||||||
|
|
@ -205,11 +205,11 @@ class Data:
|
||||||
self.train = DatasetFactory.create_dataset(train=True, **kwargs)
|
self.train = DatasetFactory.create_dataset(train=True, **kwargs)
|
||||||
self.test = DatasetFactory.create_dataset(train=False, **kwargs)
|
self.test = DatasetFactory.create_dataset(train=False, **kwargs)
|
||||||
|
|
||||||
def get_train_set(self) -> DatasetABC:
|
def get_train_set(self) -> Dataset:
|
||||||
"""Return train DatasetBase"""
|
"""Return train DatasetBase"""
|
||||||
return self.train
|
return self.train
|
||||||
|
|
||||||
def get_test_set(self) -> DatasetABC:
|
def get_test_set(self) -> Dataset:
|
||||||
"""Return test DatasetBase"""
|
"""Return test DatasetBase"""
|
||||||
return self.test
|
return self.test
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,2 +1,2 @@
|
||||||
from apt.utils.models.model import Model, ModelWithLoss, SingleOutputModel, MultipleOutputModel
|
from apt.utils.models.model import Model
|
||||||
from apt.utils.models.sklearn_model import SklearnModel, SklearnClassifier, SklearnRegressor
|
from apt.utils.models.sklearn_model import SklearnModel, SklearnClassifier, SklearnRegressor
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
from abc import ABCMeta, abstractmethod
|
from abc import ABCMeta, abstractmethod
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
from apt.utils.datasets import BaseDataset, DATA_ARRAY_TYPE
|
from apt.utils.datasets import Dataset, DATA_ARRAY_TYPE
|
||||||
|
|
||||||
|
|
||||||
class Model(metaclass=ABCMeta):
|
class Model(metaclass=ABCMeta):
|
||||||
|
|
@ -18,12 +18,12 @@ class Model(metaclass=ABCMeta):
|
||||||
self._model = model
|
self._model = model
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def fit(self, train_data: BaseDataset, **kwargs) -> None:
|
def fit(self, train_data: Dataset, **kwargs) -> None:
|
||||||
"""
|
"""
|
||||||
Fit the model using the training data.
|
Fit the model using the training data.
|
||||||
|
|
||||||
:param train_data: Training data.
|
:param train_data: Training data.
|
||||||
:type train_data: `BaseDataset`
|
:type train_data: `Dataset`
|
||||||
"""
|
"""
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -4,7 +4,7 @@ from sklearn.preprocessing import OneHotEncoder
|
||||||
from sklearn.base import BaseEstimator
|
from sklearn.base import BaseEstimator
|
||||||
|
|
||||||
from apt.utils.models import Model
|
from apt.utils.models import Model
|
||||||
from apt.utils.datasets import BaseDataset, DATA_ARRAY_TYPE
|
from apt.utils.datasets import Dataset, DATA_ARRAY_TYPE
|
||||||
|
|
||||||
from art.estimators.classification.scikitlearn import SklearnClassifier as ArtSklearnClassifier
|
from art.estimators.classification.scikitlearn import SklearnClassifier as ArtSklearnClassifier
|
||||||
from art.estimators.regression.scikitlearn import ScikitlearnRegressor
|
from art.estimators.regression.scikitlearn import ScikitlearnRegressor
|
||||||
|
|
@ -14,12 +14,12 @@ class SklearnModel(Model):
|
||||||
"""
|
"""
|
||||||
Wrapper class for scikitlearn models.
|
Wrapper class for scikitlearn models.
|
||||||
"""
|
"""
|
||||||
def score(self, test_data: BaseDataset, **kwargs):
|
def score(self, test_data: Dataset, **kwargs):
|
||||||
"""
|
"""
|
||||||
Score the model using test data.
|
Score the model using test data.
|
||||||
|
|
||||||
:param test_data: Test data.
|
:param test_data: Test data.
|
||||||
:type train_data: `BaseDataset`
|
:type train_data: `Dataset`
|
||||||
"""
|
"""
|
||||||
return self.model.score(test_data.get_samples(), test_data.get_labels(), **kwargs)
|
return self.model.score(test_data.get_samples(), test_data.get_labels(), **kwargs)
|
||||||
|
|
||||||
|
|
@ -37,12 +37,12 @@ class SklearnClassifier(SklearnModel):
|
||||||
super().__init__(model, **kwargs)
|
super().__init__(model, **kwargs)
|
||||||
self._art_model = ArtSklearnClassifier(model)
|
self._art_model = ArtSklearnClassifier(model)
|
||||||
|
|
||||||
def fit(self, train_data: BaseDataset, **kwargs) -> None:
|
def fit(self, train_data: Dataset, **kwargs) -> None:
|
||||||
"""
|
"""
|
||||||
Fit the model using the training data.
|
Fit the model using the training data.
|
||||||
|
|
||||||
:param train_data: Training data.
|
:param train_data: Training data.
|
||||||
:type train_data: `BaseDataset`
|
:type train_data: `Dataset`
|
||||||
"""
|
"""
|
||||||
encoder = OneHotEncoder(sparse=False)
|
encoder = OneHotEncoder(sparse=False)
|
||||||
y_encoded = encoder.fit_transform(train_data.get_labels().reshape(-1, 1))
|
y_encoded = encoder.fit_transform(train_data.get_labels().reshape(-1, 1))
|
||||||
|
|
@ -72,12 +72,12 @@ class SklearnRegressor(SklearnModel):
|
||||||
super().__init__(model, **kwargs)
|
super().__init__(model, **kwargs)
|
||||||
self._art_model = ScikitlearnRegressor(model)
|
self._art_model = ScikitlearnRegressor(model)
|
||||||
|
|
||||||
def fit(self, train_data: BaseDataset, **kwargs) -> None:
|
def fit(self, train_data: Dataset, **kwargs) -> None:
|
||||||
"""
|
"""
|
||||||
Fit the model using the training data.
|
Fit the model using the training data.
|
||||||
|
|
||||||
:param train_data: Training data.
|
:param train_data: Training data.
|
||||||
:type train_data: `BaseDataset`
|
:type train_data: `Dataset`
|
||||||
"""
|
"""
|
||||||
self._art_model.fit(train_data.get_samples(), train_data.get_labels(), **kwargs)
|
self._art_model.fit(train_data.get_samples(), train_data.get_labels(), **kwargs)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,44 +1,36 @@
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from apt.utils.models import SklearnClassifier, SklearnRegressor
|
from apt.utils.models import SklearnClassifier, SklearnRegressor
|
||||||
|
from apt.utils.datasets import ArrayDataset
|
||||||
from apt.utils import dataset_utils
|
from apt.utils import dataset_utils
|
||||||
|
|
||||||
from sklearn.tree import DecisionTreeRegressor
|
from sklearn.tree import DecisionTreeRegressor
|
||||||
from sklearn.ensemble import RandomForestClassifier
|
from sklearn.ensemble import RandomForestClassifier
|
||||||
|
|
||||||
|
|
||||||
def test_sklearn_classifier():
|
def test_sklearn_classifier():
|
||||||
(x_train, y_train), (x_test, y_test) = dataset_utils.get_iris_dataset()
|
(x_train, y_train), (x_test, y_test) = dataset_utils.get_iris_dataset()
|
||||||
underlying_model = RandomForestClassifier()
|
underlying_model = RandomForestClassifier()
|
||||||
model = SklearnClassifier(underlying_model)
|
model = SklearnClassifier(underlying_model)
|
||||||
model.fit(x_train, y_train)
|
train = ArrayDataset(x_train, y_train)
|
||||||
|
test = ArrayDataset(x_test, y_test)
|
||||||
|
model.fit(train)
|
||||||
pred = model.predict(x_test)
|
pred = model.predict(x_test)
|
||||||
assert(pred.shape[0] == x_test.shape[0])
|
assert(pred.shape[0] == x_test.shape[0])
|
||||||
|
|
||||||
score = model.score(x_test, y_test)
|
score = model.score(test)
|
||||||
assert(0.0 <= score <= 1.0)
|
assert(0.0 <= score <= 1.0)
|
||||||
|
|
||||||
|
|
||||||
def test_sklearn_regressor():
|
def test_sklearn_regressor():
|
||||||
(x_train, y_train), (x_test, y_test) = dataset_utils.get_diabetes_dataset()
|
(x_train, y_train), (x_test, y_test) = dataset_utils.get_diabetes_dataset()
|
||||||
underlying_model = DecisionTreeRegressor()
|
underlying_model = DecisionTreeRegressor()
|
||||||
model = SklearnRegressor(underlying_model)
|
model = SklearnRegressor(underlying_model)
|
||||||
model.fit(x_train, y_train)
|
train = ArrayDataset(x_train, y_train)
|
||||||
|
test = ArrayDataset(x_test, y_test)
|
||||||
|
model.fit(train)
|
||||||
pred = model.predict(x_test)
|
pred = model.predict(x_test)
|
||||||
assert (pred.shape[0] == x_test.shape[0])
|
assert (pred.shape[0] == x_test.shape[0])
|
||||||
|
|
||||||
score = model.score(x_test, y_test)
|
score = model.score(test)
|
||||||
|
assert (0 <= score <= 1)
|
||||||
losses = model.loss(x_test, y_test)
|
|
||||||
assert (losses.shape[0] == x_test.shape[0])
|
|
||||||
|
|
||||||
|
|
||||||
# Probably not needed for now, as we will not be using these wrappers directly in ART.
|
|
||||||
# def test_sklearn_decision_tree():
|
|
||||||
# (x_train, y_train), (x_test, y_test) = dataset_utils.get_iris_dataset()
|
|
||||||
# underlying_model = DecisionTreeClassifier()
|
|
||||||
# model = SklearnDecisionTreeClassifier(underlying_model)
|
|
||||||
# model.fit(x_train, y_train)
|
|
||||||
# pred = model.predict(x_test)
|
|
||||||
# assert(pred.shape[0] == x_test.shape[0])
|
|
||||||
#
|
|
||||||
# score = model.score(x_test, y_test)
|
|
||||||
# assert(0.0 <= score <= 1.0)
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue