diff --git a/apt/utils/datasets/__init__.py b/apt/utils/datasets/__init__.py index 610dc46..09f38a4 100644 --- a/apt/utils/datasets/__init__.py +++ b/apt/utils/datasets/__init__.py @@ -3,4 +3,4 @@ The AI Privacy Toolbox (datasets). Implementation of datasets utility components for datasets creation, load, and store """ -from apt.utils.datasets.datasets import DatasetABC, StoredDatasetABC, DatasetFactory, Data, BaseDataset, DATA_ARRAY_TYPE +from apt.utils.datasets.datasets import Dataset, StoredDataset, DatasetFactory, Data, ArrayDataset, DATA_ARRAY_TYPE diff --git a/apt/utils/datasets/datasets.py b/apt/utils/datasets/datasets.py index f6dee7a..a164ba4 100644 --- a/apt/utils/datasets/datasets.py +++ b/apt/utils/datasets/datasets.py @@ -20,7 +20,7 @@ logger = logging.getLogger(__name__) DATA_ARRAY_TYPE = Union[np.ndarray, pd.DataFrame] -class DatasetABC(metaclass=ABCMeta): +class Dataset(metaclass=ABCMeta): """Base Abstract Class for Dataset""" @abstractmethod @@ -38,7 +38,7 @@ class DatasetABC(metaclass=ABCMeta): pass -class StoredDatasetABC(DatasetABC): +class StoredDataset(Dataset): """Abstract Class for Storable Dataset""" @abstractmethod @@ -73,7 +73,7 @@ class StoredDatasetABC(DatasetABC): logger.info('Dataset Downloaded') if unzip: - StoredDatasetABC.extract_archive(zip_path=file_path, dest_path=dest_path, remove_archive=False) + StoredDataset.extract_archive(zip_path=file_path, dest_path=dest_path, remove_archive=False) @staticmethod @@ -123,12 +123,12 @@ class StoredDatasetABC(DatasetABC): np.savetxt(dest_datafile, debug_data, delimiter=delimiter, fmt=fmt) -class BaseDataset(DatasetABC): - """Base Class for Dataset""" +class ArrayDataset(Dataset): + """Dataset that is based on x and y arrays (e.g., numpy/pandas)""" def __init__(self, x: DATA_ARRAY_TYPE, y: DATA_ARRAY_TYPE, **kwargs): """ - BaseDataset constructor. + ArrayDataset constructor. 
:param x: collection of data samples :param y: collection of labels :param kwargs: dataset parameters @@ -159,7 +159,7 @@ class DatasetFactory: :param name: dataset name :return: """ - def inner_wrapper(wrapped_class: DatasetABC) -> Any: + def inner_wrapper(wrapped_class: Dataset) -> Any: if name in cls.registry: logger.warning('Dataset %s already exists. Will replace it', name) cls.registry[name] = wrapped_class @@ -168,7 +168,7 @@ class DatasetFactory: return inner_wrapper @classmethod - def create_dataset(cls, name: str, **kwargs) -> DatasetABC: + def create_dataset(cls, name: str, **kwargs) -> Dataset: """ Factory command to create dataset instance. This method gets the appropriate Dataset class from the registry @@ -190,7 +190,7 @@ class DatasetFactory: class Data: - def __init__(self, train: DatasetABC = None, test: DatasetABC = None, **kwargs): + def __init__(self, train: Dataset = None, test: Dataset = None, **kwargs): """ Data class constructor. The class stores train and test datasets. 
@@ -205,11 +205,11 @@ class Data: self.train = DatasetFactory.create_dataset(train=True, **kwargs) self.test = DatasetFactory.create_dataset(train=False, **kwargs) - def get_train_set(self) -> DatasetABC: + def get_train_set(self) -> Dataset: """Return train DatasetBase""" return self.train - def get_test_set(self) -> DatasetABC: + def get_test_set(self) -> Dataset: """Return test DatasetBase""" return self.test diff --git a/apt/utils/models/__init__.py b/apt/utils/models/__init__.py index 9f48d82..dc50790 100644 --- a/apt/utils/models/__init__.py +++ b/apt/utils/models/__init__.py @@ -1,2 +1,2 @@ -from apt.utils.models.model import Model, ModelWithLoss, SingleOutputModel, MultipleOutputModel +from apt.utils.models.model import Model from apt.utils.models.sklearn_model import SklearnModel, SklearnClassifier, SklearnRegressor diff --git a/apt/utils/models/model.py b/apt/utils/models/model.py index d025a28..1ef13ad 100644 --- a/apt/utils/models/model.py +++ b/apt/utils/models/model.py @@ -1,7 +1,7 @@ from abc import ABCMeta, abstractmethod from typing import Any -from apt.utils.datasets import BaseDataset, DATA_ARRAY_TYPE +from apt.utils.datasets import Dataset, DATA_ARRAY_TYPE class Model(metaclass=ABCMeta): @@ -18,12 +18,12 @@ class Model(metaclass=ABCMeta): self._model = model @abstractmethod - def fit(self, train_data: BaseDataset, **kwargs) -> None: + def fit(self, train_data: Dataset, **kwargs) -> None: """ Fit the model using the training data. :param train_data: Training data. 
- :type train_data: `BaseDataset` + :type train_data: `Dataset` """ raise NotImplementedError diff --git a/apt/utils/models/sklearn_model.py b/apt/utils/models/sklearn_model.py index 92e8ba0..731a554 100644 --- a/apt/utils/models/sklearn_model.py +++ b/apt/utils/models/sklearn_model.py @@ -4,7 +4,7 @@ from sklearn.preprocessing import OneHotEncoder from sklearn.base import BaseEstimator from apt.utils.models import Model -from apt.utils.datasets import BaseDataset, DATA_ARRAY_TYPE +from apt.utils.datasets import Dataset, DATA_ARRAY_TYPE from art.estimators.classification.scikitlearn import SklearnClassifier as ArtSklearnClassifier from art.estimators.regression.scikitlearn import ScikitlearnRegressor @@ -14,12 +14,12 @@ class SklearnModel(Model): """ Wrapper class for scikitlearn models. """ - def score(self, test_data: BaseDataset, **kwargs): + def score(self, test_data: Dataset, **kwargs): """ Score the model using test data. :param test_data: Test data. - :type train_data: `BaseDataset` + :type test_data: `Dataset` """ return self.model.score(test_data.get_samples(), test_data.get_labels(), **kwargs) @@ -37,12 +37,12 @@ class SklearnClassifier(SklearnModel): super().__init__(model, **kwargs) self._art_model = ArtSklearnClassifier(model) - def fit(self, train_data: BaseDataset, **kwargs) -> None: + def fit(self, train_data: Dataset, **kwargs) -> None: """ Fit the model using the training data. :param train_data: Training data. - :type train_data: `BaseDataset` + :type train_data: `Dataset` """ encoder = OneHotEncoder(sparse=False) y_encoded = encoder.fit_transform(train_data.get_labels().reshape(-1, 1)) @@ -72,12 +72,12 @@ class SklearnRegressor(SklearnModel): super().__init__(model, **kwargs) self._art_model = ScikitlearnRegressor(model) - def fit(self, train_data: BaseDataset, **kwargs) -> None: + def fit(self, train_data: Dataset, **kwargs) -> None: """ Fit the model using the training data. :param train_data: Training data. 
- :type train_data: `BaseDataset` + :type train_data: `Dataset` """ self._art_model.fit(train_data.get_samples(), train_data.get_labels(), **kwargs) diff --git a/tests/test_model.py b/tests/test_model.py index d1dc6eb..7dd151c 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -1,44 +1,36 @@ import pytest from apt.utils.models import SklearnClassifier, SklearnRegressor +from apt.utils.datasets import ArrayDataset from apt.utils import dataset_utils from sklearn.tree import DecisionTreeRegressor from sklearn.ensemble import RandomForestClassifier + def test_sklearn_classifier(): (x_train, y_train), (x_test, y_test) = dataset_utils.get_iris_dataset() underlying_model = RandomForestClassifier() model = SklearnClassifier(underlying_model) - model.fit(x_train, y_train) + train = ArrayDataset(x_train, y_train) + test = ArrayDataset(x_test, y_test) + model.fit(train) pred = model.predict(x_test) assert(pred.shape[0] == x_test.shape[0]) - score = model.score(x_test, y_test) + score = model.score(test) assert(0.0 <= score <= 1.0) + def test_sklearn_regressor(): (x_train, y_train), (x_test, y_test) = dataset_utils.get_diabetes_dataset() underlying_model = DecisionTreeRegressor() model = SklearnRegressor(underlying_model) - model.fit(x_train, y_train) + train = ArrayDataset(x_train, y_train) + test = ArrayDataset(x_test, y_test) + model.fit(train) pred = model.predict(x_test) assert (pred.shape[0] == x_test.shape[0]) - score = model.score(x_test, y_test) - - losses = model.loss(x_test, y_test) - assert (losses.shape[0] == x_test.shape[0]) - - -# Probably not needed for now, as we will not be using these wrappers directly in ART. 
-# def test_sklearn_decision_tree(): -# (x_train, y_train), (x_test, y_test) = dataset_utils.get_iris_dataset() -# underlying_model = DecisionTreeClassifier() -# model = SklearnDecisionTreeClassifier(underlying_model) -# model.fit(x_train, y_train) -# pred = model.predict(x_test) -# assert(pred.shape[0] == x_test.shape[0]) -# -# score = model.score(x_test, y_test) -# assert(0.0 <= score <= 1.0) + score = model.score(test) + assert (0 <= score <= 1)