mirror of
https://github.com/IBM/ai-privacy-toolkit.git
synced 2026-04-24 20:36:21 +02:00
Renaming
This commit is contained in:
parent
3d82db80c4
commit
f2df2fcc8c
6 changed files with 35 additions and 43 deletions
|
|
@ -3,4 +3,4 @@ The AI Privacy Toolbox (datasets).
|
|||
Implementation of datasets utility components for datasets creation, load, and store
|
||||
"""
|
||||
|
||||
from apt.utils.datasets.datasets import DatasetABC, StoredDatasetABC, DatasetFactory, Data, BaseDataset, DATA_ARRAY_TYPE
|
||||
from apt.utils.datasets.datasets import Dataset, StoredDataset, DatasetFactory, Data, ArrayDataset, DATA_ARRAY_TYPE
|
||||
|
|
|
|||
|
|
@ -20,7 +20,7 @@ logger = logging.getLogger(__name__)
|
|||
DATA_ARRAY_TYPE = Union[np.ndarray, pd.DataFrame]
|
||||
|
||||
|
||||
class DatasetABC(metaclass=ABCMeta):
|
||||
class Dataset(metaclass=ABCMeta):
|
||||
"""Base Abstract Class for Dataset"""
|
||||
|
||||
@abstractmethod
|
||||
|
|
@ -38,7 +38,7 @@ class DatasetABC(metaclass=ABCMeta):
|
|||
pass
|
||||
|
||||
|
||||
class StoredDatasetABC(DatasetABC):
|
||||
class StoredDataset(Dataset):
|
||||
"""Abstract Class for Storable Dataset"""
|
||||
|
||||
@abstractmethod
|
||||
|
|
@ -73,7 +73,7 @@ class StoredDatasetABC(DatasetABC):
|
|||
logger.info('Dataset Downloaded')
|
||||
|
||||
if unzip:
|
||||
StoredDatasetABC.extract_archive(zip_path=file_path, dest_path=dest_path, remove_archive=False)
|
||||
StoredDataset.extract_archive(zip_path=file_path, dest_path=dest_path, remove_archive=False)
|
||||
|
||||
|
||||
@staticmethod
|
||||
|
|
@ -123,12 +123,12 @@ class StoredDatasetABC(DatasetABC):
|
|||
np.savetxt(dest_datafile, debug_data, delimiter=delimiter, fmt=fmt)
|
||||
|
||||
|
||||
class BaseDataset(DatasetABC):
|
||||
"""Base Class for Dataset"""
|
||||
class ArrayDataset(Dataset):
|
||||
"""Dataset that is based on x and y arrays (e.g., numpy/pandas)"""
|
||||
|
||||
def __init__(self, x: DATA_ARRAY_TYPE, y: DATA_ARRAY_TYPE, **kwargs):
|
||||
"""
|
||||
BaseDataset constructor.
|
||||
ArrayDataset constructor.
|
||||
:param x: collection of data samples
|
||||
:param y: collection of labels
|
||||
:param kwargs: dataset parameters
|
||||
|
|
@ -159,7 +159,7 @@ class DatasetFactory:
|
|||
:param name: dataset name
|
||||
:return:
|
||||
"""
|
||||
def inner_wrapper(wrapped_class: DatasetABC) -> Any:
|
||||
def inner_wrapper(wrapped_class: Dataset) -> Any:
|
||||
if name in cls.registry:
|
||||
logger.warning('Dataset %s already exists. Will replace it', name)
|
||||
cls.registry[name] = wrapped_class
|
||||
|
|
@ -168,7 +168,7 @@ class DatasetFactory:
|
|||
return inner_wrapper
|
||||
|
||||
@classmethod
|
||||
def create_dataset(cls, name: str, **kwargs) -> DatasetABC:
|
||||
def create_dataset(cls, name: str, **kwargs) -> Dataset:
|
||||
"""
|
||||
Factory command to create dataset instance.
|
||||
This method gets the appropriate Dataset class from the registry
|
||||
|
|
@ -190,7 +190,7 @@ class DatasetFactory:
|
|||
|
||||
class Data:
|
||||
|
||||
def __init__(self, train: DatasetABC = None, test: DatasetABC = None, **kwargs):
|
||||
def __init__(self, train: Dataset = None, test: Dataset = None, **kwargs):
|
||||
"""
|
||||
Data class constructor.
|
||||
The class stores train and test datasets.
|
||||
|
|
@ -205,11 +205,11 @@ class Data:
|
|||
self.train = DatasetFactory.create_dataset(train=True, **kwargs)
|
||||
self.test = DatasetFactory.create_dataset(train=False, **kwargs)
|
||||
|
||||
def get_train_set(self) -> DatasetABC:
|
||||
def get_train_set(self) -> Dataset:
|
||||
"""Return train Dataset"""
|
||||
return self.train
|
||||
|
||||
def get_test_set(self) -> DatasetABC:
|
||||
def get_test_set(self) -> Dataset:
|
||||
"""Return test Dataset"""
|
||||
return self.test
|
||||
|
||||
|
|
|
|||
|
|
@ -1,2 +1,2 @@
|
|||
from apt.utils.models.model import Model, ModelWithLoss, SingleOutputModel, MultipleOutputModel
|
||||
from apt.utils.models.model import Model
|
||||
from apt.utils.models.sklearn_model import SklearnModel, SklearnClassifier, SklearnRegressor
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
from abc import ABCMeta, abstractmethod
|
||||
from typing import Any
|
||||
|
||||
from apt.utils.datasets import BaseDataset, DATA_ARRAY_TYPE
|
||||
from apt.utils.datasets import Dataset, DATA_ARRAY_TYPE
|
||||
|
||||
|
||||
class Model(metaclass=ABCMeta):
|
||||
|
|
@ -18,12 +18,12 @@ class Model(metaclass=ABCMeta):
|
|||
self._model = model
|
||||
|
||||
@abstractmethod
|
||||
def fit(self, train_data: BaseDataset, **kwargs) -> None:
|
||||
def fit(self, train_data: Dataset, **kwargs) -> None:
|
||||
"""
|
||||
Fit the model using the training data.
|
||||
|
||||
:param train_data: Training data.
|
||||
:type train_data: `BaseDataset`
|
||||
:type train_data: `Dataset`
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@ from sklearn.preprocessing import OneHotEncoder
|
|||
from sklearn.base import BaseEstimator
|
||||
|
||||
from apt.utils.models import Model
|
||||
from apt.utils.datasets import BaseDataset, DATA_ARRAY_TYPE
|
||||
from apt.utils.datasets import Dataset, DATA_ARRAY_TYPE
|
||||
|
||||
from art.estimators.classification.scikitlearn import SklearnClassifier as ArtSklearnClassifier
|
||||
from art.estimators.regression.scikitlearn import ScikitlearnRegressor
|
||||
|
|
@ -14,12 +14,12 @@ class SklearnModel(Model):
|
|||
"""
|
||||
Wrapper class for scikitlearn models.
|
||||
"""
|
||||
def score(self, test_data: BaseDataset, **kwargs):
|
||||
def score(self, test_data: Dataset, **kwargs):
|
||||
"""
|
||||
Score the model using test data.
|
||||
|
||||
:param test_data: Test data.
|
||||
:type train_data: `BaseDataset`
|
||||
:type test_data: `Dataset`
|
||||
"""
|
||||
return self.model.score(test_data.get_samples(), test_data.get_labels(), **kwargs)
|
||||
|
||||
|
|
@ -37,12 +37,12 @@ class SklearnClassifier(SklearnModel):
|
|||
super().__init__(model, **kwargs)
|
||||
self._art_model = ArtSklearnClassifier(model)
|
||||
|
||||
def fit(self, train_data: BaseDataset, **kwargs) -> None:
|
||||
def fit(self, train_data: Dataset, **kwargs) -> None:
|
||||
"""
|
||||
Fit the model using the training data.
|
||||
|
||||
:param train_data: Training data.
|
||||
:type train_data: `BaseDataset`
|
||||
:type train_data: `Dataset`
|
||||
"""
|
||||
encoder = OneHotEncoder(sparse=False)
|
||||
y_encoded = encoder.fit_transform(train_data.get_labels().reshape(-1, 1))
|
||||
|
|
@ -72,12 +72,12 @@ class SklearnRegressor(SklearnModel):
|
|||
super().__init__(model, **kwargs)
|
||||
self._art_model = ScikitlearnRegressor(model)
|
||||
|
||||
def fit(self, train_data: BaseDataset, **kwargs) -> None:
|
||||
def fit(self, train_data: Dataset, **kwargs) -> None:
|
||||
"""
|
||||
Fit the model using the training data.
|
||||
|
||||
:param train_data: Training data.
|
||||
:type train_data: `BaseDataset`
|
||||
:type train_data: `Dataset`
|
||||
"""
|
||||
self._art_model.fit(train_data.get_samples(), train_data.get_labels(), **kwargs)
|
||||
|
||||
|
|
|
|||
|
|
@ -1,44 +1,36 @@
|
|||
import pytest
|
||||
|
||||
from apt.utils.models import SklearnClassifier, SklearnRegressor
|
||||
from apt.utils.datasets import ArrayDataset
|
||||
from apt.utils import dataset_utils
|
||||
|
||||
from sklearn.tree import DecisionTreeRegressor
|
||||
from sklearn.ensemble import RandomForestClassifier
|
||||
|
||||
|
||||
def test_sklearn_classifier():
|
||||
(x_train, y_train), (x_test, y_test) = dataset_utils.get_iris_dataset()
|
||||
underlying_model = RandomForestClassifier()
|
||||
model = SklearnClassifier(underlying_model)
|
||||
model.fit(x_train, y_train)
|
||||
train = ArrayDataset(x_train, y_train)
|
||||
test = ArrayDataset(x_test, y_test)
|
||||
model.fit(train)
|
||||
pred = model.predict(x_test)
|
||||
assert(pred.shape[0] == x_test.shape[0])
|
||||
|
||||
score = model.score(x_test, y_test)
|
||||
score = model.score(test)
|
||||
assert(0.0 <= score <= 1.0)
|
||||
|
||||
|
||||
def test_sklearn_regressor():
|
||||
(x_train, y_train), (x_test, y_test) = dataset_utils.get_diabetes_dataset()
|
||||
underlying_model = DecisionTreeRegressor()
|
||||
model = SklearnRegressor(underlying_model)
|
||||
model.fit(x_train, y_train)
|
||||
train = ArrayDataset(x_train, y_train)
|
||||
test = ArrayDataset(x_test, y_test)
|
||||
model.fit(train)
|
||||
pred = model.predict(x_test)
|
||||
assert (pred.shape[0] == x_test.shape[0])
|
||||
|
||||
score = model.score(x_test, y_test)
|
||||
|
||||
losses = model.loss(x_test, y_test)
|
||||
assert (losses.shape[0] == x_test.shape[0])
|
||||
|
||||
|
||||
# Probably not needed for now, as we will not be using these wrappers directly in ART.
|
||||
# def test_sklearn_decision_tree():
|
||||
# (x_train, y_train), (x_test, y_test) = dataset_utils.get_iris_dataset()
|
||||
# underlying_model = DecisionTreeClassifier()
|
||||
# model = SklearnDecisionTreeClassifier(underlying_model)
|
||||
# model.fit(x_train, y_train)
|
||||
# pred = model.predict(x_test)
|
||||
# assert(pred.shape[0] == x_test.shape[0])
|
||||
#
|
||||
# score = model.score(x_test, y_test)
|
||||
# assert(0.0 <= score <= 1.0)
|
||||
score = model.score(test)
|
||||
assert (0 <= score <= 1)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue