mirror of
https://github.com/IBM/ai-privacy-toolkit.git
synced 2026-06-08 15:05:13 +02:00
Remove the default encoder: if no encoder is provided, the data is supplied to the model as is. Fix the data type of representative values. Fix existing tests and add more tests.
Signed-off-by: abigailt <abigailt@il.ibm.com>
This commit is contained in:
parent
ef406cea62
commit
30cb705062
2 changed files with 134 additions and 84 deletions
|
|
@ -57,7 +57,7 @@ class GeneralizeToRepresentative(BaseEstimator, MetaEstimatorMixin, TransformerM
|
|||
:param categorical_features: The list of categorical features (if supplied, these features will be one-hot
|
||||
encoded before using them to train the decision tree model).
|
||||
:param encoder: Optional encoder for encoding data before feeding it into the estimator (e.g., for categorical
|
||||
features)
|
||||
features). If not provided, the data will be fed as is directly to the estimator.
|
||||
:type encoder: sklearn OrdinalEncoder or OneHotEncoder
|
||||
:type categorical_features: list of strings, optional
|
||||
:param features_to_minimize: The features to be minimized.
|
||||
|
|
@ -256,7 +256,6 @@ class GeneralizeToRepresentative(BaseEstimator, MetaEstimatorMixin, TransformerM
|
|||
# Going to fit
|
||||
# (currently not dealing with option to fit with only X and y and no estimator)
|
||||
if self.estimator and dataset and dataset.get_samples() is not None and dataset.get_labels() is not None:
|
||||
dtype = dataset.get_samples().dtype
|
||||
x = pd.DataFrame(dataset.get_samples(), columns=self._features)
|
||||
if not self.features_to_minimize:
|
||||
self.features_to_minimize = self._features
|
||||
|
|
@ -293,21 +292,6 @@ class GeneralizeToRepresentative(BaseEstimator, MetaEstimatorMixin, TransformerM
|
|||
# collect feature data (such as min, max)
|
||||
self._feature_data = self._get_feature_data(x)
|
||||
|
||||
# default encoder in case none provided
|
||||
if self.encoder is None:
|
||||
numeric_features = [f for f in self._features if f not in self.categorical_features]
|
||||
numeric_transformer = Pipeline(
|
||||
steps=[('imputer', SimpleImputer(strategy='constant', fill_value=0))]
|
||||
)
|
||||
categorical_transformer = OneHotEncoder(handle_unknown="ignore", sparse=False)
|
||||
self.encoder = ColumnTransformer(
|
||||
transformers=[
|
||||
("num", numeric_transformer, numeric_features),
|
||||
("cat", categorical_transformer, self.categorical_features),
|
||||
]
|
||||
)
|
||||
self.encoder.fit(x)
|
||||
|
||||
self.cells = []
|
||||
self._categorical_values = {}
|
||||
|
||||
|
|
@ -341,7 +325,10 @@ class GeneralizeToRepresentative(BaseEstimator, MetaEstimatorMixin, TransformerM
|
|||
generalized = self._generalize_from_generalizations(x_test, self.generalizations)
|
||||
|
||||
# check accuracy
|
||||
accuracy = self.estimator.score(ArrayDataset(self.encoder.transform(generalized).astype(dtype), y_test))
|
||||
if self.encoder:
|
||||
accuracy = self.estimator.score(ArrayDataset(self.encoder.transform(generalized), y_test))
|
||||
else:
|
||||
accuracy = self.estimator.score(ArrayDataset(generalized, y_test))
|
||||
print('Initial accuracy of model on generalized data, relative to original model predictions '
|
||||
'(base generalization derived from tree, before improvements): %f' % accuracy)
|
||||
|
||||
|
|
@ -371,8 +358,10 @@ class GeneralizeToRepresentative(BaseEstimator, MetaEstimatorMixin, TransformerM
|
|||
else:
|
||||
generalized = self._generalize_from_generalizations(x_test, self.generalizations)
|
||||
|
||||
accuracy = self.estimator.score(ArrayDataset(self.encoder.transform(generalized).astype(dtype),
|
||||
y_test))
|
||||
if self.encoder:
|
||||
accuracy = self.estimator.score(ArrayDataset(self.encoder.transform(generalized), y_test))
|
||||
else:
|
||||
accuracy = self.estimator.score(ArrayDataset(generalized, y_test))
|
||||
# if accuracy dropped below the target threshold, roll back to the previous iteration's generalizations
|
||||
if accuracy < self.target_accuracy:
|
||||
self.cells = cells_previous_iter
|
||||
|
|
@ -401,8 +390,11 @@ class GeneralizeToRepresentative(BaseEstimator, MetaEstimatorMixin, TransformerM
|
|||
self._cells_by_id)
|
||||
else:
|
||||
generalized = self._generalize_from_generalizations(x_test, self.generalizations)
|
||||
accuracy = self.estimator.score(ArrayDataset(self.encoder.transform(generalized).astype(dtype),
|
||||
y_test))
|
||||
|
||||
if self.encoder:
|
||||
accuracy = self.estimator.score(ArrayDataset(self.encoder.transform(generalized), y_test))
|
||||
else:
|
||||
accuracy = self.estimator.score(ArrayDataset(generalized, y_test))
|
||||
print('Removed feature: %s, new relative accuracy: %f' % (removed_feature, accuracy))
|
||||
|
||||
# self._cells currently holds the chosen generalization based on target accuracy
|
||||
|
|
@ -893,7 +885,11 @@ class GeneralizeToRepresentative(BaseEstimator, MetaEstimatorMixin, TransformerM
|
|||
|
||||
def _generalize_indexes(self, original_data, cells, all_indexes):
|
||||
# prepared data include one hot encoded categorical data + QI
|
||||
representatives = pd.DataFrame(columns=self._features) # empty except for columns
|
||||
dtypes = original_data.dtypes.to_dict()
|
||||
new_dtypes = {}
|
||||
for t in dtypes.keys():
|
||||
new_dtypes[t] = pd.Series(dtype=dtypes[t].name)
|
||||
representatives = pd.DataFrame(new_dtypes) # empty except for columns
|
||||
original_data_generalized = pd.DataFrame(original_data, columns=self._features, copy=True)
|
||||
|
||||
# iterate over cells (leaves in decision tree)
|
||||
|
|
@ -1000,8 +996,11 @@ class GeneralizeToRepresentative(BaseEstimator, MetaEstimatorMixin, TransformerM
|
|||
GeneralizeToRepresentative._remove_feature_from_cells(new_cells, cells_by_id, feature)
|
||||
generalized = self._generalize_from_tree(original_data, prepared_data, nodes, new_cells,
|
||||
cells_by_id)
|
||||
accuracy_gain = self.estimator.score(ArrayDataset(self.encoder.transform(generalized),
|
||||
labels)) - current_accuracy
|
||||
if self.encoder:
|
||||
accuracy_gain = self.estimator.score(ArrayDataset(self.encoder.transform(generalized),
|
||||
labels)) - current_accuracy
|
||||
else:
|
||||
accuracy_gain = self.estimator.score(ArrayDataset(generalized, labels)) - current_accuracy
|
||||
if accuracy_gain < 0:
|
||||
accuracy_gain = 0
|
||||
if accuracy_gain != 0:
|
||||
|
|
@ -1027,8 +1026,11 @@ class GeneralizeToRepresentative(BaseEstimator, MetaEstimatorMixin, TransformerM
|
|||
GeneralizeToRepresentative._remove_feature_from_cells(new_cells, cells_by_id, feature)
|
||||
generalized = self._generalize_from_tree(original_data, prepared_data, nodes, new_cells,
|
||||
cells_by_id)
|
||||
accuracy_gain = self.estimator.score(ArrayDataset(self.encoder.transform(generalized),
|
||||
labels)) - current_accuracy
|
||||
if self.encoder:
|
||||
accuracy_gain = self.estimator.score(ArrayDataset(self.encoder.transform(generalized),
|
||||
labels)) - current_accuracy
|
||||
else:
|
||||
accuracy_gain = self.estimator.score(ArrayDataset(generalized, labels)) - current_accuracy
|
||||
|
||||
if accuracy_gain < 0:
|
||||
accuracy_gain = 0
|
||||
|
|
|
|||
|
|
@ -11,6 +11,8 @@ from sklearn.model_selection import train_test_split
|
|||
from sklearn.pipeline import Pipeline
|
||||
from sklearn.preprocessing import OneHotEncoder
|
||||
|
||||
from torch import nn, optim
|
||||
|
||||
import tensorflow as tf
|
||||
from tensorflow.keras.models import Sequential
|
||||
from tensorflow.keras.layers import Dense, Input
|
||||
|
|
@ -24,6 +26,9 @@ from apt.utils.models import SklearnClassifier, ModelOutputType, SklearnRegresso
|
|||
tf.compat.v1.disable_eager_execution()
|
||||
|
||||
|
||||
ACCURACY_DIFF = 0.05
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def diabetes_dataset():
|
||||
return load_diabetes()
|
||||
|
|
@ -286,7 +291,7 @@ def test_minimizer_fit(data_two_features):
|
|||
check_ncp(ncp, expected_generalizations)
|
||||
|
||||
rel_accuracy = model.score(ArrayDataset(transformed, predictions))
|
||||
assert ((rel_accuracy >= target_accuracy) or (target_accuracy - rel_accuracy) <= 0.05)
|
||||
assert ((rel_accuracy >= target_accuracy) or (target_accuracy - rel_accuracy) <= ACCURACY_DIFF)
|
||||
|
||||
|
||||
def test_minimizer_ncp(data_two_features):
|
||||
|
|
@ -348,12 +353,15 @@ def test_minimizer_ncp_categorical(data_four_features):
|
|||
train_dataset = ArrayDataset(x, predictions, features_names=features)
|
||||
|
||||
gen1 = GeneralizeToRepresentative(model, target_accuracy=target_accuracy,
|
||||
categorical_features=categorical_features, generalize_using_transform=False)
|
||||
categorical_features=categorical_features,
|
||||
generalize_using_transform=False,
|
||||
encoder=preprocessor)
|
||||
gen1.fit(dataset=train_dataset)
|
||||
ncp1 = gen1.ncp.fit_score
|
||||
ncp2 = gen1.calculate_ncp(ad1)
|
||||
|
||||
gen2 = GeneralizeToRepresentative(model, target_accuracy=target_accuracy, categorical_features=categorical_features)
|
||||
gen2 = GeneralizeToRepresentative(model, target_accuracy=target_accuracy, categorical_features=categorical_features,
|
||||
encoder=preprocessor)
|
||||
gen2.fit(dataset=train_dataset)
|
||||
ncp3 = gen2.ncp.fit_score
|
||||
gen2.transform(dataset=ad1)
|
||||
|
|
@ -414,7 +422,8 @@ def test_minimizer_fit_pandas(data_four_features):
|
|||
# Now we have a full prediction pipeline.
|
||||
target_accuracy = 0.5
|
||||
gen = GeneralizeToRepresentative(model, target_accuracy=target_accuracy,
|
||||
categorical_features=categorical_features)
|
||||
categorical_features=categorical_features,
|
||||
encoder=preprocessor)
|
||||
train_dataset = ArrayDataset(x, predictions)
|
||||
gen.fit(dataset=train_dataset)
|
||||
transformed = gen.transform(dataset=ArrayDataset(x))
|
||||
|
|
@ -428,7 +437,7 @@ def test_minimizer_fit_pandas(data_four_features):
|
|||
check_ncp(ncp, expected_generalizations)
|
||||
|
||||
rel_accuracy = model.score(ArrayDataset(preprocessor.transform(transformed), predictions))
|
||||
assert ((rel_accuracy >= target_accuracy) or (target_accuracy - rel_accuracy) <= 0.05)
|
||||
assert ((rel_accuracy >= target_accuracy) or (target_accuracy - rel_accuracy) <= ACCURACY_DIFF)
|
||||
|
||||
|
||||
def test_minimizer_params_categorical(cells_categorical):
|
||||
|
|
@ -450,13 +459,14 @@ def test_minimizer_params_categorical(cells_categorical):
|
|||
# Now we have a full prediction pipeline.
|
||||
target_accuracy = 0.5
|
||||
gen = GeneralizeToRepresentative(model, target_accuracy=target_accuracy,
|
||||
categorical_features=categorical_features, cells=cells)
|
||||
categorical_features=categorical_features, cells=cells,
|
||||
encoder=preprocessor)
|
||||
train_dataset = ArrayDataset(x, predictions)
|
||||
gen.fit(dataset=train_dataset)
|
||||
transformed = gen.transform(dataset=ArrayDataset(x))
|
||||
|
||||
rel_accuracy = model.score(ArrayDataset(preprocessor.transform(transformed), predictions))
|
||||
assert ((rel_accuracy >= target_accuracy) or (target_accuracy - rel_accuracy) <= 0.05)
|
||||
assert ((rel_accuracy >= target_accuracy) or (target_accuracy - rel_accuracy) <= ACCURACY_DIFF)
|
||||
|
||||
|
||||
def test_minimizer_fit_qi(data_three_features):
|
||||
|
|
@ -484,7 +494,7 @@ def test_minimizer_fit_qi(data_three_features):
|
|||
check_ncp(ncp, expected_generalizations)
|
||||
|
||||
rel_accuracy = model.score(ArrayDataset(transformed, predictions))
|
||||
assert ((rel_accuracy >= target_accuracy) or (target_accuracy - rel_accuracy) <= 0.05)
|
||||
assert ((rel_accuracy >= target_accuracy) or (target_accuracy - rel_accuracy) <= ACCURACY_DIFF)
|
||||
|
||||
|
||||
def test_minimizer_fit_pandas_qi(data_five_features):
|
||||
|
|
@ -508,7 +518,8 @@ def test_minimizer_fit_pandas_qi(data_five_features):
|
|||
# Now we have a full prediction pipeline.
|
||||
target_accuracy = 0.5
|
||||
gen = GeneralizeToRepresentative(model, target_accuracy=target_accuracy,
|
||||
categorical_features=categorical_features, features_to_minimize=qi)
|
||||
categorical_features=categorical_features, features_to_minimize=qi,
|
||||
encoder=preprocessor)
|
||||
train_dataset = ArrayDataset(x, predictions)
|
||||
gen.fit(dataset=train_dataset)
|
||||
transformed = gen.transform(dataset=ArrayDataset(x))
|
||||
|
|
@ -523,7 +534,7 @@ def test_minimizer_fit_pandas_qi(data_five_features):
|
|||
check_ncp(ncp, expected_generalizations)
|
||||
|
||||
rel_accuracy = model.score(ArrayDataset(preprocessor.transform(transformed), predictions))
|
||||
assert ((rel_accuracy >= target_accuracy) or (target_accuracy - rel_accuracy) <= 0.05)
|
||||
assert ((rel_accuracy >= target_accuracy) or (target_accuracy - rel_accuracy) <= ACCURACY_DIFF)
|
||||
|
||||
|
||||
def test_minimize_ndarray_iris():
|
||||
|
|
@ -552,7 +563,7 @@ def test_minimize_ndarray_iris():
|
|||
check_ncp(ncp, expected_generalizations)
|
||||
|
||||
rel_accuracy = model.score(ArrayDataset(transformed, predictions))
|
||||
assert ((rel_accuracy >= target_accuracy) or (target_accuracy - rel_accuracy) <= 0.05)
|
||||
assert ((rel_accuracy >= target_accuracy) or (target_accuracy - rel_accuracy) <= ACCURACY_DIFF)
|
||||
|
||||
|
||||
def test_minimize_pandas_adult():
|
||||
|
|
@ -582,7 +593,8 @@ def test_minimize_pandas_adult():
|
|||
predictions = np.argmax(predictions, axis=1)
|
||||
target_accuracy = 0.7
|
||||
gen = GeneralizeToRepresentative(model, target_accuracy=target_accuracy,
|
||||
categorical_features=categorical_features, features_to_minimize=qi)
|
||||
categorical_features=categorical_features, features_to_minimize=qi,
|
||||
encoder=preprocessor)
|
||||
gen.fit(dataset=ArrayDataset(x_train, predictions, features_names=features))
|
||||
transformed = gen.transform(dataset=ArrayDataset(x_train))
|
||||
gener = gen.generalizations
|
||||
|
|
@ -609,7 +621,7 @@ def test_minimize_pandas_adult():
|
|||
check_ncp(ncp, expected_generalizations)
|
||||
|
||||
rel_accuracy = model.score(ArrayDataset(preprocessor.transform(transformed), predictions))
|
||||
assert ((rel_accuracy >= target_accuracy) or (target_accuracy - rel_accuracy) <= 0.05)
|
||||
assert ((rel_accuracy >= target_accuracy) or (target_accuracy - rel_accuracy) <= ACCURACY_DIFF)
|
||||
|
||||
|
||||
def test_german_credit_pandas():
|
||||
|
|
@ -637,7 +649,8 @@ def test_german_credit_pandas():
|
|||
predictions = np.argmax(predictions, axis=1)
|
||||
target_accuracy = 0.7
|
||||
gen = GeneralizeToRepresentative(model, target_accuracy=target_accuracy,
|
||||
categorical_features=categorical_features, features_to_minimize=qi)
|
||||
categorical_features=categorical_features, features_to_minimize=qi,
|
||||
encoder=preprocessor)
|
||||
gen.fit(dataset=ArrayDataset(x_train, predictions))
|
||||
transformed = gen.transform(dataset=ArrayDataset(x_train))
|
||||
gener = gen.generalizations
|
||||
|
|
@ -666,7 +679,7 @@ def test_german_credit_pandas():
|
|||
check_ncp(ncp, expected_generalizations)
|
||||
|
||||
rel_accuracy = model.score(ArrayDataset(preprocessor.transform(transformed), predictions))
|
||||
assert ((rel_accuracy >= target_accuracy) or (target_accuracy - rel_accuracy) <= 0.05)
|
||||
assert ((rel_accuracy >= target_accuracy) or (target_accuracy - rel_accuracy) <= ACCURACY_DIFF)
|
||||
|
||||
|
||||
def test_regression(diabetes_dataset):
|
||||
|
|
@ -726,7 +739,7 @@ def test_regression(diabetes_dataset):
|
|||
check_ncp(ncp, expected_generalizations)
|
||||
|
||||
rel_accuracy = model.score(ArrayDataset(transformed, predictions))
|
||||
assert ((rel_accuracy >= target_accuracy) or (target_accuracy - rel_accuracy) <= 0.05)
|
||||
assert ((rel_accuracy >= target_accuracy) or (target_accuracy - rel_accuracy) <= ACCURACY_DIFF)
|
||||
|
||||
|
||||
def test_x_y():
|
||||
|
|
@ -766,7 +779,7 @@ def test_x_y():
|
|||
check_ncp(ncp, expected_generalizations)
|
||||
|
||||
rel_accuracy = model.score(ArrayDataset(transformed, predictions))
|
||||
assert ((rel_accuracy >= target_accuracy) or (target_accuracy - rel_accuracy) <= 0.05)
|
||||
assert ((rel_accuracy >= target_accuracy) or (target_accuracy - rel_accuracy) <= ACCURACY_DIFF)
|
||||
|
||||
|
||||
def test_x_y_features_names():
|
||||
|
|
@ -806,7 +819,7 @@ def test_x_y_features_names():
|
|||
check_ncp(ncp, expected_generalizations)
|
||||
|
||||
rel_accuracy = model.score(ArrayDataset(transformed, predictions))
|
||||
assert ((rel_accuracy >= target_accuracy) or (target_accuracy - rel_accuracy) <= 0.05)
|
||||
assert ((rel_accuracy >= target_accuracy) or (target_accuracy - rel_accuracy) <= ACCURACY_DIFF)
|
||||
|
||||
|
||||
def test_BaseEstimator_classification(data_five_features):
|
||||
|
|
@ -828,7 +841,8 @@ def test_BaseEstimator_classification(data_five_features):
|
|||
# Now we have a full prediction pipeline.
|
||||
target_accuracy = 0.5
|
||||
gen = GeneralizeToRepresentative(model, target_accuracy=target_accuracy,
|
||||
categorical_features=categorical_features, features_to_minimize=QI)
|
||||
categorical_features=categorical_features, features_to_minimize=QI,
|
||||
encoder=preprocessor)
|
||||
train_dataset = ArrayDataset(x, predictions)
|
||||
gen.fit(dataset=train_dataset)
|
||||
transformed = gen.transform(dataset=ArrayDataset(x))
|
||||
|
|
@ -844,7 +858,7 @@ def test_BaseEstimator_classification(data_five_features):
|
|||
check_ncp(ncp, expected_generalizations)
|
||||
|
||||
rel_accuracy = model.score(preprocessor.transform(transformed), predictions)
|
||||
assert ((rel_accuracy >= target_accuracy) or (target_accuracy - rel_accuracy) <= 0.05)
|
||||
assert ((rel_accuracy >= target_accuracy) or (target_accuracy - rel_accuracy) <= ACCURACY_DIFF)
|
||||
|
||||
|
||||
def test_BaseEstimator_regression(diabetes_dataset):
|
||||
|
|
@ -903,7 +917,7 @@ def test_BaseEstimator_regression(diabetes_dataset):
|
|||
check_ncp(ncp, expected_generalizations)
|
||||
|
||||
rel_accuracy = model.score(transformed, predictions)
|
||||
assert ((rel_accuracy >= target_accuracy) or (target_accuracy - rel_accuracy) <= 0.05)
|
||||
assert ((rel_accuracy >= target_accuracy) or (target_accuracy - rel_accuracy) <= ACCURACY_DIFF)
|
||||
|
||||
|
||||
def test_keras_model():
|
||||
|
|
@ -936,7 +950,39 @@ def test_keras_model():
|
|||
check_ncp(ncp, gener)
|
||||
|
||||
rel_accuracy = model.score(ArrayDataset(transformed, predictions))
|
||||
assert ((rel_accuracy >= target_accuracy) or (target_accuracy - rel_accuracy) <= 0.05)
|
||||
assert ((rel_accuracy >= target_accuracy) or (target_accuracy - rel_accuracy) <= ACCURACY_DIFF)
|
||||
|
||||
|
||||
class PytorchModel(nn.Module):
    """Fully connected network used as the base estimator in the PyTorch tests.

    The input passes through four ``Linear`` + ``Tanh`` stages of width
    1024, 512, 256 and 128, followed by a final ``Linear`` layer that emits
    one raw (un-normalized) score per class.

    :param num_classes: Number of outputs produced by the final layer.
    :param num_features: Number of input features expected by the first layer.
    """

    def __init__(self, num_classes, num_features):
        super().__init__()

        # Attribute names are kept stable (fc1..fc4, classifier) so that any
        # saved state dict keyed on them still loads.
        self.fc1 = nn.Sequential(
            nn.Linear(num_features, 1024),
            nn.Tanh(),
        )

        self.fc2 = nn.Sequential(
            nn.Linear(1024, 512),
            nn.Tanh(),
        )

        self.fc3 = nn.Sequential(
            nn.Linear(512, 256),
            nn.Tanh(),
        )

        self.fc4 = nn.Sequential(
            nn.Linear(256, 128),
            nn.Tanh(),
        )

        self.classifier = nn.Linear(128, num_classes)

    def forward(self, x):
        """Run ``x`` through the four hidden stages and return the class scores."""
        out = self.fc1(x)
        out = self.fc2(out)
        out = self.fc3(out)
        out = self.fc4(out)
        return self.classifier(out)
|
||||
|
||||
|
||||
def test_minimizer_pytorch(data_three_features):
|
||||
|
|
@ -944,49 +990,17 @@ def test_minimizer_pytorch(data_three_features):
|
|||
x = x.astype(np.float32)
|
||||
qi = ['age', 'weight']
|
||||
|
||||
from torch import nn, optim
|
||||
from apt.utils.datasets.datasets import PytorchData
|
||||
from apt.utils.models.pytorch_model import PyTorchClassifier
|
||||
|
||||
class pytorch_model(nn.Module):
|
||||
|
||||
def __init__(self, num_classes, num_features):
|
||||
super(pytorch_model, self).__init__()
|
||||
|
||||
self.fc1 = nn.Sequential(
|
||||
nn.Linear(num_features, 1024),
|
||||
nn.Tanh(), )
|
||||
|
||||
self.fc2 = nn.Sequential(
|
||||
nn.Linear(1024, 512),
|
||||
nn.Tanh(), )
|
||||
|
||||
self.fc3 = nn.Sequential(
|
||||
nn.Linear(512, 256),
|
||||
nn.Tanh(), )
|
||||
|
||||
self.fc4 = nn.Sequential(
|
||||
nn.Linear(256, 128),
|
||||
nn.Tanh(),
|
||||
)
|
||||
|
||||
self.classifier = nn.Linear(128, num_classes)
|
||||
|
||||
def forward(self, x):
|
||||
out = self.fc1(x)
|
||||
out = self.fc2(out)
|
||||
out = self.fc3(out)
|
||||
out = self.fc4(out)
|
||||
return self.classifier(out)
|
||||
|
||||
base_est = pytorch_model(2, 3)
|
||||
base_est = PytorchModel(2, 3)
|
||||
criterion = nn.CrossEntropyLoss()
|
||||
optimizer = optim.Adam(base_est.parameters(), lr=0.01)
|
||||
|
||||
model = PyTorchClassifier(model=base_est, output_type=ModelOutputType.CLASSIFIER_LOGITS, loss=criterion,
|
||||
optimizer=optimizer, input_shape=(3,),
|
||||
nb_classes=2)
|
||||
model.fit(PytorchData(x.astype(np.float32), y), save_entire_model=False, nb_epochs=10)
|
||||
model.fit(PytorchData(x, y), save_entire_model=False, nb_epochs=10)
|
||||
|
||||
ad = ArrayDataset(x)
|
||||
predictions = model.predict(ad)
|
||||
|
|
@ -1006,7 +1020,41 @@ def test_minimizer_pytorch(data_three_features):
|
|||
check_ncp(ncp, expected_generalizations)
|
||||
|
||||
rel_accuracy = model.score(ArrayDataset(transformed.astype(np.float32), predictions))
|
||||
assert ((rel_accuracy >= target_accuracy) or (target_accuracy - rel_accuracy) <= 0.05)
|
||||
assert ((rel_accuracy >= target_accuracy) or (target_accuracy - rel_accuracy) <= ACCURACY_DIFF)
|
||||
|
||||
|
||||
def test_minimizer_pytorch_iris():
    """Fit and apply the minimizer on the iris data with a PyTorch classifier.

    No encoder is passed to ``GeneralizeToRepresentative`` here, so this test
    covers the path where the generalized data is handed to the estimator as is.
    """
    features = ['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']
    (x_train, y_train), _ = get_iris_dataset_np()
    # The training data is cast to float32 before being wrapped for the model.
    x_train = x_train.astype(np.float32)
    # Every feature is a candidate for minimization.
    qi = list(features)

    from apt.utils.datasets.datasets import PytorchData
    from apt.utils.models.pytorch_model import PyTorchClassifier

    base_est = PytorchModel(3, 4)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(base_est.parameters(), lr=0.01)

    model = PyTorchClassifier(model=base_est, output_type=ModelOutputType.CLASSIFIER_LOGITS, loss=criterion,
                              optimizer=optimizer, input_shape=(4,),
                              nb_classes=3)
    model.fit(PytorchData(x_train, y_train), save_entire_model=False, nb_epochs=10)

    predictions = model.predict(ArrayDataset(x_train))
    # Collapse per-class scores to the index of the highest-scoring class.
    if predictions.shape[1] > 1:
        predictions = np.argmax(predictions, axis=1)
    target_accuracy = 0.99
    gen = GeneralizeToRepresentative(model, target_accuracy=target_accuracy, features_to_minimize=qi)
    transformed = gen.fit_transform(dataset=ArrayDataset(x_train, predictions, features_names=features))
    gener = gen.generalizations

    check_features(features, gener, transformed, x_train)
    ncp = gen.ncp.transform_score
    check_ncp(ncp, gener)

    # Accuracy is measured relative to the model's own predictions on the original data.
    rel_accuracy = model.score(ArrayDataset(transformed.astype(np.float32), predictions))
    assert ((rel_accuracy >= target_accuracy) or (target_accuracy - rel_accuracy) <= ACCURACY_DIFF)
|
||||
|
||||
|
||||
def test_untouched():
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue