diff --git a/apt/minimization/minimizer.py b/apt/minimization/minimizer.py
index 5993397..29b7eb0 100644
--- a/apt/minimization/minimizer.py
+++ b/apt/minimization/minimizer.py
@@ -256,6 +256,7 @@ class GeneralizeToRepresentative(BaseEstimator, MetaEstimatorMixin, TransformerM
         # Going to fit
         # (currently not dealing with option to fit with only X and y and no estimator)
         if self.estimator and dataset and dataset.get_samples() is not None and dataset.get_labels() is not None:
+            dtype = dataset.get_samples().dtype
             x = pd.DataFrame(dataset.get_samples(), columns=self._features)
             if not self.features_to_minimize:
                 self.features_to_minimize = self._features
@@ -340,7 +341,7 @@ class GeneralizeToRepresentative(BaseEstimator, MetaEstimatorMixin, TransformerM
             generalized = self._generalize_from_generalizations(x_test, self.generalizations)

             # check accuracy
-            accuracy = self.estimator.score(ArrayDataset(self.encoder.transform(generalized), y_test))
+            accuracy = self.estimator.score(ArrayDataset(self.encoder.transform(generalized).astype(dtype), y_test))
             print('Initial accuracy of model on generalized data, relative to original model predictions '
                   '(base generalization derived from tree, before improvements): %f' % accuracy)

@@ -370,7 +371,8 @@ class GeneralizeToRepresentative(BaseEstimator, MetaEstimatorMixin, TransformerM
                 else:
                     generalized = self._generalize_from_generalizations(x_test, self.generalizations)

-                accuracy = self.estimator.score(ArrayDataset(self.encoder.transform(generalized), y_test))
+                accuracy = self.estimator.score(ArrayDataset(self.encoder.transform(generalized).astype(dtype),
+                                                             y_test))
                 # if accuracy passed threshold roll back to previous iteration generalizations
                 if accuracy < self.target_accuracy:
                     self.cells = cells_previous_iter
@@ -399,7 +401,8 @@ class GeneralizeToRepresentative(BaseEstimator, MetaEstimatorMixin, TransformerM
                                                   self._cells_by_id)
                 else:
                     generalized = self._generalize_from_generalizations(x_test, self.generalizations)
-                accuracy = self.estimator.score(ArrayDataset(self.encoder.transform(generalized), y_test))
+                accuracy = self.estimator.score(ArrayDataset(self.encoder.transform(generalized).astype(dtype),
+                                                             y_test))
                 print('Removed feature: %s, new relative accuracy: %f' % (removed_feature, accuracy))

         # self._cells currently holds the chosen generalization based on target accuracy
diff --git a/tests/test_minimizer.py b/tests/test_minimizer.py
index 4a484c4..135adcf 100644
--- a/tests/test_minimizer.py
+++ b/tests/test_minimizer.py
@@ -939,6 +939,76 @@ def test_keras_model():
     assert ((rel_accuracy >= target_accuracy) or (target_accuracy - rel_accuracy) <= 0.05)


+def test_minimizer_pytorch(data_three_features):
+    x, y, features = data_three_features
+    x = x.astype(np.float32)
+    qi = ['age', 'weight']
+
+    from torch import nn, optim
+    from apt.utils.datasets.datasets import PytorchData
+    from apt.utils.models.pytorch_model import PyTorchClassifier
+
+    class pytorch_model(nn.Module):
+
+        def __init__(self, num_classes, num_features):
+            super(pytorch_model, self).__init__()
+
+            self.fc1 = nn.Sequential(
+                nn.Linear(num_features, 1024),
+                nn.Tanh(), )
+
+            self.fc2 = nn.Sequential(
+                nn.Linear(1024, 512),
+                nn.Tanh(), )
+
+            self.fc3 = nn.Sequential(
+                nn.Linear(512, 256),
+                nn.Tanh(), )
+
+            self.fc4 = nn.Sequential(
+                nn.Linear(256, 128),
+                nn.Tanh(),
+            )
+
+            self.classifier = nn.Linear(128, num_classes)
+
+        def forward(self, x):
+            out = self.fc1(x)
+            out = self.fc2(out)
+            out = self.fc3(out)
+            out = self.fc4(out)
+            return self.classifier(out)
+
+    base_est = pytorch_model(2, 3)
+    criterion = nn.CrossEntropyLoss()
+    optimizer = optim.Adam(base_est.parameters(), lr=0.01)
+
+    model = PyTorchClassifier(model=base_est, output_type=ModelOutputType.CLASSIFIER_LOGITS, loss=criterion,
+                              optimizer=optimizer, input_shape=(3,),
+                              nb_classes=2)
+    model.fit(PytorchData(x.astype(np.float32), y), save_entire_model=False, nb_epochs=10)
+
+    ad = ArrayDataset(x)
+    predictions = model.predict(ad)
+    if predictions.shape[1] > 1:
+        predictions = np.argmax(predictions, axis=1)
+    target_accuracy = 0.5
+    gen = GeneralizeToRepresentative(model, target_accuracy=target_accuracy, features_to_minimize=qi)
+    train_dataset = ArrayDataset(x, predictions, features_names=features)
+    gen.fit(dataset=train_dataset)
+    transformed = gen.transform(dataset=ad)
+    gener = gen.generalizations
+    expected_generalizations = {'ranges': {'age': [], 'weight': []}, 'categories': {}, 'untouched': ['height']}
+    compare_generalizations(gener, expected_generalizations)
+    check_features(features, expected_generalizations, transformed, x)
+    assert ((np.delete(transformed, [0, 2], axis=1) == np.delete(x, [0, 2], axis=1)).all())
+    ncp = gen.ncp.transform_score
+    check_ncp(ncp, expected_generalizations)
+
+    rel_accuracy = model.score(ArrayDataset(transformed.astype(np.float32), predictions))
+    assert ((rel_accuracy >= target_accuracy) or (target_accuracy - rel_accuracy) <= 0.05)
+
+
 def test_untouched():
     cells = [{"id": 1, "ranges": {"age": {"start": None, "end": 38}}, "label": 0, 'categories': {'gender': ['male']},
              "representative": {"age": 26, "height": 149}},