From 30cb70506207d778c1eedd3417399e18ecf22c5f Mon Sep 17 00:00:00 2001
From: abigailt <abigailt@il.ibm.com>
Date: Thu, 5 Oct 2023 13:58:40 +0300
Subject: [PATCH] No default encoder; if none is provided, data is supplied to
 the model as is. Fix data type of representative values. Fix and add more tests.

Signed-off-by: abigailt <abigailt@il.ibm.com>
---
 apt/minimization/minimizer.py |  56 ++++++------
 tests/test_minimizer.py       | 162 ++++++++++++++++++++++------------
 2 files changed, 134 insertions(+), 84 deletions(-)

diff --git a/apt/minimization/minimizer.py b/apt/minimization/minimizer.py
index 29b7eb0..e0760fe 100644
--- a/apt/minimization/minimizer.py
+++ b/apt/minimization/minimizer.py
@@ -57,7 +57,7 @@ class GeneralizeToRepresentative(BaseEstimator, MetaEstimatorMixin, TransformerM
     :param categorical_features: The list of categorical features (if supplied, these featurtes will be one-hot
                                  encoded before using them to train the decision tree model).
     :param encoder: Optional encoder for encoding data before feeding it into the estimator (e.g., for categorical
-                    features)
+                    features). If not provided, the data will be fed as is directly to the estimator.
     :type encoder: sklearn OrdinalEncoder or OneHotEncoder
     :type categorical_features: list of strings, optional
     :param features_to_minimize: The features to be minimized.
@@ -256,7 +256,6 @@ class GeneralizeToRepresentative(BaseEstimator, MetaEstimatorMixin, TransformerM
         # Going to fit
         # (currently not dealing with option to fit with only X and y and no estimator)
         if self.estimator and dataset and dataset.get_samples() is not None and dataset.get_labels() is not None:
-            dtype = dataset.get_samples().dtype
             x = pd.DataFrame(dataset.get_samples(), columns=self._features)
             if not self.features_to_minimize:
                 self.features_to_minimize = self._features
@@ -293,21 +292,6 @@ class GeneralizeToRepresentative(BaseEstimator, MetaEstimatorMixin, TransformerM
             # collect feature data (such as min, max)
             self._feature_data = self._get_feature_data(x)
 
-            # default encoder in case none provided
-            if self.encoder is None:
-                numeric_features = [f for f in self._features if f not in self.categorical_features]
-                numeric_transformer = Pipeline(
-                    steps=[('imputer', SimpleImputer(strategy='constant', fill_value=0))]
-                )
-                categorical_transformer = OneHotEncoder(handle_unknown="ignore", sparse=False)
-                self.encoder = ColumnTransformer(
-                    transformers=[
-                        ("num", numeric_transformer, numeric_features),
-                        ("cat", categorical_transformer, self.categorical_features),
-                    ]
-                )
-                self.encoder.fit(x)
-
             self.cells = []
             self._categorical_values = {}
 
@@ -341,7 +325,10 @@ class GeneralizeToRepresentative(BaseEstimator, MetaEstimatorMixin, TransformerM
                 generalized = self._generalize_from_generalizations(x_test, self.generalizations)
 
             # check accuracy
-            accuracy = self.estimator.score(ArrayDataset(self.encoder.transform(generalized).astype(dtype), y_test))
+            if self.encoder:
+                accuracy = self.estimator.score(ArrayDataset(self.encoder.transform(generalized), y_test))
+            else:
+                accuracy = self.estimator.score(ArrayDataset(generalized, y_test))
             print('Initial accuracy of model on generalized data, relative to original model predictions '
                   '(base generalization derived from tree, before improvements): %f' % accuracy)
 
@@ -371,8 +358,10 @@ class GeneralizeToRepresentative(BaseEstimator, MetaEstimatorMixin, TransformerM
                     else:
                         generalized = self._generalize_from_generalizations(x_test, self.generalizations)
 
-                    accuracy = self.estimator.score(ArrayDataset(self.encoder.transform(generalized).astype(dtype),
-                                                                 y_test))
+                    if self.encoder:
+                        accuracy = self.estimator.score(ArrayDataset(self.encoder.transform(generalized), y_test))
+                    else:
+                        accuracy = self.estimator.score(ArrayDataset(generalized, y_test))
                     # if accuracy passed threshold roll back to previous iteration generalizations
                     if accuracy < self.target_accuracy:
                         self.cells = cells_previous_iter
@@ -401,8 +390,11 @@ class GeneralizeToRepresentative(BaseEstimator, MetaEstimatorMixin, TransformerM
                                                                  self._cells_by_id)
                     else:
                         generalized = self._generalize_from_generalizations(x_test, self.generalizations)
-                    accuracy = self.estimator.score(ArrayDataset(self.encoder.transform(generalized).astype(dtype),
-                                                                 y_test))
+
+                    if self.encoder:
+                        accuracy = self.estimator.score(ArrayDataset(self.encoder.transform(generalized), y_test))
+                    else:
+                        accuracy = self.estimator.score(ArrayDataset(generalized, y_test))
                     print('Removed feature: %s, new relative accuracy: %f' % (removed_feature, accuracy))
 
             # self._cells currently holds the chosen generalization based on target accuracy
@@ -893,7 +885,11 @@ class GeneralizeToRepresentative(BaseEstimator, MetaEstimatorMixin, TransformerM
 
     def _generalize_indexes(self, original_data, cells, all_indexes):
         # prepared data include one hot encoded categorical data + QI
-        representatives = pd.DataFrame(columns=self._features)  # empty except for columns
+        dtypes = original_data.dtypes.to_dict()
+        new_dtypes = {}
+        for t in dtypes.keys():
+            new_dtypes[t] = pd.Series(dtype=dtypes[t].name)
+        representatives = pd.DataFrame(new_dtypes)  # empty except for columns
         original_data_generalized = pd.DataFrame(original_data, columns=self._features, copy=True)
 
         # iterate over cells (leaves in decision tree)
@@ -1000,8 +996,11 @@ class GeneralizeToRepresentative(BaseEstimator, MetaEstimatorMixin, TransformerM
                     GeneralizeToRepresentative._remove_feature_from_cells(new_cells, cells_by_id, feature)
                     generalized = self._generalize_from_tree(original_data, prepared_data, nodes, new_cells,
                                                              cells_by_id)
-                    accuracy_gain = self.estimator.score(ArrayDataset(self.encoder.transform(generalized),
-                                                                      labels)) - current_accuracy
+                    if self.encoder:
+                        accuracy_gain = self.estimator.score(ArrayDataset(self.encoder.transform(generalized),
+                                                                          labels)) - current_accuracy
+                    else:
+                        accuracy_gain = self.estimator.score(ArrayDataset(generalized, labels)) - current_accuracy
                     if accuracy_gain < 0:
                         accuracy_gain = 0
                     if accuracy_gain != 0:
@@ -1027,8 +1026,11 @@ class GeneralizeToRepresentative(BaseEstimator, MetaEstimatorMixin, TransformerM
                     GeneralizeToRepresentative._remove_feature_from_cells(new_cells, cells_by_id, feature)
                     generalized = self._generalize_from_tree(original_data, prepared_data, nodes, new_cells,
                                                              cells_by_id)
-                    accuracy_gain = self.estimator.score(ArrayDataset(self.encoder.transform(generalized),
-                                                                      labels)) - current_accuracy
+                    if self.encoder:
+                        accuracy_gain = self.estimator.score(ArrayDataset(self.encoder.transform(generalized),
+                                                                          labels)) - current_accuracy
+                    else:
+                        accuracy_gain = self.estimator.score(ArrayDataset(generalized, labels)) - current_accuracy
 
                     if accuracy_gain < 0:
                         accuracy_gain = 0
diff --git a/tests/test_minimizer.py b/tests/test_minimizer.py
index 135adcf..ca34fbd 100644
--- a/tests/test_minimizer.py
+++ b/tests/test_minimizer.py
@@ -11,6 +11,8 @@ from sklearn.model_selection import train_test_split
 from sklearn.pipeline import Pipeline
 from sklearn.preprocessing import OneHotEncoder
 
+from torch import nn, optim
+
 import tensorflow as tf
 from tensorflow.keras.models import Sequential
 from tensorflow.keras.layers import Dense, Input
@@ -24,6 +26,9 @@ from apt.utils.models import SklearnClassifier, ModelOutputType, SklearnRegresso
 tf.compat.v1.disable_eager_execution()
 
 
+ACCURACY_DIFF = 0.05
+
+
 @pytest.fixture
 def diabetes_dataset():
     return load_diabetes()
@@ -286,7 +291,7 @@ def test_minimizer_fit(data_two_features):
     check_ncp(ncp, expected_generalizations)
 
     rel_accuracy = model.score(ArrayDataset(transformed, predictions))
-    assert ((rel_accuracy >= target_accuracy) or (target_accuracy - rel_accuracy) <= 0.05)
+    assert ((rel_accuracy >= target_accuracy) or (target_accuracy - rel_accuracy) <= ACCURACY_DIFF)
 
 
 def test_minimizer_ncp(data_two_features):
@@ -348,12 +353,15 @@ def test_minimizer_ncp_categorical(data_four_features):
     train_dataset = ArrayDataset(x, predictions, features_names=features)
 
     gen1 = GeneralizeToRepresentative(model, target_accuracy=target_accuracy,
-                                      categorical_features=categorical_features, generalize_using_transform=False)
+                                      categorical_features=categorical_features,
+                                      generalize_using_transform=False,
+                                      encoder=preprocessor)
     gen1.fit(dataset=train_dataset)
     ncp1 = gen1.ncp.fit_score
     ncp2 = gen1.calculate_ncp(ad1)
 
-    gen2 = GeneralizeToRepresentative(model, target_accuracy=target_accuracy, categorical_features=categorical_features)
+    gen2 = GeneralizeToRepresentative(model, target_accuracy=target_accuracy, categorical_features=categorical_features,
+                                      encoder=preprocessor)
     gen2.fit(dataset=train_dataset)
     ncp3 = gen2.ncp.fit_score
     gen2.transform(dataset=ad1)
@@ -414,7 +422,8 @@ def test_minimizer_fit_pandas(data_four_features):
     # Now we have a full prediction pipeline.
     target_accuracy = 0.5
     gen = GeneralizeToRepresentative(model, target_accuracy=target_accuracy,
-                                     categorical_features=categorical_features)
+                                     categorical_features=categorical_features,
+                                     encoder=preprocessor)
     train_dataset = ArrayDataset(x, predictions)
     gen.fit(dataset=train_dataset)
     transformed = gen.transform(dataset=ArrayDataset(x))
@@ -428,7 +437,7 @@ def test_minimizer_fit_pandas(data_four_features):
     check_ncp(ncp, expected_generalizations)
 
     rel_accuracy = model.score(ArrayDataset(preprocessor.transform(transformed), predictions))
-    assert ((rel_accuracy >= target_accuracy) or (target_accuracy - rel_accuracy) <= 0.05)
+    assert ((rel_accuracy >= target_accuracy) or (target_accuracy - rel_accuracy) <= ACCURACY_DIFF)
 
 
 def test_minimizer_params_categorical(cells_categorical):
@@ -450,13 +459,14 @@ def test_minimizer_params_categorical(cells_categorical):
     # Now we have a full prediction pipeline.
     target_accuracy = 0.5
     gen = GeneralizeToRepresentative(model, target_accuracy=target_accuracy,
-                                     categorical_features=categorical_features, cells=cells)
+                                     categorical_features=categorical_features, cells=cells,
+                                     encoder=preprocessor)
     train_dataset = ArrayDataset(x, predictions)
     gen.fit(dataset=train_dataset)
     transformed = gen.transform(dataset=ArrayDataset(x))
 
     rel_accuracy = model.score(ArrayDataset(preprocessor.transform(transformed), predictions))
-    assert ((rel_accuracy >= target_accuracy) or (target_accuracy - rel_accuracy) <= 0.05)
+    assert ((rel_accuracy >= target_accuracy) or (target_accuracy - rel_accuracy) <= ACCURACY_DIFF)
 
 
 def test_minimizer_fit_qi(data_three_features):
@@ -484,7 +494,7 @@ def test_minimizer_fit_qi(data_three_features):
     check_ncp(ncp, expected_generalizations)
 
     rel_accuracy = model.score(ArrayDataset(transformed, predictions))
-    assert ((rel_accuracy >= target_accuracy) or (target_accuracy - rel_accuracy) <= 0.05)
+    assert ((rel_accuracy >= target_accuracy) or (target_accuracy - rel_accuracy) <= ACCURACY_DIFF)
 
 
 def test_minimizer_fit_pandas_qi(data_five_features):
@@ -508,7 +518,8 @@ def test_minimizer_fit_pandas_qi(data_five_features):
     # Now we have a full prediction pipeline.
     target_accuracy = 0.5
     gen = GeneralizeToRepresentative(model, target_accuracy=target_accuracy,
-                                     categorical_features=categorical_features, features_to_minimize=qi)
+                                     categorical_features=categorical_features, features_to_minimize=qi,
+                                     encoder=preprocessor)
     train_dataset = ArrayDataset(x, predictions)
     gen.fit(dataset=train_dataset)
     transformed = gen.transform(dataset=ArrayDataset(x))
@@ -523,7 +534,7 @@ def test_minimizer_fit_pandas_qi(data_five_features):
     check_ncp(ncp, expected_generalizations)
 
     rel_accuracy = model.score(ArrayDataset(preprocessor.transform(transformed), predictions))
-    assert ((rel_accuracy >= target_accuracy) or (target_accuracy - rel_accuracy) <= 0.05)
+    assert ((rel_accuracy >= target_accuracy) or (target_accuracy - rel_accuracy) <= ACCURACY_DIFF)
 
 
 def test_minimize_ndarray_iris():
@@ -552,7 +563,7 @@ def test_minimize_ndarray_iris():
     check_ncp(ncp, expected_generalizations)
 
     rel_accuracy = model.score(ArrayDataset(transformed, predictions))
-    assert ((rel_accuracy >= target_accuracy) or (target_accuracy - rel_accuracy) <= 0.05)
+    assert ((rel_accuracy >= target_accuracy) or (target_accuracy - rel_accuracy) <= ACCURACY_DIFF)
 
 
 def test_minimize_pandas_adult():
@@ -582,7 +593,8 @@ def test_minimize_pandas_adult():
         predictions = np.argmax(predictions, axis=1)
     target_accuracy = 0.7
     gen = GeneralizeToRepresentative(model, target_accuracy=target_accuracy,
-                                     categorical_features=categorical_features, features_to_minimize=qi)
+                                     categorical_features=categorical_features, features_to_minimize=qi,
+                                     encoder=preprocessor)
     gen.fit(dataset=ArrayDataset(x_train, predictions, features_names=features))
     transformed = gen.transform(dataset=ArrayDataset(x_train))
     gener = gen.generalizations
@@ -609,7 +621,7 @@ def test_minimize_pandas_adult():
     check_ncp(ncp, expected_generalizations)
 
     rel_accuracy = model.score(ArrayDataset(preprocessor.transform(transformed), predictions))
-    assert ((rel_accuracy >= target_accuracy) or (target_accuracy - rel_accuracy) <= 0.05)
+    assert ((rel_accuracy >= target_accuracy) or (target_accuracy - rel_accuracy) <= ACCURACY_DIFF)
 
 
 def test_german_credit_pandas():
@@ -637,7 +649,8 @@ def test_german_credit_pandas():
         predictions = np.argmax(predictions, axis=1)
     target_accuracy = 0.7
     gen = GeneralizeToRepresentative(model, target_accuracy=target_accuracy,
-                                     categorical_features=categorical_features, features_to_minimize=qi)
+                                     categorical_features=categorical_features, features_to_minimize=qi,
+                                     encoder=preprocessor)
     gen.fit(dataset=ArrayDataset(x_train, predictions))
     transformed = gen.transform(dataset=ArrayDataset(x_train))
     gener = gen.generalizations
@@ -666,7 +679,7 @@ def test_german_credit_pandas():
     check_ncp(ncp, expected_generalizations)
 
     rel_accuracy = model.score(ArrayDataset(preprocessor.transform(transformed), predictions))
-    assert ((rel_accuracy >= target_accuracy) or (target_accuracy - rel_accuracy) <= 0.05)
+    assert ((rel_accuracy >= target_accuracy) or (target_accuracy - rel_accuracy) <= ACCURACY_DIFF)
 
 
 def test_regression(diabetes_dataset):
@@ -726,7 +739,7 @@ def test_regression(diabetes_dataset):
     check_ncp(ncp, expected_generalizations)
 
     rel_accuracy = model.score(ArrayDataset(transformed, predictions))
-    assert ((rel_accuracy >= target_accuracy) or (target_accuracy - rel_accuracy) <= 0.05)
+    assert ((rel_accuracy >= target_accuracy) or (target_accuracy - rel_accuracy) <= ACCURACY_DIFF)
 
 
 def test_x_y():
@@ -766,7 +779,7 @@ def test_x_y():
     check_ncp(ncp, expected_generalizations)
 
     rel_accuracy = model.score(ArrayDataset(transformed, predictions))
-    assert ((rel_accuracy >= target_accuracy) or (target_accuracy - rel_accuracy) <= 0.05)
+    assert ((rel_accuracy >= target_accuracy) or (target_accuracy - rel_accuracy) <= ACCURACY_DIFF)
 
 
 def test_x_y_features_names():
@@ -806,7 +819,7 @@ def test_x_y_features_names():
     check_ncp(ncp, expected_generalizations)
 
     rel_accuracy = model.score(ArrayDataset(transformed, predictions))
-    assert ((rel_accuracy >= target_accuracy) or (target_accuracy - rel_accuracy) <= 0.05)
+    assert ((rel_accuracy >= target_accuracy) or (target_accuracy - rel_accuracy) <= ACCURACY_DIFF)
 
 
 def test_BaseEstimator_classification(data_five_features):
@@ -828,7 +841,8 @@ def test_BaseEstimator_classification(data_five_features):
     # Now we have a full prediction pipeline.
     target_accuracy = 0.5
     gen = GeneralizeToRepresentative(model, target_accuracy=target_accuracy,
-                                     categorical_features=categorical_features, features_to_minimize=QI)
+                                     categorical_features=categorical_features, features_to_minimize=QI,
+                                     encoder=preprocessor)
     train_dataset = ArrayDataset(x, predictions)
     gen.fit(dataset=train_dataset)
     transformed = gen.transform(dataset=ArrayDataset(x))
@@ -844,7 +858,7 @@ def test_BaseEstimator_classification(data_five_features):
     check_ncp(ncp, expected_generalizations)
 
     rel_accuracy = model.score(preprocessor.transform(transformed), predictions)
-    assert ((rel_accuracy >= target_accuracy) or (target_accuracy - rel_accuracy) <= 0.05)
+    assert ((rel_accuracy >= target_accuracy) or (target_accuracy - rel_accuracy) <= ACCURACY_DIFF)
 
 
 def test_BaseEstimator_regression(diabetes_dataset):
@@ -903,7 +917,7 @@ def test_BaseEstimator_regression(diabetes_dataset):
     check_ncp(ncp, expected_generalizations)
 
     rel_accuracy = model.score(transformed, predictions)
-    assert ((rel_accuracy >= target_accuracy) or (target_accuracy - rel_accuracy) <= 0.05)
+    assert ((rel_accuracy >= target_accuracy) or (target_accuracy - rel_accuracy) <= ACCURACY_DIFF)
 
 
 def test_keras_model():
@@ -936,7 +950,39 @@ def test_keras_model():
     check_ncp(ncp, gener)
 
     rel_accuracy = model.score(ArrayDataset(transformed, predictions))
-    assert ((rel_accuracy >= target_accuracy) or (target_accuracy - rel_accuracy) <= 0.05)
+    assert ((rel_accuracy >= target_accuracy) or (target_accuracy - rel_accuracy) <= ACCURACY_DIFF)
+
+
+class PytorchModel(nn.Module):
+
+    def __init__(self, num_classes, num_features):
+        super(PytorchModel, self).__init__()
+
+        self.fc1 = nn.Sequential(
+            nn.Linear(num_features, 1024),
+            nn.Tanh(), )
+
+        self.fc2 = nn.Sequential(
+            nn.Linear(1024, 512),
+            nn.Tanh(), )
+
+        self.fc3 = nn.Sequential(
+            nn.Linear(512, 256),
+            nn.Tanh(), )
+
+        self.fc4 = nn.Sequential(
+            nn.Linear(256, 128),
+            nn.Tanh(),
+        )
+
+        self.classifier = nn.Linear(128, num_classes)
+
+    def forward(self, x):
+        out = self.fc1(x)
+        out = self.fc2(out)
+        out = self.fc3(out)
+        out = self.fc4(out)
+        return self.classifier(out)
 
 
 def test_minimizer_pytorch(data_three_features):
@@ -944,49 +990,17 @@ def test_minimizer_pytorch(data_three_features):
     x = x.astype(np.float32)
     qi = ['age', 'weight']
 
-    from torch import nn, optim
     from apt.utils.datasets.datasets import PytorchData
     from apt.utils.models.pytorch_model import PyTorchClassifier
 
-    class pytorch_model(nn.Module):
-
-        def __init__(self, num_classes, num_features):
-            super(pytorch_model, self).__init__()
-
-            self.fc1 = nn.Sequential(
-                nn.Linear(num_features, 1024),
-                nn.Tanh(), )
-
-            self.fc2 = nn.Sequential(
-                nn.Linear(1024, 512),
-                nn.Tanh(), )
-
-            self.fc3 = nn.Sequential(
-                nn.Linear(512, 256),
-                nn.Tanh(), )
-
-            self.fc4 = nn.Sequential(
-                nn.Linear(256, 128),
-                nn.Tanh(),
-            )
-
-            self.classifier = nn.Linear(128, num_classes)
-
-        def forward(self, x):
-            out = self.fc1(x)
-            out = self.fc2(out)
-            out = self.fc3(out)
-            out = self.fc4(out)
-            return self.classifier(out)
-
-    base_est = pytorch_model(2, 3)
+    base_est = PytorchModel(2, 3)
     criterion = nn.CrossEntropyLoss()
     optimizer = optim.Adam(base_est.parameters(), lr=0.01)
 
     model = PyTorchClassifier(model=base_est, output_type=ModelOutputType.CLASSIFIER_LOGITS, loss=criterion,
                               optimizer=optimizer, input_shape=(3,),
                               nb_classes=2)
-    model.fit(PytorchData(x.astype(np.float32), y), save_entire_model=False, nb_epochs=10)
+    model.fit(PytorchData(x, y), save_entire_model=False, nb_epochs=10)
 
     ad = ArrayDataset(x)
     predictions = model.predict(ad)
@@ -1006,7 +1020,41 @@ def test_minimizer_pytorch(data_three_features):
     check_ncp(ncp, expected_generalizations)
 
     rel_accuracy = model.score(ArrayDataset(transformed.astype(np.float32), predictions))
-    assert ((rel_accuracy >= target_accuracy) or (target_accuracy - rel_accuracy) <= 0.05)
+    assert ((rel_accuracy >= target_accuracy) or (target_accuracy - rel_accuracy) <= ACCURACY_DIFF)
+
+
+def test_minimizer_pytorch_iris():
+    features = ['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']
+    (x_train, y_train), _ = get_iris_dataset_np()
+    x_train = x_train.astype(np.float32)
+    qi = ['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']
+
+    from apt.utils.datasets.datasets import PytorchData
+    from apt.utils.models.pytorch_model import PyTorchClassifier
+
+    base_est = PytorchModel(3, 4)
+    criterion = nn.CrossEntropyLoss()
+    optimizer = optim.Adam(base_est.parameters(), lr=0.01)
+
+    model = PyTorchClassifier(model=base_est, output_type=ModelOutputType.CLASSIFIER_LOGITS, loss=criterion,
+                              optimizer=optimizer, input_shape=(4,),
+                              nb_classes=3)
+    model.fit(PytorchData(x_train, y_train), save_entire_model=False, nb_epochs=10)
+
+    predictions = model.predict(ArrayDataset(x_train))
+    if predictions.shape[1] > 1:
+        predictions = np.argmax(predictions, axis=1)
+    target_accuracy = 0.99
+    gen = GeneralizeToRepresentative(model, target_accuracy=target_accuracy, features_to_minimize=qi)
+    transformed = gen.fit_transform(dataset=ArrayDataset(x_train, predictions, features_names=features))
+    gener = gen.generalizations
+
+    check_features(features, gener, transformed, x_train)
+    ncp = gen.ncp.transform_score
+    check_ncp(ncp, gener)
+
+    rel_accuracy = model.score(ArrayDataset(transformed.astype(np.float32), predictions))
+    assert ((rel_accuracy >= target_accuracy) or (target_accuracy - rel_accuracy) <= ACCURACY_DIFF)
 
 
 def test_untouched():