Support additional use cases for data (#46)

* Make ART black box classifier not apply preprocessing to data
* Add option to store predictions (in addition to x,y) in Dataset and Data classes
2026-06-29 15:59:38 +02:00 · 2022-07-11 14:28:09 +03:00 · 2022-07-11 14:28:09 +03:00 · 00f9c16863
commit 00f9c16863
parent e25e58b253
6 changed files with 139 additions and 62 deletions
--- a/tests/test_datasets.py
+++ b/tests/test_datasets.py
@ -0,0 +1,41 @@
+import pytest
+import numpy as np
+
+from apt.utils.datasets import Data, DatasetWithPredictions
+from apt.utils import dataset_utils
+
+
+def test_dataset_predictions():
+    (x_train, y_train), (_, _) = dataset_utils.get_iris_dataset_np()
+    pred = np.array([[0.23, 0.56, 0.21] for i in range(105)])
+
+    dataset = DatasetWithPredictions(pred)
+    data = Data(train=dataset)
+
+    new_pred = data.get_train_set().get_predictions()
+
+    assert np.equal(pred, new_pred).all()
+
+
+def test_dataset_predictions_x():
+    (x_train, y_train), (_, _) = dataset_utils.get_iris_dataset_np()
+    pred = np.array([[0.23, 0.56, 0.21] for i in range(105)])
+
+    dataset = DatasetWithPredictions(pred, x=x_train)
+    data = Data(train=dataset)
+
+    new_pred = data.get_train_set().get_predictions()
+
+    assert np.equal(pred, new_pred).all()
+
+
+def test_dataset_predictions_y():
+    (x_train, y_train), (_, _) = dataset_utils.get_iris_dataset_np()
+    pred = np.array([[0.23, 0.56, 0.21] for i in range(105)])
+
+    dataset = DatasetWithPredictions(pred, x=x_train, y=y_train)
+    data = Data(train=dataset)
+
+    new_pred = data.get_train_set().get_predictions()
+
+    assert np.equal(pred, new_pred).all()
--- a/tests/test_minimizer.py
+++ b/tests/test_minimizer.py
@ -944,53 +944,6 @@ def test_keras_model():
    assert ((rel_accuracy >= target_accuracy) or (target_accuracy - rel_accuracy) <= 0.05)


-def test_blackbox_model():
-    (X, y), (x_test, y_test) = get_iris_dataset_np()
-    train_data = ArrayDataset(X, y)
-    test_data = ArrayDataset(x_test, y_test)
-    data = Data(train_data, test_data)
-
-    model = BlackboxClassifierPredictions(data, ModelOutputType.CLASSIFIER_PROBABILITIES)
-    ad = ArrayDataset(x_test)
-    predictions = model.predict(ad)
-    if predictions.shape[1] > 1:
-        predictions = np.argmax(predictions, axis=1)
-    target_accuracy = 0.5
-    gen = GeneralizeToRepresentative(model, target_accuracy=target_accuracy)
-    train_dataset = ArrayDataset(x_test, predictions)
-
-    gen.fit(dataset=train_dataset)
-    transformed = gen.transform(dataset=ad)
-    gener = gen.generalizations
-    expected_generalizations = {'ranges': {'0': [], '1': [], '2': [4.849999904632568], '3': [0.7000000029802322]},
-                                'categories': {},
-                                'untouched': []}
-
-    for key in expected_generalizations['ranges']:
-        assert_almost_equal(expected_generalizations['ranges'][key], gener['ranges'][key])
-    for key in expected_generalizations['categories']:
-        assert (set([frozenset(sl) for sl in expected_generalizations['categories'][key]]) ==
-                set([frozenset(sl) for sl in gener['categories'][key]]))
-    assert (set(expected_generalizations['untouched']) == set(gener['untouched']))
-
-    features = ['0', '1', '2', '3']
-    modified_features = [f for f in features if
-                         f in expected_generalizations['categories'].keys() or f in expected_generalizations[
-                             'ranges'].keys()]
-    indexes = []
-    for i in range(len(features)):
-        if features[i] in modified_features:
-            indexes.append(i)
-    assert ((np.delete(transformed, indexes, axis=1) == np.delete(x_test, indexes, axis=1)).all())
-    ncp = gen.ncp
-    if len(expected_generalizations['ranges'].keys()) > 0 or len(expected_generalizations['categories'].keys()) > 0:
-        assert (ncp > 0)
-        assert (((transformed[indexes]) != (X[indexes])).any())
-
-    rel_accuracy = model.score(ArrayDataset(transformed, predictions))
-    assert ((rel_accuracy >= target_accuracy) or (target_accuracy - rel_accuracy) <= 0.05)
-
-
 def test_untouched():
    cells = [{"id": 1, "ranges": {"age": {"start": None, "end": 38}}, "label": 0,
              'categories': {'gender': ['male']}, "representative": {"age": 26, "height": 149}},
--- a/tests/test_model.py
+++ b/tests/test_model.py
@ -77,7 +77,7 @@ def test_blackbox_classifier():
    assert(pred.shape[0] == x_test.shape[0])

    score = model.score(test)
-    assert(0.0 <= score <= 1.0)
+    assert(score == 1.0)

 def test_blackbox_classifier_no_test():
    (x_train, y_train), (_, _) = dataset_utils.get_iris_dataset_np()
@ -90,7 +90,7 @@ def test_blackbox_classifier_no_test():
    assert(pred.shape[0] == x_train.shape[0])

    score = model.score(train)
-    assert(0.0 <= score <= 1.0)
+    assert (score == 1.0)


 def test_blackbox_classifier_no_train():
@ -103,7 +103,7 @@ def test_blackbox_classifier_no_train():
    assert(pred.shape[0] == x_test.shape[0])

    score = model.score(test)
-    assert(0.0 <= score <= 1.0)
+    assert (score == 1.0)


 def test_blackbox_classifier_no_test_y():
@ -117,7 +117,7 @@ def test_blackbox_classifier_no_test_y():
    assert(pred.shape[0] == x_train.shape[0])

    score = model.score(train)
-    assert(0.0 <= score <= 1.0)
+    assert (score == 1.0)

    # since no test_y, BBC should use only test thus predict test should fail
    unable_to_predict_test = False
@ -139,7 +139,7 @@ def test_blackbox_classifier_no_train_y():
    assert (pred.shape[0] == x_test.shape[0])

    score = model.score(test)
-    assert (0.0 <= score <= 1.0)
+    assert (score == 1.0)

    # since no train_y, BBC should use only test thus predict train should fail
    unable_to_predict_train = False
@ -164,7 +164,7 @@ def test_blackbox_classifier_probabilities():
    assert (pred < 1.0).all()

    score = model.score(train)
-    assert (0.0 <= score <= 1.0)
+    assert (score == 1.0)


 def test_blackbox_classifier_predict():
@ -172,6 +172,7 @@ def test_blackbox_classifier_predict():
        return [0.23, 0.56, 0.21]

    (x_train, y_train), (_, _) = dataset_utils.get_iris_dataset_np()
+    y_train = np.array([[0.23, 0.56, 0.21] for i in range(105)])

    train = ArrayDataset(x_train, y_train)

@ -182,7 +183,7 @@ def test_blackbox_classifier_predict():
    assert (pred < 1.0).all()

    score = model.score(train)
-    assert (0.0 <= score <= 1.0)
+    assert (score == 1.0)

 def test_is_one_hot():
    (_, y_train), (_, _) = dataset_utils.get_iris_dataset_np()