Fix issue with computed ranges for one-hot encoded features (#90)

Signed-off-by: abigailt <abigailt@il.ibm.com>
2026-06-14 15:25:12 +02:00 · 2024-01-17 12:45:22 -05:00 · 2024-01-17 12:45:22 -05:00 · a8f5326572
commit a8f5326572
parent d8de062d43
2 changed files with 46 additions and 1 deletions
--- a/apt/minimization/minimizer.py
+++ b/apt/minimization/minimizer.py
@ -576,7 +576,7 @@ class GeneralizeToRepresentative(BaseEstimator, MetaEstimatorMixin, TransformerM
            if feature not in feature_data.keys():
                fd = {}
                values = list(x.loc[:, feature])
-                if feature not in self.categorical_features:
+                if feature not in self.categorical_features and feature not in self.all_one_hot_features:
                    fd['min'] = min(values)
                    fd['max'] = max(values)
                    fd['range'] = max(values) - min(values)
--- a/tests/test_minimizer.py
+++ b/tests/test_minimizer.py
@ -965,6 +965,51 @@ def test_minimizer_ndarray_one_hot():
    assert ((np.min(transformed_slice, axis=1) == 0).all())


+def test_minimizer_ndarray_one_hot_single_value():
+    """Check minimization of ndarray data whose one-hot slice contains a constant column.
+
+    Columns 1-3 of ``x_train`` are passed to the minimizer as a single feature
+    slice; column 3 is 0 in every row. The test expects only feature '0' to be
+    generalized and every transformed row of the slice to remain a valid
+    one-hot vector.
+    """
+    # Columns 0 and 4 hold multi-valued integers; columns 1-3 hold 0/1 flags with exactly one 1 per row.
+    x_train = np.array([[23, 0, 1, 0, 165],
+                        [45, 0, 1, 0, 158],
+                        [56, 1, 0, 0, 123],
+                        [67, 0, 1, 0, 154],
+                        [45, 1, 0, 0, 149],
+                        [42, 1, 0, 0, 166],
+                        [73, 0, 1, 0, 172],
+                        [94, 0, 1, 0, 168],
+                        [69, 0, 1, 0, 175],
+                        [24, 1, 0, 0, 181],
+                        [18, 1, 0, 0, 190]])
+    y_train = np.array([1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0])
+
+    # Fit the model, then use its own predictions on the training data as the labels given to the minimizer.
+    model = DecisionTreeClassifier()
+    model.fit(x_train, y_train)
+    predictions = model.predict(x_train)
+
+    features = ['0', '1', '2', '3', '4']
+    # Features 0-3 are the ones to minimize; [1, 2, 3] is declared as one feature slice.
+    QI = [0, 1, 2, 3]
+    QI_slices = [[1, 2, 3]]
+    target_accuracy = 0.7
+    gen = GeneralizeToRepresentative(model, target_accuracy=target_accuracy, feature_slices=QI_slices,
+                                     features_to_minimize=QI)
+    gen.fit(dataset=ArrayDataset(x_train, predictions))
+    transformed = gen.transform(dataset=ArrayDataset(x_train))
+    gener = gen.generalizations
+    # Only feature '0' is expected to be generalized (single split value 34.5); all other features stay untouched.
+    expected_generalizations = {'categories': {}, 'category_representatives': {},
+                                'range_representatives': {'0': [34.5]}, 'ranges': {'0': [34.5]},
+                                'untouched': ['4', '1', '2', '3']}
+
+    # NOTE(review): compare_generalizations, check_features and check_ncp are helpers defined outside this
+    # hunk; presumably they validate the result against expected_generalizations -- confirm in the test module.
+    compare_generalizations(gener, expected_generalizations)
+
+    check_features(features, expected_generalizations, transformed, x_train)
+    ncp = gen.ncp.transform_score
+    check_ncp(ncp, expected_generalizations)
+
+    # Accuracy on the transformed data may fall short of the target by at most ACCURACY_DIFF.
+    rel_accuracy = model.score(transformed, predictions)
+    assert ((rel_accuracy >= target_accuracy) or (target_accuracy - rel_accuracy) <= ACCURACY_DIFF)
+    # Each transformed row of the slice must still contain exactly one 1 and 0s elsewhere.
+    transformed_slice = transformed[:, QI_slices[0]]
+    assert ((np.sum(transformed_slice, axis=1) == 1).all())
+    assert ((np.max(transformed_slice, axis=1) == 1).all())
+    assert ((np.min(transformed_slice, axis=1) == 0).all())
+
+
 def test_minimizer_ndarray_one_hot_gen():
    x_train = np.array([[23, 0, 1, 165],
                        [45, 0, 1, 158],