mirror of
https://github.com/IBM/ai-privacy-toolkit.git
synced 2026-06-02 14:45:13 +02:00
Support for many new model output types (#93)
* General model wrappers and methods supporting multi-label classifiers * Support for pytorch multi-label binary classifier * New model output types + single implementation of score method that supports multiple output types. * Anonymization with pytorch multi-output binary model * Support for multi-label binary models in minimizer. * Support for multi-label logits/probabilities --------- Signed-off-by: abigailt <abigailt@il.ibm.com>
This commit is contained in:
parent
e00535d120
commit
57e38ea4fa
13 changed files with 913 additions and 172 deletions
|
|
@ -1,8 +1,11 @@
|
|||
import pytest
|
||||
import numpy as np
|
||||
|
||||
from apt.utils.models import SklearnClassifier, SklearnRegressor, ModelOutputType, KerasClassifier, KerasRegressor, \
|
||||
BlackboxClassifierPredictions, BlackboxClassifierPredictFunction, is_one_hot, get_nb_classes, XGBoostClassifier
|
||||
from apt.utils.models import SklearnClassifier, SklearnRegressor, KerasClassifier, KerasRegressor, \
|
||||
BlackboxClassifierPredictions, BlackboxClassifierPredictFunction, is_one_hot, get_nb_classes, XGBoostClassifier, \
|
||||
CLASSIFIER_SINGLE_OUTPUT_CATEGORICAL, CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES, \
|
||||
CLASSIFIER_MULTI_OUTPUT_CATEGORICAL, CLASSIFIER_MULTI_OUTPUT_BINARY_PROBABILITIES, \
|
||||
CLASSIFIER_MULTI_OUTPUT_CLASS_PROBABILITIES
|
||||
from apt.utils.datasets import ArrayDataset, Data, DatasetWithPredictions
|
||||
from apt.utils import dataset_utils
|
||||
|
||||
|
|
@ -24,7 +27,7 @@ tf.compat.v1.disable_eager_execution()
|
|||
def test_sklearn_classifier():
|
||||
(x_train, y_train), (x_test, y_test) = dataset_utils.get_iris_dataset_np()
|
||||
underlying_model = RandomForestClassifier()
|
||||
model = SklearnClassifier(underlying_model, ModelOutputType.CLASSIFIER_PROBABILITIES)
|
||||
model = SklearnClassifier(underlying_model, CLASSIFIER_SINGLE_OUTPUT_CATEGORICAL)
|
||||
train = ArrayDataset(x_train, y_train)
|
||||
test = ArrayDataset(x_test, y_test)
|
||||
model.fit(train)
|
||||
|
|
@ -35,6 +38,28 @@ def test_sklearn_classifier():
|
|||
assert (0.0 <= score <= 1.0)
|
||||
|
||||
|
||||
# This test currently cannot pass due to ART dependency, so sklearn support will need to wait until ART is updated
|
||||
# def test_sklearn_classifier_predictions_multi_label_binary():
|
||||
# (x_train, y_train), (x_test, y_test) = dataset_utils.get_iris_dataset_np()
|
||||
#
|
||||
# # make multi-label binary
|
||||
# y_train = np.column_stack((y_train, y_train, y_train))
|
||||
# y_train[y_train > 1] = 1
|
||||
# y_test = np.column_stack((y_test, y_test, y_test))
|
||||
# y_test[y_test > 1] = 1
|
||||
#
|
||||
# test = ArrayDataset(x_test, y_test)
|
||||
#
|
||||
# underlying_model = RandomForestClassifier()
|
||||
# underlying_model.fit(x_train, y_train)
|
||||
# model = SklearnClassifier(underlying_model, ModelOutputType.CLASSIFIER_MULTI_OUTPUT_BINARY_PROBABILITIES)
|
||||
# pred = model.predict(test)
|
||||
# assert (pred[0].shape[0] == x_test.shape[0])
|
||||
#
|
||||
# score = model.score(test)
|
||||
# assert (score == 1.0)
|
||||
|
||||
|
||||
def test_sklearn_regressor():
|
||||
(x_train, y_train), (x_test, y_test) = dataset_utils.get_diabetes_dataset_np()
|
||||
underlying_model = DecisionTreeRegressor()
|
||||
|
|
@ -59,7 +84,7 @@ def test_keras_classifier():
|
|||
|
||||
underlying_model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])
|
||||
|
||||
model = KerasClassifier(underlying_model, ModelOutputType.CLASSIFIER_PROBABILITIES)
|
||||
model = KerasClassifier(underlying_model, CLASSIFIER_SINGLE_OUTPUT_CATEGORICAL)
|
||||
|
||||
train = ArrayDataset(x_train, y_train)
|
||||
test = ArrayDataset(x_test, y_test)
|
||||
|
|
@ -97,7 +122,8 @@ def test_xgboost_classifier():
|
|||
(x_train, y_train), (x_test, y_test) = dataset_utils.get_iris_dataset_np()
|
||||
underlying_model = XGBClassifier()
|
||||
underlying_model.fit(x_train, y_train)
|
||||
model = XGBoostClassifier(underlying_model, ModelOutputType.CLASSIFIER_PROBABILITIES, input_shape=(4,), nb_classes=3)
|
||||
model = XGBoostClassifier(underlying_model, CLASSIFIER_SINGLE_OUTPUT_CATEGORICAL,
|
||||
input_shape=(4,), nb_classes=3)
|
||||
train = ArrayDataset(x_train, y_train)
|
||||
test = ArrayDataset(x_test, y_test)
|
||||
pred = model.predict(test)
|
||||
|
|
@ -115,7 +141,7 @@ def test_blackbox_classifier():
|
|||
train = ArrayDataset(x_train, y_train)
|
||||
test = ArrayDataset(x_test, y_test)
|
||||
data = Data(train, test)
|
||||
model = BlackboxClassifierPredictions(data, ModelOutputType.CLASSIFIER_SCALAR)
|
||||
model = BlackboxClassifierPredictions(data, CLASSIFIER_SINGLE_OUTPUT_CATEGORICAL)
|
||||
pred = model.predict(test)
|
||||
assert (pred.shape[0] == x_test.shape[0])
|
||||
|
||||
|
|
@ -131,7 +157,7 @@ def test_blackbox_classifier_predictions():
|
|||
train = DatasetWithPredictions(y_train, x_train)
|
||||
test = DatasetWithPredictions(y_test, x_test)
|
||||
data = Data(train, test)
|
||||
model = BlackboxClassifierPredictions(data, ModelOutputType.CLASSIFIER_SCALAR)
|
||||
model = BlackboxClassifierPredictions(data, CLASSIFIER_SINGLE_OUTPUT_CATEGORICAL)
|
||||
pred = model.predict(test)
|
||||
assert (pred.shape[0] == x_test.shape[0])
|
||||
assert model.model_type is None
|
||||
|
|
@ -146,7 +172,7 @@ def test_blackbox_classifier_predictions_y():
|
|||
train = DatasetWithPredictions(y_train, x_train, y_train)
|
||||
test = DatasetWithPredictions(y_test, x_test, y_test)
|
||||
data = Data(train, test)
|
||||
model = BlackboxClassifierPredictions(data, ModelOutputType.CLASSIFIER_SCALAR)
|
||||
model = BlackboxClassifierPredictions(data, CLASSIFIER_SINGLE_OUTPUT_CATEGORICAL)
|
||||
pred = model.predict(test)
|
||||
assert (pred.shape[0] == x_test.shape[0])
|
||||
|
||||
|
|
@ -156,14 +182,62 @@ def test_blackbox_classifier_predictions_y():
|
|||
assert model.model_type is None
|
||||
|
||||
|
||||
def test_blackbox_classifier_mismatch():
|
||||
def test_blackbox_classifier_predictions_multi_label_cat():
|
||||
(x_train, y_train), (x_test, y_test) = dataset_utils.get_iris_dataset_np()
|
||||
|
||||
train = ArrayDataset(x_train, y_train)
|
||||
test = ArrayDataset(x_test, y_test)
|
||||
# make multi-label categorical
|
||||
y_train = np.column_stack((y_train, y_train, y_train))
|
||||
y_test = np.column_stack((y_test, y_test, y_test))
|
||||
|
||||
train = DatasetWithPredictions(y_train, x_train, y_train)
|
||||
test = DatasetWithPredictions(y_test, x_test, y_test)
|
||||
data = Data(train, test)
|
||||
with pytest.raises(ValueError):
|
||||
BlackboxClassifierPredictions(data, ModelOutputType.CLASSIFIER_PROBABILITIES)
|
||||
model = BlackboxClassifierPredictions(data, CLASSIFIER_MULTI_OUTPUT_CATEGORICAL)
|
||||
pred = model.predict(test)
|
||||
assert (pred.shape[0] == x_test.shape[0])
|
||||
|
||||
score = model.score(test)
|
||||
assert (score == 1.0)
|
||||
|
||||
assert model.model_type is None
|
||||
|
||||
|
||||
def test_blackbox_classifier_predictions_multi_label_binary():
|
||||
(x_train, y_train), (x_test, y_test) = dataset_utils.get_iris_dataset_np()
|
||||
|
||||
# make multi-label binary
|
||||
y_train = np.column_stack((y_train, y_train, y_train))
|
||||
y_train[y_train > 1] = 1
|
||||
pred_train = y_train.copy().astype(float)
|
||||
pred_train[pred_train == 0] = 0.2
|
||||
pred_train[pred_train == 1] = 0.6
|
||||
y_test = np.column_stack((y_test, y_test, y_test))
|
||||
y_test[y_test > 1] = 1
|
||||
pred_test = y_test.copy().astype(float)
|
||||
pred_test[pred_test == 0] = 0.2
|
||||
pred_test[pred_test == 1] = 0.6
|
||||
|
||||
train = DatasetWithPredictions(pred_train, x_train, y_train)
|
||||
test = DatasetWithPredictions(pred_test, x_test, y_test)
|
||||
data = Data(train, test)
|
||||
model = BlackboxClassifierPredictions(data, CLASSIFIER_MULTI_OUTPUT_BINARY_PROBABILITIES)
|
||||
pred = model.predict(test)
|
||||
assert (pred.shape[0] == x_test.shape[0])
|
||||
|
||||
score = model.score(test)
|
||||
assert (score == 1.0)
|
||||
|
||||
assert model.model_type is None
|
||||
|
||||
|
||||
# def test_blackbox_classifier_mismatch():
|
||||
# (x_train, y_train), (x_test, y_test) = dataset_utils.get_iris_dataset_np()
|
||||
#
|
||||
# train = ArrayDataset(x_train, y_train)
|
||||
# test = ArrayDataset(x_test, y_test)
|
||||
# data = Data(train, test)
|
||||
# with pytest.raises(ValueError):
|
||||
# BlackboxClassifierPredictions(data, ModelOutputType.CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES)
|
||||
|
||||
|
||||
def test_blackbox_classifier_no_test():
|
||||
|
|
@ -172,7 +246,7 @@ def test_blackbox_classifier_no_test():
|
|||
train = ArrayDataset(x_train, y_train)
|
||||
|
||||
data = Data(train)
|
||||
model = BlackboxClassifierPredictions(data, ModelOutputType.CLASSIFIER_SCALAR)
|
||||
model = BlackboxClassifierPredictions(data, CLASSIFIER_SINGLE_OUTPUT_CATEGORICAL)
|
||||
pred = model.predict(train)
|
||||
assert (pred.shape[0] == x_train.shape[0])
|
||||
|
||||
|
|
@ -189,7 +263,7 @@ def test_blackbox_classifier_no_train():
|
|||
|
||||
test = ArrayDataset(x_test, y_test)
|
||||
data = Data(test=test)
|
||||
model = BlackboxClassifierPredictions(data, ModelOutputType.CLASSIFIER_SCALAR)
|
||||
model = BlackboxClassifierPredictions(data, CLASSIFIER_SINGLE_OUTPUT_CATEGORICAL)
|
||||
pred = model.predict(test)
|
||||
assert (pred.shape[0] == x_test.shape[0])
|
||||
|
||||
|
|
@ -207,7 +281,7 @@ def test_blackbox_classifier_no_test_y():
|
|||
train = ArrayDataset(x_train, y_train)
|
||||
test = ArrayDataset(x_test)
|
||||
data = Data(train, test)
|
||||
model = BlackboxClassifierPredictions(data, ModelOutputType.CLASSIFIER_SCALAR)
|
||||
model = BlackboxClassifierPredictions(data, CLASSIFIER_SINGLE_OUTPUT_CATEGORICAL)
|
||||
pred = model.predict(train)
|
||||
assert (pred.shape[0] == x_train.shape[0])
|
||||
|
||||
|
|
@ -230,7 +304,7 @@ def test_blackbox_classifier_no_train_y():
|
|||
train = ArrayDataset(x_train)
|
||||
test = ArrayDataset(x_test, y_test)
|
||||
data = Data(train, test)
|
||||
model = BlackboxClassifierPredictions(data, ModelOutputType.CLASSIFIER_SCALAR)
|
||||
model = BlackboxClassifierPredictions(data, CLASSIFIER_SINGLE_OUTPUT_CATEGORICAL)
|
||||
pred = model.predict(test)
|
||||
assert (pred.shape[0] == x_test.shape[0])
|
||||
|
||||
|
|
@ -254,7 +328,7 @@ def test_blackbox_classifier_probabilities():
|
|||
train = ArrayDataset(x_train, y_train)
|
||||
|
||||
data = Data(train)
|
||||
model = BlackboxClassifierPredictions(data, ModelOutputType.CLASSIFIER_PROBABILITIES)
|
||||
model = BlackboxClassifierPredictions(data, CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES)
|
||||
pred = model.predict(train)
|
||||
assert (pred.shape[0] == x_train.shape[0])
|
||||
assert (0.0 < pred).all()
|
||||
|
|
@ -264,6 +338,23 @@ def test_blackbox_classifier_probabilities():
|
|||
assert (score == 1.0)
|
||||
|
||||
|
||||
def test_blackbox_classifier_multi_label_probabilities():
|
||||
(x_train, _), (_, _) = dataset_utils.get_iris_dataset_np()
|
||||
y_train = np.array([[0.23, 0.56, 0.21] for i in range(105)])
|
||||
|
||||
# make multi-label categorical
|
||||
y_train = np.column_stack((y_train, y_train, y_train))
|
||||
|
||||
train = ArrayDataset(x_train, y_train)
|
||||
|
||||
data = Data(train)
|
||||
model = BlackboxClassifierPredictions(data, CLASSIFIER_MULTI_OUTPUT_CLASS_PROBABILITIES)
|
||||
pred = model.predict(train)
|
||||
assert (pred.shape[0] == x_train.shape[0])
|
||||
assert (0.0 < pred).all()
|
||||
assert (pred < 1.0).all()
|
||||
|
||||
|
||||
def test_blackbox_classifier_predict():
|
||||
def predict(x):
|
||||
return np.array([[0.23, 0.56, 0.21] for i in range(x.shape[0])])
|
||||
|
|
@ -273,7 +364,8 @@ def test_blackbox_classifier_predict():
|
|||
|
||||
train = ArrayDataset(x_train, y_train)
|
||||
|
||||
model = BlackboxClassifierPredictFunction(predict, ModelOutputType.CLASSIFIER_PROBABILITIES, (4,), 3)
|
||||
model = BlackboxClassifierPredictFunction(predict, CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES,
|
||||
(4,), 3)
|
||||
pred = model.predict(train)
|
||||
assert (pred.shape[0] == x_train.shape[0])
|
||||
assert (0.0 < pred).all()
|
||||
|
|
@ -292,7 +384,8 @@ def test_blackbox_classifier_predict_scalar():
|
|||
|
||||
train = ArrayDataset(x_train, y_train)
|
||||
|
||||
model = BlackboxClassifierPredictFunction(predict, ModelOutputType.CLASSIFIER_SCALAR, (4,), 3)
|
||||
model = BlackboxClassifierPredictFunction(predict, CLASSIFIER_SINGLE_OUTPUT_CLASS_PROBABILITIES,
|
||||
(4,), 3)
|
||||
pred = model.predict(train)
|
||||
assert (pred.shape[0] == x_train.shape[0])
|
||||
|
||||
|
|
@ -310,23 +403,23 @@ def test_is_one_hot():
|
|||
|
||||
def test_get_nb_classes():
|
||||
(_, y_train), (_, y_test) = dataset_utils.get_iris_dataset_np()
|
||||
|
||||
output_type = CLASSIFIER_SINGLE_OUTPUT_CATEGORICAL
|
||||
# shape: (x,) - not 1-hot
|
||||
nb_classes_test = get_nb_classes(y_test)
|
||||
nb_classes_train = get_nb_classes(y_train)
|
||||
nb_classes_test = get_nb_classes(y_test, output_type)
|
||||
nb_classes_train = get_nb_classes(y_train, output_type)
|
||||
assert (nb_classes_test == nb_classes_train)
|
||||
assert (nb_classes_test == 3)
|
||||
|
||||
# shape: (x,1) - not 1-hot
|
||||
nb_classes_test = get_nb_classes(y_test.reshape(-1, 1))
|
||||
nb_classes_test = get_nb_classes(y_test.reshape(-1, 1), output_type)
|
||||
assert (nb_classes_test == 3)
|
||||
|
||||
# shape: (x,3) - 1-hot
|
||||
y = to_categorical(y_test)
|
||||
nb_classes = get_nb_classes(y)
|
||||
nb_classes = get_nb_classes(y, output_type)
|
||||
assert (nb_classes == 3)
|
||||
|
||||
# gaps: 1,2,4 (0,3 missing)
|
||||
y_test[y_test == 0] = 4
|
||||
nb_classes = get_nb_classes(y_test)
|
||||
nb_classes = get_nb_classes(y_test, output_type)
|
||||
assert (nb_classes == 5)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue