mirror of
https://github.com/IBM/ai-privacy-toolkit.git
synced 2026-06-08 15:05:13 +02:00
Second test (pandas)
Signed-off-by: abigailt <abigailt@il.ibm.com>
This commit is contained in:
parent
2a657388af
commit
e7ee42fdc8
1 changed file with 21 additions and 13 deletions
|
|
@@ -962,7 +962,7 @@ def test_minimizer_ndarray_one_hot():
|
||||||
|
|
||||||
|
|
||||||
def test_anonymize_pandas_one_hot():
|
def test_anonymize_pandas_one_hot():
|
||||||
feature_names = ["age", "gender_M", "gender_F", "height"]
|
features = ["age", "gender_M", "gender_F", "height"]
|
||||||
x_train = np.array([[23, 0, 1, 165],
|
x_train = np.array([[23, 0, 1, 165],
|
||||||
[45, 0, 1, 158],
|
[45, 0, 1, 158],
|
||||||
[56, 1, 0, 123],
|
[56, 1, 0, 123],
|
||||||
|
|
@@ -975,25 +975,33 @@ def test_anonymize_pandas_one_hot():
|
||||||
[24, 1, 0, 181],
|
[24, 1, 0, 181],
|
||||||
[18, 1, 0, 190]])
|
[18, 1, 0, 190]])
|
||||||
y_train = np.array([1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0])
|
y_train = np.array([1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0])
|
||||||
x_train = pd.DataFrame(x_train, columns=feature_names)
|
x_train = pd.DataFrame(x_train, columns=features)
|
||||||
y_train = pd.Series(y_train)
|
y_train = pd.Series(y_train)
|
||||||
|
|
||||||
model = DecisionTreeClassifier()
|
model = DecisionTreeClassifier()
|
||||||
model.fit(x_train, y_train)
|
model.fit(x_train, y_train)
|
||||||
pred = model.predict(x_train)
|
predictions = model.predict(x_train)
|
||||||
|
|
||||||
k = 10
|
|
||||||
QI = ["age", "gender_M", "gender_F"]
|
QI = ["age", "gender_M", "gender_F"]
|
||||||
QI_slices = [["gender_M", "gender_F"]]
|
QI_slices = [["gender_M", "gender_F"]]
|
||||||
anonymizer = Anonymize(k, QI, train_only_QI=True, quasi_identifer_slices=QI_slices)
|
target_accuracy = 0.7
|
||||||
anon = anonymizer.anonymize(ArrayDataset(x_train, pred))
|
gen = GeneralizeToRepresentative(model, target_accuracy=target_accuracy, feature_slices=QI_slices,
|
||||||
assert (anon.loc[:, QI].drop_duplicates().shape[0] < x_train.loc[:, QI].drop_duplicates().shape[0])
|
features_to_minimize=QI)
|
||||||
assert (anon.loc[:, QI].value_counts().min() >= k)
|
gen.fit(dataset=ArrayDataset(x_train, predictions))
|
||||||
np.testing.assert_array_equal(anon.drop(QI, axis=1), x_train.drop(QI, axis=1))
|
transformed = gen.transform(dataset=ArrayDataset(x_train))
|
||||||
anonymized_slice = anon.loc[:, QI_slices[0]]
|
gener = gen.generalizations
|
||||||
assert ((np.sum(anonymized_slice, axis=1) == 1).all())
|
expected_generalizations = {'categories': {}, 'category_representatives': {},
|
||||||
assert ((np.max(anonymized_slice, axis=1) == 1).all())
|
'range_representatives': {'age': [34.5]},
|
||||||
assert ((np.min(anonymized_slice, axis=1) == 0).all())
|
'ranges': {'age': [34.5]}, 'untouched': ['height', 'gender_M', 'gender_F']}
|
||||||
|
|
||||||
|
compare_generalizations(gener, expected_generalizations)
|
||||||
|
|
||||||
|
check_features(features, expected_generalizations, transformed, x_train, True)
|
||||||
|
ncp = gen.ncp.transform_score
|
||||||
|
check_ncp(ncp, expected_generalizations)
|
||||||
|
|
||||||
|
rel_accuracy = model.score(transformed, predictions)
|
||||||
|
assert ((rel_accuracy >= target_accuracy) or (target_accuracy - rel_accuracy) <= ACCURACY_DIFF)
|
||||||
|
|
||||||
|
|
||||||
def test_keras_model():
|
def test_keras_model():
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue