mirror of
https://github.com/IBM/ai-privacy-toolkit.git
synced 2026-05-09 12:02:36 +02:00
* support DecisionTreeRegressor * support regression models * Update membership_inference_dp_diabetes_reg.ipynb
228 lines
No EOL
7.4 KiB
Text
228 lines
No EOL
7.4 KiB
Text
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Using ML anonymization to defend against membership inference attacks"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"In this tutorial we will show how to anonymize models using the ML anonymization module. \n",
|
|
"\n",
|
|
"We will demonstrate running inference attacks both on a vanilla model, and then on an anonymized version of the model. We will run a black-box membership inference attack using ART's inference module (https://github.com/Trusted-AI/adversarial-robustness-toolbox/tree/main/art/attacks/inference). \n",
|
|
"\n",
|
|
"This will be demonstrated using the diabetes regression dataset bundled with scikit-learn (loaded via `load_diabetes`; see https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_diabetes.html). \n",
|
|
"\n",
|
|
"All features in this dataset are numerical, so the data is used as-is without any additional encoding."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Load data"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 121,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import numpy as np\n",
|
|
"from sklearn.datasets import load_diabetes\n",
|
|
"from sklearn.model_selection import train_test_split\n",
|
|
"\n",
|
|
"dataset = load_diabetes()\n",
|
|
"X_train, X_test, y_train, y_test = train_test_split(dataset.data, dataset.target, test_size=0.5, random_state=14)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Train linear regression model"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 122,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Base model accuracy (R2 score): 0.5080618258593721\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"from sklearn.linear_model import LinearRegression\n",
|
|
"from art.estimators.regression.scikitlearn import ScikitlearnRegressor\n",
|
|
"\n",
|
|
"model = LinearRegression()\n",
|
|
"model.fit(X_train, y_train)\n",
|
|
"\n",
|
|
"art_classifier = ScikitlearnRegressor(model)\n",
|
|
"\n",
|
|
"print('Base model accuracy (R2 score): ', model.score(X_test, y_test))\n",
|
|
"\n",
|
|
"x_train_predictions = art_classifier.predict(X_train)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Attack\n",
|
|
"The black-box attack basically trains an additional classifier (called the attack model) to predict the membership status of a sample.\n",
|
|
"#### Train attack model"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 123,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"0.4954954954954955\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"from art.attacks.inference.membership_inference import MembershipInferenceBlackBox\n",
|
|
"\n",
|
|
"# attack_model_type can be nn (neural network), rf (random forest) or gb (gradient boosting)\n",
|
|
"bb_attack = MembershipInferenceBlackBox(art_classifier, attack_model_type='nn', input_type='loss')\n",
|
|
"\n",
|
|
"# use half of each dataset for training the attack\n",
|
|
"attack_train_ratio = 0.5\n",
|
|
"attack_train_size = int(len(X_train) * attack_train_ratio)\n",
|
|
"attack_test_size = int(len(X_test) * attack_train_ratio)\n",
|
|
"\n",
|
|
"# train attack model\n",
|
|
"bb_attack.fit(X_train[:attack_train_size], y_train[:attack_train_size],\n",
|
|
" X_test[:attack_test_size], y_test[:attack_test_size])\n",
|
|
"\n",
|
|
"# get inferred values for remaining half\n",
|
|
"inferred_train_bb = bb_attack.infer(X_train[attack_train_size:], y_train[attack_train_size:])\n",
|
|
"inferred_test_bb = bb_attack.infer(X_test[attack_test_size:], y_test[attack_test_size:])\n",
|
|
"# check accuracy\n",
|
|
"train_acc = np.sum(inferred_train_bb) / len(inferred_train_bb)\n",
|
|
"test_acc = 1 - (np.sum(inferred_test_bb) / len(inferred_test_bb))\n",
|
|
"acc = (train_acc * len(inferred_train_bb) + test_acc * len(inferred_test_bb)) / (len(inferred_train_bb) + len(inferred_test_bb))\n",
|
|
"print(acc)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"In the run shown above, membership is inferred correctly for about 50% of the data, i.e. the attack does no better than random guessing on the base model.\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 124,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"unique rows in original data: 221\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"k values: [5, 10, 20, 50, 75]\n",
|
|
"unique rows: [34, 19, 8, 4, 2]\n",
|
|
"model accuracy: [0.43165832354998956, 0.4509641063206041, -1.730181929385853, -5.577098823982753e+27, -1.2751609045828272e+25]\n",
|
|
"attack accuracy: [0.5, 0.47297297297297297, 0.49549549549549543, 0.5, 0.47297297297297297]\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"from apt.anonymization import Anonymize\n",
|
|
"k_values=[5, 10, 20, 50, 75]\n",
|
|
"model_accuracy = []\n",
|
|
"attack_accuracy = []\n",
|
|
"unique_values = []\n",
|
|
"\n",
|
|
"# QI (quasi-identifiers) = all 10 feature columns\n",
|
|
"QI = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]\n",
|
|
"print('unique rows in original data: ', len(np.unique(X_train, axis=0)))\n",
|
|
"\n",
|
|
"for k in k_values:\n",
|
|
" anonymizer = Anonymize(k, QI, is_regression=True)\n",
|
|
" anon = anonymizer.anonymize(X_train, x_train_predictions)\n",
|
|
" unique_values.append(len(np.unique(anon, axis=0)))\n",
|
|
" \n",
|
|
" anon_model = LinearRegression()\n",
|
|
" anon_model.fit(anon, y_train)\n",
|
|
"\n",
|
|
" anon_art_classifier = ScikitlearnRegressor(anon_model)\n",
|
|
"\n",
|
|
" model_accuracy.append(anon_model.score(X_test, y_test))\n",
|
|
" \n",
|
|
" anon_bb_attack = MembershipInferenceBlackBox(anon_art_classifier, attack_model_type='rf', input_type='loss')\n",
|
|
"\n",
|
|
" # train attack model\n",
|
|
" anon_bb_attack.fit(X_train[:attack_train_size], y_train[:attack_train_size],\n",
|
|
" X_test[:attack_test_size], y_test[:attack_test_size])\n",
|
|
"\n",
|
|
" # get inferred values\n",
|
|
" anon_inferred_train_bb = anon_bb_attack.infer(X_train[attack_train_size:], y_train[attack_train_size:])\n",
|
|
" anon_inferred_test_bb = anon_bb_attack.infer(X_test[attack_test_size:], y_test[attack_test_size:])\n",
|
|
" # check accuracy\n",
|
|
" anon_train_acc = np.sum(anon_inferred_train_bb) / len(anon_inferred_train_bb)\n",
|
|
" anon_test_acc = 1 - (np.sum(anon_inferred_test_bb) / len(anon_inferred_test_bb))\n",
|
|
" anon_acc = (anon_train_acc * len(anon_inferred_train_bb) + anon_test_acc * len(anon_inferred_test_bb)) / (len(anon_inferred_train_bb) + len(anon_inferred_test_bb))\n",
|
|
" attack_accuracy.append(anon_acc)\n",
|
|
" \n",
|
|
"print('k values: ', k_values)\n",
|
|
"print('unique rows:', unique_values)\n",
|
|
"print('model accuracy:', model_accuracy)\n",
|
|
"print('attack accuracy:', attack_accuracy)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 124,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.8.3"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 2
|
|
} |