mirror of
https://github.com/IBM/ai-privacy-toolkit.git
synced 2026-06-08 15:05:13 +02:00
Fix anonymization adult notebook + new notebook to demonstrate anonymization on 1-hot encoded data
Signed-off-by: abigailt <abigailt@il.ibm.com>
This commit is contained in:
parent
88f4258493
commit
570c6f8966
2 changed files with 393 additions and 106 deletions
303
notebooks/anonymization_one_hot_adult.ipynb
Normal file
303
notebooks/anonymization_one_hot_adult.ipynb
Normal file
|
|
@ -0,0 +1,303 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Using ML anonymization on one-hot encoded data"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"In this tutorial we will show how to anonymize models using the ML anonymization module, specifically when the input data is already one-hot encoded. \n",
|
||||
"\n",
|
||||
"This will be demonstrated using the Adult dataset (original dataset can be found here: https://archive.ics.uci.edu/ml/datasets/adult). "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Load data"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 21,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[['State-gov' 'Never-married' 'Adm-clerical' ... 'White' 'Male'\n",
|
||||
" 'UnitedStates']\n",
|
||||
" ['Self-emp-not-inc' 'Married-civ-spouse' 'Exec-managerial' ... 'White'\n",
|
||||
" 'Male' 'UnitedStates']\n",
|
||||
" ['Private' 'Divorced' 'Handlers-cleaners' ... 'White' 'Male'\n",
|
||||
" 'UnitedStates']\n",
|
||||
" ...\n",
|
||||
" ['Private' 'Never-married' 'Sales' ... 'White' 'Female' 'UnitedStates']\n",
|
||||
" ['Private' 'Never-married' 'Craft-repair' ... 'White' 'Male'\n",
|
||||
" 'UnitedStates']\n",
|
||||
" ['Private' 'Never-married' 'Handlers-cleaners' ... 'White' 'Male'\n",
|
||||
" 'UnitedStates']]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import numpy as np\n",
|
||||
"\n",
|
||||
"import os\n",
|
||||
"import sys\n",
|
||||
"sys.path.insert(0, os.path.abspath('..'))\n",
|
||||
"from apt.utils.dataset_utils import get_adult_dataset_pd\n",
|
||||
"\n",
|
||||
"# 'workclass', 'marital-status', 'occupation', 'relationship', 'race', 'sex', 'native-country'\n",
|
||||
"categorical_features = [1, 3, 4, 5, 6, 7, 11]\n",
|
||||
"\n",
|
||||
"# requires a folder called 'datasets' in the current directory\n",
|
||||
"(x_train, y_train), (x_test, y_test) = get_adult_dataset_pd()\n",
|
||||
"x_train = x_train.to_numpy()[:, [1, 3, 4, 5, 6, 7, 11]]\n",
|
||||
"y_train = y_train.to_numpy().astype(int)\n",
|
||||
"x_test = x_test.to_numpy()[:, [1, 3, 4, 5, 6, 7, 11]]\n",
|
||||
"y_test = y_test.to_numpy().astype(int)\n",
|
||||
"\n",
|
||||
"# get balanced dataset\n",
|
||||
"x_train = x_train[:x_test.shape[0]]\n",
|
||||
"y_train = y_train[:y_test.shape[0]]\n",
|
||||
"\n",
|
||||
"print(x_train)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Encode data"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 22,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[[0 0 0 ... 0 1 0]\n",
|
||||
" [0 0 0 ... 0 1 0]\n",
|
||||
" [0 0 0 ... 0 1 0]\n",
|
||||
" ...\n",
|
||||
" [0 0 0 ... 0 1 0]\n",
|
||||
" [0 0 0 ... 0 1 0]\n",
|
||||
" [0 0 0 ... 0 1 0]]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from sklearn.preprocessing import OneHotEncoder\n",
|
||||
"import scipy\n",
|
||||
"\n",
|
||||
"preprocessor = OneHotEncoder(handle_unknown=\"ignore\")\n",
|
||||
"\n",
|
||||
"x_train = preprocessor.fit_transform(x_train)\n",
|
||||
"x_test = preprocessor.transform(x_test)\n",
|
||||
"if scipy.sparse.issparse(x_train):\n",
|
||||
" x_train = x_train.toarray().astype(int)\n",
|
||||
"if scipy.sparse.issparse(x_test):\n",
|
||||
" x_test = x_test.toarray().astype(int)\n",
|
||||
"\n",
|
||||
"print(x_train)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Train decision tree model"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 23,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Base model accuracy: 0.814446287083103\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/Users/abigailt/Library/Python/3.9/lib/python/site-packages/sklearn/utils/deprecation.py:103: FutureWarning: The attribute `n_features_` is deprecated in 1.0 and will be removed in 1.2. Use `n_features_in_` instead.\n",
|
||||
" warnings.warn(msg, category=FutureWarning)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from sklearn.tree import DecisionTreeClassifier\n",
|
||||
"from art.estimators.classification.scikitlearn import ScikitlearnDecisionTreeClassifier\n",
|
||||
"\n",
|
||||
"model = DecisionTreeClassifier()\n",
|
||||
"model.fit(x_train, y_train)\n",
|
||||
"\n",
|
||||
"art_classifier = ScikitlearnDecisionTreeClassifier(model)\n",
|
||||
"\n",
|
||||
"print('Base model accuracy: ', model.score(x_test, y_test))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Anonymize data\n",
|
||||
"## k=100\n",
|
||||
"\n",
|
||||
"The data is anonymized on the quasi-identifiers race and sex (specified by the indexes of their one-hot encoded columns) and with a privacy parameter k=100.\n",
|
||||
"\n",
|
||||
"This means that each record in the anonymized dataset is identical to at least 99 others on the quasi-identifier values (i.e., when looking only at those features, the records are indistinguishable)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 25,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[[0 0 0 ... 0 1 0]\n",
|
||||
" [0 0 0 ... 0 1 0]\n",
|
||||
" [0 0 0 ... 0 1 0]\n",
|
||||
" ...\n",
|
||||
" [0 0 0 ... 0 1 0]\n",
|
||||
" [0 0 0 ... 0 1 0]\n",
|
||||
" [0 0 0 ... 0 1 0]]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from apt.utils.datasets import ArrayDataset\n",
|
||||
"from apt.anonymization import Anonymize\n",
|
||||
"\n",
|
||||
"x_train_predictions = np.array([np.argmax(arr) for arr in art_classifier.predict(x_train)])\n",
|
||||
"\n",
|
||||
"# QI = (race, sex)\n",
|
||||
"QI = [53, 52, 51, 50, 49, 48, 47]\n",
|
||||
"QI_slices = [[47, 48, 49, 50, 51], [52, 53]]\n",
|
||||
"anonymizer = Anonymize(100, QI)\n",
|
||||
"anon = anonymizer.anonymize(ArrayDataset(x_train, x_train_predictions))\n",
|
||||
"print(anon)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 26,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"2711"
|
||||
]
|
||||
},
|
||||
"execution_count": 26,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# number of distinct rows in original data\n",
|
||||
"len(np.unique(x_train, axis=0))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 27,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"2476"
|
||||
]
|
||||
},
|
||||
"execution_count": 27,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# number of distinct rows in anonymized data\n",
|
||||
"len(np.unique(anon, axis=0))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Train decision tree model"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 28,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Anonymized model accuracy: 0.8135863890424421\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/Users/abigailt/Library/Python/3.9/lib/python/site-packages/sklearn/utils/deprecation.py:103: FutureWarning: The attribute `n_features_` is deprecated in 1.0 and will be removed in 1.2. Use `n_features_in_` instead.\n",
|
||||
" warnings.warn(msg, category=FutureWarning)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"anon_model = DecisionTreeClassifier()\n",
|
||||
"anon_model.fit(anon, y_train)\n",
|
||||
"\n",
|
||||
"anon_art_classifier = ScikitlearnDecisionTreeClassifier(anon_model)\n",
|
||||
"\n",
|
||||
"print('Anonymized model accuracy: ', anon_model.score(x_test, y_test))"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
|
|
@ -1,7 +1,6 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
|
|
@ -9,7 +8,6 @@
|
|||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
|
|
@ -23,13 +21,72 @@
|
|||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Load data"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/Users/abigailt/Library/Python/3.9/lib/python/site-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
|
||||
" from .autonotebook import tqdm as notebook_tqdm\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[[ 39 13 2174 0 40]\n",
|
||||
" [ 50 13 0 0 13]\n",
|
||||
" [ 38 9 0 0 40]\n",
|
||||
" ...\n",
|
||||
" [ 27 13 0 0 40]\n",
|
||||
" [ 26 11 0 0 48]\n",
|
||||
" [ 27 9 0 0 40]]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import numpy as np\n",
|
||||
"\n",
|
||||
"import os\n",
|
||||
"import sys\n",
|
||||
"sys.path.insert(0, os.path.abspath('..'))\n",
|
||||
"from apt.utils.dataset_utils import get_adult_dataset_pd\n",
|
||||
"\n",
|
||||
"# requires a folder called 'datasets' in the current directory\n",
|
||||
"(x_train, y_train), (x_test, y_test) = get_adult_dataset_pd()\n",
|
||||
"x_train = x_train.to_numpy()\n",
|
||||
"y_train = y_train.to_numpy().astype(int)\n",
|
||||
"x_test = x_test.to_numpy()\n",
|
||||
"y_test = y_test.to_numpy().astype(int)\n",
|
||||
"\n",
|
||||
"# Use only numeric features (age, education-num, capital-gain, capital-loss, hours-per-week)\n",
|
||||
"x_train = x_train[:, [0, 2, 8, 9, 10]].astype(int)\n",
|
||||
"x_test = x_test[:, [0, 2, 8, 9, 10]].astype(int)\n",
|
||||
"\n",
|
||||
"# get balanced dataset\n",
|
||||
"x_train = x_train[:x_test.shape[0]]\n",
|
||||
"y_train = y_train[:y_test.shape[0]]\n",
|
||||
"\n",
|
||||
"print(x_train)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Train decision tree model"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
|
|
@ -39,76 +96,14 @@
|
|||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[[ 39. 13. 2174. 0. 40.]\n",
|
||||
" [ 50. 13. 0. 0. 13.]\n",
|
||||
" [ 38. 9. 0. 0. 40.]\n",
|
||||
" ...\n",
|
||||
" [ 27. 13. 0. 0. 40.]\n",
|
||||
" [ 26. 11. 0. 0. 48.]\n",
|
||||
" [ 27. 9. 0. 0. 40.]]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import numpy as np\n",
|
||||
"\n",
|
||||
"# Use only numeric features (age, education-num, capital-gain, capital-loss, hours-per-week)\n",
|
||||
"x_train = np.loadtxt(\"https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data\",\n",
|
||||
" usecols=(0, 4, 10, 11, 12), delimiter=\", \")\n",
|
||||
"\n",
|
||||
"y_train = np.loadtxt(\"https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data\",\n",
|
||||
" usecols=14, dtype=str, delimiter=\", \")\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"x_test = np.loadtxt(\"https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.test\",\n",
|
||||
" usecols=(0, 4, 10, 11, 12), delimiter=\", \", skiprows=1)\n",
|
||||
"\n",
|
||||
"y_test = np.loadtxt(\"https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.test\",\n",
|
||||
" usecols=14, dtype=str, delimiter=\", \", skiprows=1)\n",
|
||||
"\n",
|
||||
"# Trim trailing period \".\" from label\n",
|
||||
"y_test = np.array([a[:-1] for a in y_test])\n",
|
||||
"\n",
|
||||
"y_train[y_train == '<=50K'] = 0\n",
|
||||
"y_train[y_train == '>50K'] = 1\n",
|
||||
"y_train = y_train.astype(int)\n",
|
||||
"\n",
|
||||
"y_test[y_test == '<=50K'] = 0\n",
|
||||
"y_test[y_test == '>50K'] = 1\n",
|
||||
"y_test = y_test.astype(int)\n",
|
||||
"\n",
|
||||
"# get balanced dataset\n",
|
||||
"x_train = x_train[:x_test.shape[0]]\n",
|
||||
"y_train = y_train[:y_test.shape[0]]\n",
|
||||
"\n",
|
||||
"print(x_train)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Train decision tree model"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Base model accuracy: 0.8076285240464345\n"
|
||||
"Base model accuracy: 0.8087341072415699\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/home/mayaa/Development/GitHub/aiprivacy/ai-privacy-toolkit/venv1/lib/python3.8/site-packages/sklearn/utils/deprecation.py:103: FutureWarning: The attribute `n_features_` is deprecated in 1.0 and will be removed in 1.2. Use `n_features_in_` instead.\n",
|
||||
"/Users/abigailt/Library/Python/3.9/lib/python/site-packages/sklearn/utils/deprecation.py:103: FutureWarning: The attribute `n_features_` is deprecated in 1.0 and will be removed in 1.2. Use `n_features_in_` instead.\n",
|
||||
" warnings.warn(msg, category=FutureWarning)\n"
|
||||
]
|
||||
}
|
||||
|
|
@ -122,13 +117,10 @@
|
|||
"\n",
|
||||
"art_classifier = ScikitlearnDecisionTreeClassifier(model)\n",
|
||||
"\n",
|
||||
"print('Base model accuracy: ', model.score(x_test, y_test))\n",
|
||||
"\n",
|
||||
"x_train_predictions = np.array([np.argmax(arr) for arr in art_classifier.predict(x_train)]).reshape(-1,1)"
|
||||
"print('Base model accuracy: ', model.score(x_test, y_test))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
|
|
@ -139,7 +131,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
|
@ -159,7 +151,6 @@
|
|||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
|
|
@ -168,14 +159,14 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"0.5460017196904557\n"
|
||||
"0.5434836015231544\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
|
@ -191,7 +182,6 @@
|
|||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
|
|
@ -199,7 +189,6 @@
|
|||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
|
|
@ -213,30 +202,29 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[[38. 13. 0. 0. 40.]\n",
|
||||
" [46. 13. 0. 0. 35.]\n",
|
||||
" [28. 9. 0. 0. 40.]\n",
|
||||
"[[38 13 0 0 40]\n",
|
||||
" [46 13 0 0 35]\n",
|
||||
" [28 9 0 0 40]\n",
|
||||
" ...\n",
|
||||
" [26. 13. 0. 0. 40.]\n",
|
||||
" [27. 10. 0. 0. 50.]\n",
|
||||
" [28. 9. 0. 0. 40.]]\n"
|
||||
" [26 13 0 0 40]\n",
|
||||
" [27 10 0 0 50]\n",
|
||||
" [28 9 0 0 40]]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"import sys\n",
|
||||
"sys.path.insert(0, os.path.abspath('..'))\n",
|
||||
"from apt.utils.datasets import ArrayDataset\n",
|
||||
"from apt.anonymization import Anonymize\n",
|
||||
"\n",
|
||||
"x_train_predictions = np.array([np.argmax(arr) for arr in art_classifier.predict(x_train)])\n",
|
||||
"\n",
|
||||
"# QI = (age, education-num, capital-gain, hours-per-week)\n",
|
||||
"QI = [0, 1, 2, 4]\n",
|
||||
"anonymizer = Anonymize(100, QI)\n",
|
||||
|
|
@ -246,7 +234,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
|
|
@ -255,7 +243,7 @@
|
|||
"6739"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
|
|
@ -267,7 +255,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
|
|
@ -276,7 +264,7 @@
|
|||
"401"
|
||||
]
|
||||
},
|
||||
"execution_count": 9,
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
|
|
@ -287,7 +275,6 @@
|
|||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
|
|
@ -296,21 +283,21 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Anonymized model accuracy: 0.826914808672686\n"
|
||||
"Anonymized model accuracy: 0.8308457711442786\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/home/mayaa/Development/GitHub/aiprivacy/ai-privacy-toolkit/venv1/lib/python3.8/site-packages/sklearn/utils/deprecation.py:103: FutureWarning: The attribute `n_features_` is deprecated in 1.0 and will be removed in 1.2. Use `n_features_in_` instead.\n",
|
||||
"/Users/abigailt/Library/Python/3.9/lib/python/site-packages/sklearn/utils/deprecation.py:103: FutureWarning: The attribute `n_features_` is deprecated in 1.0 and will be removed in 1.2. Use `n_features_in_` instead.\n",
|
||||
" warnings.warn(msg, category=FutureWarning)\n"
|
||||
]
|
||||
}
|
||||
|
|
@ -325,7 +312,6 @@
|
|||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
|
|
@ -335,14 +321,14 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"0.49692912418621793\n"
|
||||
"0.4944724235351923\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
|
@ -364,7 +350,6 @@
|
|||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
|
|
@ -380,8 +365,8 @@
|
|||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"(0.5316007088009451, 0.7738607050730868)\n",
|
||||
"(0.4971184877823882, 0.5297874953936863)\n"
|
||||
"without anonymization: (0.5303914835164835, 0.7588748311018303)\n",
|
||||
"with anonymization: (0.49255952380952384, 0.3659255619702739)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
|
@ -411,15 +396,14 @@
|
|||
" return precision, recall\n",
|
||||
"\n",
|
||||
"# regular\n",
|
||||
"print(calc_precision_recall(np.concatenate((inferred_train_bb, inferred_test_bb)), \n",
|
||||
"print('without anonymization:', calc_precision_recall(np.concatenate((inferred_train_bb, inferred_test_bb)), \n",
|
||||
" np.concatenate((np.ones(len(inferred_train_bb)), np.zeros(len(inferred_test_bb))))))\n",
|
||||
"# anon\n",
|
||||
"print(calc_precision_recall(np.concatenate((anon_inferred_train_bb, anon_inferred_test_bb)), \n",
|
||||
"print('with anonymization:', calc_precision_recall(np.concatenate((anon_inferred_train_bb, anon_inferred_test_bb)), \n",
|
||||
" np.concatenate((np.ones(len(anon_inferred_train_bb)), np.zeros(len(anon_inferred_test_bb))))))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
|
|
@ -429,7 +413,7 @@
|
|||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
|
|
@ -443,7 +427,7 @@
|
|||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.10"
|
||||
"version": "3.9.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue