mirror of
https://github.com/IBM/ai-privacy-toolkit.git
synced 2026-05-10 20:42:38 +02:00
Support for one-hot encoded features in minimization (#87)
* Initial version with first working test * Make sure representative values in generalizations for 1-hot encoded features are consistent. * Updated notebooks for one-hot encoded data * Review comments Signed-off-by: abigailt <abigailt@il.ibm.com>
This commit is contained in:
parent
5dce961092
commit
6d81cd8ed4
4 changed files with 26703 additions and 48 deletions
|
|
@ -11,7 +11,7 @@
|
|||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"In this tutorial we will show how to anonymize models using the ML anonymization module, specifically when the inout data is already one-hot encoded. \n",
|
||||
"In this tutorial we will show how to anonymize models using the ML anonymization module, specifically when the input data is already one-hot encoded. \n",
|
||||
"\n",
|
||||
"This will be demonstrated using the Adult dataset (original dataset can be found here: https://archive.ics.uci.edu/ml/datasets/adult). "
|
||||
]
|
||||
|
|
@ -25,7 +25,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 21,
|
||||
"execution_count": 29,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
|
|
@ -81,7 +81,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 22,
|
||||
"execution_count": 30,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
|
|
@ -123,14 +123,14 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 23,
|
||||
"execution_count": 31,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Base model accuracy: 0.814446287083103\n"
|
||||
"Base model accuracy: 0.8143234445058657\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
|
@ -168,7 +168,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 25,
|
||||
"execution_count": 32,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
|
|
@ -194,14 +194,14 @@
|
|||
"# QI = (race, sex)\n",
|
||||
"QI = [53, 52, 51, 50, 49, 48, 47]\n",
|
||||
"QI_slices = [[47, 48, 49, 50, 51], [52, 53]]\n",
|
||||
"anonymizer = Anonymize(100, QI)\n",
|
||||
"anonymizer = Anonymize(100, QI, quasi_identifer_slices=QI_slices)\n",
|
||||
"anon = anonymizer.anonymize(ArrayDataset(x_train, x_train_predictions))\n",
|
||||
"print(anon)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 26,
|
||||
"execution_count": 33,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
|
|
@ -210,7 +210,7 @@
|
|||
"2711"
|
||||
]
|
||||
},
|
||||
"execution_count": 26,
|
||||
"execution_count": 33,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
|
|
@ -222,7 +222,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 27,
|
||||
"execution_count": 34,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
|
|
@ -231,7 +231,7 @@
|
|||
"2476"
|
||||
]
|
||||
},
|
||||
"execution_count": 27,
|
||||
"execution_count": 34,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
|
|
@ -250,14 +250,14 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 28,
|
||||
"execution_count": 35,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Anonymized model accuracy: 0.8135863890424421\n"
|
||||
"Anonymized model accuracy: 0.8124808058473066\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
|
|
|||
26325
notebooks/minimization_one_hot_adult.ipynb
Normal file
26325
notebooks/minimization_one_hot_adult.ipynb
Normal file
File diff suppressed because it is too large
Load diff
Loading…
Add table
Add a link
Reference in a new issue