mirror of
https://github.com/IBM/ai-privacy-toolkit.git
synced 2026-06-23 15:48:06 +02:00
Data and Model wrappers (#26)
* Squashed commit of wrappers:
Wrapper minimizer
* apply dataset wrapper on minimizer
* apply changes on minimization notebook
* add black_box_access and unlimited_queries params
Dataset wrapper anonymizer
Add features_names to ArrayDataset
and allow providing features names in QI and Cat features not just indexes
update notebooks
categorical features and QI passed by indexes
dataset include feature names and is_pandas param
add pytorch Dataset
Remove redundant code.
Use data wrappers in model wrapper APIs.
add generic dataset components
Create initial version of wrappers for models
* Fix handling of categorical features
This commit is contained in:
parent
d53818644e
commit
2b2dab6bef
17 changed files with 1340 additions and 752 deletions
|
|
@ -29,198 +29,15 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 61,
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>parents</th>\n",
|
||||
" <th>has_nurs</th>\n",
|
||||
" <th>form</th>\n",
|
||||
" <th>children</th>\n",
|
||||
" <th>housing</th>\n",
|
||||
" <th>finance</th>\n",
|
||||
" <th>social</th>\n",
|
||||
" <th>health</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>8450</th>\n",
|
||||
" <td>pretentious</td>\n",
|
||||
" <td>very_crit</td>\n",
|
||||
" <td>foster</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>less_conv</td>\n",
|
||||
" <td>convenient</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>not_recom</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>12147</th>\n",
|
||||
" <td>great_pret</td>\n",
|
||||
" <td>very_crit</td>\n",
|
||||
" <td>complete</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>critical</td>\n",
|
||||
" <td>inconv</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>recommended</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2780</th>\n",
|
||||
" <td>usual</td>\n",
|
||||
" <td>critical</td>\n",
|
||||
" <td>complete</td>\n",
|
||||
" <td>4</td>\n",
|
||||
" <td>less_conv</td>\n",
|
||||
" <td>convenient</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>not_recom</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>11924</th>\n",
|
||||
" <td>great_pret</td>\n",
|
||||
" <td>critical</td>\n",
|
||||
" <td>foster</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>critical</td>\n",
|
||||
" <td>convenient</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>not_recom</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>59</th>\n",
|
||||
" <td>usual</td>\n",
|
||||
" <td>proper</td>\n",
|
||||
" <td>complete</td>\n",
|
||||
" <td>2</td>\n",
|
||||
" <td>convenient</td>\n",
|
||||
" <td>convenient</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>not_recom</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>...</th>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>5193</th>\n",
|
||||
" <td>pretentious</td>\n",
|
||||
" <td>less_proper</td>\n",
|
||||
" <td>complete</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>convenient</td>\n",
|
||||
" <td>inconv</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>recommended</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1375</th>\n",
|
||||
" <td>usual</td>\n",
|
||||
" <td>less_proper</td>\n",
|
||||
" <td>incomplete</td>\n",
|
||||
" <td>2</td>\n",
|
||||
" <td>less_conv</td>\n",
|
||||
" <td>convenient</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>priority</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>10318</th>\n",
|
||||
" <td>great_pret</td>\n",
|
||||
" <td>less_proper</td>\n",
|
||||
" <td>foster</td>\n",
|
||||
" <td>4</td>\n",
|
||||
" <td>convenient</td>\n",
|
||||
" <td>convenient</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>priority</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>6396</th>\n",
|
||||
" <td>pretentious</td>\n",
|
||||
" <td>improper</td>\n",
|
||||
" <td>completed</td>\n",
|
||||
" <td>3</td>\n",
|
||||
" <td>less_conv</td>\n",
|
||||
" <td>convenient</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>recommended</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>485</th>\n",
|
||||
" <td>usual</td>\n",
|
||||
" <td>proper</td>\n",
|
||||
" <td>incomplete</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>critical</td>\n",
|
||||
" <td>inconv</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>not_recom</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"<p>10366 rows × 8 columns</p>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" parents has_nurs form children housing finance \\\n",
|
||||
"8450 pretentious very_crit foster 1 less_conv convenient \n",
|
||||
"12147 great_pret very_crit complete 1 critical inconv \n",
|
||||
"2780 usual critical complete 4 less_conv convenient \n",
|
||||
"11924 great_pret critical foster 1 critical convenient \n",
|
||||
"59 usual proper complete 2 convenient convenient \n",
|
||||
"... ... ... ... ... ... ... \n",
|
||||
"5193 pretentious less_proper complete 1 convenient inconv \n",
|
||||
"1375 usual less_proper incomplete 2 less_conv convenient \n",
|
||||
"10318 great_pret less_proper foster 4 convenient convenient \n",
|
||||
"6396 pretentious improper completed 3 less_conv convenient \n",
|
||||
"485 usual proper incomplete 1 critical inconv \n",
|
||||
"\n",
|
||||
" social health \n",
|
||||
"8450 1 not_recom \n",
|
||||
"12147 1 recommended \n",
|
||||
"2780 1 not_recom \n",
|
||||
"11924 1 not_recom \n",
|
||||
"59 0 not_recom \n",
|
||||
"... ... ... \n",
|
||||
"5193 0 recommended \n",
|
||||
"1375 1 priority \n",
|
||||
"10318 0 priority \n",
|
||||
"6396 1 recommended \n",
|
||||
"485 1 not_recom \n",
|
||||
"\n",
|
||||
"[10366 rows x 8 columns]"
|
||||
]
|
||||
"text/plain": " parents has_nurs form children housing finance \\\n8450 pretentious very_crit foster 1 less_conv convenient \n12147 great_pret very_crit complete 1 critical inconv \n2780 usual critical complete 4 less_conv convenient \n11924 great_pret critical foster 1 critical convenient \n59 usual proper complete 2 convenient convenient \n... ... ... ... ... ... ... \n5193 pretentious less_proper complete 1 convenient inconv \n1375 usual less_proper incomplete 2 less_conv convenient \n10318 great_pret less_proper foster 4 convenient convenient \n6396 pretentious improper completed 3 less_conv convenient \n485 usual proper incomplete 1 critical inconv \n\n social health \n8450 1 not_recom \n12147 1 recommended \n2780 1 not_recom \n11924 1 not_recom \n59 0 not_recom \n... ... ... \n5193 0 recommended \n1375 1 priority \n10318 0 priority \n6396 1 recommended \n485 1 not_recom \n\n[10366 rows x 8 columns]",
|
||||
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>parents</th>\n <th>has_nurs</th>\n <th>form</th>\n <th>children</th>\n <th>housing</th>\n <th>finance</th>\n <th>social</th>\n <th>health</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>8450</th>\n <td>pretentious</td>\n <td>very_crit</td>\n <td>foster</td>\n <td>1</td>\n <td>less_conv</td>\n <td>convenient</td>\n <td>1</td>\n <td>not_recom</td>\n </tr>\n <tr>\n <th>12147</th>\n <td>great_pret</td>\n <td>very_crit</td>\n <td>complete</td>\n <td>1</td>\n <td>critical</td>\n <td>inconv</td>\n <td>1</td>\n <td>recommended</td>\n </tr>\n <tr>\n <th>2780</th>\n <td>usual</td>\n <td>critical</td>\n <td>complete</td>\n <td>4</td>\n <td>less_conv</td>\n <td>convenient</td>\n <td>1</td>\n <td>not_recom</td>\n </tr>\n <tr>\n <th>11924</th>\n <td>great_pret</td>\n <td>critical</td>\n <td>foster</td>\n <td>1</td>\n <td>critical</td>\n <td>convenient</td>\n <td>1</td>\n <td>not_recom</td>\n </tr>\n <tr>\n <th>59</th>\n <td>usual</td>\n <td>proper</td>\n <td>complete</td>\n <td>2</td>\n <td>convenient</td>\n <td>convenient</td>\n <td>0</td>\n <td>not_recom</td>\n </tr>\n <tr>\n <th>...</th>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n </tr>\n <tr>\n <th>5193</th>\n <td>pretentious</td>\n <td>less_proper</td>\n <td>complete</td>\n <td>1</td>\n <td>convenient</td>\n <td>inconv</td>\n <td>0</td>\n <td>recommended</td>\n </tr>\n <tr>\n <th>1375</th>\n <td>usual</td>\n <td>less_proper</td>\n <td>incomplete</td>\n <td>2</td>\n <td>less_conv</td>\n <td>convenient</td>\n <td>1</td>\n <td>priority</td>\n </tr>\n <tr>\n <th>10318</th>\n <td>great_pret</td>\n <td>less_proper</td>\n <td>foster</td>\n <td>4</td>\n <td>convenient</td>\n <td>convenient</td>\n <td>0</td>\n <td>priority</td>\n </tr>\n <tr>\n <th>6396</th>\n <td>pretentious</td>\n <td>improper</td>\n <td>completed</td>\n <td>3</td>\n <td>less_conv</td>\n <td>convenient</td>\n <td>1</td>\n <td>recommended</td>\n </tr>\n <tr>\n <th>485</th>\n <td>usual</td>\n <td>proper</td>\n <td>incomplete</td>\n <td>1</td>\n <td>critical</td>\n <td>inconv</td>\n <td>1</td>\n <td>not_recom</td>\n </tr>\n </tbody>\n</table>\n<p>10366 rows × 8 columns</p>\n</div>"
|
||||
},
|
||||
"execution_count": 61,
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
|
|
@ -230,7 +47,7 @@
|
|||
"import sys\n",
|
||||
"sys.path.insert(0, os.path.abspath('..'))\n",
|
||||
"\n",
|
||||
"from apt.utils import get_nursery_dataset\n",
|
||||
"from apt.utils.dataset_utils import get_nursery_dataset\n",
|
||||
"\n",
|
||||
"(x_train, y_train), (x_test, y_test) = get_nursery_dataset(transform_social=True)\n",
|
||||
"\n",
|
||||
|
|
@ -246,7 +63,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 62,
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
|
|
@ -263,9 +80,9 @@
|
|||
"from sklearn.preprocessing import OneHotEncoder\n",
|
||||
"\n",
|
||||
"x_train_str = x_train.astype(str)\n",
|
||||
"train_encoded = OneHotEncoder(sparse=False, drop='if_binary').fit_transform(x_train_str)\n",
|
||||
"train_encoded = OneHotEncoder(sparse=False).fit_transform(x_train_str)\n",
|
||||
"x_test_str = x_test.astype(str)\n",
|
||||
"test_encoded = OneHotEncoder(sparse=False, drop='if_binary').fit_transform(x_test_str)\n",
|
||||
"test_encoded = OneHotEncoder(sparse=False).fit_transform(x_test_str)\n",
|
||||
" \n",
|
||||
"model = DecisionTreeClassifier()\n",
|
||||
"model.fit(train_encoded, y_train)\n",
|
||||
|
|
@ -287,7 +104,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 91,
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
|
@ -323,14 +140,14 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 96,
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"0.6430638626278217\n"
|
||||
"1.0\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
|
@ -361,14 +178,14 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 55,
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"0.6980513216284006\n"
|
||||
"0.5122515917422342\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
|
@ -408,224 +225,43 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 97,
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>parents</th>\n",
|
||||
" <th>has_nurs</th>\n",
|
||||
" <th>form</th>\n",
|
||||
" <th>children</th>\n",
|
||||
" <th>housing</th>\n",
|
||||
" <th>finance</th>\n",
|
||||
" <th>social</th>\n",
|
||||
" <th>health</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>8450</th>\n",
|
||||
" <td>pretentious</td>\n",
|
||||
" <td>very_crit</td>\n",
|
||||
" <td>foster</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>less_conv</td>\n",
|
||||
" <td>convenient</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>not_recom</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>12147</th>\n",
|
||||
" <td>great_pret</td>\n",
|
||||
" <td>very_crit</td>\n",
|
||||
" <td>complete</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>critical</td>\n",
|
||||
" <td>inconv</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>recommended</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2780</th>\n",
|
||||
" <td>usual</td>\n",
|
||||
" <td>critical</td>\n",
|
||||
" <td>complete</td>\n",
|
||||
" <td>4</td>\n",
|
||||
" <td>less_conv</td>\n",
|
||||
" <td>convenient</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>not_recom</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>11924</th>\n",
|
||||
" <td>great_pret</td>\n",
|
||||
" <td>critical</td>\n",
|
||||
" <td>foster</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>critical</td>\n",
|
||||
" <td>convenient</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>not_recom</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>59</th>\n",
|
||||
" <td>usual</td>\n",
|
||||
" <td>proper</td>\n",
|
||||
" <td>complete</td>\n",
|
||||
" <td>2</td>\n",
|
||||
" <td>convenient</td>\n",
|
||||
" <td>convenient</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>not_recom</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>...</th>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>5193</th>\n",
|
||||
" <td>pretentious</td>\n",
|
||||
" <td>less_proper</td>\n",
|
||||
" <td>complete</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>convenient</td>\n",
|
||||
" <td>inconv</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>recommended</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1375</th>\n",
|
||||
" <td>usual</td>\n",
|
||||
" <td>less_proper</td>\n",
|
||||
" <td>incomplete</td>\n",
|
||||
" <td>2</td>\n",
|
||||
" <td>less_conv</td>\n",
|
||||
" <td>convenient</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>priority</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>10318</th>\n",
|
||||
" <td>great_pret</td>\n",
|
||||
" <td>less_proper</td>\n",
|
||||
" <td>foster</td>\n",
|
||||
" <td>4</td>\n",
|
||||
" <td>convenient</td>\n",
|
||||
" <td>convenient</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>priority</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>6396</th>\n",
|
||||
" <td>pretentious</td>\n",
|
||||
" <td>improper</td>\n",
|
||||
" <td>completed</td>\n",
|
||||
" <td>3</td>\n",
|
||||
" <td>less_conv</td>\n",
|
||||
" <td>convenient</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>recommended</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>485</th>\n",
|
||||
" <td>usual</td>\n",
|
||||
" <td>proper</td>\n",
|
||||
" <td>incomplete</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>critical</td>\n",
|
||||
" <td>convenient</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>not_recom</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"<p>10366 rows × 8 columns</p>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" parents has_nurs form children housing finance \\\n",
|
||||
"8450 pretentious very_crit foster 1 less_conv convenient \n",
|
||||
"12147 great_pret very_crit complete 1 critical inconv \n",
|
||||
"2780 usual critical complete 4 less_conv convenient \n",
|
||||
"11924 great_pret critical foster 1 critical convenient \n",
|
||||
"59 usual proper complete 2 convenient convenient \n",
|
||||
"... ... ... ... ... ... ... \n",
|
||||
"5193 pretentious less_proper complete 1 convenient inconv \n",
|
||||
"1375 usual less_proper incomplete 2 less_conv convenient \n",
|
||||
"10318 great_pret less_proper foster 4 convenient convenient \n",
|
||||
"6396 pretentious improper completed 3 less_conv convenient \n",
|
||||
"485 usual proper incomplete 1 critical convenient \n",
|
||||
"\n",
|
||||
" social health \n",
|
||||
"8450 0 not_recom \n",
|
||||
"12147 1 recommended \n",
|
||||
"2780 0 not_recom \n",
|
||||
"11924 0 not_recom \n",
|
||||
"59 0 not_recom \n",
|
||||
"... ... ... \n",
|
||||
"5193 0 recommended \n",
|
||||
"1375 1 priority \n",
|
||||
"10318 0 priority \n",
|
||||
"6396 1 recommended \n",
|
||||
"485 0 not_recom \n",
|
||||
"\n",
|
||||
"[10366 rows x 8 columns]"
|
||||
]
|
||||
"text/plain": " parents has_nurs form children housing finance \\\n0 pretentious very_crit foster 1 less_conv convenient \n1 great_pret very_crit complete 1 critical inconv \n2 usual critical complete 4 less_conv convenient \n3 great_pret critical foster 1 critical convenient \n4 usual proper complete 2 convenient convenient \n... ... ... ... ... ... ... \n10361 pretentious less_proper complete 1 convenient inconv \n10362 usual less_proper incomplete 2 less_conv convenient \n10363 great_pret less_proper foster 4 convenient convenient \n10364 pretentious improper completed 3 less_conv convenient \n10365 usual proper incomplete 1 critical convenient \n\n social health \n0 0 not_recom \n1 1 recommended \n2 0 not_recom \n3 0 not_recom \n4 0 not_recom \n... ... ... \n10361 0 recommended \n10362 1 priority \n10363 0 priority \n10364 1 recommended \n10365 0 not_recom \n\n[10366 rows x 8 columns]",
|
||||
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>parents</th>\n <th>has_nurs</th>\n <th>form</th>\n <th>children</th>\n <th>housing</th>\n <th>finance</th>\n <th>social</th>\n <th>health</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>pretentious</td>\n <td>very_crit</td>\n <td>foster</td>\n <td>1</td>\n <td>less_conv</td>\n <td>convenient</td>\n <td>0</td>\n <td>not_recom</td>\n </tr>\n <tr>\n <th>1</th>\n <td>great_pret</td>\n <td>very_crit</td>\n <td>complete</td>\n <td>1</td>\n <td>critical</td>\n <td>inconv</td>\n <td>1</td>\n <td>recommended</td>\n </tr>\n <tr>\n <th>2</th>\n <td>usual</td>\n <td>critical</td>\n <td>complete</td>\n <td>4</td>\n <td>less_conv</td>\n <td>convenient</td>\n <td>0</td>\n <td>not_recom</td>\n </tr>\n <tr>\n <th>3</th>\n <td>great_pret</td>\n <td>critical</td>\n <td>foster</td>\n <td>1</td>\n <td>critical</td>\n <td>convenient</td>\n <td>0</td>\n <td>not_recom</td>\n </tr>\n <tr>\n <th>4</th>\n <td>usual</td>\n <td>proper</td>\n <td>complete</td>\n <td>2</td>\n <td>convenient</td>\n <td>convenient</td>\n <td>0</td>\n <td>not_recom</td>\n </tr>\n <tr>\n <th>...</th>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n </tr>\n <tr>\n <th>10361</th>\n <td>pretentious</td>\n <td>less_proper</td>\n <td>complete</td>\n <td>1</td>\n <td>convenient</td>\n <td>inconv</td>\n <td>0</td>\n <td>recommended</td>\n </tr>\n <tr>\n <th>10362</th>\n <td>usual</td>\n <td>less_proper</td>\n <td>incomplete</td>\n <td>2</td>\n <td>less_conv</td>\n <td>convenient</td>\n <td>1</td>\n <td>priority</td>\n </tr>\n <tr>\n <th>10363</th>\n <td>great_pret</td>\n <td>less_proper</td>\n <td>foster</td>\n <td>4</td>\n <td>convenient</td>\n <td>convenient</td>\n <td>0</td>\n <td>priority</td>\n </tr>\n <tr>\n <th>10364</th>\n <td>pretentious</td>\n <td>improper</td>\n <td>completed</td>\n <td>3</td>\n <td>less_conv</td>\n <td>convenient</td>\n <td>1</td>\n <td>recommended</td>\n </tr>\n <tr>\n <th>10365</th>\n <td>usual</td>\n <td>proper</td>\n <td>incomplete</td>\n <td>1</td>\n <td>critical</td>\n <td>convenient</td>\n <td>0</td>\n <td>not_recom</td>\n </tr>\n </tbody>\n</table>\n<p>10366 rows × 8 columns</p>\n</div>"
|
||||
},
|
||||
"execution_count": 97,
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from apt.utils.datasets import ArrayDataset\n",
|
||||
"from apt.anonymization import Anonymize\n",
|
||||
"\n",
|
||||
"features = x_train.columns\n",
|
||||
"QI = [\"finance\", \"social\", \"health\"]\n",
|
||||
"categorical_features = [\"parents\", \"has_nurs\", \"form\", \"housing\", \"finance\", \"health\", 'children']\n",
|
||||
"anonymizer = Anonymize(100, QI, categorical_features=categorical_features)\n",
|
||||
"anon = anonymizer.anonymize(x_train, x_train_predictions)\n",
|
||||
"anon"
|
||||
"QI_indexes = [i for i, v in enumerate(features) if v in QI]\n",
|
||||
"categorical_features_indexes = [i for i, v in enumerate(features) if v in categorical_features]\n",
|
||||
"anonymizer = Anonymize(100, QI_indexes, categorical_features=categorical_features_indexes)\n",
|
||||
"anon = anonymizer.anonymize(ArrayDataset(x_train, x_train_predictions))\n",
|
||||
"anon\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 64,
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"7585"
|
||||
]
|
||||
"text/plain": "7585"
|
||||
},
|
||||
"execution_count": 64,
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
|
|
@ -637,16 +273,14 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 65,
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"5766"
|
||||
]
|
||||
"text/plain": "5766"
|
||||
},
|
||||
"execution_count": 65,
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
|
|
@ -665,7 +299,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 66,
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
|
|
@ -678,7 +312,7 @@
|
|||
],
|
||||
"source": [
|
||||
"anon_str = anon.astype(str)\n",
|
||||
"anon_encoded = OneHotEncoder(sparse=False, drop='if_binary').fit_transform(anon_str)\n",
|
||||
"anon_encoded = OneHotEncoder(sparse=False).fit_transform(anon_str)\n",
|
||||
"\n",
|
||||
"anon_model = DecisionTreeClassifier()\n",
|
||||
"anon_model.fit(anon_encoded, y_train)\n",
|
||||
|
|
@ -698,14 +332,14 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 98,
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"0.6471155701331275\n"
|
||||
"1.0\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
|
@ -734,14 +368,14 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 69,
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"0.6982442600810341\n"
|
||||
"0.5245996527107852\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
|
@ -765,15 +399,15 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 87,
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"(0.33056202194878614, 0.2888695146759663)\n",
|
||||
"(0.34112301200908796, 0.3054344667247893)\n"
|
||||
"(0.49415432579890883, 0.48976438779451525)\n",
|
||||
"(0.49415432579890883, 0.48976438779451525)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
|
@ -810,15 +444,15 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 88,
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"(0.6457357075913777, 0.2002324905550712)\n",
|
||||
"(0.6472248353715898, 0.1999418773612322)\n"
|
||||
"(1.0, 0.019204655674102813)\n",
|
||||
"(0.9829787234042553, 0.04481086323957323)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
|
@ -849,26 +483,24 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 74,
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"anonymizer2 = Anonymize(1000, QI, categorical_features=categorical_features)\n",
|
||||
"anon2 = anonymizer2.anonymize(x_train, x_train_predictions)"
|
||||
"anonymizer2 = Anonymize(1000, QI_indexes, categorical_features=categorical_features_indexes)\n",
|
||||
"anon2 = anonymizer2.anonymize(ArrayDataset(x_train, x_train_predictions))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 75,
|
||||
"execution_count": 15,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"4226"
|
||||
]
|
||||
"text/plain": "4226"
|
||||
},
|
||||
"execution_count": 75,
|
||||
"execution_count": 15,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
|
|
@ -887,7 +519,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 104,
|
||||
"execution_count": 16,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
|
|
@ -900,7 +532,7 @@
|
|||
],
|
||||
"source": [
|
||||
"anon2_str = anon2.astype(str)\n",
|
||||
"anon2_encoded = OneHotEncoder(sparse=False, drop='if_binary').fit_transform(anon2_str)\n",
|
||||
"anon2_encoded = OneHotEncoder(sparse=False).fit_transform(anon2_str)\n",
|
||||
"\n",
|
||||
"anon2_model = DecisionTreeClassifier()\n",
|
||||
"anon2_model.fit(anon2_encoded, y_train)\n",
|
||||
|
|
@ -920,14 +552,14 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 105,
|
||||
"execution_count": 17,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"0.6266640941539648\n"
|
||||
"1.0\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
|
@ -956,14 +588,14 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 106,
|
||||
"execution_count": 18,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"0.6944819602546788\n"
|
||||
"0.515820953115956\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
|
@ -980,17 +612,17 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 107,
|
||||
"execution_count": 19,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"(0.35793357933579334, 0.17037470725995316)\n",
|
||||
"(0.3360655737704918, 0.1680327868852459)\n",
|
||||
"(0.6457357075913777, 0.2002324905550712)\n",
|
||||
"(0.6327519379844961, 0.1897704155768672)\n"
|
||||
"(0.49415432579890883, 0.48976438779451525)\n",
|
||||
"(0.49415432579890883, 0.48976438779451525)\n",
|
||||
"(1.0, 0.019204655674102813)\n",
|
||||
"(1.0, 0.026382153249272552)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
|
@ -1023,27 +655,26 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 111,
|
||||
"execution_count": 20,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"QI2 = [\"parents\", \"has_nurs\", \"form\", \"children\", \"housing\", \"finance\", \"social\", \"health\"]\n",
|
||||
"anonymizer3 = Anonymize(100, QI2, categorical_features=categorical_features)\n",
|
||||
"anon3 = anonymizer3.anonymize(x_train, x_train_predictions)"
|
||||
"QI2_indexes = [i for i, v in enumerate(features) if v in QI2]\n",
|
||||
"anonymizer3 = Anonymize(100, QI2_indexes, categorical_features=categorical_features_indexes)\n",
|
||||
"anon3 = anonymizer3.anonymize(ArrayDataset(x_train, x_train_predictions))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 112,
|
||||
"execution_count": 21,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"39"
|
||||
]
|
||||
"text/plain": "39"
|
||||
},
|
||||
"execution_count": 112,
|
||||
"execution_count": 21,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
|
|
@ -1055,22 +686,22 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 113,
|
||||
"execution_count": 22,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Anonymized model accuracy: 0.7723765432098766\n",
|
||||
"BB attack accuracy: 0.5792012348060969\n",
|
||||
"WB attack accuracy: 0.6680493922438742\n"
|
||||
"Anonymized model accuracy: 0.751929012345679\n",
|
||||
"BB attack accuracy: 1.0\n",
|
||||
"WB attack accuracy: 0.5187150299054601\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"anon3_str = anon3.astype(str)\n",
|
||||
"anon3_encoded = OneHotEncoder(sparse=False, drop='if_binary').fit_transform(anon3_str)\n",
|
||||
"anon3_encoded = OneHotEncoder(sparse=False).fit_transform(anon3_str)\n",
|
||||
"\n",
|
||||
"anon3_model = DecisionTreeClassifier()\n",
|
||||
"anon3_model.fit(anon3_encoded, y_train)\n",
|
||||
|
|
@ -1105,17 +736,17 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 114,
|
||||
"execution_count": 23,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"(0.35793357933579334, 0.17037470725995316)\n",
|
||||
"(0.3393939393939394, 0.13114754098360656)\n",
|
||||
"(0.6457357075913777, 0.2002324905550712)\n",
|
||||
"(1, 0.0)\n"
|
||||
"(0.49415432579890883, 0.48976438779451525)\n",
|
||||
"(0.49415432579890883, 0.48976438779451525)\n",
|
||||
"(1.0, 0.019204655674102813)\n",
|
||||
"(1.0, 0.032201745877788554)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
|
@ -1162,4 +793,4 @@
|
|||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
}
|
||||
|
|
@ -29,7 +29,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 97,
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
|
|
@ -44,6 +44,18 @@
|
|||
" [ 26. 11. 0. 0. 48.]\n",
|
||||
" [ 27. 9. 0. 0. 40.]]\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/var/folders/9b/qbtw28w53355cvpjs4qn83yc0000gn/T/ipykernel_85828/3975777015.py:22: DeprecationWarning: `np.int` is a deprecated alias for the builtin `int`. To silence this warning, use `int` by itself. Doing this will not modify any behavior and is safe. When replacing `np.int`, you may wish to use e.g. `np.int64` or `np.int32` to specify the precision. If you wish to review your current use, check the release note link for additional information.\n",
|
||||
"Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n",
|
||||
" y_train = y_train.astype(np.int)\n",
|
||||
"/var/folders/9b/qbtw28w53355cvpjs4qn83yc0000gn/T/ipykernel_85828/3975777015.py:26: DeprecationWarning: `np.int` is a deprecated alias for the builtin `int`. To silence this warning, use `int` by itself. Doing this will not modify any behavior and is safe. When replacing `np.int`, you may wish to use e.g. `np.int64` or `np.int32` to specify the precision. If you wish to review your current use, check the release note link for additional information.\n",
|
||||
"Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n",
|
||||
" y_test = y_test.astype(np.int)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
|
|
@ -90,14 +102,14 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 116,
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Base model accuracy: 0.8075056814691972\n"
|
||||
"Base model accuracy: 0.8074442601805786\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
|
@ -126,9 +138,18 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 124,
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/Users/olasaadi/PycharmProjects/ai-privacy-toolkit-internal/venv/lib/python3.8/site-packages/art/attacks/inference/membership_inference/black_box.py:262: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
|
||||
" self.attack_model.fit(np.c_[x_1, x_2], y_ready) # type: ignore\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from art.attacks.inference.membership_inference import MembershipInferenceBlackBox\n",
|
||||
"\n",
|
||||
|
|
@ -154,14 +175,14 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 125,
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"0.5440363591696352\n"
|
||||
"0.545264709495148\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
|
@ -197,7 +218,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 128,
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
|
|
@ -215,6 +236,7 @@
|
|||
}
|
||||
],
|
||||
"source": [
|
||||
"from apt.utils.datasets import ArrayDataset\n",
|
||||
"import os\n",
|
||||
"import sys\n",
|
||||
"sys.path.insert(0, os.path.abspath('..'))\n",
|
||||
|
|
@ -223,22 +245,20 @@
|
|||
"# QI = (age, education-num, capital-gain, hours-per-week)\n",
|
||||
"QI = [0, 1, 2, 4]\n",
|
||||
"anonymizer = Anonymize(100, QI)\n",
|
||||
"anon = anonymizer.anonymize(x_train, x_train_predictions)\n",
|
||||
"anon = anonymizer.anonymize(ArrayDataset(x_train, x_train_predictions))\n",
|
||||
"print(anon)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 104,
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"6739"
|
||||
]
|
||||
"text/plain": "6739"
|
||||
},
|
||||
"execution_count": 104,
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
|
|
@ -250,16 +270,14 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 129,
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"658"
|
||||
]
|
||||
"text/plain": "658"
|
||||
},
|
||||
"execution_count": 129,
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
|
|
@ -278,14 +296,14 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 130,
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Anonymized model accuracy: 0.8304158221239482\n"
|
||||
"Anonymized model accuracy: 0.83078434985566\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
|
@ -308,14 +326,22 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 131,
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/Users/olasaadi/PycharmProjects/ai-privacy-toolkit-internal/venv/lib/python3.8/site-packages/art/attacks/inference/membership_inference/black_box.py:262: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
|
||||
" self.attack_model.fit(np.c_[x_1, x_2], y_ready) # type: ignore\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"0.5034393809114359\n"
|
||||
"0.5047291487532244\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
|
@ -345,15 +371,15 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 132,
|
||||
"execution_count": 15,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"(0.5298924372550654, 0.7806166318634075)\n",
|
||||
"(0.5030507735890172, 0.5671293452892765)\n"
|
||||
"(0.5312420517168291, 0.7696843139663432)\n",
|
||||
"(0.5048372911169745, 0.4935511607910576)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
|
@ -419,4 +445,4 @@
|
|||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
}
|
||||
|
|
@ -29,7 +29,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 121,
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
|
@ -50,7 +50,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 122,
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
|
|
@ -86,14 +86,14 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 123,
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"0.4954954954954955\n"
|
||||
"0.527027027027027\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
|
@ -131,7 +131,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 124,
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
|
|
@ -141,6 +141,22 @@
|
|||
"unique rows in original data: 221\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/Users/olasaadi/PycharmProjects/ai-privacy-toolkit-internal/venv/lib/python3.8/site-packages/art/attacks/inference/membership_inference/black_box.py:262: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
|
||||
" self.attack_model.fit(np.c_[x_1, x_2], y_ready) # type: ignore\n",
|
||||
"/Users/olasaadi/PycharmProjects/ai-privacy-toolkit-internal/venv/lib/python3.8/site-packages/art/attacks/inference/membership_inference/black_box.py:262: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
|
||||
" self.attack_model.fit(np.c_[x_1, x_2], y_ready) # type: ignore\n",
|
||||
"/Users/olasaadi/PycharmProjects/ai-privacy-toolkit-internal/venv/lib/python3.8/site-packages/art/attacks/inference/membership_inference/black_box.py:262: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
|
||||
" self.attack_model.fit(np.c_[x_1, x_2], y_ready) # type: ignore\n",
|
||||
"/Users/olasaadi/PycharmProjects/ai-privacy-toolkit-internal/venv/lib/python3.8/site-packages/art/attacks/inference/membership_inference/black_box.py:262: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
|
||||
" self.attack_model.fit(np.c_[x_1, x_2], y_ready) # type: ignore\n",
|
||||
"/Users/olasaadi/PycharmProjects/ai-privacy-toolkit-internal/venv/lib/python3.8/site-packages/art/attacks/inference/membership_inference/black_box.py:262: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
|
||||
" self.attack_model.fit(np.c_[x_1, x_2], y_ready) # type: ignore\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
|
|
@ -148,11 +164,12 @@
|
|||
"k values: [5, 10, 20, 50, 75]\n",
|
||||
"unique rows: [34, 19, 8, 4, 2]\n",
|
||||
"model accuracy: [0.43165832354998956, 0.4509641063206041, -1.730181929385853, -5.577098823982753e+27, -1.2751609045828272e+25]\n",
|
||||
"attack accuracy: [0.5, 0.47297297297297297, 0.49549549549549543, 0.5, 0.47297297297297297]\n"
|
||||
"attack accuracy: [0.509009009009009, 0.481981981981982, 0.509009009009009, 0.5045045045045045, 0.4954954954954955]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from apt.utils.datasets import ArrayDataset\n",
|
||||
"from apt.anonymization import Anonymize\n",
|
||||
"k_values=[5, 10, 20, 50, 75]\n",
|
||||
"model_accuracy = []\n",
|
||||
|
|
@ -165,7 +182,7 @@
|
|||
"\n",
|
||||
"for k in k_values:\n",
|
||||
" anonymizer = Anonymize(k, QI, is_regression=True)\n",
|
||||
" anon = anonymizer.anonymize(X_train, x_train_predictions)\n",
|
||||
" anon = anonymizer.anonymize(ArrayDataset(X_train, x_train_predictions))\n",
|
||||
" unique_values.append(len(np.unique(anon, axis=0)))\n",
|
||||
" \n",
|
||||
" anon_model = LinearRegression()\n",
|
||||
|
|
@ -198,7 +215,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 124,
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
|
|
|
|||
|
|
@ -27,7 +27,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": 15,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
|
|
@ -42,6 +42,18 @@
|
|||
" [2.2000e+01 9.0000e+00 0.0000e+00 0.0000e+00 2.0000e+01]\n",
|
||||
" [5.2000e+01 9.0000e+00 1.5024e+04 0.0000e+00 4.0000e+01]]\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/var/folders/9b/qbtw28w53355cvpjs4qn83yc0000gn/T/ipykernel_13726/1357868359.py:22: DeprecationWarning: `np.int` is a deprecated alias for the builtin `int`. To silence this warning, use `int` by itself. Doing this will not modify any behavior and is safe. When replacing `np.int`, you may wish to use e.g. `np.int64` or `np.int32` to specify the precision. If you wish to review your current use, check the release note link for additional information.\n",
|
||||
"Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n",
|
||||
" y_train = y_train.astype(np.int)\n",
|
||||
"/var/folders/9b/qbtw28w53355cvpjs4qn83yc0000gn/T/ipykernel_13726/1357868359.py:26: DeprecationWarning: `np.int` is a deprecated alias for the builtin `int`. To silence this warning, use `int` by itself. Doing this will not modify any behavior and is safe. When replacing `np.int`, you may wish to use e.g. `np.int64` or `np.int32` to specify the precision. If you wish to review your current use, check the release note link for additional information.\n",
|
||||
"Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n",
|
||||
" y_test = y_test.astype(np.int)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
|
|
@ -84,24 +96,27 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": 16,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Base model accuracy: 0.8189914624408821\n"
|
||||
"Base model accuracy: 0.8183158282660771\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from apt.utils.datasets import ArrayDataset\n",
|
||||
"from apt.utils.models import SklearnClassifier, ModelOutputType\n",
|
||||
"from sklearn.tree import DecisionTreeClassifier\n",
|
||||
"\n",
|
||||
"model = DecisionTreeClassifier()\n",
|
||||
"model.fit(x_train, y_train)\n",
|
||||
"base_est = DecisionTreeClassifier()\n",
|
||||
"model = SklearnClassifier(base_est, ModelOutputType.CLASSIFIER_VECTOR)\n",
|
||||
"model.fit(ArrayDataset(x_train, y_train))\n",
|
||||
"\n",
|
||||
"print('Base model accuracy: ', model.score(x_test, y_test))"
|
||||
"print('Base model accuracy: ', model.score(ArrayDataset(x_test, y_test)))"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
|
@ -114,26 +129,26 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 17,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Initial accuracy of model on generalized data, relative to original model predictions (base generalization derived from tree, before improvements): 0.929376\n",
|
||||
"Initial accuracy of model on generalized data, relative to original model predictions (base generalization derived from tree, before improvements): 0.936540\n",
|
||||
"Improving accuracy\n",
|
||||
"feature to remove: 0\n",
|
||||
"Removed feature: 0, new relative accuracy: 0.939867\n",
|
||||
"feature to remove: 4\n",
|
||||
"Removed feature: 4, new relative accuracy: 0.967247\n",
|
||||
"feature to remove: 2\n",
|
||||
"Removed feature: 2, new relative accuracy: 0.972620\n",
|
||||
"Removed feature: 2, new relative accuracy: 0.935261\n",
|
||||
"feature to remove: 4\n",
|
||||
"Removed feature: 4, new relative accuracy: 0.946776\n",
|
||||
"feature to remove: 0\n",
|
||||
"Removed feature: 0, new relative accuracy: 0.972876\n",
|
||||
"feature to remove: 1\n",
|
||||
"Removed feature: 1, new relative accuracy: 0.992323\n",
|
||||
"Removed feature: 1, new relative accuracy: 0.992835\n",
|
||||
"feature to remove: 3\n",
|
||||
"Removed feature: 3, new relative accuracy: 1.000000\n",
|
||||
"Accuracy on minimized data: 0.8237371411024106\n"
|
||||
"Accuracy on minimized data: 0.8231229847996315\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
|
@ -155,10 +170,12 @@
|
|||
"X_generalizer_train, x_test, y_generalizer_train, y_test = train_test_split(x_test, y_test, stratify=y_test,\n",
|
||||
" test_size = 0.4, random_state = 38)\n",
|
||||
"x_train_predictions = model.predict(X_generalizer_train)\n",
|
||||
"minimizer.fit(X_generalizer_train, x_train_predictions)\n",
|
||||
"transformed = minimizer.transform(x_test)\n",
|
||||
"if x_train_predictions.shape[1] > 1:\n",
|
||||
" x_train_predictions = np.argmax(x_train_predictions, axis=1)\n",
|
||||
"minimizer.fit(dataset=ArrayDataset(X_generalizer_train, x_train_predictions))\n",
|
||||
"transformed = minimizer.transform(dataset=ArrayDataset(x_test))\n",
|
||||
"\n",
|
||||
"print('Accuracy on minimized data: ', model.score(transformed, y_test))"
|
||||
"print('Accuracy on minimized data: ', model.score(ArrayDataset(transformed, y_test)))"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
|
@ -170,14 +187,14 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 18,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{'ranges': {}, 'untouched': [0, 1, 2, 3, 4]}\n"
|
||||
"{'ranges': {}, 'categories': {}, 'untouched': ['4', '1', '3', '0', '2']}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
|
@ -197,25 +214,25 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"execution_count": 19,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Initial accuracy of model on generalized data, relative to original model predictions (base generalization derived from tree, before improvements): 0.929376\n",
|
||||
"Initial accuracy of model on generalized data, relative to original model predictions (base generalization derived from tree, before improvements): 0.936540\n",
|
||||
"Improving accuracy\n",
|
||||
"feature to remove: 0\n",
|
||||
"Removed feature: 0, new relative accuracy: 0.939867\n",
|
||||
"feature to remove: 4\n",
|
||||
"Removed feature: 4, new relative accuracy: 0.967247\n",
|
||||
"feature to remove: 2\n",
|
||||
"Removed feature: 2, new relative accuracy: 0.972620\n",
|
||||
"Removed feature: 2, new relative accuracy: 0.935261\n",
|
||||
"feature to remove: 4\n",
|
||||
"Removed feature: 4, new relative accuracy: 0.946776\n",
|
||||
"feature to remove: 0\n",
|
||||
"Removed feature: 0, new relative accuracy: 0.972876\n",
|
||||
"feature to remove: 1\n",
|
||||
"Removed feature: 1, new relative accuracy: 0.992323\n",
|
||||
"Accuracy on minimized data: 0.820205742361431\n",
|
||||
"{'ranges': {3: [546.0, 704.0, 705.5, 742.5, 782.0, 834.0, 870.0, 1446.5, 1538.5, 1612.5, 1699.0, 1744.0, 1801.0, 1814.0, 1846.0, 1881.5, 1978.5, 2248.0, 2298.5, 2537.5]}, 'untouched': [0, 1, 2, 4]}\n"
|
||||
"Removed feature: 1, new relative accuracy: 0.992835\n",
|
||||
"Accuracy on minimized data: 0.8192845079072624\n",
|
||||
"{'ranges': {'3': [569.0, 782.0, 870.0, 870.5, 938.0, 1016.5, 1311.5, 1457.0, 1494.5, 1596.0, 1629.5, 1684.0, 1805.0, 1859.0, 1867.5, 1881.5, 1938.0, 1978.5, 2119.0, 2210.0, 2218.0, 2244.5, 2298.5, 2443.5]}, 'categories': {}, 'untouched': ['2', '1', '0', '4']}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
|
@ -223,9 +240,9 @@
|
|||
"# We allow a 1% deviation in accuracy from the original model accuracy\n",
|
||||
"minimizer2 = GeneralizeToRepresentative(model, target_accuracy=0.99)\n",
|
||||
"\n",
|
||||
"minimizer2.fit(X_generalizer_train, x_train_predictions)\n",
|
||||
"transformed2 = minimizer2.transform(x_test)\n",
|
||||
"print('Accuracy on minimized data: ', model.score(transformed2, y_test))\n",
|
||||
"minimizer2.fit(dataset=ArrayDataset(X_generalizer_train, x_train_predictions))\n",
|
||||
"transformed2 = minimizer2.transform(dataset=ArrayDataset(x_test))\n",
|
||||
"print('Accuracy on minimized data: ', model.score(test_data=ArrayDataset(transformed2, y_test)))\n",
|
||||
"generalizations2 = minimizer2.generalizations\n",
|
||||
"print(generalizations2)"
|
||||
]
|
||||
|
|
@ -259,4 +276,4 @@
|
|||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue