mirror of
https://github.com/IBM/ai-privacy-toolkit.git
synced 2026-04-25 04:46:21 +02:00
update notebooks
This commit is contained in:
parent
06158c8508
commit
137167fb0c
3 changed files with 175 additions and 506 deletions
|
|
@ -29,198 +29,15 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 61,
|
||||
"execution_count": 136,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>parents</th>\n",
|
||||
" <th>has_nurs</th>\n",
|
||||
" <th>form</th>\n",
|
||||
" <th>children</th>\n",
|
||||
" <th>housing</th>\n",
|
||||
" <th>finance</th>\n",
|
||||
" <th>social</th>\n",
|
||||
" <th>health</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>8450</th>\n",
|
||||
" <td>pretentious</td>\n",
|
||||
" <td>very_crit</td>\n",
|
||||
" <td>foster</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>less_conv</td>\n",
|
||||
" <td>convenient</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>not_recom</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>12147</th>\n",
|
||||
" <td>great_pret</td>\n",
|
||||
" <td>very_crit</td>\n",
|
||||
" <td>complete</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>critical</td>\n",
|
||||
" <td>inconv</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>recommended</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2780</th>\n",
|
||||
" <td>usual</td>\n",
|
||||
" <td>critical</td>\n",
|
||||
" <td>complete</td>\n",
|
||||
" <td>4</td>\n",
|
||||
" <td>less_conv</td>\n",
|
||||
" <td>convenient</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>not_recom</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>11924</th>\n",
|
||||
" <td>great_pret</td>\n",
|
||||
" <td>critical</td>\n",
|
||||
" <td>foster</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>critical</td>\n",
|
||||
" <td>convenient</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>not_recom</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>59</th>\n",
|
||||
" <td>usual</td>\n",
|
||||
" <td>proper</td>\n",
|
||||
" <td>complete</td>\n",
|
||||
" <td>2</td>\n",
|
||||
" <td>convenient</td>\n",
|
||||
" <td>convenient</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>not_recom</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>...</th>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>5193</th>\n",
|
||||
" <td>pretentious</td>\n",
|
||||
" <td>less_proper</td>\n",
|
||||
" <td>complete</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>convenient</td>\n",
|
||||
" <td>inconv</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>recommended</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1375</th>\n",
|
||||
" <td>usual</td>\n",
|
||||
" <td>less_proper</td>\n",
|
||||
" <td>incomplete</td>\n",
|
||||
" <td>2</td>\n",
|
||||
" <td>less_conv</td>\n",
|
||||
" <td>convenient</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>priority</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>10318</th>\n",
|
||||
" <td>great_pret</td>\n",
|
||||
" <td>less_proper</td>\n",
|
||||
" <td>foster</td>\n",
|
||||
" <td>4</td>\n",
|
||||
" <td>convenient</td>\n",
|
||||
" <td>convenient</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>priority</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>6396</th>\n",
|
||||
" <td>pretentious</td>\n",
|
||||
" <td>improper</td>\n",
|
||||
" <td>completed</td>\n",
|
||||
" <td>3</td>\n",
|
||||
" <td>less_conv</td>\n",
|
||||
" <td>convenient</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>recommended</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>485</th>\n",
|
||||
" <td>usual</td>\n",
|
||||
" <td>proper</td>\n",
|
||||
" <td>incomplete</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>critical</td>\n",
|
||||
" <td>inconv</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>not_recom</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"<p>10366 rows × 8 columns</p>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" parents has_nurs form children housing finance \\\n",
|
||||
"8450 pretentious very_crit foster 1 less_conv convenient \n",
|
||||
"12147 great_pret very_crit complete 1 critical inconv \n",
|
||||
"2780 usual critical complete 4 less_conv convenient \n",
|
||||
"11924 great_pret critical foster 1 critical convenient \n",
|
||||
"59 usual proper complete 2 convenient convenient \n",
|
||||
"... ... ... ... ... ... ... \n",
|
||||
"5193 pretentious less_proper complete 1 convenient inconv \n",
|
||||
"1375 usual less_proper incomplete 2 less_conv convenient \n",
|
||||
"10318 great_pret less_proper foster 4 convenient convenient \n",
|
||||
"6396 pretentious improper completed 3 less_conv convenient \n",
|
||||
"485 usual proper incomplete 1 critical inconv \n",
|
||||
"\n",
|
||||
" social health \n",
|
||||
"8450 1 not_recom \n",
|
||||
"12147 1 recommended \n",
|
||||
"2780 1 not_recom \n",
|
||||
"11924 1 not_recom \n",
|
||||
"59 0 not_recom \n",
|
||||
"... ... ... \n",
|
||||
"5193 0 recommended \n",
|
||||
"1375 1 priority \n",
|
||||
"10318 0 priority \n",
|
||||
"6396 1 recommended \n",
|
||||
"485 1 not_recom \n",
|
||||
"\n",
|
||||
"[10366 rows x 8 columns]"
|
||||
]
|
||||
"text/plain": " parents has_nurs form children housing finance \\\n8450 pretentious very_crit foster 1 less_conv convenient \n12147 great_pret very_crit complete 1 critical inconv \n2780 usual critical complete 4 less_conv convenient \n11924 great_pret critical foster 1 critical convenient \n59 usual proper complete 2 convenient convenient \n... ... ... ... ... ... ... \n5193 pretentious less_proper complete 1 convenient inconv \n1375 usual less_proper incomplete 2 less_conv convenient \n10318 great_pret less_proper foster 4 convenient convenient \n6396 pretentious improper completed 3 less_conv convenient \n485 usual proper incomplete 1 critical inconv \n\n social health \n8450 1 not_recom \n12147 1 recommended \n2780 1 not_recom \n11924 1 not_recom \n59 0 not_recom \n... ... ... \n5193 0 recommended \n1375 1 priority \n10318 0 priority \n6396 1 recommended \n485 1 not_recom \n\n[10366 rows x 8 columns]",
|
||||
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>parents</th>\n <th>has_nurs</th>\n <th>form</th>\n <th>children</th>\n <th>housing</th>\n <th>finance</th>\n <th>social</th>\n <th>health</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>8450</th>\n <td>pretentious</td>\n <td>very_crit</td>\n <td>foster</td>\n <td>1</td>\n <td>less_conv</td>\n <td>convenient</td>\n <td>1</td>\n <td>not_recom</td>\n </tr>\n <tr>\n <th>12147</th>\n <td>great_pret</td>\n <td>very_crit</td>\n <td>complete</td>\n <td>1</td>\n <td>critical</td>\n <td>inconv</td>\n <td>1</td>\n <td>recommended</td>\n </tr>\n <tr>\n <th>2780</th>\n <td>usual</td>\n <td>critical</td>\n <td>complete</td>\n <td>4</td>\n <td>less_conv</td>\n <td>convenient</td>\n <td>1</td>\n <td>not_recom</td>\n </tr>\n <tr>\n <th>11924</th>\n <td>great_pret</td>\n <td>critical</td>\n <td>foster</td>\n <td>1</td>\n <td>critical</td>\n <td>convenient</td>\n <td>1</td>\n <td>not_recom</td>\n </tr>\n <tr>\n <th>59</th>\n <td>usual</td>\n <td>proper</td>\n <td>complete</td>\n <td>2</td>\n <td>convenient</td>\n <td>convenient</td>\n <td>0</td>\n <td>not_recom</td>\n </tr>\n <tr>\n <th>...</th>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n </tr>\n <tr>\n <th>5193</th>\n <td>pretentious</td>\n <td>less_proper</td>\n <td>complete</td>\n <td>1</td>\n <td>convenient</td>\n <td>inconv</td>\n <td>0</td>\n <td>recommended</td>\n </tr>\n <tr>\n <th>1375</th>\n <td>usual</td>\n <td>less_proper</td>\n <td>incomplete</td>\n <td>2</td>\n <td>less_conv</td>\n <td>convenient</td>\n <td>1</td>\n <td>priority</td>\n </tr>\n <tr>\n <th>10318</th>\n <td>great_pret</td>\n <td>less_proper</td>\n <td>foster</td>\n <td>4</td>\n <td>convenient</td>\n <td>convenient</td>\n <td>0</td>\n <td>priority</td>\n </tr>\n <tr>\n <th>6396</th>\n <td>pretentious</td>\n <td>improper</td>\n <td>completed</td>\n <td>3</td>\n <td>less_conv</td>\n <td>convenient</td>\n <td>1</td>\n <td>recommended</td>\n </tr>\n <tr>\n <th>485</th>\n <td>usual</td>\n <td>proper</td>\n <td>incomplete</td>\n <td>1</td>\n <td>critical</td>\n <td>inconv</td>\n <td>1</td>\n <td>not_recom</td>\n </tr>\n </tbody>\n</table>\n<p>10366 rows × 8 columns</p>\n</div>"
|
||||
},
|
||||
"execution_count": 61,
|
||||
"execution_count": 136,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
|
|
@ -230,7 +47,7 @@
|
|||
"import sys\n",
|
||||
"sys.path.insert(0, os.path.abspath('..'))\n",
|
||||
"\n",
|
||||
"from apt.utils import get_nursery_dataset\n",
|
||||
"from apt.utils.dataset_utils import get_nursery_dataset\n",
|
||||
"\n",
|
||||
"(x_train, y_train), (x_test, y_test) = get_nursery_dataset(transform_social=True)\n",
|
||||
"\n",
|
||||
|
|
@ -246,7 +63,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 62,
|
||||
"execution_count": 137,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
|
|
@ -263,9 +80,9 @@
|
|||
"from sklearn.preprocessing import OneHotEncoder\n",
|
||||
"\n",
|
||||
"x_train_str = x_train.astype(str)\n",
|
||||
"train_encoded = OneHotEncoder(sparse=False, drop='if_binary').fit_transform(x_train_str)\n",
|
||||
"train_encoded = OneHotEncoder(sparse=False).fit_transform(x_train_str)\n",
|
||||
"x_test_str = x_test.astype(str)\n",
|
||||
"test_encoded = OneHotEncoder(sparse=False, drop='if_binary').fit_transform(x_test_str)\n",
|
||||
"test_encoded = OneHotEncoder(sparse=False).fit_transform(x_test_str)\n",
|
||||
" \n",
|
||||
"model = DecisionTreeClassifier()\n",
|
||||
"model.fit(train_encoded, y_train)\n",
|
||||
|
|
@ -287,7 +104,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 91,
|
||||
"execution_count": 138,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
|
@ -323,14 +140,14 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 96,
|
||||
"execution_count": 139,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"0.6430638626278217\n"
|
||||
"1.0\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
|
@ -361,14 +178,14 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 55,
|
||||
"execution_count": 140,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"0.6980513216284006\n"
|
||||
"0.5076210688790276\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
|
@ -408,224 +225,43 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 97,
|
||||
"execution_count": 141,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>parents</th>\n",
|
||||
" <th>has_nurs</th>\n",
|
||||
" <th>form</th>\n",
|
||||
" <th>children</th>\n",
|
||||
" <th>housing</th>\n",
|
||||
" <th>finance</th>\n",
|
||||
" <th>social</th>\n",
|
||||
" <th>health</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>8450</th>\n",
|
||||
" <td>pretentious</td>\n",
|
||||
" <td>very_crit</td>\n",
|
||||
" <td>foster</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>less_conv</td>\n",
|
||||
" <td>convenient</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>not_recom</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>12147</th>\n",
|
||||
" <td>great_pret</td>\n",
|
||||
" <td>very_crit</td>\n",
|
||||
" <td>complete</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>critical</td>\n",
|
||||
" <td>inconv</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>recommended</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2780</th>\n",
|
||||
" <td>usual</td>\n",
|
||||
" <td>critical</td>\n",
|
||||
" <td>complete</td>\n",
|
||||
" <td>4</td>\n",
|
||||
" <td>less_conv</td>\n",
|
||||
" <td>convenient</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>not_recom</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>11924</th>\n",
|
||||
" <td>great_pret</td>\n",
|
||||
" <td>critical</td>\n",
|
||||
" <td>foster</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>critical</td>\n",
|
||||
" <td>convenient</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>not_recom</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>59</th>\n",
|
||||
" <td>usual</td>\n",
|
||||
" <td>proper</td>\n",
|
||||
" <td>complete</td>\n",
|
||||
" <td>2</td>\n",
|
||||
" <td>convenient</td>\n",
|
||||
" <td>convenient</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>not_recom</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>...</th>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>5193</th>\n",
|
||||
" <td>pretentious</td>\n",
|
||||
" <td>less_proper</td>\n",
|
||||
" <td>complete</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>convenient</td>\n",
|
||||
" <td>inconv</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>recommended</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1375</th>\n",
|
||||
" <td>usual</td>\n",
|
||||
" <td>less_proper</td>\n",
|
||||
" <td>incomplete</td>\n",
|
||||
" <td>2</td>\n",
|
||||
" <td>less_conv</td>\n",
|
||||
" <td>convenient</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>priority</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>10318</th>\n",
|
||||
" <td>great_pret</td>\n",
|
||||
" <td>less_proper</td>\n",
|
||||
" <td>foster</td>\n",
|
||||
" <td>4</td>\n",
|
||||
" <td>convenient</td>\n",
|
||||
" <td>convenient</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>priority</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>6396</th>\n",
|
||||
" <td>pretentious</td>\n",
|
||||
" <td>improper</td>\n",
|
||||
" <td>completed</td>\n",
|
||||
" <td>3</td>\n",
|
||||
" <td>less_conv</td>\n",
|
||||
" <td>convenient</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>recommended</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>485</th>\n",
|
||||
" <td>usual</td>\n",
|
||||
" <td>proper</td>\n",
|
||||
" <td>incomplete</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>critical</td>\n",
|
||||
" <td>convenient</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>not_recom</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"<p>10366 rows × 8 columns</p>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" parents has_nurs form children housing finance \\\n",
|
||||
"8450 pretentious very_crit foster 1 less_conv convenient \n",
|
||||
"12147 great_pret very_crit complete 1 critical inconv \n",
|
||||
"2780 usual critical complete 4 less_conv convenient \n",
|
||||
"11924 great_pret critical foster 1 critical convenient \n",
|
||||
"59 usual proper complete 2 convenient convenient \n",
|
||||
"... ... ... ... ... ... ... \n",
|
||||
"5193 pretentious less_proper complete 1 convenient inconv \n",
|
||||
"1375 usual less_proper incomplete 2 less_conv convenient \n",
|
||||
"10318 great_pret less_proper foster 4 convenient convenient \n",
|
||||
"6396 pretentious improper completed 3 less_conv convenient \n",
|
||||
"485 usual proper incomplete 1 critical convenient \n",
|
||||
"\n",
|
||||
" social health \n",
|
||||
"8450 0 not_recom \n",
|
||||
"12147 1 recommended \n",
|
||||
"2780 0 not_recom \n",
|
||||
"11924 0 not_recom \n",
|
||||
"59 0 not_recom \n",
|
||||
"... ... ... \n",
|
||||
"5193 0 recommended \n",
|
||||
"1375 1 priority \n",
|
||||
"10318 0 priority \n",
|
||||
"6396 1 recommended \n",
|
||||
"485 0 not_recom \n",
|
||||
"\n",
|
||||
"[10366 rows x 8 columns]"
|
||||
]
|
||||
"text/plain": " parents has_nurs form children housing finance \\\n0 pretentious very_crit foster 1 less_conv convenient \n1 great_pret very_crit complete 1 critical inconv \n2 usual critical complete 4 less_conv convenient \n3 great_pret critical foster 1 critical convenient \n4 usual proper complete 2 convenient convenient \n... ... ... ... ... ... ... \n10361 pretentious less_proper complete 1 convenient inconv \n10362 usual less_proper incomplete 2 less_conv convenient \n10363 great_pret less_proper foster 4 convenient convenient \n10364 pretentious improper completed 3 less_conv convenient \n10365 usual proper incomplete 1 critical convenient \n\n social health \n0 0 not_recom \n1 1 recommended \n2 0 not_recom \n3 0 not_recom \n4 0 not_recom \n... ... ... \n10361 0 recommended \n10362 1 priority \n10363 0 priority \n10364 1 recommended \n10365 0 not_recom \n\n[10366 rows x 8 columns]",
|
||||
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>parents</th>\n <th>has_nurs</th>\n <th>form</th>\n <th>children</th>\n <th>housing</th>\n <th>finance</th>\n <th>social</th>\n <th>health</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>pretentious</td>\n <td>very_crit</td>\n <td>foster</td>\n <td>1</td>\n <td>less_conv</td>\n <td>convenient</td>\n <td>0</td>\n <td>not_recom</td>\n </tr>\n <tr>\n <th>1</th>\n <td>great_pret</td>\n <td>very_crit</td>\n <td>complete</td>\n <td>1</td>\n <td>critical</td>\n <td>inconv</td>\n <td>1</td>\n <td>recommended</td>\n </tr>\n <tr>\n <th>2</th>\n <td>usual</td>\n <td>critical</td>\n <td>complete</td>\n <td>4</td>\n <td>less_conv</td>\n <td>convenient</td>\n <td>0</td>\n <td>not_recom</td>\n </tr>\n <tr>\n <th>3</th>\n <td>great_pret</td>\n <td>critical</td>\n <td>foster</td>\n <td>1</td>\n <td>critical</td>\n <td>convenient</td>\n <td>0</td>\n <td>not_recom</td>\n </tr>\n <tr>\n <th>4</th>\n <td>usual</td>\n <td>proper</td>\n <td>complete</td>\n <td>2</td>\n <td>convenient</td>\n <td>convenient</td>\n <td>0</td>\n <td>not_recom</td>\n </tr>\n <tr>\n <th>...</th>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n </tr>\n <tr>\n <th>10361</th>\n <td>pretentious</td>\n <td>less_proper</td>\n <td>complete</td>\n <td>1</td>\n <td>convenient</td>\n <td>inconv</td>\n <td>0</td>\n <td>recommended</td>\n </tr>\n <tr>\n <th>10362</th>\n <td>usual</td>\n <td>less_proper</td>\n <td>incomplete</td>\n <td>2</td>\n <td>less_conv</td>\n <td>convenient</td>\n <td>1</td>\n <td>priority</td>\n </tr>\n <tr>\n <th>10363</th>\n <td>great_pret</td>\n <td>less_proper</td>\n <td>foster</td>\n <td>4</td>\n <td>convenient</td>\n <td>convenient</td>\n <td>0</td>\n <td>priority</td>\n </tr>\n <tr>\n <th>10364</th>\n <td>pretentious</td>\n <td>improper</td>\n <td>completed</td>\n <td>3</td>\n <td>less_conv</td>\n <td>convenient</td>\n <td>1</td>\n <td>recommended</td>\n </tr>\n <tr>\n <th>10365</th>\n <td>usual</td>\n <td>proper</td>\n <td>incomplete</td>\n <td>1</td>\n <td>critical</td>\n <td>convenient</td>\n <td>0</td>\n <td>not_recom</td>\n </tr>\n </tbody>\n</table>\n<p>10366 rows × 8 columns</p>\n</div>"
|
||||
},
|
||||
"execution_count": 97,
|
||||
"execution_count": 141,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from apt.utils.datasets import ArrayDataset\n",
|
||||
"from apt.anonymization import Anonymize\n",
|
||||
"\n",
|
||||
"features = x_train.columns\n",
|
||||
"QI = [\"finance\", \"social\", \"health\"]\n",
|
||||
"categorical_features = [\"parents\", \"has_nurs\", \"form\", \"housing\", \"finance\", \"health\", 'children']\n",
|
||||
"anonymizer = Anonymize(100, QI, categorical_features=categorical_features)\n",
|
||||
"anon = anonymizer.anonymize(x_train, x_train_predictions)\n",
|
||||
"anon"
|
||||
"QI_indexes = [i for i, v in enumerate(features) if v in QI]\n",
|
||||
"categorical_features_indexes = [i for i, v in enumerate(features) if v in categorical_features]\n",
|
||||
"anonymizer = Anonymize(100, QI_indexes, categorical_features=categorical_features_indexes)\n",
|
||||
"anon = anonymizer.anonymize(ArrayDataset(x_train, x_train_predictions))\n",
|
||||
"anon\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 64,
|
||||
"execution_count": 142,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"7585"
|
||||
]
|
||||
"text/plain": "7585"
|
||||
},
|
||||
"execution_count": 64,
|
||||
"execution_count": 142,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
|
|
@ -637,16 +273,14 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 65,
|
||||
"execution_count": 143,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"5766"
|
||||
]
|
||||
"text/plain": "5766"
|
||||
},
|
||||
"execution_count": 65,
|
||||
"execution_count": 143,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
|
|
@ -665,7 +299,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 66,
|
||||
"execution_count": 144,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
|
|
@ -678,7 +312,7 @@
|
|||
],
|
||||
"source": [
|
||||
"anon_str = anon.astype(str)\n",
|
||||
"anon_encoded = OneHotEncoder(sparse=False, drop='if_binary').fit_transform(anon_str)\n",
|
||||
"anon_encoded = OneHotEncoder(sparse=False).fit_transform(anon_str)\n",
|
||||
"\n",
|
||||
"anon_model = DecisionTreeClassifier()\n",
|
||||
"anon_model.fit(anon_encoded, y_train)\n",
|
||||
|
|
@ -698,14 +332,14 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 98,
|
||||
"execution_count": 145,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"0.6471155701331275\n"
|
||||
"1.0\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
|
@ -734,14 +368,14 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 69,
|
||||
"execution_count": 146,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"0.6982442600810341\n"
|
||||
"0.5218985143739148\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
|
@ -765,15 +399,15 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 87,
|
||||
"execution_count": 147,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"(0.33056202194878614, 0.2888695146759663)\n",
|
||||
"(0.34112301200908796, 0.3054344667247893)\n"
|
||||
"(0.49415432579890883, 0.48976438779451525)\n",
|
||||
"(0.49415432579890883, 0.48976438779451525)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
|
@ -810,15 +444,15 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 88,
|
||||
"execution_count": 148,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"(0.6457357075913777, 0.2002324905550712)\n",
|
||||
"(0.6472248353715898, 0.1999418773612322)\n"
|
||||
"(0.9322033898305084, 0.01066925315227934)\n",
|
||||
"(0.9806763285024155, 0.03937924345295829)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
|
@ -849,26 +483,24 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 74,
|
||||
"execution_count": 149,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"anonymizer2 = Anonymize(1000, QI, categorical_features=categorical_features)\n",
|
||||
"anon2 = anonymizer2.anonymize(x_train, x_train_predictions)"
|
||||
"anonymizer2 = Anonymize(1000, QI_indexes, categorical_features=categorical_features_indexes)\n",
|
||||
"anon2 = anonymizer2.anonymize(ArrayDataset(x_train, x_train_predictions))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 75,
|
||||
"execution_count": 150,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"4226"
|
||||
]
|
||||
"text/plain": "4226"
|
||||
},
|
||||
"execution_count": 75,
|
||||
"execution_count": 150,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
|
|
@ -887,7 +519,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 104,
|
||||
"execution_count": 151,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
|
|
@ -900,7 +532,7 @@
|
|||
],
|
||||
"source": [
|
||||
"anon2_str = anon2.astype(str)\n",
|
||||
"anon2_encoded = OneHotEncoder(sparse=False, drop='if_binary').fit_transform(anon2_str)\n",
|
||||
"anon2_encoded = OneHotEncoder(sparse=False).fit_transform(anon2_str)\n",
|
||||
"\n",
|
||||
"anon2_model = DecisionTreeClassifier()\n",
|
||||
"anon2_model.fit(anon2_encoded, y_train)\n",
|
||||
|
|
@ -920,14 +552,14 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 105,
|
||||
"execution_count": 152,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"0.6266640941539648\n"
|
||||
"1.0\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
|
@ -956,14 +588,14 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 106,
|
||||
"execution_count": 153,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"0.6944819602546788\n"
|
||||
"0.5184256222265098\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
|
@ -980,17 +612,17 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 107,
|
||||
"execution_count": 154,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"(0.35793357933579334, 0.17037470725995316)\n",
|
||||
"(0.3360655737704918, 0.1680327868852459)\n",
|
||||
"(0.6457357075913777, 0.2002324905550712)\n",
|
||||
"(0.6327519379844961, 0.1897704155768672)\n"
|
||||
"(0.49415432579890883, 0.48976438779451525)\n",
|
||||
"(0.49415432579890883, 0.48976438779451525)\n",
|
||||
"(0.9322033898305084, 0.01066925315227934)\n",
|
||||
"(1.0, 0.03161978661493695)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
|
@ -1023,31 +655,46 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 111,
|
||||
"execution_count": 155,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"ename": "TypeError",
|
||||
"evalue": "argument must be a string or number",
|
||||
"output_type": "error",
|
||||
"traceback": [
|
||||
"\u001B[0;31m---------------------------------------------------------------------------\u001B[0m",
|
||||
"\u001B[0;31mTypeError\u001B[0m Traceback (most recent call last)",
|
||||
"File \u001B[0;32m~/PycharmProjects/ai-privacy-toolkit-internal/venv/lib/python3.8/site-packages/sklearn/preprocessing/_label.py:112\u001B[0m, in \u001B[0;36m_encode\u001B[0;34m(values, uniques, encode, check_unknown)\u001B[0m\n\u001B[1;32m 111\u001B[0m \u001B[38;5;28;01mtry\u001B[39;00m:\n\u001B[0;32m--> 112\u001B[0m res \u001B[38;5;241m=\u001B[39m \u001B[43m_encode_python\u001B[49m\u001B[43m(\u001B[49m\u001B[43mvalues\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43muniques\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mencode\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 113\u001B[0m \u001B[38;5;28;01mexcept\u001B[39;00m \u001B[38;5;167;01mTypeError\u001B[39;00m:\n",
|
||||
"File \u001B[0;32m~/PycharmProjects/ai-privacy-toolkit-internal/venv/lib/python3.8/site-packages/sklearn/preprocessing/_label.py:60\u001B[0m, in \u001B[0;36m_encode_python\u001B[0;34m(values, uniques, encode)\u001B[0m\n\u001B[1;32m 59\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m uniques \u001B[38;5;129;01mis\u001B[39;00m \u001B[38;5;28;01mNone\u001B[39;00m:\n\u001B[0;32m---> 60\u001B[0m uniques \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;43msorted\u001B[39;49m\u001B[43m(\u001B[49m\u001B[38;5;28;43mset\u001B[39;49m\u001B[43m(\u001B[49m\u001B[43mvalues\u001B[49m\u001B[43m)\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 61\u001B[0m uniques \u001B[38;5;241m=\u001B[39m np\u001B[38;5;241m.\u001B[39marray(uniques, dtype\u001B[38;5;241m=\u001B[39mvalues\u001B[38;5;241m.\u001B[39mdtype)\n",
|
||||
"\u001B[0;31mTypeError\u001B[0m: '<' not supported between instances of 'int' and 'str'",
|
||||
"\nDuring handling of the above exception, another exception occurred:\n",
|
||||
"\u001B[0;31mTypeError\u001B[0m Traceback (most recent call last)",
|
||||
"Input \u001B[0;32mIn [155]\u001B[0m, in \u001B[0;36m<cell line: 4>\u001B[0;34m()\u001B[0m\n\u001B[1;32m 2\u001B[0m QI2_indexes \u001B[38;5;241m=\u001B[39m [i \u001B[38;5;28;01mfor\u001B[39;00m i, v \u001B[38;5;129;01min\u001B[39;00m \u001B[38;5;28menumerate\u001B[39m(features) \u001B[38;5;28;01mif\u001B[39;00m v \u001B[38;5;129;01min\u001B[39;00m QI2]\n\u001B[1;32m 3\u001B[0m anonymizer3 \u001B[38;5;241m=\u001B[39m Anonymize(\u001B[38;5;241m100\u001B[39m, QI2_indexes, categorical_features\u001B[38;5;241m=\u001B[39mcategorical_features_indexes)\n\u001B[0;32m----> 4\u001B[0m anon3 \u001B[38;5;241m=\u001B[39m \u001B[43manonymizer3\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43manonymize\u001B[49m\u001B[43m(\u001B[49m\u001B[43mArrayDataset\u001B[49m\u001B[43m(\u001B[49m\u001B[43mx_train\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mx_train_predictions\u001B[49m\u001B[43m)\u001B[49m\u001B[43m)\u001B[49m\n",
|
||||
"File \u001B[0;32m~/PycharmProjects/ai-privacy-toolkit-internal/apt/anonymization/anonymizer.py:55\u001B[0m, in \u001B[0;36mAnonymize.anonymize\u001B[0;34m(self, dataset)\u001B[0m\n\u001B[1;32m 52\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[1;32m 53\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m \u001B[38;5;167;01mValueError\u001B[39;00m(\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mNo data provided\u001B[39m\u001B[38;5;124m'\u001B[39m)\n\u001B[0;32m---> 55\u001B[0m transformed \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43m_anonymize\u001B[49m\u001B[43m(\u001B[49m\u001B[43mdataset\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mget_samples\u001B[49m\u001B[43m(\u001B[49m\u001B[43m)\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mcopy\u001B[49m\u001B[43m(\u001B[49m\u001B[43m)\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mdataset\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mget_labels\u001B[49m\u001B[43m(\u001B[49m\u001B[43m)\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 56\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m dataset\u001B[38;5;241m.\u001B[39mis_pandas:\n\u001B[1;32m 57\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m pd\u001B[38;5;241m.\u001B[39mDataFrame(transformed, columns\u001B[38;5;241m=\u001B[39m\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_features)\n",
|
||||
"File \u001B[0;32m~/PycharmProjects/ai-privacy-toolkit-internal/apt/anonymization/anonymizer.py:68\u001B[0m, in \u001B[0;36mAnonymize._anonymize\u001B[0;34m(self, x, y)\u001B[0m\n\u001B[1;32m 66\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;129;01mnot\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mcategorical_features:\n\u001B[1;32m 67\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m \u001B[38;5;167;01mValueError\u001B[39;00m(\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mwhen supplying an array with non-numeric data, categorical_features must be defined\u001B[39m\u001B[38;5;124m'\u001B[39m)\n\u001B[0;32m---> 68\u001B[0m x_prepared \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43m_modify_categorical_features\u001B[49m\u001B[43m(\u001B[49m\u001B[43mx_anonymizer_train\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 69\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[1;32m 70\u001B[0m x_prepared \u001B[38;5;241m=\u001B[39m x_anonymizer_train\n",
|
||||
"File \u001B[0;32m~/PycharmProjects/ai-privacy-toolkit-internal/apt/anonymization/anonymizer.py:144\u001B[0m, in \u001B[0;36mAnonymize._modify_categorical_features\u001B[0;34m(self, x)\u001B[0m\n\u001B[1;32m 142\u001B[0m \u001B[38;5;28;01mdef\u001B[39;00m \u001B[38;5;21m_modify_categorical_features\u001B[39m(\u001B[38;5;28mself\u001B[39m, x):\n\u001B[1;32m 143\u001B[0m encoder \u001B[38;5;241m=\u001B[39m OneHotEncoder()\n\u001B[0;32m--> 144\u001B[0m one_hot_encoded \u001B[38;5;241m=\u001B[39m \u001B[43mencoder\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mfit_transform\u001B[49m\u001B[43m(\u001B[49m\u001B[43mx\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 145\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m one_hot_encoded\n",
|
||||
"File \u001B[0;32m~/PycharmProjects/ai-privacy-toolkit-internal/venv/lib/python3.8/site-packages/sklearn/preprocessing/_encoders.py:372\u001B[0m, in \u001B[0;36mOneHotEncoder.fit_transform\u001B[0;34m(self, X, y)\u001B[0m\n\u001B[1;32m 352\u001B[0m \u001B[38;5;124;03m\"\"\"\u001B[39;00m\n\u001B[1;32m 353\u001B[0m \u001B[38;5;124;03mFit OneHotEncoder to X, then transform X.\u001B[39;00m\n\u001B[1;32m 354\u001B[0m \n\u001B[0;32m (...)\u001B[0m\n\u001B[1;32m 369\u001B[0m \u001B[38;5;124;03m Transformed input.\u001B[39;00m\n\u001B[1;32m 370\u001B[0m \u001B[38;5;124;03m\"\"\"\u001B[39;00m\n\u001B[1;32m 371\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_validate_keywords()\n\u001B[0;32m--> 372\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28;43msuper\u001B[39;49m\u001B[43m(\u001B[49m\u001B[43m)\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mfit_transform\u001B[49m\u001B[43m(\u001B[49m\u001B[43mX\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43my\u001B[49m\u001B[43m)\u001B[49m\n",
|
||||
"File \u001B[0;32m~/PycharmProjects/ai-privacy-toolkit-internal/venv/lib/python3.8/site-packages/sklearn/base.py:571\u001B[0m, in \u001B[0;36mTransformerMixin.fit_transform\u001B[0;34m(self, X, y, **fit_params)\u001B[0m\n\u001B[1;32m 567\u001B[0m \u001B[38;5;66;03m# non-optimized default implementation; override when a better\u001B[39;00m\n\u001B[1;32m 568\u001B[0m \u001B[38;5;66;03m# method is possible for a given clustering algorithm\u001B[39;00m\n\u001B[1;32m 569\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m y \u001B[38;5;129;01mis\u001B[39;00m \u001B[38;5;28;01mNone\u001B[39;00m:\n\u001B[1;32m 570\u001B[0m \u001B[38;5;66;03m# fit method of arity 1 (unsupervised transformation)\u001B[39;00m\n\u001B[0;32m--> 571\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mfit\u001B[49m\u001B[43m(\u001B[49m\u001B[43mX\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[43mfit_params\u001B[49m\u001B[43m)\u001B[49m\u001B[38;5;241m.\u001B[39mtransform(X)\n\u001B[1;32m 572\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[1;32m 573\u001B[0m \u001B[38;5;66;03m# fit method of arity 2 (supervised transformation)\u001B[39;00m\n\u001B[1;32m 574\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mfit(X, y, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mfit_params)\u001B[38;5;241m.\u001B[39mtransform(X)\n",
|
||||
"File \u001B[0;32m~/PycharmProjects/ai-privacy-toolkit-internal/venv/lib/python3.8/site-packages/sklearn/preprocessing/_encoders.py:347\u001B[0m, in \u001B[0;36mOneHotEncoder.fit\u001B[0;34m(self, X, y)\u001B[0m\n\u001B[1;32m 330\u001B[0m \u001B[38;5;124;03m\"\"\"\u001B[39;00m\n\u001B[1;32m 331\u001B[0m \u001B[38;5;124;03mFit OneHotEncoder to X.\u001B[39;00m\n\u001B[1;32m 332\u001B[0m \n\u001B[0;32m (...)\u001B[0m\n\u001B[1;32m 344\u001B[0m \u001B[38;5;124;03mself\u001B[39;00m\n\u001B[1;32m 345\u001B[0m \u001B[38;5;124;03m\"\"\"\u001B[39;00m\n\u001B[1;32m 346\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_validate_keywords()\n\u001B[0;32m--> 347\u001B[0m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43m_fit\u001B[49m\u001B[43m(\u001B[49m\u001B[43mX\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mhandle_unknown\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mhandle_unknown\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 348\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mdrop_idx_ \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_compute_drop_idx()\n\u001B[1;32m 349\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28mself\u001B[39m\n",
|
||||
"File \u001B[0;32m~/PycharmProjects/ai-privacy-toolkit-internal/venv/lib/python3.8/site-packages/sklearn/preprocessing/_encoders.py:86\u001B[0m, in \u001B[0;36m_BaseEncoder._fit\u001B[0;34m(self, X, handle_unknown)\u001B[0m\n\u001B[1;32m 84\u001B[0m Xi \u001B[38;5;241m=\u001B[39m X_list[i]\n\u001B[1;32m 85\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mcategories \u001B[38;5;241m==\u001B[39m \u001B[38;5;124m'\u001B[39m\u001B[38;5;124mauto\u001B[39m\u001B[38;5;124m'\u001B[39m:\n\u001B[0;32m---> 86\u001B[0m cats \u001B[38;5;241m=\u001B[39m \u001B[43m_encode\u001B[49m\u001B[43m(\u001B[49m\u001B[43mXi\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 87\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[1;32m 88\u001B[0m cats \u001B[38;5;241m=\u001B[39m np\u001B[38;5;241m.\u001B[39marray(\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mcategories[i], dtype\u001B[38;5;241m=\u001B[39mXi\u001B[38;5;241m.\u001B[39mdtype)\n",
|
||||
"File \u001B[0;32m~/PycharmProjects/ai-privacy-toolkit-internal/venv/lib/python3.8/site-packages/sklearn/preprocessing/_label.py:114\u001B[0m, in \u001B[0;36m_encode\u001B[0;34m(values, uniques, encode, check_unknown)\u001B[0m\n\u001B[1;32m 112\u001B[0m res \u001B[38;5;241m=\u001B[39m _encode_python(values, uniques, encode)\n\u001B[1;32m 113\u001B[0m \u001B[38;5;28;01mexcept\u001B[39;00m \u001B[38;5;167;01mTypeError\u001B[39;00m:\n\u001B[0;32m--> 114\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m \u001B[38;5;167;01mTypeError\u001B[39;00m(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124margument must be a string or number\u001B[39m\u001B[38;5;124m\"\u001B[39m)\n\u001B[1;32m 115\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m res\n\u001B[1;32m 116\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n",
|
||||
"\u001B[0;31mTypeError\u001B[0m: argument must be a string or number"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"QI2 = [\"parents\", \"has_nurs\", \"form\", \"children\", \"housing\", \"finance\", \"social\", \"health\"]\n",
|
||||
"anonymizer3 = Anonymize(100, QI2, categorical_features=categorical_features)\n",
|
||||
"anon3 = anonymizer3.anonymize(x_train, x_train_predictions)"
|
||||
"QI2_indexes = [i for i, v in enumerate(features) if v in QI2]\n",
|
||||
"anonymizer3 = Anonymize(100, QI2_indexes, categorical_features=categorical_features_indexes)\n",
|
||||
"anon3 = anonymizer3.anonymize(ArrayDataset(x_train, x_train_predictions))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 112,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"39"
|
||||
]
|
||||
},
|
||||
"execution_count": 112,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# number of distinct rows in anonymized data\n",
|
||||
"len(anon3.drop_duplicates())"
|
||||
|
|
@ -1055,22 +702,12 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 113,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Anonymized model accuracy: 0.7723765432098766\n",
|
||||
"BB attack accuracy: 0.5792012348060969\n",
|
||||
"WB attack accuracy: 0.6680493922438742\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"anon3_str = anon3.astype(str)\n",
|
||||
"anon3_encoded = OneHotEncoder(sparse=False, drop='if_binary').fit_transform(anon3_str)\n",
|
||||
"anon3_encoded = OneHotEncoder(sparse=False).fit_transform(anon3_str)\n",
|
||||
"\n",
|
||||
"anon3_model = DecisionTreeClassifier()\n",
|
||||
"anon3_model.fit(anon3_encoded, y_train)\n",
|
||||
|
|
@ -1105,20 +742,9 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 114,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"(0.35793357933579334, 0.17037470725995316)\n",
|
||||
"(0.3393939393939394, 0.13114754098360656)\n",
|
||||
"(0.6457357075913777, 0.2002324905550712)\n",
|
||||
"(1, 0.0)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# black-box regular\n",
|
||||
"print(calc_precision_recall(inferred_train_bb, x_train_feature))\n",
|
||||
|
|
@ -1162,4 +788,4 @@
|
|||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
}
|
||||
|
|
@ -29,7 +29,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 97,
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
|
|
@ -44,6 +44,18 @@
|
|||
" [ 26. 11. 0. 0. 48.]\n",
|
||||
" [ 27. 9. 0. 0. 40.]]\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/var/folders/9b/qbtw28w53355cvpjs4qn83yc0000gn/T/ipykernel_85828/3975777015.py:22: DeprecationWarning: `np.int` is a deprecated alias for the builtin `int`. To silence this warning, use `int` by itself. Doing this will not modify any behavior and is safe. When replacing `np.int`, you may wish to use e.g. `np.int64` or `np.int32` to specify the precision. If you wish to review your current use, check the release note link for additional information.\n",
|
||||
"Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n",
|
||||
" y_train = y_train.astype(np.int)\n",
|
||||
"/var/folders/9b/qbtw28w53355cvpjs4qn83yc0000gn/T/ipykernel_85828/3975777015.py:26: DeprecationWarning: `np.int` is a deprecated alias for the builtin `int`. To silence this warning, use `int` by itself. Doing this will not modify any behavior and is safe. When replacing `np.int`, you may wish to use e.g. `np.int64` or `np.int32` to specify the precision. If you wish to review your current use, check the release note link for additional information.\n",
|
||||
"Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n",
|
||||
" y_test = y_test.astype(np.int)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
|
|
@ -90,14 +102,14 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 116,
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Base model accuracy: 0.8075056814691972\n"
|
||||
"Base model accuracy: 0.8074442601805786\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
|
@ -126,9 +138,18 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 124,
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/Users/olasaadi/PycharmProjects/ai-privacy-toolkit-internal/venv/lib/python3.8/site-packages/art/attacks/inference/membership_inference/black_box.py:262: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
|
||||
" self.attack_model.fit(np.c_[x_1, x_2], y_ready) # type: ignore\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from art.attacks.inference.membership_inference import MembershipInferenceBlackBox\n",
|
||||
"\n",
|
||||
|
|
@ -154,14 +175,14 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 125,
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"0.5440363591696352\n"
|
||||
"0.545264709495148\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
|
@ -197,7 +218,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 128,
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
|
|
@ -215,6 +236,7 @@
|
|||
}
|
||||
],
|
||||
"source": [
|
||||
"from apt.utils.datasets import ArrayDataset\n",
|
||||
"import os\n",
|
||||
"import sys\n",
|
||||
"sys.path.insert(0, os.path.abspath('..'))\n",
|
||||
|
|
@ -223,22 +245,20 @@
|
|||
"# QI = (age, education-num, capital-gain, hours-per-week)\n",
|
||||
"QI = [0, 1, 2, 4]\n",
|
||||
"anonymizer = Anonymize(100, QI)\n",
|
||||
"anon = anonymizer.anonymize(x_train, x_train_predictions)\n",
|
||||
"anon = anonymizer.anonymize(ArrayDataset(x_train, x_train_predictions))\n",
|
||||
"print(anon)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 104,
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"6739"
|
||||
]
|
||||
"text/plain": "6739"
|
||||
},
|
||||
"execution_count": 104,
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
|
|
@ -250,16 +270,14 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 129,
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"658"
|
||||
]
|
||||
"text/plain": "658"
|
||||
},
|
||||
"execution_count": 129,
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
|
|
@ -278,14 +296,14 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 130,
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Anonymized model accuracy: 0.8304158221239482\n"
|
||||
"Anonymized model accuracy: 0.83078434985566\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
|
@ -308,14 +326,22 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 131,
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/Users/olasaadi/PycharmProjects/ai-privacy-toolkit-internal/venv/lib/python3.8/site-packages/art/attacks/inference/membership_inference/black_box.py:262: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
|
||||
" self.attack_model.fit(np.c_[x_1, x_2], y_ready) # type: ignore\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"0.5034393809114359\n"
|
||||
"0.5047291487532244\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
|
@ -345,15 +371,15 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 132,
|
||||
"execution_count": 15,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"(0.5298924372550654, 0.7806166318634075)\n",
|
||||
"(0.5030507735890172, 0.5671293452892765)\n"
|
||||
"(0.5312420517168291, 0.7696843139663432)\n",
|
||||
"(0.5048372911169745, 0.4935511607910576)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
|
@ -419,4 +445,4 @@
|
|||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
}
|
||||
|
|
@ -29,7 +29,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 121,
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
|
@ -50,7 +50,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 122,
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
|
|
@ -86,14 +86,14 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 123,
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"0.4954954954954955\n"
|
||||
"0.527027027027027\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
|
@ -131,7 +131,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 124,
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
|
|
@ -141,6 +141,22 @@
|
|||
"unique rows in original data: 221\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/Users/olasaadi/PycharmProjects/ai-privacy-toolkit-internal/venv/lib/python3.8/site-packages/art/attacks/inference/membership_inference/black_box.py:262: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
|
||||
" self.attack_model.fit(np.c_[x_1, x_2], y_ready) # type: ignore\n",
|
||||
"/Users/olasaadi/PycharmProjects/ai-privacy-toolkit-internal/venv/lib/python3.8/site-packages/art/attacks/inference/membership_inference/black_box.py:262: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
|
||||
" self.attack_model.fit(np.c_[x_1, x_2], y_ready) # type: ignore\n",
|
||||
"/Users/olasaadi/PycharmProjects/ai-privacy-toolkit-internal/venv/lib/python3.8/site-packages/art/attacks/inference/membership_inference/black_box.py:262: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
|
||||
" self.attack_model.fit(np.c_[x_1, x_2], y_ready) # type: ignore\n",
|
||||
"/Users/olasaadi/PycharmProjects/ai-privacy-toolkit-internal/venv/lib/python3.8/site-packages/art/attacks/inference/membership_inference/black_box.py:262: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
|
||||
" self.attack_model.fit(np.c_[x_1, x_2], y_ready) # type: ignore\n",
|
||||
"/Users/olasaadi/PycharmProjects/ai-privacy-toolkit-internal/venv/lib/python3.8/site-packages/art/attacks/inference/membership_inference/black_box.py:262: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
|
||||
" self.attack_model.fit(np.c_[x_1, x_2], y_ready) # type: ignore\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
|
|
@ -148,11 +164,12 @@
|
|||
"k values: [5, 10, 20, 50, 75]\n",
|
||||
"unique rows: [34, 19, 8, 4, 2]\n",
|
||||
"model accuracy: [0.43165832354998956, 0.4509641063206041, -1.730181929385853, -5.577098823982753e+27, -1.2751609045828272e+25]\n",
|
||||
"attack accuracy: [0.5, 0.47297297297297297, 0.49549549549549543, 0.5, 0.47297297297297297]\n"
|
||||
"attack accuracy: [0.509009009009009, 0.481981981981982, 0.509009009009009, 0.5045045045045045, 0.4954954954954955]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from apt.utils.datasets import ArrayDataset\n",
|
||||
"from apt.anonymization import Anonymize\n",
|
||||
"k_values=[5, 10, 20, 50, 75]\n",
|
||||
"model_accuracy = []\n",
|
||||
|
|
@ -165,7 +182,7 @@
|
|||
"\n",
|
||||
"for k in k_values:\n",
|
||||
" anonymizer = Anonymize(k, QI, is_regression=True)\n",
|
||||
" anon = anonymizer.anonymize(X_train, x_train_predictions)\n",
|
||||
" anon = anonymizer.anonymize(ArrayDataset(X_train, x_train_predictions))\n",
|
||||
" unique_values.append(len(np.unique(anon, axis=0)))\n",
|
||||
" \n",
|
||||
" anon_model = LinearRegression()\n",
|
||||
|
|
@ -198,7 +215,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 124,
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue