mirror of
https://github.com/IBM/ai-privacy-toolkit.git
synced 2026-05-12 13:32:37 +02:00
Wrapper minimizer (#20)
* apply dataset wrapper on minimizer * apply changes on minimization notebook * add black_box_access and unlimited_queries params
This commit is contained in:
parent
6b04fd5564
commit
ac5d82aab6
6 changed files with 583 additions and 215 deletions
|
|
@ -27,7 +27,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": 15,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
|
|
@ -42,6 +42,18 @@
|
|||
" [2.2000e+01 9.0000e+00 0.0000e+00 0.0000e+00 2.0000e+01]\n",
|
||||
" [5.2000e+01 9.0000e+00 1.5024e+04 0.0000e+00 4.0000e+01]]\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/var/folders/9b/qbtw28w53355cvpjs4qn83yc0000gn/T/ipykernel_13726/1357868359.py:22: DeprecationWarning: `np.int` is a deprecated alias for the builtin `int`. To silence this warning, use `int` by itself. Doing this will not modify any behavior and is safe. When replacing `np.int`, you may wish to use e.g. `np.int64` or `np.int32` to specify the precision. If you wish to review your current use, check the release note link for additional information.\n",
|
||||
"Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n",
|
||||
" y_train = y_train.astype(np.int)\n",
|
||||
"/var/folders/9b/qbtw28w53355cvpjs4qn83yc0000gn/T/ipykernel_13726/1357868359.py:26: DeprecationWarning: `np.int` is a deprecated alias for the builtin `int`. To silence this warning, use `int` by itself. Doing this will not modify any behavior and is safe. When replacing `np.int`, you may wish to use e.g. `np.int64` or `np.int32` to specify the precision. If you wish to review your current use, check the release note link for additional information.\n",
|
||||
"Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n",
|
||||
" y_test = y_test.astype(np.int)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
|
|
@ -84,24 +96,27 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": 16,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Base model accuracy: 0.8189914624408821\n"
|
||||
"Base model accuracy: 0.8183158282660771\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from apt.utils.datasets import ArrayDataset\n",
|
||||
"from apt.utils.models import SklearnClassifier, ModelOutputType\n",
|
||||
"from sklearn.tree import DecisionTreeClassifier\n",
|
||||
"\n",
|
||||
"model = DecisionTreeClassifier()\n",
|
||||
"model.fit(x_train, y_train)\n",
|
||||
"base_est = DecisionTreeClassifier()\n",
|
||||
"model = SklearnClassifier(base_est, ModelOutputType.CLASSIFIER_VECTOR)\n",
|
||||
"model.fit(ArrayDataset(x_train, y_train))\n",
|
||||
"\n",
|
||||
"print('Base model accuracy: ', model.score(x_test, y_test))"
|
||||
"print('Base model accuracy: ', model.score(ArrayDataset(x_test, y_test)))"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
|
@ -114,26 +129,26 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 17,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Initial accuracy of model on generalized data, relative to original model predictions (base generalization derived from tree, before improvements): 0.929376\n",
|
||||
"Initial accuracy of model on generalized data, relative to original model predictions (base generalization derived from tree, before improvements): 0.936540\n",
|
||||
"Improving accuracy\n",
|
||||
"feature to remove: 0\n",
|
||||
"Removed feature: 0, new relative accuracy: 0.939867\n",
|
||||
"feature to remove: 4\n",
|
||||
"Removed feature: 4, new relative accuracy: 0.967247\n",
|
||||
"feature to remove: 2\n",
|
||||
"Removed feature: 2, new relative accuracy: 0.972620\n",
|
||||
"Removed feature: 2, new relative accuracy: 0.935261\n",
|
||||
"feature to remove: 4\n",
|
||||
"Removed feature: 4, new relative accuracy: 0.946776\n",
|
||||
"feature to remove: 0\n",
|
||||
"Removed feature: 0, new relative accuracy: 0.972876\n",
|
||||
"feature to remove: 1\n",
|
||||
"Removed feature: 1, new relative accuracy: 0.992323\n",
|
||||
"Removed feature: 1, new relative accuracy: 0.992835\n",
|
||||
"feature to remove: 3\n",
|
||||
"Removed feature: 3, new relative accuracy: 1.000000\n",
|
||||
"Accuracy on minimized data: 0.8237371411024106\n"
|
||||
"Accuracy on minimized data: 0.8231229847996315\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
|
@ -155,10 +170,12 @@
|
|||
"X_generalizer_train, x_test, y_generalizer_train, y_test = train_test_split(x_test, y_test, stratify=y_test,\n",
|
||||
" test_size = 0.4, random_state = 38)\n",
|
||||
"x_train_predictions = model.predict(X_generalizer_train)\n",
|
||||
"minimizer.fit(X_generalizer_train, x_train_predictions)\n",
|
||||
"transformed = minimizer.transform(x_test)\n",
|
||||
"if x_train_predictions.shape[1] > 1:\n",
|
||||
" x_train_predictions = np.argmax(x_train_predictions, axis=1)\n",
|
||||
"minimizer.fit(dataset=ArrayDataset(X_generalizer_train, x_train_predictions))\n",
|
||||
"transformed = minimizer.transform(dataset=ArrayDataset(x_test))\n",
|
||||
"\n",
|
||||
"print('Accuracy on minimized data: ', model.score(transformed, y_test))"
|
||||
"print('Accuracy on minimized data: ', model.score(ArrayDataset(transformed, y_test)))"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
|
@ -170,14 +187,14 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 18,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{'ranges': {}, 'untouched': [0, 1, 2, 3, 4]}\n"
|
||||
"{'ranges': {}, 'categories': {}, 'untouched': ['4', '1', '3', '0', '2']}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
|
@ -197,25 +214,25 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"execution_count": 19,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Initial accuracy of model on generalized data, relative to original model predictions (base generalization derived from tree, before improvements): 0.929376\n",
|
||||
"Initial accuracy of model on generalized data, relative to original model predictions (base generalization derived from tree, before improvements): 0.936540\n",
|
||||
"Improving accuracy\n",
|
||||
"feature to remove: 0\n",
|
||||
"Removed feature: 0, new relative accuracy: 0.939867\n",
|
||||
"feature to remove: 4\n",
|
||||
"Removed feature: 4, new relative accuracy: 0.967247\n",
|
||||
"feature to remove: 2\n",
|
||||
"Removed feature: 2, new relative accuracy: 0.972620\n",
|
||||
"Removed feature: 2, new relative accuracy: 0.935261\n",
|
||||
"feature to remove: 4\n",
|
||||
"Removed feature: 4, new relative accuracy: 0.946776\n",
|
||||
"feature to remove: 0\n",
|
||||
"Removed feature: 0, new relative accuracy: 0.972876\n",
|
||||
"feature to remove: 1\n",
|
||||
"Removed feature: 1, new relative accuracy: 0.992323\n",
|
||||
"Accuracy on minimized data: 0.820205742361431\n",
|
||||
"{'ranges': {3: [546.0, 704.0, 705.5, 742.5, 782.0, 834.0, 870.0, 1446.5, 1538.5, 1612.5, 1699.0, 1744.0, 1801.0, 1814.0, 1846.0, 1881.5, 1978.5, 2248.0, 2298.5, 2537.5]}, 'untouched': [0, 1, 2, 4]}\n"
|
||||
"Removed feature: 1, new relative accuracy: 0.992835\n",
|
||||
"Accuracy on minimized data: 0.8192845079072624\n",
|
||||
"{'ranges': {'3': [569.0, 782.0, 870.0, 870.5, 938.0, 1016.5, 1311.5, 1457.0, 1494.5, 1596.0, 1629.5, 1684.0, 1805.0, 1859.0, 1867.5, 1881.5, 1938.0, 1978.5, 2119.0, 2210.0, 2218.0, 2244.5, 2298.5, 2443.5]}, 'categories': {}, 'untouched': ['2', '1', '0', '4']}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
|
@ -223,9 +240,9 @@
|
|||
"# We allow a 1% deviation in accuracy from the original model accuracy\n",
|
||||
"minimizer2 = GeneralizeToRepresentative(model, target_accuracy=0.99)\n",
|
||||
"\n",
|
||||
"minimizer2.fit(X_generalizer_train, x_train_predictions)\n",
|
||||
"transformed2 = minimizer2.transform(x_test)\n",
|
||||
"print('Accuracy on minimized data: ', model.score(transformed2, y_test))\n",
|
||||
"minimizer2.fit(dataset=ArrayDataset(X_generalizer_train, x_train_predictions))\n",
|
||||
"transformed2 = minimizer2.transform(dataset=ArrayDataset(x_test))\n",
|
||||
"print('Accuracy on minimized data: ', model.score(test_data=ArrayDataset(transformed2, y_test)))\n",
|
||||
"generalizations2 = minimizer2.generalizations\n",
|
||||
"print(generalizations2)"
|
||||
]
|
||||
|
|
@ -259,4 +276,4 @@
|
|||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue