Wrapper minimizer (#20)

* apply dataset wrapper to minimizer
* apply changes to minimization notebook
* add black_box_access and unlimited_queries params
This commit is contained in:
Ola Saadi 2022-04-18 13:14:49 +03:00 committed by GitHub Enterprise
parent 6b04fd5564
commit ac5d82aab6
6 changed files with 583 additions and 215 deletions

View file

@ -27,7 +27,7 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 15,
"metadata": {},
"outputs": [
{
@ -42,6 +42,18 @@
" [2.2000e+01 9.0000e+00 0.0000e+00 0.0000e+00 2.0000e+01]\n",
" [5.2000e+01 9.0000e+00 1.5024e+04 0.0000e+00 4.0000e+01]]\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/var/folders/9b/qbtw28w53355cvpjs4qn83yc0000gn/T/ipykernel_13726/1357868359.py:22: DeprecationWarning: `np.int` is a deprecated alias for the builtin `int`. To silence this warning, use `int` by itself. Doing this will not modify any behavior and is safe. When replacing `np.int`, you may wish to use e.g. `np.int64` or `np.int32` to specify the precision. If you wish to review your current use, check the release note link for additional information.\n",
"Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n",
" y_train = y_train.astype(np.int)\n",
"/var/folders/9b/qbtw28w53355cvpjs4qn83yc0000gn/T/ipykernel_13726/1357868359.py:26: DeprecationWarning: `np.int` is a deprecated alias for the builtin `int`. To silence this warning, use `int` by itself. Doing this will not modify any behavior and is safe. When replacing `np.int`, you may wish to use e.g. `np.int64` or `np.int32` to specify the precision. If you wish to review your current use, check the release note link for additional information.\n",
"Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n",
" y_test = y_test.astype(np.int)\n"
]
}
],
"source": [
@ -84,24 +96,27 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 16,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Base model accuracy: 0.8189914624408821\n"
"Base model accuracy: 0.8183158282660771\n"
]
}
],
"source": [
"from apt.utils.datasets import ArrayDataset\n",
"from apt.utils.models import SklearnClassifier, ModelOutputType\n",
"from sklearn.tree import DecisionTreeClassifier\n",
"\n",
"model = DecisionTreeClassifier()\n",
"model.fit(x_train, y_train)\n",
"base_est = DecisionTreeClassifier()\n",
"model = SklearnClassifier(base_est, ModelOutputType.CLASSIFIER_VECTOR)\n",
"model.fit(ArrayDataset(x_train, y_train))\n",
"\n",
"print('Base model accuracy: ', model.score(x_test, y_test))"
"print('Base model accuracy: ', model.score(ArrayDataset(x_test, y_test)))"
]
},
{
@ -114,26 +129,26 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 17,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Initial accuracy of model on generalized data, relative to original model predictions (base generalization derived from tree, before improvements): 0.929376\n",
"Initial accuracy of model on generalized data, relative to original model predictions (base generalization derived from tree, before improvements): 0.936540\n",
"Improving accuracy\n",
"feature to remove: 0\n",
"Removed feature: 0, new relative accuracy: 0.939867\n",
"feature to remove: 4\n",
"Removed feature: 4, new relative accuracy: 0.967247\n",
"feature to remove: 2\n",
"Removed feature: 2, new relative accuracy: 0.972620\n",
"Removed feature: 2, new relative accuracy: 0.935261\n",
"feature to remove: 4\n",
"Removed feature: 4, new relative accuracy: 0.946776\n",
"feature to remove: 0\n",
"Removed feature: 0, new relative accuracy: 0.972876\n",
"feature to remove: 1\n",
"Removed feature: 1, new relative accuracy: 0.992323\n",
"Removed feature: 1, new relative accuracy: 0.992835\n",
"feature to remove: 3\n",
"Removed feature: 3, new relative accuracy: 1.000000\n",
"Accuracy on minimized data: 0.8237371411024106\n"
"Accuracy on minimized data: 0.8231229847996315\n"
]
}
],
@ -155,10 +170,12 @@
"X_generalizer_train, x_test, y_generalizer_train, y_test = train_test_split(x_test, y_test, stratify=y_test,\n",
" test_size = 0.4, random_state = 38)\n",
"x_train_predictions = model.predict(X_generalizer_train)\n",
"minimizer.fit(X_generalizer_train, x_train_predictions)\n",
"transformed = minimizer.transform(x_test)\n",
"if x_train_predictions.shape[1] > 1:\n",
" x_train_predictions = np.argmax(x_train_predictions, axis=1)\n",
"minimizer.fit(dataset=ArrayDataset(X_generalizer_train, x_train_predictions))\n",
"transformed = minimizer.transform(dataset=ArrayDataset(x_test))\n",
"\n",
"print('Accuracy on minimized data: ', model.score(transformed, y_test))"
"print('Accuracy on minimized data: ', model.score(ArrayDataset(transformed, y_test)))"
]
},
{
@ -170,14 +187,14 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 18,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'ranges': {}, 'untouched': [0, 1, 2, 3, 4]}\n"
"{'ranges': {}, 'categories': {}, 'untouched': ['4', '1', '3', '0', '2']}\n"
]
}
],
@ -197,25 +214,25 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 19,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Initial accuracy of model on generalized data, relative to original model predictions (base generalization derived from tree, before improvements): 0.929376\n",
"Initial accuracy of model on generalized data, relative to original model predictions (base generalization derived from tree, before improvements): 0.936540\n",
"Improving accuracy\n",
"feature to remove: 0\n",
"Removed feature: 0, new relative accuracy: 0.939867\n",
"feature to remove: 4\n",
"Removed feature: 4, new relative accuracy: 0.967247\n",
"feature to remove: 2\n",
"Removed feature: 2, new relative accuracy: 0.972620\n",
"Removed feature: 2, new relative accuracy: 0.935261\n",
"feature to remove: 4\n",
"Removed feature: 4, new relative accuracy: 0.946776\n",
"feature to remove: 0\n",
"Removed feature: 0, new relative accuracy: 0.972876\n",
"feature to remove: 1\n",
"Removed feature: 1, new relative accuracy: 0.992323\n",
"Accuracy on minimized data: 0.820205742361431\n",
"{'ranges': {3: [546.0, 704.0, 705.5, 742.5, 782.0, 834.0, 870.0, 1446.5, 1538.5, 1612.5, 1699.0, 1744.0, 1801.0, 1814.0, 1846.0, 1881.5, 1978.5, 2248.0, 2298.5, 2537.5]}, 'untouched': [0, 1, 2, 4]}\n"
"Removed feature: 1, new relative accuracy: 0.992835\n",
"Accuracy on minimized data: 0.8192845079072624\n",
"{'ranges': {'3': [569.0, 782.0, 870.0, 870.5, 938.0, 1016.5, 1311.5, 1457.0, 1494.5, 1596.0, 1629.5, 1684.0, 1805.0, 1859.0, 1867.5, 1881.5, 1938.0, 1978.5, 2119.0, 2210.0, 2218.0, 2244.5, 2298.5, 2443.5]}, 'categories': {}, 'untouched': ['2', '1', '0', '4']}\n"
]
}
],
@ -223,9 +240,9 @@
"# We allow a 1% deviation in accuracy from the original model accuracy\n",
"minimizer2 = GeneralizeToRepresentative(model, target_accuracy=0.99)\n",
"\n",
"minimizer2.fit(X_generalizer_train, x_train_predictions)\n",
"transformed2 = minimizer2.transform(x_test)\n",
"print('Accuracy on minimized data: ', model.score(transformed2, y_test))\n",
"minimizer2.fit(dataset=ArrayDataset(X_generalizer_train, x_train_predictions))\n",
"transformed2 = minimizer2.transform(dataset=ArrayDataset(x_test))\n",
"print('Accuracy on minimized data: ', model.score(test_data=ArrayDataset(transformed2, y_test)))\n",
"generalizations2 = minimizer2.generalizations\n",
"print(generalizations2)"
]
@ -259,4 +276,4 @@
},
"nbformat": 4,
"nbformat_minor": 2
}
}