Data and Model wrappers (#26)

* Squashed commit of wrappers:
  * Wrapper minimizer
  * apply dataset wrapper on minimizer
  * apply changes on minimization notebook
  * add black_box_access and unlimited_queries params
  * Dataset wrapper anonymizer
  * Add features_names to ArrayDataset and allow providing features names in QI and Cat features not just indexes
  * update notebooks
  * categorical features and QI passed by indexes
  * dataset include feature names and is_pandas param
  * add pytorch Dataset
  * Remove redundant code. Use data wrappers in model wrapper APIs.
  * add generic dataset components
  * Create initial version of wrappers for models
* Fix handling of categorical features
2026-06-23 15:48:06 +02:00 · 2022-04-27 12:33:27 +03:00 · 2022-04-27 12:33:27 +03:00 · 2b2dab6bef
commit 2b2dab6bef
parent d53818644e
17 changed files with 1340 additions and 752 deletions
--- a/notebooks/attribute_inference_anonymization_nursery.ipynb
+++ b/notebooks/attribute_inference_anonymization_nursery.ipynb
@@ -29,198 +29,15 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 61,
+   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>parents</th>\n",
-       "      <th>has_nurs</th>\n",
-       "      <th>form</th>\n",
-       "      <th>children</th>\n",
-       "      <th>housing</th>\n",
-       "      <th>finance</th>\n",
-       "      <th>social</th>\n",
-       "      <th>health</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>8450</th>\n",
-       "      <td>pretentious</td>\n",
-       "      <td>very_crit</td>\n",
-       "      <td>foster</td>\n",
-       "      <td>1</td>\n",
-       "      <td>less_conv</td>\n",
-       "      <td>convenient</td>\n",
-       "      <td>1</td>\n",
-       "      <td>not_recom</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>12147</th>\n",
-       "      <td>great_pret</td>\n",
-       "      <td>very_crit</td>\n",
-       "      <td>complete</td>\n",
-       "      <td>1</td>\n",
-       "      <td>critical</td>\n",
-       "      <td>inconv</td>\n",
-       "      <td>1</td>\n",
-       "      <td>recommended</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2780</th>\n",
-       "      <td>usual</td>\n",
-       "      <td>critical</td>\n",
-       "      <td>complete</td>\n",
-       "      <td>4</td>\n",
-       "      <td>less_conv</td>\n",
-       "      <td>convenient</td>\n",
-       "      <td>1</td>\n",
-       "      <td>not_recom</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>11924</th>\n",
-       "      <td>great_pret</td>\n",
-       "      <td>critical</td>\n",
-       "      <td>foster</td>\n",
-       "      <td>1</td>\n",
-       "      <td>critical</td>\n",
-       "      <td>convenient</td>\n",
-       "      <td>1</td>\n",
-       "      <td>not_recom</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>59</th>\n",
-       "      <td>usual</td>\n",
-       "      <td>proper</td>\n",
-       "      <td>complete</td>\n",
-       "      <td>2</td>\n",
-       "      <td>convenient</td>\n",
-       "      <td>convenient</td>\n",
-       "      <td>0</td>\n",
-       "      <td>not_recom</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>...</th>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>5193</th>\n",
-       "      <td>pretentious</td>\n",
-       "      <td>less_proper</td>\n",
-       "      <td>complete</td>\n",
-       "      <td>1</td>\n",
-       "      <td>convenient</td>\n",
-       "      <td>inconv</td>\n",
-       "      <td>0</td>\n",
-       "      <td>recommended</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1375</th>\n",
-       "      <td>usual</td>\n",
-       "      <td>less_proper</td>\n",
-       "      <td>incomplete</td>\n",
-       "      <td>2</td>\n",
-       "      <td>less_conv</td>\n",
-       "      <td>convenient</td>\n",
-       "      <td>1</td>\n",
-       "      <td>priority</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>10318</th>\n",
-       "      <td>great_pret</td>\n",
-       "      <td>less_proper</td>\n",
-       "      <td>foster</td>\n",
-       "      <td>4</td>\n",
-       "      <td>convenient</td>\n",
-       "      <td>convenient</td>\n",
-       "      <td>0</td>\n",
-       "      <td>priority</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>6396</th>\n",
-       "      <td>pretentious</td>\n",
-       "      <td>improper</td>\n",
-       "      <td>completed</td>\n",
-       "      <td>3</td>\n",
-       "      <td>less_conv</td>\n",
-       "      <td>convenient</td>\n",
-       "      <td>1</td>\n",
-       "      <td>recommended</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>485</th>\n",
-       "      <td>usual</td>\n",
-       "      <td>proper</td>\n",
-       "      <td>incomplete</td>\n",
-       "      <td>1</td>\n",
-       "      <td>critical</td>\n",
-       "      <td>inconv</td>\n",
-       "      <td>1</td>\n",
-       "      <td>not_recom</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "<p>10366 rows × 8 columns</p>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "           parents     has_nurs        form children     housing     finance  \\\n",
-       "8450   pretentious    very_crit      foster        1   less_conv  convenient   \n",
-       "12147   great_pret    very_crit    complete        1    critical      inconv   \n",
-       "2780         usual     critical    complete        4   less_conv  convenient   \n",
-       "11924   great_pret     critical      foster        1    critical  convenient   \n",
-       "59           usual       proper    complete        2  convenient  convenient   \n",
-       "...            ...          ...         ...      ...         ...         ...   \n",
-       "5193   pretentious  less_proper    complete        1  convenient      inconv   \n",
-       "1375         usual  less_proper  incomplete        2   less_conv  convenient   \n",
-       "10318   great_pret  less_proper      foster        4  convenient  convenient   \n",
-       "6396   pretentious     improper   completed        3   less_conv  convenient   \n",
-       "485          usual       proper  incomplete        1    critical      inconv   \n",
-       "\n",
-       "       social       health  \n",
-       "8450        1    not_recom  \n",
-       "12147       1  recommended  \n",
-       "2780        1    not_recom  \n",
-       "11924       1    not_recom  \n",
-       "59          0    not_recom  \n",
-       "...       ...          ...  \n",
-       "5193        0  recommended  \n",
-       "1375        1     priority  \n",
-       "10318       0     priority  \n",
-       "6396        1  recommended  \n",
-       "485         1    not_recom  \n",
-       "\n",
-       "[10366 rows x 8 columns]"
-      ]
+      "text/plain": "           parents     has_nurs        form children     housing     finance  \\\n8450   pretentious    very_crit      foster        1   less_conv  convenient   \n12147   great_pret    very_crit    complete        1    critical      inconv   \n2780         usual     critical    complete        4   less_conv  convenient   \n11924   great_pret     critical      foster        1    critical  convenient   \n59           usual       proper    complete        2  convenient  convenient   \n...            ...          ...         ...      ...         ...         ...   \n5193   pretentious  less_proper    complete        1  convenient      inconv   \n1375         usual  less_proper  incomplete        2   less_conv  convenient   \n10318   great_pret  less_proper      foster        4  convenient  convenient   \n6396   pretentious     improper   completed        3   less_conv  convenient   \n485          usual       proper  incomplete        1    critical      inconv   \n\n       social       health  \n8450        1    not_recom  \n12147       1  recommended  \n2780        1    not_recom  \n11924       1    not_recom  \n59          0    not_recom  \n...       ...          ...  \n5193        0  recommended  \n1375        1     priority  \n10318       0     priority  \n6396        1  recommended  \n485         1    not_recom  \n\n[10366 rows x 8 columns]",
+      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>parents</th>\n      <th>has_nurs</th>\n      <th>form</th>\n      <th>children</th>\n      <th>housing</th>\n      <th>finance</th>\n      <th>social</th>\n      <th>health</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>8450</th>\n      <td>pretentious</td>\n      <td>very_crit</td>\n      <td>foster</td>\n      <td>1</td>\n      <td>less_conv</td>\n      <td>convenient</td>\n      <td>1</td>\n      <td>not_recom</td>\n    </tr>\n    <tr>\n      <th>12147</th>\n      <td>great_pret</td>\n      <td>very_crit</td>\n      <td>complete</td>\n      <td>1</td>\n      <td>critical</td>\n      <td>inconv</td>\n      <td>1</td>\n      <td>recommended</td>\n    </tr>\n    <tr>\n      <th>2780</th>\n      <td>usual</td>\n      <td>critical</td>\n      <td>complete</td>\n      <td>4</td>\n      <td>less_conv</td>\n      <td>convenient</td>\n      <td>1</td>\n      <td>not_recom</td>\n    </tr>\n    <tr>\n      <th>11924</th>\n      <td>great_pret</td>\n      <td>critical</td>\n      <td>foster</td>\n      <td>1</td>\n      <td>critical</td>\n      <td>convenient</td>\n      <td>1</td>\n      <td>not_recom</td>\n    </tr>\n    <tr>\n      <th>59</th>\n      <td>usual</td>\n      <td>proper</td>\n      <td>complete</td>\n      <td>2</td>\n      <td>convenient</td>\n      <td>convenient</td>\n      <td>0</td>\n      <td>not_recom</td>\n    </tr>\n    <tr>\n      <th>...</th>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n    </tr>\n    <tr>\n      <th>5193</th>\n      
<td>pretentious</td>\n      <td>less_proper</td>\n      <td>complete</td>\n      <td>1</td>\n      <td>convenient</td>\n      <td>inconv</td>\n      <td>0</td>\n      <td>recommended</td>\n    </tr>\n    <tr>\n      <th>1375</th>\n      <td>usual</td>\n      <td>less_proper</td>\n      <td>incomplete</td>\n      <td>2</td>\n      <td>less_conv</td>\n      <td>convenient</td>\n      <td>1</td>\n      <td>priority</td>\n    </tr>\n    <tr>\n      <th>10318</th>\n      <td>great_pret</td>\n      <td>less_proper</td>\n      <td>foster</td>\n      <td>4</td>\n      <td>convenient</td>\n      <td>convenient</td>\n      <td>0</td>\n      <td>priority</td>\n    </tr>\n    <tr>\n      <th>6396</th>\n      <td>pretentious</td>\n      <td>improper</td>\n      <td>completed</td>\n      <td>3</td>\n      <td>less_conv</td>\n      <td>convenient</td>\n      <td>1</td>\n      <td>recommended</td>\n    </tr>\n    <tr>\n      <th>485</th>\n      <td>usual</td>\n      <td>proper</td>\n      <td>incomplete</td>\n      <td>1</td>\n      <td>critical</td>\n      <td>inconv</td>\n      <td>1</td>\n      <td>not_recom</td>\n    </tr>\n  </tbody>\n</table>\n<p>10366 rows × 8 columns</p>\n</div>"
     },
-     "execution_count": 61,
+     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
@@ -230,7 +47,7 @@
    "import sys\n",
    "sys.path.insert(0, os.path.abspath('..'))\n",
    "\n",
-    "from apt.utils import get_nursery_dataset\n",
+    "from apt.utils.dataset_utils import get_nursery_dataset\n",
    "\n",
    "(x_train, y_train), (x_test, y_test) = get_nursery_dataset(transform_social=True)\n",
    "\n",
@@ -246,7 +63,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 62,
+   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
@@ -263,9 +80,9 @@
    "from sklearn.preprocessing import OneHotEncoder\n",
    "\n",
    "x_train_str = x_train.astype(str)\n",
-    "train_encoded = OneHotEncoder(sparse=False, drop='if_binary').fit_transform(x_train_str)\n",
+    "train_encoded = OneHotEncoder(sparse=False).fit_transform(x_train_str)\n",
    "x_test_str = x_test.astype(str)\n",
-    "test_encoded = OneHotEncoder(sparse=False, drop='if_binary').fit_transform(x_test_str)\n",
+    "test_encoded = OneHotEncoder(sparse=False).fit_transform(x_test_str)\n",
    "    \n",
    "model = DecisionTreeClassifier()\n",
    "model.fit(train_encoded, y_train)\n",
@@ -287,7 +104,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 91,
+   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -323,14 +140,14 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 96,
+   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "0.6430638626278217\n"
+      "1.0\n"
     ]
    }
   ],
@@ -361,14 +178,14 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 55,
+   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "0.6980513216284006\n"
+      "0.5122515917422342\n"
     ]
    }
   ],
@@ -408,224 +225,43 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 97,
+   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>parents</th>\n",
-       "      <th>has_nurs</th>\n",
-       "      <th>form</th>\n",
-       "      <th>children</th>\n",
-       "      <th>housing</th>\n",
-       "      <th>finance</th>\n",
-       "      <th>social</th>\n",
-       "      <th>health</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>8450</th>\n",
-       "      <td>pretentious</td>\n",
-       "      <td>very_crit</td>\n",
-       "      <td>foster</td>\n",
-       "      <td>1</td>\n",
-       "      <td>less_conv</td>\n",
-       "      <td>convenient</td>\n",
-       "      <td>0</td>\n",
-       "      <td>not_recom</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>12147</th>\n",
-       "      <td>great_pret</td>\n",
-       "      <td>very_crit</td>\n",
-       "      <td>complete</td>\n",
-       "      <td>1</td>\n",
-       "      <td>critical</td>\n",
-       "      <td>inconv</td>\n",
-       "      <td>1</td>\n",
-       "      <td>recommended</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2780</th>\n",
-       "      <td>usual</td>\n",
-       "      <td>critical</td>\n",
-       "      <td>complete</td>\n",
-       "      <td>4</td>\n",
-       "      <td>less_conv</td>\n",
-       "      <td>convenient</td>\n",
-       "      <td>0</td>\n",
-       "      <td>not_recom</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>11924</th>\n",
-       "      <td>great_pret</td>\n",
-       "      <td>critical</td>\n",
-       "      <td>foster</td>\n",
-       "      <td>1</td>\n",
-       "      <td>critical</td>\n",
-       "      <td>convenient</td>\n",
-       "      <td>0</td>\n",
-       "      <td>not_recom</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>59</th>\n",
-       "      <td>usual</td>\n",
-       "      <td>proper</td>\n",
-       "      <td>complete</td>\n",
-       "      <td>2</td>\n",
-       "      <td>convenient</td>\n",
-       "      <td>convenient</td>\n",
-       "      <td>0</td>\n",
-       "      <td>not_recom</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>...</th>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>5193</th>\n",
-       "      <td>pretentious</td>\n",
-       "      <td>less_proper</td>\n",
-       "      <td>complete</td>\n",
-       "      <td>1</td>\n",
-       "      <td>convenient</td>\n",
-       "      <td>inconv</td>\n",
-       "      <td>0</td>\n",
-       "      <td>recommended</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1375</th>\n",
-       "      <td>usual</td>\n",
-       "      <td>less_proper</td>\n",
-       "      <td>incomplete</td>\n",
-       "      <td>2</td>\n",
-       "      <td>less_conv</td>\n",
-       "      <td>convenient</td>\n",
-       "      <td>1</td>\n",
-       "      <td>priority</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>10318</th>\n",
-       "      <td>great_pret</td>\n",
-       "      <td>less_proper</td>\n",
-       "      <td>foster</td>\n",
-       "      <td>4</td>\n",
-       "      <td>convenient</td>\n",
-       "      <td>convenient</td>\n",
-       "      <td>0</td>\n",
-       "      <td>priority</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>6396</th>\n",
-       "      <td>pretentious</td>\n",
-       "      <td>improper</td>\n",
-       "      <td>completed</td>\n",
-       "      <td>3</td>\n",
-       "      <td>less_conv</td>\n",
-       "      <td>convenient</td>\n",
-       "      <td>1</td>\n",
-       "      <td>recommended</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>485</th>\n",
-       "      <td>usual</td>\n",
-       "      <td>proper</td>\n",
-       "      <td>incomplete</td>\n",
-       "      <td>1</td>\n",
-       "      <td>critical</td>\n",
-       "      <td>convenient</td>\n",
-       "      <td>0</td>\n",
-       "      <td>not_recom</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "<p>10366 rows × 8 columns</p>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "           parents     has_nurs        form children     housing     finance  \\\n",
-       "8450   pretentious    very_crit      foster        1   less_conv  convenient   \n",
-       "12147   great_pret    very_crit    complete        1    critical      inconv   \n",
-       "2780         usual     critical    complete        4   less_conv  convenient   \n",
-       "11924   great_pret     critical      foster        1    critical  convenient   \n",
-       "59           usual       proper    complete        2  convenient  convenient   \n",
-       "...            ...          ...         ...      ...         ...         ...   \n",
-       "5193   pretentious  less_proper    complete        1  convenient      inconv   \n",
-       "1375         usual  less_proper  incomplete        2   less_conv  convenient   \n",
-       "10318   great_pret  less_proper      foster        4  convenient  convenient   \n",
-       "6396   pretentious     improper   completed        3   less_conv  convenient   \n",
-       "485          usual       proper  incomplete        1    critical  convenient   \n",
-       "\n",
-       "       social       health  \n",
-       "8450        0    not_recom  \n",
-       "12147       1  recommended  \n",
-       "2780        0    not_recom  \n",
-       "11924       0    not_recom  \n",
-       "59          0    not_recom  \n",
-       "...       ...          ...  \n",
-       "5193        0  recommended  \n",
-       "1375        1     priority  \n",
-       "10318       0     priority  \n",
-       "6396        1  recommended  \n",
-       "485         0    not_recom  \n",
-       "\n",
-       "[10366 rows x 8 columns]"
-      ]
+      "text/plain": "           parents     has_nurs        form children     housing     finance  \\\n0      pretentious    very_crit      foster        1   less_conv  convenient   \n1       great_pret    very_crit    complete        1    critical      inconv   \n2            usual     critical    complete        4   less_conv  convenient   \n3       great_pret     critical      foster        1    critical  convenient   \n4            usual       proper    complete        2  convenient  convenient   \n...            ...          ...         ...      ...         ...         ...   \n10361  pretentious  less_proper    complete        1  convenient      inconv   \n10362        usual  less_proper  incomplete        2   less_conv  convenient   \n10363   great_pret  less_proper      foster        4  convenient  convenient   \n10364  pretentious     improper   completed        3   less_conv  convenient   \n10365        usual       proper  incomplete        1    critical  convenient   \n\n      social       health  \n0          0    not_recom  \n1          1  recommended  \n2          0    not_recom  \n3          0    not_recom  \n4          0    not_recom  \n...      ...          ...  \n10361      0  recommended  \n10362      1     priority  \n10363      0     priority  \n10364      1  recommended  \n10365      0    not_recom  \n\n[10366 rows x 8 columns]",
+      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>parents</th>\n      <th>has_nurs</th>\n      <th>form</th>\n      <th>children</th>\n      <th>housing</th>\n      <th>finance</th>\n      <th>social</th>\n      <th>health</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>pretentious</td>\n      <td>very_crit</td>\n      <td>foster</td>\n      <td>1</td>\n      <td>less_conv</td>\n      <td>convenient</td>\n      <td>0</td>\n      <td>not_recom</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>great_pret</td>\n      <td>very_crit</td>\n      <td>complete</td>\n      <td>1</td>\n      <td>critical</td>\n      <td>inconv</td>\n      <td>1</td>\n      <td>recommended</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>usual</td>\n      <td>critical</td>\n      <td>complete</td>\n      <td>4</td>\n      <td>less_conv</td>\n      <td>convenient</td>\n      <td>0</td>\n      <td>not_recom</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>great_pret</td>\n      <td>critical</td>\n      <td>foster</td>\n      <td>1</td>\n      <td>critical</td>\n      <td>convenient</td>\n      <td>0</td>\n      <td>not_recom</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>usual</td>\n      <td>proper</td>\n      <td>complete</td>\n      <td>2</td>\n      <td>convenient</td>\n      <td>convenient</td>\n      <td>0</td>\n      <td>not_recom</td>\n    </tr>\n    <tr>\n      <th>...</th>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n    </tr>\n    <tr>\n      <th>10361</th>\n      
<td>pretentious</td>\n      <td>less_proper</td>\n      <td>complete</td>\n      <td>1</td>\n      <td>convenient</td>\n      <td>inconv</td>\n      <td>0</td>\n      <td>recommended</td>\n    </tr>\n    <tr>\n      <th>10362</th>\n      <td>usual</td>\n      <td>less_proper</td>\n      <td>incomplete</td>\n      <td>2</td>\n      <td>less_conv</td>\n      <td>convenient</td>\n      <td>1</td>\n      <td>priority</td>\n    </tr>\n    <tr>\n      <th>10363</th>\n      <td>great_pret</td>\n      <td>less_proper</td>\n      <td>foster</td>\n      <td>4</td>\n      <td>convenient</td>\n      <td>convenient</td>\n      <td>0</td>\n      <td>priority</td>\n    </tr>\n    <tr>\n      <th>10364</th>\n      <td>pretentious</td>\n      <td>improper</td>\n      <td>completed</td>\n      <td>3</td>\n      <td>less_conv</td>\n      <td>convenient</td>\n      <td>1</td>\n      <td>recommended</td>\n    </tr>\n    <tr>\n      <th>10365</th>\n      <td>usual</td>\n      <td>proper</td>\n      <td>incomplete</td>\n      <td>1</td>\n      <td>critical</td>\n      <td>convenient</td>\n      <td>0</td>\n      <td>not_recom</td>\n    </tr>\n  </tbody>\n</table>\n<p>10366 rows × 8 columns</p>\n</div>"
     },
-     "execution_count": 97,
+     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
+    "from apt.utils.datasets import ArrayDataset\n",
    "from apt.anonymization import Anonymize\n",
    "\n",
+    "features = x_train.columns\n",
    "QI = [\"finance\", \"social\", \"health\"]\n",
    "categorical_features = [\"parents\", \"has_nurs\", \"form\", \"housing\", \"finance\", \"health\", 'children']\n",
-    "anonymizer = Anonymize(100, QI, categorical_features=categorical_features)\n",
-    "anon = anonymizer.anonymize(x_train, x_train_predictions)\n",
-    "anon"
+    "QI_indexes = [i for i, v in enumerate(features) if v in QI]\n",
+    "categorical_features_indexes = [i for i, v in enumerate(features) if v in categorical_features]\n",
+    "anonymizer = Anonymize(100, QI_indexes, categorical_features=categorical_features_indexes)\n",
+    "anon = anonymizer.anonymize(ArrayDataset(x_train, x_train_predictions))\n",
+    "anon\n"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 64,
+   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
-      "text/plain": [
-       "7585"
-      ]
+      "text/plain": "7585"
     },
-     "execution_count": 64,
+     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
@@ -637,16 +273,14 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 65,
+   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
-      "text/plain": [
-       "5766"
-      ]
+      "text/plain": "5766"
     },
-     "execution_count": 65,
+     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
@@ -665,7 +299,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 66,
+   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
@@ -678,7 +312,7 @@
   ],
   "source": [
    "anon_str = anon.astype(str)\n",
-    "anon_encoded = OneHotEncoder(sparse=False, drop='if_binary').fit_transform(anon_str)\n",
+    "anon_encoded = OneHotEncoder(sparse=False).fit_transform(anon_str)\n",
    "\n",
    "anon_model = DecisionTreeClassifier()\n",
    "anon_model.fit(anon_encoded, y_train)\n",
@@ -698,14 +332,14 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 98,
+   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "0.6471155701331275\n"
+      "1.0\n"
     ]
    }
   ],
@@ -734,14 +368,14 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 69,
+   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "0.6982442600810341\n"
+      "0.5245996527107852\n"
     ]
    }
   ],
@@ -765,15 +399,15 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 87,
+   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "(0.33056202194878614, 0.2888695146759663)\n",
-      "(0.34112301200908796, 0.3054344667247893)\n"
+      "(0.49415432579890883, 0.48976438779451525)\n",
+      "(0.49415432579890883, 0.48976438779451525)\n"
     ]
    }
   ],
@@ -810,15 +444,15 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 88,
+   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "(0.6457357075913777, 0.2002324905550712)\n",
-      "(0.6472248353715898, 0.1999418773612322)\n"
+      "(1.0, 0.019204655674102813)\n",
+      "(0.9829787234042553, 0.04481086323957323)\n"
     ]
    }
   ],
@@ -849,26 +483,24 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 74,
+   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
-    "anonymizer2 = Anonymize(1000, QI, categorical_features=categorical_features)\n",
-    "anon2 = anonymizer2.anonymize(x_train, x_train_predictions)"
+    "anonymizer2 = Anonymize(1000, QI_indexes, categorical_features=categorical_features_indexes)\n",
+    "anon2 = anonymizer2.anonymize(ArrayDataset(x_train, x_train_predictions))"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 75,
+   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
-      "text/plain": [
-       "4226"
-      ]
+      "text/plain": "4226"
     },
-     "execution_count": 75,
+     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
@@ -887,7 +519,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 104,
+   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
@@ -900,7 +532,7 @@
   ],
   "source": [
    "anon2_str = anon2.astype(str)\n",
-    "anon2_encoded = OneHotEncoder(sparse=False, drop='if_binary').fit_transform(anon2_str)\n",
+    "anon2_encoded = OneHotEncoder(sparse=False).fit_transform(anon2_str)\n",
    "\n",
    "anon2_model = DecisionTreeClassifier()\n",
    "anon2_model.fit(anon2_encoded, y_train)\n",
@@ -920,14 +552,14 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 105,
+   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "0.6266640941539648\n"
+      "1.0\n"
     ]
    }
   ],
@@ -956,14 +588,14 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 106,
+   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "0.6944819602546788\n"
+      "0.515820953115956\n"
     ]
    }
   ],
@@ -980,17 +612,17 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 107,
+   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "(0.35793357933579334, 0.17037470725995316)\n",
-      "(0.3360655737704918, 0.1680327868852459)\n",
-      "(0.6457357075913777, 0.2002324905550712)\n",
-      "(0.6327519379844961, 0.1897704155768672)\n"
+      "(0.49415432579890883, 0.48976438779451525)\n",
+      "(0.49415432579890883, 0.48976438779451525)\n",
+      "(1.0, 0.019204655674102813)\n",
+      "(1.0, 0.026382153249272552)\n"
     ]
    }
   ],
@ -1023,27 +655,26 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 111,
+   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "QI2 = [\"parents\", \"has_nurs\", \"form\", \"children\", \"housing\", \"finance\", \"social\", \"health\"]\n",
-    "anonymizer3 = Anonymize(100, QI2, categorical_features=categorical_features)\n",
-    "anon3 = anonymizer3.anonymize(x_train, x_train_predictions)"
+    "QI2_indexes = [i for i, v in enumerate(features) if v in QI2]\n",
+    "anonymizer3 = Anonymize(100, QI2_indexes, categorical_features=categorical_features_indexes)\n",
+    "anon3 = anonymizer3.anonymize(ArrayDataset(x_train, x_train_predictions))"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 112,
+   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
-      "text/plain": [
-       "39"
-      ]
+      "text/plain": "39"
     },
-     "execution_count": 112,
+     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
@ -1055,22 +686,22 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 113,
+   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "Anonymized model accuracy:  0.7723765432098766\n",
-      "BB attack accuracy:  0.5792012348060969\n",
-      "WB attack accuracy:  0.6680493922438742\n"
+      "Anonymized model accuracy:  0.751929012345679\n",
+      "BB attack accuracy:  1.0\n",
+      "WB attack accuracy:  0.5187150299054601\n"
     ]
    }
   ],
   "source": [
    "anon3_str = anon3.astype(str)\n",
-    "anon3_encoded = OneHotEncoder(sparse=False, drop='if_binary').fit_transform(anon3_str)\n",
+    "anon3_encoded = OneHotEncoder(sparse=False).fit_transform(anon3_str)\n",
    "\n",
    "anon3_model = DecisionTreeClassifier()\n",
    "anon3_model.fit(anon3_encoded, y_train)\n",
@ -1105,17 +736,17 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 114,
+   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "(0.35793357933579334, 0.17037470725995316)\n",
-      "(0.3393939393939394, 0.13114754098360656)\n",
-      "(0.6457357075913777, 0.2002324905550712)\n",
-      "(1, 0.0)\n"
+      "(0.49415432579890883, 0.48976438779451525)\n",
+      "(0.49415432579890883, 0.48976438779451525)\n",
+      "(1.0, 0.019204655674102813)\n",
+      "(1.0, 0.032201745877788554)\n"
     ]
    }
   ],
@ -1162,4 +793,4 @@
 },
 "nbformat": 4,
 "nbformat_minor": 2
-}
+}
--- a/notebooks/membership_inference_anonymization_adult.ipynb
+++ b/notebooks/membership_inference_anonymization_adult.ipynb
@ -29,7 +29,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 97,
+   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
@ -44,6 +44,18 @@
      " [  26.   11.    0.    0.   48.]\n",
      " [  27.    9.    0.    0.   40.]]\n"
     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/var/folders/9b/qbtw28w53355cvpjs4qn83yc0000gn/T/ipykernel_85828/3975777015.py:22: DeprecationWarning: `np.int` is a deprecated alias for the builtin `int`. To silence this warning, use `int` by itself. Doing this will not modify any behavior and is safe. When replacing `np.int`, you may wish to use e.g. `np.int64` or `np.int32` to specify the precision. If you wish to review your current use, check the release note link for additional information.\n",
+      "Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n",
+      "  y_train = y_train.astype(np.int)\n",
+      "/var/folders/9b/qbtw28w53355cvpjs4qn83yc0000gn/T/ipykernel_85828/3975777015.py:26: DeprecationWarning: `np.int` is a deprecated alias for the builtin `int`. To silence this warning, use `int` by itself. Doing this will not modify any behavior and is safe. When replacing `np.int`, you may wish to use e.g. `np.int64` or `np.int32` to specify the precision. If you wish to review your current use, check the release note link for additional information.\n",
+      "Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n",
+      "  y_test = y_test.astype(np.int)\n"
+     ]
    }
   ],
   "source": [
@ -90,14 +102,14 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 116,
+   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "Base model accuracy:  0.8075056814691972\n"
+      "Base model accuracy:  0.8074442601805786\n"
     ]
    }
   ],
@ -126,9 +138,18 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 124,
+   "execution_count": 8,
   "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/Users/olasaadi/PycharmProjects/ai-privacy-toolkit-internal/venv/lib/python3.8/site-packages/art/attacks/inference/membership_inference/black_box.py:262: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
+      "  self.attack_model.fit(np.c_[x_1, x_2], y_ready)  # type: ignore\n"
+     ]
+    }
+   ],
   "source": [
    "from art.attacks.inference.membership_inference import MembershipInferenceBlackBox\n",
    "\n",
@ -154,14 +175,14 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 125,
+   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "0.5440363591696352\n"
+      "0.545264709495148\n"
     ]
    }
   ],
@ -197,7 +218,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 128,
+   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
@ -215,6 +236,7 @@
    }
   ],
   "source": [
+    "from apt.utils.datasets import ArrayDataset\n",
    "import os\n",
    "import sys\n",
    "sys.path.insert(0, os.path.abspath('..'))\n",
@ -223,22 +245,20 @@
    "# QI = (age, education-num, capital-gain, hours-per-week)\n",
    "QI = [0, 1, 2, 4]\n",
    "anonymizer = Anonymize(100, QI)\n",
-    "anon = anonymizer.anonymize(x_train, x_train_predictions)\n",
+    "anon = anonymizer.anonymize(ArrayDataset(x_train, x_train_predictions))\n",
    "print(anon)"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 104,
+   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
-      "text/plain": [
-       "6739"
-      ]
+      "text/plain": "6739"
     },
-     "execution_count": 104,
+     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
@ -250,16 +270,14 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 129,
+   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
-      "text/plain": [
-       "658"
-      ]
+      "text/plain": "658"
     },
-     "execution_count": 129,
+     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
@ -278,14 +296,14 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 130,
+   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "Anonymized model accuracy:  0.8304158221239482\n"
+      "Anonymized model accuracy:  0.83078434985566\n"
     ]
    }
   ],
@ -308,14 +326,22 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 131,
+   "execution_count": 14,
   "metadata": {},
   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/Users/olasaadi/PycharmProjects/ai-privacy-toolkit-internal/venv/lib/python3.8/site-packages/art/attacks/inference/membership_inference/black_box.py:262: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
+      "  self.attack_model.fit(np.c_[x_1, x_2], y_ready)  # type: ignore\n"
+     ]
+    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "0.5034393809114359\n"
+      "0.5047291487532244\n"
     ]
    }
   ],
@ -345,15 +371,15 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 132,
+   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "(0.5298924372550654, 0.7806166318634075)\n",
-      "(0.5030507735890172, 0.5671293452892765)\n"
+      "(0.5312420517168291, 0.7696843139663432)\n",
+      "(0.5048372911169745, 0.4935511607910576)\n"
     ]
    }
   ],
@ -419,4 +445,4 @@
 },
 "nbformat": 4,
 "nbformat_minor": 2
-}
+}
--- a/notebooks/membership_inference_dp_diabetes_reg.ipynb
+++ b/notebooks/membership_inference_dp_diabetes_reg.ipynb
@ -29,7 +29,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 121,
+   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
@ -50,7 +50,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 122,
+   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
@ -86,14 +86,14 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 123,
+   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "0.4954954954954955\n"
+      "0.527027027027027\n"
     ]
    }
   ],
@ -131,7 +131,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 124,
+   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
@ -141,6 +141,22 @@
      "unique rows in original data:  221\n"
     ]
    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/Users/olasaadi/PycharmProjects/ai-privacy-toolkit-internal/venv/lib/python3.8/site-packages/art/attacks/inference/membership_inference/black_box.py:262: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
+      "  self.attack_model.fit(np.c_[x_1, x_2], y_ready)  # type: ignore\n",
+      "/Users/olasaadi/PycharmProjects/ai-privacy-toolkit-internal/venv/lib/python3.8/site-packages/art/attacks/inference/membership_inference/black_box.py:262: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
+      "  self.attack_model.fit(np.c_[x_1, x_2], y_ready)  # type: ignore\n",
+      "/Users/olasaadi/PycharmProjects/ai-privacy-toolkit-internal/venv/lib/python3.8/site-packages/art/attacks/inference/membership_inference/black_box.py:262: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
+      "  self.attack_model.fit(np.c_[x_1, x_2], y_ready)  # type: ignore\n",
+      "/Users/olasaadi/PycharmProjects/ai-privacy-toolkit-internal/venv/lib/python3.8/site-packages/art/attacks/inference/membership_inference/black_box.py:262: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
+      "  self.attack_model.fit(np.c_[x_1, x_2], y_ready)  # type: ignore\n",
+      "/Users/olasaadi/PycharmProjects/ai-privacy-toolkit-internal/venv/lib/python3.8/site-packages/art/attacks/inference/membership_inference/black_box.py:262: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
+      "  self.attack_model.fit(np.c_[x_1, x_2], y_ready)  # type: ignore\n"
+     ]
+    },
    {
     "name": "stdout",
     "output_type": "stream",
@ -148,11 +164,12 @@
      "k values:  [5, 10, 20, 50, 75]\n",
      "unique rows: [34, 19, 8, 4, 2]\n",
      "model accuracy: [0.43165832354998956, 0.4509641063206041, -1.730181929385853, -5.577098823982753e+27, -1.2751609045828272e+25]\n",
-      "attack accuracy: [0.5, 0.47297297297297297, 0.49549549549549543, 0.5, 0.47297297297297297]\n"
+      "attack accuracy: [0.509009009009009, 0.481981981981982, 0.509009009009009, 0.5045045045045045, 0.4954954954954955]\n"
     ]
    }
   ],
   "source": [
+    "from apt.utils.datasets import ArrayDataset\n",
    "from apt.anonymization import Anonymize\n",
    "k_values=[5, 10, 20, 50, 75]\n",
    "model_accuracy = []\n",
@ -165,7 +182,7 @@
    "\n",
    "for k in k_values:\n",
    "    anonymizer = Anonymize(k, QI, is_regression=True)\n",
-    "    anon = anonymizer.anonymize(X_train, x_train_predictions)\n",
+    "    anon = anonymizer.anonymize(ArrayDataset(X_train, x_train_predictions))\n",
    "    unique_values.append(len(np.unique(anon, axis=0)))\n",
    "    \n",
    "    anon_model = LinearRegression()\n",
@ -198,7 +215,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 124,
+   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": []
--- a/notebooks/minimization_adult.ipynb
+++ b/notebooks/minimization_adult.ipynb
@ -27,7 +27,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
@ -42,6 +42,18 @@
      " [2.2000e+01 9.0000e+00 0.0000e+00 0.0000e+00 2.0000e+01]\n",
      " [5.2000e+01 9.0000e+00 1.5024e+04 0.0000e+00 4.0000e+01]]\n"
     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/var/folders/9b/qbtw28w53355cvpjs4qn83yc0000gn/T/ipykernel_13726/1357868359.py:22: DeprecationWarning: `np.int` is a deprecated alias for the builtin `int`. To silence this warning, use `int` by itself. Doing this will not modify any behavior and is safe. When replacing `np.int`, you may wish to use e.g. `np.int64` or `np.int32` to specify the precision. If you wish to review your current use, check the release note link for additional information.\n",
+      "Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n",
+      "  y_train = y_train.astype(np.int)\n",
+      "/var/folders/9b/qbtw28w53355cvpjs4qn83yc0000gn/T/ipykernel_13726/1357868359.py:26: DeprecationWarning: `np.int` is a deprecated alias for the builtin `int`. To silence this warning, use `int` by itself. Doing this will not modify any behavior and is safe. When replacing `np.int`, you may wish to use e.g. `np.int64` or `np.int32` to specify the precision. If you wish to review your current use, check the release note link for additional information.\n",
+      "Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n",
+      "  y_test = y_test.astype(np.int)\n"
+     ]
    }
   ],
   "source": [
@ -84,24 +96,27 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "Base model accuracy:  0.8189914624408821\n"
+      "Base model accuracy:  0.8183158282660771\n"
     ]
    }
   ],
   "source": [
+    "from apt.utils.datasets import ArrayDataset\n",
+    "from apt.utils.models import SklearnClassifier, ModelOutputType\n",
    "from sklearn.tree import DecisionTreeClassifier\n",
    "\n",
-    "model = DecisionTreeClassifier()\n",
-    "model.fit(x_train, y_train)\n",
+    "base_est = DecisionTreeClassifier()\n",
+    "model = SklearnClassifier(base_est, ModelOutputType.CLASSIFIER_VECTOR)\n",
+    "model.fit(ArrayDataset(x_train, y_train))\n",
    "\n",
-    "print('Base model accuracy: ', model.score(x_test, y_test))"
+    "print('Base model accuracy: ', model.score(ArrayDataset(x_test, y_test)))"
   ]
  },
  {
@ -114,26 +129,26 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "Initial accuracy of model on generalized data, relative to original model predictions (base generalization derived from tree, before improvements): 0.929376\n",
+      "Initial accuracy of model on generalized data, relative to original model predictions (base generalization derived from tree, before improvements): 0.936540\n",
      "Improving accuracy\n",
-      "feature to remove: 0\n",
-      "Removed feature: 0, new relative accuracy: 0.939867\n",
-      "feature to remove: 4\n",
-      "Removed feature: 4, new relative accuracy: 0.967247\n",
      "feature to remove: 2\n",
-      "Removed feature: 2, new relative accuracy: 0.972620\n",
+      "Removed feature: 2, new relative accuracy: 0.935261\n",
+      "feature to remove: 4\n",
+      "Removed feature: 4, new relative accuracy: 0.946776\n",
+      "feature to remove: 0\n",
+      "Removed feature: 0, new relative accuracy: 0.972876\n",
      "feature to remove: 1\n",
-      "Removed feature: 1, new relative accuracy: 0.992323\n",
+      "Removed feature: 1, new relative accuracy: 0.992835\n",
      "feature to remove: 3\n",
      "Removed feature: 3, new relative accuracy: 1.000000\n",
-      "Accuracy on minimized data:  0.8237371411024106\n"
+      "Accuracy on minimized data:  0.8231229847996315\n"
     ]
    }
   ],
@ -155,10 +170,12 @@
    "X_generalizer_train, x_test, y_generalizer_train, y_test = train_test_split(x_test, y_test, stratify=y_test,\n",
    "                                                                test_size = 0.4, random_state = 38)\n",
    "x_train_predictions = model.predict(X_generalizer_train)\n",
-    "minimizer.fit(X_generalizer_train, x_train_predictions)\n",
-    "transformed = minimizer.transform(x_test)\n",
+    "if x_train_predictions.shape[1] > 1:\n",
+    "    x_train_predictions = np.argmax(x_train_predictions, axis=1)\n",
+    "minimizer.fit(dataset=ArrayDataset(X_generalizer_train, x_train_predictions))\n",
+    "transformed = minimizer.transform(dataset=ArrayDataset(x_test))\n",
    "\n",
-    "print('Accuracy on minimized data: ', model.score(transformed, y_test))"
+    "print('Accuracy on minimized data: ', model.score(ArrayDataset(transformed, y_test)))"
   ]
  },
  {
@ -170,14 +187,14 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "{'ranges': {}, 'untouched': [0, 1, 2, 3, 4]}\n"
+      "{'ranges': {}, 'categories': {}, 'untouched': ['4', '1', '3', '0', '2']}\n"
     ]
    }
   ],
@ -197,25 +214,25 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "Initial accuracy of model on generalized data, relative to original model predictions (base generalization derived from tree, before improvements): 0.929376\n",
+      "Initial accuracy of model on generalized data, relative to original model predictions (base generalization derived from tree, before improvements): 0.936540\n",
      "Improving accuracy\n",
-      "feature to remove: 0\n",
-      "Removed feature: 0, new relative accuracy: 0.939867\n",
-      "feature to remove: 4\n",
-      "Removed feature: 4, new relative accuracy: 0.967247\n",
      "feature to remove: 2\n",
-      "Removed feature: 2, new relative accuracy: 0.972620\n",
+      "Removed feature: 2, new relative accuracy: 0.935261\n",
+      "feature to remove: 4\n",
+      "Removed feature: 4, new relative accuracy: 0.946776\n",
+      "feature to remove: 0\n",
+      "Removed feature: 0, new relative accuracy: 0.972876\n",
      "feature to remove: 1\n",
-      "Removed feature: 1, new relative accuracy: 0.992323\n",
-      "Accuracy on minimized data:  0.820205742361431\n",
-      "{'ranges': {3: [546.0, 704.0, 705.5, 742.5, 782.0, 834.0, 870.0, 1446.5, 1538.5, 1612.5, 1699.0, 1744.0, 1801.0, 1814.0, 1846.0, 1881.5, 1978.5, 2248.0, 2298.5, 2537.5]}, 'untouched': [0, 1, 2, 4]}\n"
+      "Removed feature: 1, new relative accuracy: 0.992835\n",
+      "Accuracy on minimized data:  0.8192845079072624\n",
+      "{'ranges': {'3': [569.0, 782.0, 870.0, 870.5, 938.0, 1016.5, 1311.5, 1457.0, 1494.5, 1596.0, 1629.5, 1684.0, 1805.0, 1859.0, 1867.5, 1881.5, 1938.0, 1978.5, 2119.0, 2210.0, 2218.0, 2244.5, 2298.5, 2443.5]}, 'categories': {}, 'untouched': ['2', '1', '0', '4']}\n"
     ]
    }
   ],
@ -223,9 +240,9 @@
    "# We allow a 1% deviation in accuracy from the original model accuracy\n",
    "minimizer2 = GeneralizeToRepresentative(model, target_accuracy=0.99)\n",
    "\n",
-    "minimizer2.fit(X_generalizer_train, x_train_predictions)\n",
-    "transformed2 = minimizer2.transform(x_test)\n",
-    "print('Accuracy on minimized data: ', model.score(transformed2, y_test))\n",
+    "minimizer2.fit(dataset=ArrayDataset(X_generalizer_train, x_train_predictions))\n",
+    "transformed2 = minimizer2.transform(dataset=ArrayDataset(x_test))\n",
+    "print('Accuracy on minimized data: ', model.score(test_data=ArrayDataset(transformed2, y_test)))\n",
    "generalizations2 = minimizer2.generalizations\n",
    "print(generalizations2)"
   ]
@ -259,4 +276,4 @@
 },
 "nbformat": 4,
 "nbformat_minor": 2
-}
+}