diff --git a/apt/anonymization/anonymizer.py b/apt/anonymization/anonymizer.py index 9f82c7c..02854f5 100644 --- a/apt/anonymization/anonymizer.py +++ b/apt/anonymization/anonymizer.py @@ -8,6 +8,7 @@ from sklearn.impute import SimpleImputer from sklearn.pipeline import Pipeline from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor from sklearn.preprocessing import OneHotEncoder +from apt.utils.datasets import ArrayDataset, DATA_PANDAS_NUMPY_TYPE from typing import Union, Optional @@ -49,61 +50,64 @@ class Anonymize: self.categorical_features = categorical_features self.is_regression = is_regression self.train_only_QI = train_only_QI + self.features_names = None + self.features = None - def anonymize(self, x: Union[np.ndarray, pd.DataFrame], y: Union[np.ndarray, pd.DataFrame]) \ - -> Union[np.ndarray, pd.DataFrame]: + def anonymize(self, dataset: ArrayDataset) -> DATA_PANDAS_NUMPY_TYPE: """ Method for performing model-guided anonymization. - :param x: The training data for the model. If provided as a pandas dataframe, may contain both numeric and - categorical data. - :param y: The predictions of the original model on the training data. + :param dataset: Data wrapper containing the training data for the model and the predictions of the + original model on the training data. :return: An array containing the anonymized training dataset. 
""" - if type(x) == np.ndarray: - self.features = [i for i in range(x.shape[1])] - return self._anonymize_ndarray(x.copy(), y) - else: # pandas - self.features = x.columns - if not self.categorical_features: - raise ValueError('When supplying a pandas dataframe, categorical_features must be defined') - return self._anonymize_pandas(x.copy(), y) + if dataset.get_samples().shape[1] != 0: + self.features = [i for i in range(dataset.get_samples().shape[1])] + else: + raise ValueError('No data provided') - def _anonymize_ndarray(self, x, y): + if dataset.features_names is not None: + self.features_names = dataset.features_names + else: # if no names provided, use numbers instead + self.features_names = self.features + + if not set(self.quasi_identifiers).issubset(set(self.features_names)): + raise ValueError('Quasi identifiers should bs a subset of the supplied features or indexes in range of ' + 'the data columns') + if self.categorical_features and not set(self.categorical_features).issubset(set(self.features_names)): + raise ValueError('Categorical features should bs a subset of the supplied features or indexes in range of ' + 'the data columns') + self.quasi_identifiers = [i for i, v in enumerate(self.features_names) if v in self.quasi_identifiers] + if self.categorical_features: + self.categorical_features = [i for i, v in enumerate(self.features_names) if v in self.categorical_features] + + transformed = self._anonymize(dataset.get_samples().copy(), dataset.get_labels()) + if dataset.is_pandas: + return pd.DataFrame(transformed, columns=self.features_names) + else: + return transformed + + def _anonymize(self, x, y): if x.shape[0] != y.shape[0]: raise ValueError("x and y should have same number of rows") - x_anonymizer_train = x - if self.train_only_QI: - # build DT just on QI features - x_anonymizer_train = x[:, self.quasi_identifiers] if x.dtype.kind not in 'iufc': - x_prepared = self._modify_categorical_features(x_anonymizer_train) + if not 
self.categorical_features: + raise ValueError('when supplying an array with non-numeric data, categorical_features must be defined') + x_prepared = self._modify_categorical_features(x) else: - x_prepared = x_anonymizer_train - if self.is_regression: - self.anonymizer = DecisionTreeRegressor(random_state=10, min_samples_split=2, min_samples_leaf=self.k) - else: - self.anonymizer = DecisionTreeClassifier(random_state=10, min_samples_split=2, min_samples_leaf=self.k) - self.anonymizer.fit(x_prepared, y) - cells_by_id = self._calculate_cells(x, x_prepared) - return self._anonymize_data_numpy(x, x_prepared, cells_by_id) - - def _anonymize_pandas(self, x, y): - if x.shape[0] != y.shape[0]: - raise ValueError("x and y should have same number of rows") - x_anonymizer_train = x + x_prepared = x + x_anonymizer_train = x_prepared if self.train_only_QI: # build DT just on QI features - x_anonymizer_train = x.loc[:, self.quasi_identifiers] - # need to one-hot encode before training the decision tree - x_prepared = self._modify_categorical_features(x_anonymizer_train) + x_anonymizer_train = x_prepared[:, self.quasi_identifiers] if self.is_regression: self.anonymizer = DecisionTreeRegressor(random_state=10, min_samples_split=2, min_samples_leaf=self.k) else: self.anonymizer = DecisionTreeClassifier(random_state=10, min_samples_split=2, min_samples_leaf=self.k) - self.anonymizer.fit(x_prepared, y) - cells_by_id = self._calculate_cells(x, x_prepared) - return self._anonymize_data_pandas(x, x_prepared, cells_by_id) + + self.anonymizer.fit(x_anonymizer_train, y) + cells_by_id = self._calculate_cells(x, x_anonymizer_train) + return self._anonymize_data(x, x_anonymizer_train, cells_by_id) def _calculate_cells(self, x, x_anonymizer_train): # x is original data, x_anonymizer_train is only QIs + 1-hot encoded @@ -130,15 +134,9 @@ class Anonymize: # get all rows in cell indexes = [index for index, node_id in enumerate(node_ids) if node_id == cell['id']] # TODO: should we filter only those 
with majority label? (using hist) - if type(x) == np.ndarray: - rows = x[indexes] - else: # pandas - rows = x.iloc[indexes] + rows = x[indexes] for feature in self.quasi_identifiers: - if type(x) == np.ndarray: - values = rows[:, feature] - else: # pandas - values = rows.loc[:, feature] + values = rows[:, feature] if self.categorical_features and feature in self.categorical_features: # find most common value cell['representative'][feature] = Counter(values).most_common(1)[0][0] @@ -163,7 +161,7 @@ class Anonymize: node_ids = self._find_sample_nodes(samples) return [cells_by_id[node_id] for node_id in node_ids] - def _anonymize_data_numpy(self, x, x_anonymizer_train, cells_by_id): + def _anonymize_data(self, x, x_anonymizer_train, cells_by_id): cells = self._find_sample_cells(x_anonymizer_train, cells_by_id) index = 0 for row in x: @@ -173,22 +171,12 @@ class Anonymize: row[feature] = cell['representative'][feature] return x - def _anonymize_data_pandas(self, x, x_anonymizer_train, cells_by_id): - cells = self._find_sample_cells(x_anonymizer_train, cells_by_id) - index = 0 - for i, row in x.iterrows(): - cell = cells[index] - index += 1 - for feature in cell['representative']: - x.at[i, feature] = cell['representative'][feature] - return x - def _modify_categorical_features(self, x): # prepare data for DT used_features = self.features if self.train_only_QI: used_features = self.quasi_identifiers - numeric_features = [f for f in x.columns if f in used_features and f not in self.categorical_features] + numeric_features = [f for f in self.features if f in used_features and f not in self.categorical_features] categorical_features = [f for f in self.categorical_features if f in used_features] numeric_transformer = Pipeline( steps=[('imputer', SimpleImputer(strategy='constant', fill_value=0))] diff --git a/apt/minimization/minimizer.py b/apt/minimization/minimizer.py index d04cc03..27b6b6e 100644 --- a/apt/minimization/minimizer.py +++ b/apt/minimization/minimizer.py @@ 
-1,7 +1,7 @@ """ This module implements all classes needed to perform data minimization """ -from typing import Union +from typing import Union, Optional import pandas as pd import numpy as np import copy @@ -16,6 +16,9 @@ from sklearn.utils.validation import check_X_y, check_array, check_is_fitted from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor from sklearn.model_selection import train_test_split +from apt.utils.datasets import ArrayDataset, Data, DATA_PANDAS_NUMPY_TYPE +from apt.utils.models import Model, SklearnRegressor, ModelOutputType, SklearnClassifier + class GeneralizeToRepresentative(BaseEstimator, MetaEstimatorMixin, TransformerMixin): """ A transformer that generalizes data to representative points. @@ -24,16 +27,13 @@ class GeneralizeToRepresentative(BaseEstimator, MetaEstimatorMixin, TransformerM and a target accuracy. Once the generalizations are learned, can receive one or more data records and transform them to representative points based on the learned generalization. - - An alternative way to use the transformer is to supply ``cells`` and - ``features`` in init or set_params and those will be used to transform + An alternative way to use the transformer is to supply ``cells`` in + init or set_params and those will be used to transform data to representatives. In this case, fit must still be called but there is no need to supply it with ``X`` and ``y``, and there is no need to supply an existing ``estimator`` to init. - In summary, either ``estimator`` and ``target_accuracy`` should be - supplied or ``cells`` and ``features`` should be supplied. - + supplied or ``cells`` should be supplied. Parameters ---------- estimator : estimator, optional @@ -43,8 +43,6 @@ class GeneralizeToRepresentative(BaseEstimator, MetaEstimatorMixin, TransformerM The required accuracy when applying the base model to the generalized data. Accuracy is measured relative to the original accuracy of the model. 
- features : list of str, optional - The feature names, in the order that they appear in the data. categorical_features: list of str, optional The list of categorical features should only be supplied when passing data as a pandas dataframe. @@ -67,28 +65,29 @@ class GeneralizeToRepresentative(BaseEstimator, MetaEstimatorMixin, TransformerM Attributes ---------- + features_ : list of str + The feature names, in the order that they appear in the data. cells_ : list of object The cells used to generalize records, as learned when calling fit. - ncp_ : float The NCP (information loss) score of the resulting generalization, as measured on the training data. - generalizations_ : object The generalizations that were learned (actual feature ranges). - - Notes - ----- - - """ - def __init__(self, estimator=None, target_accuracy=0.998, features=None, - cells=None, categorical_features=None, features_to_minimize: Union[np.ndarray, list] = None - , train_only_QI=True, is_regression=False): - self.estimator = estimator + def __init__(self, estimator: Union[BaseEstimator, Model] = None, target_accuracy: float = 0.998, + cells: list = None, categorical_features: Union[np.ndarray, list] = None, + features_to_minimize: Union[np.ndarray, list] = None, train_only_QI: bool = True, + is_regression: bool = False): + if issubclass(estimator.__class__, Model): + self.estimator = estimator + else: + if is_regression: + self.estimator = SklearnRegressor(estimator) + else: + self.estimator = SklearnClassifier(estimator, ModelOutputType.CLASSIFIER_VECTOR) self.target_accuracy = target_accuracy - self.features = features self.cells = cells self.categorical_features = [] if categorical_features: @@ -114,11 +113,9 @@ class GeneralizeToRepresentative(BaseEstimator, MetaEstimatorMixin, TransformerM ret = {} ret['target_accuracy'] = self.target_accuracy if deep: - ret['features'] = copy.deepcopy(self.features) ret['cells'] = copy.deepcopy(self.cells) ret['estimator'] = self.estimator else: - 
ret['features'] = copy.copy(self.features) ret['cells'] = copy.copy(self.cells) return ret @@ -132,8 +129,6 @@ class GeneralizeToRepresentative(BaseEstimator, MetaEstimatorMixin, TransformerM """ if 'target_accuracy' in params: self.target_accuracy = params['target_accuracy'] - if 'features' in params: - self.features = params['features'] if 'cells' in params: self.cells = params['cells'] return self @@ -142,7 +137,8 @@ class GeneralizeToRepresentative(BaseEstimator, MetaEstimatorMixin, TransformerM def generalizations(self): return self.generalizations_ - def fit_transform(self, X: Union[np.ndarray, pd.DataFrame] = None, y: Union[np.ndarray, pd.DataFrame] = None): + def fit_transform(self, X: Optional[DATA_PANDAS_NUMPY_TYPE] = None, y: Optional[DATA_PANDAS_NUMPY_TYPE] = None, + features_names: Optional = None, dataset: Optional[ArrayDataset] = None): """Learns the generalizations based on training data, and applies them to the data. Parameters @@ -152,17 +148,22 @@ class GeneralizeToRepresentative(BaseEstimator, MetaEstimatorMixin, TransformerM y : array-like, shape (n_samples,), optional The target values. An array of int. This should contain the predictions of the original model on ``X``. - + features_names : list of str, The feature names, in the order that they appear in the data, + provided just if X and y were provided (optional). + dataset : Data wrapper containing the training input samples and the predictions of the + original model on the training data. + Either X,y OR dataset need to be provided, not both. Returns ------- X_transformed : numpy or pandas according to the input type, shape (n_samples, n_features) The array containing the representative values to which each record in ``X`` is mapped. 
""" - self.fit(X, y) - return self.transform(X) + self.fit(X, y, features_names, dataset=dataset) + return self.transform(X, features_names, dataset=dataset) - def fit(self, X: Union[np.ndarray, pd.DataFrame] = None, y: Union[np.ndarray, pd.DataFrame] = None): + def fit(self, X: Optional[DATA_PANDAS_NUMPY_TYPE] = None, y: Optional[DATA_PANDAS_NUMPY_TYPE] = None, + features_names: Optional = None, dataset: ArrayDataset = None): """Learns the generalizations based on training data. Parameters @@ -172,7 +173,11 @@ class GeneralizeToRepresentative(BaseEstimator, MetaEstimatorMixin, TransformerM y : array-like, shape (n_samples,), optional The target values. An array of int. This should contain the predictions of the original model on ``X``. - + features_names : list of str, The feature names, in the order that they appear in the data, + provided just if X and y were provided (optional). + dataset : Data wrapper containing the training input samples and the predictions of the + original model on the training data. + Either X,y OR dataset need to be provided, not both. 
Returns ------- X_transformed : numpy or pandas according to the input type, shape (n_samples, n_features) @@ -181,26 +186,25 @@ class GeneralizeToRepresentative(BaseEstimator, MetaEstimatorMixin, TransformerM """ # take into account that estimator, X, y, cells, features may be None - if X is not None: - if type(X) == np.ndarray: - self.is_numpy = True - else: - self.is_numpy = False - if X is not None and y is not None: - if self.is_numpy: - X, y = check_X_y(X, y, accept_sparse=True) - self.n_features_ = X.shape[1] - elif self.features: - self.n_features_ = len(self.features) + if dataset is not None: + raise ValueError('Either X,y OR dataset need to be provided, not both') + else: + dataset = ArrayDataset(X, y, features_names) + + if dataset and dataset.get_samples() is not None and dataset.get_labels() is not None: + self.n_features_ = dataset.get_samples().shape[1] + + elif dataset and dataset.features_names: + self.n_features_ = len(dataset.features_names) else: self.n_features_ = 0 - if self.features: - self._features = self.features + if dataset and dataset.features_names: + self._features = dataset.features_names # if features is None, use numbers instead of names elif self.n_features_ != 0: - self._features = [i for i in range(self.n_features_)] + self._features = [str(i) for i in range(self.n_features_)] else: self._features = None @@ -212,27 +216,24 @@ class GeneralizeToRepresentative(BaseEstimator, MetaEstimatorMixin, TransformerM # Going to fit # (currently not dealing with option to fit with only X and y and no estimator) - if self.estimator and X is not None and y is not None: + if self.estimator and dataset and dataset.get_samples() is not None and dataset.get_labels() is not None: + x = pd.DataFrame(dataset.get_samples(), columns=self._features) + if not self.features_to_minimize: + self.features_to_minimize = self._features + self.features_to_minimize = [str(i) for i in self.features_to_minimize] + if not all(elem in self._features for elem in 
self.features_to_minimize): + raise ValueError('features to minimize should be a subset of features names') + x_QI = x.loc[:, self.features_to_minimize] - if self.is_numpy: - if not self.features_to_minimize: - self.features_to_minimize = [i for i in range(len(self._features))] - x_QI = X[:, self.features_to_minimize] - self.features_to_minimize = [self._features[i] for i in self.features_to_minimize] - X = pd.DataFrame(X, columns=self._features) - else: - if not self.features_to_minimize: - self.features_to_minimize = self._features - x_QI = X.loc[:, self.features_to_minimize] - x_QI = pd.DataFrame(x_QI, columns=self.features_to_minimize) # divide dataset into train and test - used_data = X + used_data = x if self.train_only_QI: used_data = x_QI if self.is_regression: - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=14) + X_train, X_test, y_train, y_test = train_test_split(x, dataset.get_labels(), test_size=0.4, random_state=14) else: - X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, test_size=0.4, random_state=18) + X_train, X_test, y_train, y_test = train_test_split(x, dataset.get_labels(), stratify=dataset.get_labels(), test_size=0.4, + random_state=18) X_train_QI = X_train.loc[:, self.features_to_minimize] X_test_QI = X_test.loc[:, self.features_to_minimize] @@ -246,7 +247,7 @@ class GeneralizeToRepresentative(BaseEstimator, MetaEstimatorMixin, TransformerM for feature in self._features: if feature not in feature_data.keys(): fd = {} - values = list(X.loc[:, feature]) + values = list(x.loc[:, feature]) if feature not in self.categorical_features: fd['min'] = min(values) fd['max'] = max(values) @@ -259,7 +260,6 @@ class GeneralizeToRepresentative(BaseEstimator, MetaEstimatorMixin, TransformerM categorical_features = [f for f in self._features if f in self.categorical_features and f in self.features_to_minimize] - numeric_transformer = Pipeline( steps=[('imputer', SimpleImputer(strategy='constant', 
fill_value=0))] ) @@ -288,7 +288,7 @@ class GeneralizeToRepresentative(BaseEstimator, MetaEstimatorMixin, TransformerM ("cat", categorical_transformer, self.categorical_features), ] ) - preprocessor.fit(X) + preprocessor.fit(x) x_prepared = preprocessor.transform(X_train) if self.train_only_QI: x_prepared = preprocessor_QI_features.transform(X_train_QI) @@ -300,7 +300,7 @@ class GeneralizeToRepresentative(BaseEstimator, MetaEstimatorMixin, TransformerM self.dt_ = DecisionTreeRegressor(random_state=10, min_samples_split=2, min_samples_leaf=1) else: self.dt_ = DecisionTreeClassifier(random_state=0, min_samples_split=2, - min_samples_leaf=1) + min_samples_leaf=1) self.dt_.fit(x_prepared, y_train) self._modify_categorical_features(used_data) @@ -329,7 +329,7 @@ class GeneralizeToRepresentative(BaseEstimator, MetaEstimatorMixin, TransformerM generalized = self._generalize(X_test, x_prepared_test, nodes, self.cells_, self.cells_by_id_) # check accuracy - accuracy = self.estimator.score(preprocessor.transform(generalized), y_test) + accuracy = self.estimator.score(ArrayDataset(preprocessor.transform(generalized), y_test)) print('Initial accuracy of model on generalized data, relative to original model predictions ' '(base generalization derived from tree, before improvements): %f' % accuracy) @@ -349,7 +349,7 @@ class GeneralizeToRepresentative(BaseEstimator, MetaEstimatorMixin, TransformerM self._calculate_generalizations() generalized = self._generalize(X_test, x_prepared_test, nodes, self.cells_, self.cells_by_id_) - accuracy = self.estimator.score(preprocessor.transform(generalized), y_test) + accuracy = self.estimator.score(ArrayDataset(preprocessor.transform(generalized), y_test)) # if accuracy passed threshold roll back to previous iteration generalizations if accuracy < self.target_accuracy: self.cells_ = cells_previous_iter @@ -375,7 +375,7 @@ class GeneralizeToRepresentative(BaseEstimator, MetaEstimatorMixin, TransformerM self._calculate_generalizations() 
generalized = self._generalize(X_test, x_prepared_test, nodes, self.cells_, self.cells_by_id_) - accuracy = self.estimator.score(preprocessor.transform(generalized), y_test) + accuracy = self.estimator.score(ArrayDataset(preprocessor.transform(generalized), y_test)) print('Removed feature: %s, new relative accuracy: %f' % (removed_feature, accuracy)) # self.cells_ currently holds the chosen generalization based on target accuracy @@ -386,7 +386,7 @@ class GeneralizeToRepresentative(BaseEstimator, MetaEstimatorMixin, TransformerM # Return the transformer return self - def transform(self, X: Union[np.ndarray, pd.DataFrame]): + def transform(self, X: Optional[DATA_PANDAS_NUMPY_TYPE] = None, features_names: Optional = None, dataset: ArrayDataset = None): """ Transforms data records to representative points. Parameters @@ -394,6 +394,10 @@ class GeneralizeToRepresentative(BaseEstimator, MetaEstimatorMixin, TransformerM X : {array-like, sparse-matrix}, shape (n_samples, n_features), If provided as a pandas dataframe, may contain both numeric and categorical data. The input samples. + features_names : list of str, The feature names, in the order that they appear in the data, + provided just if X was provided (optional). + dataset : Data wrapper containing the training input samples. + Either X OR dataset need to be provided, not both. Returns ------- X_transformed : numpy or pandas according to the input type, shape (n_samples, n_features) @@ -405,26 +409,30 @@ class GeneralizeToRepresentative(BaseEstimator, MetaEstimatorMixin, TransformerM msg = 'This %(name)s instance is not initialized yet. ' \ 'Call ‘fit’ or ‘set_params’ with ' \ 'appropriate arguments before using this method.' 
- check_is_fitted(self, ['cells', 'features'], msg=msg) + check_is_fitted(self, ['cells'], msg=msg) - if type(X) == np.ndarray: - # Input validation - X = check_array(X, accept_sparse=True) - self.is_numpy = True - X = pd.DataFrame(X, columns=self._features) - else: - self.is_numpy = False + if X is not None: + if dataset is not None: + raise ValueError('Either X OR dataset need to be provided, not both') + else: + dataset = ArrayDataset(X, features_names=features_names) + elif dataset is None: + raise ValueError('Either X OR dataset need to be provided, not both') + if dataset and dataset.features_names: + self._features = dataset.features_names + if dataset and dataset.get_samples() is not None: + x = pd.DataFrame(dataset.get_samples(), columns=self._features) - if X.shape[1] != self.n_features_ and self.n_features_ != 0: + if x.shape[1] != self.n_features_ and self.n_features_ != 0: raise ValueError('Shape of input is different from what was seen' 'in `fit`') if not self._features: - self._features = [i for i in range(X.shape[1])] + self._features = [i for i in range(x.shape[1])] representatives = pd.DataFrame(columns=self._features) # only columns - generalized = pd.DataFrame(X, columns=self._features, copy=True) # original data - mapped = np.zeros(X.shape[0]) # to mark records we already mapped + generalized = pd.DataFrame(x, columns=self._features, copy=True) # original data + mapped = np.zeros(x.shape[0]) # to mark records we already mapped # iterate over cells (leaves in decision tree) for i in range(len(self.cells_)): @@ -443,7 +451,7 @@ class GeneralizeToRepresentative(BaseEstimator, MetaEstimatorMixin, TransformerM representatives = representatives.drop(feature, axis=1) # get the indexes of all records that map to this cell - indexes = self._get_record_indexes_for_cell(X, self.cells_[i], mapped) + indexes = self._get_record_indexes_for_cell(x, self.cells_[i], mapped) # replace the values in the representative columns with the representative # values 
(leaves others untouched) @@ -454,9 +462,11 @@ class GeneralizeToRepresentative(BaseEstimator, MetaEstimatorMixin, TransformerM replace = representatives.loc[i].to_frame().T.reset_index(drop=True) replace.index = indexes generalized.loc[indexes, representatives.columns] = replace - if self.is_numpy: - return generalized.to_numpy() - return generalized + if dataset and dataset.is_pandas: + return generalized + elif isinstance(X, pd.DataFrame): + return generalized + return generalized.to_numpy() def _get_record_indexes_for_cell(self, X, cell, mapped): indexes = [] @@ -640,7 +650,8 @@ class GeneralizeToRepresentative(BaseEstimator, MetaEstimatorMixin, TransformerM # else: nothing to do, stay with previous cells def _calculate_level_cell_label(self, left_cell, right_cell, new_cell): - new_cell['hist'] = [x + y for x, y in zip(left_cell['hist'], right_cell['hist'])] if not self.is_regression else [] + new_cell['hist'] = [x + y for x, y in + zip(left_cell['hist'], right_cell['hist'])] if not self.is_regression else [] new_cell['label'] = int(self.dt_.classes_[np.argmax(new_cell['hist'])]) if not self.is_regression else 1 def _get_nodes_level(self, level): @@ -797,8 +808,8 @@ class GeneralizeToRepresentative(BaseEstimator, MetaEstimatorMixin, TransformerM cells_by_id = copy.deepcopy(self.cells_by_id_) GeneralizeToRepresentative._remove_feature_from_cells(new_cells, cells_by_id, feature) generalized = self._generalize(original_data, prepared_data, nodes, new_cells, cells_by_id) - accuracy_gain = self.estimator.score(self._preprocessor.transform(generalized), - labels) - current_accuracy + accuracy_gain = self.estimator.score(ArrayDataset(self._preprocessor.transform(generalized), + labels)) - current_accuracy if accuracy_gain < 0: accuracy_gain = 0 if accuracy_gain != 0: @@ -820,8 +831,8 @@ class GeneralizeToRepresentative(BaseEstimator, MetaEstimatorMixin, TransformerM cells_by_id = copy.deepcopy(self.cells_by_id_) 
GeneralizeToRepresentative._remove_feature_from_cells(new_cells, cells_by_id, feature) generalized = self._generalize(original_data, prepared_data, nodes, new_cells, cells_by_id) - accuracy_gain = self.estimator.score(self._preprocessor.transform(generalized), - labels) - current_accuracy + accuracy_gain = self.estimator.score(ArrayDataset(self._preprocessor.transform(generalized), + labels)) - current_accuracy if accuracy_gain < 0: accuracy_gain = 0 diff --git a/apt/utils/__init__.py b/apt/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/apt/utils.py b/apt/utils/dataset_utils.py similarity index 93% rename from apt/utils.py rename to apt/utils/dataset_utils.py index bc73cbc..e3eb959 100644 --- a/apt/utils.py +++ b/apt/utils/dataset_utils.py @@ -13,8 +13,7 @@ def _load_iris(test_set_size: float = 0.3): # Split training and test sets x_train, x_test, y_train, y_test = model_selection.train_test_split(data, labels, test_size=test_set_size, - random_state=18, stratify=labels, - shuffle=True) + random_state=18, stratify=labels) return (x_train, y_train), (x_test, y_test) @@ -29,6 +28,28 @@ def get_iris_dataset(test_set: float = 0.3): return _load_iris(test_set) +def _load_diabetes(test_set_size: float = 0.3): + diabetes = datasets.load_diabetes() + data = diabetes.data + labels = diabetes.target + + # Split training and test sets + x_train, x_test, y_train, y_test = model_selection.train_test_split(data, labels, test_size=test_set_size, + random_state=18) + + return (x_train, y_train), (x_test, y_test) + + +def get_diabetes_dataset(): + """ + Loads the Diabetes (regression) dataset from scikit-learn. + + :return: Training and test data as a tuple ((x_train, y_train), (x_test, y_test)), + each element a numpy array, split 70%/30% with a fixed random seed. + """ + return _load_diabetes() + + def get_german_credit_dataset(test_set: float = 0.3): """ Loads the UCI German_credit dataset from `tests/datasets/german` or downloads it if necessary.
@@ -253,7 +274,7 @@ def get_nursery_dataset(raw: bool = True, test_set: float = 0.2, transform_socia raise Exception("Bad label value: %s" % value) data["label"] = data["label"].apply(modify_label) - data["children"] = data["children"].apply(lambda x: 4 if x == "more" else x) + data["children"] = data["children"].apply(lambda x: "4" if x == "more" else x) if transform_social: diff --git a/apt/utils/datasets/__init__.py b/apt/utils/datasets/__init__.py new file mode 100644 index 0000000..6e7c640 --- /dev/null +++ b/apt/utils/datasets/__init__.py @@ -0,0 +1,7 @@ +""" +The AI Privacy Toolbox (datasets). +Implementation of datasets utility components for datasets creation, load, and store +""" + +from apt.utils.datasets.datasets import Dataset, StoredDataset, DatasetFactory, Data, ArrayDataset, \ + OUTPUT_DATA_ARRAY_TYPE, DATA_PANDAS_NUMPY_TYPE diff --git a/apt/utils/datasets/datasets.py b/apt/utils/datasets/datasets.py new file mode 100644 index 0000000..ff7c296 --- /dev/null +++ b/apt/utils/datasets/datasets.py @@ -0,0 +1,320 @@ +# !/usr/bin/env python +""" +The AI Privacy Toolbox (datasets). 
+Implementation of utility classes for dataset handling +""" + +from abc import ABCMeta, abstractmethod +from typing import Callable, Collection, Any, Union, List, Optional + +import tarfile +import os +import urllib.request +import numpy as np +import pandas as pd +import logging +import torch +from torch import Tensor + +logger = logging.getLogger(__name__) + + +INPUT_DATA_ARRAY_TYPE = Union[np.ndarray, pd.DataFrame, List, Tensor] +OUTPUT_DATA_ARRAY_TYPE = np.ndarray +DATA_PANDAS_NUMPY_TYPE = Union[np.ndarray, pd.DataFrame] + + +def array2numpy(self, arr: INPUT_DATA_ARRAY_TYPE) -> OUTPUT_DATA_ARRAY_TYPE: + + """ + converts from INPUT_DATA_ARRAY_TYPE to numpy array + """ + if type(arr) == np.ndarray: + return arr + if type(arr) == pd.DataFrame or type(arr) == pd.Series: + self.is_pandas = True + return arr.to_numpy() + if isinstance(arr, list): + return np.array(arr) + if type(arr) == Tensor: + return arr.detach().cpu().numpy() + + raise ValueError('Non supported type: ', type(arr).__name__) + + +def array2torch_tensor(self, arr: INPUT_DATA_ARRAY_TYPE) -> Tensor: + """ + converts from INPUT_DATA_ARRAY_TYPE to torch tensor array + """ + if type(arr) == np.ndarray: + return torch.from_numpy(arr) + if type(arr) == pd.DataFrame or type(arr) == pd.Series: + self.is_pandas = True + return torch.from_numpy(arr.to_numpy()) + if isinstance(arr, list): + return torch.tensor(arr) + if type(arr) == Tensor: + return arr + + raise ValueError('Non supported type: ', type(arr).__name__) + + +class Dataset(metaclass=ABCMeta): + """Base Abstract Class for Dataset""" + + @abstractmethod + def __init__(self, **kwargs): + pass + + @abstractmethod + def get_samples(self) -> Collection[Any]: + """Return data samples""" + pass + + @abstractmethod + def get_labels(self) -> Collection[Any]: + """Return labels""" + pass + + +class StoredDataset(Dataset): + """Abstract Class for Storable Dataset""" + + @abstractmethod + def load_from_file(self, path: str): + """Load dataset from file""" + 
pass + + @abstractmethod + def load(self, **kwargs): + """Load dataset""" + pass + + @staticmethod + def download(url: str, dest_path: str, filename: str, unzip: bool = False) -> None: + """ + Download the dataset from URL + :param url: dataset URL, the dataset will be requested from this URL + :param dest_path: local dataset destination path + :param filename: local dataset filename + :param unzip: flag whether or not perform extraction + :return: None + """ + file_path = os.path.join(dest_path, filename) + + if os.path.exists(file_path): + logger.warning("Files already downloaded, skipping downloading") + + else: + os.makedirs(dest_path, exist_ok=True) + logger.info("Downloading the dataset...") + urllib.request.urlretrieve(url, file_path) + logger.info('Dataset Downloaded') + + if unzip: + StoredDataset.extract_archive(zip_path=file_path, dest_path=dest_path, remove_archive=False) + + @staticmethod + def extract_archive(zip_path: str, dest_path=None, remove_archive=False): + """ + Extract dataset from archived file + :param zip_path: path to archived file + :param dest_path: directory path to uncompress the file to + :param remove_archive: whether remove the archive file after uncompress (default False) + :return: None + """ + logger.info("Extracting the dataset...") + tar = tarfile.open(zip_path) + tar.extractall(path=dest_path) + + logger.info("Dataset was extracted to {}".format(dest_path)) + if remove_archive: + logger.info("Removing a zip file") + os.remove(zip_path) + logger.info("Extracted the dataset") + + @staticmethod + def split_debug(datafile: str, dest_datafile: str, ratio: int, shuffle=True, delimiter=",", fmt=None) -> None: + """ + Split the data and take only a part of it + :param datafile: dataset file path + :param dest_datafile: destination path for the partial dataset file + :param ratio: part of the dataset to save + :param shuffle: whether to shuffle the data or not (default True) + :param delimiter: dataset delimiter (default ",") + :param 
fmt: format for the correct data saving + :return: None + """ + if os.path.isfile(dest_datafile): + logger.info(f"The partial debug split already exists {dest_datafile}") + return + else: + os.makedirs(os.path.dirname(dest_datafile), exist_ok=True) + + data = np.genfromtxt(datafile, delimiter=delimiter) + if shuffle: + logger.info("Shuffling data") + np.random.shuffle(data) + + debug_data = data[:int(len(data) * ratio)] + logger.info(f"Saving {ratio} of the data to {dest_datafile}") + np.savetxt(dest_datafile, debug_data, delimiter=delimiter, fmt=fmt) + + +class ArrayDataset(Dataset): + """Dataset that is based on x and y arrays (e.g., numpy/pandas/list...)""" + + def __init__(self, x: INPUT_DATA_ARRAY_TYPE, y: Optional[INPUT_DATA_ARRAY_TYPE] = None, + features_names: Optional = None, **kwargs): + """ + ArrayDataset constructor. + :param x: collection of data samples + :param y: collection of labels (optional) + :param features_names: list of str, The feature names, in the order that they appear in the data (optional) + :param kwargs: dataset parameters + """ + self.is_pandas = False + self.features_names = features_names + self._y = array2numpy(self, y) if y is not None else None + self._x = array2numpy(self, x) + if self.is_pandas: + if features_names and not np.array_equal(features_names, x.columns): + raise ValueError("The supplied features are not the same as in the data features") + self.features_names = x.columns.to_list() + + if y is not None and len(self._x) != len(self._y): + raise ValueError('Non equivalent lengths of x and y') + + def get_samples(self) -> OUTPUT_DATA_ARRAY_TYPE: + """Return data samples as numpy array""" + return self._x + + def get_labels(self) -> OUTPUT_DATA_ARRAY_TYPE: + """Return labels as numpy array""" + return self._y + + +class PytorchData(Dataset): + + def __init__(self, x: INPUT_DATA_ARRAY_TYPE, y: Optional[INPUT_DATA_ARRAY_TYPE] = None, **kwargs): + """ + PytorchData constructor. 
+ :param x: collection of data samples + :param y: collection of labels (optional) + :param kwargs: dataset parameters + """ + self.is_pandas = False + self._y = array2torch_tensor(self, y) if y is not None else None + self._x = array2torch_tensor(self, x) + if self.is_pandas: + self.features_names = x.columns + + if y is not None and len(self._x) != len(self._y): + raise ValueError('Non equivalent lengths of x and y') + + + if self._y is not None: + self.__getitem__ = self.get_item + else: + self.__getitem__ = self.get_sample_item + + + def get_samples(self) -> OUTPUT_DATA_ARRAY_TYPE: + """Return data samples as numpy array""" + return array2numpy(self._x) + + def get_labels(self) -> OUTPUT_DATA_ARRAY_TYPE: + """Return labels as numpy array""" + return array2numpy(self._y) if self._y is not None else None + + def get_sample_item(self, idx) -> Tensor: + return self.x[idx] + + def get_item(self, idx) -> Tensor: + sample, label = self.x[idx], self.y[idx] + return sample, label + + def __len__(self): + return len(self.x) + + +class DatasetFactory: + """Factory class for dataset creation""" + registry = {} + + @classmethod + def register(cls, name: str) -> Callable: + """ + Class method to register Dataset to the internal registry + :param name: dataset name + :return: + """ + + def inner_wrapper(wrapped_class: Dataset) -> Any: + if name in cls.registry: + logger.warning('Dataset %s already exists. Will replace it', name) + cls.registry[name] = wrapped_class + return wrapped_class + + return inner_wrapper + + @classmethod + def create_dataset(cls, name: str, **kwargs) -> Dataset: + """ + Factory command to create dataset instance. + This method gets the appropriate Dataset class from the registry + and creates an instance of it, while passing in the parameters + given in ``kwargs``. + :param name: The name of the dataset to create. + :param kwargs: dataset parameters + :return: An instance of the dataset that is created. 
+ """ + if name not in cls.registry: + msg = f'Dataset {name} does not exist in the registry' + logger.error(msg) + raise ValueError(msg) + + exec_class = cls.registry[name] + executor = exec_class(**kwargs) + return executor + + +class Data: + def __init__(self, train: Dataset = None, test: Dataset = None, **kwargs): + """ + Data class constructor. + The class stores train and test datasets. + If neither of the datasets was provided, + Both train and test datasets will be create using + DatasetFactory to create a dataset instance + """ + if train or test: + self.train = train + self.test = test + else: + self.train = DatasetFactory.create_dataset(train=True, **kwargs) + self.test = DatasetFactory.create_dataset(train=False, **kwargs) + + def get_train_set(self) -> Dataset: + """Return train DatasetBase""" + return self.train + + def get_test_set(self) -> Dataset: + """Return test DatasetBase""" + return self.test + + def get_train_samples(self) -> Collection[Any]: + """Return train set samples""" + return self.train.get_samples() + + def get_train_labels(self) -> Collection[Any]: + """Return train set labels""" + return self.train.get_labels() + + def get_test_samples(self) -> Collection[Any]: + """Return test set samples""" + return self.test.get_samples() + + def get_test_labels(self) -> Collection[Any]: + """Return test set labels""" + return self.test.get_labels() diff --git a/apt/utils/models/__init__.py b/apt/utils/models/__init__.py new file mode 100644 index 0000000..11efd5f --- /dev/null +++ b/apt/utils/models/__init__.py @@ -0,0 +1,2 @@ +from apt.utils.models.model import Model, ModelOutputType +from apt.utils.models.sklearn_model import SklearnModel, SklearnClassifier, SklearnRegressor diff --git a/apt/utils/models/model.py b/apt/utils/models/model.py new file mode 100644 index 0000000..9616459 --- /dev/null +++ b/apt/utils/models/model.py @@ -0,0 +1,109 @@ +from abc import ABCMeta, abstractmethod +from typing import Any, Optional +from enum import 
Enum, auto + +from apt.utils.datasets import Dataset, OUTPUT_DATA_ARRAY_TYPE + + +class ModelOutputType(Enum): + CLASSIFIER_VECTOR = auto() # probabilities or logits + CLASSIFIER_SCALAR = auto() # label only + REGRESSOR_SCALAR = auto() # value + + +class Model(metaclass=ABCMeta): + """ + Abstract base class for ML model wrappers. + """ + + def __init__(self, model: Any, output_type: ModelOutputType, black_box_access: Optional[bool] = True, + unlimited_queries: Optional[bool] = True, **kwargs): + """ + Initialize a `Model` wrapper object. + + :param model: The original model object (of the underlying ML framework) + :param output_type: The type of output the model yields (vector/label only for classifiers, + value for regressors) + :param black_box_access: Boolean describing the type of deployment of the model (when in production). + Set to True if the model is only available via query (API) access, i.e., + only the outputs of the model are exposed, and False if the model internals + are also available. Optional, Default is True. + :param unlimited_queries: If black_box_access is True, this boolean indicates whether a user can perform + unlimited queries to the model API or whether there is a limit to the number of + queries that can be submitted. Optional, Default is True. + """ + self._model = model + self._output_type = output_type + self._black_box_access = black_box_access + self._unlimited_queries = unlimited_queries + + @abstractmethod + def fit(self, train_data: Dataset, **kwargs) -> None: + """ + Fit the model using the training data. + + :param train_data: Training data. + :type train_data: `Dataset` + """ + raise NotImplementedError + + @abstractmethod + def predict(self, x: Dataset, **kwargs) -> OUTPUT_DATA_ARRAY_TYPE: + """ + Perform predictions using the model for input `x`. + + :param x: Input samples. + :type x: `np.ndarray` or `pandas.DataFrame` + :return: Predictions from the model. 
+ """ + raise NotImplementedError + + @abstractmethod + def score(self, test_data: Dataset, **kwargs): + """ + Score the model using test data. + + :param test_data: Test data. + :type train_data: `Dataset` + """ + return NotImplementedError + + @property + def model(self) -> Any: + """ + Return the model. + + :return: The model. + """ + return self._model + + @property + def output_type(self) -> ModelOutputType: + """ + Return the model's output type. + + :return: The model's output type. + """ + return self._output_type + + @property + def black_box_access(self) -> bool: + """ + Return True if the model is only available via query (API) access, i.e., + only the outputs of the model are exposed, and False if the model internals are also available. + + :return: True if the model is only available via query (API) access, i.e., + only the outputs of the model are exposed, and False if the model internals are also available. + """ + return self._black_box_access + + @property + def unlimited_queries(self) -> bool: + """ + If black_box_access is True, Return whether a user can perform unlimited queries to the model API + or whether there is a limit to the number of queries that can be submitted. + + :return: If black_box_access is True, Return whether a user can perform unlimited queries to the model API + or whether there is a limit to the number of queries that can be submitted. 
+ """ + return self._unlimited_queries diff --git a/apt/utils/models/sklearn_model.py b/apt/utils/models/sklearn_model.py new file mode 100644 index 0000000..f7afaa6 --- /dev/null +++ b/apt/utils/models/sklearn_model.py @@ -0,0 +1,112 @@ +from typing import Optional + +import numpy as np + +from sklearn.preprocessing import OneHotEncoder +from sklearn.base import BaseEstimator + +from apt.utils.models import Model, ModelOutputType +from apt.utils.datasets import Dataset, OUTPUT_DATA_ARRAY_TYPE + +from art.estimators.classification.scikitlearn import SklearnClassifier as ArtSklearnClassifier +from art.estimators.regression.scikitlearn import ScikitlearnRegressor + + +class SklearnModel(Model): + """ + Wrapper class for scikitlearn models. + """ + def score(self, test_data: Dataset, **kwargs): + """ + Score the model using test data. + + :param test_data: Test data. + :type train_data: `Dataset` + """ + return self.model.score(test_data.get_samples(), test_data.get_labels(), **kwargs) + + +class SklearnClassifier(SklearnModel): + """ + Wrapper class for scikitlearn classification models. + """ + def __init__(self, model: BaseEstimator, output_type: ModelOutputType, black_box_access: Optional[bool] = True, + unlimited_queries: Optional[bool] = True, **kwargs): + """ + Initialize a `SklearnClassifier` wrapper object. + + :param model: The original sklearn model object. + :param output_type: The type of output the model yields (vector/label only for classifiers, + value for regressors) + :param black_box_access: Boolean describing the type of deployment of the model (when in production). + Set to True if the model is only available via query (API) access, i.e., + only the outputs of the model are exposed, and False if the model internals + are also available. Optional, Default is True. 
+ :param unlimited_queries: If black_box_access is True, this boolean indicates whether a user can perform + unlimited queries to the model API or whether there is a limit to the number of + queries that can be submitted. Optional, Default is True. + """ + super().__init__(model, output_type, black_box_access, unlimited_queries, **kwargs) + self._art_model = ArtSklearnClassifier(model) + + def fit(self, train_data: Dataset, **kwargs) -> None: + """ + Fit the model using the training data. + + :param train_data: Training data. + :type train_data: `Dataset` + """ + encoder = OneHotEncoder(sparse=False) + y_encoded = encoder.fit_transform(train_data.get_labels().reshape(-1, 1)) + self._art_model.fit(train_data.get_samples(), y_encoded, **kwargs) + + def predict(self, x: Dataset, **kwargs) -> OUTPUT_DATA_ARRAY_TYPE: + """ + Perform predictions using the model for input `x`. + + :param x: Input samples. + :type x: `np.ndarray` or `pandas.DataFrame` + :return: Predictions from the model (class probabilities, if supported). + """ + return self._art_model.predict(x, **kwargs) + + +class SklearnRegressor(SklearnModel): + """ + Wrapper class for scikitlearn regression models. + """ + def __init__(self, model: BaseEstimator, black_box_access: Optional[bool] = True, + unlimited_queries: Optional[bool] = True, **kwargs): + """ + Initialize a `SklearnRegressor` wrapper object. + + :param model: The original sklearn model object. + :param black_box_access: Boolean describing the type of deployment of the model (when in production). + Set to True if the model is only available via query (API) access, i.e., + only the outputs of the model are exposed, and False if the model internals + are also available. Optional, Default is True. + :param unlimited_queries: If black_box_access is True, this boolean indicates whether a user can perform + unlimited queries to the model API or whether there is a limit to the number of + queries that can be submitted. Optional, Default is True. 
+ """ + super().__init__(model, ModelOutputType.REGRESSOR_SCALAR, black_box_access, unlimited_queries, **kwargs) + self._art_model = ScikitlearnRegressor(model) + + def fit(self, train_data: Dataset, **kwargs) -> None: + """ + Fit the model using the training data. + + :param train_data: Training data. + :type train_data: `Dataset` + """ + self._art_model.fit(train_data.get_samples(), train_data.get_labels(), **kwargs) + + def predict(self, x: Dataset, **kwargs) -> OUTPUT_DATA_ARRAY_TYPE: + """ + Perform predictions using the model for input `x`. + + :param x: Input samples. + :type x: `np.ndarray` or `pandas.DataFrame` + :return: Predictions from the model. + """ + return self._art_model.predict(x, **kwargs) diff --git a/notebooks/attribute_inference_anonymization_nursery.ipynb b/notebooks/attribute_inference_anonymization_nursery.ipynb index 9952885..bfba540 100644 --- a/notebooks/attribute_inference_anonymization_nursery.ipynb +++ b/notebooks/attribute_inference_anonymization_nursery.ipynb @@ -29,198 +29,15 @@ }, { "cell_type": "code", - "execution_count": 61, + "execution_count": 1, "metadata": {}, "outputs": [ { "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
parentshas_nursformchildrenhousingfinancesocialhealth
8450pretentiousvery_critfoster1less_convconvenient1not_recom
12147great_pretvery_critcomplete1criticalinconv1recommended
2780usualcriticalcomplete4less_convconvenient1not_recom
11924great_pretcriticalfoster1criticalconvenient1not_recom
59usualpropercomplete2convenientconvenient0not_recom
...........................
5193pretentiousless_propercomplete1convenientinconv0recommended
1375usualless_properincomplete2less_convconvenient1priority
10318great_pretless_properfoster4convenientconvenient0priority
6396pretentiousimpropercompleted3less_convconvenient1recommended
485usualproperincomplete1criticalinconv1not_recom
\n", - "

10366 rows × 8 columns

\n", - "
" - ], - "text/plain": [ - " parents has_nurs form children housing finance \\\n", - "8450 pretentious very_crit foster 1 less_conv convenient \n", - "12147 great_pret very_crit complete 1 critical inconv \n", - "2780 usual critical complete 4 less_conv convenient \n", - "11924 great_pret critical foster 1 critical convenient \n", - "59 usual proper complete 2 convenient convenient \n", - "... ... ... ... ... ... ... \n", - "5193 pretentious less_proper complete 1 convenient inconv \n", - "1375 usual less_proper incomplete 2 less_conv convenient \n", - "10318 great_pret less_proper foster 4 convenient convenient \n", - "6396 pretentious improper completed 3 less_conv convenient \n", - "485 usual proper incomplete 1 critical inconv \n", - "\n", - " social health \n", - "8450 1 not_recom \n", - "12147 1 recommended \n", - "2780 1 not_recom \n", - "11924 1 not_recom \n", - "59 0 not_recom \n", - "... ... ... \n", - "5193 0 recommended \n", - "1375 1 priority \n", - "10318 0 priority \n", - "6396 1 recommended \n", - "485 1 not_recom \n", - "\n", - "[10366 rows x 8 columns]" - ] + "text/plain": " parents has_nurs form children housing finance \\\n8450 pretentious very_crit foster 1 less_conv convenient \n12147 great_pret very_crit complete 1 critical inconv \n2780 usual critical complete 4 less_conv convenient \n11924 great_pret critical foster 1 critical convenient \n59 usual proper complete 2 convenient convenient \n... ... ... ... ... ... ... \n5193 pretentious less_proper complete 1 convenient inconv \n1375 usual less_proper incomplete 2 less_conv convenient \n10318 great_pret less_proper foster 4 convenient convenient \n6396 pretentious improper completed 3 less_conv convenient \n485 usual proper incomplete 1 critical inconv \n\n social health \n8450 1 not_recom \n12147 1 recommended \n2780 1 not_recom \n11924 1 not_recom \n59 0 not_recom \n... ... ... 
\n5193 0 recommended \n1375 1 priority \n10318 0 priority \n6396 1 recommended \n485 1 not_recom \n\n[10366 rows x 8 columns]", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
parentshas_nursformchildrenhousingfinancesocialhealth
8450pretentiousvery_critfoster1less_convconvenient1not_recom
12147great_pretvery_critcomplete1criticalinconv1recommended
2780usualcriticalcomplete4less_convconvenient1not_recom
11924great_pretcriticalfoster1criticalconvenient1not_recom
59usualpropercomplete2convenientconvenient0not_recom
...........................
5193pretentiousless_propercomplete1convenientinconv0recommended
1375usualless_properincomplete2less_convconvenient1priority
10318great_pretless_properfoster4convenientconvenient0priority
6396pretentiousimpropercompleted3less_convconvenient1recommended
485usualproperincomplete1criticalinconv1not_recom
\n

10366 rows × 8 columns

\n
" }, - "execution_count": 61, + "execution_count": 1, "metadata": {}, "output_type": "execute_result" } @@ -230,7 +47,7 @@ "import sys\n", "sys.path.insert(0, os.path.abspath('..'))\n", "\n", - "from apt.utils import get_nursery_dataset\n", + "from apt.utils.dataset_utils import get_nursery_dataset\n", "\n", "(x_train, y_train), (x_test, y_test) = get_nursery_dataset(transform_social=True)\n", "\n", @@ -246,7 +63,7 @@ }, { "cell_type": "code", - "execution_count": 62, + "execution_count": 2, "metadata": {}, "outputs": [ { @@ -263,9 +80,9 @@ "from sklearn.preprocessing import OneHotEncoder\n", "\n", "x_train_str = x_train.astype(str)\n", - "train_encoded = OneHotEncoder(sparse=False, drop='if_binary').fit_transform(x_train_str)\n", + "train_encoded = OneHotEncoder(sparse=False).fit_transform(x_train_str)\n", "x_test_str = x_test.astype(str)\n", - "test_encoded = OneHotEncoder(sparse=False, drop='if_binary').fit_transform(x_test_str)\n", + "test_encoded = OneHotEncoder(sparse=False).fit_transform(x_test_str)\n", " \n", "model = DecisionTreeClassifier()\n", "model.fit(train_encoded, y_train)\n", @@ -287,7 +104,7 @@ }, { "cell_type": "code", - "execution_count": 91, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -323,14 +140,14 @@ }, { "cell_type": "code", - "execution_count": 96, + "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "0.6430638626278217\n" + "1.0\n" ] } ], @@ -361,14 +178,14 @@ }, { "cell_type": "code", - "execution_count": 55, + "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "0.6980513216284006\n" + "0.5122515917422342\n" ] } ], @@ -408,224 +225,43 @@ }, { "cell_type": "code", - "execution_count": 97, + "execution_count": 6, "metadata": {}, "outputs": [ { "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
parentshas_nursformchildrenhousingfinancesocialhealth
8450pretentiousvery_critfoster1less_convconvenient0not_recom
12147great_pretvery_critcomplete1criticalinconv1recommended
2780usualcriticalcomplete4less_convconvenient0not_recom
11924great_pretcriticalfoster1criticalconvenient0not_recom
59usualpropercomplete2convenientconvenient0not_recom
...........................
5193pretentiousless_propercomplete1convenientinconv0recommended
1375usualless_properincomplete2less_convconvenient1priority
10318great_pretless_properfoster4convenientconvenient0priority
6396pretentiousimpropercompleted3less_convconvenient1recommended
485usualproperincomplete1criticalconvenient0not_recom
\n", - "

10366 rows × 8 columns

\n", - "
" - ], - "text/plain": [ - " parents has_nurs form children housing finance \\\n", - "8450 pretentious very_crit foster 1 less_conv convenient \n", - "12147 great_pret very_crit complete 1 critical inconv \n", - "2780 usual critical complete 4 less_conv convenient \n", - "11924 great_pret critical foster 1 critical convenient \n", - "59 usual proper complete 2 convenient convenient \n", - "... ... ... ... ... ... ... \n", - "5193 pretentious less_proper complete 1 convenient inconv \n", - "1375 usual less_proper incomplete 2 less_conv convenient \n", - "10318 great_pret less_proper foster 4 convenient convenient \n", - "6396 pretentious improper completed 3 less_conv convenient \n", - "485 usual proper incomplete 1 critical convenient \n", - "\n", - " social health \n", - "8450 0 not_recom \n", - "12147 1 recommended \n", - "2780 0 not_recom \n", - "11924 0 not_recom \n", - "59 0 not_recom \n", - "... ... ... \n", - "5193 0 recommended \n", - "1375 1 priority \n", - "10318 0 priority \n", - "6396 1 recommended \n", - "485 0 not_recom \n", - "\n", - "[10366 rows x 8 columns]" - ] + "text/plain": " parents has_nurs form children housing finance \\\n0 pretentious very_crit foster 1 less_conv convenient \n1 great_pret very_crit complete 1 critical inconv \n2 usual critical complete 4 less_conv convenient \n3 great_pret critical foster 1 critical convenient \n4 usual proper complete 2 convenient convenient \n... ... ... ... ... ... ... \n10361 pretentious less_proper complete 1 convenient inconv \n10362 usual less_proper incomplete 2 less_conv convenient \n10363 great_pret less_proper foster 4 convenient convenient \n10364 pretentious improper completed 3 less_conv convenient \n10365 usual proper incomplete 1 critical convenient \n\n social health \n0 0 not_recom \n1 1 recommended \n2 0 not_recom \n3 0 not_recom \n4 0 not_recom \n... ... ... 
\n10361 0 recommended \n10362 1 priority \n10363 0 priority \n10364 1 recommended \n10365 0 not_recom \n\n[10366 rows x 8 columns]", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
parentshas_nursformchildrenhousingfinancesocialhealth
0pretentiousvery_critfoster1less_convconvenient0not_recom
1great_pretvery_critcomplete1criticalinconv1recommended
2usualcriticalcomplete4less_convconvenient0not_recom
3great_pretcriticalfoster1criticalconvenient0not_recom
4usualpropercomplete2convenientconvenient0not_recom
...........................
10361pretentiousless_propercomplete1convenientinconv0recommended
10362usualless_properincomplete2less_convconvenient1priority
10363great_pretless_properfoster4convenientconvenient0priority
10364pretentiousimpropercompleted3less_convconvenient1recommended
10365usualproperincomplete1criticalconvenient0not_recom
\n

10366 rows × 8 columns

\n
" }, - "execution_count": 97, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ + "from apt.utils.datasets import ArrayDataset\n", "from apt.anonymization import Anonymize\n", "\n", + "features = x_train.columns\n", "QI = [\"finance\", \"social\", \"health\"]\n", "categorical_features = [\"parents\", \"has_nurs\", \"form\", \"housing\", \"finance\", \"health\", 'children']\n", - "anonymizer = Anonymize(100, QI, categorical_features=categorical_features)\n", - "anon = anonymizer.anonymize(x_train, x_train_predictions)\n", - "anon" + "QI_indexes = [i for i, v in enumerate(features) if v in QI]\n", + "categorical_features_indexes = [i for i, v in enumerate(features) if v in categorical_features]\n", + "anonymizer = Anonymize(100, QI_indexes, categorical_features=categorical_features_indexes)\n", + "anon = anonymizer.anonymize(ArrayDataset(x_train, x_train_predictions))\n", + "anon\n" ] }, { "cell_type": "code", - "execution_count": 64, + "execution_count": 7, "metadata": {}, "outputs": [ { "data": { - "text/plain": [ - "7585" - ] + "text/plain": "7585" }, - "execution_count": 64, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -637,16 +273,14 @@ }, { "cell_type": "code", - "execution_count": 65, + "execution_count": 8, "metadata": {}, "outputs": [ { "data": { - "text/plain": [ - "5766" - ] + "text/plain": "5766" }, - "execution_count": 65, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -665,7 +299,7 @@ }, { "cell_type": "code", - "execution_count": 66, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -678,7 +312,7 @@ ], "source": [ "anon_str = anon.astype(str)\n", - "anon_encoded = OneHotEncoder(sparse=False, drop='if_binary').fit_transform(anon_str)\n", + "anon_encoded = OneHotEncoder(sparse=False).fit_transform(anon_str)\n", "\n", "anon_model = DecisionTreeClassifier()\n", "anon_model.fit(anon_encoded, y_train)\n", @@ -698,14 +332,14 @@ }, { "cell_type": 
"code", - "execution_count": 98, + "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "0.6471155701331275\n" + "1.0\n" ] } ], @@ -734,14 +368,14 @@ }, { "cell_type": "code", - "execution_count": 69, + "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "0.6982442600810341\n" + "0.5245996527107852\n" ] } ], @@ -765,15 +399,15 @@ }, { "cell_type": "code", - "execution_count": 87, + "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "(0.33056202194878614, 0.2888695146759663)\n", - "(0.34112301200908796, 0.3054344667247893)\n" + "(0.49415432579890883, 0.48976438779451525)\n", + "(0.49415432579890883, 0.48976438779451525)\n" ] } ], @@ -810,15 +444,15 @@ }, { "cell_type": "code", - "execution_count": 88, + "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "(0.6457357075913777, 0.2002324905550712)\n", - "(0.6472248353715898, 0.1999418773612322)\n" + "(1.0, 0.019204655674102813)\n", + "(0.9829787234042553, 0.04481086323957323)\n" ] } ], @@ -849,26 +483,24 @@ }, { "cell_type": "code", - "execution_count": 74, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ - "anonymizer2 = Anonymize(1000, QI, categorical_features=categorical_features)\n", - "anon2 = anonymizer2.anonymize(x_train, x_train_predictions)" + "anonymizer2 = Anonymize(1000, QI_indexes, categorical_features=categorical_features_indexes)\n", + "anon2 = anonymizer2.anonymize(ArrayDataset(x_train, x_train_predictions))" ] }, { "cell_type": "code", - "execution_count": 75, + "execution_count": 15, "metadata": {}, "outputs": [ { "data": { - "text/plain": [ - "4226" - ] + "text/plain": "4226" }, - "execution_count": 75, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -887,7 +519,7 @@ }, { "cell_type": "code", - "execution_count": 104, + 
"execution_count": 16, "metadata": {}, "outputs": [ { @@ -900,7 +532,7 @@ ], "source": [ "anon2_str = anon2.astype(str)\n", - "anon2_encoded = OneHotEncoder(sparse=False, drop='if_binary').fit_transform(anon2_str)\n", + "anon2_encoded = OneHotEncoder(sparse=False).fit_transform(anon2_str)\n", "\n", "anon2_model = DecisionTreeClassifier()\n", "anon2_model.fit(anon2_encoded, y_train)\n", @@ -920,14 +552,14 @@ }, { "cell_type": "code", - "execution_count": 105, + "execution_count": 17, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "0.6266640941539648\n" + "1.0\n" ] } ], @@ -956,14 +588,14 @@ }, { "cell_type": "code", - "execution_count": 106, + "execution_count": 18, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "0.6944819602546788\n" + "0.515820953115956\n" ] } ], @@ -980,17 +612,17 @@ }, { "cell_type": "code", - "execution_count": 107, + "execution_count": 19, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "(0.35793357933579334, 0.17037470725995316)\n", - "(0.3360655737704918, 0.1680327868852459)\n", - "(0.6457357075913777, 0.2002324905550712)\n", - "(0.6327519379844961, 0.1897704155768672)\n" + "(0.49415432579890883, 0.48976438779451525)\n", + "(0.49415432579890883, 0.48976438779451525)\n", + "(1.0, 0.019204655674102813)\n", + "(1.0, 0.026382153249272552)\n" ] } ], @@ -1023,27 +655,26 @@ }, { "cell_type": "code", - "execution_count": 111, + "execution_count": 20, "metadata": {}, "outputs": [], "source": [ "QI2 = [\"parents\", \"has_nurs\", \"form\", \"children\", \"housing\", \"finance\", \"social\", \"health\"]\n", - "anonymizer3 = Anonymize(100, QI2, categorical_features=categorical_features)\n", - "anon3 = anonymizer3.anonymize(x_train, x_train_predictions)" + "QI2_indexes = [i for i, v in enumerate(features) if v in QI2]\n", + "anonymizer3 = Anonymize(100, QI2_indexes, categorical_features=categorical_features_indexes)\n", + "anon3 = 
anonymizer3.anonymize(ArrayDataset(x_train, x_train_predictions))" ] }, { "cell_type": "code", - "execution_count": 112, + "execution_count": 21, "metadata": {}, "outputs": [ { "data": { - "text/plain": [ - "39" - ] + "text/plain": "39" }, - "execution_count": 112, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } @@ -1055,22 +686,22 @@ }, { "cell_type": "code", - "execution_count": 113, + "execution_count": 22, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Anonymized model accuracy: 0.7723765432098766\n", - "BB attack accuracy: 0.5792012348060969\n", - "WB attack accuracy: 0.6680493922438742\n" + "Anonymized model accuracy: 0.751929012345679\n", + "BB attack accuracy: 1.0\n", + "WB attack accuracy: 0.5187150299054601\n" ] } ], "source": [ "anon3_str = anon3.astype(str)\n", - "anon3_encoded = OneHotEncoder(sparse=False, drop='if_binary').fit_transform(anon3_str)\n", + "anon3_encoded = OneHotEncoder(sparse=False).fit_transform(anon3_str)\n", "\n", "anon3_model = DecisionTreeClassifier()\n", "anon3_model.fit(anon3_encoded, y_train)\n", @@ -1105,17 +736,17 @@ }, { "cell_type": "code", - "execution_count": 114, + "execution_count": 23, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "(0.35793357933579334, 0.17037470725995316)\n", - "(0.3393939393939394, 0.13114754098360656)\n", - "(0.6457357075913777, 0.2002324905550712)\n", - "(1, 0.0)\n" + "(0.49415432579890883, 0.48976438779451525)\n", + "(0.49415432579890883, 0.48976438779451525)\n", + "(1.0, 0.019204655674102813)\n", + "(1.0, 0.032201745877788554)\n" ] } ], @@ -1162,4 +793,4 @@ }, "nbformat": 4, "nbformat_minor": 2 -} +} \ No newline at end of file diff --git a/notebooks/membership_inference_anonymization_adult.ipynb b/notebooks/membership_inference_anonymization_adult.ipynb index c2c7e74..4a0ea00 100644 --- a/notebooks/membership_inference_anonymization_adult.ipynb +++ 
b/notebooks/membership_inference_anonymization_adult.ipynb @@ -29,7 +29,7 @@ }, { "cell_type": "code", - "execution_count": 97, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -44,6 +44,18 @@ " [ 26. 11. 0. 0. 48.]\n", " [ 27. 9. 0. 0. 40.]]\n" ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/var/folders/9b/qbtw28w53355cvpjs4qn83yc0000gn/T/ipykernel_85828/3975777015.py:22: DeprecationWarning: `np.int` is a deprecated alias for the builtin `int`. To silence this warning, use `int` by itself. Doing this will not modify any behavior and is safe. When replacing `np.int`, you may wish to use e.g. `np.int64` or `np.int32` to specify the precision. If you wish to review your current use, check the release note link for additional information.\n", + "Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n", + " y_train = y_train.astype(np.int)\n", + "/var/folders/9b/qbtw28w53355cvpjs4qn83yc0000gn/T/ipykernel_85828/3975777015.py:26: DeprecationWarning: `np.int` is a deprecated alias for the builtin `int`. To silence this warning, use `int` by itself. Doing this will not modify any behavior and is safe. When replacing `np.int`, you may wish to use e.g. `np.int64` or `np.int32` to specify the precision. 
If you wish to review your current use, check the release note link for additional information.\n", + "Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n", + " y_test = y_test.astype(np.int)\n" + ] } ], "source": [ @@ -90,14 +102,14 @@ }, { "cell_type": "code", - "execution_count": 116, + "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Base model accuracy: 0.8075056814691972\n" + "Base model accuracy: 0.8074442601805786\n" ] } ], @@ -126,9 +138,18 @@ }, { "cell_type": "code", - "execution_count": 124, + "execution_count": 8, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/olasaadi/PycharmProjects/ai-privacy-toolkit-internal/venv/lib/python3.8/site-packages/art/attacks/inference/membership_inference/black_box.py:262: DataConversionWarning: A column-vector y was passed when a 1d array was expected. 
Please change the shape of y to (n_samples,), for example using ravel().\n", + " self.attack_model.fit(np.c_[x_1, x_2], y_ready) # type: ignore\n" + ] + } + ], "source": [ "from art.attacks.inference.membership_inference import MembershipInferenceBlackBox\n", "\n", @@ -154,14 +175,14 @@ }, { "cell_type": "code", - "execution_count": 125, + "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "0.5440363591696352\n" + "0.545264709495148\n" ] } ], @@ -197,7 +218,7 @@ }, { "cell_type": "code", - "execution_count": 128, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -215,6 +236,7 @@ } ], "source": [ + "from apt.utils.datasets import ArrayDataset\n", "import os\n", "import sys\n", "sys.path.insert(0, os.path.abspath('..'))\n", @@ -223,22 +245,20 @@ "# QI = (age, education-num, capital-gain, hours-per-week)\n", "QI = [0, 1, 2, 4]\n", "anonymizer = Anonymize(100, QI)\n", - "anon = anonymizer.anonymize(x_train, x_train_predictions)\n", + "anon = anonymizer.anonymize(ArrayDataset(x_train, x_train_predictions))\n", "print(anon)" ] }, { "cell_type": "code", - "execution_count": 104, + "execution_count": 11, "metadata": {}, "outputs": [ { "data": { - "text/plain": [ - "6739" - ] + "text/plain": "6739" }, - "execution_count": 104, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -250,16 +270,14 @@ }, { "cell_type": "code", - "execution_count": 129, + "execution_count": 12, "metadata": {}, "outputs": [ { "data": { - "text/plain": [ - "658" - ] + "text/plain": "658" }, - "execution_count": 129, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -278,14 +296,14 @@ }, { "cell_type": "code", - "execution_count": 130, + "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Anonymized model accuracy: 0.8304158221239482\n" + "Anonymized model accuracy: 0.83078434985566\n" ] } ], @@ -308,14 +326,22 @@ }, { 
"cell_type": "code", - "execution_count": 131, + "execution_count": 14, "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/olasaadi/PycharmProjects/ai-privacy-toolkit-internal/venv/lib/python3.8/site-packages/art/attacks/inference/membership_inference/black_box.py:262: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n", + " self.attack_model.fit(np.c_[x_1, x_2], y_ready) # type: ignore\n" + ] + }, { "name": "stdout", "output_type": "stream", "text": [ - "0.5034393809114359\n" + "0.5047291487532244\n" ] } ], @@ -345,15 +371,15 @@ }, { "cell_type": "code", - "execution_count": 132, + "execution_count": 15, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "(0.5298924372550654, 0.7806166318634075)\n", - "(0.5030507735890172, 0.5671293452892765)\n" + "(0.5312420517168291, 0.7696843139663432)\n", + "(0.5048372911169745, 0.4935511607910576)\n" ] } ], @@ -419,4 +445,4 @@ }, "nbformat": 4, "nbformat_minor": 2 -} +} \ No newline at end of file diff --git a/notebooks/membership_inference_dp_diabetes_reg.ipynb b/notebooks/membership_inference_dp_diabetes_reg.ipynb index 1376dc6..92922ab 100644 --- a/notebooks/membership_inference_dp_diabetes_reg.ipynb +++ b/notebooks/membership_inference_dp_diabetes_reg.ipynb @@ -29,7 +29,7 @@ }, { "cell_type": "code", - "execution_count": 121, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -50,7 +50,7 @@ }, { "cell_type": "code", - "execution_count": 122, + "execution_count": 2, "metadata": {}, "outputs": [ { @@ -86,14 +86,14 @@ }, { "cell_type": "code", - "execution_count": 123, + "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "0.4954954954954955\n" + "0.527027027027027\n" ] } ], @@ -131,7 +131,7 @@ }, { "cell_type": "code", - "execution_count": 124, + 
"execution_count": 4, "metadata": {}, "outputs": [ { @@ -141,6 +141,22 @@ "unique rows in original data: 221\n" ] }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/olasaadi/PycharmProjects/ai-privacy-toolkit-internal/venv/lib/python3.8/site-packages/art/attacks/inference/membership_inference/black_box.py:262: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n", + " self.attack_model.fit(np.c_[x_1, x_2], y_ready) # type: ignore\n", + "/Users/olasaadi/PycharmProjects/ai-privacy-toolkit-internal/venv/lib/python3.8/site-packages/art/attacks/inference/membership_inference/black_box.py:262: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n", + " self.attack_model.fit(np.c_[x_1, x_2], y_ready) # type: ignore\n", + "/Users/olasaadi/PycharmProjects/ai-privacy-toolkit-internal/venv/lib/python3.8/site-packages/art/attacks/inference/membership_inference/black_box.py:262: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n", + " self.attack_model.fit(np.c_[x_1, x_2], y_ready) # type: ignore\n", + "/Users/olasaadi/PycharmProjects/ai-privacy-toolkit-internal/venv/lib/python3.8/site-packages/art/attacks/inference/membership_inference/black_box.py:262: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n", + " self.attack_model.fit(np.c_[x_1, x_2], y_ready) # type: ignore\n", + "/Users/olasaadi/PycharmProjects/ai-privacy-toolkit-internal/venv/lib/python3.8/site-packages/art/attacks/inference/membership_inference/black_box.py:262: DataConversionWarning: A column-vector y was passed when a 1d array was expected. 
Please change the shape of y to (n_samples,), for example using ravel().\n", + " self.attack_model.fit(np.c_[x_1, x_2], y_ready) # type: ignore\n" + ] + }, { "name": "stdout", "output_type": "stream", @@ -148,11 +164,12 @@ "k values: [5, 10, 20, 50, 75]\n", "unique rows: [34, 19, 8, 4, 2]\n", "model accuracy: [0.43165832354998956, 0.4509641063206041, -1.730181929385853, -5.577098823982753e+27, -1.2751609045828272e+25]\n", - "attack accuracy: [0.5, 0.47297297297297297, 0.49549549549549543, 0.5, 0.47297297297297297]\n" + "attack accuracy: [0.509009009009009, 0.481981981981982, 0.509009009009009, 0.5045045045045045, 0.4954954954954955]\n" ] } ], "source": [ + "from apt.utils.datasets import ArrayDataset\n", "from apt.anonymization import Anonymize\n", "k_values=[5, 10, 20, 50, 75]\n", "model_accuracy = []\n", @@ -165,7 +182,7 @@ "\n", "for k in k_values:\n", " anonymizer = Anonymize(k, QI, is_regression=True)\n", - " anon = anonymizer.anonymize(X_train, x_train_predictions)\n", + " anon = anonymizer.anonymize(ArrayDataset(X_train, x_train_predictions))\n", " unique_values.append(len(np.unique(anon, axis=0)))\n", " \n", " anon_model = LinearRegression()\n", @@ -198,7 +215,7 @@ }, { "cell_type": "code", - "execution_count": 124, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [] diff --git a/notebooks/minimization_adult.ipynb b/notebooks/minimization_adult.ipynb index 17610a3..e8ccc20 100644 --- a/notebooks/minimization_adult.ipynb +++ b/notebooks/minimization_adult.ipynb @@ -27,7 +27,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 15, "metadata": {}, "outputs": [ { @@ -42,6 +42,18 @@ " [2.2000e+01 9.0000e+00 0.0000e+00 0.0000e+00 2.0000e+01]\n", " [5.2000e+01 9.0000e+00 1.5024e+04 0.0000e+00 4.0000e+01]]\n" ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/var/folders/9b/qbtw28w53355cvpjs4qn83yc0000gn/T/ipykernel_13726/1357868359.py:22: DeprecationWarning: `np.int` is a deprecated alias for the 
builtin `int`. To silence this warning, use `int` by itself. Doing this will not modify any behavior and is safe. When replacing `np.int`, you may wish to use e.g. `np.int64` or `np.int32` to specify the precision. If you wish to review your current use, check the release note link for additional information.\n", + "Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n", + " y_train = y_train.astype(np.int)\n", + "/var/folders/9b/qbtw28w53355cvpjs4qn83yc0000gn/T/ipykernel_13726/1357868359.py:26: DeprecationWarning: `np.int` is a deprecated alias for the builtin `int`. To silence this warning, use `int` by itself. Doing this will not modify any behavior and is safe. When replacing `np.int`, you may wish to use e.g. `np.int64` or `np.int32` to specify the precision. If you wish to review your current use, check the release note link for additional information.\n", + "Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n", + " y_test = y_test.astype(np.int)\n" + ] } ], "source": [ @@ -84,24 +96,27 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Base model accuracy: 0.8189914624408821\n" + "Base model accuracy: 0.8183158282660771\n" ] } ], "source": [ + "from apt.utils.datasets import ArrayDataset\n", + "from apt.utils.models import SklearnClassifier, ModelOutputType\n", "from sklearn.tree import DecisionTreeClassifier\n", "\n", - "model = DecisionTreeClassifier()\n", - "model.fit(x_train, y_train)\n", + "base_est = DecisionTreeClassifier()\n", + "model = SklearnClassifier(base_est, ModelOutputType.CLASSIFIER_VECTOR)\n", + "model.fit(ArrayDataset(x_train, y_train))\n", "\n", - "print('Base model accuracy: ', model.score(x_test, y_test))" + "print('Base model accuracy: ', model.score(ArrayDataset(x_test, y_test)))" ] 
}, { @@ -114,26 +129,26 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 17, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Initial accuracy of model on generalized data, relative to original model predictions (base generalization derived from tree, before improvements): 0.929376\n", + "Initial accuracy of model on generalized data, relative to original model predictions (base generalization derived from tree, before improvements): 0.936540\n", "Improving accuracy\n", - "feature to remove: 0\n", - "Removed feature: 0, new relative accuracy: 0.939867\n", - "feature to remove: 4\n", - "Removed feature: 4, new relative accuracy: 0.967247\n", "feature to remove: 2\n", - "Removed feature: 2, new relative accuracy: 0.972620\n", + "Removed feature: 2, new relative accuracy: 0.935261\n", + "feature to remove: 4\n", + "Removed feature: 4, new relative accuracy: 0.946776\n", + "feature to remove: 0\n", + "Removed feature: 0, new relative accuracy: 0.972876\n", "feature to remove: 1\n", - "Removed feature: 1, new relative accuracy: 0.992323\n", + "Removed feature: 1, new relative accuracy: 0.992835\n", "feature to remove: 3\n", "Removed feature: 3, new relative accuracy: 1.000000\n", - "Accuracy on minimized data: 0.8237371411024106\n" + "Accuracy on minimized data: 0.8231229847996315\n" ] } ], @@ -155,10 +170,12 @@ "X_generalizer_train, x_test, y_generalizer_train, y_test = train_test_split(x_test, y_test, stratify=y_test,\n", " test_size = 0.4, random_state = 38)\n", "x_train_predictions = model.predict(X_generalizer_train)\n", - "minimizer.fit(X_generalizer_train, x_train_predictions)\n", - "transformed = minimizer.transform(x_test)\n", + "if x_train_predictions.shape[1] > 1:\n", + " x_train_predictions = np.argmax(x_train_predictions, axis=1)\n", + "minimizer.fit(dataset=ArrayDataset(X_generalizer_train, x_train_predictions))\n", + "transformed = minimizer.transform(dataset=ArrayDataset(x_test))\n", "\n", - 
"print('Accuracy on minimized data: ', model.score(transformed, y_test))" + "print('Accuracy on minimized data: ', model.score(ArrayDataset(transformed, y_test)))" ] }, { @@ -170,14 +187,14 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 18, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "{'ranges': {}, 'untouched': [0, 1, 2, 3, 4]}\n" + "{'ranges': {}, 'categories': {}, 'untouched': ['4', '1', '3', '0', '2']}\n" ] } ], @@ -197,25 +214,25 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 19, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Initial accuracy of model on generalized data, relative to original model predictions (base generalization derived from tree, before improvements): 0.929376\n", + "Initial accuracy of model on generalized data, relative to original model predictions (base generalization derived from tree, before improvements): 0.936540\n", "Improving accuracy\n", - "feature to remove: 0\n", - "Removed feature: 0, new relative accuracy: 0.939867\n", - "feature to remove: 4\n", - "Removed feature: 4, new relative accuracy: 0.967247\n", "feature to remove: 2\n", - "Removed feature: 2, new relative accuracy: 0.972620\n", + "Removed feature: 2, new relative accuracy: 0.935261\n", + "feature to remove: 4\n", + "Removed feature: 4, new relative accuracy: 0.946776\n", + "feature to remove: 0\n", + "Removed feature: 0, new relative accuracy: 0.972876\n", "feature to remove: 1\n", - "Removed feature: 1, new relative accuracy: 0.992323\n", - "Accuracy on minimized data: 0.820205742361431\n", - "{'ranges': {3: [546.0, 704.0, 705.5, 742.5, 782.0, 834.0, 870.0, 1446.5, 1538.5, 1612.5, 1699.0, 1744.0, 1801.0, 1814.0, 1846.0, 1881.5, 1978.5, 2248.0, 2298.5, 2537.5]}, 'untouched': [0, 1, 2, 4]}\n" + "Removed feature: 1, new relative accuracy: 0.992835\n", + "Accuracy on minimized data: 0.8192845079072624\n", + "{'ranges': {'3': [569.0, 
782.0, 870.0, 870.5, 938.0, 1016.5, 1311.5, 1457.0, 1494.5, 1596.0, 1629.5, 1684.0, 1805.0, 1859.0, 1867.5, 1881.5, 1938.0, 1978.5, 2119.0, 2210.0, 2218.0, 2244.5, 2298.5, 2443.5]}, 'categories': {}, 'untouched': ['2', '1', '0', '4']}\n" ] } ], @@ -223,9 +240,9 @@ "# We allow a 1% deviation in accuracy from the original model accuracy\n", "minimizer2 = GeneralizeToRepresentative(model, target_accuracy=0.99)\n", "\n", - "minimizer2.fit(X_generalizer_train, x_train_predictions)\n", - "transformed2 = minimizer2.transform(x_test)\n", - "print('Accuracy on minimized data: ', model.score(transformed2, y_test))\n", + "minimizer2.fit(dataset=ArrayDataset(X_generalizer_train, x_train_predictions))\n", + "transformed2 = minimizer2.transform(dataset=ArrayDataset(x_test))\n", + "print('Accuracy on minimized data: ', model.score(test_data=ArrayDataset(transformed2, y_test)))\n", "generalizations2 = minimizer2.generalizations\n", "print(generalizations2)" ] @@ -259,4 +276,4 @@ }, "nbformat": 4, "nbformat_minor": 2 -} +} \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index fa4131d..ec37771 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,6 +2,7 @@ numpy==1.21.0 pandas==1.1.0 scipy==1.4.1 scikit-learn==0.22.2 +adversarial-robustness-toolbox>=1.9.1 # testing pytest==5.4.2 diff --git a/tests/test_anonymizer.py b/tests/test_anonymizer.py index 000eefa..358398c 100644 --- a/tests/test_anonymizer.py +++ b/tests/test_anonymizer.py @@ -7,13 +7,15 @@ from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor from sklearn.preprocessing import OneHotEncoder from apt.anonymization import Anonymize -from apt.utils import get_iris_dataset, get_adult_dataset, get_nursery_dataset +from apt.utils.dataset_utils import get_iris_dataset, get_adult_dataset, get_nursery_dataset from sklearn.datasets import load_diabetes from sklearn.model_selection import train_test_split +from apt.utils.datasets import ArrayDataset, DATA_PANDAS_NUMPY_TYPE def 
test_anonymize_ndarray_iris(): (x_train, y_train), _ = get_iris_dataset() + model = DecisionTreeClassifier() model.fit(x_train, y_train) pred = model.predict(x_train) @@ -21,7 +23,7 @@ def test_anonymize_ndarray_iris(): k = 10 QI = [0, 2] anonymizer = Anonymize(k, QI, train_only_QI=True) - anon = anonymizer.anonymize(x_train, pred) + anon = anonymizer.anonymize(ArrayDataset(x_train, pred)) assert(len(np.unique(anon[:, QI], axis=0)) < len(np.unique(x_train[:, QI], axis=0))) _, counts_elements = np.unique(anon[:, QI], return_counts=True) assert (np.min(counts_elements) >= k) @@ -30,10 +32,14 @@ def test_anonymize_ndarray_iris(): def test_anonymize_pandas_adult(): (x_train, y_train), _ = get_adult_dataset() + encoded = OneHotEncoder().fit_transform(x_train) + model = DecisionTreeClassifier() + model.fit(encoded, y_train) + pred = model.predict(encoded) k = 100 - features = ['age', 'workclass', 'education-num', 'marital-status', 'occupation', - 'relationship', 'race', 'sex', 'capital-gain', 'capital-loss', 'hours-per-week', 'native-country'] + features = ['age', 'workclass', 'education-num', 'marital-status', 'occupation', 'relationship', 'race', 'sex', + 'capital-gain', 'capital-loss', 'hours-per-week', 'native-country'] QI = ['age', 'workclass', 'education-num', 'marital-status', 'occupation', 'relationship', 'race', 'sex', 'native-country'] categorical_features = ['workclass', 'marital-status', 'occupation', 'relationship', 'race', 'sex', @@ -56,12 +62,11 @@ def test_anonymize_pandas_adult(): pred = model.predict(encoded) anonymizer = Anonymize(k, QI, categorical_features=categorical_features) - anon = anonymizer.anonymize(x_train, pred) + anon = anonymizer.anonymize(ArrayDataset(x_train, pred, features)) assert(anon.loc[:, QI].drop_duplicates().shape[0] < x_train.loc[:, QI].drop_duplicates().shape[0]) assert (anon.loc[:, QI].value_counts().min() >= k) - assert (anon.drop(QI, axis=1).equals(x_train.drop(QI, axis=1))) - + np.testing.assert_array_equal(anon.drop(QI, 
axis=1), x_train.drop(QI, axis=1)) def test_anonymize_pandas_nursery(): (x_train, y_train), _ = get_nursery_dataset() @@ -89,11 +94,11 @@ def test_anonymize_pandas_nursery(): pred = model.predict(encoded) anonymizer = Anonymize(k, QI, categorical_features=categorical_features, train_only_QI=True) - anon = anonymizer.anonymize(x_train, pred) + anon = anonymizer.anonymize(ArrayDataset(x_train, pred)) assert(anon.loc[:, QI].drop_duplicates().shape[0] < x_train.loc[:, QI].drop_duplicates().shape[0]) assert (anon.loc[:, QI].value_counts().min() >= k) - assert (anon.drop(QI, axis=1).equals(x_train.drop(QI, axis=1))) + np.testing.assert_array_equal(anon.drop(QI, axis=1), x_train.drop(QI, axis=1)) def test_regression(): @@ -107,7 +112,7 @@ def test_regression(): k = 10 QI = [0, 2, 5, 8] anonymizer = Anonymize(k, QI, is_regression=True, train_only_QI=True) - anon = anonymizer.anonymize(x_train, pred) + anon = anonymizer.anonymize(ArrayDataset(x_train, pred)) print('Base model accuracy (R2 score): ', model.score(x_test, y_test)) model.fit(anon, y_train) print('Base model accuracy (R2 score) after anonymization: ', model.score(x_test, y_test)) @@ -127,7 +132,7 @@ def test_errors(): anonymizer = Anonymize(10, [0, 2]) (x_train, y_train), (x_test, y_test) = get_iris_dataset() with pytest.raises(ValueError): - anonymizer.anonymize(x_train, y_test) + anonymizer.anonymize(dataset=ArrayDataset(x_train, y_test)) (x_train, y_train), _ = get_adult_dataset() with pytest.raises(ValueError): - anonymizer.anonymize(x_train, y_train) + anonymizer.anonymize(dataset=ArrayDataset(x_train, y_test)) diff --git a/tests/test_minimizer.py b/tests/test_minimizer.py index e6f50be..630cd49 100644 --- a/tests/test_minimizer.py +++ b/tests/test_minimizer.py @@ -5,14 +5,15 @@ from sklearn.compose import ColumnTransformer from sklearn.datasets import load_boston, load_diabetes from sklearn.impute import SimpleImputer -from sklearn.linear_model import LogisticRegression from sklearn.model_selection import 
train_test_split from sklearn.pipeline import Pipeline -from sklearn.preprocessing import OneHotEncoder, StandardScaler +from sklearn.preprocessing import OneHotEncoder from apt.minimization import GeneralizeToRepresentative from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor -from apt.utils import get_iris_dataset, get_adult_dataset, get_nursery_dataset, get_german_credit_dataset +from apt.utils.dataset_utils import get_iris_dataset, get_adult_dataset, get_nursery_dataset, get_german_credit_dataset +from apt.utils.datasets import ArrayDataset +from apt.utils.models import SklearnClassifier, ModelOutputType, SklearnRegressor @pytest.fixture @@ -38,11 +39,12 @@ def test_minimizer_params(data): y = [1, 1, 0] base_est = DecisionTreeClassifier(random_state=0, min_samples_split=2, min_samples_leaf=1) - base_est.fit(X, y) + model = SklearnClassifier(base_est, ModelOutputType.CLASSIFIER_VECTOR) + model.fit(ArrayDataset(X, y)) - gen = GeneralizeToRepresentative(base_est, features=features, cells=cells) + gen = GeneralizeToRepresentative(model, cells=cells) gen.fit() - transformed = gen.transform(X) + transformed = gen.transform(dataset=ArrayDataset(X, features_names=features)) def test_minimizer_fit(data): @@ -58,15 +60,20 @@ def test_minimizer_fit(data): [69, 175], [24, 181], [18, 190]]) - y = [1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0] + y = np.array([1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0]) base_est = DecisionTreeClassifier(random_state=0, min_samples_split=2, min_samples_leaf=1) - base_est.fit(X, y) - predictions = base_est.predict(X) + model = SklearnClassifier(base_est, ModelOutputType.CLASSIFIER_VECTOR) + model.fit(ArrayDataset(X, y)) + predictions = model.predict(X) + if predictions.shape[1] > 1: + predictions = np.argmax(predictions, axis=1) - gen = GeneralizeToRepresentative(base_est, features=features, target_accuracy=0.5) - gen.fit(X, predictions) - transformed = gen.transform(X) + gen = GeneralizeToRepresentative(model, target_accuracy=0.5) + train_dataset = 
ArrayDataset(X, predictions, features_names=features) + + gen.fit(dataset=train_dataset) + transformed = gen.transform(dataset=ArrayDataset(X)) gener = gen.generalizations_ expexted_generalizations = {'ranges': {}, 'categories': {}, 'untouched': ['height', 'age']} @@ -103,7 +110,7 @@ def test_minimizer_fit_pandas(data): [69, 175, 'm', 'aa'], [24, 181, 'm', 'bb'], [18, 190, 'm', 'bb']] - y = [1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0] + y = np.array([1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0]) X = pd.DataFrame(X, columns=features) numeric_features = ["age", "height"] @@ -121,16 +128,22 @@ def test_minimizer_fit_pandas(data): ] ) encoded = preprocessor.fit_transform(X) + encoded = pd.DataFrame(encoded) base_est = DecisionTreeClassifier(random_state=0, min_samples_split=2, min_samples_leaf=1) - base_est.fit(encoded, y) - predictions = base_est.predict(encoded) + model = SklearnClassifier(base_est, ModelOutputType.CLASSIFIER_VECTOR) + model.fit(ArrayDataset(encoded, y)) + predictions = model.predict(encoded) + if predictions.shape[1] > 1: + predictions = np.argmax(predictions, axis=1) + # Append classifier to preprocessing pipeline. # Now we have a full prediction pipeline. 
- gen = GeneralizeToRepresentative(base_est, features=features, target_accuracy=0.5, + gen = GeneralizeToRepresentative(model, target_accuracy=0.5, categorical_features=categorical_features) - gen.fit(X, predictions) - transformed = gen.transform(X) + train_dataset = ArrayDataset(X, predictions) + gen.fit(dataset=train_dataset) + transformed = gen.transform(dataset=ArrayDataset(X)) gener = gen.generalizations_ expexted_generalizations = {'ranges': {'age': []}, 'categories': {}, 'untouched': ['ola', 'height', 'sex']} @@ -143,7 +156,7 @@ def test_minimizer_fit_pandas(data): modified_features = [f for f in features if f in expexted_generalizations['categories'].keys() or f in expexted_generalizations[ 'ranges'].keys()] - assert (transformed.drop(modified_features, axis=1).equals(X.drop(modified_features, axis=1))) + np.testing.assert_array_equal(transformed.drop(modified_features, axis=1), X.drop(modified_features, axis=1)) ncp = gen.ncp_ if len(expexted_generalizations['ranges'].keys()) > 0 or len(expexted_generalizations['categories'].keys()) > 0: assert (ncp > 0) @@ -179,7 +192,7 @@ def test_minimizer_params_categorical(data): [24, 181, 'm'], [18, 190, 'm']] - y = [1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0] + y = np.array([1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0]) X = pd.DataFrame(X, columns=features) numeric_features = ["age", "height"] numeric_transformer = Pipeline( @@ -196,16 +209,21 @@ def test_minimizer_params_categorical(data): ] ) encoded = preprocessor.fit_transform(X) + encoded = pd.DataFrame(encoded) base_est = DecisionTreeClassifier(random_state=0, min_samples_split=2, min_samples_leaf=1) - base_est.fit(encoded, y) - predictions = base_est.predict(encoded) + model = SklearnClassifier(base_est, ModelOutputType.CLASSIFIER_VECTOR) + model.fit(ArrayDataset(encoded, y)) + predictions = model.predict(encoded) + if predictions.shape[1] > 1: + predictions = np.argmax(predictions, axis=1) # Append classifier to preprocessing pipeline. # Now we have a full prediction pipeline. 
- gen = GeneralizeToRepresentative(base_est, features=features, target_accuracy=0.5, + gen = GeneralizeToRepresentative(model, target_accuracy=0.5, categorical_features=categorical_features, cells=cells) - gen.fit(X, predictions) - transformed = gen.transform(X) + train_dataset = ArrayDataset(X, predictions) + gen.fit(dataset=train_dataset) + transformed = gen.transform(dataset=ArrayDataset(X)) def test_minimizer_fit_QI(data): @@ -222,16 +240,20 @@ def test_minimizer_fit_QI(data): [24, 181, 95], [18, 190, 102]]) print(X) - y = [1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0] - QI = [0, 2] + y = np.array([1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0]) + QI = ['age', 'weight'] base_est = DecisionTreeClassifier(random_state=0, min_samples_split=2, min_samples_leaf=1) - base_est.fit(X, y) - predictions = base_est.predict(X) + model = SklearnClassifier(base_est, ModelOutputType.CLASSIFIER_VECTOR) + model.fit(ArrayDataset(X, y)) + predictions = model.predict(X) + if predictions.shape[1] > 1: + predictions = np.argmax(predictions, axis=1) - gen = GeneralizeToRepresentative(base_est, features=features, target_accuracy=0.5, features_to_minimize=QI) - gen.fit(X, predictions) - transformed = gen.transform(X) + gen = GeneralizeToRepresentative(model, target_accuracy=0.5, features_to_minimize=QI) + train_dataset = ArrayDataset(X, predictions, features_names=features) + gen.fit(dataset=train_dataset) + transformed = gen.transform(dataset=ArrayDataset(X)) gener = gen.generalizations_ expexted_generalizations = {'ranges': {'age': [], 'weight': [67.5]}, 'categories': {}, 'untouched': ['height']} for key in expexted_generalizations['ranges']: @@ -240,7 +262,7 @@ def test_minimizer_fit_QI(data): assert (set([frozenset(sl) for sl in expexted_generalizations['categories'][key]]) == set([frozenset(sl) for sl in gener['categories'][key]])) assert (set(expexted_generalizations['untouched']) == set(gener['untouched'])) - assert ((np.delete(transformed, QI, axis=1) == np.delete(X, QI, axis=1)).all()) + assert 
((np.delete(transformed, [0, 2], axis=1) == np.delete(X, [0, 2], axis=1)).all()) modified_features = [f for f in features if f in expexted_generalizations['categories'].keys() or f in expexted_generalizations[ 'ranges'].keys()] @@ -269,7 +291,7 @@ def test_minimizer_fit_pandas_QI(data): [24, 181, 49, 'm', 'bb'], [18, 190, 69, 'm', 'bb']] - y = [1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0] + y = pd.Series([1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0]) X = pd.DataFrame(X, columns=features) QI = ['age', 'weight', 'ola'] @@ -288,16 +310,22 @@ def test_minimizer_fit_pandas_QI(data): ] ) encoded = preprocessor.fit_transform(X) + encoded = pd.DataFrame(encoded) base_est = DecisionTreeClassifier(random_state=0, min_samples_split=2, min_samples_leaf=1) - base_est.fit(encoded, y) - predictions = base_est.predict(encoded) + model = SklearnClassifier(base_est, ModelOutputType.CLASSIFIER_VECTOR) + model.fit(ArrayDataset(encoded, y)) + predictions = model.predict(encoded) + if predictions.shape[1] > 1: + predictions = np.argmax(predictions, axis=1) + # Append classifier to preprocessing pipeline. # Now we have a full prediction pipeline. 
- gen = GeneralizeToRepresentative(base_est, features=features, target_accuracy=0.5, + gen = GeneralizeToRepresentative(model, target_accuracy=0.5, categorical_features=categorical_features, features_to_minimize=QI) - gen.fit(X, predictions) - transformed = gen.transform(X) + train_dataset = ArrayDataset(X, predictions) + gen.fit(dataset=train_dataset) + transformed = gen.transform(dataset=ArrayDataset(X)) gener = gen.generalizations_ expexted_generalizations = {'ranges': {'age': [], 'weight': [47.0]}, 'categories': {'ola': [['bb', 'aa']]}, 'untouched': ['height', 'sex']} @@ -308,12 +336,13 @@ def test_minimizer_fit_pandas_QI(data): assert (set([frozenset(sl) for sl in expexted_generalizations['categories'][key]]) == set([frozenset(sl) for sl in gener['categories'][key]])) assert (set(expexted_generalizations['untouched']) == set(gener['untouched'])) - assert (transformed.drop(QI, axis=1).equals(X.drop(QI, axis=1))) - + # assert (transformed.drop(QI, axis=1).equals(X.drop(QI, axis=1))) + np.testing.assert_array_equal(transformed.drop(QI, axis=1), X.drop(QI, axis=1)) modified_features = [f for f in features if f in expexted_generalizations['categories'].keys() or f in expexted_generalizations[ 'ranges'].keys()] - assert (transformed.drop(modified_features, axis=1).equals(X.drop(modified_features, axis=1))) + # assert (transformed.drop(modified_features, axis=1).equals(X.drop(modified_features, axis=1))) + np.testing.assert_array_equal(transformed.drop(modified_features, axis=1), X.drop(modified_features, axis=1)) ncp = gen.ncp_ if len(expexted_generalizations['ranges'].keys()) > 0 or len(expexted_generalizations['categories'].keys()) > 0: assert (ncp > 0) @@ -322,16 +351,19 @@ def test_minimizer_fit_pandas_QI(data): def test_minimize_ndarray_iris(): features = ['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)'] - (x_train, y_train), _ = get_iris_dataset() - QI = [0, 2] - model = DecisionTreeClassifier(random_state=0, 
min_samples_split=2, - min_samples_leaf=1) - model.fit(x_train, y_train) - pred = model.predict(x_train) + (x_train, y_train), (x_test, y_test) = get_iris_dataset() + QI = ['sepal length (cm)', 'petal length (cm)'] + base_est = DecisionTreeClassifier(random_state=0, min_samples_split=2, + min_samples_leaf=1) + model = SklearnClassifier(base_est, ModelOutputType.CLASSIFIER_VECTOR) + model.fit(ArrayDataset(x_train, y_train)) + predictions = model.predict(x_train) + if predictions.shape[1] > 1: + predictions = np.argmax(predictions, axis=1) - gen = GeneralizeToRepresentative(model, target_accuracy=0.3, features=features, features_to_minimize=QI) - gen.fit(x_train, pred) - transformed = gen.transform(x_train) + gen = GeneralizeToRepresentative(model, target_accuracy=0.3, features_to_minimize=QI) + # gen.fit(dataset=ArrayDataset(x_train, predictions)) + transformed = gen.fit_transform(dataset=ArrayDataset(x_train, predictions, features_names=features)) gener = gen.generalizations_ expexted_generalizations = {'ranges': {'sepal length (cm)': [], 'petal length (cm)': [2.449999988079071]}, 'categories': {}, 'untouched': ['petal width (cm)', 'sepal width (cm)']} @@ -342,7 +374,7 @@ def test_minimize_ndarray_iris(): assert (set([frozenset(sl) for sl in expexted_generalizations['categories'][key]]) == set([frozenset(sl) for sl in gener['categories'][key]])) assert (set(expexted_generalizations['untouched']) == set(gener['untouched'])) - assert ((np.delete(transformed, QI, axis=1) == np.delete(x_train, QI, axis=1)).all()) + assert ((np.delete(transformed, [0, 2], axis=1) == np.delete(x_train, [0, 2], axis=1)).all()) modified_features = [f for f in features if f in expexted_generalizations['categories'].keys() or f in expexted_generalizations[ @@ -359,12 +391,13 @@ def test_minimize_ndarray_iris(): def test_minimize_pandas_adult(): - (x_train, y_train), _ = get_adult_dataset() + (x_train, y_train), (x_test, y_test) = get_adult_dataset() x_train = x_train.head(1000) y_train = 
y_train.head(1000) features = ['age', 'workclass', 'education-num', 'marital-status', 'occupation', 'relationship', 'race', 'sex', 'capital-gain', 'capital-loss', 'hours-per-week', 'native-country'] + x_train = pd.DataFrame(x_train, columns=features) categorical_features = ['workclass', 'marital-status', 'occupation', 'relationship', 'race', 'sex', 'hours-per-week', 'native-country'] @@ -384,15 +417,19 @@ def test_minimize_pandas_adult(): ] ) encoded = preprocessor.fit_transform(x_train) + encoded = pd.DataFrame(encoded) base_est = DecisionTreeClassifier(random_state=0, min_samples_split=2, min_samples_leaf=1) - base_est.fit(encoded, y_train) - predictions = base_est.predict(encoded) + model = SklearnClassifier(base_est, ModelOutputType.CLASSIFIER_VECTOR) + model.fit(ArrayDataset(encoded, y_train)) + predictions = model.predict(encoded) + if predictions.shape[1] > 1: + predictions = np.argmax(predictions, axis=1) - gen = GeneralizeToRepresentative(base_est, target_accuracy=0.7, features=features, + gen = GeneralizeToRepresentative(model, target_accuracy=0.7, categorical_features=categorical_features, features_to_minimize=QI) - gen.fit(x_train, predictions) - transformed = gen.transform(x_train) + gen.fit(dataset=ArrayDataset(x_train, predictions, features_names=features)) + transformed = gen.transform(dataset=ArrayDataset(x_train)) gener = gen.generalizations_ expexted_generalizations = {'ranges': {'age': [], 'education-num': []}, 'categories': { 'workclass': [['Self-emp-not-inc', 'Private', 'Federal-gov', 'Self-emp-inc', '?', 'Local-gov', 'State-gov']], @@ -414,12 +451,14 @@ def test_minimize_pandas_adult(): assert (set([frozenset(sl) for sl in expexted_generalizations['categories'][key]]) == set([frozenset(sl) for sl in gener['categories'][key]])) assert (set(expexted_generalizations['untouched']) == set(gener['untouched'])) - assert (transformed.drop(QI, axis=1).equals(x_train.drop(QI, axis=1))) + # assert (transformed.drop(QI, axis=1).equals(x_train.drop(QI, 
axis=1))) + np.testing.assert_array_equal(transformed.drop(QI, axis=1), x_train.drop(QI, axis=1)) modified_features = [f for f in features if f in expexted_generalizations['categories'].keys() or f in expexted_generalizations[ 'ranges'].keys()] - assert (transformed.drop(modified_features, axis=1).equals(x_train.drop(modified_features, axis=1))) + # assert (transformed.drop(modified_features, axis=1).equals(x_train.drop(modified_features, axis=1))) + np.testing.assert_array_equal(transformed.drop(modified_features, axis=1), x_train.drop(modified_features, axis=1)) ncp = gen.ncp_ if len(expexted_generalizations['ranges'].keys()) > 0 or len(expexted_generalizations['categories'].keys()) > 0: assert (ncp > 0) @@ -451,15 +490,19 @@ def test_german_credit_pandas(): ] ) encoded = preprocessor.fit_transform(x_train) + encoded = pd.DataFrame(encoded) base_est = DecisionTreeClassifier(random_state=0, min_samples_split=2, min_samples_leaf=1) - base_est.fit(encoded, y_train) - predictions = base_est.predict(encoded) + model = SklearnClassifier(base_est, ModelOutputType.CLASSIFIER_VECTOR) + model.fit(ArrayDataset(encoded, y_train)) + predictions = model.predict(encoded) + if predictions.shape[1] > 1: + predictions = np.argmax(predictions, axis=1) - gen = GeneralizeToRepresentative(base_est, target_accuracy=0.7, features=features, + gen = GeneralizeToRepresentative(model, target_accuracy=0.7, categorical_features=categorical_features, features_to_minimize=QI) - gen.fit(x_train, predictions) - transformed = gen.transform(x_train) + gen.fit(dataset=ArrayDataset(x_train, predictions)) + transformed = gen.transform(dataset=ArrayDataset(x_train)) gener = gen.generalizations_ expexted_generalizations = {'ranges': {'Duration_in_month': [31.5]}, 'categories': {'Credit_history': [['A30', 'A32', 'A31', 'A34', 'A33']], 'Purpose': [ @@ -481,12 +524,14 @@ def test_german_credit_pandas(): assert (set([frozenset(sl) for sl in expexted_generalizations['categories'][key]]) == set([frozenset(sl) 
for sl in gener['categories'][key]])) assert (set(expexted_generalizations['untouched']) == set(gener['untouched'])) - assert (transformed.drop(QI, axis=1).equals(x_train.drop(QI, axis=1))) + # assert (transformed.drop(QI, axis=1).equals(x_train.drop(QI, axis=1))) + np.testing.assert_array_equal(transformed.drop(QI, axis=1), x_train.drop(QI, axis=1)) modified_features = [f for f in features if f in expexted_generalizations['categories'].keys() or f in expexted_generalizations[ 'ranges'].keys()] - assert (transformed.drop(modified_features, axis=1).equals(x_train.drop(modified_features, axis=1))) + # assert (transformed.drop(modified_features, axis=1).equals(x_train.drop(modified_features, axis=1))) + np.testing.assert_array_equal(transformed.drop(modified_features, axis=1), x_train.drop(modified_features, axis=1)) ncp = gen.ncp_ if len(expexted_generalizations['ranges'].keys()) > 0 or len(expexted_generalizations['categories'].keys()) > 0: assert (ncp > 0) @@ -497,17 +542,258 @@ def test_regression(): dataset = load_diabetes() x_train, x_test, y_train, y_test = train_test_split(dataset.data, dataset.target, test_size=0.5, random_state=14) - model = DecisionTreeRegressor(random_state=10, min_samples_split=2) - model.fit(x_train, y_train) - pred = model.predict(x_train) - QI = [0, 2, 5, 8] + base_est = DecisionTreeRegressor(random_state=10, min_samples_split=2) + model = SklearnRegressor(base_est) + model.fit(ArrayDataset(x_train, y_train)) + predictions = model.predict(x_train) + QI = ['age', 'bmi', 's2', 's5'] features = ['age', 'sex', 'bmi', 'bp', 's1', 's2', 's3', 's4', 's5', 's6'] - gen = GeneralizeToRepresentative(model, target_accuracy=0.7, features=features, is_regression=True, + gen = GeneralizeToRepresentative(model, target_accuracy=0.7, is_regression=True, features_to_minimize=QI) - gen.fit(x_train, pred) - transformed = gen.transform(x_train) + gen.fit(dataset=ArrayDataset(x_train, predictions, features_names=features)) + transformed = 
gen.transform(dataset=ArrayDataset(x_train, features_names=features)) + print('Base model accuracy (R2 score): ', model.score(ArrayDataset(x_test, y_test))) + model.fit(ArrayDataset(transformed, y_train)) + print('Base model accuracy (R2 score) after anonymization: ', model.score(ArrayDataset(x_test, y_test))) + gener = gen.generalizations_ + expexted_generalizations = {'ranges': { + 'age': [-0.07816532626748085, -0.07090024650096893, -0.05637009255588055, -0.05092128552496433, + -0.04728874587453902, -0.04547247663140297, -0.04183994047343731, -0.027309784665703773, + -0.023677248042076826, -0.020044708624482155, -0.01641217083670199, -0.001882016600575298, + 0.0017505218856967986, 0.0035667913616634905, 0.007199329789727926, 0.010831868276000023, + 0.02354575227946043, 0.030810829252004623, 0.03262709779664874, 0.03444336913526058, + 0.03625963814556599, 0.03807590529322624, 0.03807590715587139, 0.047157252207398415, + 0.06168740428984165, 0.0635036751627922, 0.06895248219370842, 0.07258502021431923, 0.07621755823493004, + 0.1034616008400917], + 'bmi': [-0.07626373693346977, -0.060635464265942574, -0.056863121688365936, -0.05578530766069889, + -0.054168591275811195, -0.042312657460570335, -0.0374625027179718, -0.03422906715422869, + -0.033690162003040314, -0.03261234890669584, -0.02614547684788704, -0.025067666545510292, + -0.022373135201632977, -0.016984074376523495, -0.01375063881278038, -0.007822672137990594, + -0.004589236050378531, 0.008344509289599955, 0.015889193629845977, 0.016967005096375942, + 0.024511689320206642, 0.0272062208969146, 0.030978563241660595, 0.032595280557870865, + 0.033673093654215336, 0.04391230642795563, 0.04552902653813362, 0.05469042807817459, + 0.06977979838848114, 0.07301323488354683, 0.09349166229367256], + 's2': [-0.1044962927699089, -0.08649025857448578, -0.07740895450115204, -0.07114598527550697, + -0.06378699466586113, -0.05971606448292732, -0.04437179118394852, -0.0398311372846365, + -0.03137612994760275, 
-0.022138250060379505, -0.018067320343106985, -0.017910746857523918, + -0.017910745926201344, -0.01618842873722315, -0.007576846517622471, -0.007263698382303119, + -0.0010007291566580534, 0.0010347360512241721, 0.006514834007248282, 0.00933317095041275, + 0.012464655097573996, 0.019197346206055954, 0.020919663831591606, 0.02217225730419159, + 0.032036433927714825, 0.036420512944459915, 0.04080459102988243, 0.04127431474626064, + 0.04268348217010498, 0.04424922354519367, 0.04424922540783882, 0.056462014093995094, 0.05928034894168377, + 0.061315815430134535, 0.06272498145699501, 0.06460387445986271]}, 'categories': {}, + 'untouched': ['s5', 's3', 'bp', 's1', 'sex', 's6', 's4']} + + for key in expexted_generalizations['ranges']: + assert (set(expexted_generalizations['ranges'][key]) == set(gener['ranges'][key])) + for key in expexted_generalizations['categories']: + assert (set([frozenset(sl) for sl in expexted_generalizations['categories'][key]]) == + set([frozenset(sl) for sl in gener['categories'][key]])) + assert (set(expexted_generalizations['untouched']) == set(gener['untouched'])) + assert ((np.delete(transformed, [0, 2, 5, 8], axis=1) == np.delete(x_train, [0, 2, 5, 8], axis=1)).all()) + + modified_features = [f for f in features if + f in expexted_generalizations['categories'].keys() or f in expexted_generalizations[ + 'ranges'].keys()] + indexes = [] + for i in range(len(features)): + if features[i] in modified_features: + indexes.append(i) + assert ((np.delete(transformed, indexes, axis=1) == np.delete(x_train, indexes, axis=1)).all()) + ncp = gen.ncp_ + if len(expexted_generalizations['ranges'].keys()) > 0 or len(expexted_generalizations['categories'].keys()) > 0: + assert (ncp > 0) + assert (((transformed[indexes]) != (x_train[indexes])).any()) + + +def test_X_y(data): + features = [0, 1, 2] + X = np.array([[23, 165, 70], + [45, 158, 67], + [56, 123, 65], + [67, 154, 90], + [45, 149, 67], + [42, 166, 58], + [73, 172, 68], + [94, 168, 69], + [69, 175, 80], 
+ [24, 181, 95], + [18, 190, 102]]) + print(X) + y = np.array([1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0]) + QI = [0, 2] + base_est = DecisionTreeClassifier(random_state=0, min_samples_split=2, + min_samples_leaf=1) + model = SklearnClassifier(base_est, ModelOutputType.CLASSIFIER_VECTOR) + model.fit(ArrayDataset(X, y)) + predictions = model.predict(X) + if predictions.shape[1] > 1: + predictions = np.argmax(predictions, axis=1) + + gen = GeneralizeToRepresentative(model, target_accuracy=0.5, features_to_minimize=QI) + gen.fit(X=X, y=predictions) + transformed = gen.transform(X) + gener = gen.generalizations_ + expexted_generalizations = {'ranges': {'0': [], '2': [67.5]}, 'categories': {}, 'untouched': ['1']} + for key in expexted_generalizations['ranges']: + assert (set(expexted_generalizations['ranges'][key]) == set(gener['ranges'][key])) + for key in expexted_generalizations['categories']: + assert (set([frozenset(sl) for sl in expexted_generalizations['categories'][key]]) == + set([frozenset(sl) for sl in gener['categories'][key]])) + assert (set(expexted_generalizations['untouched']) == set(gener['untouched'])) + assert ((np.delete(transformed, [0, 2], axis=1) == np.delete(X, [0, 2], axis=1)).all()) + modified_features = [f for f in features if + str(f) in expexted_generalizations['categories'].keys() or str(f) in expexted_generalizations[ + 'ranges'].keys()] + indexes = [] + for i in range(len(features)): + if features[i] in modified_features: + indexes.append(i) + assert ((np.delete(transformed, indexes, axis=1) == np.delete(X, indexes, axis=1)).all()) + ncp = gen.ncp_ + if len(expexted_generalizations['ranges'].keys()) > 0 or len(expexted_generalizations['categories'].keys()) > 0: + assert (ncp > 0) + assert (((transformed[indexes]) != (X[indexes])).any()) + + +def test_X_y_features_names(data): + features = ['age', 'height', 'weight'] + X = np.array([[23, 165, 70], + [45, 158, 67], + [56, 123, 65], + [67, 154, 90], + [45, 149, 67], + [42, 166, 58], + [73, 172, 68], + 
[94, 168, 69], + [69, 175, 80], + [24, 181, 95], + [18, 190, 102]]) + print(X) + y = np.array([1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0]) + QI = ['age', 'weight'] + base_est = DecisionTreeClassifier(random_state=0, min_samples_split=2, + min_samples_leaf=1) + model = SklearnClassifier(base_est, ModelOutputType.CLASSIFIER_VECTOR) + model.fit(ArrayDataset(X, y)) + predictions = model.predict(X) + if predictions.shape[1] > 1: + predictions = np.argmax(predictions, axis=1) + + gen = GeneralizeToRepresentative(model, target_accuracy=0.5, features_to_minimize=QI) + gen.fit(X=X, y=predictions, features_names=features) + transformed = gen.transform(X=X, features_names=features) + gener = gen.generalizations_ + expexted_generalizations = {'ranges': {'age': [], 'weight': [67.5]}, 'categories': {}, 'untouched': ['height']} + for key in expexted_generalizations['ranges']: + assert (set(expexted_generalizations['ranges'][key]) == set(gener['ranges'][key])) + for key in expexted_generalizations['categories']: + assert (set([frozenset(sl) for sl in expexted_generalizations['categories'][key]]) == + set([frozenset(sl) for sl in gener['categories'][key]])) + assert (set(expexted_generalizations['untouched']) == set(gener['untouched'])) + assert ((np.delete(transformed, [0, 2], axis=1) == np.delete(X, [0, 2], axis=1)).all()) + modified_features = [f for f in features if + f in expexted_generalizations['categories'].keys() or f in expexted_generalizations[ + 'ranges'].keys()] + indexes = [] + for i in range(len(features)): + if features[i] in modified_features: + indexes.append(i) + assert ((np.delete(transformed, indexes, axis=1) == np.delete(X, indexes, axis=1)).all()) + ncp = gen.ncp_ + if len(expexted_generalizations['ranges'].keys()) > 0 or len(expexted_generalizations['categories'].keys()) > 0: + assert (ncp > 0) + assert (((transformed[indexes]) != (X[indexes])).any()) + + +def test_BaseEstimator_classification(data): + features = ['age', 'height', 'weight', 'sex', 'ola'] + X = [[23, 
165, 65, 'f', 'aa'], + [45, 158, 76, 'f', 'aa'], + [56, 123, 78, 'f', 'bb'], + [67, 154, 87, 'm', 'aa'], + [45, 149, 45, 'f', 'bb'], + [42, 166, 76, 'm', 'bb'], + [73, 172, 85, 'm', 'bb'], + [94, 168, 92, 'f', 'aa'], + [69, 175, 95, 'm', 'aa'], + [24, 181, 49, 'm', 'bb'], + [18, 190, 69, 'm', 'bb']] + + y = pd.Series([1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0]) + X = pd.DataFrame(X, columns=features) + QI = ['age', 'weight', 'ola'] + + numeric_features = ["age", "height", "weight"] + numeric_transformer = Pipeline( + steps=[('imputer', SimpleImputer(strategy='constant', fill_value=0))] + ) + + categorical_features = ["sex", "ola"] + categorical_transformer = OneHotEncoder(handle_unknown="ignore") + + preprocessor = ColumnTransformer( + transformers=[ + ("num", numeric_transformer, numeric_features), + ("cat", categorical_transformer, categorical_features), + ] + ) + encoded = preprocessor.fit_transform(X) + encoded = pd.DataFrame(encoded) + base_est = DecisionTreeClassifier(random_state=0, min_samples_split=2, + min_samples_leaf=1) + model = base_est + model.fit(encoded, y) + predictions = model.predict(encoded) + + # Append classifier to preprocessing pipeline. + # Now we have a full prediction pipeline. 
+ gen = GeneralizeToRepresentative(model, target_accuracy=0.5, + categorical_features=categorical_features, features_to_minimize=QI) + train_dataset = ArrayDataset(X, predictions) + gen.fit(dataset=train_dataset) + transformed = gen.transform(dataset=ArrayDataset(X)) + gener = gen.generalizations_ + expexted_generalizations = {'ranges': {'age': [], 'weight': [47.0]}, 'categories': {'ola': [['bb', 'aa']]}, + 'untouched': ['height', 'sex']} + + for key in expexted_generalizations['ranges']: + assert (set(expexted_generalizations['ranges'][key]) == set(gener['ranges'][key])) + for key in expexted_generalizations['categories']: + assert (set([frozenset(sl) for sl in expexted_generalizations['categories'][key]]) == + set([frozenset(sl) for sl in gener['categories'][key]])) + assert (set(expexted_generalizations['untouched']) == set(gener['untouched'])) + # assert (transformed.drop(QI, axis=1).equals(X.drop(QI, axis=1))) + np.testing.assert_array_equal(transformed.drop(QI, axis=1), X.drop(QI, axis=1)) + modified_features = [f for f in features if + f in expexted_generalizations['categories'].keys() or f in expexted_generalizations[ + 'ranges'].keys()] + # assert (transformed.drop(modified_features, axis=1).equals(X.drop(modified_features, axis=1))) + np.testing.assert_array_equal(transformed.drop(modified_features, axis=1), X.drop(modified_features, axis=1)) + ncp = gen.ncp_ + if len(expexted_generalizations['ranges'].keys()) > 0 or len(expexted_generalizations['categories'].keys()) > 0: + assert (ncp > 0) + assert (((transformed[modified_features]).equals(X[modified_features])) == False) + + +def test_BaseEstimator_regression(): + dataset = load_diabetes() + x_train, x_test, y_train, y_test = train_test_split(dataset.data, dataset.target, test_size=0.5, random_state=14) + + base_est = DecisionTreeRegressor(random_state=10, min_samples_split=2) + model = base_est + model.fit(x_train, y_train) + predictions = model.predict(x_train) + QI = ['age', 'bmi', 's2', 's5'] + 
features = ['age', 'sex', 'bmi', 'bp', + 's1', 's2', 's3', 's4', 's5', 's6'] + + gen = GeneralizeToRepresentative(model, target_accuracy=0.7, is_regression=True, + features_to_minimize=QI) + gen.fit(dataset=ArrayDataset(x_train, predictions, features_names=features)) + transformed = gen.transform(dataset=ArrayDataset(x_train, features_names=features)) print('Base model accuracy (R2 score): ', model.score(x_test, y_test)) model.fit(transformed, y_train) print('Base model accuracy (R2 score) after minimization: ', model.score(x_test, y_test)) @@ -546,7 +832,7 @@ def test_regression(): assert (set([frozenset(sl) for sl in expexted_generalizations['categories'][key]]) == set([frozenset(sl) for sl in gener['categories'][key]])) assert (set(expexted_generalizations['untouched']) == set(gener['untouched'])) - assert ((np.delete(transformed, QI, axis=1) == np.delete(x_train, QI, axis=1)).all()) + assert ((np.delete(transformed, [0, 2, 5, 8], axis=1) == np.delete(x_train, [0, 2, 5, 8], axis=1)).all()) modified_features = [f for f in features if f in expexted_generalizations['categories'].keys() or f in expexted_generalizations[ diff --git a/tests/test_model.py b/tests/test_model.py new file mode 100644 index 0000000..bbb951b --- /dev/null +++ b/tests/test_model.py @@ -0,0 +1,35 @@ +import pytest + +from apt.utils.models import SklearnClassifier, SklearnRegressor, ModelOutputType +from apt.utils.datasets import ArrayDataset +from apt.utils import dataset_utils + +from sklearn.tree import DecisionTreeRegressor +from sklearn.ensemble import RandomForestClassifier + + +def test_sklearn_classifier(): + (x_train, y_train), (x_test, y_test) = dataset_utils.get_iris_dataset() + underlying_model = RandomForestClassifier() + model = SklearnClassifier(underlying_model, ModelOutputType.CLASSIFIER_VECTOR) + train = ArrayDataset(x_train, y_train) + test = ArrayDataset(x_test, y_test) + model.fit(train) + pred = model.predict(x_test) + assert(pred.shape[0] == x_test.shape[0]) + + score = 
model.score(test) + assert(0.0 <= score <= 1.0) + + +def test_sklearn_regressor(): + (x_train, y_train), (x_test, y_test) = dataset_utils.get_diabetes_dataset() + underlying_model = DecisionTreeRegressor() + model = SklearnRegressor(underlying_model) + train = ArrayDataset(x_train, y_train) + test = ArrayDataset(x_test, y_test) + model.fit(train) + pred = model.predict(x_test) + assert (pred.shape[0] == x_test.shape[0]) + + score = model.score(test)