diff --git a/apt/minimization/minimizer.py b/apt/minimization/minimizer.py
index 1e15d3d..f5b1219 100644
--- a/apt/minimization/minimizer.py
+++ b/apt/minimization/minimizer.py
@@ -95,7 +95,7 @@ class GeneralizeToRepresentative(BaseEstimator, MetaEstimatorMixin, TransformerM
         self.encoder = encoder
         self.generalize_using_transform = generalize_using_transform
         self._ncp = 0.0
-        self._feature_data = {}
+        self._feature_data = None
         self._categorical_values = {}
         self._dt = None
         self._features = None
@@ -204,8 +204,8 @@ class GeneralizeToRepresentative(BaseEstimator, MetaEstimatorMixin, TransformerM
         :type X: {array-like, sparse matrix}, shape (n_samples, n_features), optional
         :param y: The target values. This should contain the predictions of the original model on ``X``.
         :type y: array-like, shape (n_samples,), optional
-        :param features_names: The feature names, in the order that they appear in the data. Can be provided when
-                               passing the data as ``X`` and ``y``
+        :param features_names: The feature names, in the order that they appear in the data. Should be provided when
+                               passing the data as ``X`` as a numpy array
         :type features_names: list of strings, optional
         :param dataset: Data wrapper containing the training input samples and the predictions of the original model
                         on the training data. Either ``X``, ``y`` OR ``dataset`` need to be provided, not both.
@@ -272,18 +272,7 @@ class GeneralizeToRepresentative(BaseEstimator, MetaEstimatorMixin, TransformerM
                 used_X_test = X_test_QI
 
             # collect feature data (such as min, max)
-            self._feature_data = {}
-            for feature in self._features:
-                if feature not in self._feature_data.keys():
-                    fd = {}
-                    values = list(x.loc[:, feature])
-                    if feature not in self.categorical_features:
-                        fd['min'] = min(values)
-                        fd['max'] = max(values)
-                        fd['range'] = max(values) - min(values)
-                    else:
-                        fd['range'] = len(np.unique(values))
-                    self._feature_data[feature] = fd
+            self._feature_data = self._get_feature_data(x)
 
             # default encoder in case none provided
             if self.encoder is None:
@@ -386,7 +375,8 @@ class GeneralizeToRepresentative(BaseEstimator, MetaEstimatorMixin, TransformerM
             # self._cells currently holds the chosen generalization based on target accuracy
 
             # calculate iLoss
-            self.calculate_ncp(X_test)
+            X_test_dataset = ArrayDataset(X_test, features_names=self._features)
+            self.calculate_ncp(X_test_dataset)
 
         # Return the transformer
         return self
@@ -397,8 +387,8 @@ class GeneralizeToRepresentative(BaseEstimator, MetaEstimatorMixin, TransformerM
 
         :param X: The training input samples.
         :type X: {array-like, sparse matrix}, shape (n_samples, n_features), optional
-        :param features_names: The feature names, in the order that they appear in the data. Can be provided when
-                               passing the data as ``X`` and ``y``
+        :param features_names: The feature names, in the order that they appear in the data. Should be provided when
+                               passing the data as ``X`` as a numpy array
         :type features_names: list of strings, optional
         :param dataset: Data wrapper containing the training input samples and the predictions of the original model
                         on the training data. Either ``X`` OR ``dataset`` need to be provided, not both.
@@ -410,10 +400,11 @@ class GeneralizeToRepresentative(BaseEstimator, MetaEstimatorMixin, TransformerM
         if not self.generalize_using_transform:
             raise ValueError('transform method called even though generalize_using_transform parameter was False. This '
                              'can lead to inconsistent results.')
-        self.calculate_ncp(transformed, True)
+        transformed_dataset = ArrayDataset(transformed, features_names=self._features)
+        self.calculate_ncp(transformed_dataset, True)
         return transformed
 
-    def calculate_ncp(self, samples: Optional[DATA_PANDAS_NUMPY_TYPE] = None, transformed: Optional[bool] = False):
+    def calculate_ncp(self, samples: Optional[ArrayDataset] = None, transformed: Optional[bool] = False):
         """
         Compute the NCP score of the generalization. Calculation is based on the value of the
         generalize_using_transform param.
@@ -423,7 +414,7 @@ class GeneralizeToRepresentative(BaseEstimator, MetaEstimatorMixin, TransformerM
         :param samples: The input samples to compute the NCP score on. Ideally should be the data that will be
                         transformed (e.g., test/runtime data). If not samples supplied, will return the last NCP score
                         computed by the `fit` or `transform` method.
-        :type samples: {array-like, sparse matrix}, shape (n_samples, n_features), optional
+        :type samples: ArrayDataset, optional. feature_names should be set.
         :param transformed: Whether the supplied samples have already been transformed using the `transform` method.
                             Default is False.
         :type transformed: boolean, optional
@@ -431,37 +422,50 @@ class GeneralizeToRepresentative(BaseEstimator, MetaEstimatorMixin, TransformerM
         """
         if samples is None:
             return self._ncp
-        elif self.generalize_using_transform:
+
+        samples_pd = pd.DataFrame(samples.get_samples(), columns=samples.features_names)
+        if self._features is None:
+            self._features = samples.features_names
+        if self._feature_data is None:
+            self._feature_data = self._get_feature_data(samples_pd)
+
+        if self.generalize_using_transform:
             if not transformed:
                 # transform data
-                transformed_data = self._inner_transform(samples)
+                transformed_data = self._inner_transform(dataset=samples)  # can return numpy or pandas
+                if not samples.is_pandas:
+                    transformed_data = pd.DataFrame(transformed_data, columns=samples.features_names)
             else:
-                transformed_data = samples
-            #TODO
+                transformed_data = samples_pd
+            range_counts, category_counts = self._calculate_transformed_generalizations(transformed_data)
+            generalizations = self._transformed_generalizations
         else:  # use generalizations
-            # suppressed features are already taken care of within _calc_ncp_numeric
-            ranges = self.generalizations['ranges']
-            categories = self.generalizations['categories']
-            range_counts = self._find_range_count(samples, ranges)
-            category_counts = self._find_categories_count(samples, categories)
+            generalizations = self.generalizations
+            range_counts = self._find_range_counts(samples_pd, generalizations['ranges'])
+            category_counts = self._find_categories_counts(samples_pd, generalizations['categories'])
 
-            total = samples.shape[0]
-            total_ncp = 0
-            total_features = len(self.generalizations['untouched'])
-            for feature in ranges.keys():
-                feature_ncp = self._calc_ncp_numeric(ranges[feature], range_counts[feature],
-                                                     self._feature_data[feature], total)
-                total_ncp = total_ncp + feature_ncp
-                total_features += 1
-            for feature in categories.keys():
-                feature_ncp = self._calc_ncp_categorical(categories[feature], category_counts[feature],
-                                                         self._feature_data[feature],
-                                                         total)
-                total_ncp = total_ncp + feature_ncp
-                total_features += 1
-            if total_features == 0:
-                return 0
-            self._ncp = total_ncp / total_features
+        # suppressed features are already taken care of within _calc_ncp_numeric
+        #TODO: check that this is the case for tramsformed as well
+        ranges = generalizations['ranges']
+        categories = generalizations['categories']
+
+        total = samples_pd.shape[0]
+        total_ncp = 0
+        total_features = len(generalizations['untouched'])
+        for feature in ranges.keys():
+            feature_ncp = self._calc_ncp_numeric(ranges[feature], range_counts[feature],
+                                                 self._feature_data[feature], total)
+            total_ncp = total_ncp + feature_ncp
+            total_features += 1
+        for feature in categories.keys():
+            feature_ncp = self._calc_ncp_categorical(categories[feature], category_counts[feature],
+                                                     self._feature_data[feature],
+                                                     total)
+            total_ncp = total_ncp + feature_ncp
+            total_features += 1
+        if total_features == 0:
+            return 0
+        self._ncp = total_ncp / total_features
         return self._ncp
 
     def _inner_transform(self, X: Optional[DATA_PANDAS_NUMPY_TYPE] = None, features_names: Optional[list] = None,
@@ -480,7 +484,8 @@ class GeneralizeToRepresentative(BaseEstimator, MetaEstimatorMixin, TransformerM
         elif dataset is None:
             raise ValueError('Either X OR dataset need to be provided, not both')
         if dataset and dataset.features_names:
-            self._features = dataset.features_names
+            if self._features is None:
+                self._features = dataset.features_names
         if dataset and dataset.get_samples() is not None:
             x = pd.DataFrame(dataset.get_samples(), columns=self._features)
 
@@ -522,7 +527,7 @@ class GeneralizeToRepresentative(BaseEstimator, MetaEstimatorMixin, TransformerM
 
     @staticmethod
     def _calc_ncp_numeric(feature_range, range_count, feature_data, total):
-        # if there are no ranges, feature is supressed and iLoss is 1
+        # if there are no ranges, feature is suppressed and iLoss is 1
         if not feature_range:
             return 1
         # range only contains the split values, need to add min and max value of feature
@@ -533,6 +538,22 @@ class GeneralizeToRepresentative(BaseEstimator, MetaEstimatorMixin, TransformerM
         average_range_size = sum(normalized_range_sizes) / len(normalized_range_sizes)
         return average_range_size / (feature_data['max'] - feature_data['min'])
 
+
+    def _get_feature_data(self, x):
+        feature_data = {}
+        for feature in self._features:
+            if feature not in feature_data.keys():
+                fd = {}
+                values = list(x.loc[:, feature])
+                if feature not in self.categorical_features:
+                    fd['min'] = min(values)
+                    fd['max'] = max(values)
+                    fd['range'] = max(values) - min(values)
+                else:
+                    fd['range'] = len(np.unique(values))
+                feature_data[feature] = fd
+        return feature_data
+
     def _get_record_indexes_for_cell(self, X, cell, mapped):
         indexes = []
         for index, row in X.iterrows():
@@ -868,12 +889,12 @@ class GeneralizeToRepresentative(BaseEstimator, MetaEstimatorMixin, TransformerM
         # We want to remove features with low iLoss (NCP) and high accuracy gain
         # (after removing them)
         ranges = self._generalizations['ranges']
-        range_counts = self._find_range_count(original_data, ranges)
+        range_counts = self._find_range_counts(original_data, ranges)
         total = prepared_data.size
         range_min = sys.float_info.max
         remove_feature = None
         categories = self.generalizations['categories']
-        category_counts = self._find_categories_count(original_data, categories)
+        category_counts = self._find_categories_counts(original_data, categories)
 
         for feature in ranges.keys():
             if feature not in self._generalizations['untouched']:
@@ -930,25 +951,109 @@ class GeneralizeToRepresentative(BaseEstimator, MetaEstimatorMixin, TransformerM
                                  'untouched': GeneralizeToRepresentative._calculate_untouched(self.cells)}
         self._remove_categorical_untouched(self._generalizations)
 
-    def _find_range_count(self, samples, ranges):
-        samples_df = pd.DataFrame(samples, columns=self._features)
+    def _calculate_transformed_generalizations(self, transformed):
+        # transformed data should only consist of representative values from cells (when removing untouched features)
+        ranges = {}
+        categories = {}
+        range_counts = {}
+        category_counts = {}
+
+        unique_records = transformed.value_counts().reset_index(name='count')
+        representatives = unique_records.drop('count', axis=1)
+        representative_counts = unique_records['count']  # needed to normalize ncp according to quantity
+        index = 0
+        for _, record in representatives.iterrows():
+            # TODO: what if some cells are not present, we will not take their generalizations into account. We need to
+            # "gain" ncp in this case...
+            record_dict = self.pandas_record_to_dict(record)
+            for cell in self.cells:
+                representative = cell["representative"].copy()
+                record_copy = record_dict.copy()
+                if 'untouched' in cell:
+                    for feature in cell['untouched']:
+                        record_copy.pop(feature)
+                        representative.pop(feature)
+                if record_copy == representative:
+                    # handle numerical features
+                    for feature in [key for key in cell['ranges'].keys() if
+                                    'untouched' not in cell or key not in cell['untouched']]:
+                        if feature not in ranges.keys():
+                            ranges[feature] = []
+                        if cell['ranges'][feature]['start'] is not None:
+                            ranges[feature].append(cell['ranges'][feature]['start'])
+                        if cell['ranges'][feature]['end'] is not None:
+                            ranges[feature].append(cell['ranges'][feature]['end'])
+                        if feature in range_counts:
+                            range_counts[feature].append(representative_counts[index])
+                        else:
+                            range_counts[feature] = [representative_counts[index]]
+                    # handle categorical features
+                    categorical_features_values = {}
+                    for feature in [key for key in cell['categories'].keys() if
+                                    'untouched' not in cell or key not in cell['untouched']]:
+                        if feature not in categorical_features_values.keys():
+                            categorical_features_values[feature] = []
+                        for value in cell['categories'][feature]:
+                            if value not in categorical_features_values[feature]:
+                                categorical_features_values[feature].append(value)
+                    for feature in categorical_features_values.keys():
+                        partitions = []
+                        values = categorical_features_values[feature]
+                        assigned = []
+                        for i in range(len(values)):
+                            value1 = values[i]
+                            if value1 in assigned:
+                                continue
+                            partition = [value1]
+                            assigned.append(value1)
+                            for j in range(len(values)):
+                                if j <= i:
+                                    continue
+                                value2 = values[j]
+                                if GeneralizeToRepresentative._are_inseparable(self.cells, feature, value1, value2):
+                                    partition.append(value2)
+                                    assigned.append(value2)
+                            partitions.append(partition)
+                        if feature in categories:
+                            categories[feature].append(partitions)
+                        else:
+                            categories[feature] = [partitions]
+                        if feature in category_counts:
+                            category_counts[feature].append(representative_counts[index])
+                        else:
+                            category_counts[feature] = [representative_counts[index]]
+                    break
+            index += 1
+
+        for feature in ranges.keys():
+            ranges[feature] = list(set(ranges[feature]))
+            ranges[feature].sort()
+
+        self._transformed_generalizations = {
+            'ranges': ranges,
+            'categories': categories,
+            'untouched': GeneralizeToRepresentative._calculate_untouched(self.cells)}
+        self._remove_categorical_untouched(self._transformed_generalizations)
+        return range_counts, category_counts
+
+    def _find_range_counts(self, samples, ranges):
         range_counts = {}
         last_value = None
         for r in ranges.keys():
             range_counts[r] = []
             # if empty list, all samples should be counted
             if not ranges[r]:
-                range_counts[r].append(samples_df.shape[0])
+                range_counts[r].append(samples.shape[0])
             else:
                 for value in ranges[r]:
-                    counter = [item for item in samples_df[r] if int(item) <= value]
+                    counter = [item for item in samples[r] if int(item) <= value]
                     range_counts[r].append(len(counter))
                     last_value = value
-                counter = [item for item in samples_df[r] if int(item) <= last_value]
+                counter = [item for item in samples[r] if int(item) <= last_value]
                 range_counts[r].append(len(counter))
         return range_counts
 
-    def _find_categories_count(self, samples, categories):
+    def _find_categories_counts(self, samples, categories):
         category_counts = {}
         for c in categories.keys():
             category_counts[c] = []
@@ -1054,3 +1159,11 @@ class GeneralizeToRepresentative(BaseEstimator, MetaEstimatorMixin, TransformerM
 
         for feature in to_remove:
             del generalizations['categories'][feature]
+
+
+    @staticmethod
+    def pandas_record_to_dict(record):
+        dict = {}
+        for feature in record.index:
+            dict[feature] = record[feature]
+        return dict
\ No newline at end of file
diff --git a/tests/test_minimizer.py b/tests/test_minimizer.py
index 8e5a6cc..3b2f543 100644
--- a/tests/test_minimizer.py
+++ b/tests/test_minimizer.py
@@ -70,13 +70,42 @@ def test_minimizer_params_not_transform(data):
                   [45, 158],
                   [18, 190]])
     y = [1, 1, 0]
+    samples = ArrayDataset(X, y, features)
     base_est = DecisionTreeClassifier(random_state=0, min_samples_split=2,
                                       min_samples_leaf=1)
     model = SklearnClassifier(base_est, ModelOutputType.CLASSIFIER_PROBABILITIES)
     model.fit(ArrayDataset(X, y))
 
     gen = GeneralizeToRepresentative(model, cells=cells, generalize_using_transform=False)
-    gen.calculate_ncp(X)
+    gen.calculate_ncp(samples)
+    ncp = gen.ncp
+    assert (ncp > 0.0)
+
+
+def test_minimizer_params_not_transform_no_data(data):
+    # Assume two features, age and height, and boolean label
+    cells = [{"id": 1, "ranges": {"age": {"start": None, "end": 38}, "height": {"start": None, "end": 170}}, "label": 0,
+              'categories': {}, "representative": {"age": 26, "height": 149}},
+             {"id": 2, "ranges": {"age": {"start": 39, "end": None}, "height": {"start": None, "end": 170}}, "label": 1,
+              'categories': {}, "representative": {"age": 58, "height": 163}},
+             {"id": 3, "ranges": {"age": {"start": None, "end": 38}, "height": {"start": 171, "end": None}}, "label": 0,
+              'categories': {}, "representative": {"age": 31, "height": 184}},
+             {"id": 4, "ranges": {"age": {"start": 39, "end": None}, "height": {"start": 171, "end": None}}, "label": 1,
+              'categories': {}, "representative": {"age": 45, "height": 176}}
+             ]
+    features = ['age', 'height']
+    X = np.array([[23, 165],
+                  [45, 158],
+                  [18, 190]])
+    y = [1, 1, 0]
+    samples = ArrayDataset(X, y, features)
+    base_est = DecisionTreeClassifier(random_state=0, min_samples_split=2,
+                                      min_samples_leaf=1)
+    model = SklearnClassifier(base_est, ModelOutputType.CLASSIFIER_PROBABILITIES)
+    model.fit(ArrayDataset(X, y))
+
+    gen = GeneralizeToRepresentative(model, cells=cells, generalize_using_transform=False)
+    gen.calculate_ncp(samples)
     ncp = gen.ncp
     assert (ncp > 0.0)