diff --git a/apt/anonymization/anonymizer.py b/apt/anonymization/anonymizer.py index 94eb403..1720d99 100644 --- a/apt/anonymization/anonymizer.py +++ b/apt/anonymization/anonymizer.py @@ -22,8 +22,10 @@ class Anonymize: """ :param k: The privacy parameter that determines the number of records that will be indistinguishable from each other (when looking at the quasi identifiers). Should be at least 2. - :param quasi_identifiers: The features that need to be minimized. - :param categorical_features: The list of categorical features. + :param quasi_identifiers: The features that need to be minimized. It can be a list of feature names (strings) if + dataset.features_names is set, otherwise a list of indexes (integers). + :param categorical_features: The list of categorical features. It can be a list of feature names (strings) if + dataset.features_names is set, otherwise a list of indexes (integers). :param is_regression: Boolean param indicates that is is a regression problem. """ if k < 2: @@ -48,7 +50,7 @@ class Anonymize: self.features_names = dataset.features_names # if features is None, use numbers instead of names elif dataset.get_samples().shape[0] != 0: - self.features_names = [i for i in range(dataset.get_samples().shape[0])] + self.features_names = [i for i in range(dataset.get_samples().shape[1])] else: raise ValueError('No data provided') if not set(self.quasi_identifiers).issubset(set(self.features_names)): diff --git a/apt/utils/datasets/datasets.py b/apt/utils/datasets/datasets.py index 29dd4e9..6db183e 100644 --- a/apt/utils/datasets/datasets.py +++ b/apt/utils/datasets/datasets.py @@ -162,12 +162,13 @@ class StoredDataset(Dataset): class ArrayDataset(Dataset): """Dataset that is based on x and y arrays (e.g., numpy/pandas/list...)""" - def __init__(self, x: INPUT_DATA_ARRAY_TYPE, y: Optional[INPUT_DATA_ARRAY_TYPE] = None, features_names=None, - **kwargs): + def __init__(self, x: INPUT_DATA_ARRAY_TYPE, y: Optional[INPUT_DATA_ARRAY_TYPE] = None, + 
features_names: Optional = None, **kwargs): """ ArrayDataset constructor. :param x: collection of data samples :param y: collection of labels (optional) + :param features_names: list of str, the feature names, in the order that they appear in the data (optional) :param kwargs: dataset parameters """ self.is_pandas = False