update docstring

2026-06-26 15:49:37 +02:00 · 2022-03-28 00:07:04 +03:00 · 2022-03-28 00:07:04 +03:00 · 3bc1341dad
commit 3bc1341dad
parent b54f0a2382
2 changed files with 8 additions and 5 deletions
--- a/apt/anonymization/anonymizer.py
+++ b/apt/anonymization/anonymizer.py
@@ -22,8 +22,10 @@ class Anonymize:
        """
        :param k: The privacy parameter that determines the number of records that will be indistinguishable from each
                  other (when looking at the quasi identifiers). Should be at least 2.
-        :param quasi_identifiers: The features that need to be minimized.
-        :param categorical_features: The list of categorical features.
+        :param quasi_identifiers: The features that need to be minimized. It can be a list of feature names (strings) if
+                  dataset.feature_names is set, otherwise a list of indexes (integers).
+        :param categorical_features: The list of categorical features. It can be a list of feature names (strings) if
+                  dataset.feature_names is set, otherwise a list of indexes (integers).
        :param is_regression: Boolean param indicates that is is a regression problem.
        """
        if k < 2:
@@ -48,7 +50,7 @@ class Anonymize:
            self.features_names = dataset.features_names
            # if features is None, use numbers instead of names
        elif dataset.get_samples().shape[0] != 0:
-            self.features_names = [i for i in range(dataset.get_samples().shape[0])]
+            self.features_names = [i for i in range(dataset.get_samples().shape[1])]
        else:
            raise ValueError('No data provided')
        if not set(self.quasi_identifiers).issubset(set(self.features_names)):
--- a/apt/utils/datasets/datasets.py
+++ b/apt/utils/datasets/datasets.py
@@ -162,12 +162,13 @@ class StoredDataset(Dataset):
 class ArrayDataset(Dataset):
    """Dataset that is based on x and y arrays (e.g., numpy/pandas/list...)"""

-    def __init__(self, x: INPUT_DATA_ARRAY_TYPE, y: Optional[INPUT_DATA_ARRAY_TYPE] = None, features_names=None,
-                 **kwargs):
+    def __init__(self, x: INPUT_DATA_ARRAY_TYPE, y: Optional[INPUT_DATA_ARRAY_TYPE] = None,
+                 features_names: Optional = None, **kwargs):
        """
        ArrayDataset constructor.
        :param x: collection of data samples
        :param y: collection of labels (optional)
+        :param features_names: list of str, the feature names, in the order that they appear in the data (optional)
        :param kwargs: dataset parameters
        """
        self.is_pandas = False