mirror of
https://github.com/IBM/ai-privacy-toolkit.git
synced 2026-05-07 02:52:39 +02:00
update docstring
This commit is contained in:
parent
b54f0a2382
commit
3bc1341dad
2 changed files with 8 additions and 5 deletions
|
|
@ -22,8 +22,10 @@ class Anonymize:
|
|||
"""
|
||||
:param k: The privacy parameter that determines the number of records that will be indistinguishable from each
|
||||
other (when looking at the quasi identifiers). Should be at least 2.
|
||||
:param quasi_identifiers: The features that need to be minimized.
|
||||
:param categorical_features: The list of categorical features.
|
||||
:param quasi_identifiers: The features that need to be minimized. It can be a list of feature names (strings) if
|
||||
dataset.features_names is set, otherwise a list of indexes (integers).
|
||||
:param categorical_features: The list of categorical features. It can be a list of feature names (strings) if
|
||||
dataset.features_names is set, otherwise a list of indexes (integers).
|
||||
:param is_regression: Boolean param indicating whether this is a regression problem.
|
||||
"""
|
||||
if k < 2:
|
||||
|
|
@ -48,7 +50,7 @@ class Anonymize:
|
|||
self.features_names = dataset.features_names
|
||||
# if features is None, use numbers instead of names
|
||||
elif dataset.get_samples().shape[0] != 0:
|
||||
self.features_names = [i for i in range(dataset.get_samples().shape[0])]
|
||||
self.features_names = [i for i in range(dataset.get_samples().shape[1])]
|
||||
else:
|
||||
raise ValueError('No data provided')
|
||||
if not set(self.quasi_identifiers).issubset(set(self.features_names)):
|
||||
|
|
|
|||
|
|
@ -162,12 +162,13 @@ class StoredDataset(Dataset):
|
|||
class ArrayDataset(Dataset):
|
||||
"""Dataset that is based on x and y arrays (e.g., numpy/pandas/list...)"""
|
||||
|
||||
def __init__(self, x: INPUT_DATA_ARRAY_TYPE, y: Optional[INPUT_DATA_ARRAY_TYPE] = None, features_names=None,
|
||||
**kwargs):
|
||||
def __init__(self, x: INPUT_DATA_ARRAY_TYPE, y: Optional[INPUT_DATA_ARRAY_TYPE] = None,
|
||||
features_names: Optional = None, **kwargs):
|
||||
"""
|
||||
ArrayDataset constructor.
|
||||
:param x: collection of data samples
|
||||
:param y: collection of labels (optional)
|
||||
:param features_names: list of str, The feature names, in the order that they appear in the data (optional)
|
||||
:param kwargs: dataset parameters
|
||||
"""
|
||||
self.is_pandas = False
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue