mirror of
https://github.com/IBM/ai-privacy-toolkit.git
synced 2026-04-25 04:46:21 +02:00
Update docs
This commit is contained in:
parent
7e2ce7fe96
commit
c47819a031
3 changed files with 21 additions and 13 deletions
|
|
@ -14,19 +14,25 @@ class Anonymize:
|
|||
Class for performing tailored, model-guided anonymization of training datasets for ML models.
|
||||
|
||||
Based on the implementation described in: https://arxiv.org/abs/2007.13086
|
||||
|
||||
Parameters
|
||||
----------
|
||||
k : int
|
||||
The privacy parameter that determines the number of records that will be indistinguishable from each
|
||||
other (when looking at the quasi identifiers). Should be at least 2.
|
||||
quasi_identifiers : np.ndarray or list
|
||||
The features that need to be minimized in case of pandas data, and indexes of features
|
||||
in case of numpy data.
|
||||
categorical_features : list, optional
|
||||
The list of categorical features (should only be supplied when passing data as a
|
||||
pandas dataframe).
|
||||
is_regression : bool, optional
|
||||
Whether the model is a regression model or not (if False, assumes
|
||||
a classification model). Default is False.
|
||||
"""
|
||||
|
||||
def __init__(self, k: int, quasi_identifiers: Union[np.ndarray, list], categorical_features: Optional[list] = None,
|
||||
is_regression=False):
|
||||
"""
|
||||
:param k: The privacy parameter that determines the number of records that will be indistinguishable from each
|
||||
other (when looking at the quasi identifiers). Should be at least 2.
|
||||
:param quasi_identifiers: The features that need to be minimized in case of pandas data, and indexes of features
|
||||
in case of numpy data.
|
||||
:param categorical_features: The list of categorical features (should only be supplied when passing data as a
|
||||
pandas dataframe).
|
||||
:param is_regression: Whether the model is a regression model (if False, a classification model is assumed). Default is False.
|
||||
"""
|
||||
if k < 2:
|
||||
raise ValueError("k should be a positive integer with a value of 2 or higher")
|
||||
if quasi_identifiers is None or len(quasi_identifiers) < 1:
|
||||
|
|
|
|||
|
|
@ -39,12 +39,10 @@ class GeneralizeToRepresentative(BaseEstimator, MetaEstimatorMixin, TransformerM
|
|||
estimator : estimator, optional
|
||||
The original model for which generalization is being performed.
|
||||
Should be pre-fitted.
|
||||
|
||||
target_accuracy : float, optional
|
||||
The required accuracy when applying the base model to the
|
||||
generalized data. Accuracy is measured relative to the original
|
||||
accuracy of the model.
|
||||
|
||||
features : list of str, optional
|
||||
The feature names, in the order that they appear in the data.
|
||||
categorical_features : list of str, optional
|
||||
|
|
@ -63,6 +61,9 @@ class GeneralizeToRepresentative(BaseEstimator, MetaEstimatorMixin, TransformerM
|
|||
The required method to train data set for minimizing. Default is
|
||||
to train the tree just on the features that are given as
|
||||
features_to_minimize.
|
||||
is_regression : bool, optional
|
||||
Whether the model is a regression model or not (if False, assumes
|
||||
a classification model). Default is False.
|
||||
|
||||
Attributes
|
||||
----------
|
||||
|
|
|
|||
|
|
@ -19,20 +19,21 @@ def _load_iris(test_set_size: float = 0.3):
|
|||
return (x_train, y_train), (x_test, y_test)
|
||||
|
||||
|
||||
def get_iris_dataset():
|
||||
def get_iris_dataset(test_set: float = 0.3):
|
||||
"""
|
||||
Loads the Iris dataset from scikit-learn.
|
||||
|
||||
:param test_set: Proportion of the data to use as validation split (value between 0 and 1).
|
||||
:return: Entire dataset and labels as numpy array.
|
||||
"""
|
||||
return _load_iris()
|
||||
return _load_iris(test_set)
|
||||
|
||||
|
||||
def get_german_credit_dataset(test_set: float = 0.3):
|
||||
"""
|
||||
Loads the UCI German_credit dataset from `tests/datasets/german` or downloads it if necessary.
|
||||
|
||||
:param test_set: Proportion of the data to use as validation split (value between 0 and 1).
|
||||
:return: Dataset and labels as pandas dataframes.
|
||||
"""
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue