Update documentation of classes to reflect that all data should be encoded and scaled.

Signed-off-by: abigailt <abigailt@il.ibm.com>
This commit is contained in:
abigailt 2024-02-13 11:22:18 +02:00
parent 132812f772
commit 70c827154d
4 changed files with 17 additions and 10 deletions

View file

@@ -47,7 +47,8 @@ class DatasetAssessmentManager:
synthetic_data: ArrayDataset, dataset_name: str = DEFAULT_DATASET_NAME, categorical_features: list = [])\
-> list[DatasetAttackScore]:
"""
Do dataset privacy risk assessment by running dataset attacks, and return their scores.
Do dataset privacy risk assessment by running dataset attacks, and return their scores. All data is assumed
to be encoded and scaled.
:param original_data_members: A container for the training original samples and labels,
only samples are used in the assessment

View file

@@ -71,9 +71,11 @@ class DatasetAttackMembershipClassification(DatasetAttackMembership):
config: DatasetAttackConfigMembershipClassification = DatasetAttackConfigMembershipClassification(),
dataset_name: str = DEFAULT_DATASET_NAME, categorical_features: list = None):
"""
:param original_data_members: A container for the training original samples and labels
:param original_data_non_members: A container for the holdout original samples and labels
:param synthetic_data: A container for the synthetic samples and labels
:param original_data_members: A container for the training original samples and labels. Should be encoded and
scaled.
:param original_data_non_members: A container for the holdout original samples and labels. Should be encoded
and scaled.
:param synthetic_data: A container for the synthetic samples and labels. Should be encoded and scaled.
:param config: Configuration parameters to guide the attack, optional
:param dataset_name: A name to identify this dataset, optional
"""

View file

@@ -84,9 +84,11 @@ class DatasetAttackMembershipKnnProbabilities(DatasetAttackMembership):
dataset_name: str = DEFAULT_DATASET_NAME,
categorical_features: list = None, **kwargs):
"""
:param original_data_members: A container for the training original samples and labels
:param original_data_non_members: A container for the holdout original samples and labels
:param synthetic_data: A container for the synthetic samples and labels
:param original_data_members: A container for the training original samples and labels. Should be encoded and
scaled.
:param original_data_non_members: A container for the holdout original samples and labels. Should be encoded and
scaled.
:param synthetic_data: A container for the synthetic samples and labels. Should be encoded and scaled.
:param config: Configuration parameters to guide the attack, optional
:param dataset_name: A name to identify this dataset, optional
"""

View file

@@ -77,9 +77,11 @@ class DatasetAttackWholeDatasetKnnDistance(DatasetAttack):
config: DatasetAttackConfigWholeDatasetKnnDistance = DatasetAttackConfigWholeDatasetKnnDistance(),
dataset_name: str = DEFAULT_DATASET_NAME, categorical_features: list = None, **kwargs):
"""
:param original_data_members: A container for the training original samples and labels
:param original_data_non_members: A container for the holdout original samples and labels
:param synthetic_data: A container for the synthetic samples and labels
:param original_data_members: A container for the training original samples and labels. Should be encoded and
scaled.
:param original_data_non_members: A container for the holdout original samples and labels. Should be encoded
and scaled.
:param synthetic_data: A container for the synthetic samples and labels. Should be encoded and scaled.
:param config: Configuration parameters to guide the assessment process, optional
:param dataset_name: A name to identify this dataset, optional
"""