From 70c827154df27f9b53a36edbb3f5d016d10179f3 Mon Sep 17 00:00:00 2001 From: abigailt Date: Tue, 13 Feb 2024 11:22:18 +0200 Subject: [PATCH] Update documentation of classes to reflect that all data should be encoded and scaled. Signed-off-by: abigailt --- apt/risk/data_assessment/dataset_assessment_manager.py | 3 ++- .../dataset_attack_membership_classification.py | 8 +++++--- .../dataset_attack_membership_knn_probabilities.py | 8 +++++--- .../dataset_attack_whole_dataset_knn_distance.py | 8 +++++--- 4 files changed, 17 insertions(+), 10 deletions(-) diff --git a/apt/risk/data_assessment/dataset_assessment_manager.py b/apt/risk/data_assessment/dataset_assessment_manager.py index 4d699bc..a83c0e4 100644 --- a/apt/risk/data_assessment/dataset_assessment_manager.py +++ b/apt/risk/data_assessment/dataset_assessment_manager.py @@ -47,7 +47,8 @@ class DatasetAssessmentManager: synthetic_data: ArrayDataset, dataset_name: str = DEFAULT_DATASET_NAME, categorical_features: list = [])\ -> list[DatasetAttackScore]: """ - Do dataset privacy risk assessment by running dataset attacks, and return their scores. + Do dataset privacy risk assessment by running dataset attacks, and return their scores. All data is assumed + to be encoded and scaled. :param original_data_members: A container for the training original samples and labels, only samples are used in the assessment diff --git a/apt/risk/data_assessment/dataset_attack_membership_classification.py b/apt/risk/data_assessment/dataset_attack_membership_classification.py index 1da4aa1..abfab21 100644 --- a/apt/risk/data_assessment/dataset_attack_membership_classification.py +++ b/apt/risk/data_assessment/dataset_attack_membership_classification.py @@ -71,9 +71,11 @@ class DatasetAttackMembershipClassification(DatasetAttackMembership): config: DatasetAttackConfigMembershipClassification = DatasetAttackConfigMembershipClassification(), dataset_name: str = DEFAULT_DATASET_NAME, categorical_features: list = None): """ - :param original_data_members: A container for the training original samples and labels - :param original_data_non_members: A container for the holdout original samples and labels - :param synthetic_data: A container for the synthetic samples and labels + :param original_data_members: A container for the training original samples and labels. Should be encoded and + scaled. + :param original_data_non_members: A container for the holdout original samples and labels. Should be encoded + and scaled. + :param synthetic_data: A container for the synthetic samples and labels. Should be encoded and scaled. :param config: Configuration parameters to guide the attack, optional :param dataset_name: A name to identify this dataset, optional """ diff --git a/apt/risk/data_assessment/dataset_attack_membership_knn_probabilities.py b/apt/risk/data_assessment/dataset_attack_membership_knn_probabilities.py index 8b3abe9..21f46c6 100644 --- a/apt/risk/data_assessment/dataset_attack_membership_knn_probabilities.py +++ b/apt/risk/data_assessment/dataset_attack_membership_knn_probabilities.py @@ -84,9 +84,11 @@ class DatasetAttackMembershipKnnProbabilities(DatasetAttackMembership): dataset_name: str = DEFAULT_DATASET_NAME, categorical_features: list = None, **kwargs): """ - :param original_data_members: A container for the training original samples and labels - :param original_data_non_members: A container for the holdout original samples and labels - :param synthetic_data: A container for the synthetic samples and labels + :param original_data_members: A container for the training original samples and labels. Should be encoded and + scaled. + :param original_data_non_members: A container for the holdout original samples and labels. Should be encoded and + scaled. + :param synthetic_data: A container for the synthetic samples and labels. Should be encoded and scaled. :param config: Configuration parameters to guide the attack, optional :param dataset_name: A name to identify this dataset, optional """ diff --git a/apt/risk/data_assessment/dataset_attack_whole_dataset_knn_distance.py b/apt/risk/data_assessment/dataset_attack_whole_dataset_knn_distance.py index 2eb9aeb..2c24a3f 100644 --- a/apt/risk/data_assessment/dataset_attack_whole_dataset_knn_distance.py +++ b/apt/risk/data_assessment/dataset_attack_whole_dataset_knn_distance.py @@ -77,9 +77,11 @@ class DatasetAttackWholeDatasetKnnDistance(DatasetAttack): config: DatasetAttackConfigWholeDatasetKnnDistance = DatasetAttackConfigWholeDatasetKnnDistance(), dataset_name: str = DEFAULT_DATASET_NAME, categorical_features: list = None, **kwargs): """ - :param original_data_members: A container for the training original samples and labels - :param original_data_non_members: A container for the holdout original samples and labels - :param synthetic_data: A container for the synthetic samples and labels + :param original_data_members: A container for the training original samples and labels. Should be encoded and + scaled. + :param original_data_non_members: A container for the holdout original samples and labels. Should be encoded + and scaled. + :param synthetic_data: A container for the synthetic samples and labels. Should be encoded and scaled. :param config: Configuration parameters to guide the assessment process, optional :param dataset_name: A name to identify this dataset, optional """