Add a risk score to the base class DatasetAttackScore, so that every implementation can set it based on its own specific values.

Signed-off-by: Maya Anderson <mayaa@il.ibm.com>
2026-06-23 15:48:06 +02:00 · 2023-03-16 17:51:49 +02:00 · 2023-03-16 17:51:49 +02:00 · 80bec0c45b
commit 80bec0c45b
parent 8a4df5a4a2
7 changed files with 53 additions and 36 deletions
--- a/apt/risk/data_assessment/attack_strategy_utils.py
+++ b/apt/risk/data_assessment/attack_strategy_utils.py
@ -39,8 +39,8 @@ class KNNAttackStrategyUtils(AttackStrategyUtils):
        :param query_samples: query samples, to which nearest neighbors are to be found
        :param knn_learner: unsupervised learner for implementing neighbor searches, after it was fitted
        :param distance_processor: function for processing the distance into another more relevant metric per sample.
-            Its input is an array representing distances (the distances returned by NearestNeighbors.kneighbors() ),
-            and the output should be another array with distance-based values that enable to compute the final score
+            Its input is an array representing distances (the distances returned by NearestNeighbors.kneighbors() ), and
+            the output should be another array with distance-based values that enable computing the final risk score
        :return:
            distances of the query samples to their nearest neighbors, or a metric based on that distance and calculated
            by the distance_processor function
--- a/apt/risk/data_assessment/dataset_assessment_manager.py
+++ b/apt/risk/data_assessment/dataset_assessment_manager.py
@ -45,7 +45,7 @@ class DatasetAssessmentManager:
        :param dataset_name: A name to identify this dataset, optional

        :return:
-            a list of dataset attack scores
+            a list of dataset attack risk scores
        """
        config_gl = DatasetAttackConfigMembershipKnnProbabilities(use_batches=False,
                                                                  generate_plot=self.config.generate_plots)
--- a/apt/risk/data_assessment/dataset_attack.py
+++ b/apt/risk/data_assessment/dataset_attack.py
@ -54,7 +54,7 @@ class DatasetAttack(abc.ABC):
        """
        Assess the privacy of the dataset
        :return:
-            score: DatasetAttackScore the privacy attack score
+            score: DatasetAttackScore the privacy attack risk score
        """
        pass

--- a/apt/risk/data_assessment/dataset_attack_membership_knn_probabilities.py
+++ b/apt/risk/data_assessment/dataset_attack_membership_knn_probabilities.py
@ -42,17 +42,24 @@ class DatasetAttackConfigMembershipKnnProbabilities(Config):

@dataclass
 class DatasetAttackScoreMembershipKnnProbabilities(DatasetAttackScore):
-    """DatasetAttackMembershipKnnProbabilities privacy score.
-    Attributes
-    ----------
-    roc_auc_score :   the area under the receiver operating characteristic curve (AUC ROC) to evaluate the attack
-                      performance.
-    average_precision_score: the proportion of predicted members that are correctly members
-    assessment_type : assessment type is 'MembershipKnnProbabilities', to be used in reports
+    """DatasetAttackMembershipKnnProbabilities privacy risk score.
    """
-    roc_auc_score: float = -1.0
-    average_precision_score: float = -1.0
-    assessment_type: str = 'MembershipKnnProbabilities'
+    roc_auc_score: float
+    average_precision_score: float
+    assessment_type: str = 'MembershipKnnProbabilities'  # to be used in reports
+
+    def __init__(self, dataset_name: str, roc_auc_score: float, average_precision_score: float,
+                 result: DatasetAttackResultMembership) -> None:
+        """
+        dataset_name:    dataset name to be used in reports
+        roc_auc_score:   the area under the receiver operating characteristic curve (AUC ROC) to evaluate the attack
+                          performance.
+        average_precision_score: the proportion of predicted members that are correctly members
+        result:          the result of the membership inference attack
+        """
+        super().__init__(dataset_name=dataset_name, risk_score=roc_auc_score, result=result)
+        self.roc_auc_score = roc_auc_score
+        self.average_precision_score = average_precision_score


 class DatasetAttackMembershipKnnProbabilities(DatasetAttackMembership):
--- a/apt/risk/data_assessment/dataset_attack_result.py
+++ b/apt/risk/data_assessment/dataset_attack_result.py
@ -14,7 +14,8 @@ class DatasetAttackResult:
@dataclass
 class DatasetAttackScore:
    dataset_name: str
-    result: Optional[DatasetAttackResult] = None
+    risk_score: float
+    result: Optional[DatasetAttackResult]


@dataclass
--- a/apt/risk/data_assessment/dataset_attack_whole_dataset_knn_distance.py
+++ b/apt/risk/data_assessment/dataset_attack_whole_dataset_knn_distance.py
@ -39,15 +39,19 @@ class DatasetAttackConfigWholeDatasetKnnDistance(Config):

@dataclass
 class DatasetAttackScoreWholeDatasetKnnDistance(DatasetAttackScore):
-    """DatasetAttackWholeDatasetKnnDistance privacy score.
-    Attributes
-    ----------
-    share : the share of synthetic records closer to the training than the holdout dataset.
-            A value of 0.5 or close to it means good privacy.
-    assessment_type : assessment type is 'WholeDatasetKnnDistance', to be used in reports
+    """DatasetAttackWholeDatasetKnnDistance privacy risk score.
    """
-    share: float = -1.0
-    assessment_type: str = 'WholeDatasetKnnDistance'
+    share: float
+    assessment_type: str = 'WholeDatasetKnnDistance'  # to be used in reports
+
+    def __init__(self, dataset_name, share) -> None:
+        """
+        dataset_name:    dataset name to be used in reports
+        share : the share of synthetic records closer to the training than the holdout dataset.
+                A value of 0.5 or close to it means good privacy.
+        """
+        super().__init__(dataset_name=dataset_name, risk_score=share, result=None)
+        self.share = share


 class DatasetAttackWholeDatasetKnnDistance(DatasetAttack):