Increase version to 0.2.0 (#74)

* Remove tensorflow dependency if not using keras model
* Remove xgboost dependency if not using xgboost model
* Documentation updates

Signed-off-by: abigailt <abigailt@il.ibm.com>
2026-04-26 05:16:22 +02:00 · 2023-05-08 12:50:55 +03:00 · 2023-05-08 12:50:55 +03:00 · 8a9ef80146
commit 8a9ef80146
parent 782edabd58
25 changed files with 306 additions and 152 deletions
--- a/apt/risk/data_assessment/dataset_attack.py
+++ b/apt/risk/data_assessment/dataset_attack.py
@@ -16,32 +16,30 @@ from apt.utils.datasets import ArrayDataset

 class Config(abc.ABC):
    """
-        The base class for dataset attack configurations
+    The base class for dataset attack configurations
    """
    pass


 class DatasetAttack(abc.ABC):
    """
-         The interface for performing privacy attack for risk assessment of synthetic datasets to be used in AI model
-         training. The original data members (training data) and non-members (the holdout data) should be available.
-         For reliability, all the datasets should be preprocessed and normalized.
+     The interface for performing privacy attack for risk assessment of synthetic datasets to be used in AI model
+     training. The original data members (training data) and non-members (the holdout data) should be available.
+     For reliability, all the datasets should be preprocessed and normalized.
+
+     :param original_data_members: A container for the training original samples and labels,
+            only samples are used in the assessment
+     :param original_data_non_members: A container for the holdout original samples and labels,
+            only samples are used in the assessment
+     :param synthetic_data: A container for the synthetic samples and labels, only samples are used in the assessment
+     :param config: Configuration parameters to guide the assessment process
+     :param dataset_name: A name to identify the dataset under attack, optional
+     :param attack_strategy_utils: Utils for use with the attack strategy, optional
    """

    def __init__(self, original_data_members: ArrayDataset, original_data_non_members: ArrayDataset,
                 synthetic_data: ArrayDataset, config: Config, dataset_name: str,
                 attack_strategy_utils: Optional[AttackStrategyUtils] = None) -> None:
-        """
-        :param original_data_members: A container for the training original samples and labels,
-            only samples are used in the assessment
-        :param original_data_non_members: A container for the holdout original samples and labels,
-            only samples are used in the assessment
-        :param synthetic_data: A container for the synthetic samples and labels, only samples are used in the assessment
-        :param config: Configuration parameters to guide the assessment process
-        :param dataset_name: A name to identify the dataset under attack, optional
-        :param attack_strategy_utils: Utils for use with the attack strategy, optional
-        """
-
        self.original_data_members = original_data_members
        self.original_data_non_members = original_data_non_members
        self.synthetic_data = synthetic_data
@@ -52,7 +50,8 @@ class DatasetAttack(abc.ABC):
    @abc.abstractmethod
    def assess_privacy(self) -> DatasetAttackScore:
        """
-        Assess the privacy of the dataset
+        Assess the privacy of the dataset.
+
        :return:
            score: DatasetAttackScore the privacy attack risk score
        """
@@ -61,14 +60,15 @@ class DatasetAttack(abc.ABC):

 class DatasetAttackMembership(DatasetAttack):
    """
-         An abstract base class for performing privacy risk assessment for synthetic datasets on a per-record level.
+    An abstract base class for performing privacy risk assessment for synthetic datasets on a per-record level.
    """

    @abc.abstractmethod
    def calculate_privacy_score(self, dataset_attack_result: DatasetAttackResultMembership,
                                generate_plot: bool = False) -> DatasetAttackScore:
        """
-        Calculate dataset privacy score based on the result of the privacy attack
+        Calculate dataset privacy score based on the result of the privacy attack.
+
        :return:
            score: DatasetAttackScore
        """
@@ -78,15 +78,16 @@ class DatasetAttackMembership(DatasetAttack):
    def plot_roc_curve(dataset_name: str, member_probabilities: np.ndarray, non_member_probabilities: np.ndarray,
                       filename_prefix: str = ""):
        """
-        Plot ROC curve
-        :param dataset_name: dataset name, will become part of the plot filename
-        :param member_probabilities: probability estimates of the member samples, the training data
-        :param non_member_probabilities: probability estimates of the non-member samples, the hold-out data
-        :param filename_prefix: name prefix for the ROC curve plot
+        Plot ROC curve.
+
+        :param dataset_name: dataset name, will become part of the plot filename.
+        :param member_probabilities: probability estimates of the member samples, the training data.
+        :param non_member_probabilities: probability estimates of the non-member samples, the hold-out data.
+        :param filename_prefix: name prefix for the ROC curve plot.
        """
        labels = np.concatenate((np.zeros((len(non_member_probabilities),)), np.ones((len(member_probabilities),))))
        results = np.concatenate((non_member_probabilities, member_probabilities))
-        svc_disp = RocCurveDisplay.from_predictions(labels, results)
+        RocCurveDisplay.from_predictions(labels, results)
        plt.plot([0, 1], [0, 1], color="navy", linewidth=2, linestyle="--", label='No skills')
        plt.title('ROC curve')
        plt.savefig(f'{filename_prefix}{dataset_name}_roc_curve.png')
@@ -94,9 +95,10 @@ class DatasetAttackMembership(DatasetAttack):
    @staticmethod
    def calculate_metrics(member_probabilities: np.ndarray, non_member_probabilities: np.ndarray):
        """
-        Calculate attack performance metrics
-        :param member_probabilities: probability estimates of the member samples, the training data
-        :param non_member_probabilities: probability estimates of the non-member samples, the hold-out data
+        Calculate attack performance metrics.
+
+        :param member_probabilities: probability estimates of the member samples, the training data.
+        :param non_member_probabilities: probability estimates of the non-member samples, the hold-out data.
        :return:
            fpr: False Positive rate
            tpr: True Positive rate