mirror of
https://github.com/IBM/ai-privacy-toolkit.git
synced 2026-06-08 15:05:13 +02:00
Rename gan_leaks=>per_record_knn_probabilities and holdout=>whole_dataset_knn_distance
Signed-off-by: Maya Anderson <mayaa@il.ibm.com>
This commit is contained in:
parent
e7e725ea80
commit
e5f6089b23
3 changed files with 46 additions and 41 deletions
|
|
@ -3,10 +3,10 @@ from typing import Optional
|
|||
|
||||
import pandas as pd
|
||||
|
||||
from apt.risk.data_assessment.dataset_attack_gan_leaks import DatasetAttackGanLeaksConfig, DatasetAttackGanLeaks, \
|
||||
DatasetAttackScoreGanLeaks
|
||||
from apt.risk.data_assessment.dataset_attack_holdout import DatasetAttackHoldoutConfig, DatasetAttackHoldout, \
|
||||
DatasetAttackScoreHoldout
|
||||
from apt.risk.data_assessment.per_record_knn_probabilities_dataset_attack_ import DatasetAttackConfigPerRecordKnnProbabilities, \
|
||||
DatasetAttackPerRecordKnnProbabilities, DatasetAttackScorePerRecordKnnProbabilities
|
||||
from apt.risk.data_assessment.dataset_attack_whole_dataset_knn_distance import DatasetAttackConfigWholeDatasetKnnDistance, \
|
||||
DatasetAttackWholeDatasetKnnDistance, DatasetAttackScoreWholeDatasetKnnDistance
|
||||
from apt.utils.datasets import ArrayDataset
|
||||
|
||||
|
||||
|
|
@ -20,8 +20,8 @@ class DatasetAssessmentManager:
|
|||
"""
|
||||
The main class for running dataset assessment attacks.
|
||||
"""
|
||||
gan_leaks_attack_scores = []
|
||||
holdout_attack_scores = []
|
||||
attack_scores_per_record_knn_probabilities = []
|
||||
attack_scores_whole_dataset_knn_distance = []
|
||||
|
||||
def __init__(self, config: Optional[DatasetAssessmentManagerConfig] = DatasetAssessmentManagerConfig) -> None:
|
||||
"""
|
||||
|
|
@ -31,32 +31,34 @@ class DatasetAssessmentManager:
|
|||
|
||||
def assess(self, original_data_members: ArrayDataset, original_data_non_members: ArrayDataset,
|
||||
synthetic_data: ArrayDataset, dataset_name: str = "dataset") -> (
|
||||
DatasetAttackScoreGanLeaks, DatasetAttackScoreHoldout):
|
||||
config_gl = DatasetAttackGanLeaksConfig(use_batches=False, k=5)
|
||||
mgr = DatasetAttackGanLeaks(original_data_members,
|
||||
original_data_non_members,
|
||||
synthetic_data,
|
||||
dataset_name,
|
||||
config_gl)
|
||||
DatasetAttackScorePerRecordKnnProbabilities, DatasetAttackScoreWholeDatasetKnnDistance):
|
||||
config_gl = DatasetAttackConfigPerRecordKnnProbabilities(use_batches=False, k=5)
|
||||
mgr = DatasetAttackPerRecordKnnProbabilities(original_data_members,
|
||||
original_data_non_members,
|
||||
synthetic_data,
|
||||
dataset_name,
|
||||
config_gl)
|
||||
|
||||
result = mgr.assess_privacy()
|
||||
score_g = mgr.calculate_privacy_score(result, generate_plot=self.config.generate_plots)
|
||||
self.gan_leaks_attack_scores.append(score_g)
|
||||
self.attack_scores_per_record_knn_probabilities.append(score_g)
|
||||
|
||||
config_h = DatasetAttackHoldoutConfig(use_batches=False, k=5)
|
||||
mgr_h = DatasetAttackHoldout(original_data_members, original_data_non_members, synthetic_data,
|
||||
dataset_name,
|
||||
config_h)
|
||||
config_h = DatasetAttackConfigWholeDatasetKnnDistance(use_batches=False, k=5)
|
||||
mgr_h = DatasetAttackWholeDatasetKnnDistance(original_data_members, original_data_non_members, synthetic_data,
|
||||
dataset_name,
|
||||
config_h)
|
||||
|
||||
score_h = mgr_h.assess_privacy()
|
||||
self.holdout_attack_scores.append(score_h)
|
||||
self.attack_scores_whole_dataset_knn_distance.append(score_h)
|
||||
return score_g, score_h
|
||||
|
||||
def dump_all_scores_to_files(self):
|
||||
if self.config.persist_reports:
|
||||
results_log_file = "_results.log.csv"
|
||||
self.dump_scores_to_file(self.gan_leaks_attack_scores, "gan_leaks" + results_log_file, True)
|
||||
self.dump_scores_to_file(self.holdout_attack_scores, "holdout" + results_log_file, True)
|
||||
self.dump_scores_to_file(self.attack_scores_per_record_knn_probabilities,
|
||||
"per_record_knn_probabilities" + results_log_file, True)
|
||||
self.dump_scores_to_file(self.attack_scores_whole_dataset_knn_distance,
|
||||
"whole_dataset_knn_distance" + results_log_file, True)
|
||||
|
||||
@staticmethod
|
||||
def dump_scores_to_file(attack_scores, filename, header: bool):
|
||||
|
|
|
|||
|
|
@ -16,8 +16,8 @@ from apt.utils.datasets import ArrayDataset
|
|||
|
||||
|
||||
@dataclass
|
||||
class DatasetAttackHoldoutConfig(Config):
|
||||
"""Configuration for DatasetAttackHoldout.
|
||||
class DatasetAttackConfigWholeDatasetKnnDistance(Config):
|
||||
"""Configuration for DatasetAttackWholeDatasetKnnDistance.
|
||||
|
||||
Attributes:
|
||||
k: Number of nearest neighbors to search
|
||||
|
|
@ -37,18 +37,18 @@ class DatasetAttackHoldoutConfig(Config):
|
|||
|
||||
|
||||
@dataclass
|
||||
class DatasetAttackScoreHoldout(DatasetAttackScore):
|
||||
"""Configuration for DatasetAttackHoldout.
|
||||
class DatasetAttackScoreWholeDatasetKnnDistance(DatasetAttackScore):
|
||||
"""DatasetAttackWholeDatasetKnnDistance privacy score.
|
||||
Attributes
|
||||
----------
|
||||
share : the share of synthetic records closer to the training than the holdout dataset
|
||||
assessment_type : assessment type is 'Holdout', to be used in reports
|
||||
assessment_type : assessment type is 'WholeDatasetKnnDistance', to be used in reports
|
||||
"""
|
||||
share: float
|
||||
assessment_type: str = 'Holdout'
|
||||
assessment_type: str = 'WholeDatasetKnnDistance'
|
||||
|
||||
|
||||
class DatasetAttackHoldout(DatasetAttackWhole):
|
||||
class DatasetAttackWholeDatasetKnnDistance(DatasetAttackWhole):
|
||||
"""
|
||||
Privacy risk assessment for synthetic datasets based on distances of synthetic data records from
|
||||
members (training set) and non-members (holdout set). The privacy risk measure is the share of synthetic
|
||||
|
|
@ -57,7 +57,7 @@ class DatasetAttackHoldout(DatasetAttackWhole):
|
|||
|
||||
def __init__(self, original_data_members: ArrayDataset, original_data_non_members: ArrayDataset,
|
||||
synthetic_data: ArrayDataset, dataset_name: str,
|
||||
config: Optional[DatasetAttackHoldoutConfig] = DatasetAttackHoldoutConfig()):
|
||||
config: Optional[DatasetAttackConfigWholeDatasetKnnDistance] = DatasetAttackConfigWholeDatasetKnnDistance()):
|
||||
"""
|
||||
:param original_data_members: A container for the training original samples and labels
|
||||
:param original_data_non_members: A container for the holdout original samples and labels
|
||||
|
|
@ -80,7 +80,7 @@ class DatasetAttackHoldout(DatasetAttackWhole):
|
|||
self.nn_obj_members = NearestNeighbors(n_neighbors=config.k, algorithm='auto')
|
||||
self.nn_obj_non_members = NearestNeighbors(n_neighbors=config.k, algorithm='auto')
|
||||
|
||||
def assess_privacy(self) -> DatasetAttackScoreHoldout:
|
||||
def assess_privacy(self) -> DatasetAttackScoreWholeDatasetKnnDistance:
|
||||
"""
|
||||
Calculate the share of synthetic records closer to the training than the holdout dataset
|
||||
:return:
|
||||
|
|
@ -94,7 +94,7 @@ class DatasetAttackHoldout(DatasetAttackWhole):
|
|||
# and half those whose distance is similar to members and non-members
|
||||
share = np.mean(member_distances < non_member_distances) + 0.5 * np.mean(
|
||||
member_distances == non_member_distances)
|
||||
score = DatasetAttackScoreHoldout(self.dataset_name, share=share)
|
||||
score = DatasetAttackScoreWholeDatasetKnnDistance(self.dataset_name, share=share)
|
||||
return score
|
||||
|
||||
def calculate_distances(self):
|
||||
|
|
@ -17,8 +17,8 @@ from apt.utils.datasets import ArrayDataset
|
|||
|
||||
|
||||
@dataclass
|
||||
class DatasetAttackGanLeaksConfig(Config):
|
||||
"""Configuration for DatasetAttackGanLeaks.
|
||||
class DatasetAttackConfigPerRecordKnnProbabilities(Config):
|
||||
"""Configuration for DatasetAttackPerRecordKnnProbabilities.
|
||||
|
||||
Attributes:
|
||||
k: Number of nearest neighbors to search
|
||||
|
|
@ -38,20 +38,21 @@ class DatasetAttackGanLeaksConfig(Config):
|
|||
|
||||
|
||||
@dataclass
|
||||
class DatasetAttackScoreGanLeaks(DatasetAttackScore):
|
||||
"""DatasetAttackGanLeaks privacy score.
|
||||
class DatasetAttackScorePerRecordKnnProbabilities(DatasetAttackScore):
|
||||
"""DatasetAttackPerRecordKnnProbabilities privacy score.
|
||||
Attributes
|
||||
----------
|
||||
roc_auc_score : the area under the receiver operating characteristic curve (AUC ROC) to evaluate the attack performance.
|
||||
roc_auc_score : the area under the receiver operating characteristic curve (AUC ROC) to evaluate the attack
|
||||
performance.
|
||||
average_precision_score: the proportion of Predicted Positive cases that are correctly Real Positives (members)
|
||||
assessment_type : assessment type is 'GANLeaks', to be used in reports
|
||||
assessment_type : assessment type is 'PerRecordKnnProbabilities', to be used in reports
|
||||
"""
|
||||
roc_auc_score: float
|
||||
average_precision_score: float
|
||||
assessment_type: str = 'GANLeaks'
|
||||
assessment_type: str = 'PerRecordKnnProbabilities'
|
||||
|
||||
|
||||
class DatasetAttackGanLeaks(DatasetAttackPerRecord):
|
||||
class DatasetAttackPerRecordKnnProbabilities(DatasetAttackPerRecord):
|
||||
"""
|
||||
Privacy risk assessment for synthetic datasets based on Black-Box MIA attack using distances of
|
||||
members (training set) and non-members (holdout set) from their nearest neighbors in the synthetic dataset.
|
||||
|
|
@ -60,7 +61,8 @@ class DatasetAttackGanLeaks(DatasetAttackPerRecord):
|
|||
|
||||
def __init__(self, original_data_members: ArrayDataset, original_data_non_members: ArrayDataset,
|
||||
synthetic_data: ArrayDataset, dataset_name: str,
|
||||
config: Optional[DatasetAttackGanLeaksConfig] = DatasetAttackGanLeaksConfig()):
|
||||
config: Optional[
|
||||
DatasetAttackConfigPerRecordKnnProbabilities] = DatasetAttackConfigPerRecordKnnProbabilities()):
|
||||
"""
|
||||
:param original_data_members: A container for the training original samples and labels
|
||||
:param original_data_non_members: A container for the holdout original samples and labels
|
||||
|
|
@ -122,7 +124,8 @@ class DatasetAttackGanLeaks(DatasetAttackPerRecord):
|
|||
pos_proba, neg_proba = \
|
||||
dataset_attack_result.positive_probabilities, dataset_attack_result.negative_probabilities
|
||||
fpr, tpr, threshold, auc, ap = self.calculate_metrics(pos_proba, neg_proba)
|
||||
score = DatasetAttackScoreGanLeaks(self.dataset_name, roc_auc_score=auc, average_precision_score=ap)
|
||||
score = DatasetAttackScorePerRecordKnnProbabilities(self.dataset_name, roc_auc_score=auc,
|
||||
average_precision_score=ap)
|
||||
if generate_plot:
|
||||
self.plot_roc_curve(pos_proba, neg_proba)
|
||||
return score
|
||||
Loading…
Add table
Add a link
Reference in a new issue