mirror of
https://github.com/IBM/ai-privacy-toolkit.git
synced 2026-04-26 05:16:22 +02:00
Increase version to 0.2.0 (#74)
* Remove tensorflow dependency if not using keras model
* Remove xgboost dependency if not using xgboost model
* Documentation updates

Signed-off-by: abigailt <abigailt@il.ibm.com>
This commit is contained in:
parent
782edabd58
commit
8a9ef80146
25 changed files with 306 additions and 152 deletions
|
|
@ -16,32 +16,30 @@ from apt.utils.datasets import ArrayDataset
|
|||
|
||||
class Config(abc.ABC):
|
||||
"""
|
||||
The base class for dataset attack configurations
|
||||
The base class for dataset attack configurations
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
class DatasetAttack(abc.ABC):
|
||||
"""
|
||||
The interface for performing privacy attack for risk assessment of synthetic datasets to be used in AI model
|
||||
training. The original data members (training data) and non-members (the holdout data) should be available.
|
||||
For reliability, all the datasets should be preprocessed and normalized.
|
||||
The interface for performing privacy attack for risk assessment of synthetic datasets to be used in AI model
|
||||
training. The original data members (training data) and non-members (the holdout data) should be available.
|
||||
For reliability, all the datasets should be preprocessed and normalized.
|
||||
|
||||
:param original_data_members: A container for the training original samples and labels,
|
||||
only samples are used in the assessment
|
||||
:param original_data_non_members: A container for the holdout original samples and labels,
|
||||
only samples are used in the assessment
|
||||
:param synthetic_data: A container for the synthetic samples and labels, only samples are used in the assessment
|
||||
:param config: Configuration parameters to guide the assessment process
|
||||
:param dataset_name: A name to identify the dataset under attack, optional
|
||||
:param attack_strategy_utils: Utils for use with the attack strategy, optional
|
||||
"""
|
||||
|
||||
def __init__(self, original_data_members: ArrayDataset, original_data_non_members: ArrayDataset,
|
||||
synthetic_data: ArrayDataset, config: Config, dataset_name: str,
|
||||
attack_strategy_utils: Optional[AttackStrategyUtils] = None) -> None:
|
||||
"""
|
||||
:param original_data_members: A container for the training original samples and labels,
|
||||
only samples are used in the assessment
|
||||
:param original_data_non_members: A container for the holdout original samples and labels,
|
||||
only samples are used in the assessment
|
||||
:param synthetic_data: A container for the synthetic samples and labels, only samples are used in the assessment
|
||||
:param config: Configuration parameters to guide the assessment process
|
||||
:param dataset_name: A name to identify the dataset under attack, optional
|
||||
:param attack_strategy_utils: Utils for use with the attack strategy, optional
|
||||
"""
|
||||
|
||||
self.original_data_members = original_data_members
|
||||
self.original_data_non_members = original_data_non_members
|
||||
self.synthetic_data = synthetic_data
|
||||
|
|
@ -52,7 +50,8 @@ class DatasetAttack(abc.ABC):
|
|||
@abc.abstractmethod
|
||||
def assess_privacy(self) -> DatasetAttackScore:
|
||||
"""
|
||||
Assess the privacy of the dataset
|
||||
Assess the privacy of the dataset.
|
||||
|
||||
:return:
|
||||
score: DatasetAttackScore the privacy attack risk score
|
||||
"""
|
||||
|
|
@ -61,14 +60,15 @@ class DatasetAttack(abc.ABC):
|
|||
|
||||
class DatasetAttackMembership(DatasetAttack):
|
||||
"""
|
||||
An abstract base class for performing privacy risk assessment for synthetic datasets on a per-record level.
|
||||
An abstract base class for performing privacy risk assessment for synthetic datasets on a per-record level.
|
||||
"""
|
||||
|
||||
@abc.abstractmethod
|
||||
def calculate_privacy_score(self, dataset_attack_result: DatasetAttackResultMembership,
|
||||
generate_plot: bool = False) -> DatasetAttackScore:
|
||||
"""
|
||||
Calculate dataset privacy score based on the result of the privacy attack
|
||||
Calculate dataset privacy score based on the result of the privacy attack.
|
||||
|
||||
:return:
|
||||
score: DatasetAttackScore
|
||||
"""
|
||||
|
|
@ -78,15 +78,16 @@ class DatasetAttackMembership(DatasetAttack):
|
|||
def plot_roc_curve(dataset_name: str, member_probabilities: np.ndarray, non_member_probabilities: np.ndarray,
|
||||
filename_prefix: str = ""):
|
||||
"""
|
||||
Plot ROC curve
|
||||
:param dataset_name: dataset name, will become part of the plot filename
|
||||
:param member_probabilities: probability estimates of the member samples, the training data
|
||||
:param non_member_probabilities: probability estimates of the non-member samples, the hold-out data
|
||||
:param filename_prefix: name prefix for the ROC curve plot
|
||||
Plot ROC curve.
|
||||
|
||||
:param dataset_name: dataset name, will become part of the plot filename.
|
||||
:param member_probabilities: probability estimates of the member samples, the training data.
|
||||
:param non_member_probabilities: probability estimates of the non-member samples, the hold-out data.
|
||||
:param filename_prefix: name prefix for the ROC curve plot.
|
||||
"""
|
||||
labels = np.concatenate((np.zeros((len(non_member_probabilities),)), np.ones((len(member_probabilities),))))
|
||||
results = np.concatenate((non_member_probabilities, member_probabilities))
|
||||
svc_disp = RocCurveDisplay.from_predictions(labels, results)
|
||||
RocCurveDisplay.from_predictions(labels, results)
|
||||
plt.plot([0, 1], [0, 1], color="navy", linewidth=2, linestyle="--", label='No skills')
|
||||
plt.title('ROC curve')
|
||||
plt.savefig(f'{filename_prefix}{dataset_name}_roc_curve.png')
|
||||
|
|
@ -94,9 +95,10 @@ class DatasetAttackMembership(DatasetAttack):
|
|||
@staticmethod
|
||||
def calculate_metrics(member_probabilities: np.ndarray, non_member_probabilities: np.ndarray):
|
||||
"""
|
||||
Calculate attack performance metrics
|
||||
:param member_probabilities: probability estimates of the member samples, the training data
|
||||
:param non_member_probabilities: probability estimates of the non-member samples, the hold-out data
|
||||
Calculate attack performance metrics.
|
||||
|
||||
:param member_probabilities: probability estimates of the member samples, the training data.
|
||||
:param non_member_probabilities: probability estimates of the non-member samples, the hold-out data.
|
||||
:return:
|
||||
fpr: False Positive rate
|
||||
tpr: True Positive rate
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue