Increase version to 0.2.0 (#74)

* Remove tensorflow dependency if not using keras model
* Remove xgboost dependency if not using xgboost model
* Documentation updates

Signed-off-by: abigailt <abigailt@il.ibm.com>
This commit is contained in:
abigailgold 2023-05-08 12:50:55 +03:00 committed by GitHub
parent 782edabd58
commit 8a9ef80146
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
25 changed files with 306 additions and 152 deletions

View file

@ -16,32 +16,30 @@ from apt.utils.datasets import ArrayDataset
class Config(abc.ABC):
"""
The base class for dataset attack configurations
The base class for dataset attack configurations
"""
pass
class DatasetAttack(abc.ABC):
"""
The interface for performing privacy attack for risk assessment of synthetic datasets to be used in AI model
training. The original data members (training data) and non-members (the holdout data) should be available.
For reliability, all the datasets should be preprocessed and normalized.
The interface for performing privacy attack for risk assessment of synthetic datasets to be used in AI model
training. The original data members (training data) and non-members (the holdout data) should be available.
For reliability, all the datasets should be preprocessed and normalized.
:param original_data_members: A container for the training original samples and labels,
only samples are used in the assessment
:param original_data_non_members: A container for the holdout original samples and labels,
only samples are used in the assessment
:param synthetic_data: A container for the synthetic samples and labels, only samples are used in the assessment
:param config: Configuration parameters to guide the assessment process
:param dataset_name: A name to identify the dataset under attack, optional
:param attack_strategy_utils: Utils for use with the attack strategy, optional
"""
def __init__(self, original_data_members: ArrayDataset, original_data_non_members: ArrayDataset,
synthetic_data: ArrayDataset, config: Config, dataset_name: str,
attack_strategy_utils: Optional[AttackStrategyUtils] = None) -> None:
"""
:param original_data_members: A container for the training original samples and labels,
only samples are used in the assessment
:param original_data_non_members: A container for the holdout original samples and labels,
only samples are used in the assessment
:param synthetic_data: A container for the synthetic samples and labels, only samples are used in the assessment
:param config: Configuration parameters to guide the assessment process
:param dataset_name: A name to identify the dataset under attack, optional
:param attack_strategy_utils: Utils for use with the attack strategy, optional
"""
self.original_data_members = original_data_members
self.original_data_non_members = original_data_non_members
self.synthetic_data = synthetic_data
@ -52,7 +50,8 @@ class DatasetAttack(abc.ABC):
@abc.abstractmethod
def assess_privacy(self) -> DatasetAttackScore:
"""
Assess the privacy of the dataset
Assess the privacy of the dataset.
:return:
score: DatasetAttackScore the privacy attack risk score
"""
@ -61,14 +60,15 @@ class DatasetAttack(abc.ABC):
class DatasetAttackMembership(DatasetAttack):
"""
An abstract base class for performing privacy risk assessment for synthetic datasets on a per-record level.
An abstract base class for performing privacy risk assessment for synthetic datasets on a per-record level.
"""
@abc.abstractmethod
def calculate_privacy_score(self, dataset_attack_result: DatasetAttackResultMembership,
generate_plot: bool = False) -> DatasetAttackScore:
"""
Calculate dataset privacy score based on the result of the privacy attack
Calculate dataset privacy score based on the result of the privacy attack.
:return:
score: DatasetAttackScore
"""
@ -78,15 +78,16 @@ class DatasetAttackMembership(DatasetAttack):
def plot_roc_curve(dataset_name: str, member_probabilities: np.ndarray, non_member_probabilities: np.ndarray,
filename_prefix: str = ""):
"""
Plot ROC curve
:param dataset_name: dataset name, will become part of the plot filename
:param member_probabilities: probability estimates of the member samples, the training data
:param non_member_probabilities: probability estimates of the non-member samples, the hold-out data
:param filename_prefix: name prefix for the ROC curve plot
Plot ROC curve.
:param dataset_name: dataset name, will become part of the plot filename.
:param member_probabilities: probability estimates of the member samples, the training data.
:param non_member_probabilities: probability estimates of the non-member samples, the hold-out data.
:param filename_prefix: name prefix for the ROC curve plot.
"""
labels = np.concatenate((np.zeros((len(non_member_probabilities),)), np.ones((len(member_probabilities),))))
results = np.concatenate((non_member_probabilities, member_probabilities))
svc_disp = RocCurveDisplay.from_predictions(labels, results)
RocCurveDisplay.from_predictions(labels, results)
plt.plot([0, 1], [0, 1], color="navy", linewidth=2, linestyle="--", label='No skills')
plt.title('ROC curve')
plt.savefig(f'{filename_prefix}{dataset_name}_roc_curve.png')
@ -94,9 +95,10 @@ class DatasetAttackMembership(DatasetAttack):
@staticmethod
def calculate_metrics(member_probabilities: np.ndarray, non_member_probabilities: np.ndarray):
"""
Calculate attack performance metrics
:param member_probabilities: probability estimates of the member samples, the training data
:param non_member_probabilities: probability estimates of the non-member samples, the hold-out data
Calculate attack performance metrics.
:param member_probabilities: probability estimates of the member samples, the training data.
:param non_member_probabilities: probability estimates of the non-member samples, the hold-out data.
:return:
fpr: False Positive rate
tpr: True Positive rate