mirror of
https://github.com/IBM/ai-privacy-toolkit.git
synced 2026-06-02 14:45:13 +02:00
Increase version to 0.2.0 (#74)
* Remove tensorflow dependency if not using keras model
* Remove xgboost dependency if not using xgboost model
* Documentation updates

Signed-off-by: abigailt <abigailt@il.ibm.com>
This commit is contained in:
parent
98a7a078bb
commit
04d8df8091
25 changed files with 306 additions and 152 deletions
|
|
@ -6,4 +6,4 @@ from apt import anonymization
|
||||||
from apt import minimization
|
from apt import minimization
|
||||||
from apt import utils
|
from apt import utils
|
||||||
|
|
||||||
__version__ = "0.1.0"
|
__version__ = "0.2.0"
|
||||||
|
|
|
||||||
1
apt/risk/__init__.py
Normal file
1
apt/risk/__init__.py
Normal file
|
|
@ -0,0 +1 @@
|
||||||
|
|
||||||
|
|
@ -9,21 +9,20 @@ from apt.utils.datasets import ArrayDataset
|
||||||
|
|
||||||
class AttackStrategyUtils(abc.ABC):
|
class AttackStrategyUtils(abc.ABC):
|
||||||
"""
|
"""
|
||||||
Abstract base class for common utilities of various privacy attack strategies.
|
Abstract base class for common utilities of various privacy attack strategies.
|
||||||
"""
|
"""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
class KNNAttackStrategyUtils(AttackStrategyUtils):
|
class KNNAttackStrategyUtils(AttackStrategyUtils):
|
||||||
"""
|
"""
|
||||||
Common utilities for attack strategy based on KNN distances.
|
Common utilities for attack strategy based on KNN distances.
|
||||||
|
|
||||||
|
:param use_batches: Use batches with a progress meter or not when finding KNNs for query set.
|
||||||
|
:param batch_size: if use_batches=True, the size of batch_size should be > 0.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, use_batches: bool = False, batch_size: int = 10) -> None:
|
def __init__(self, use_batches: bool = False, batch_size: int = 10) -> None:
|
||||||
"""
|
|
||||||
:param use_batches: Use batches with a progress meter or not when finding KNNs for query set
|
|
||||||
:param batch_size: if use_batches=True, the size of batch_size should be > 0
|
|
||||||
"""
|
|
||||||
self.use_batches = use_batches
|
self.use_batches = use_batches
|
||||||
self.batch_size = batch_size
|
self.batch_size = batch_size
|
||||||
if use_batches:
|
if use_batches:
|
||||||
|
|
@ -31,11 +30,18 @@ class KNNAttackStrategyUtils(AttackStrategyUtils):
|
||||||
raise ValueError(f"When using batching batch_size should be > 0, and not {batch_size}")
|
raise ValueError(f"When using batching batch_size should be > 0, and not {batch_size}")
|
||||||
|
|
||||||
def fit(self, knn_learner: NearestNeighbors, dataset: ArrayDataset):
|
def fit(self, knn_learner: NearestNeighbors, dataset: ArrayDataset):
|
||||||
|
"""
|
||||||
|
Fit the KNN learner.
|
||||||
|
|
||||||
|
:param knn_learner: The KNN model to fit.
|
||||||
|
:param dataset: The training set to fit the model on.
|
||||||
|
"""
|
||||||
knn_learner.fit(dataset.get_samples())
|
knn_learner.fit(dataset.get_samples())
|
||||||
|
|
||||||
def find_knn(self, knn_learner: NearestNeighbors, query_samples: ArrayDataset, distance_processor=None):
|
def find_knn(self, knn_learner: NearestNeighbors, query_samples: ArrayDataset, distance_processor=None):
|
||||||
"""
|
"""
|
||||||
Nearest neighbor search function.
|
Nearest neighbor search function.
|
||||||
|
|
||||||
:param query_samples: query samples, to which nearest neighbors are to be found
|
:param query_samples: query samples, to which nearest neighbors are to be found
|
||||||
:param knn_learner: unsupervised learner for implementing neighbor searches, after it was fitted
|
:param knn_learner: unsupervised learner for implementing neighbor searches, after it was fitted
|
||||||
:param distance_processor: function for processing the distance into another more relevant metric per sample.
|
:param distance_processor: function for processing the distance into another more relevant metric per sample.
|
||||||
|
|
|
||||||
|
|
@ -15,6 +15,12 @@ from apt.utils.datasets import ArrayDataset
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class DatasetAssessmentManagerConfig:
|
class DatasetAssessmentManagerConfig:
|
||||||
|
"""
|
||||||
|
Configuration for DatasetAssessmentManager.
|
||||||
|
|
||||||
|
:param persist_reports: Whether to save assessment results to filesystem.
|
||||||
|
:param generate_plots: Whether to generate and visualize plots as part of assessment.
|
||||||
|
"""
|
||||||
persist_reports: bool = False
|
persist_reports: bool = False
|
||||||
generate_plots: bool = False
|
generate_plots: bool = False
|
||||||
|
|
||||||
|
|
@ -22,14 +28,13 @@ class DatasetAssessmentManagerConfig:
|
||||||
class DatasetAssessmentManager:
|
class DatasetAssessmentManager:
|
||||||
"""
|
"""
|
||||||
The main class for running dataset assessment attacks.
|
The main class for running dataset assessment attacks.
|
||||||
|
|
||||||
|
:param config: Configuration parameters to guide the dataset assessment process
|
||||||
"""
|
"""
|
||||||
attack_scores_per_record_knn_probabilities: list[DatasetAttackScore] = []
|
attack_scores_per_record_knn_probabilities: list[DatasetAttackScore] = []
|
||||||
attack_scores_whole_dataset_knn_distance: list[DatasetAttackScore] = []
|
attack_scores_whole_dataset_knn_distance: list[DatasetAttackScore] = []
|
||||||
|
|
||||||
def __init__(self, config: Optional[DatasetAssessmentManagerConfig] = DatasetAssessmentManagerConfig) -> None:
|
def __init__(self, config: Optional[DatasetAssessmentManagerConfig] = DatasetAssessmentManagerConfig) -> None:
|
||||||
"""
|
|
||||||
:param config: Configuration parameters to guide the dataset assessment process
|
|
||||||
"""
|
|
||||||
self.config = config
|
self.config = config
|
||||||
|
|
||||||
def assess(self, original_data_members: ArrayDataset, original_data_non_members: ArrayDataset,
|
def assess(self, original_data_members: ArrayDataset, original_data_non_members: ArrayDataset,
|
||||||
|
|
@ -67,14 +72,17 @@ class DatasetAssessmentManager:
|
||||||
return [score_gl, score_h]
|
return [score_gl, score_h]
|
||||||
|
|
||||||
def dump_all_scores_to_files(self):
|
def dump_all_scores_to_files(self):
|
||||||
|
"""
|
||||||
|
Save assessment results to filesystem.
|
||||||
|
"""
|
||||||
if self.config.persist_reports:
|
if self.config.persist_reports:
|
||||||
results_log_file = "_results.log.csv"
|
results_log_file = "_results.log.csv"
|
||||||
self.dump_scores_to_file(self.attack_scores_per_record_knn_probabilities,
|
self._dump_scores_to_file(self.attack_scores_per_record_knn_probabilities,
|
||||||
"per_record_knn_probabilities" + results_log_file, True)
|
"per_record_knn_probabilities" + results_log_file, True)
|
||||||
self.dump_scores_to_file(self.attack_scores_whole_dataset_knn_distance,
|
self._dump_scores_to_file(self.attack_scores_whole_dataset_knn_distance,
|
||||||
"whole_dataset_knn_distance" + results_log_file, True)
|
"whole_dataset_knn_distance" + results_log_file, True)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def dump_scores_to_file(attack_scores: list[DatasetAttackScore], filename: str, header: bool):
|
def _dump_scores_to_file(attack_scores: list[DatasetAttackScore], filename: str, header: bool):
|
||||||
run_results_df = pd.DataFrame(attack_scores).drop('result', axis=1, errors='ignore') # don't serialize result
|
run_results_df = pd.DataFrame(attack_scores).drop('result', axis=1, errors='ignore') # don't serialize result
|
||||||
run_results_df.to_csv(filename, header=header, encoding='utf-8', index=False, mode='w') # Overwrite
|
run_results_df.to_csv(filename, header=header, encoding='utf-8', index=False, mode='w') # Overwrite
|
||||||
|
|
|
||||||
|
|
@ -16,32 +16,30 @@ from apt.utils.datasets import ArrayDataset
|
||||||
|
|
||||||
class Config(abc.ABC):
|
class Config(abc.ABC):
|
||||||
"""
|
"""
|
||||||
The base class for dataset attack configurations
|
The base class for dataset attack configurations
|
||||||
"""
|
"""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
class DatasetAttack(abc.ABC):
|
class DatasetAttack(abc.ABC):
|
||||||
"""
|
"""
|
||||||
The interface for performing privacy attack for risk assessment of synthetic datasets to be used in AI model
|
The interface for performing privacy attack for risk assessment of synthetic datasets to be used in AI model
|
||||||
training. The original data members (training data) and non-members (the holdout data) should be available.
|
training. The original data members (training data) and non-members (the holdout data) should be available.
|
||||||
For reliability, all the datasets should be preprocessed and normalized.
|
For reliability, all the datasets should be preprocessed and normalized.
|
||||||
|
|
||||||
|
:param original_data_members: A container for the training original samples and labels,
|
||||||
|
only samples are used in the assessment
|
||||||
|
:param original_data_non_members: A container for the holdout original samples and labels,
|
||||||
|
only samples are used in the assessment
|
||||||
|
:param synthetic_data: A container for the synthetic samples and labels, only samples are used in the assessment
|
||||||
|
:param config: Configuration parameters to guide the assessment process
|
||||||
|
:param dataset_name: A name to identify the dataset under attack, optional
|
||||||
|
:param attack_strategy_utils: Utils for use with the attack strategy, optional
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, original_data_members: ArrayDataset, original_data_non_members: ArrayDataset,
|
def __init__(self, original_data_members: ArrayDataset, original_data_non_members: ArrayDataset,
|
||||||
synthetic_data: ArrayDataset, config: Config, dataset_name: str,
|
synthetic_data: ArrayDataset, config: Config, dataset_name: str,
|
||||||
attack_strategy_utils: Optional[AttackStrategyUtils] = None) -> None:
|
attack_strategy_utils: Optional[AttackStrategyUtils] = None) -> None:
|
||||||
"""
|
|
||||||
:param original_data_members: A container for the training original samples and labels,
|
|
||||||
only samples are used in the assessment
|
|
||||||
:param original_data_non_members: A container for the holdout original samples and labels,
|
|
||||||
only samples are used in the assessment
|
|
||||||
:param synthetic_data: A container for the synthetic samples and labels, only samples are used in the assessment
|
|
||||||
:param config: Configuration parameters to guide the assessment process
|
|
||||||
:param dataset_name: A name to identify the dataset under attack, optional
|
|
||||||
:param attack_strategy_utils: Utils for use with the attack strategy, optional
|
|
||||||
"""
|
|
||||||
|
|
||||||
self.original_data_members = original_data_members
|
self.original_data_members = original_data_members
|
||||||
self.original_data_non_members = original_data_non_members
|
self.original_data_non_members = original_data_non_members
|
||||||
self.synthetic_data = synthetic_data
|
self.synthetic_data = synthetic_data
|
||||||
|
|
@ -52,7 +50,8 @@ class DatasetAttack(abc.ABC):
|
||||||
@abc.abstractmethod
|
@abc.abstractmethod
|
||||||
def assess_privacy(self) -> DatasetAttackScore:
|
def assess_privacy(self) -> DatasetAttackScore:
|
||||||
"""
|
"""
|
||||||
Assess the privacy of the dataset
|
Assess the privacy of the dataset.
|
||||||
|
|
||||||
:return:
|
:return:
|
||||||
score: DatasetAttackScore the privacy attack risk score
|
score: DatasetAttackScore the privacy attack risk score
|
||||||
"""
|
"""
|
||||||
|
|
@ -61,14 +60,15 @@ class DatasetAttack(abc.ABC):
|
||||||
|
|
||||||
class DatasetAttackMembership(DatasetAttack):
|
class DatasetAttackMembership(DatasetAttack):
|
||||||
"""
|
"""
|
||||||
An abstract base class for performing privacy risk assessment for synthetic datasets on a per-record level.
|
An abstract base class for performing privacy risk assessment for synthetic datasets on a per-record level.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@abc.abstractmethod
|
@abc.abstractmethod
|
||||||
def calculate_privacy_score(self, dataset_attack_result: DatasetAttackResultMembership,
|
def calculate_privacy_score(self, dataset_attack_result: DatasetAttackResultMembership,
|
||||||
generate_plot: bool = False) -> DatasetAttackScore:
|
generate_plot: bool = False) -> DatasetAttackScore:
|
||||||
"""
|
"""
|
||||||
Calculate dataset privacy score based on the result of the privacy attack
|
Calculate dataset privacy score based on the result of the privacy attack.
|
||||||
|
|
||||||
:return:
|
:return:
|
||||||
score: DatasetAttackScore
|
score: DatasetAttackScore
|
||||||
"""
|
"""
|
||||||
|
|
@ -78,15 +78,16 @@ class DatasetAttackMembership(DatasetAttack):
|
||||||
def plot_roc_curve(dataset_name: str, member_probabilities: np.ndarray, non_member_probabilities: np.ndarray,
|
def plot_roc_curve(dataset_name: str, member_probabilities: np.ndarray, non_member_probabilities: np.ndarray,
|
||||||
filename_prefix: str = ""):
|
filename_prefix: str = ""):
|
||||||
"""
|
"""
|
||||||
Plot ROC curve
|
Plot ROC curve.
|
||||||
:param dataset_name: dataset name, will become part of the plot filename
|
|
||||||
:param member_probabilities: probability estimates of the member samples, the training data
|
:param dataset_name: dataset name, will become part of the plot filename.
|
||||||
:param non_member_probabilities: probability estimates of the non-member samples, the hold-out data
|
:param member_probabilities: probability estimates of the member samples, the training data.
|
||||||
:param filename_prefix: name prefix for the ROC curve plot
|
:param non_member_probabilities: probability estimates of the non-member samples, the hold-out data.
|
||||||
|
:param filename_prefix: name prefix for the ROC curve plot.
|
||||||
"""
|
"""
|
||||||
labels = np.concatenate((np.zeros((len(non_member_probabilities),)), np.ones((len(member_probabilities),))))
|
labels = np.concatenate((np.zeros((len(non_member_probabilities),)), np.ones((len(member_probabilities),))))
|
||||||
results = np.concatenate((non_member_probabilities, member_probabilities))
|
results = np.concatenate((non_member_probabilities, member_probabilities))
|
||||||
svc_disp = RocCurveDisplay.from_predictions(labels, results)
|
RocCurveDisplay.from_predictions(labels, results)
|
||||||
plt.plot([0, 1], [0, 1], color="navy", linewidth=2, linestyle="--", label='No skills')
|
plt.plot([0, 1], [0, 1], color="navy", linewidth=2, linestyle="--", label='No skills')
|
||||||
plt.title('ROC curve')
|
plt.title('ROC curve')
|
||||||
plt.savefig(f'{filename_prefix}{dataset_name}_roc_curve.png')
|
plt.savefig(f'{filename_prefix}{dataset_name}_roc_curve.png')
|
||||||
|
|
@ -94,9 +95,10 @@ class DatasetAttackMembership(DatasetAttack):
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def calculate_metrics(member_probabilities: np.ndarray, non_member_probabilities: np.ndarray):
|
def calculate_metrics(member_probabilities: np.ndarray, non_member_probabilities: np.ndarray):
|
||||||
"""
|
"""
|
||||||
Calculate attack performance metrics
|
Calculate attack performance metrics.
|
||||||
:param member_probabilities: probability estimates of the member samples, the training data
|
|
||||||
:param non_member_probabilities: probability estimates of the non-member samples, the hold-out data
|
:param member_probabilities: probability estimates of the member samples, the training data.
|
||||||
|
:param non_member_probabilities: probability estimates of the non-member samples, the hold-out data.
|
||||||
:return:
|
:return:
|
||||||
fpr: False Positive rate
|
fpr: False Positive rate
|
||||||
tpr: True Positive rate
|
tpr: True Positive rate
|
||||||
|
|
|
||||||
|
|
@ -19,18 +19,18 @@ from apt.utils.datasets import ArrayDataset
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class DatasetAttackConfigMembershipKnnProbabilities(Config):
|
class DatasetAttackConfigMembershipKnnProbabilities(Config):
|
||||||
"""Configuration for DatasetAttackMembershipKnnProbabilities.
|
"""
|
||||||
|
Configuration for DatasetAttackMembershipKnnProbabilities.
|
||||||
|
|
||||||
Attributes:
|
:param k: Number of nearest neighbors to search.
|
||||||
k: Number of nearest neighbors to search
|
:param use_batches: Divide query samples into batches or not.
|
||||||
use_batches: Divide query samples into batches or not.
|
:param batch_size: Query sample batch size.
|
||||||
batch_size: Query sample batch size.
|
:param compute_distance: A callable function, which takes two arrays representing 1D vectors as inputs and must
|
||||||
compute_distance: A callable function, which takes two arrays representing 1D vectors as inputs and must return
|
return one value indicating the distance between those vectors.
|
||||||
one value indicating the distance between those vectors.
|
See 'metric' parameter in sklearn.neighbors.NearestNeighbors documentation.
|
||||||
See 'metric' parameter in sklearn.neighbors.NearestNeighbors documentation.
|
:param distance_params: Additional keyword arguments for the distance computation function, see 'metric_params' in
|
||||||
distance_params: Additional keyword arguments for the distance computation function, see 'metric_params' in
|
sklearn.neighbors.NearestNeighbors documentation.
|
||||||
sklearn.neighbors.NearestNeighbors documentation.
|
:param generate_plot: Generate or not an AUC ROC curve and persist it in a file.
|
||||||
generate_plot: Generate or not an AUR ROC curve and persist it in a file
|
|
||||||
"""
|
"""
|
||||||
k: int = 5
|
k: int = 5
|
||||||
use_batches: bool = False
|
use_batches: bool = False
|
||||||
|
|
@ -42,7 +42,14 @@ class DatasetAttackConfigMembershipKnnProbabilities(Config):
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class DatasetAttackScoreMembershipKnnProbabilities(DatasetAttackScore):
|
class DatasetAttackScoreMembershipKnnProbabilities(DatasetAttackScore):
|
||||||
"""DatasetAttackMembershipKnnProbabilities privacy risk score.
|
"""
|
||||||
|
DatasetAttackMembershipKnnProbabilities privacy risk score.
|
||||||
|
|
||||||
|
:param dataset_name: dataset name to be used in reports
|
||||||
|
:param roc_auc_score: the area under the receiver operating characteristic curve (AUC ROC) to evaluate the
|
||||||
|
attack performance.
|
||||||
|
:param average_precision_score: the proportion of predicted members that are correctly members.
|
||||||
|
:param result: the result of the membership inference attack.
|
||||||
"""
|
"""
|
||||||
roc_auc_score: float
|
roc_auc_score: float
|
||||||
average_precision_score: float
|
average_precision_score: float
|
||||||
|
|
@ -50,13 +57,6 @@ class DatasetAttackScoreMembershipKnnProbabilities(DatasetAttackScore):
|
||||||
|
|
||||||
def __init__(self, dataset_name: str, roc_auc_score: float, average_precision_score: float,
|
def __init__(self, dataset_name: str, roc_auc_score: float, average_precision_score: float,
|
||||||
result: DatasetAttackResultMembership) -> None:
|
result: DatasetAttackResultMembership) -> None:
|
||||||
"""
|
|
||||||
dataset_name: dataset name to be used in reports
|
|
||||||
roc_auc_score: the area under the receiver operating characteristic curve (AUC ROC) to evaluate the attack
|
|
||||||
performance.
|
|
||||||
average_precision_score: the proportion of predicted members that are correctly members
|
|
||||||
result: the result of the membership inference attack
|
|
||||||
"""
|
|
||||||
super().__init__(dataset_name=dataset_name, risk_score=roc_auc_score, result=result)
|
super().__init__(dataset_name=dataset_name, risk_score=roc_auc_score, result=result)
|
||||||
self.roc_auc_score = roc_auc_score
|
self.roc_auc_score = roc_auc_score
|
||||||
self.average_precision_score = average_precision_score
|
self.average_precision_score = average_precision_score
|
||||||
|
|
@ -64,24 +64,23 @@ class DatasetAttackScoreMembershipKnnProbabilities(DatasetAttackScore):
|
||||||
|
|
||||||
class DatasetAttackMembershipKnnProbabilities(DatasetAttackMembership):
|
class DatasetAttackMembershipKnnProbabilities(DatasetAttackMembership):
|
||||||
"""
|
"""
|
||||||
Privacy risk assessment for synthetic datasets based on Black-Box MIA attack using distances of
|
Privacy risk assessment for synthetic datasets based on Black-Box MIA attack using distances of
|
||||||
members (training set) and non-members (holdout set) from their nearest neighbors in the synthetic dataset.
|
members (training set) and non-members (holdout set) from their nearest neighbors in the synthetic dataset.
|
||||||
By default, the Euclidean distance is used (L2 norm), but another ``compute_distance()`` method can be provided
|
By default, the Euclidean distance is used (L2 norm), but another ``compute_distance()`` method can be provided
|
||||||
in configuration instead.
|
in configuration instead.
|
||||||
The area under the receiver operating characteristic curve (AUC ROC) gives the privacy risk measure.
|
The area under the receiver operating characteristic curve (AUC ROC) gives the privacy risk measure.
|
||||||
|
|
||||||
|
:param original_data_members: A container for the training original samples and labels
|
||||||
|
:param original_data_non_members: A container for the holdout original samples and labels
|
||||||
|
:param synthetic_data: A container for the synthetic samples and labels
|
||||||
|
:param config: Configuration parameters to guide the attack, optional
|
||||||
|
:param dataset_name: A name to identify this dataset, optional
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, original_data_members: ArrayDataset, original_data_non_members: ArrayDataset,
|
def __init__(self, original_data_members: ArrayDataset, original_data_non_members: ArrayDataset,
|
||||||
synthetic_data: ArrayDataset,
|
synthetic_data: ArrayDataset,
|
||||||
config: DatasetAttackConfigMembershipKnnProbabilities = DatasetAttackConfigMembershipKnnProbabilities(),
|
config: DatasetAttackConfigMembershipKnnProbabilities = DatasetAttackConfigMembershipKnnProbabilities(),
|
||||||
dataset_name: str = DEFAULT_DATASET_NAME):
|
dataset_name: str = DEFAULT_DATASET_NAME):
|
||||||
"""
|
|
||||||
:param original_data_members: A container for the training original samples and labels
|
|
||||||
:param original_data_non_members: A container for the holdout original samples and labels
|
|
||||||
:param synthetic_data: A container for the synthetic samples and labels
|
|
||||||
:param config: Configuration parameters to guide the attack, optional
|
|
||||||
:param dataset_name: A name to identify this dataset, optional
|
|
||||||
"""
|
|
||||||
attack_strategy_utils = KNNAttackStrategyUtils(config.use_batches, config.batch_size)
|
attack_strategy_utils = KNNAttackStrategyUtils(config.use_batches, config.batch_size)
|
||||||
super().__init__(original_data_members, original_data_non_members, synthetic_data, config, dataset_name,
|
super().__init__(original_data_members, original_data_non_members, synthetic_data, config, dataset_name,
|
||||||
attack_strategy_utils)
|
attack_strategy_utils)
|
||||||
|
|
@ -103,10 +102,9 @@ class DatasetAttackMembershipKnnProbabilities(DatasetAttackMembership):
|
||||||
by the Parzen window density estimation in ``probability_per_sample()``, computed from the NN distances from the
|
by the Parzen window density estimation in ``probability_per_sample()``, computed from the NN distances from the
|
||||||
query samples to the synthetic data samples.
|
query samples to the synthetic data samples.
|
||||||
|
|
||||||
:return:
|
:return: Privacy score of the attack together with the attack result with the probabilities of member and
|
||||||
Privacy score of the attack together with the attack result with the probabilities of member and
|
non-member samples to be generated by the synthetic data generator based on the NN distances from the
|
||||||
non-member samples to be generated by the synthetic data generator based on the NN distances from the
|
query samples to the synthetic data samples
|
||||||
query samples to the synthetic data samples
|
|
||||||
"""
|
"""
|
||||||
# nearest neighbor search
|
# nearest neighbor search
|
||||||
self.attack_strategy_utils.fit(self.knn_learner, self.synthetic_data)
|
self.attack_strategy_utils.fit(self.knn_learner, self.synthetic_data)
|
||||||
|
|
@ -130,11 +128,11 @@ class DatasetAttackMembershipKnnProbabilities(DatasetAttackMembership):
|
||||||
"""
|
"""
|
||||||
Evaluate privacy score from the probabilities of member and non-member samples to be generated by the synthetic
|
Evaluate privacy score from the probabilities of member and non-member samples to be generated by the synthetic
|
||||||
data generator. The probabilities are computed by the ``assess_privacy()`` method.
|
data generator. The probabilities are computed by the ``assess_privacy()`` method.
|
||||||
:param dataset_attack_result attack result containing probabilities of member and non-member samples to be
|
|
||||||
generated by the synthetic data generator
|
:param dataset_attack_result: attack result containing probabilities of member and non-member samples to be
|
||||||
:param generate_plot generate AUC ROC curve plot and persist it
|
generated by the synthetic data generator.
|
||||||
:return:
|
:param generate_plot: generate AUC ROC curve plot and persist it.
|
||||||
score of the attack, based on distance-based probabilities - mainly the ROC AUC score
|
:return: score of the attack, based on distance-based probabilities - mainly the ROC AUC score.
|
||||||
"""
|
"""
|
||||||
member_proba, non_member_proba = \
|
member_proba, non_member_proba = \
|
||||||
dataset_attack_result.member_probabilities, dataset_attack_result.non_member_probabilities
|
dataset_attack_result.member_probabilities, dataset_attack_result.non_member_probabilities
|
||||||
|
|
@ -151,10 +149,10 @@ class DatasetAttackMembershipKnnProbabilities(DatasetAttackMembership):
|
||||||
"""
|
"""
|
||||||
For every sample represented by its distance from the query sample to its KNN in synthetic data,
|
For every sample represented by its distance from the query sample to its KNN in synthetic data,
|
||||||
computes the probability of the synthetic data to be part of the query dataset.
|
computes the probability of the synthetic data to be part of the query dataset.
|
||||||
|
|
||||||
:param distances: distance between every query sample in batch to its KNNs among synthetic samples, a numpy
|
:param distances: distance between every query sample in batch to its KNNs among synthetic samples, a numpy
|
||||||
array of size (n, k) with n being the number of samples, k - the number of KNNs
|
array of size (n, k) with n being the number of samples, k - the number of KNNs.
|
||||||
:return:
|
:return: probability estimates of the query samples being generated and so - of being part of the synthetic set,
|
||||||
probability estimates of the query samples being generated and so - of being part of the synthetic set, a
|
a numpy array of size (n,)
|
||||||
numpy array of size (n,)
|
|
||||||
"""
|
"""
|
||||||
return np.average(np.exp(-distances), axis=1)
|
return np.average(np.exp(-distances), axis=1)
|
||||||
|
|
|
||||||
|
|
@ -8,11 +8,21 @@ DEFAULT_DATASET_NAME = "dataset"
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class DatasetAttackResult:
|
class DatasetAttackResult:
|
||||||
|
"""
|
||||||
|
Basic class for storing privacy risk assessment results.
|
||||||
|
"""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class DatasetAttackScore:
|
class DatasetAttackScore:
|
||||||
|
"""
|
||||||
|
Basic class for storing privacy risk assessment scores.
|
||||||
|
|
||||||
|
:param dataset_name: The name of the dataset that was assessed.
|
||||||
|
:param risk_score: The privacy risk score.
|
||||||
|
:param result: An optional list of more detailed results.
|
||||||
|
"""
|
||||||
dataset_name: str
|
dataset_name: str
|
||||||
risk_score: float
|
risk_score: float
|
||||||
result: Optional[DatasetAttackResult]
|
result: Optional[DatasetAttackResult]
|
||||||
|
|
@ -20,5 +30,11 @@ class DatasetAttackScore:
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class DatasetAttackResultMembership(DatasetAttackResult):
|
class DatasetAttackResultMembership(DatasetAttackResult):
|
||||||
|
"""
|
||||||
|
Class for storing membership attack results.
|
||||||
|
|
||||||
|
:param member_probabilities: The attack probabilities for member samples.
|
||||||
|
:param non_member_probabilities: The attack probabilities for non-member samples.
|
||||||
|
"""
|
||||||
member_probabilities: np.ndarray
|
member_probabilities: np.ndarray
|
||||||
non_member_probabilities: np.ndarray
|
non_member_probabilities: np.ndarray
|
||||||
|
|
|
||||||
|
|
@ -20,16 +20,16 @@ K = 1 # Number of nearest neighbors to search. For DCR we need only the nearest
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class DatasetAttackConfigWholeDatasetKnnDistance(Config):
|
class DatasetAttackConfigWholeDatasetKnnDistance(Config):
|
||||||
"""Configuration for DatasetAttackWholeDatasetKnnDistance.
|
"""
|
||||||
|
Configuration for DatasetAttackWholeDatasetKnnDistance.
|
||||||
|
|
||||||
Attributes:
|
:param use_batches: Divide query samples into batches or not.
|
||||||
use_batches: Divide query samples into batches or not.
|
:param batch_size: Query sample batch size.
|
||||||
batch_size: Query sample batch size.
|
:param compute_distance: A callable function, which takes two arrays representing 1D vectors as inputs and must
|
||||||
compute_distance: A callable function, which takes two arrays representing 1D vectors as inputs and must return
|
return one value indicating the distance between those vectors.
|
||||||
one value indicating the distance between those vectors.
|
See 'metric' parameter in sklearn.neighbors.NearestNeighbors documentation.
|
||||||
See 'metric' parameter in sklearn.neighbors.NearestNeighbors documentation.
|
:param distance_params: Additional keyword arguments for the distance computation function, see 'metric_params' in
|
||||||
distance_params: Additional keyword arguments for the distance computation function, see 'metric_params' in
|
sklearn.neighbors.NearestNeighbors documentation.
|
||||||
sklearn.neighbors.NearestNeighbors documentation.
|
|
||||||
"""
|
"""
|
||||||
use_batches: bool = False
|
use_batches: bool = False
|
||||||
batch_size: int = 10
|
batch_size: int = 10
|
||||||
|
|
@ -39,41 +39,40 @@ class DatasetAttackConfigWholeDatasetKnnDistance(Config):
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class DatasetAttackScoreWholeDatasetKnnDistance(DatasetAttackScore):
|
class DatasetAttackScoreWholeDatasetKnnDistance(DatasetAttackScore):
|
||||||
"""DatasetAttackWholeDatasetKnnDistance privacy risk score.
|
"""
|
||||||
|
DatasetAttackWholeDatasetKnnDistance privacy risk score.
|
||||||
|
|
||||||
|
:param dataset_name: Dataset name to be used in reports.
|
||||||
|
:param share: The share of synthetic records closer to the training than the holdout dataset.
|
||||||
|
A value of 0.5 or close to it means good privacy.
|
||||||
"""
|
"""
|
||||||
share: float
|
share: float
|
||||||
assessment_type: str = 'WholeDatasetKnnDistance' # to be used in reports
|
assessment_type: str = 'WholeDatasetKnnDistance' # to be used in reports
|
||||||
|
|
||||||
def __init__(self, dataset_name: str, share: float) -> None:
|
def __init__(self, dataset_name: str, share: float) -> None:
|
||||||
"""
|
|
||||||
dataset_name: dataset name to be used in reports
|
|
||||||
share : the share of synthetic records closer to the training than the holdout dataset.
|
|
||||||
A value of 0.5 or close to it means good privacy.
|
|
||||||
"""
|
|
||||||
super().__init__(dataset_name=dataset_name, risk_score=share, result=None)
|
super().__init__(dataset_name=dataset_name, risk_score=share, result=None)
|
||||||
self.share = share
|
self.share = share
|
||||||
|
|
||||||
|
|
||||||
class DatasetAttackWholeDatasetKnnDistance(DatasetAttack):
|
class DatasetAttackWholeDatasetKnnDistance(DatasetAttack):
|
||||||
"""
|
"""
|
||||||
Privacy risk assessment for synthetic datasets based on distances of synthetic data records from
|
Privacy risk assessment for synthetic datasets based on distances of synthetic data records from
|
||||||
members (training set) and non-members (holdout set). The privacy risk measure is the share of synthetic
|
members (training set) and non-members (holdout set). The privacy risk measure is the share of synthetic
|
||||||
records closer to the training than the holdout dataset.
|
records closer to the training than the holdout dataset.
|
||||||
By default, the Euclidean distance is used (L2 norm), but another compute_distance() method can be provided in
|
By default, the Euclidean distance is used (L2 norm), but another compute_distance() method can be provided in
|
||||||
configuration instead.
|
configuration instead.
|
||||||
|
|
||||||
|
:param original_data_members: A container for the training original samples and labels.
|
||||||
|
:param original_data_non_members: A container for the holdout original samples and labels.
|
||||||
|
:param synthetic_data: A container for the synthetic samples and labels.
|
||||||
|
:param config: Configuration parameters to guide the assessment process, optional.
|
||||||
|
:param dataset_name: A name to identify this dataset, optional.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, original_data_members: ArrayDataset, original_data_non_members: ArrayDataset,
|
def __init__(self, original_data_members: ArrayDataset, original_data_non_members: ArrayDataset,
|
||||||
synthetic_data: ArrayDataset,
|
synthetic_data: ArrayDataset,
|
||||||
config: DatasetAttackConfigWholeDatasetKnnDistance = DatasetAttackConfigWholeDatasetKnnDistance(),
|
config: DatasetAttackConfigWholeDatasetKnnDistance = DatasetAttackConfigWholeDatasetKnnDistance(),
|
||||||
dataset_name: str = DEFAULT_DATASET_NAME):
|
dataset_name: str = DEFAULT_DATASET_NAME):
|
||||||
"""
|
|
||||||
:param original_data_members: A container for the training original samples and labels
|
|
||||||
:param original_data_non_members: A container for the holdout original samples and labels
|
|
||||||
:param synthetic_data: A container for the synthetic samples and labels
|
|
||||||
:param config: Configuration parameters to guide the assessment process, optional
|
|
||||||
:param dataset_name: A name to identify this dataset, optional
|
|
||||||
"""
|
|
||||||
attack_strategy_utils = KNNAttackStrategyUtils(config.use_batches, config.batch_size)
|
attack_strategy_utils = KNNAttackStrategyUtils(config.use_batches, config.batch_size)
|
||||||
super().__init__(original_data_members, original_data_non_members, synthetic_data, config, dataset_name,
|
super().__init__(original_data_members, original_data_non_members, synthetic_data, config, dataset_name,
|
||||||
attack_strategy_utils)
|
attack_strategy_utils)
|
||||||
|
|
@ -90,6 +89,7 @@ class DatasetAttackWholeDatasetKnnDistance(DatasetAttack):
|
||||||
"""
|
"""
|
||||||
Calculate the share of synthetic records closer to the training than the holdout dataset, based on the
|
Calculate the share of synthetic records closer to the training than the holdout dataset, based on the
|
||||||
DCR computed by 'calculate_distances()'.
|
DCR computed by 'calculate_distances()'.
|
||||||
|
|
||||||
:return:
|
:return:
|
||||||
score of the attack, based on the NN distances from the query samples to the synthetic data samples
|
score of the attack, based on the NN distances from the query samples to the synthetic data samples
|
||||||
"""
|
"""
|
||||||
|
|
|
||||||
|
|
@ -2,9 +2,6 @@ from typing import Optional
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
import tensorflow as tf
|
|
||||||
from tensorflow import keras
|
|
||||||
|
|
||||||
from sklearn.metrics import mean_squared_error
|
from sklearn.metrics import mean_squared_error
|
||||||
|
|
||||||
from apt.utils.models import Model, ModelOutputType, ScoringMethod, check_correct_model_output
|
from apt.utils.models import Model, ModelOutputType, ScoringMethod, check_correct_model_output
|
||||||
|
|
@ -14,8 +11,6 @@ from art.utils import check_and_transform_label_format
|
||||||
from art.estimators.classification.keras import KerasClassifier as ArtKerasClassifier
|
from art.estimators.classification.keras import KerasClassifier as ArtKerasClassifier
|
||||||
from art.estimators.regression.keras import KerasRegressor as ArtKerasRegressor
|
from art.estimators.regression.keras import KerasRegressor as ArtKerasRegressor
|
||||||
|
|
||||||
tf.compat.v1.disable_eager_execution()
|
|
||||||
|
|
||||||
|
|
||||||
class KerasModel(Model):
|
class KerasModel(Model):
|
||||||
"""
|
"""
|
||||||
|
|
@ -41,7 +36,7 @@ class KerasClassifier(KerasModel):
|
||||||
queries that can be submitted. Default is True.
|
queries that can be submitted. Default is True.
|
||||||
:type unlimited_queries: boolean, optional
|
:type unlimited_queries: boolean, optional
|
||||||
"""
|
"""
|
||||||
def __init__(self, model: keras.models.Model, output_type: ModelOutputType, black_box_access: Optional[bool] = True,
|
def __init__(self, model: "keras.models.Model", output_type: ModelOutputType, black_box_access: Optional[bool] = True,
|
||||||
unlimited_queries: Optional[bool] = True, **kwargs):
|
unlimited_queries: Optional[bool] = True, **kwargs):
|
||||||
super().__init__(model, output_type, black_box_access, unlimited_queries, **kwargs)
|
super().__init__(model, output_type, black_box_access, unlimited_queries, **kwargs)
|
||||||
logits = False
|
logits = False
|
||||||
|
|
@ -107,7 +102,7 @@ class KerasRegressor(KerasModel):
|
||||||
queries that can be submitted. Default is True.
|
queries that can be submitted. Default is True.
|
||||||
:type unlimited_queries: boolean, optional
|
:type unlimited_queries: boolean, optional
|
||||||
"""
|
"""
|
||||||
def __init__(self, model: keras.models.Model, black_box_access: Optional[bool] = True,
|
def __init__(self, model: "keras.models.Model", black_box_access: Optional[bool] = True,
|
||||||
unlimited_queries: Optional[bool] = True, **kwargs):
|
unlimited_queries: Optional[bool] = True, **kwargs):
|
||||||
super().__init__(model, ModelOutputType.REGRESSOR_SCALAR, black_box_access, unlimited_queries, **kwargs)
|
super().__init__(model, ModelOutputType.REGRESSOR_SCALAR, black_box_access, unlimited_queries, **kwargs)
|
||||||
self._art_model = ArtKerasRegressor(model)
|
self._art_model = ArtKerasRegressor(model)
|
||||||
|
|
|
||||||
|
|
@ -31,7 +31,9 @@ class PyTorchClassifierWrapper(ArtPyTorchClassifier):
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def get_step_correct(self, outputs, targets) -> int:
|
def get_step_correct(self, outputs, targets) -> int:
|
||||||
"""get number of correctly classified labels"""
|
"""
|
||||||
|
Get number of correctly classified labels.
|
||||||
|
"""
|
||||||
if len(outputs) != len(targets):
|
if len(outputs) != len(targets):
|
||||||
raise ValueError("outputs and targets should be the same length.")
|
raise ValueError("outputs and targets should be the same length.")
|
||||||
if self.nb_classes > 1:
|
if self.nb_classes > 1:
|
||||||
|
|
@ -40,7 +42,9 @@ class PyTorchClassifierWrapper(ArtPyTorchClassifier):
|
||||||
return int(torch.sum(torch.round(outputs, axis=-1) == targets).item())
|
return int(torch.sum(torch.round(outputs, axis=-1) == targets).item())
|
||||||
|
|
||||||
def _eval(self, loader: DataLoader):
|
def _eval(self, loader: DataLoader):
|
||||||
"""inner function for model evaluation"""
|
"""
|
||||||
|
Inner function for model evaluation.
|
||||||
|
"""
|
||||||
self.model.eval()
|
self.model.eval()
|
||||||
|
|
||||||
total_loss = 0
|
total_loss = 0
|
||||||
|
|
@ -74,19 +78,20 @@ class PyTorchClassifierWrapper(ArtPyTorchClassifier):
|
||||||
) -> None:
|
) -> None:
|
||||||
"""
|
"""
|
||||||
Fit the classifier on the training set `(x, y)`.
|
Fit the classifier on the training set `(x, y)`.
|
||||||
|
|
||||||
:param x: Training data.
|
:param x: Training data.
|
||||||
:param y: Target values (class labels) one-hot-encoded of shape (nb_samples, nb_classes) or index labels
|
:param y: Target values (class labels) one-hot-encoded of shape (nb_samples, nb_classes) or index labels
|
||||||
of shape (nb_samples,).
|
of shape (nb_samples,).
|
||||||
:param x_validation: Validation data (optional).
|
:param x_validation: Validation data (optional).
|
||||||
:param y_validation: Target validation values (class labels) one-hot-encoded of shape
|
:param y_validation: Target validation values (class labels) one-hot-encoded of shape
|
||||||
(nb_samples, nb_classes) or index labels of shape (nb_samples,) (optional).
|
(nb_samples, nb_classes) or index labels of shape (nb_samples,) (optional).
|
||||||
:param batch_size: Size of batches.
|
:param batch_size: Size of batches.
|
||||||
:param nb_epochs: Number of epochs to use for training.
|
:param nb_epochs: Number of epochs to use for training.
|
||||||
:param save_checkpoints: Boolean, save checkpoints if True.
|
:param save_checkpoints: Boolean, save checkpoints if True.
|
||||||
:param save_entire_model: Boolean, save entire model if True, else save state dict.
|
:param save_entire_model: Boolean, save entire model if True, else save state dict.
|
||||||
:param path: path for saving checkpoint.
|
:param path: path for saving checkpoint.
|
||||||
:param kwargs: Dictionary of framework-specific arguments. This parameter is not currently
|
:param kwargs: Dictionary of framework-specific arguments. This parameter is not currently
|
||||||
supported for PyTorch and providing it has no effect.
|
supported for PyTorch and providing it has no effect.
|
||||||
"""
|
"""
|
||||||
# Put the model in the training mode
|
# Put the model in the training mode
|
||||||
self._model.train()
|
self._model.train()
|
||||||
|
|
@ -153,7 +158,8 @@ class PyTorchClassifierWrapper(ArtPyTorchClassifier):
|
||||||
|
|
||||||
def save_checkpoint_state_dict(self, is_best: bool, path=os.getcwd(), filename="latest.tar") -> None:
|
def save_checkpoint_state_dict(self, is_best: bool, path=os.getcwd(), filename="latest.tar") -> None:
|
||||||
"""
|
"""
|
||||||
Saves checkpoint as latest.tar or best.tar
|
Saves checkpoint as latest.tar or best.tar.
|
||||||
|
|
||||||
:param is_best: whether the model is the best achieved model
|
:param is_best: whether the model is the best achieved model
|
||||||
:param path: path for saving checkpoint
|
:param path: path for saving checkpoint
|
||||||
:param filename: checkpoint name
|
:param filename: checkpoint name
|
||||||
|
|
@ -176,7 +182,8 @@ class PyTorchClassifierWrapper(ArtPyTorchClassifier):
|
||||||
|
|
||||||
def save_checkpoint_model(self, is_best: bool, path=os.getcwd(), filename="latest.tar") -> None:
|
def save_checkpoint_model(self, is_best: bool, path=os.getcwd(), filename="latest.tar") -> None:
|
||||||
"""
|
"""
|
||||||
Saves checkpoint as latest.tar or best.tar
|
Saves checkpoint as latest.tar or best.tar.
|
||||||
|
|
||||||
:param is_best: whether the model is the best achieved model
|
:param is_best: whether the model is the best achieved model
|
||||||
:param path: path for saving checkpoint
|
:param path: path for saving checkpoint
|
||||||
:param filename: checkpoint name
|
:param filename: checkpoint name
|
||||||
|
|
@ -194,7 +201,8 @@ class PyTorchClassifierWrapper(ArtPyTorchClassifier):
|
||||||
|
|
||||||
def load_checkpoint_state_dict_by_path(self, model_name: str, path: str = None):
|
def load_checkpoint_state_dict_by_path(self, model_name: str, path: str = None):
|
||||||
"""
|
"""
|
||||||
Load model only based on the check point path
|
Load model only based on the check point path.
|
||||||
|
|
||||||
:param model_name: check point filename
|
:param model_name: check point filename
|
||||||
:param path: checkpoint path (default current work dir)
|
:param path: checkpoint path (default current work dir)
|
||||||
:return: loaded model
|
:return: loaded model
|
||||||
|
|
@ -219,21 +227,24 @@ class PyTorchClassifierWrapper(ArtPyTorchClassifier):
|
||||||
|
|
||||||
def load_latest_state_dict_checkpoint(self):
|
def load_latest_state_dict_checkpoint(self):
|
||||||
"""
|
"""
|
||||||
Load model state dict only based on the check point path (latest.tar)
|
Load model state dict only based on the check point path (latest.tar).
|
||||||
|
|
||||||
:return: loaded model
|
:return: loaded model
|
||||||
"""
|
"""
|
||||||
self.load_checkpoint_state_dict_by_path("latest.tar")
|
self.load_checkpoint_state_dict_by_path("latest.tar")
|
||||||
|
|
||||||
def load_best_state_dict_checkpoint(self):
|
def load_best_state_dict_checkpoint(self):
|
||||||
"""
|
"""
|
||||||
Load model state dict only based on the check point path (model_best.tar)
|
Load model state dict only based on the check point path (model_best.tar).
|
||||||
|
|
||||||
:return: loaded model
|
:return: loaded model
|
||||||
"""
|
"""
|
||||||
self.load_checkpoint_state_dict_by_path("model_best.tar")
|
self.load_checkpoint_state_dict_by_path("model_best.tar")
|
||||||
|
|
||||||
def load_checkpoint_model_by_path(self, model_name: str, path: str = None):
|
def load_checkpoint_model_by_path(self, model_name: str, path: str = None):
|
||||||
"""
|
"""
|
||||||
Load model only based on the check point path
|
Load model only based on the check point path.
|
||||||
|
|
||||||
:param model_name: check point filename
|
:param model_name: check point filename
|
||||||
:param path: checkpoint path (default current work dir)
|
:param path: checkpoint path (default current work dir)
|
||||||
:return: loaded model
|
:return: loaded model
|
||||||
|
|
@ -254,14 +265,16 @@ class PyTorchClassifierWrapper(ArtPyTorchClassifier):
|
||||||
|
|
||||||
def load_latest_model_checkpoint(self):
|
def load_latest_model_checkpoint(self):
|
||||||
"""
|
"""
|
||||||
Load entire model only based on the check point path (latest.tar)
|
Load entire model only based on the check point path (latest.tar).
|
||||||
|
|
||||||
:return: loaded model
|
:return: loaded model
|
||||||
"""
|
"""
|
||||||
self.load_checkpoint_model_by_path("latest.tar")
|
self.load_checkpoint_model_by_path("latest.tar")
|
||||||
|
|
||||||
def load_best_model_checkpoint(self):
|
def load_best_model_checkpoint(self):
|
||||||
"""
|
"""
|
||||||
Load entire model only based on the check point path (model_best.tar)
|
Load entire model only based on the check point path (model_best.tar).
|
||||||
|
|
||||||
:return: loaded model
|
:return: loaded model
|
||||||
"""
|
"""
|
||||||
self.load_checkpoint_model_by_path("model_best.tar")
|
self.load_checkpoint_model_by_path("model_best.tar")
|
||||||
|
|
@ -288,11 +301,11 @@ class PyTorchClassifier(PyTorchModel):
|
||||||
Initialization specifically for the PyTorch-based implementation.
|
Initialization specifically for the PyTorch-based implementation.
|
||||||
|
|
||||||
:param model: PyTorch model. The output of the model can be logits, probabilities or anything else. Logits
|
:param model: PyTorch model. The output of the model can be logits, probabilities or anything else. Logits
|
||||||
output should be preferred where possible to ensure attack efficiency.
|
output should be preferred where possible to ensure attack efficiency.
|
||||||
:param output_type: The type of output the model yields (vector/label only for classifiers,
|
:param output_type: The type of output the model yields (vector/label only for classifiers,
|
||||||
value for regressors)
|
value for regressors)
|
||||||
:param loss: The loss function for which to compute gradients for training. The target label must be raw
|
:param loss: The loss function for which to compute gradients for training. The target label must be raw
|
||||||
categorical, i.e. not converted to one-hot encoding.
|
categorical, i.e. not converted to one-hot encoding.
|
||||||
:param input_shape: The shape of one input instance.
|
:param input_shape: The shape of one input instance.
|
||||||
:param optimizer: The optimizer used to train the classifier.
|
:param optimizer: The optimizer used to train the classifier.
|
||||||
:param black_box_access: Boolean describing the type of deployment of the model (when in production).
|
:param black_box_access: Boolean describing the type of deployment of the model (when in production).
|
||||||
|
|
@ -311,7 +324,7 @@ class PyTorchClassifier(PyTorchModel):
|
||||||
@property
|
@property
|
||||||
def loss(self):
|
def loss(self):
|
||||||
"""
|
"""
|
||||||
The pytorch model's loss function
|
The pytorch model's loss function.
|
||||||
|
|
||||||
:return: The pytorch model's loss function
|
:return: The pytorch model's loss function
|
||||||
"""
|
"""
|
||||||
|
|
@ -320,7 +333,7 @@ class PyTorchClassifier(PyTorchModel):
|
||||||
@property
|
@property
|
||||||
def optimizer(self):
|
def optimizer(self):
|
||||||
"""
|
"""
|
||||||
The pytorch model's optimizer
|
The pytorch model's optimizer.
|
||||||
|
|
||||||
:return: The pytorch model's optimizer
|
:return: The pytorch model's optimizer
|
||||||
"""
|
"""
|
||||||
|
|
@ -350,7 +363,7 @@ class PyTorchClassifier(PyTorchModel):
|
||||||
:param save_entire_model: Boolean, save entire model if True, else save state dict.
|
:param save_entire_model: Boolean, save entire model if True, else save state dict.
|
||||||
:param path: path for saving checkpoint.
|
:param path: path for saving checkpoint.
|
||||||
:param kwargs: Dictionary of framework-specific arguments. This parameter is not currently
|
:param kwargs: Dictionary of framework-specific arguments. This parameter is not currently
|
||||||
supported for PyTorch and providing it has no effect.
|
supported for PyTorch and providing it has no effect.
|
||||||
"""
|
"""
|
||||||
if validation_data is None:
|
if validation_data is None:
|
||||||
self._art_model.fit(
|
self._art_model.fit(
|
||||||
|
|
@ -390,6 +403,7 @@ class PyTorchClassifier(PyTorchModel):
|
||||||
def score(self, test_data: PytorchData, **kwargs):
|
def score(self, test_data: PytorchData, **kwargs):
|
||||||
"""
|
"""
|
||||||
Score the model using test data.
|
Score the model using test data.
|
||||||
|
|
||||||
:param test_data: Test data.
|
:param test_data: Test data.
|
||||||
:type test_data: `PytorchData`
|
:type test_data: `PytorchData`
|
||||||
:return: the score as float (between 0 and 1)
|
:return: the score as float (between 0 and 1)
|
||||||
|
|
@ -400,7 +414,8 @@ class PyTorchClassifier(PyTorchModel):
|
||||||
|
|
||||||
def load_checkpoint_state_dict_by_path(self, model_name: str, path: str = None):
|
def load_checkpoint_state_dict_by_path(self, model_name: str, path: str = None):
|
||||||
"""
|
"""
|
||||||
Load model only based on the check point path
|
Load model only based on the check point path.
|
||||||
|
|
||||||
:param model_name: check point filename
|
:param model_name: check point filename
|
||||||
:param path: checkpoint path (default current work dir)
|
:param path: checkpoint path (default current work dir)
|
||||||
:return: loaded model
|
:return: loaded model
|
||||||
|
|
@ -409,21 +424,24 @@ class PyTorchClassifier(PyTorchModel):
|
||||||
|
|
||||||
def load_latest_state_dict_checkpoint(self):
|
def load_latest_state_dict_checkpoint(self):
|
||||||
"""
|
"""
|
||||||
Load model state dict only based on the check point path (latest.tar)
|
Load model state dict only based on the check point path (latest.tar).
|
||||||
|
|
||||||
:return: loaded model
|
:return: loaded model
|
||||||
"""
|
"""
|
||||||
self._art_model.load_latest_state_dict_checkpoint()
|
self._art_model.load_latest_state_dict_checkpoint()
|
||||||
|
|
||||||
def load_best_state_dict_checkpoint(self):
|
def load_best_state_dict_checkpoint(self):
|
||||||
"""
|
"""
|
||||||
Load model state dict only based on the check point path (model_best.tar)
|
Load model state dict only based on the check point path (model_best.tar).
|
||||||
|
|
||||||
:return: loaded model
|
:return: loaded model
|
||||||
"""
|
"""
|
||||||
self._art_model.load_best_state_dict_checkpoint()
|
self._art_model.load_best_state_dict_checkpoint()
|
||||||
|
|
||||||
def load_checkpoint_model_by_path(self, model_name: str, path: str = None):
|
def load_checkpoint_model_by_path(self, model_name: str, path: str = None):
|
||||||
"""
|
"""
|
||||||
Load model only based on the check point path
|
Load model only based on the check point path.
|
||||||
|
|
||||||
:param model_name: check point filename
|
:param model_name: check point filename
|
||||||
:param path: checkpoint path (default current work dir)
|
:param path: checkpoint path (default current work dir)
|
||||||
:return: loaded model
|
:return: loaded model
|
||||||
|
|
@ -432,14 +450,16 @@ class PyTorchClassifier(PyTorchModel):
|
||||||
|
|
||||||
def load_latest_model_checkpoint(self):
|
def load_latest_model_checkpoint(self):
|
||||||
"""
|
"""
|
||||||
Load entire model only based on the check point path (latest.tar)
|
Load entire model only based on the check point path (latest.tar).
|
||||||
|
|
||||||
:return: loaded model
|
:return: loaded model
|
||||||
"""
|
"""
|
||||||
self._art_model.load_latest_model_checkpoint()
|
self._art_model.load_latest_model_checkpoint()
|
||||||
|
|
||||||
def load_best_model_checkpoint(self):
|
def load_best_model_checkpoint(self):
|
||||||
"""
|
"""
|
||||||
Load entire model only based on the check point path (model_best.tar)
|
Load entire model only based on the check point path (model_best.tar).
|
||||||
|
|
||||||
:return: loaded model
|
:return: loaded model
|
||||||
"""
|
"""
|
||||||
self._art_model.load_best_model_checkpoint()
|
self._art_model.load_best_model_checkpoint()
|
||||||
|
|
|
||||||
|
|
@ -3,7 +3,6 @@ from typing import Optional, Tuple
|
||||||
from apt.utils.models import Model, ModelOutputType, ScoringMethod, check_correct_model_output, is_one_hot
|
from apt.utils.models import Model, ModelOutputType, ScoringMethod, check_correct_model_output, is_one_hot
|
||||||
from apt.utils.datasets import Dataset, OUTPUT_DATA_ARRAY_TYPE
|
from apt.utils.datasets import Dataset, OUTPUT_DATA_ARRAY_TYPE
|
||||||
|
|
||||||
from xgboost import XGBClassifier
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
from art.estimators.classification.xgboost import XGBoostClassifier as ArtXGBoostClassifier
|
from art.estimators.classification.xgboost import XGBoostClassifier as ArtXGBoostClassifier
|
||||||
|
|
@ -37,7 +36,7 @@ class XGBoostClassifier(XGBoostModel):
|
||||||
queries that can be submitted. Default is True.
|
queries that can be submitted. Default is True.
|
||||||
:type unlimited_queries: boolean, optional
|
:type unlimited_queries: boolean, optional
|
||||||
"""
|
"""
|
||||||
def __init__(self, model: XGBClassifier, output_type: ModelOutputType, input_shape: Tuple[int, ...],
|
def __init__(self, model: "xgboost.XGBClassifier", output_type: ModelOutputType, input_shape: Tuple[int, ...],
|
||||||
nb_classes: int, black_box_access: Optional[bool] = True,
|
nb_classes: int, black_box_access: Optional[bool] = True,
|
||||||
unlimited_queries: Optional[bool] = True, **kwargs):
|
unlimited_queries: Optional[bool] = True, **kwargs):
|
||||||
super().__init__(model, output_type, black_box_access, unlimited_queries, **kwargs)
|
super().__init__(model, output_type, black_box_access, unlimited_queries, **kwargs)
|
||||||
|
|
|
||||||
|
|
@ -22,7 +22,7 @@ copyright = '2021, IBM'
|
||||||
author = 'Abigail Goldsteen'
|
author = 'Abigail Goldsteen'
|
||||||
|
|
||||||
# The full version, including alpha/beta/rc tags
|
# The full version, including alpha/beta/rc tags
|
||||||
release = '0.1.0'
|
release = '0.2.0'
|
||||||
|
|
||||||
master_doc = 'index'
|
master_doc = 'index'
|
||||||
|
|
||||||
|
|
@ -53,7 +53,7 @@ exclude_patterns = []
|
||||||
# The theme to use for HTML and HTML Help pages. See the documentation for
|
# The theme to use for HTML and HTML Help pages. See the documentation for
|
||||||
# a list of builtin themes.
|
# a list of builtin themes.
|
||||||
#
|
#
|
||||||
html_theme = 'pyramid'
|
html_theme = "sphinx_rtd_theme"
|
||||||
|
|
||||||
# Add any paths that contain custom static files (such as style sheets) here,
|
# Add any paths that contain custom static files (such as style sheets) here,
|
||||||
# relative to this directory. They are copied after the builtin static files,
|
# relative to this directory. They are copied after the builtin static files,
|
||||||
|
|
|
||||||
|
|
@ -18,6 +18,8 @@ minimization principle in GDPR for ML models. It enables to reduce the amount of
|
||||||
personal data needed to perform predictions with a machine learning model, while still enabling the model
|
personal data needed to perform predictions with a machine learning model, while still enabling the model
|
||||||
to make accurate predictions. This is done by removing or generalizing some of the input features.
|
to make accurate predictions. This is done by removing or generalizing some of the input features.
|
||||||
|
|
||||||
|
The dataset risk assessment module implements a tool for privacy assessment of synthetic datasets that are to be used in AI model training.
|
||||||
|
|
||||||
.. toctree::
|
.. toctree::
|
||||||
:maxdepth: 2
|
:maxdepth: 2
|
||||||
:caption: Getting Started:
|
:caption: Getting Started:
|
||||||
|
|
|
||||||
|
|
@ -12,7 +12,6 @@ apt.anonymization.anonymizer module
|
||||||
:undoc-members:
|
:undoc-members:
|
||||||
:show-inheritance:
|
:show-inheritance:
|
||||||
|
|
||||||
|
|
||||||
Module contents
|
Module contents
|
||||||
---------------
|
---------------
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -12,7 +12,6 @@ apt.minimization.minimizer module
|
||||||
:undoc-members:
|
:undoc-members:
|
||||||
:show-inheritance:
|
:show-inheritance:
|
||||||
|
|
||||||
|
|
||||||
Module contents
|
Module contents
|
||||||
---------------
|
---------------
|
||||||
|
|
||||||
|
|
|
||||||
61
docs/source/apt.risk.data_assessment.rst
Normal file
61
docs/source/apt.risk.data_assessment.rst
Normal file
|
|
@ -0,0 +1,61 @@
|
||||||
|
apt.risk.data\_assessment package
|
||||||
|
=================================
|
||||||
|
|
||||||
|
Submodules
|
||||||
|
----------
|
||||||
|
|
||||||
|
apt.risk.data\_assessment.attack\_strategy\_utils module
|
||||||
|
--------------------------------------------------------
|
||||||
|
|
||||||
|
.. automodule:: apt.risk.data_assessment.attack_strategy_utils
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
||||||
|
|
||||||
|
apt.risk.data\_assessment.dataset\_assessment\_manager module
|
||||||
|
-------------------------------------------------------------
|
||||||
|
|
||||||
|
.. automodule:: apt.risk.data_assessment.dataset_assessment_manager
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
||||||
|
|
||||||
|
apt.risk.data\_assessment.dataset\_attack module
|
||||||
|
------------------------------------------------
|
||||||
|
|
||||||
|
.. automodule:: apt.risk.data_assessment.dataset_attack
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
||||||
|
|
||||||
|
apt.risk.data\_assessment.dataset\_attack\_membership\_knn\_probabilities module
|
||||||
|
--------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
.. automodule:: apt.risk.data_assessment.dataset_attack_membership_knn_probabilities
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
||||||
|
|
||||||
|
apt.risk.data\_assessment.dataset\_attack\_result module
|
||||||
|
--------------------------------------------------------
|
||||||
|
|
||||||
|
.. automodule:: apt.risk.data_assessment.dataset_attack_result
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
||||||
|
|
||||||
|
apt.risk.data\_assessment.dataset\_attack\_whole\_dataset\_knn\_distance module
|
||||||
|
-------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
.. automodule:: apt.risk.data_assessment.dataset_attack_whole_dataset_knn_distance
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
||||||
|
|
||||||
|
Module contents
|
||||||
|
---------------
|
||||||
|
|
||||||
|
.. automodule:: apt.risk.data_assessment
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
||||||
18
docs/source/apt.risk.rst
Normal file
18
docs/source/apt.risk.rst
Normal file
|
|
@ -0,0 +1,18 @@
|
||||||
|
apt.risk package
|
||||||
|
================
|
||||||
|
|
||||||
|
Subpackages
|
||||||
|
-----------
|
||||||
|
|
||||||
|
.. toctree::
|
||||||
|
:maxdepth: 4
|
||||||
|
|
||||||
|
apt.risk.data_assessment
|
||||||
|
|
||||||
|
Module contents
|
||||||
|
---------------
|
||||||
|
|
||||||
|
.. automodule:: apt.risk
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
||||||
|
|
@ -9,6 +9,7 @@ Subpackages
|
||||||
|
|
||||||
apt.anonymization
|
apt.anonymization
|
||||||
apt.minimization
|
apt.minimization
|
||||||
|
apt.risk
|
||||||
apt.utils
|
apt.utils
|
||||||
|
|
||||||
Module contents
|
Module contents
|
||||||
|
|
|
||||||
|
|
@ -12,7 +12,6 @@ apt.utils.datasets.datasets module
|
||||||
:undoc-members:
|
:undoc-members:
|
||||||
:show-inheritance:
|
:show-inheritance:
|
||||||
|
|
||||||
|
|
||||||
Module contents
|
Module contents
|
||||||
---------------
|
---------------
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -4,6 +4,14 @@ apt.utils.models package
|
||||||
Submodules
|
Submodules
|
||||||
----------
|
----------
|
||||||
|
|
||||||
|
apt.utils.models.keras\_model module
|
||||||
|
------------------------------------
|
||||||
|
|
||||||
|
.. automodule:: apt.utils.models.keras_model
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
||||||
|
|
||||||
apt.utils.models.model module
|
apt.utils.models.model module
|
||||||
-----------------------------
|
-----------------------------
|
||||||
|
|
||||||
|
|
@ -12,6 +20,14 @@ apt.utils.models.model module
|
||||||
:undoc-members:
|
:undoc-members:
|
||||||
:show-inheritance:
|
:show-inheritance:
|
||||||
|
|
||||||
|
apt.utils.models.pytorch\_model module
|
||||||
|
--------------------------------------
|
||||||
|
|
||||||
|
.. automodule:: apt.utils.models.pytorch_model
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
||||||
|
|
||||||
apt.utils.models.sklearn\_model module
|
apt.utils.models.sklearn\_model module
|
||||||
--------------------------------------
|
--------------------------------------
|
||||||
|
|
||||||
|
|
@ -20,6 +36,13 @@ apt.utils.models.sklearn\_model module
|
||||||
:undoc-members:
|
:undoc-members:
|
||||||
:show-inheritance:
|
:show-inheritance:
|
||||||
|
|
||||||
|
apt.utils.models.xgboost\_model module
|
||||||
|
--------------------------------------
|
||||||
|
|
||||||
|
.. automodule:: apt.utils.models.xgboost_model
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
||||||
|
|
||||||
Module contents
|
Module contents
|
||||||
---------------
|
---------------
|
||||||
|
|
|
||||||
|
|
@ -21,7 +21,6 @@ apt.utils.dataset\_utils module
|
||||||
:undoc-members:
|
:undoc-members:
|
||||||
:show-inheritance:
|
:show-inheritance:
|
||||||
|
|
||||||
|
|
||||||
Module contents
|
Module contents
|
||||||
---------------
|
---------------
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -18,3 +18,6 @@ sortedcontainers==2.4.0
|
||||||
notebook
|
notebook
|
||||||
jupyter
|
jupyter
|
||||||
ipywidgets
|
ipywidgets
|
||||||
|
|
||||||
|
#doc
|
||||||
|
sphinx_rtd_theme
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
[metadata]
|
[metadata]
|
||||||
# replace with your username:
|
# replace with your username:
|
||||||
name = ai-privacy-toolkit
|
name = ai-privacy-toolkit
|
||||||
version = 0.1.0
|
version = 0.2.0
|
||||||
author = Abigail Goldsteen
|
author = Abigail Goldsteen
|
||||||
author_email = abigailt@il.ibm.com
|
author_email = abigailt@il.ibm.com
|
||||||
description = A toolkit for tools and techniques related to the privacy and compliance of AI models.
|
description = A toolkit for tools and techniques related to the privacy and compliance of AI models.
|
||||||
|
|
|
||||||
|
|
@ -10,6 +10,7 @@ from sklearn.model_selection import train_test_split
|
||||||
from sklearn.pipeline import Pipeline
|
from sklearn.pipeline import Pipeline
|
||||||
from sklearn.preprocessing import OneHotEncoder
|
from sklearn.preprocessing import OneHotEncoder
|
||||||
|
|
||||||
|
import tensorflow as tf
|
||||||
from tensorflow.keras.models import Sequential
|
from tensorflow.keras.models import Sequential
|
||||||
from tensorflow.keras.layers import Dense, Input
|
from tensorflow.keras.layers import Dense, Input
|
||||||
|
|
||||||
|
|
@ -19,6 +20,8 @@ from apt.utils.dataset_utils import get_iris_dataset_np, get_adult_dataset_pd, g
|
||||||
from apt.utils.datasets import ArrayDataset
|
from apt.utils.datasets import ArrayDataset
|
||||||
from apt.utils.models import SklearnClassifier, ModelOutputType, SklearnRegressor, KerasClassifier
|
from apt.utils.models import SklearnClassifier, ModelOutputType, SklearnRegressor, KerasClassifier
|
||||||
|
|
||||||
|
tf.compat.v1.disable_eager_execution()
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def data():
|
def data():
|
||||||
|
|
|
||||||
|
|
@ -10,14 +10,16 @@ from sklearn.tree import DecisionTreeRegressor
|
||||||
from sklearn.ensemble import RandomForestClassifier
|
from sklearn.ensemble import RandomForestClassifier
|
||||||
from xgboost import XGBClassifier
|
from xgboost import XGBClassifier
|
||||||
|
|
||||||
|
import tensorflow as tf
|
||||||
from tensorflow.keras.models import Sequential
|
from tensorflow.keras.models import Sequential
|
||||||
from tensorflow.keras.layers import Dense, Input
|
from tensorflow.keras.layers import Dense, Input
|
||||||
|
|
||||||
from art.utils import check_and_transform_label_format
|
from art.utils import check_and_transform_label_format
|
||||||
|
|
||||||
|
|
||||||
from art.utils import to_categorical
|
from art.utils import to_categorical
|
||||||
|
|
||||||
|
tf.compat.v1.disable_eager_execution()
|
||||||
|
|
||||||
|
|
||||||
def test_sklearn_classifier():
|
def test_sklearn_classifier():
|
||||||
(x_train, y_train), (x_test, y_test) = dataset_utils.get_iris_dataset_np()
|
(x_train, y_train), (x_test, y_test) = dataset_utils.get_iris_dataset_np()
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue