mirror of
https://github.com/IBM/ai-privacy-toolkit.git
synced 2026-06-05 14:55:13 +02:00
Add dataset privacy risk assessment example notebook. (#73)
* Add dataset assessment notebook and reference to module from project README
Signed-off-by: Maya Anderson <mayaa@il.ibm.com>
This commit is contained in:
parent
a4e07faf01
commit
98a7a078bb
4 changed files with 410 additions and 1 deletions
|
|
@ -16,6 +16,9 @@ minimization principle in GDPR for ML models. It enables to reduce the amount of
|
||||||
personal data needed to perform predictions with a machine learning model, while still enabling the model
|
personal data needed to perform predictions with a machine learning model, while still enabling the model
|
||||||
to make accurate predictions. This is done by removing or generalizing some of the input features.
|
to make accurate predictions. This is done by removing or generalizing some of the input features.
|
||||||
|
|
||||||
|
The [**dataset assessment**](apt/risk/data_assessment/README.md) module implements a tool for privacy assessment of
|
||||||
|
synthetic datasets that are to be used in AI model training.
|
||||||
|
|
||||||
Official ai-privacy-toolkit documentation: https://ai-privacy-toolkit.readthedocs.io/en/latest/
|
Official ai-privacy-toolkit documentation: https://ai-privacy-toolkit.readthedocs.io/en/latest/
|
||||||
|
|
||||||
Installation: pip install ai-privacy-toolkit
|
Installation: pip install ai-privacy-toolkit
|
||||||
|
|
|
||||||
|
|
@ -87,7 +87,6 @@ class DatasetAttackMembership(DatasetAttack):
|
||||||
labels = np.concatenate((np.zeros((len(non_member_probabilities),)), np.ones((len(member_probabilities),))))
|
labels = np.concatenate((np.zeros((len(non_member_probabilities),)), np.ones((len(member_probabilities),))))
|
||||||
results = np.concatenate((non_member_probabilities, member_probabilities))
|
results = np.concatenate((non_member_probabilities, member_probabilities))
|
||||||
svc_disp = RocCurveDisplay.from_predictions(labels, results)
|
svc_disp = RocCurveDisplay.from_predictions(labels, results)
|
||||||
svc_disp.plot()
|
|
||||||
plt.plot([0, 1], [0, 1], color="navy", linewidth=2, linestyle="--", label='No skills')
|
plt.plot([0, 1], [0, 1], color="navy", linewidth=2, linestyle="--", label='No skills')
|
||||||
plt.title('ROC curve')
|
plt.title('ROC curve')
|
||||||
plt.savefig(f'{filename_prefix}{dataset_name}_roc_curve.png')
|
plt.savefig(f'{filename_prefix}{dataset_name}_roc_curve.png')
|
||||||
|
|
|
||||||
402
notebooks/dataset_assessment_nursery.ipynb
Normal file
402
notebooks/dataset_assessment_nursery.ipynb
Normal file
File diff suppressed because one or more lines are too long
|
|
@ -13,3 +13,8 @@ tensorflow==2.8.3
|
||||||
xgboost==1.7.2
|
xgboost==1.7.2
|
||||||
Pillow==9.3.0
|
Pillow==9.3.0
|
||||||
sortedcontainers==2.4.0
|
sortedcontainers==2.4.0
|
||||||
|
|
||||||
|
#notebooks
|
||||||
|
notebook
|
||||||
|
jupyter
|
||||||
|
ipywidgets
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue