Add data minimization functionality to the ai-privacy-toolkit (#3)

* Fix directory issue when running tests for the first time

* Initial version of data minimization

* Update version and documentation

* Fix documentation
This commit is contained in:
abigailgold 2021-07-12 15:56:42 +03:00 committed by GitHub
parent bcc3d67ba4
commit f2e1364b43
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
14 changed files with 920 additions and 34 deletions

View file

@ -2,7 +2,7 @@ from sklearn import datasets, model_selection
import sklearn.preprocessing
import pandas as pd
import ssl
from os import path
from os import path, mkdir
from six.moves.urllib.request import urlretrieve
@ -40,9 +40,13 @@ def get_adult_dataset():
'label']
train_url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data'
test_url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.test'
data_dir = '../datasets/adult'
train_file = '../datasets/adult/train'
test_file = '../datasets/adult/test'
if not path.exists(data_dir):
mkdir(data_dir)
ssl._create_default_https_context = ssl._create_unverified_context
if not path.exists(train_file):
urlretrieve(train_url, train_file)
@ -139,8 +143,12 @@ def get_nursery_dataset(raw: bool = True, test_set: float = 0.2, transform_socia
:return: Dataset and labels as pandas dataframes.
"""
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/nursery/nursery.data'
data_dir = '../datasets/nursery'
data_file = '../datasets/nursery/data'
if not path.exists(data_dir):
mkdir(data_dir)
ssl._create_default_https_context = ssl._create_unverified_context
if not path.exists(data_file):
urlretrieve(url, data_file)