mirror of
https://github.com/IBM/ai-privacy-toolkit.git
synced 2026-06-29 15:59:38 +02:00
Add data minimization functionality to the ai-privacy-toolkit (#3)
* Fix directory issue when running tests for first time
* Initial version of data minimization
* Update version and documentation
* Fix documentation
This commit is contained in:
parent
bcc3d67ba4
commit
f2e1364b43
14 changed files with 920 additions and 34 deletions
10
apt/utils.py
10
apt/utils.py
|
|
@@ -2,7 +2,7 @@ from sklearn import datasets, model_selection
|
|||
import sklearn.preprocessing
|
||||
import pandas as pd
|
||||
import ssl
|
||||
from os import path
|
||||
from os import path, mkdir
|
||||
from six.moves.urllib.request import urlretrieve
|
||||
|
||||
|
||||
|
|
@@ -40,9 +40,13 @@ def get_adult_dataset():
|
|||
'label']
|
||||
train_url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data'
|
||||
test_url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.test'
|
||||
data_dir = '../datasets/adult'
|
||||
train_file = '../datasets/adult/train'
|
||||
test_file = '../datasets/adult/test'
|
||||
|
||||
if not path.exists(data_dir):
|
||||
mkdir(data_dir)
|
||||
|
||||
ssl._create_default_https_context = ssl._create_unverified_context
|
||||
if not path.exists(train_file):
|
||||
urlretrieve(train_url, train_file)
|
||||
|
|
@@ -139,8 +143,12 @@ def get_nursery_dataset(raw: bool = True, test_set: float = 0.2, transform_socia
|
|||
:return: Dataset and labels as pandas dataframes.
|
||||
"""
|
||||
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/nursery/nursery.data'
|
||||
data_dir = '../datasets/nursery'
|
||||
data_file = '../datasets/nursery/data'
|
||||
|
||||
if not path.exists(data_dir):
|
||||
mkdir(data_dir)
|
||||
|
||||
ssl._create_default_https_context = ssl._create_unverified_context
|
||||
if not path.exists(data_file):
|
||||
urlretrieve(url, data_file)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue