mirror of
https://github.com/IBM/ai-privacy-toolkit.git
synced 2026-05-08 11:32:37 +02:00
fix tests
This commit is contained in:
parent
66c86dc595
commit
b54f0a2382
2 changed files with 5 additions and 9 deletions
|
|
@ -45,10 +45,10 @@ class Anonymize:
|
||||||
:return: An array containing the anonymized training dataset.
|
:return: An array containing the anonymized training dataset.
|
||||||
"""
|
"""
|
||||||
if dataset.features_names is not None:
|
if dataset.features_names is not None:
|
||||||
self._features = dataset.features_names
|
self.features_names = dataset.features_names
|
||||||
# if features is None, use numbers instead of names
|
# if features is None, use numbers instead of names
|
||||||
elif dataset.get_samples().shape[0] != 0:
|
elif dataset.get_samples().shape[0] != 0:
|
||||||
self._features = [i for i in range(dataset.get_samples().shape[0])]
|
self.features_names = [i for i in range(dataset.get_samples().shape[0])]
|
||||||
else:
|
else:
|
||||||
raise ValueError('No data provided')
|
raise ValueError('No data provided')
|
||||||
if not set(self.quasi_identifiers).issubset(set(self.features_names)):
|
if not set(self.quasi_identifiers).issubset(set(self.features_names)):
|
||||||
|
|
@ -63,7 +63,7 @@ class Anonymize:
|
||||||
|
|
||||||
transformed = self._anonymize(dataset.get_samples().copy(), dataset.get_labels())
|
transformed = self._anonymize(dataset.get_samples().copy(), dataset.get_labels())
|
||||||
if dataset.is_pandas:
|
if dataset.is_pandas:
|
||||||
return pd.DataFrame(transformed, columns=self._features)
|
return pd.DataFrame(transformed, columns=self.features_names)
|
||||||
else:
|
else:
|
||||||
return transformed
|
return transformed
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -41,9 +41,7 @@ def test_anonymize_pandas_adult():
|
||||||
'native-country']
|
'native-country']
|
||||||
categorical_features = ['workclass', 'marital-status', 'occupation', 'relationship', 'race', 'sex',
|
categorical_features = ['workclass', 'marital-status', 'occupation', 'relationship', 'race', 'sex',
|
||||||
'native-country']
|
'native-country']
|
||||||
QI_indexes = [i for i, v in enumerate(features) if v in QI]
|
anonymizer = Anonymize(k, QI, categorical_features=categorical_features)
|
||||||
categorical_features_indexes = [i for i, v in enumerate(features) if v in categorical_features]
|
|
||||||
anonymizer = Anonymize(k, QI_indexes, categorical_features=categorical_features_indexes)
|
|
||||||
anon = anonymizer.anonymize(ArrayDataset(x_train, pred, features))
|
anon = anonymizer.anonymize(ArrayDataset(x_train, pred, features))
|
||||||
|
|
||||||
assert(anon.loc[:, QI].drop_duplicates().shape[0] < x_train.loc[:, QI].drop_duplicates().shape[0])
|
assert(anon.loc[:, QI].drop_duplicates().shape[0] < x_train.loc[:, QI].drop_duplicates().shape[0])
|
||||||
|
|
@ -62,9 +60,7 @@ def test_anonymize_pandas_nursery():
|
||||||
k = 100
|
k = 100
|
||||||
QI = ["finance", "social", "health"]
|
QI = ["finance", "social", "health"]
|
||||||
categorical_features = ["parents", "has_nurs", "form", "housing", "finance", "social", "health", 'children']
|
categorical_features = ["parents", "has_nurs", "form", "housing", "finance", "social", "health", 'children']
|
||||||
QI_indexes = [i for i, v in enumerate(features) if v in QI]
|
anonymizer = Anonymize(k, QI, categorical_features=categorical_features)
|
||||||
categorical_features_indexes = [i for i, v in enumerate(features) if v in categorical_features]
|
|
||||||
anonymizer = Anonymize(k, QI_indexes, categorical_features=categorical_features_indexes)
|
|
||||||
anon = anonymizer.anonymize(ArrayDataset(x_train, pred))
|
anon = anonymizer.anonymize(ArrayDataset(x_train, pred))
|
||||||
|
|
||||||
assert(anon.loc[:, QI].drop_duplicates().shape[0] < x_train.loc[:, QI].drop_duplicates().shape[0])
|
assert(anon.loc[:, QI].drop_duplicates().shape[0] < x_train.loc[:, QI].drop_duplicates().shape[0])
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue