mirror of
https://github.com/IBM/ai-privacy-toolkit.git
synced 2026-04-25 04:46:21 +02:00
fix bug and update test_model
This commit is contained in:
parent
fd9f134336
commit
300e391432
3 changed files with 19 additions and 13 deletions
|
|
@ -66,6 +66,7 @@ class Anonymize:
|
|||
self.anonymizer = DecisionTreeRegressor(random_state=10, min_samples_split=2, min_samples_leaf=self.k)
|
||||
else:
|
||||
self.anonymizer = DecisionTreeClassifier(random_state=10, min_samples_split=2, min_samples_leaf=self.k)
|
||||
|
||||
self.anonymizer.fit(x_prepared, y)
|
||||
cells_by_id = self._calculate_cells(x, x_prepared)
|
||||
return self._anonymize_data_numpy(x, x_prepared, cells_by_id)
|
||||
|
|
@ -80,6 +81,8 @@ class Anonymize:
|
|||
self.anonymizer = DecisionTreeRegressor(random_state=10, min_samples_split=2, min_samples_leaf=self.k)
|
||||
else:
|
||||
self.anonymizer = DecisionTreeClassifier(random_state=10, min_samples_split=2, min_samples_leaf=self.k)
|
||||
if len(y.shape) > 1:
|
||||
y = np.argmax(y, axis=1)
|
||||
self.anonymizer.fit(x_prepared, y)
|
||||
cells_by_id = self._calculate_cells(x, x_prepared)
|
||||
return self._anonymize_data_pandas(x, x_prepared, cells_by_id)
|
||||
|
|
|
|||
|
|
@ -46,7 +46,10 @@ class SklearnClassifier(SklearnModel):
|
|||
:type y: `np.ndarray` or `pandas.DataFrame`
|
||||
"""
|
||||
encoder = OneHotEncoder(sparse=False)
|
||||
y_encoded = encoder.fit_transform(y.reshape(-1, 1))
|
||||
if type(y) == np.ndarray:
|
||||
y_encoded = encoder.fit_transform(y.reshape(-1, 1))
|
||||
else:
|
||||
y_encoded = encoder.fit_transform(y.values.reshape(-1, 1))
|
||||
self._art_model.fit(x, y_encoded, **kwargs)
|
||||
|
||||
def predict(self, x: np.ndarray, **kwargs) -> np.ndarray:
|
||||
|
|
|
|||
|
|
@ -7,28 +7,28 @@ from sklearn.tree import DecisionTreeRegressor
|
|||
from sklearn.ensemble import RandomForestClassifier
|
||||
|
||||
def test_sklearn_classifier():
|
||||
(x_train, y_train), (x_test, y_test) = dataset_utils.get_iris_dataset()
|
||||
dataset = dataset_utils.get_iris_dataset()
|
||||
underlying_model = RandomForestClassifier()
|
||||
model = SklearnClassifier(underlying_model)
|
||||
model.fit(x_train, y_train)
|
||||
pred = model.predict(x_test)
|
||||
assert(pred.shape[0] == x_test.shape[0])
|
||||
model.fit(dataset.get_train_samples(), dataset.get_train_labels())
|
||||
pred = model.predict(dataset.get_test_samples())
|
||||
assert(pred.shape[0] == dataset.get_test_samples().shape[0])
|
||||
|
||||
score = model.score(x_test, y_test)
|
||||
score = model.score(dataset.get_test_samples(), dataset.get_test_labels())
|
||||
assert(0.0 <= score <= 1.0)
|
||||
|
||||
def test_sklearn_regressor():
|
||||
(x_train, y_train), (x_test, y_test) = dataset_utils.get_diabetes_dataset()
|
||||
dataset = dataset_utils.get_diabetes_dataset()
|
||||
underlying_model = DecisionTreeRegressor()
|
||||
model = SklearnRegressor(underlying_model)
|
||||
model.fit(x_train, y_train)
|
||||
pred = model.predict(x_test)
|
||||
assert (pred.shape[0] == x_test.shape[0])
|
||||
model.fit(dataset.get_train_samples(), dataset.get_train_labels())
|
||||
pred = model.predict(dataset.get_test_samples())
|
||||
assert (pred.shape[0] == dataset.get_test_samples().shape[0])
|
||||
|
||||
score = model.score(x_test, y_test)
|
||||
score = model.score(dataset.get_test_samples(), dataset.get_test_labels())
|
||||
|
||||
losses = model.loss(x_test, y_test)
|
||||
assert (losses.shape[0] == x_test.shape[0])
|
||||
losses = model.loss(dataset.get_test_samples(), dataset.get_test_labels())
|
||||
assert (losses.shape[0] == dataset.get_test_samples().shape[0])
|
||||
|
||||
|
||||
# Probably not needed for now, as we will not be using these wrappers directly in ART.
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue