fix bug and update test_model

This commit is contained in:
olasaadi 2022-03-10 12:56:41 +02:00
parent fd9f134336
commit 300e391432
3 changed files with 19 additions and 13 deletions

View file

@@ -66,6 +66,7 @@ class Anonymize:
self.anonymizer = DecisionTreeRegressor(random_state=10, min_samples_split=2, min_samples_leaf=self.k)
else:
self.anonymizer = DecisionTreeClassifier(random_state=10, min_samples_split=2, min_samples_leaf=self.k)
self.anonymizer.fit(x_prepared, y)
cells_by_id = self._calculate_cells(x, x_prepared)
return self._anonymize_data_numpy(x, x_prepared, cells_by_id)
@@ -80,6 +81,8 @@ class Anonymize:
self.anonymizer = DecisionTreeRegressor(random_state=10, min_samples_split=2, min_samples_leaf=self.k)
else:
self.anonymizer = DecisionTreeClassifier(random_state=10, min_samples_split=2, min_samples_leaf=self.k)
if len(y.shape) > 1:
y = np.argmax(y, axis=1)
self.anonymizer.fit(x_prepared, y)
cells_by_id = self._calculate_cells(x, x_prepared)
return self._anonymize_data_pandas(x, x_prepared, cells_by_id)

View file

@@ -46,7 +46,10 @@ class SklearnClassifier(SklearnModel):
:type y: `np.ndarray` or `pandas.DataFrame`
"""
encoder = OneHotEncoder(sparse=False)
y_encoded = encoder.fit_transform(y.reshape(-1, 1))
if type(y) == np.ndarray:
y_encoded = encoder.fit_transform(y.reshape(-1, 1))
else:
y_encoded = encoder.fit_transform(y.values.reshape(-1, 1))
self._art_model.fit(x, y_encoded, **kwargs)
def predict(self, x: np.ndarray, **kwargs) -> np.ndarray:

View file

@@ -7,28 +7,28 @@ from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestClassifier
def test_sklearn_classifier():
(x_train, y_train), (x_test, y_test) = dataset_utils.get_iris_dataset()
dataset = dataset_utils.get_iris_dataset()
underlying_model = RandomForestClassifier()
model = SklearnClassifier(underlying_model)
model.fit(x_train, y_train)
pred = model.predict(x_test)
assert(pred.shape[0] == x_test.shape[0])
model.fit(dataset.get_train_samples(), dataset.get_train_labels())
pred = model.predict(dataset.get_test_samples())
assert(pred.shape[0] == dataset.get_test_samples().shape[0])
score = model.score(x_test, y_test)
score = model.score(dataset.get_test_samples(), dataset.get_test_labels())
assert(0.0 <= score <= 1.0)
def test_sklearn_regressor():
(x_train, y_train), (x_test, y_test) = dataset_utils.get_diabetes_dataset()
dataset = dataset_utils.get_diabetes_dataset()
underlying_model = DecisionTreeRegressor()
model = SklearnRegressor(underlying_model)
model.fit(x_train, y_train)
pred = model.predict(x_test)
assert (pred.shape[0] == x_test.shape[0])
model.fit(dataset.get_train_samples(), dataset.get_train_labels())
pred = model.predict(dataset.get_test_samples())
assert (pred.shape[0] == dataset.get_test_samples().shape[0])
score = model.score(x_test, y_test)
score = model.score(dataset.get_test_samples(), dataset.get_test_labels())
losses = model.loss(x_test, y_test)
assert (losses.shape[0] == x_test.shape[0])
losses = model.loss(dataset.get_test_samples(), dataset.get_test_labels())
assert (losses.shape[0] == dataset.get_test_samples().shape[0])
# Probably not needed for now, as we will not be using these wrappers directly in ART.