def is_one_hot(y: OUTPUT_DATA_ARRAY_TYPE) -> bool:
    """
    Check whether an array of labels looks one-hot encoded.

    The decision is based on the shape only: the values themselves are not inspected.

    :param y: the labels
    :type y: numpy array
    :return: True if ``y`` is 2-dimensional with more than one column, False otherwise
    """
    return len(y.shape) == 2 and y.shape[1] > 1


def get_nb_classes(y: OUTPUT_DATA_ARRAY_TYPE) -> int:
    """
    Get the number of classes from an array of labels

    Labels of shape (n, k) with k > 1 are treated as one-hot encoded and yield k. Any other shape is
    treated as class indices starting at 0 and yields ``max + 1``, so missing indices (gaps) still
    count as classes.

    :param y: the labels
    :type y: numpy array
    :return: the number of classes as integer; 0 if ``y`` is None or contains no labels
    :raises ValueError: if ``y`` is neither None nor a numpy array
    """
    if y is None:
        return 0

    # isinstance (rather than an exact type comparison) also accepts ndarray subclasses,
    # e.g. masked arrays.
    if not isinstance(y, np.ndarray):
        raise ValueError("Input should be numpy array")

    if is_one_hot(y):
        return y.shape[1]

    # np.max has no identity for zero-size input and would raise; no labels means no classes,
    # consistent with the None case above.
    if y.size == 0:
        return 0

    # Convert to a Python int *before* adding 1: adding in the array's dtype can wrap around for
    # narrow integer types (e.g. uint8 labels with max 255 would give 0).
    return int(np.max(y)) + 1
BlackboxClassifierPredictions(BlackboxClassifier): y_test_pred = model.get_test_labels() if y_train_pred is not None and len(y_train_pred.shape) == 1: - self._nb_classes = self.get_nb_classes(y_train_pred) + self._nb_classes = get_nb_classes(y_train_pred) y_train_pred = check_and_transform_label_format(y_train_pred, nb_classes=self._nb_classes) if y_test_pred is not None and len(y_test_pred.shape) == 1: if self._nb_classes is None: - self._nb_classes = self.get_nb_classes(y_test_pred) + self._nb_classes = get_nb_classes(y_test_pred) y_test_pred = check_and_transform_label_format(y_test_pred, nb_classes=self._nb_classes) if x_train_pred is not None and y_train_pred is not None and x_test_pred is not None and y_test_pred is not None: @@ -255,7 +267,7 @@ class BlackboxClassifierPredictions(BlackboxClassifier): else: raise NotImplementedError("Invalid data - None") - self._nb_classes = self.get_nb_classes(y_pred) + self._nb_classes = get_nb_classes(y_pred) self._input_shape = x_pred.shape[1:] predict_fn = (x_pred, y_pred) self._art_model = BlackBoxClassifier(predict_fn, self._input_shape, self._nb_classes, fuzzy_float_compare=True) diff --git a/apt/utils/models/sklearn_model.py b/apt/utils/models/sklearn_model.py index 81294a5..8becc18 100644 --- a/apt/utils/models/sklearn_model.py +++ b/apt/utils/models/sklearn_model.py @@ -3,7 +3,7 @@ from typing import Optional from sklearn.preprocessing import OneHotEncoder from sklearn.base import BaseEstimator -from apt.utils.models import Model, ModelOutputType +from apt.utils.models import Model, ModelOutputType, get_nb_classes from apt.utils.datasets import Dataset, OUTPUT_DATA_ARRAY_TYPE from art.estimators.classification.scikitlearn import SklearnClassifier as ArtSklearnClassifier @@ -59,7 +59,7 @@ class SklearnClassifier(SklearnModel): :return: None """ y = train_data.get_labels() - self.nb_classes = self.get_nb_classes(y) + self.nb_classes = get_nb_classes(y) y_encoded = check_and_transform_label_format(y, 
def test_is_one_hot():
    """is_one_hot is False for 1-d and single-column labels and True for categorical labels."""
    (_, labels), (_, _) = dataset_utils.get_iris_dataset_np()

    flat_result = is_one_hot(labels)
    assert (not flat_result)

    column_result = is_one_hot(labels.reshape(-1, 1))
    assert (not column_result)

    categorical_result = is_one_hot(to_categorical(labels))
    assert (categorical_result)


def test_get_nb_classes():
    """get_nb_classes reports 3 iris classes for every label layout and counts gaps as classes."""
    (_, y_train), (_, y_test) = dataset_utils.get_iris_dataset_np()

    # shape: (x,) - not 1-hot
    count_test = get_nb_classes(y_test)
    count_train = get_nb_classes(y_train)
    assert (count_test == count_train)
    assert (count_test == 3)

    # shape: (x,1) - not 1-hot
    count_test = get_nb_classes(y_test.reshape(-1, 1))
    assert (count_test == 3)

    # shape: (x,3) - 1-hot
    one_hot_labels = to_categorical(y_test)
    count_one_hot = get_nb_classes(one_hot_labels)
    assert (count_one_hot == 3)

    # gaps: 1,2,4 (0,3 missing)
    zero_mask = y_test == 0
    y_test[zero_mask] = 4
    count_with_gaps = get_nb_classes(y_test)
    assert (count_with_gaps == 5)