diff --git a/apt/utils/models/model.py b/apt/utils/models/model.py index 157158e..f470de7 100644 --- a/apt/utils/models/model.py +++ b/apt/utils/models/model.py @@ -29,7 +29,15 @@ class ScoringMethod(Enum): def is_one_hot(y: OUTPUT_DATA_ARRAY_TYPE) -> bool: - return len(y.shape) == 2 and y.shape[1] > 1 + return len(y.shape) == 2 and y.shape[1] > 1 and np.all(np.around(np.sum(y, axis=1), decimals=4) == 1) + + +def is_multi_label(y: OUTPUT_DATA_ARRAY_TYPE) -> bool: + return len(y.shape) == 2 and y.shape[1] > 1 and not is_one_hot(y) + + +def is_multi_label_binary(y: OUTPUT_DATA_ARRAY_TYPE) -> bool: + return is_multi_label(y) and np.max(y) <= 1 def get_nb_classes(y: OUTPUT_DATA_ARRAY_TYPE) -> int: @@ -38,7 +46,7 @@ def get_nb_classes(y: OUTPUT_DATA_ARRAY_TYPE) -> int: :param y: The labels :type y: numpy array - :return: The number of classes as integer + :return: The number of classes as integer, or list of integers for multi-label """ if y is None: return 0 @@ -48,6 +56,10 @@ def get_nb_classes(y: OUTPUT_DATA_ARRAY_TYPE) -> int: if is_one_hot(y): return y.shape[1] + elif is_multi_label(y): + # for now just return the number of labels + return y.shape[1] + # return [int(np.max(y.T[i]) + 1) for i in range(y.shape[1])] else: return int(np.max(y) + 1) @@ -61,7 +73,7 @@ def check_correct_model_output(y: OUTPUT_DATA_ARRAY_TYPE, output_type: ModelOutp :type output_type: ModelOutputType :raises: ValueError (in case of mismatch) """ - if not is_one_hot(y): # 1D array + if not is_one_hot(y) and not is_multi_label(y): # 1D array if output_type == ModelOutputType.CLASSIFIER_PROBABILITIES or output_type == ModelOutputType.CLASSIFIER_LOGITS: raise ValueError("Incompatible model output types. Model outputs 1D array of categorical scalars while " "output type is set to ", output_type) @@ -167,7 +179,8 @@ class Model(metaclass=ABCMeta): class BlackboxClassifier(Model): """ - Wrapper for black-box ML classification models. + Wrapper for black-box ML classification models. This is an abstract class and must be instantiated as either + BlackboxClassifierPredictFunction or BlackboxClassifierPredictions. :param model: The training and/or test data along with the model's predictions for the data or a callable predict method. @@ -266,7 +279,8 @@ class BlackboxClassifier(Model): check_correct_model_output(predictions, self.output_type) return predictions - def score(self, test_data: Dataset, scoring_method: Optional[ScoringMethod] = ScoringMethod.ACCURACY, **kwargs): + def score(self, test_data: Dataset, scoring_method: Optional[ScoringMethod] = ScoringMethod.ACCURACY, + binary_threshold: Optional[float] = 0.5, **kwargs): """ Score the model using test data. @@ -274,14 +288,25 @@ class BlackboxClassifier(Model): :type train_data: `Dataset` :param scoring_method: The method for scoring predictions. Default is ACCURACY. :type scoring_method: `ScoringMethod`, optional + :param binary_threshold: The threshold to use on binary classification probabilities to assign the positive + class. + :type binary_threshold: float, optional. Default is 0.5. :return: the score as float (for classifiers, between 0 and 1) """ if test_data.get_samples() is None or test_data.get_labels() is None: raise ValueError('score can only be computed when test data and labels are available') predicted = self._art_model.predict(test_data.get_samples()) - y = check_and_transform_label_format(test_data.get_labels(), nb_classes=self._nb_classes) + y = test_data.get_labels() + if not is_multi_label(y): + y = check_and_transform_label_format(y, nb_classes=self._nb_classes) if scoring_method == ScoringMethod.ACCURACY: - return np.count_nonzero(np.argmax(y, axis=1) == np.argmax(predicted, axis=1)) / predicted.shape[0] + if not is_multi_label(y): + return np.count_nonzero(np.argmax(y, axis=1) == np.argmax(predicted, axis=1)) / predicted.shape[0] + else: + if is_multi_label_binary(y): + predicted[predicted < binary_threshold] = 0 + predicted[predicted >= binary_threshold] = 1 + return np.count_nonzero(y == predicted) / (predicted.shape[0] * y.shape[1]) else: raise NotImplementedError diff --git a/tests/test_model.py b/tests/test_model.py index b8fb8f1..b7b2909 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -156,6 +156,54 @@ def test_blackbox_classifier_predictions_y(): assert model.model_type is None +def test_blackbox_classifier_predictions_multi_label_cat(): + (x_train, y_train), (x_test, y_test) = dataset_utils.get_iris_dataset_np() + + # make multi-label categorical + y_train = np.column_stack((y_train, y_train, y_train)) + y_test = np.column_stack((y_test, y_test, y_test)) + + train = DatasetWithPredictions(y_train, x_train, y_train) + test = DatasetWithPredictions(y_test, x_test, y_test) + data = Data(train, test) + model = BlackboxClassifierPredictions(data, ModelOutputType.CLASSIFIER_SCALAR) + pred = model.predict(test) + assert (pred.shape[0] == x_test.shape[0]) + + score = model.score(test) + assert (score == 1.0) + + assert model.model_type is None + + +def test_blackbox_classifier_predictions_multi_label_binary(): + (x_train, y_train), (x_test, y_test) = dataset_utils.get_iris_dataset_np() + + # make multi-label categorical + y_train = np.column_stack((y_train, y_train, y_train)) + y_train[y_train > 1] = 1 + pred_train = y_train.copy().astype(float) + pred_train[pred_train == 0] = 0.2 + pred_train[pred_train == 1] = 0.6 + y_test = np.column_stack((y_test, y_test, y_test)) + y_test[y_test > 1] = 1 + pred_test = y_test.copy().astype(float) + pred_test[pred_test == 0] = 0.2 + pred_test[pred_test == 1] = 0.6 + + train = DatasetWithPredictions(pred_train, x_train, y_train) + test = DatasetWithPredictions(pred_test, x_test, y_test) + data = Data(train, test) + model = BlackboxClassifierPredictions(data, ModelOutputType.CLASSIFIER_SCALAR) + pred = model.predict(test) + assert (pred.shape[0] == x_test.shape[0]) + + score = model.score(test) + assert (score == 1.0) + + assert model.model_type is None + + def test_blackbox_classifier_mismatch(): (x_train, y_train), (x_test, y_test) = dataset_utils.get_iris_dataset_np()