mirror of
https://github.com/IBM/ai-privacy-toolkit.git
synced 2026-04-30 23:36:22 +02:00
Add support for xgboost XGBClassifier (#53)
This commit is contained in:
parent
a13415ad67
commit
a9e2a35e18
3 changed files with 106 additions and 1 deletions
|
|
@ -3,3 +3,4 @@ from apt.utils.models.model import Model, BlackboxClassifier, ModelOutputType, S
|
||||||
check_correct_model_output
|
check_correct_model_output
|
||||||
from apt.utils.models.sklearn_model import SklearnModel, SklearnClassifier, SklearnRegressor
|
from apt.utils.models.sklearn_model import SklearnModel, SklearnClassifier, SklearnRegressor
|
||||||
from apt.utils.models.keras_model import KerasClassifier, KerasRegressor
|
from apt.utils.models.keras_model import KerasClassifier, KerasRegressor
|
||||||
|
from apt.utils.models.xgboost_model import XGBoostClassifier
|
||||||
|
|
|
||||||
87
apt/utils/models/xgboost_model.py
Normal file
87
apt/utils/models/xgboost_model.py
Normal file
|
|
@ -0,0 +1,87 @@
|
||||||
|
from typing import Optional, Tuple
|
||||||
|
|
||||||
|
from apt.utils.models import Model, ModelOutputType, ScoringMethod, check_correct_model_output, is_one_hot
|
||||||
|
from apt.utils.datasets import Dataset, OUTPUT_DATA_ARRAY_TYPE
|
||||||
|
|
||||||
|
from xgboost import XGBClassifier
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
from art.estimators.classification.xgboost import XGBoostClassifier as ArtXGBoostClassifier
|
||||||
|
|
||||||
|
|
||||||
|
class XGBoostModel(Model):
    """Base wrapper class for xgboost models.

    Concrete subclasses (e.g. classifiers) adapt a fitted xgboost model to the
    toolkit's common ``Model`` interface.
    """
|
||||||
|
|
||||||
|
|
||||||
|
class XGBoostClassifier(XGBoostModel):
    """
    Wrapper class for xgboost classification models.

    :param model: The original xgboost model object. Must be fit.
    :type model: Booster or XGBClassifier object
    :param output_type: The type of output the model yields (vector/label only)
    :type output_type: `ModelOutputType`
    :param input_shape: Shape of input to the model.
    :type input_shape: Tuple[int, ...]
    :param nb_classes: Number of prediction classes of the model.
    :type nb_classes: int
    :param black_box_access: Boolean describing the type of deployment of the model (when in production).
                             Set to True if the model is only available via query (API) access, i.e.,
                             only the outputs of the model are exposed, and False if the model internals
                             are also available. Default is True.
    :type black_box_access: boolean, optional
    :param unlimited_queries: If black_box_access is True, this boolean indicates whether a user can perform
                              unlimited queries to the model API or whether there is a limit to the number of
                              queries that can be submitted. Default is True.
    :type unlimited_queries: boolean, optional
    """

    def __init__(self, model: XGBClassifier, output_type: ModelOutputType, input_shape: Tuple[int, ...],
                 nb_classes: int, black_box_access: Optional[bool] = True,
                 unlimited_queries: Optional[bool] = True, **kwargs):
        super().__init__(model, output_type, black_box_access, unlimited_queries, **kwargs)
        # ART's XGBoost wrapper needs the flat feature count and class count up front;
        # input_shape[0] assumes tabular (1D-feature) input — TODO confirm for other shapes.
        self._art_model = ArtXGBoostClassifier(model, nb_features=input_shape[0], nb_classes=nb_classes)
        self.nb_classes = nb_classes

    def fit(self, train_data: Dataset, **kwargs) -> None:
        """
        Fit the model using the training data.

        :param train_data: Training data. Labels are expected to either be one-hot encoded or a 1D-array of
                           categorical labels (consecutive integers starting at 0).
        :type train_data: `Dataset`
        :return: None
        """
        y = train_data.get_labels()
        # xgboost expects 1D categorical labels; flatten one-hot encodings so the
        # docstring's promise ("one-hot encoded or 1D-array") actually holds.
        if is_one_hot(y):
            y = np.argmax(y, axis=1)
        # ART's XGBoostClassifier does not expose fit(); train the wrapped model directly.
        self._art_model._model.fit(train_data.get_samples(), y)

    def predict(self, x: Dataset, **kwargs) -> OUTPUT_DATA_ARRAY_TYPE:
        """
        Perform predictions using the model for input `x`.

        :param x: Input samples.
        :type x: `Dataset`
        :return: Predictions from the model as numpy array (class probabilities, if supported).
        """
        predictions = self._art_model.predict(x.get_samples(), **kwargs)
        # Validate that the prediction shape matches the declared output type.
        check_correct_model_output(predictions, self.output_type)
        return predictions

    def score(self, test_data: Dataset, scoring_method: Optional[ScoringMethod] = ScoringMethod.ACCURACY, **kwargs):
        """
        Score the model using test data.

        :param test_data: Test data.
        :type test_data: `Dataset`
        :param scoring_method: Scoring method to apply; only ``ScoringMethod.ACCURACY`` is supported.
        :type scoring_method: `ScoringMethod`, optional
        :return: the score as float (for classifiers, between 0 and 1)
        :raises NotImplementedError: If an unsupported scoring method is requested.
        """
        y = test_data.get_labels()
        predicted = self.predict(test_data)
        # Normalize both predictions and labels to 1D class indices before comparing.
        if is_one_hot(predicted):
            predicted = np.argmax(predicted, axis=1)
        if is_one_hot(y):
            y = np.argmax(y, axis=1)
        if scoring_method == ScoringMethod.ACCURACY:
            return np.count_nonzero(y == predicted) / predicted.shape[0]
        else:
            raise NotImplementedError
|
||||||
|
|
@ -2,12 +2,13 @@ import pytest
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
from apt.utils.models import SklearnClassifier, SklearnRegressor, ModelOutputType, KerasClassifier, KerasRegressor, \
|
from apt.utils.models import SklearnClassifier, SklearnRegressor, ModelOutputType, KerasClassifier, KerasRegressor, \
|
||||||
BlackboxClassifierPredictions, BlackboxClassifierPredictFunction, is_one_hot, get_nb_classes
|
BlackboxClassifierPredictions, BlackboxClassifierPredictFunction, is_one_hot, get_nb_classes, XGBoostClassifier
|
||||||
from apt.utils.datasets import ArrayDataset, Data, DatasetWithPredictions
|
from apt.utils.datasets import ArrayDataset, Data, DatasetWithPredictions
|
||||||
from apt.utils import dataset_utils
|
from apt.utils import dataset_utils
|
||||||
|
|
||||||
from sklearn.tree import DecisionTreeRegressor
|
from sklearn.tree import DecisionTreeRegressor
|
||||||
from sklearn.ensemble import RandomForestClassifier
|
from sklearn.ensemble import RandomForestClassifier
|
||||||
|
from xgboost import XGBClassifier
|
||||||
|
|
||||||
from tensorflow.keras.models import Sequential
|
from tensorflow.keras.models import Sequential
|
||||||
from tensorflow.keras.layers import Dense, Input
|
from tensorflow.keras.layers import Dense, Input
|
||||||
|
|
@ -90,6 +91,22 @@ def test_keras_regressor():
|
||||||
score = model.score(test)
|
score = model.score(test)
|
||||||
|
|
||||||
|
|
||||||
|
def test_xgboost_classifier():
    """Smoke-test the XGBoostClassifier wrapper on the Iris dataset."""
    (x_train, y_train), (x_test, y_test) = dataset_utils.get_iris_dataset_np()

    # Pre-fit the raw xgboost model, then wrap it for the toolkit API.
    underlying_model = XGBClassifier()
    underlying_model.fit(x_train, y_train)
    model = XGBoostClassifier(underlying_model, ModelOutputType.CLASSIFIER_PROBABILITIES,
                              input_shape=(4,), nb_classes=3)

    train = ArrayDataset(x_train, y_train)
    test = ArrayDataset(x_test, y_test)

    pred = model.predict(test)
    assert pred.shape[0] == x_test.shape[0]

    score = model.score(test)
    assert 0.0 <= score <= 1.0

    model.fit(train)
|
||||||
|
|
||||||
|
|
||||||
def test_blackbox_classifier():
|
def test_blackbox_classifier():
|
||||||
(x_train, y_train), (x_test, y_test) = dataset_utils.get_iris_dataset_np()
|
(x_train, y_train), (x_test, y_test) = dataset_utils.get_iris_dataset_np()
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue