Add support for xgboost XGBClassifier (#53)

2026-04-25 04:46:21 +02:00 · 2022-07-28 16:31:08 +03:00 · 2022-07-28 16:31:08 +03:00 · a9e2a35e18
commit a9e2a35e18
parent a13415ad67
3 changed files with 106 additions and 1 deletions
--- a/apt/utils/models/__init__.py
+++ b/apt/utils/models/__init__.py
@ -3,3 +3,4 @@ from apt.utils.models.model import Model, BlackboxClassifier, ModelOutputType, S
    check_correct_model_output
 from apt.utils.models.sklearn_model import SklearnModel, SklearnClassifier, SklearnRegressor
 from apt.utils.models.keras_model import KerasClassifier, KerasRegressor
+from apt.utils.models.xgboost_model import XGBoostClassifier
--- a/apt/utils/models/xgboost_model.py
+++ b/apt/utils/models/xgboost_model.py
@ -0,0 +1,87 @@
+from typing import Optional, Tuple
+
+from apt.utils.models import Model, ModelOutputType, ScoringMethod, check_correct_model_output, is_one_hot
+from apt.utils.datasets import Dataset, OUTPUT_DATA_ARRAY_TYPE
+
+from xgboost import XGBClassifier
+import numpy as np
+
+from art.estimators.classification.xgboost import XGBoostClassifier as ArtXGBoostClassifier
+
+
+class XGBoostModel(Model):
+    """
+    Common base for wrappers around xgboost models; adds nothing on top of `Model` itself.
+    """
+
+
+class XGBoostClassifier(XGBoostModel):
+    """
+    Wrapper class for xgboost classification models.
+
+    The supplied model is additionally wrapped in an ART ``XGBoostClassifier`` estimator, which is the object
+    that `fit` and `predict` operate on.
+
+    :param model: The original xgboost model object. Must be fit.
+    :type model: Booster or XGBClassifier object
+    :param output_type: The type of output the model yields (vector/label only)
+    :type output_type: `ModelOutputType`
+    :param input_shape: Shape of input to the model. Only the first entry is used, as the number of features.
+    :type input_shape: Tuple[int, ...]
+    :param nb_classes: Number of prediction classes of the model.
+    :type nb_classes: int
+    :param black_box_access: Boolean describing the type of deployment of the model (when in production).
+                             Set to True if the model is only available via query (API) access, i.e.,
+                             only the outputs of the model are exposed, and False if the model internals
+                             are also available. Default is True.
+    :type black_box_access: boolean, optional
+    :param unlimited_queries: If black_box_access is True, this boolean indicates whether a user can perform
+                              unlimited queries to the model API or whether there is a limit to the number of
+                              queries that can be submitted. Default is True.
+    :type unlimited_queries: boolean, optional
+    """
+    def __init__(self, model: XGBClassifier, output_type: ModelOutputType, input_shape: Tuple[int, ...],
+                 nb_classes: int, black_box_access: Optional[bool] = True,
+                 unlimited_queries: Optional[bool] = True, **kwargs):
+        super().__init__(model, output_type, black_box_access, unlimited_queries, **kwargs)
+        # ART needs the feature and class counts explicitly; the feature count is taken from input_shape[0].
+        self._art_model = ArtXGBoostClassifier(model, nb_features=input_shape[0], nb_classes=nb_classes)
+        self.nb_classes = nb_classes
+
+    def fit(self, train_data: Dataset, **kwargs) -> None:
+        """
+        Fit the model using the training data.
+
+        :param train_data: Training data. Labels are expected to either be one-hot encoded or a 1D-array of categorical
+                           labels (consecutive integers starting at 0).
+        :type train_data: `Dataset`
+        :param kwargs: Additional keyword arguments, forwarded to the underlying model's ``fit`` method.
+        :return: None
+        """
+        # NOTE(review): labels are handed to the underlying estimator unchanged - confirm that one-hot encoded
+        # labels are actually accepted by the xgboost version in use.
+        # The underlying model is fit directly (via the ART wrapper's private `_model` attribute).
+        self._art_model._model.fit(train_data.get_samples(), train_data.get_labels(), **kwargs)
+
+    def predict(self, x: Dataset, **kwargs) -> OUTPUT_DATA_ARRAY_TYPE:
+        """
+        Perform predictions using the model for input `x`.
+
+        :param x: Input samples.
+        :type x: `Dataset`
+        :param kwargs: Additional keyword arguments, forwarded to the ART estimator's ``predict`` method.
+        :return: Predictions from the model as numpy array (class probabilities, if supported).
+        """
+        predictions = self._art_model.predict(x.get_samples(), **kwargs)
+        # Check the predictions against the output type declared at construction time.
+        check_correct_model_output(predictions, self.output_type)
+        return predictions
+
+    def score(self, test_data: Dataset, scoring_method: Optional[ScoringMethod] = ScoringMethod.ACCURACY, **kwargs):
+        """
+        Score the model using test data.
+
+        :param test_data: Test data.
+        :type test_data: `Dataset`
+        :param scoring_method: The method for scoring predictions. Only `ScoringMethod.ACCURACY` is supported.
+        :type scoring_method: `ScoringMethod`, optional
+        :param kwargs: Accepted for interface compatibility; currently unused.
+        :return: the score as float (for classifiers, between 0 and 1)
+        :raises NotImplementedError: if `scoring_method` is anything other than `ScoringMethod.ACCURACY`.
+        """
+        y = test_data.get_labels()
+        predicted = self.predict(test_data)
+        # Reduce both sides to class indices so they can be compared element-wise.
+        if is_one_hot(predicted):
+            predicted = np.argmax(predicted, axis=1)
+        if is_one_hot(y):
+            y = np.argmax(y, axis=1)
+        if scoring_method == ScoringMethod.ACCURACY:
+            return np.count_nonzero(y == predicted) / predicted.shape[0]
+        else:
+            raise NotImplementedError(f"Unsupported scoring method: {scoring_method}")
--- a/tests/test_model.py
+++ b/tests/test_model.py
@ -2,12 +2,13 @@ import pytest
 import numpy as np

 from apt.utils.models import SklearnClassifier, SklearnRegressor, ModelOutputType, KerasClassifier, KerasRegressor, \
-    BlackboxClassifierPredictions, BlackboxClassifierPredictFunction, is_one_hot, get_nb_classes
+    BlackboxClassifierPredictions, BlackboxClassifierPredictFunction, is_one_hot, get_nb_classes, XGBoostClassifier
 from apt.utils.datasets import ArrayDataset, Data, DatasetWithPredictions
 from apt.utils import dataset_utils

 from sklearn.tree import DecisionTreeRegressor
 from sklearn.ensemble import RandomForestClassifier
+from xgboost import XGBClassifier

 from tensorflow.keras.models import Sequential
 from tensorflow.keras.layers import Dense, Input
@ -90,6 +91,22 @@ def test_keras_regressor():
    score = model.score(test)


+def test_xgboost_classifier():
+    """Smoke-test the XGBoostClassifier wrapper: predict, score, then re-fit on the iris data."""
+    (x_train, y_train), (x_test, y_test) = dataset_utils.get_iris_dataset_np()
+
+    # The wrapper requires an already-fitted xgboost model.
+    xgb = XGBClassifier()
+    xgb.fit(x_train, y_train)
+    wrapper = XGBoostClassifier(xgb, ModelOutputType.CLASSIFIER_PROBABILITIES, input_shape=(4,), nb_classes=3)
+    train_set = ArrayDataset(x_train, y_train)
+    test_set = ArrayDataset(x_test, y_test)
+
+    # One prediction row per test sample.
+    predictions = wrapper.predict(test_set)
+    assert predictions.shape[0] == x_test.shape[0]
+
+    # Accuracy must be a valid fraction.
+    accuracy = wrapper.score(test_set)
+    assert 0.0 <= accuracy <= 1.0
+
+    # Re-fitting through the wrapper should not raise.
+    wrapper.fit(train_set)
+
+
 def test_blackbox_classifier():
    (x_train, y_train), (x_test, y_test) = dataset_utils.get_iris_dataset_np()