diff --git a/apt/utils/models/__init__.py b/apt/utils/models/__init__.py
index e458139..ff582b4 100644
--- a/apt/utils/models/__init__.py
+++ b/apt/utils/models/__init__.py
@@ -3,3 +3,4 @@ from apt.utils.models.model import Model, BlackboxClassifier, ModelOutputType, S
     check_correct_model_output
 from apt.utils.models.sklearn_model import SklearnModel, SklearnClassifier, SklearnRegressor
 from apt.utils.models.keras_model import KerasClassifier, KerasRegressor
+from apt.utils.models.xgboost_model import XGBoostClassifier
diff --git a/apt/utils/models/xgboost_model.py b/apt/utils/models/xgboost_model.py
new file mode 100644
index 0000000..1ea02e7
--- /dev/null
+++ b/apt/utils/models/xgboost_model.py
@@ -0,0 +1,92 @@
+from typing import Optional, Tuple
+
+from apt.utils.models import Model, ModelOutputType, ScoringMethod, check_correct_model_output, is_one_hot
+from apt.utils.datasets import Dataset, OUTPUT_DATA_ARRAY_TYPE
+
+from xgboost import XGBClassifier
+import numpy as np
+
+from art.estimators.classification.xgboost import XGBoostClassifier as ArtXGBoostClassifier
+
+
+class XGBoostModel(Model):
+    """
+    Wrapper class for xgboost models.
+    """
+
+
+class XGBoostClassifier(XGBoostModel):
+    """
+    Wrapper class for xgboost classification models.
+
+    :param model: The original xgboost model object. Must be fit.
+    :type model: Booster or XGBClassifier object
+    :param output_type: The type of output the model yields (vector/label only)
+    :type output_type: `ModelOutputType`
+    :param input_shape: Shape of input to the model.
+    :type input_shape: Tuple[int, ...]
+    :param nb_classes: Number of prediction classes of the model.
+    :type nb_classes: int
+    :param black_box_access: Boolean describing the type of deployment of the model (when in production).
+                             Set to True if the model is only available via query (API) access, i.e.,
+                             only the outputs of the model are exposed, and False if the model internals
+                             are also available. Default is True.
+    :type black_box_access: boolean, optional
+    :param unlimited_queries: If black_box_access is True, this boolean indicates whether a user can perform
+                              unlimited queries to the model API or whether there is a limit to the number of
+                              queries that can be submitted. Default is True.
+    :type unlimited_queries: boolean, optional
+    """
+    def __init__(self, model: XGBClassifier, output_type: ModelOutputType, input_shape: Tuple[int, ...],
+                 nb_classes: int, black_box_access: Optional[bool] = True,
+                 unlimited_queries: Optional[bool] = True, **kwargs):
+        super().__init__(model, output_type, black_box_access, unlimited_queries, **kwargs)
+        self._art_model = ArtXGBoostClassifier(model, nb_features=input_shape[0], nb_classes=nb_classes)
+        self.nb_classes = nb_classes
+
+    def fit(self, train_data: Dataset, **kwargs) -> None:
+        """
+        Fit the model using the training data.
+
+        :param train_data: Training data. Labels are expected to either be one-hot encoded or a 1D-array of categorical
+                           labels (consecutive integers starting at 0). One-hot labels are converted to class
+                           indices before being passed to the underlying model.
+        :type train_data: `Dataset`
+        :return: None
+        """
+        labels = train_data.get_labels()
+        # XGBClassifier expects a 1D array of class indices, so collapse one-hot labels first.
+        if is_one_hot(labels):
+            labels = np.argmax(labels, axis=1)
+        self._art_model._model.fit(train_data.get_samples(), labels)
+
+    def predict(self, x: Dataset, **kwargs) -> OUTPUT_DATA_ARRAY_TYPE:
+        """
+        Perform predictions using the model for input `x`.
+
+        :param x: Input samples.
+        :type x: `Dataset`
+        :return: Predictions from the model as numpy array (class probabilities, if supported).
+        """
+        predictions = self._art_model.predict(x.get_samples(), **kwargs)
+        check_correct_model_output(predictions, self.output_type)
+        return predictions
+
+    def score(self, test_data: Dataset, scoring_method: Optional[ScoringMethod] = ScoringMethod.ACCURACY, **kwargs):
+        """
+        Score the model using test data.
+
+        :param test_data: Test data.
+        :type test_data: `Dataset`
+        :return: the score as float (for classifiers, between 0 and 1)
+        """
+        y = test_data.get_labels()
+        predicted = self.predict(test_data)
+        if is_one_hot(predicted):
+            predicted = np.argmax(predicted, axis=1)
+        if is_one_hot(y):
+            y = np.argmax(y, axis=1)
+        if scoring_method == ScoringMethod.ACCURACY:
+            return np.count_nonzero(y == predicted) / predicted.shape[0]
+        else:
+            raise NotImplementedError
diff --git a/tests/test_model.py b/tests/test_model.py
index 21c8fff..c182c59 100644
--- a/tests/test_model.py
+++ b/tests/test_model.py
@@ -2,12 +2,13 @@ import pytest
 import numpy as np
 
 from apt.utils.models import SklearnClassifier, SklearnRegressor, ModelOutputType, KerasClassifier, KerasRegressor, \
-    BlackboxClassifierPredictions, BlackboxClassifierPredictFunction, is_one_hot, get_nb_classes
+    BlackboxClassifierPredictions, BlackboxClassifierPredictFunction, is_one_hot, get_nb_classes, XGBoostClassifier
 from apt.utils.datasets import ArrayDataset, Data, DatasetWithPredictions
 from apt.utils import dataset_utils
 
 from sklearn.tree import DecisionTreeRegressor
 from sklearn.ensemble import RandomForestClassifier
+from xgboost import XGBClassifier
 
 from tensorflow.keras.models import Sequential
 from tensorflow.keras.layers import Dense, Input
@@ -90,6 +91,22 @@ def test_keras_regressor():
     score = model.score(test)
 
 
+def test_xgboost_classifier():
+    (x_train, y_train), (x_test, y_test) = dataset_utils.get_iris_dataset_np()
+    underlying_model = XGBClassifier()
+    underlying_model.fit(x_train, y_train)
+    model = XGBoostClassifier(underlying_model, ModelOutputType.CLASSIFIER_PROBABILITIES, input_shape=(4,), nb_classes=3)
+    train = ArrayDataset(x_train, y_train)
+    test = ArrayDataset(x_test, y_test)
+    pred = model.predict(test)
+    assert(pred.shape[0] == x_test.shape[0])
+
+    score = model.score(test)
+    assert(0.0 <= score <= 1.0)
+
+    model.fit(train)
+
+
 def test_blackbox_classifier():
     (x_train, y_train), (x_test, y_test) = dataset_utils.get_iris_dataset_np()