mirror of
https://github.com/IBM/ai-privacy-toolkit.git
synced 2026-04-25 04:46:21 +02:00
Add support for xgboost XGBClassifier (#53)
This commit is contained in:
parent
a13415ad67
commit
a9e2a35e18
3 changed files with 106 additions and 1 deletions
|
|
@ -3,3 +3,4 @@ from apt.utils.models.model import Model, BlackboxClassifier, ModelOutputType, S
|
|||
check_correct_model_output
|
||||
from apt.utils.models.sklearn_model import SklearnModel, SklearnClassifier, SklearnRegressor
|
||||
from apt.utils.models.keras_model import KerasClassifier, KerasRegressor
|
||||
from apt.utils.models.xgboost_model import XGBoostClassifier
|
||||
|
|
|
|||
87
apt/utils/models/xgboost_model.py
Normal file
87
apt/utils/models/xgboost_model.py
Normal file
|
|
@ -0,0 +1,87 @@
|
|||
from typing import Optional, Tuple
|
||||
|
||||
from apt.utils.models import Model, ModelOutputType, ScoringMethod, check_correct_model_output, is_one_hot
|
||||
from apt.utils.datasets import Dataset, OUTPUT_DATA_ARRAY_TYPE
|
||||
|
||||
from xgboost import XGBClassifier
|
||||
import numpy as np
|
||||
|
||||
from art.estimators.classification.xgboost import XGBoostClassifier as ArtXGBoostClassifier
|
||||
|
||||
|
||||
class XGBoostModel(Model):
    """
    Wrapper class for models created with the xgboost library.

    Serves as the common base for concrete xgboost wrappers (e.g. the
    classifier wrapper); it adds no behavior of its own beyond the
    generic ``Model`` contract.
    """
class XGBoostClassifier(XGBoostModel):
    """
    Wrapper class for xgboost classification models.

    :param model: The original xgboost model object. Must be fit.
    :type model: Booster or XGBClassifier object
    :param output_type: The type of output the model yields (vector/label only)
    :type output_type: `ModelOutputType`
    :param input_shape: Shape of input to the model.
    :type input_shape: Tuple[int, ...]
    :param nb_classes: Number of prediction classes of the model.
    :type nb_classes: int
    :param black_box_access: Boolean describing the type of deployment of the model (when in production).
                             Set to True if the model is only available via query (API) access, i.e.,
                             only the outputs of the model are exposed, and False if the model internals
                             are also available. Default is True.
    :type black_box_access: boolean, optional
    :param unlimited_queries: If black_box_access is True, this boolean indicates whether a user can perform
                              unlimited queries to the model API or whether there is a limit to the number of
                              queries that can be submitted. Default is True.
    :type unlimited_queries: boolean, optional
    """
    def __init__(self, model: XGBClassifier, output_type: ModelOutputType, input_shape: Tuple[int, ...],
                 nb_classes: int, black_box_access: Optional[bool] = True,
                 unlimited_queries: Optional[bool] = True, **kwargs):
        super().__init__(model, output_type, black_box_access, unlimited_queries, **kwargs)
        # ART's xgboost wrapper needs the feature count and class count up front;
        # input_shape[0] assumes a flat (n_features,) input shape.
        self._art_model = ArtXGBoostClassifier(model, nb_features=input_shape[0], nb_classes=nb_classes)
        self.nb_classes = nb_classes

    def fit(self, train_data: Dataset, **kwargs) -> None:
        """
        Fit the model using the training data.

        :param train_data: Training data. Labels are expected to either be one-hot encoded or a 1D-array of categorical
                           labels (consecutive integers starting at 0).
        :type train_data: `Dataset`
        :return: None
        """
        # Fit the underlying xgboost model directly (ART's wrapper does not
        # expose a fit method for xgboost estimators).
        self._art_model._model.fit(train_data.get_samples(), train_data.get_labels())

    def predict(self, x: Dataset, **kwargs) -> OUTPUT_DATA_ARRAY_TYPE:
        """
        Perform predictions using the model for input `x`.

        :param x: Input samples.
        :type x: `Dataset`
        :return: Predictions from the model as numpy array (class probabilities, if supported).
        """
        predictions = self._art_model.predict(x.get_samples(), **kwargs)
        # Validates that the prediction array matches the declared output_type
        # (e.g. probability vectors vs. labels).
        check_correct_model_output(predictions, self.output_type)
        return predictions

    def score(self, test_data: Dataset, scoring_method: Optional[ScoringMethod] = ScoringMethod.ACCURACY, **kwargs):
        """
        Score the model using test data.

        :param test_data: Test data.
        :type test_data: `Dataset`
        :param scoring_method: Scoring method to apply. Only ``ScoringMethod.ACCURACY`` is currently supported.
        :type scoring_method: `ScoringMethod`, optional
        :return: the score as float (for classifiers, between 0 and 1)
        :raises NotImplementedError: If ``scoring_method`` is not ``ScoringMethod.ACCURACY``.
        """
        y = test_data.get_labels()
        predicted = self.predict(test_data)
        # Normalize both predictions and labels to 1D class indices before comparing.
        if is_one_hot(predicted):
            predicted = np.argmax(predicted, axis=1)
        if is_one_hot(y):
            y = np.argmax(y, axis=1)
        if scoring_method == ScoringMethod.ACCURACY:
            return np.count_nonzero(y == predicted) / predicted.shape[0]
        else:
            raise NotImplementedError
|
|
@ -2,12 +2,13 @@ import pytest
|
|||
import numpy as np
|
||||
|
||||
from apt.utils.models import SklearnClassifier, SklearnRegressor, ModelOutputType, KerasClassifier, KerasRegressor, \
|
||||
BlackboxClassifierPredictions, BlackboxClassifierPredictFunction, is_one_hot, get_nb_classes
|
||||
BlackboxClassifierPredictions, BlackboxClassifierPredictFunction, is_one_hot, get_nb_classes, XGBoostClassifier
|
||||
from apt.utils.datasets import ArrayDataset, Data, DatasetWithPredictions
|
||||
from apt.utils import dataset_utils
|
||||
|
||||
from sklearn.tree import DecisionTreeRegressor
|
||||
from sklearn.ensemble import RandomForestClassifier
|
||||
from xgboost import XGBClassifier
|
||||
|
||||
from tensorflow.keras.models import Sequential
|
||||
from tensorflow.keras.layers import Dense, Input
|
||||
|
|
@ -90,6 +91,22 @@ def test_keras_regressor():
|
|||
score = model.score(test)
|
||||
|
||||
|
||||
def test_xgboost_classifier():
    """End-to-end check of the XGBoostClassifier wrapper on the iris dataset:
    predict shape, score range, and re-fitting through the wrapper."""
    (x_train, y_train), (x_test, y_test) = dataset_utils.get_iris_dataset_np()
    underlying_model = XGBClassifier()
    underlying_model.fit(x_train, y_train)
    model = XGBoostClassifier(underlying_model, ModelOutputType.CLASSIFIER_PROBABILITIES,
                              input_shape=(4,), nb_classes=3)
    train = ArrayDataset(x_train, y_train)
    test = ArrayDataset(x_test, y_test)
    pred = model.predict(test)
    # assert is a statement, not a function — no parentheses around the condition.
    assert pred.shape[0] == x_test.shape[0]

    score = model.score(test)
    assert 0.0 <= score <= 1.0

    # Re-fit through the wrapper should not raise.
    model.fit(train)
||||
def test_blackbox_classifier():
|
||||
(x_train, y_train), (x_test, y_test) = dataset_utils.get_iris_dataset_np()
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue