mirror of
https://github.com/IBM/ai-privacy-toolkit.git
synced 2026-04-30 23:36:22 +02:00
Add support for xgboost XGBClassifier (#53)
This commit is contained in:
parent
a13415ad67
commit
a9e2a35e18
3 changed files with 106 additions and 1 deletions
|
|
@ -3,3 +3,4 @@ from apt.utils.models.model import Model, BlackboxClassifier, ModelOutputType, S
|
||||||
check_correct_model_output
|
check_correct_model_output
|
||||||
from apt.utils.models.sklearn_model import SklearnModel, SklearnClassifier, SklearnRegressor
|
from apt.utils.models.sklearn_model import SklearnModel, SklearnClassifier, SklearnRegressor
|
||||||
from apt.utils.models.keras_model import KerasClassifier, KerasRegressor
|
from apt.utils.models.keras_model import KerasClassifier, KerasRegressor
|
||||||
|
from apt.utils.models.xgboost_model import XGBoostClassifier
|
||||||
|
|
|
||||||
87
apt/utils/models/xgboost_model.py
Normal file
87
apt/utils/models/xgboost_model.py
Normal file
|
|
@ -0,0 +1,87 @@
|
||||||
|
from typing import Optional, Tuple
|
||||||
|
|
||||||
|
from apt.utils.models import Model, ModelOutputType, ScoringMethod, check_correct_model_output, is_one_hot
|
||||||
|
from apt.utils.datasets import Dataset, OUTPUT_DATA_ARRAY_TYPE
|
||||||
|
|
||||||
|
from xgboost import XGBClassifier
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
from art.estimators.classification.xgboost import XGBoostClassifier as ArtXGBoostClassifier
|
||||||
|
|
||||||
|
|
||||||
|
class XGBoostModel(Model):
    """Base wrapper class for xgboost models.

    Concrete subclasses (e.g. classifiers) adapt a fitted xgboost model to the
    toolkit's common ``Model`` interface.
    """
|
||||||
|
|
||||||
|
|
||||||
|
class XGBoostClassifier(XGBoostModel):
    """
    Wrapper class for xgboost classification models.

    :param model: The original xgboost model object. Must be fit.
    :type model: Booster or XGBClassifier object
    :param output_type: The type of output the model yields (vector/label only)
    :type output_type: `ModelOutputType`
    :param input_shape: Shape of input to the model.
    :type input_shape: Tuple[int, ...]
    :param nb_classes: Number of prediction classes of the model.
    :type nb_classes: int
    :param black_box_access: Boolean describing the type of deployment of the model (when in production).
                             Set to True if the model is only available via query (API) access, i.e.,
                             only the outputs of the model are exposed, and False if the model internals
                             are also available. Default is True.
    :type black_box_access: boolean, optional
    :param unlimited_queries: If black_box_access is True, this boolean indicates whether a user can perform
                              unlimited queries to the model API or whether there is a limit to the number of
                              queries that can be submitted. Default is True.
    :type unlimited_queries: boolean, optional
    """

    def __init__(self, model: XGBClassifier, output_type: ModelOutputType, input_shape: Tuple[int, ...],
                 nb_classes: int, black_box_access: Optional[bool] = True,
                 unlimited_queries: Optional[bool] = True, **kwargs):
        super().__init__(model, output_type, black_box_access, unlimited_queries, **kwargs)
        # ART's XGBoost wrapper needs the flat feature count and class count up front;
        # input_shape[0] assumes tabular (1D-feature) input — TODO confirm for other shapes.
        self._art_model = ArtXGBoostClassifier(model, nb_features=input_shape[0], nb_classes=nb_classes)
        self.nb_classes = nb_classes

    def fit(self, train_data: Dataset, **kwargs) -> None:
        """
        Fit the model using the training data.

        :param train_data: Training data. Labels are expected to either be one-hot encoded or a 1D-array of
                           categorical labels (consecutive integers starting at 0).
        :type train_data: `Dataset`
        :return: None
        """
        y = train_data.get_labels()
        # xgboost expects 1D categorical labels; flatten one-hot encodings so the
        # docstring's promise ("one-hot encoded or 1D-array") actually holds.
        if is_one_hot(y):
            y = np.argmax(y, axis=1)
        # ART's XGBoostClassifier does not expose fit(); train the wrapped model directly.
        self._art_model._model.fit(train_data.get_samples(), y)

    def predict(self, x: Dataset, **kwargs) -> OUTPUT_DATA_ARRAY_TYPE:
        """
        Perform predictions using the model for input `x`.

        :param x: Input samples.
        :type x: `Dataset`
        :return: Predictions from the model as numpy array (class probabilities, if supported).
        """
        predictions = self._art_model.predict(x.get_samples(), **kwargs)
        # Validate that the prediction shape matches the declared output type.
        check_correct_model_output(predictions, self.output_type)
        return predictions

    def score(self, test_data: Dataset, scoring_method: Optional[ScoringMethod] = ScoringMethod.ACCURACY, **kwargs):
        """
        Score the model using test data.

        :param test_data: Test data.
        :type test_data: `Dataset`
        :param scoring_method: Scoring method to apply; only ``ScoringMethod.ACCURACY`` is supported.
        :type scoring_method: `ScoringMethod`, optional
        :return: the score as float (for classifiers, between 0 and 1)
        :raises NotImplementedError: If an unsupported scoring method is requested.
        """
        y = test_data.get_labels()
        predicted = self.predict(test_data)
        # Normalize both predictions and labels to 1D class indices before comparing.
        if is_one_hot(predicted):
            predicted = np.argmax(predicted, axis=1)
        if is_one_hot(y):
            y = np.argmax(y, axis=1)
        if scoring_method == ScoringMethod.ACCURACY:
            return np.count_nonzero(y == predicted) / predicted.shape[0]
        else:
            raise NotImplementedError
|
||||||
|
|
@ -2,12 +2,13 @@ import pytest
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
from apt.utils.models import SklearnClassifier, SklearnRegressor, ModelOutputType, KerasClassifier, KerasRegressor, \
|
from apt.utils.models import SklearnClassifier, SklearnRegressor, ModelOutputType, KerasClassifier, KerasRegressor, \
|
||||||
BlackboxClassifierPredictions, BlackboxClassifierPredictFunction, is_one_hot, get_nb_classes
|
BlackboxClassifierPredictions, BlackboxClassifierPredictFunction, is_one_hot, get_nb_classes, XGBoostClassifier
|
||||||
from apt.utils.datasets import ArrayDataset, Data, DatasetWithPredictions
|
from apt.utils.datasets import ArrayDataset, Data, DatasetWithPredictions
|
||||||
from apt.utils import dataset_utils
|
from apt.utils import dataset_utils
|
||||||
|
|
||||||
from sklearn.tree import DecisionTreeRegressor
|
from sklearn.tree import DecisionTreeRegressor
|
||||||
from sklearn.ensemble import RandomForestClassifier
|
from sklearn.ensemble import RandomForestClassifier
|
||||||
|
from xgboost import XGBClassifier
|
||||||
|
|
||||||
from tensorflow.keras.models import Sequential
|
from tensorflow.keras.models import Sequential
|
||||||
from tensorflow.keras.layers import Dense, Input
|
from tensorflow.keras.layers import Dense, Input
|
||||||
|
|
@ -90,6 +91,22 @@ def test_keras_regressor():
|
||||||
score = model.score(test)
|
score = model.score(test)
|
||||||
|
|
||||||
|
|
||||||
|
def test_xgboost_classifier():
    """Smoke-test the XGBoostClassifier wrapper on the Iris dataset."""
    (x_train, y_train), (x_test, y_test) = dataset_utils.get_iris_dataset_np()

    # Pre-fit the raw xgboost model, then wrap it for the toolkit API.
    underlying_model = XGBClassifier()
    underlying_model.fit(x_train, y_train)
    model = XGBoostClassifier(underlying_model, ModelOutputType.CLASSIFIER_PROBABILITIES,
                              input_shape=(4,), nb_classes=3)

    train = ArrayDataset(x_train, y_train)
    test = ArrayDataset(x_test, y_test)

    pred = model.predict(test)
    assert pred.shape[0] == x_test.shape[0]

    score = model.score(test)
    assert 0.0 <= score <= 1.0

    model.fit(train)
|
||||||
|
|
||||||
|
|
||||||
def test_blackbox_classifier():
|
def test_blackbox_classifier():
|
||||||
(x_train, y_train), (x_test, y_test) = dataset_utils.get_iris_dataset_np()
|
(x_train, y_train), (x_test, y_test) = dataset_utils.get_iris_dataset_np()
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue