mirror of
https://github.com/IBM/ai-privacy-toolkit.git
synced 2026-04-24 20:36:21 +02:00
Limit scikit-learn version because of API changes (#81)
* Limit scikit-learn versions between 0.22.2 and 1.1.3, remove deprecated load_boston().
* Set pytest configuration option to show test progress in detail.
* Change np.int to int according to DeprecationWarning.

Signed-off-by: Maya Anderson <mayaa@il.ibm.com>
This commit is contained in:
parent
be7d248c33
commit
e9a225501f
5 changed files with 84 additions and 79 deletions
|
|
@ -1,6 +1,7 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
|
|
@ -8,6 +9,7 @@
|
|||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
|
|
@ -21,6 +23,7 @@
|
|||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
|
|
@ -29,7 +32,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
|
|
@ -44,18 +47,6 @@
|
|||
" [ 26. 11. 0. 0. 48.]\n",
|
||||
" [ 27. 9. 0. 0. 40.]]\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/var/folders/9b/qbtw28w53355cvpjs4qn83yc0000gn/T/ipykernel_85828/3975777015.py:22: DeprecationWarning: `np.int` is a deprecated alias for the builtin `int`. To silence this warning, use `int` by itself. Doing this will not modify any behavior and is safe. When replacing `np.int`, you may wish to use e.g. `np.int64` or `np.int32` to specify the precision. If you wish to review your current use, check the release note link for additional information.\n",
|
||||
"Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n",
|
||||
" y_train = y_train.astype(np.int)\n",
|
||||
"/var/folders/9b/qbtw28w53355cvpjs4qn83yc0000gn/T/ipykernel_85828/3975777015.py:26: DeprecationWarning: `np.int` is a deprecated alias for the builtin `int`. To silence this warning, use `int` by itself. Doing this will not modify any behavior and is safe. When replacing `np.int`, you may wish to use e.g. `np.int64` or `np.int32` to specify the precision. If you wish to review your current use, check the release note link for additional information.\n",
|
||||
"Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n",
|
||||
" y_test = y_test.astype(np.int)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
|
|
@ -80,11 +71,11 @@
|
|||
"\n",
|
||||
"y_train[y_train == '<=50K'] = 0\n",
|
||||
"y_train[y_train == '>50K'] = 1\n",
|
||||
"y_train = y_train.astype(np.int)\n",
|
||||
"y_train = y_train.astype(int)\n",
|
||||
"\n",
|
||||
"y_test[y_test == '<=50K'] = 0\n",
|
||||
"y_test[y_test == '>50K'] = 1\n",
|
||||
"y_test = y_test.astype(np.int)\n",
|
||||
"y_test = y_test.astype(int)\n",
|
||||
"\n",
|
||||
"# get balanced dataset\n",
|
||||
"x_train = x_train[:x_test.shape[0]]\n",
|
||||
|
|
@ -94,6 +85,7 @@
|
|||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
|
|
@ -102,14 +94,22 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Base model accuracy: 0.8074442601805786\n"
|
||||
"Base model accuracy: 0.8076285240464345\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/home/mayaa/Development/GitHub/aiprivacy/ai-privacy-toolkit/venv1/lib/python3.8/site-packages/sklearn/utils/deprecation.py:103: FutureWarning: The attribute `n_features_` is deprecated in 1.0 and will be removed in 1.2. Use `n_features_in_` instead.\n",
|
||||
" warnings.warn(msg, category=FutureWarning)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
|
@ -128,6 +128,7 @@
|
|||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
|
|
@ -138,18 +139,9 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/Users/olasaadi/PycharmProjects/ai-privacy-toolkit-internal/venv/lib/python3.8/site-packages/art/attacks/inference/membership_inference/black_box.py:262: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
|
||||
" self.attack_model.fit(np.c_[x_1, x_2], y_ready) # type: ignore\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from art.attacks.inference.membership_inference import MembershipInferenceBlackBox\n",
|
||||
"\n",
|
||||
|
|
@ -167,6 +159,7 @@
|
|||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
|
|
@ -175,14 +168,14 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"0.545264709495148\n"
|
||||
"0.5460017196904557\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
|
@ -198,6 +191,7 @@
|
|||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
|
|
@ -205,6 +199,7 @@
|
|||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
|
|
@ -218,7 +213,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
|
|
@ -226,20 +221,20 @@
|
|||
"output_type": "stream",
|
||||
"text": [
|
||||
"[[38. 13. 0. 0. 40.]\n",
|
||||
" [57. 13. 0. 0. 30.]\n",
|
||||
" [37. 9. 0. 0. 40.]\n",
|
||||
" [46. 13. 0. 0. 35.]\n",
|
||||
" [28. 9. 0. 0. 40.]\n",
|
||||
" ...\n",
|
||||
" [26. 13. 0. 0. 40.]\n",
|
||||
" [29. 10. 0. 0. 50.]\n",
|
||||
" [25. 9. 0. 0. 40.]]\n"
|
||||
" [27. 10. 0. 0. 50.]\n",
|
||||
" [28. 9. 0. 0. 40.]]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from apt.utils.datasets import ArrayDataset\n",
|
||||
"import os\n",
|
||||
"import sys\n",
|
||||
"sys.path.insert(0, os.path.abspath('..'))\n",
|
||||
"from apt.utils.datasets import ArrayDataset\n",
|
||||
"from apt.anonymization import Anonymize\n",
|
||||
"\n",
|
||||
"# QI = (age, education-num, capital-gain, hours-per-week)\n",
|
||||
|
|
@ -251,14 +246,16 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": "6739"
|
||||
"text/plain": [
|
||||
"6739"
|
||||
]
|
||||
},
|
||||
"execution_count": 11,
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
|
|
@ -270,14 +267,16 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": "658"
|
||||
"text/plain": [
|
||||
"401"
|
||||
]
|
||||
},
|
||||
"execution_count": 12,
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
|
|
@ -288,6 +287,7 @@
|
|||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
|
|
@ -296,14 +296,22 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Anonymized model accuracy: 0.83078434985566\n"
|
||||
"Anonymized model accuracy: 0.826914808672686\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/home/mayaa/Development/GitHub/aiprivacy/ai-privacy-toolkit/venv1/lib/python3.8/site-packages/sklearn/utils/deprecation.py:103: FutureWarning: The attribute `n_features_` is deprecated in 1.0 and will be removed in 1.2. Use `n_features_in_` instead.\n",
|
||||
" warnings.warn(msg, category=FutureWarning)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
|
@ -317,6 +325,7 @@
|
|||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
|
|
@ -326,22 +335,14 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/Users/olasaadi/PycharmProjects/ai-privacy-toolkit-internal/venv/lib/python3.8/site-packages/art/attacks/inference/membership_inference/black_box.py:262: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
|
||||
" self.attack_model.fit(np.c_[x_1, x_2], y_ready) # type: ignore\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"0.5047291487532244\n"
|
||||
"0.49692912418621793\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
|
@ -363,6 +364,7 @@
|
|||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
|
|
@ -371,15 +373,15 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"(0.5312420517168291, 0.7696843139663432)\n",
|
||||
"(0.5048372911169745, 0.4935511607910576)\n"
|
||||
"(0.5316007088009451, 0.7738607050730868)\n",
|
||||
"(0.4971184877823882, 0.5297874953936863)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
|
@ -417,6 +419,7 @@
|
|||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
|
|
@ -440,9 +443,9 @@
|
|||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.3"
|
||||
"version": "3.8.10"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -66,11 +66,11 @@
|
|||
"\n",
|
||||
"y_train[y_train == '<=50K'] = 0\n",
|
||||
"y_train[y_train == '>50K'] = 1\n",
|
||||
"y_train = y_train.astype(np.int)\n",
|
||||
"y_train = y_train.astype(int)\n",
|
||||
"\n",
|
||||
"y_test[y_test == '<=50K'] = 0\n",
|
||||
"y_test[y_test == '>50K'] = 1\n",
|
||||
"y_test = y_test.astype(np.int)\n",
|
||||
"y_test = y_test.astype(int)\n",
|
||||
"\n",
|
||||
"print(x_train)"
|
||||
]
|
||||
|
|
@ -264,4 +264,4 @@
|
|||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
}
|
||||
|
|
@ -1,7 +1,7 @@
|
|||
numpy==1.22.0
|
||||
pandas~=1.1.0
|
||||
scipy==1.4.1
|
||||
scikit-learn>=0.22.2
|
||||
scikit-learn>=0.22.2,<=1.1.3
|
||||
torch>=1.8.0
|
||||
tqdm>=4.64.1
|
||||
matplotlib>=3.7.0
|
||||
|
|
|
|||
|
|
@ -30,3 +30,7 @@ builtins = keras,xgboost
|
|||
exclude =
|
||||
venv
|
||||
venv1
|
||||
|
||||
[tool:pytest]
|
||||
log_cli = True
|
||||
log-cli-level = INFO
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@ import pandas as pd
|
|||
|
||||
from sklearn.compose import ColumnTransformer
|
||||
|
||||
from sklearn.datasets import load_boston, load_diabetes
|
||||
from sklearn.datasets import load_diabetes
|
||||
from sklearn.impute import SimpleImputer
|
||||
from sklearn.model_selection import train_test_split
|
||||
from sklearn.pipeline import Pipeline
|
||||
|
|
@ -24,11 +24,11 @@ tf.compat.v1.disable_eager_execution()
|
|||
|
||||
|
||||
@pytest.fixture
|
||||
def data():
|
||||
return load_boston(return_X_y=True)
|
||||
def dataset():
|
||||
return load_diabetes()
|
||||
|
||||
|
||||
def test_minimizer_params(data):
|
||||
def test_minimizer_params():
|
||||
# Assume two features, age and height, and boolean label
|
||||
cells = [{"id": 1, "ranges": {"age": {"start": None, "end": 38}, "height": {"start": None, "end": 170}}, "label": 0,
|
||||
'categories': {}, "representative": {"age": 26, "height": 149}},
|
||||
|
|
@ -54,7 +54,7 @@ def test_minimizer_params(data):
|
|||
gen.transform(dataset=ArrayDataset(X, features_names=features))
|
||||
|
||||
|
||||
def test_minimizer_fit(data):
|
||||
def test_minimizer_fit():
|
||||
features = ['age', 'height']
|
||||
X = np.array([[23, 165],
|
||||
[45, 158],
|
||||
|
|
@ -108,7 +108,7 @@ def test_minimizer_fit(data):
|
|||
assert ((rel_accuracy >= target_accuracy) or (target_accuracy - rel_accuracy) <= 0.05)
|
||||
|
||||
|
||||
def test_minimizer_fit_pandas(data):
|
||||
def test_minimizer_fit_pandas():
|
||||
features = ['age', 'height', 'sex', 'ola']
|
||||
X = [[23, 165, 'f', 'aa'],
|
||||
[45, 158, 'f', 'aa'],
|
||||
|
|
@ -179,7 +179,7 @@ def test_minimizer_fit_pandas(data):
|
|||
assert ((rel_accuracy >= target_accuracy) or (target_accuracy - rel_accuracy) <= 0.05)
|
||||
|
||||
|
||||
def test_minimizer_params_categorical(data):
|
||||
def test_minimizer_params_categorical():
|
||||
# Assume three features, age, sex and height, and boolean label
|
||||
cells = [{'id': 1, 'label': 0, 'ranges': {'age': {'start': None, 'end': None}},
|
||||
'categories': {'sex': ['f', 'm']}, 'hist': [2, 0],
|
||||
|
|
@ -246,7 +246,7 @@ def test_minimizer_params_categorical(data):
|
|||
assert ((rel_accuracy >= target_accuracy) or (target_accuracy - rel_accuracy) <= 0.05)
|
||||
|
||||
|
||||
def test_minimizer_fit_QI(data):
|
||||
def test_minimizer_fit_QI():
|
||||
features = ['age', 'height', 'weight']
|
||||
X = np.array([[23, 165, 70],
|
||||
[45, 158, 67],
|
||||
|
|
@ -301,7 +301,7 @@ def test_minimizer_fit_QI(data):
|
|||
assert ((rel_accuracy >= target_accuracy) or (target_accuracy - rel_accuracy) <= 0.05)
|
||||
|
||||
|
||||
def test_minimizer_fit_pandas_QI(data):
|
||||
def test_minimizer_fit_pandas_QI():
|
||||
features = ['age', 'height', 'weight', 'sex', 'ola']
|
||||
X = [[23, 165, 65, 'f', 'aa'],
|
||||
[45, 158, 76, 'f', 'aa'],
|
||||
|
|
@ -577,8 +577,7 @@ def test_german_credit_pandas():
|
|||
assert ((rel_accuracy >= target_accuracy) or (target_accuracy - rel_accuracy) <= 0.05)
|
||||
|
||||
|
||||
def test_regression():
|
||||
dataset = load_diabetes()
|
||||
def test_regression(dataset):
|
||||
x_train, x_test, y_train, y_test = train_test_split(dataset.data, dataset.target, test_size=0.5, random_state=14)
|
||||
|
||||
base_est = DecisionTreeRegressor(random_state=10, min_samples_split=2)
|
||||
|
|
@ -651,7 +650,7 @@ def test_regression():
|
|||
assert ((rel_accuracy >= target_accuracy) or (target_accuracy - rel_accuracy) <= 0.05)
|
||||
|
||||
|
||||
def test_X_y(data):
|
||||
def test_X_y():
|
||||
features = [0, 1, 2]
|
||||
X = np.array([[23, 165, 70],
|
||||
[45, 158, 67],
|
||||
|
|
@ -705,7 +704,7 @@ def test_X_y(data):
|
|||
assert ((rel_accuracy >= target_accuracy) or (target_accuracy - rel_accuracy) <= 0.05)
|
||||
|
||||
|
||||
def test_X_y_features_names(data):
|
||||
def test_X_y_features_names():
|
||||
features = ['age', 'height', 'weight']
|
||||
X = np.array([[23, 165, 70],
|
||||
[45, 158, 67],
|
||||
|
|
@ -759,7 +758,7 @@ def test_X_y_features_names(data):
|
|||
assert ((rel_accuracy >= target_accuracy) or (target_accuracy - rel_accuracy) <= 0.05)
|
||||
|
||||
|
||||
def test_BaseEstimator_classification(data):
|
||||
def test_BaseEstimator_classification():
|
||||
features = ['age', 'height', 'weight', 'sex', 'ola']
|
||||
X = [[23, 165, 65, 'f', 'aa'],
|
||||
[45, 158, 76, 'f', 'aa'],
|
||||
|
|
@ -833,8 +832,7 @@ def test_BaseEstimator_classification(data):
|
|||
assert ((rel_accuracy >= target_accuracy) or (target_accuracy - rel_accuracy) <= 0.05)
|
||||
|
||||
|
||||
def test_BaseEstimator_regression():
|
||||
dataset = load_diabetes()
|
||||
def test_BaseEstimator_regression(dataset):
|
||||
x_train, x_test, y_train, y_test = train_test_split(dataset.data, dataset.target, test_size=0.5, random_state=14)
|
||||
|
||||
base_est = DecisionTreeRegressor(random_state=10, min_samples_split=2)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue