diff --git a/notebooks/membership_inference_anonymization_adult.ipynb b/notebooks/membership_inference_anonymization_adult.ipynb index 4a0ea00..7d8bbb3 100644 --- a/notebooks/membership_inference_anonymization_adult.ipynb +++ b/notebooks/membership_inference_anonymization_adult.ipynb @@ -1,6 +1,7 @@ { "cells": [ { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -8,6 +9,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -21,6 +23,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -29,7 +32,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 3, "metadata": {}, "outputs": [ { @@ -44,18 +47,6 @@ " [ 26. 11. 0. 0. 48.]\n", " [ 27. 9. 0. 0. 40.]]\n" ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/var/folders/9b/qbtw28w53355cvpjs4qn83yc0000gn/T/ipykernel_85828/3975777015.py:22: DeprecationWarning: `np.int` is a deprecated alias for the builtin `int`. To silence this warning, use `int` by itself. Doing this will not modify any behavior and is safe. When replacing `np.int`, you may wish to use e.g. `np.int64` or `np.int32` to specify the precision. If you wish to review your current use, check the release note link for additional information.\n", - "Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n", - " y_train = y_train.astype(np.int)\n", - "/var/folders/9b/qbtw28w53355cvpjs4qn83yc0000gn/T/ipykernel_85828/3975777015.py:26: DeprecationWarning: `np.int` is a deprecated alias for the builtin `int`. To silence this warning, use `int` by itself. Doing this will not modify any behavior and is safe. When replacing `np.int`, you may wish to use e.g. `np.int64` or `np.int32` to specify the precision. 
If you wish to review your current use, check the release note link for additional information.\n", - "Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n", - " y_test = y_test.astype(np.int)\n" - ] } ], "source": [ @@ -80,11 +71,11 @@ "\n", "y_train[y_train == '<=50K'] = 0\n", "y_train[y_train == '>50K'] = 1\n", - "y_train = y_train.astype(np.int)\n", + "y_train = y_train.astype(int)\n", "\n", "y_test[y_test == '<=50K'] = 0\n", "y_test[y_test == '>50K'] = 1\n", - "y_test = y_test.astype(np.int)\n", + "y_test = y_test.astype(int)\n", "\n", "# get balanced dataset\n", "x_train = x_train[:x_test.shape[0]]\n", @@ -94,6 +85,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -102,14 +94,22 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Base model accuracy: 0.8074442601805786\n" + "Base model accuracy: 0.8076285240464345\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/mayaa/Development/GitHub/aiprivacy/ai-privacy-toolkit/venv1/lib/python3.8/site-packages/sklearn/utils/deprecation.py:103: FutureWarning: The attribute `n_features_` is deprecated in 1.0 and will be removed in 1.2. Use `n_features_in_` instead.\n", + " warnings.warn(msg, category=FutureWarning)\n" ] } ], @@ -128,6 +128,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -138,18 +139,9 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 5, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/olasaadi/PycharmProjects/ai-privacy-toolkit-internal/venv/lib/python3.8/site-packages/art/attacks/inference/membership_inference/black_box.py:262: DataConversionWarning: A column-vector y was passed when a 1d array was expected. 
Please change the shape of y to (n_samples,), for example using ravel().\n", - " self.attack_model.fit(np.c_[x_1, x_2], y_ready) # type: ignore\n" - ] - } - ], + "outputs": [], "source": [ "from art.attacks.inference.membership_inference import MembershipInferenceBlackBox\n", "\n", @@ -167,6 +159,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -175,14 +168,14 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "0.545264709495148\n" + "0.5460017196904557\n" ] } ], @@ -198,6 +191,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -205,6 +199,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -218,7 +213,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -226,20 +221,20 @@ "output_type": "stream", "text": [ "[[38. 13. 0. 0. 40.]\n", - " [57. 13. 0. 0. 30.]\n", - " [37. 9. 0. 0. 40.]\n", + " [46. 13. 0. 0. 35.]\n", + " [28. 9. 0. 0. 40.]\n", " ...\n", " [26. 13. 0. 0. 40.]\n", - " [29. 10. 0. 0. 50.]\n", - " [25. 9. 0. 0. 40.]]\n" + " [27. 10. 0. 0. 50.]\n", + " [28. 9. 0. 0. 
40.]]\n" ] } ], "source": [ - "from apt.utils.datasets import ArrayDataset\n", "import os\n", "import sys\n", "sys.path.insert(0, os.path.abspath('..'))\n", + "from apt.utils.datasets import ArrayDataset\n", "from apt.anonymization import Anonymize\n", "\n", "# QI = (age, education-num, capital-gain, hours-per-week)\n", @@ -251,14 +246,16 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 8, "metadata": {}, "outputs": [ { "data": { - "text/plain": "6739" + "text/plain": [ + "6739" + ] }, - "execution_count": 11, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -270,14 +267,16 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 9, "metadata": {}, "outputs": [ { "data": { - "text/plain": "658" + "text/plain": [ + "401" + ] }, - "execution_count": 12, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -288,6 +287,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -296,14 +296,22 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Anonymized model accuracy: 0.83078434985566\n" + "Anonymized model accuracy: 0.826914808672686\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/mayaa/Development/GitHub/aiprivacy/ai-privacy-toolkit/venv1/lib/python3.8/site-packages/sklearn/utils/deprecation.py:103: FutureWarning: The attribute `n_features_` is deprecated in 1.0 and will be removed in 1.2. 
Use `n_features_in_` instead.\n", + " warnings.warn(msg, category=FutureWarning)\n" ] } ], @@ -317,6 +325,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -326,22 +335,14 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 11, "metadata": {}, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/olasaadi/PycharmProjects/ai-privacy-toolkit-internal/venv/lib/python3.8/site-packages/art/attacks/inference/membership_inference/black_box.py:262: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n", - " self.attack_model.fit(np.c_[x_1, x_2], y_ready) # type: ignore\n" - ] - }, { "name": "stdout", "output_type": "stream", "text": [ - "0.5047291487532244\n" + "0.49692912418621793\n" ] } ], @@ -363,6 +364,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -371,15 +373,15 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "(0.5312420517168291, 0.7696843139663432)\n", - "(0.5048372911169745, 0.4935511607910576)\n" + "(0.5316007088009451, 0.7738607050730868)\n", + "(0.4971184877823882, 0.5297874953936863)\n" ] } ], @@ -417,6 +419,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -440,9 +443,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.3" + "version": "3.8.10" } }, "nbformat": 4, "nbformat_minor": 2 -} \ No newline at end of file +} diff --git a/notebooks/minimization_adult.ipynb b/notebooks/minimization_adult.ipynb index a5a56ab..e3b0c4a 100644 --- a/notebooks/minimization_adult.ipynb +++ b/notebooks/minimization_adult.ipynb @@ -66,11 +66,11 @@ "\n", "y_train[y_train == '<=50K'] = 0\n", "y_train[y_train == '>50K'] = 1\n", - "y_train = 
y_train.astype(np.int)\n", + "y_train = y_train.astype(int)\n", "\n", "y_test[y_test == '<=50K'] = 0\n", "y_test[y_test == '>50K'] = 1\n", - "y_test = y_test.astype(np.int)\n", + "y_test = y_test.astype(int)\n", "\n", "print(x_train)" ] @@ -264,4 +264,4 @@ }, "nbformat": 4, "nbformat_minor": 2 -} +} \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 2421067..4af8475 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ numpy==1.22.0 pandas~=1.1.0 scipy==1.4.1 -scikit-learn>=0.22.2 +scikit-learn>=0.22.2,<=1.1.3 torch>=1.8.0 tqdm>=4.64.1 matplotlib>=3.7.0 diff --git a/setup.cfg b/setup.cfg index d75c434..b46e9dd 100644 --- a/setup.cfg +++ b/setup.cfg @@ -30,3 +30,7 @@ builtins = keras,xgboost exclude = venv venv1 + +[tool:pytest] +log_cli = True +log_cli_level = INFO diff --git a/tests/test_minimizer.py b/tests/test_minimizer.py index bd2f422..e050937 100644 --- a/tests/test_minimizer.py +++ b/tests/test_minimizer.py @@ -4,7 +4,7 @@ import pandas as pd from sklearn.compose import ColumnTransformer -from sklearn.datasets import load_boston, load_diabetes +from sklearn.datasets import load_diabetes from sklearn.impute import SimpleImputer from sklearn.model_selection import train_test_split from sklearn.pipeline import Pipeline @@ -24,11 +24,11 @@ tf.compat.v1.disable_eager_execution() @pytest.fixture -def data(): - return load_boston(return_X_y=True) +def dataset(): + return load_diabetes() -def test_minimizer_params(data): +def test_minimizer_params(): # Assume two features, age and height, and boolean label cells = [{"id": 1, "ranges": {"age": {"start": None, "end": 38}, "height": {"start": None, "end": 170}}, "label": 0, 'categories': {}, "representative": {"age": 26, "height": 149}}, @@ -54,7 +54,7 @@ def test_minimizer_params(data): gen.transform(dataset=ArrayDataset(X, features_names=features)) -def test_minimizer_fit(data): +def test_minimizer_fit(): features = ['age', 'height'] X = np.array([[23, 165], [45, 158], 
@@ -108,7 +108,7 @@ def test_minimizer_fit(data): assert ((rel_accuracy >= target_accuracy) or (target_accuracy - rel_accuracy) <= 0.05) -def test_minimizer_fit_pandas(data): +def test_minimizer_fit_pandas(): features = ['age', 'height', 'sex', 'ola'] X = [[23, 165, 'f', 'aa'], [45, 158, 'f', 'aa'], @@ -179,7 +179,7 @@ def test_minimizer_fit_pandas(data): assert ((rel_accuracy >= target_accuracy) or (target_accuracy - rel_accuracy) <= 0.05) -def test_minimizer_params_categorical(data): +def test_minimizer_params_categorical(): # Assume three features, age, sex and height, and boolean label cells = [{'id': 1, 'label': 0, 'ranges': {'age': {'start': None, 'end': None}}, 'categories': {'sex': ['f', 'm']}, 'hist': [2, 0], @@ -246,7 +246,7 @@ def test_minimizer_params_categorical(data): assert ((rel_accuracy >= target_accuracy) or (target_accuracy - rel_accuracy) <= 0.05) -def test_minimizer_fit_QI(data): +def test_minimizer_fit_QI(): features = ['age', 'height', 'weight'] X = np.array([[23, 165, 70], [45, 158, 67], @@ -301,7 +301,7 @@ def test_minimizer_fit_QI(data): assert ((rel_accuracy >= target_accuracy) or (target_accuracy - rel_accuracy) <= 0.05) -def test_minimizer_fit_pandas_QI(data): +def test_minimizer_fit_pandas_QI(): features = ['age', 'height', 'weight', 'sex', 'ola'] X = [[23, 165, 65, 'f', 'aa'], [45, 158, 76, 'f', 'aa'], @@ -577,8 +577,7 @@ def test_german_credit_pandas(): assert ((rel_accuracy >= target_accuracy) or (target_accuracy - rel_accuracy) <= 0.05) -def test_regression(): - dataset = load_diabetes() +def test_regression(dataset): x_train, x_test, y_train, y_test = train_test_split(dataset.data, dataset.target, test_size=0.5, random_state=14) base_est = DecisionTreeRegressor(random_state=10, min_samples_split=2) @@ -651,7 +650,7 @@ def test_regression(): assert ((rel_accuracy >= target_accuracy) or (target_accuracy - rel_accuracy) <= 0.05) -def test_X_y(data): +def test_X_y(): features = [0, 1, 2] X = np.array([[23, 165, 70], [45, 158, 67], @@ 
-705,7 +704,7 @@ def test_X_y(data): assert ((rel_accuracy >= target_accuracy) or (target_accuracy - rel_accuracy) <= 0.05) -def test_X_y_features_names(data): +def test_X_y_features_names(): features = ['age', 'height', 'weight'] X = np.array([[23, 165, 70], [45, 158, 67], @@ -759,7 +758,7 @@ def test_X_y_features_names(data): assert ((rel_accuracy >= target_accuracy) or (target_accuracy - rel_accuracy) <= 0.05) -def test_BaseEstimator_classification(data): +def test_BaseEstimator_classification(): features = ['age', 'height', 'weight', 'sex', 'ola'] X = [[23, 165, 65, 'f', 'aa'], [45, 158, 76, 'f', 'aa'], @@ -833,8 +832,7 @@ def test_BaseEstimator_classification(data): assert ((rel_accuracy >= target_accuracy) or (target_accuracy - rel_accuracy) <= 0.05) -def test_BaseEstimator_regression(): - dataset = load_diabetes() +def test_BaseEstimator_regression(dataset): x_train, x_test, y_train, y_test = train_test_split(dataset.data, dataset.target, test_size=0.5, random_state=14) base_est = DecisionTreeRegressor(random_state=10, min_samples_split=2)