Squashed commit of the following:

commit d53818644e
Author: olasaadi <92303887+olasaadi@users.noreply.github.com>
Date: Mon Mar 7 20:12:55 2022 +0200

    Build the dt on all features anon (#23)

    * add param to build the DT on all features and not just on QI
    * one-hot encoding only for categorical features

commit c47819a031
Author: abigailt <abigailt@il.ibm.com>
Date: Wed Feb 23 19:40:11 2022 +0200

    Update docs

commit 7e2ce7fe96
Merge: 7fbd1e4 752871d
Author: abigailt <abigailt@il.ibm.com>
Date: Wed Feb 23 19:26:44 2022 +0200

    Merge remote-tracking branch 'origin/main' into main

commit 7fbd1e4b90
Author: abigailt <abigailt@il.ibm.com>
Date: Wed Feb 23 19:22:54 2022 +0200

    Update version and docs

commit 752871dd0c
Author: olasaadi <92303887+olasaadi@users.noreply.github.com>
Date: Wed Feb 23 14:57:12 2022 +0200

    add minimization notebook (#22)

    * add german credit notebook to showcase new features (minimize only some features and categorical features)
    * add notebook to show minimization data on a regression problem
2026-06-17 15:35:13 +02:00 · 2022-04-25 17:39:30 +03:00 · 2022-04-25 17:39:30 +03:00 · a37ff06df8
commit a37ff06df8
parent fb2413c4aa
12 changed files with 753 additions and 69 deletions
--- a/apt/utils/dataset_utils.py
+++ b/apt/utils/dataset_utils.py
@@ -18,14 +18,14 @@ def _load_iris(test_set_size: float = 0.3):
    return (x_train, y_train), (x_test, y_test)


-def get_iris_dataset():
+def get_iris_dataset(test_set: float = 0.3):
    """
    Loads the Iris dataset from scikit-learn.

    :param test_set: Proportion of the data to use as validation split (value between 0 and 1).
    :return: Entire dataset and labels as numpy array.
    """
-    return _load_iris()
+    return _load_iris(test_set)


 def _load_diabetes(test_set_size: float = 0.3):
@@ -54,6 +54,7 @@ def get_german_credit_dataset(test_set: float = 0.3):
    """
    Loads the UCI German_credit dataset from `tests/datasets/german` or downloads it if necessary.

+    :param test_set: Proportion of the data to use as validation split (value between 0 and 1).
    :return: Dataset and labels as pandas dataframes.
    """