Squashed commit of the following:

commit d53818644e
Author: olasaadi <92303887+olasaadi@users.noreply.github.com>
Date:   Mon Mar 7 20:12:55 2022 +0200

    Build the dt on all features anon (#23)

    * add param to build the DT on all features and not just on QI
    * one-hot encoding only for categorical features

commit c47819a031
Author: abigailt <abigailt@il.ibm.com>
Date:   Wed Feb 23 19:40:11 2022 +0200

    Update docs

commit 7e2ce7fe96
Merge: 7fbd1e4 752871d
Author: abigailt <abigailt@il.ibm.com>
Date:   Wed Feb 23 19:26:44 2022 +0200

    Merge remote-tracking branch 'origin/main' into main

commit 7fbd1e4b90
Author: abigailt <abigailt@il.ibm.com>
Date:   Wed Feb 23 19:22:54 2022 +0200

    Update version and docs

commit 752871dd0c
Author: olasaadi <92303887+olasaadi@users.noreply.github.com>
Date:   Wed Feb 23 14:57:12 2022 +0200

    add minimization notebook (#22)

    * add German credit notebook to showcase new features (minimizing only some features, and categorical features)

    * add notebook to show data minimization on a regression problem
This commit is contained in:
abigailt 2022-04-25 17:39:30 +03:00
parent fb2413c4aa
commit a37ff06df8
12 changed files with 753 additions and 69 deletions

View file

@ -22,6 +22,7 @@ from apt.utils.models import Model, SklearnRegressor, ModelOutputType, SklearnCl
class GeneralizeToRepresentative(BaseEstimator, MetaEstimatorMixin, TransformerMixin):
""" A transformer that generalizes data to representative points.
Learns data generalizations based on an original model's predictions
and a target accuracy. Once the generalizations are learned, can
receive one or more data records and transform them to representative
@ -58,6 +59,10 @@ class GeneralizeToRepresentative(BaseEstimator, MetaEstimatorMixin, TransformerM
The method used to train on the data set for minimization. The default
is to train the tree only on the features that are given as
features_to_minimize.
is_regression : Bool, optional
Whether the model is a regression model or not (if False, assumes
a classification model). Default is False.
Attributes
----------
features_ : list of str
@ -69,8 +74,6 @@ class GeneralizeToRepresentative(BaseEstimator, MetaEstimatorMixin, TransformerM
as measured on the training data.
generalizations_ : object
The generalizations that were learned (actual feature ranges).
Notes
-----
"""
def __init__(self, estimator: Union[BaseEstimator, Model] = None, target_accuracy: float = 0.998,
@ -95,11 +98,13 @@ class GeneralizeToRepresentative(BaseEstimator, MetaEstimatorMixin, TransformerM
def get_params(self, deep=True):
"""Get parameters for this estimator.
Parameters
----------
deep : boolean, optional
If True, will return the parameters for this estimator and contained
subobjects that are estimators.
Returns
-------
params : mapping of string to any
@ -116,6 +121,7 @@ class GeneralizeToRepresentative(BaseEstimator, MetaEstimatorMixin, TransformerM
def set_params(self, **params):
"""Set the parameters of this estimator.
Returns
-------
self : object
@ -134,6 +140,7 @@ class GeneralizeToRepresentative(BaseEstimator, MetaEstimatorMixin, TransformerM
def fit_transform(self, X: Optional[DATA_PANDAS_NUMPY_TYPE] = None, y: Optional[DATA_PANDAS_NUMPY_TYPE] = None,
features_names: Optional = None, dataset: Optional[ArrayDataset] = None):
"""Learns the generalizations based on training data, and applies them to the data.
Parameters
----------
X : {array-like, sparse matrix}, shape (n_samples, n_features), optional
@ -158,6 +165,7 @@ class GeneralizeToRepresentative(BaseEstimator, MetaEstimatorMixin, TransformerM
def fit(self, X: Optional[DATA_PANDAS_NUMPY_TYPE] = None, y: Optional[DATA_PANDAS_NUMPY_TYPE] = None,
features_names: Optional = None, dataset: ArrayDataset = None):
"""Learns the generalizations based on training data.
Parameters
----------
X : {array-like, sparse matrix}, shape (n_samples, n_features), optional
@ -380,6 +388,7 @@ class GeneralizeToRepresentative(BaseEstimator, MetaEstimatorMixin, TransformerM
def transform(self, X: Optional[DATA_PANDAS_NUMPY_TYPE] = None, features_names: Optional = None, dataset: ArrayDataset = None):
""" Transforms data records to representative points.
Parameters
----------
X : {array-like, sparse matrix}, shape (n_samples, n_features). If provided as a pandas dataframe,