mirror of
https://github.com/IBM/ai-privacy-toolkit.git
synced 2026-06-08 15:05:13 +02:00
formatting
Signed-off-by: abigailt <abigailt@il.ibm.com>
This commit is contained in:
parent
f85fc87bdd
commit
69e45d99e5
4 changed files with 17 additions and 16 deletions
|
|
@@ -878,7 +878,6 @@ class GeneralizeToRepresentative(BaseEstimator, MetaEstimatorMixin, TransformerM
|
|||
rows[feature] = generalizations['range_representatives'][feature][r_index]
|
||||
return original_data_generalized
|
||||
|
||||
|
||||
def _generalize_from_tree(self, original_data, prepared_data, level_nodes, cells, cells_by_id):
|
||||
mapping_to_cells = self._map_to_cells(prepared_data, level_nodes, cells_by_id)
|
||||
all_indexes = []
|
||||
|
|
@@ -1199,6 +1198,7 @@ class GeneralizeToRepresentative(BaseEstimator, MetaEstimatorMixin, TransformerM
|
|||
else:
|
||||
ranges[feature] = list(set(ranges[feature]))
|
||||
ranges[feature].sort()
|
||||
prev_value = 0
|
||||
for index, value in enumerate(ranges[feature]):
|
||||
if index == 0:
|
||||
# for first range, use min value
|
||||
|
|
|
|||
|
|
@@ -20,7 +20,7 @@ from scipy.sparse import csr_matrix
|
|||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
INPUT_DATA_ARRAY_TYPE = Union[np.ndarray, pd.DataFrame, List, Tensor]
|
||||
INPUT_DATA_ARRAY_TYPE = Union[np.ndarray, pd.DataFrame, List, Tensor, csr_matrix]
|
||||
OUTPUT_DATA_ARRAY_TYPE = np.ndarray
|
||||
DATA_PANDAS_NUMPY_TYPE = Union[np.ndarray, pd.DataFrame]
|
||||
|
||||
|
|
@@ -30,15 +30,15 @@ def array2numpy(arr: INPUT_DATA_ARRAY_TYPE) -> OUTPUT_DATA_ARRAY_TYPE:
|
|||
"""
|
||||
converts from INPUT_DATA_ARRAY_TYPE to numpy array
|
||||
"""
|
||||
if type(arr) == np.ndarray:
|
||||
if isinstance(arr, np.ndarray):
|
||||
return arr
|
||||
if type(arr) == pd.DataFrame or type(arr) == pd.Series:
|
||||
if isinstance(arr, pd.DataFrame) or isinstance(arr, pd.Series):
|
||||
return arr.to_numpy()
|
||||
if isinstance(arr, list):
|
||||
return np.array(arr)
|
||||
if type(arr) == Tensor:
|
||||
if isinstance(arr, Tensor):
|
||||
return arr.detach().cpu().numpy()
|
||||
if type(arr) == csr_matrix:
|
||||
if isinstance(arr, csr_matrix):
|
||||
return arr.toarray()
|
||||
|
||||
raise ValueError("Non supported type: ", type(arr).__name__)
|
||||
|
|
@@ -48,15 +48,15 @@ def array2torch_tensor(arr: INPUT_DATA_ARRAY_TYPE) -> Tensor:
|
|||
"""
|
||||
converts from INPUT_DATA_ARRAY_TYPE to torch tensor array
|
||||
"""
|
||||
if type(arr) == np.ndarray:
|
||||
if isinstance(arr, np.ndarray):
|
||||
return torch.from_numpy(arr)
|
||||
if type(arr) == pd.DataFrame or type(arr) == pd.Series:
|
||||
if isinstance(arr, pd.DataFrame) or isinstance(arr, pd.Series):
|
||||
return torch.from_numpy(arr.to_numpy())
|
||||
if isinstance(arr, list):
|
||||
return torch.tensor(arr)
|
||||
if type(arr) == Tensor:
|
||||
if isinstance(arr, Tensor):
|
||||
return arr
|
||||
if type(arr) == csr_matrix:
|
||||
if isinstance(arr, csr_matrix):
|
||||
return torch.from_numpy(arr.toarray())
|
||||
|
||||
raise ValueError("Non supported type: ", type(arr).__name__)
|
||||
|
|
@@ -222,7 +222,7 @@ class ArrayDataset(Dataset):
|
|||
|
||||
def __init__(self, x: INPUT_DATA_ARRAY_TYPE, y: Optional[INPUT_DATA_ARRAY_TYPE] = None,
|
||||
features_names: Optional[list] = None, **kwargs):
|
||||
self.is_pandas = self.is_pandas = type(x) == pd.DataFrame or type(x) == pd.Series
|
||||
self.is_pandas = self.is_pandas = isinstance(x, pd.DataFrame) or isinstance(x, pd.Series)
|
||||
|
||||
self.features_names = features_names
|
||||
self._y = array2numpy(y) if y is not None else None
|
||||
|
|
@@ -330,7 +330,7 @@ class PytorchData(Dataset):
|
|||
self._y = array2torch_tensor(y) if y is not None else None
|
||||
self._x = array2torch_tensor(x)
|
||||
|
||||
self.is_pandas = type(x) == pd.DataFrame or type(x) == pd.Series
|
||||
self.is_pandas = isinstance(x, pd.DataFrame) or isinstance(x, pd.Series)
|
||||
|
||||
if self.is_pandas:
|
||||
self.features_names = x.columns
|
||||
|
|
|
|||
|
|
@@ -43,7 +43,7 @@ def get_nb_classes(y: OUTPUT_DATA_ARRAY_TYPE) -> int:
|
|||
if y is None:
|
||||
return 0
|
||||
|
||||
if type(y) != np.ndarray:
|
||||
if not isinstance(y, np.ndarray):
|
||||
raise ValueError("Input should be numpy array")
|
||||
|
||||
if is_one_hot(y):
|
||||
|
|
@@ -339,8 +339,8 @@ class BlackboxClassifierPredictions(BlackboxClassifier):
|
|||
y_test_pred = check_and_transform_label_format(y_test_pred, nb_classes=self._nb_classes)
|
||||
|
||||
if x_train_pred is not None and y_train_pred is not None and x_test_pred is not None and y_test_pred is not None:
|
||||
if type(y_train_pred) != np.ndarray or type(y_test_pred) != np.ndarray \
|
||||
or type(y_train_pred) != np.ndarray or type(y_test_pred) != np.ndarray:
|
||||
if not isinstance(y_train_pred, np.ndarray) or not isinstance(y_test_pred, np.ndarray) \
|
||||
or not isinstance(y_train_pred, np.ndarray) or not isinstance(y_test_pred, np.ndarray):
|
||||
raise NotImplementedError("X/Y Data should be numpy array")
|
||||
x_pred = np.vstack((x_train_pred, x_test_pred))
|
||||
y_pred = np.vstack((y_train_pred, y_test_pred))
|
||||
|
|
|
|||
|
|
@@ -246,6 +246,7 @@ def create_encoder(numeric_features, categorical_features, x):
|
|||
|
||||
return preprocessor, encoded
|
||||
|
||||
|
||||
def test_minimizer_params_not_transform(get_cells):
|
||||
# Assume two features, age and height, and boolean label
|
||||
cells, features, x, y = get_cells
|
||||
|
|
@@ -385,7 +386,7 @@ def test_minimizer_fit_not_transform(get_data_two_features):
|
|||
|
||||
gen.fit(dataset=train_dataset, generalize_using_transform=False)
|
||||
gener = gen.generalizations
|
||||
expected_generalizations = {'ranges': {'age': [], 'height':[157.0]}, 'categories': {}, 'untouched': []}
|
||||
expected_generalizations = {'ranges': {'age': [], 'height': [157.0]}, 'categories': {}, 'untouched': []}
|
||||
|
||||
compare_generalizations(gener, expected_generalizations)
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue