mirror of
https://github.com/IBM/ai-privacy-toolkit.git
synced 2026-04-25 04:46:21 +02:00
update
This commit is contained in:
parent
312469212e
commit
06158c8508
2 changed files with 12 additions and 8 deletions
|
|
@ -52,13 +52,13 @@ class Anonymize:
|
|||
else:
|
||||
raise ValueError('No data provided')
|
||||
|
||||
transformed = self._anonymize_ndarray(dataset.get_samples().copy(), dataset.get_labels())
|
||||
transformed = self._anonymize(dataset.get_samples().copy(), dataset.get_labels())
|
||||
if dataset.is_pandas:
|
||||
return pd.DataFrame(transformed, columns=self._features)
|
||||
else:
|
||||
return transformed
|
||||
|
||||
def _anonymize_ndarray(self, x, y):
|
||||
def _anonymize(self, x, y):
|
||||
if x.shape[0] != y.shape[0]:
|
||||
raise ValueError("x and y should have same number of rows")
|
||||
x_anonymizer_train = x[:, self.quasi_identifiers]
|
||||
|
|
@ -75,7 +75,7 @@ class Anonymize:
|
|||
|
||||
self.anonymizer.fit(x_prepared, y)
|
||||
cells_by_id = self._calculate_cells(x, x_prepared)
|
||||
return self._anonymize_data_numpy(x, x_prepared, cells_by_id)
|
||||
return self._anonymize_data(x, x_prepared, cells_by_id)
|
||||
|
||||
def _calculate_cells(self, x, x_anonymizer_train):
|
||||
# x is original data, x_anonymizer_train is only QIs + 1-hot encoded
|
||||
|
|
@ -129,7 +129,7 @@ class Anonymize:
|
|||
node_ids = self._find_sample_nodes(samples)
|
||||
return [cells_by_id[node_id] for node_id in node_ids]
|
||||
|
||||
def _anonymize_data_numpy(self, x, x_anonymizer_train, cells_by_id):
|
||||
def _anonymize_data(self, x, x_anonymizer_train, cells_by_id):
|
||||
cells = self._find_sample_cells(x_anonymizer_train, cells_by_id)
|
||||
index = 0
|
||||
for row in x:
|
||||
|
|
|
|||
|
|
@ -41,13 +41,14 @@ def array2numpy(self, arr: INPUT_DATA_ARRAY_TYPE) -> OUTPUT_DATA_ARRAY_TYPE:
|
|||
raise ValueError('Non supported type: ', type(arr).__name__)
|
||||
|
||||
|
||||
def array2torch_tensor(arr: INPUT_DATA_ARRAY_TYPE) -> Tensor:
|
||||
def array2torch_tensor(self, arr: INPUT_DATA_ARRAY_TYPE) -> Tensor:
|
||||
"""
|
||||
converts from INPUT_DATA_ARRAY_TYPE to torch tensor array
|
||||
"""
|
||||
if type(arr) == np.ndarray:
|
||||
return torch.from_numpy(arr)
|
||||
if type(arr) == pd.DataFrame:
|
||||
if type(arr) == pd.DataFrame or type(arr) == pd.Series:
|
||||
self.is_pandas = True
|
||||
return torch.from_numpy(arr.to_numpy())
|
||||
if isinstance(arr, list):
|
||||
return torch.tensor(arr)
|
||||
|
|
@ -198,8 +199,11 @@ class PytorchData(Dataset):
|
|||
:param y: collection of labels (optional)
|
||||
:param kwargs: dataset parameters
|
||||
"""
|
||||
self._x = array2torch_tensor(x)
|
||||
self._y = array2torch_tensor(y) if y is not None else None
|
||||
self.is_pandas = False
|
||||
self._y = array2torch_tensor(self, y) if y is not None else None
|
||||
self._x = array2torch_tensor(self, x)
|
||||
if self.is_pandas:
|
||||
self.features_names = x.columns
|
||||
|
||||
if y is not None and len(self._x) != len(self._y):
|
||||
raise ValueError('Non equivalent lengths of x and y')
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue