Fix error with pandas dataframes (#92)

* Fix error with pandas dataframes in _columns_different_distributions + add appropriate test
* Update documentation of classes to reflect that all data should be encoded and scaled.

---------

Signed-off-by: abigailt <abigailt@il.ibm.com>
2026-07-23 17:01:03 +02:00 · 2024-02-13 08:56:12 -05:00 · 2024-02-13 08:56:12 -05:00 · e00535d120
commit e00535d120
parent cb70ca10e6
6 changed files with 28 additions and 30 deletions
--- a/tests/test_data_assessment_short_test.py
+++ b/tests/test_data_assessment_short_test.py
@@ -1,3 +1,4 @@
+import pandas as pd
 import pytest

 from apt.anonymization import Anonymize
@@ -52,6 +53,8 @@ def test_risk_anonymization(name, data, dataset_type, mgr):
        categorical_features = []
    elif "nursery" in name:
        preprocessed_x_train, preprocessed_x_test, categorical_features = preprocess_nursery_x_data(x_train, x_test)
+        preprocessed_x_train = pd.DataFrame(preprocessed_x_train)
+        preprocessed_x_test = pd.DataFrame(preprocessed_x_test)
        QI = list(range(15, 20))
        anonymizer = Anonymize(ANON_K, QI, train_only_QI=True)
    else: