From 60e8e3eab8f41b52a03137bfb8f8c99be9bf926b Mon Sep 17 00:00:00 2001 From: Yizhou Chi Date: Tue, 10 Sep 2024 13:51:54 +0800 Subject: [PATCH] fix hfdataset; make dirs when save notebook --- expo/data/hf_data.py | 7 +++++-- expo/utils.py | 1 + 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/expo/data/hf_data.py b/expo/data/hf_data.py index a7e2a1afe..9ed2b2c48 100644 --- a/expo/data/hf_data.py +++ b/expo/data/hf_data.py @@ -38,18 +38,21 @@ class HFExpDataset(ExpDataset): df = pd.read_csv(Path(raw_dir, "train.csv")) else: df = self.dataset["train"].to_pandas() - df.to_csv(Path(raw_dir, "train.csv")) + df.to_csv(Path(raw_dir, "train.csv"), index=False) if os.path.exists(Path(raw_dir, "test.csv")): test_df = pd.read_csv(Path(raw_dir, "test.csv")) else: if "test" in self.dataset: test_df = self.dataset["test"].to_pandas() - test_df.to_csv(Path(raw_dir, "test.csv")) + test_df.to_csv(Path(raw_dir, "test.csv"), index=False) else: test_df = None return df, test_df + # def get_df_head(self, raw_df): + # return raw_df.head() + if __name__ == "__main__": dataset_dir = "D:/work/automl/datasets" diff --git a/expo/utils.py b/expo/utils.py index 270842e41..44de8cf9b 100644 --- a/expo/utils.py +++ b/expo/utils.py @@ -99,6 +99,7 @@ def save_notebook(role: Role, save_dir: str = "", name: str = ""): for code in codes: clean_nb.cells.append(nbformat.v4.new_code_cell(code)) nb = process_cells(role.execute_code.nb) + os.makedirs(save_dir, exist_ok=True) file_path = save_dir / f"{name}.ipynb" clean_file_path = save_dir / f"{name}_clean.ipynb" nbformat.write(nb, file_path)