fix hfdataset; make dirs when saving notebook

This commit is contained in:
Yizhou Chi 2024-09-10 13:51:54 +08:00
parent 294d0fe709
commit 60e8e3eab8
2 changed files with 7 additions and 3 deletions

View file

@ -38,18 +38,21 @@ class HFExpDataset(ExpDataset):
df = pd.read_csv(Path(raw_dir, "train.csv"))
else:
df = self.dataset["train"].to_pandas()
df.to_csv(Path(raw_dir, "train.csv"))
df.to_csv(Path(raw_dir, "train.csv"), index=False)
if os.path.exists(Path(raw_dir, "test.csv")):
test_df = pd.read_csv(Path(raw_dir, "test.csv"))
test_df = pd.read_csv(Path(raw_dir, "test.csv"), index=False)
else:
if "test" in self.dataset:
test_df = self.dataset["test"].to_pandas()
test_df.to_csv(Path(raw_dir, "test.csv"))
test_df.to_csv(Path(raw_dir, "test.csv"), index=False)
else:
test_df = None
return df, test_df
# def get_df_head(self, raw_df):
# return raw_df.head()
if __name__ == "__main__":
dataset_dir = "D:/work/automl/datasets"

View file

@ -99,6 +99,7 @@ def save_notebook(role: Role, save_dir: str = "", name: str = ""):
for code in codes:
clean_nb.cells.append(nbformat.v4.new_code_cell(code))
nb = process_cells(role.execute_code.nb)
os.makedirs(save_dir, exist_ok=True)
file_path = save_dir / f"{name}.ipynb"
clean_file_path = save_dir / f"{name}_clean.ipynb"
nbformat.write(nb, file_path)