mirror of
https://github.com/SakanaAI/doc-to-lora.git
synced 2026-04-26 08:36:23 +02:00
17 lines
785 B
Python
17 lines
785 B
Python
from huggingface_hub import snapshot_download
|
|
|
|
if __name__ == "__main__":
    # Local directory that receives the downloaded dataset files.
    target_dir = "./data/raw_datasets/self_gen/"

    # Download the `SakanaAI/self_gen_qa_d2l` dataset repository into
    # `target_dir`.
    #
    # The download can be restricted to the data of a single model through
    # the `allow_patterns` argument. Going by the repository listing at
    # https://huggingface.co/datasets/SakanaAI/self_gen_qa_d2l/tree/main
    # the usable prefixes are:
    #   - `Qwen`      : data for `Qwen/Qwen3-4B-Instruct-2507`
    #   - `google`    : data for `google/gemma-2-2b-it`
    #   - `mistralai` : data for `mistralai/Mistral-7B-Instruct-v0.2`
    #
    # Example: add `allow_patterns="google/*"` to the call below to fetch
    # only the data for `google/gemma-2-2b-it`.
    snapshot_download(
        "SakanaAI/self_gen_qa_d2l",
        local_dir=target_dir,
        repo_type="dataset",
    )
|