mirror of
https://github.com/SakanaAI/doc-to-lora.git
synced 2026-04-26 08:36:23 +02:00
Doc-to-LoRA release
This commit is contained in:
commit
1abe8ae16d
92 changed files with 22131 additions and 0 deletions
17
scripts/main_exp/0-download_data.py
Normal file
17
scripts/main_exp/0-download_data.py
Normal file
|
|
@ -0,0 +1,17 @@
|
|||
from huggingface_hub import snapshot_download


if __name__ == "__main__":
    # Local directory that receives the downloaded dataset files.
    self_gen_data_dir = "./data/raw_datasets/self_gen/"

    # No `allow_patterns` filter is passed, so the download is not restricted
    # to a single model. To filter by model, add an `allow_patterns` entry
    # using one of the folder names shown at
    # https://huggingface.co/datasets/SakanaAI/self_gen_qa_d2l/tree/main :
    #   - `Qwen`      -> data for `Qwen/Qwen3-4B-Instruct-2507`
    #   - `google`    -> data for `google/gemma-2-2b-it`
    #   - `mistralai` -> data for `mistralai/Mistral-7B-Instruct-v0.2`
    download_options = {
        "repo_id": "SakanaAI/self_gen_qa_d2l",
        "repo_type": "dataset",
        "local_dir": self_gen_data_dir,
        # Example: only fetch the data for `google/gemma-2-2b-it`.
        # "allow_patterns": "google/*",
    }
    snapshot_download(**download_options)
|
||||
Loading…
Add table
Add a link
Reference in a new issue