Mirror of https://github.com/SakanaAI/doc-to-lora.git (synced 2026-04-25 00:06:20 +02:00)
Doc-to-LoRA release
Commit 1abe8ae16d
92 changed files with 22131 additions and 0 deletions
configs/main_exp/mistral/self_gen_lv1_closed_qa_1_l2l.yaml (new file)
@@ -0,0 +1,31 @@
# LoRA
lora_r: 8
lora_dropout: 0.0
target_modules:
- down_proj

use_kl_loss: true

ctx_encoder_type: per_layer_activations
n_latent_queries: 8
num_blocks: 9
num_self_attn_per_block: 0

gradient_accumulation_steps: 11
max_packed_inp_len: 6144
max_packed_ctx_len: 6144

# data
train_ds_names:
- self_gen/mistralai/Mistral-7B-Instruct-v0.2_temp_0.0_closed_qa_prob_1.0/fw_qa_v2/min_0_to_2000/train/*level_1*.parquet
- self_gen/mistralai/Mistral-7B-Instruct-v0.2_temp_0.0_closed_qa_prob_0.0/pwc_compact
- self_gen/mistralai/Mistral-7B-Instruct-v0.2_temp_0.0_closed_qa_prob_1.0/squad_compact
- self_gen/mistralai/Mistral-7B-Instruct-v0.2_temp_0.0_closed_qa_prob_1.0/ropes_compact
- self_gen/mistralai/Mistral-7B-Instruct-v0.2_temp_0.0_closed_qa_prob_1.0/drop_compact

val_ds_names:
- squad
- pwc
- drop
- ropes
- self_gen/mistralai/Mistral-7B-Instruct-v0.2_temp_0.0_closed_qa_prob_0.0/fw_qa_v2/min_0_to_2000/train/*level_0_val*.parquet
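As a rough illustration of how the "# LoRA" section of such a config might be consumed, here is a minimal Python sketch that loads the file with PyYAML and maps the keys onto a peft LoraConfig. The mapping (and the use of peft at all) is an assumption for illustration, not necessarily how this repo's trainer builds its adapters.

import yaml
from peft import LoraConfig  # assumption: adapters built via peft

with open("configs/main_exp/mistral/self_gen_lv1_closed_qa_1_l2l.yaml") as f:
    cfg = yaml.safe_load(f)

# Map the "# LoRA" keys onto peft's LoraConfig (illustrative mapping only).
lora_config = LoraConfig(
    r=cfg["lora_r"],                       # rank-8 adapters
    lora_dropout=cfg["lora_dropout"],      # 0.0, i.e. no dropout
    target_modules=cfg["target_modules"],  # only the MLP down_proj layers
)

Note also that with gradient_accumulation_steps: 11 and max_packed_inp_len: 6144, each optimizer step would see up to roughly 11 × 6144 ≈ 67.6k packed input tokens per device, assuming one packed sequence per micro-batch (an assumption; the actual micro-batch size is not shown in this diff).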
configs/main_exp/qwen/self_gen_lv1_closed_qa_1_l2l.yaml (new file)
@@ -0,0 +1,30 @@
# LoRA
lora_r: 8
lora_dropout: 0.0
target_modules:
- down_proj

use_kl_loss: true

ctx_encoder_type: per_layer_activations
n_latent_queries: 8
num_blocks: 9
num_self_attn_per_block: 0

gradient_accumulation_steps: 11
max_packed_inp_len: 6144
max_packed_ctx_len: 6144

# data
train_ds_names:
- self_gen/Qwen/Qwen3-4B-Instruct-2507_temp_0.0_closed_qa_prob_1.0/fw_qa_v2/min_0_to_2000/train/*level_1*.parquet
- self_gen/Qwen/Qwen3-4B-Instruct-2507_temp_0.0_closed_qa_prob_0.0/pwc_compact
- self_gen/Qwen/Qwen3-4B-Instruct-2507_temp_0.0_closed_qa_prob_1.0/squad_compact
- self_gen/Qwen/Qwen3-4B-Instruct-2507_temp_0.0_closed_qa_prob_1.0/ropes_compact
- self_gen/Qwen/Qwen3-4B-Instruct-2507_temp_0.0_closed_qa_prob_1.0/drop_compact

val_ds_names:
- squad
- pwc
- drop
- ropes
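This Qwen variant is hyperparameter-for-hyperparameter identical to the Mistral config above; only the self_gen dataset paths change (data generated by Qwen3-4B-Instruct-2507 rather than Mistral-7B-Instruct-v0.2), and its val_ds_names list omits the held-out fw_qa_v2 level-0 split that the other two main_exp configs include.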
configs/main_exp/self_gen_lv1_closed_qa_1_l2l.yaml (new file)
@@ -0,0 +1,31 @@
# LoRA
lora_r: 8
lora_dropout: 0.0
target_modules:
- down_proj

use_kl_loss: true

ctx_encoder_type: per_layer_activations
n_latent_queries: 8
num_blocks: 9
num_self_attn_per_block: 0

gradient_accumulation_steps: 11
max_packed_inp_len: 6144
max_packed_ctx_len: 6144

# data
train_ds_names:
- self_gen/google/gemma-2-2b-it_temp_0.0_closed_qa_prob_1.0/fw_qa_v2/min_0_to_2000/train/*level_1*.parquet
- self_gen/google/gemma-2-2b-it_temp_0.0_closed_qa_prob_0.0/pwc_compact
- self_gen/google/gemma-2-2b-it_temp_0.0_closed_qa_prob_1.0/squad_compact
- self_gen/google/gemma-2-2b-it_temp_0.0_closed_qa_prob_1.0/ropes_compact
- self_gen/google/gemma-2-2b-it_temp_0.0_closed_qa_prob_1.0/drop_compact

val_ds_names:
- squad
- pwc
- drop
- ropes
- self_gen/google/gemma-2-2b-it_temp_0.0_closed_qa_prob_0.0/fw_qa_v2/min_0_to_2000/train/*level_0_val*.parquet
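The train_ds_names entries mix literal dataset directories with glob patterns over parquet shards. A hypothetical sketch of how such names could be expanded into concrete files follows; DATA_ROOT, the expand helper, and the assumption that plain entries are directories of parquet shards are all illustrative, not taken from the repo.

import glob
import os

DATA_ROOT = "data"  # hypothetical dataset root; not specified in this diff

def expand(ds_name: str) -> list[str]:
    """Expand one train_ds_names entry into a sorted list of parquet files."""
    path = os.path.join(DATA_ROOT, ds_name)
    if "*" in ds_name:
        # entries like .../train/*level_1*.parquet are glob patterns
        return sorted(glob.glob(path))
    # assumption: plain entries such as ..._compact are directories of shards
    return sorted(glob.glob(os.path.join(path, "*.parquet")))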
configs/main_exp/self_gen_lv1_closed_qa_1_no_qa_l2l.yaml (new file)
@@ -0,0 +1,27 @@
# LoRA
lora_r: 8
lora_dropout: 0.0
target_modules:
- down_proj

use_kl_loss: true

ctx_encoder_type: per_layer_activations
n_latent_queries: 8
num_blocks: 9
num_self_attn_per_block: 0

gradient_accumulation_steps: 11
max_packed_inp_len: 6144
max_packed_ctx_len: 6144

# data
train_ds_names:
- self_gen/google/gemma-2-2b-it_temp_0.0_closed_qa_prob_1.0/fw_qa_v2/min_0_to_2000/train/*level_1*.parquet

val_ds_names:
- squad
- pwc
- drop
- ropes
- self_gen/google/gemma-2-2b-it_temp_0.0_closed_qa_prob_0.0/fw_qa_v2/min_0_to_2000/train/*level_0_val*.parquet
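Relative to the config above, this _no_qa_ variant keeps every hyperparameter but drops the four compact closed-QA training sets (pwc, squad, ropes, drop), training only on the self-generated fw_qa_v2 level-1 data while validating on the same five sets. This reads as an ablation isolating the contribution of the closed-QA mixture, though the diff itself does not state the intent.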
configs/niah_exp/ctx_magic_number_32_256.yaml (new file)
@@ -0,0 +1,14 @@
# LoRA
lora_r: 8
lora_dropout: 0.0
target_modules:
- down_proj

# data
train_ds_names:
- ctx_magic_number_32_128
- ctx_magic_number_128_256

val_ds_names:
- ctx_magic_number_32_128
- ctx_magic_number_128_256
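This needle-in-a-haystack config specifies only the LoRA and data sections, so the encoder and packing settings presumably fall back to defaults defined elsewhere; note it also trains and validates on the same two splits. Judging purely by the names, each ctx_magic_number_A_B example would hide a magic number inside a context whose length falls between A and B; the sketch below is a guess at that format, not the repo's generator.

import random

def make_magic_number_example(min_words: int = 32, max_words: int = 128) -> dict:
    """Hypothetical ctx_magic_number-style sample, inferred from the names."""
    magic = random.randint(0, 99_999)
    n_words = random.randint(min_words, max_words)
    filler = ["word"] * n_words
    # hide the needle at a random position in the haystack
    filler.insert(random.randrange(n_words), f"The magic number is {magic}.")
    return {
        "context": " ".join(filler),
        "question": "What is the magic number?",
        "answer": str(magic),
    }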