tinyforge-zero/recipe/train_on_pairs.py
Rana Usman 6305ff0f91 Initial release: TinyForge-Zero recipe + mined pairs + reproduction guide
Companion artifact for the paper 'How Far Can an Open Base Model
Self-Improve? Recipes, Limits, and Test-Time Synergy'.

Contents:
- recipe/{train_on_pairs,bootstrap,multi_pair_14b,curriculum_math,eval_raw,eval_plus,confirm}.py
- data/pairs_{7b_40,14b_multi_new60,math_13}.jsonl (released mined pairs)
- controls/mbpp_corrupt_control.py (the +0 negative control)
- docs/{scaling_chart,fig1_headline,fig6_boundary}.png
- REPRODUCE.md (paper claim -> exact command mapping)
2026-05-13 20:43:52 +05:00

146 lines
4.6 KiB
Python

"""Train a LoRA adapter on a released `pairs.jsonl` file and evaluate.
This is the clean replication entrypoint: skip the mining stage entirely
and just consume the (broken, fixed) pairs we already mined and released
in data/. Use this to reproduce the headline numbers without burning
GPU on the search step.
Schema of pairs.jsonl (one JSON object per line):
{
"signature": "def foo(x): ...", # function header + docstring
"tests": ["assert foo(1) == 2", ...],
"broken": "def foo(x): ... # buggy",
"error": "AssertionError ...",
"fixed": "def foo(x): ... # correct"
}
Example:
python recipe/train_on_pairs.py \\
--model Qwen/Qwen2.5-7B \\
--pairs data/pairs_7b_40.jsonl \\
--out adapter_7b_seed13 \\
--seed 13
Then evaluate the resulting adapter with:
python recipe/eval_raw.py --model Qwen/Qwen2.5-7B \\
--adapter adapter_7b_seed13 --bench humaneval
"""
import argparse, json, os, random, time
from pathlib import Path
import torch
from datasets import Dataset
from peft import LoraConfig, get_peft_model
from transformers import (AutoModelForCausalLM, AutoTokenizer,
Trainer, TrainingArguments)
T0 = time.time()
def log(m): print(f"[{time.time()-T0:7.1f}s] {m}", flush=True)
REPAIR_PROMPT = """### Task
Fix the bug in the Python function so it passes all the provided tests.
### Tests
{tests}
### Buggy code
```python
{broken}
```
### Error
{error}
### Fixed code
```python
{fixed}
```
"""
def main():
ap = argparse.ArgumentParser()
ap.add_argument("--model", required=True,
help="HF model id, e.g. Qwen/Qwen2.5-7B")
ap.add_argument("--pairs", required=True,
help="Path to a pairs.jsonl file (one JSON object per line)")
ap.add_argument("--out", required=True,
help="Output directory for the trained LoRA adapter")
ap.add_argument("--epochs", type=int, default=2)
ap.add_argument("--lr", type=float, default=1e-4)
ap.add_argument("--lora-rank", type=int, default=16)
ap.add_argument("--seed", type=int, default=13)
ap.add_argument("--batch-size", type=int, default=1)
ap.add_argument("--grad-accum", type=int, default=8)
ap.add_argument("--max-length", type=int, default=2048)
args = ap.parse_args()
random.seed(args.seed)
torch.manual_seed(args.seed)
log(f"Loading pairs from {args.pairs}")
pairs = [json.loads(l) for l in open(args.pairs)]
log(f" {len(pairs)} pairs")
log(f"Loading tokenizer + base model {args.model}")
tok = AutoTokenizer.from_pretrained(args.model, use_fast=True)
if tok.pad_token is None:
tok.pad_token = tok.eos_token
model = AutoModelForCausalLM.from_pretrained(
args.model, torch_dtype=torch.bfloat16, device_map="auto",
)
log(f"Attaching LoRA (rank {args.lora_rank}, q/k/v/o projections)")
lora = LoraConfig(
r=args.lora_rank, lora_alpha=args.lora_rank * 2,
target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
lora_dropout=0.05, bias="none", task_type="CAUSAL_LM",
)
model = get_peft_model(model, lora)
model.print_trainable_parameters()
def format_pair(p):
tests = "\n".join(p["tests"])
text = REPAIR_PROMPT.format(
tests=tests, broken=p["broken"],
error=p.get("error", ""), fixed=p["fixed"],
)
ids = tok(text, truncation=True, max_length=args.max_length,
padding="max_length", return_tensors="pt")
return {
"input_ids": ids.input_ids[0],
"attention_mask": ids.attention_mask[0],
"labels": ids.input_ids[0].clone(),
}
ds = Dataset.from_list([format_pair(p) for p in pairs])
log("Training")
targs = TrainingArguments(
output_dir=args.out + "_ckpt",
per_device_train_batch_size=args.batch_size,
gradient_accumulation_steps=args.grad_accum,
num_train_epochs=args.epochs,
learning_rate=args.lr,
lr_scheduler_type="cosine",
warmup_ratio=0.03,
logging_steps=5,
save_strategy="no",
bf16=True,
report_to="none",
seed=args.seed,
)
Trainer(model=model, args=targs, train_dataset=ds).train()
log(f"Saving adapter to {args.out}")
Path(args.out).mkdir(parents=True, exist_ok=True)
model.save_pretrained(args.out)
tok.save_pretrained(args.out)
log("Done. Evaluate with: python recipe/eval_raw.py --model "
f"{args.model} --adapter {args.out} --bench humaneval")
if __name__ == "__main__":
main()