mirror of
https://github.com/alainnothere/llm-circuit-finder.git
synced 2026-04-24 20:56:21 +02:00
Add files via upload
This commit is contained in:
parent
9ddf9fafd9
commit
8b68afbd3e
9 changed files with 2148 additions and 0 deletions
216
compare_eval.py
Normal file
216
compare_eval.py
Normal file
|
|
@ -0,0 +1,216 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Compare lm_eval results across multiple runs.
|
||||
|
||||
Reads the results JSON files from lm_eval output directories and
|
||||
prints a side-by-side comparison table.
|
||||
|
||||
Usage:
|
||||
python compare_eval.py ./eval_base ./eval_rys_balanced ./eval_rys_triple
|
||||
python compare_eval.py ./eval_*
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def load_results(folder: str) -> dict:
    """Load the lm_eval results dict from an output folder (or a JSON file).

    Searches *folder* recursively for a ``*results*.json`` file and loads the
    newest one. Returns ``{}`` (after a warning on stderr) when nothing
    matching can be found.
    """
    root = Path(folder)

    # lm_eval writes its results JSON somewhere under the output directory,
    # with a run-dependent filename — search recursively for it.
    found = list(root.glob("**/*results*.json"))

    if not found:
        # The caller may have pointed us directly at a results JSON file.
        if root.suffix == '.json' and root.is_file():
            with open(root) as fh:
                return json.load(fh)
        print(f"WARNING: No results JSON found in {root}", file=sys.stderr)
        return {}

    # Re-runs can leave several candidates behind; prefer the most recent.
    newest = max(found, key=lambda p: p.stat().st_mtime)
    with open(newest) as fh:
        return json.load(fh)
|
||||
|
||||
|
||||
def extract_metrics(data: dict) -> dict:
    """Extract per-task metric entries from an lm_eval results dict.

    Args:
        data: parsed lm_eval results JSON (expects a top-level "results" key).

    Returns:
        Mapping "task|metric|filter" -> {"task", "metric", "filter", "value"}
        for every numeric metric whose key carries one of the known lm_eval
        filter suffixes. "alias" entries are skipped.
    """
    # lm_eval metric keys are formatted "metric_name,filter_name".
    filter_suffixes = (",none", ",flexible-extract", ",strict-match", ",get-answer")

    metrics = {}
    for task_name, task_data in data.get("results", {}).items():
        for key, value in task_data.items():
            # endswith accepts a tuple — replaces the original or-chain.
            if not key.endswith(filter_suffixes):
                continue
            # Split "metric_name,filter_name" at the last comma.
            metric, _, filter_name = key.rpartition(",")
            # Only numeric values are comparable; "alias" is display metadata.
            # (The original also built an unused display_name here — removed.)
            if isinstance(value, (int, float)) and metric != "alias":
                metrics[f"{task_name}|{metric}|{filter_name}"] = {
                    "task": task_name,
                    "metric": metric,
                    "filter": filter_name,
                    "value": value,
                }
    return metrics
|
||||
|
||||
|
||||
def get_display_name(label: str, metric_info: dict) -> str:
    """Build a human-readable row label from a metric-info record.

    *label* is accepted for interface compatibility; the display name is
    derived entirely from metric_info's "task", "metric" and "filter" keys.
    """
    # Abbreviate the long BBH chain-of-thought task prefix.
    task = metric_info["task"].replace("bbh_cot_fewshot_", "bbh/")

    filt = metric_info["filter"]
    # Show a meaningful filter name when present; otherwise fall back to
    # the metric name.
    if filt and filt not in ("none", "get-answer"):
        return f"{task} [{filt}]"
    return f"{task} [{metric_info['metric']}]"
|
||||
|
||||
|
||||
def compare(folders: list[str], names: list[str] | None = None):
    """Print a side-by-side comparison table of lm_eval runs.

    Args:
        folders: lm_eval output directories (or results JSON files).
        names: display name per run; defaults to the folder basenames.

    With more than one run, a trailing column shows last-minus-first deltas
    and a per-run average row is appended after the table.
    """
    if names is None:
        names = [Path(f).name for f in folders]

    max_name_len = max(len(n) for n in names)

    # Load every run's metrics, keyed by display name.
    all_metrics = {}
    for name, folder in zip(names, folders):
        all_metrics[name] = extract_metrics(load_results(folder))

    # Union of metric labels across all runs, in stable sorted order.
    all_labels = set()
    for metrics in all_metrics.values():
        all_labels.update(metrics.keys())
    sorted_labels = sorted(all_labels)

    col_width = 10
    name_col = max(45, max_name_len)

    header = f"{'Metric':<{name_col}}"
    for name in names:
        header += f" {name:>{col_width}}"
    if len(names) > 1:
        header += f" {'Δ(last-first)':>{col_width+2}}"

    print()
    print("=" * len(header))
    print("lm_eval Results Comparison")
    print("=" * len(header))
    print(header)
    print("-" * len(header))

    prev_task = None
    for label in sorted_labels:
        # Skip noise rows up front (the original computed the display name
        # first and threw it away): stderr values and alias metadata.
        if "stderr" in label.lower() or "alias" in label.lower():
            continue

        # Display info comes from the first run that has this metric.
        metric_info = None
        for metrics in all_metrics.values():
            if label in metrics:
                metric_info = metrics[label]
                break
        if metric_info is None:
            continue

        display = get_display_name(label, metric_info)

        # Blank line between task groups for readability.
        current_task = metric_info["task"]
        if prev_task and current_task != prev_task:
            print()
        prev_task = current_task

        row = f"{display:<{name_col}}"

        values = []
        for name in names:
            metrics = all_metrics[name]
            if label in metrics:
                val = metrics[label]["value"]
                values.append(val)
                if isinstance(val, float):
                    row += f" {val:>{col_width}.4f}"
                else:
                    row += f" {val:>{col_width}}"
            else:
                # This run is missing the metric entirely.
                values.append(None)
                row += f" {'---':>{col_width}}"

        # Delta column (last - first), only when both endpoints exist.
        # The original had identical > 0 and < 0 branches — merged here.
        if len(names) > 1 and values[0] is not None and values[-1] is not None:
            delta = values[-1] - values[0]
            if delta:
                row += f" {delta:>+{col_width}.4f}"
            else:
                row += f" {'0':>{col_width+1}}"
        elif len(names) > 1:
            row += f" {'---':>{col_width+1}}"

        print(row)

    print("=" * len(header))

    # Summary: average across all (non-stderr) metric values per run.
    print(f"\n{'Average (all metrics)':<{name_col}}", end="")
    avgs = []
    for name in names:
        metrics = all_metrics[name]
        vals = [m["value"] for m in metrics.values()
                if isinstance(m["value"], (int, float)) and "stderr" not in m["metric"]]
        if vals:
            avg = sum(vals) / len(vals)
            avgs.append(avg)
            print(f" {avg:>{col_width}.4f}", end="")
        else:
            avgs.append(None)
            print(f" {'---':>{col_width}}", end="")

    if len(avgs) > 1 and avgs[0] is not None and avgs[-1] is not None:
        delta = avgs[-1] - avgs[0]
        print(f" {delta:>+{col_width}.4f}", end="")
    print()
|
||||
|
||||
|
||||
def main():
    """CLI entry point: parse arguments and print the comparison table."""
    ap = argparse.ArgumentParser(
        description="Compare lm_eval results across runs",
        epilog="Example: python compare_eval.py ./eval_base ./eval_rys",
    )
    ap.add_argument("folders", nargs="+", help="lm_eval output directories or JSON files")
    ap.add_argument("--names", nargs="+", help="Display names for each run (default: folder names)")
    args = ap.parse_args()

    # An omitted --names arrives as None (or empty), which compare() treats
    # as "use the folder basenames".
    compare(args.folders, args.names if args.names else None)


if __name__ == "__main__":
    main()
|
||||
267
eq_probe.py
Normal file
267
eq_probe.py
Normal file
|
|
@ -0,0 +1,267 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
EQ Probe for RYS Layer Duplication Sweep
|
||||
|
||||
Simplified EQ-Bench-style scenarios. Each presents a social situation
|
||||
and asks the model to rate the intensity of 4 emotions (0-10).
|
||||
Scored by comparing to reference answers using EQ-Bench v2 scoring.
|
||||
|
||||
The reference answers were crafted to represent reasonable human consensus.
|
||||
What matters for the sweep is relative performance between configs,
|
||||
not absolute calibration.
|
||||
"""
|
||||
|
||||
import json
|
||||
import re
|
||||
import math
|
||||
|
||||
|
||||
# Each scenario: situation text, 4 emotions to rate, reference scores (0-10)
# NOTE(review): the reference scores are hand-crafted (see module docstring),
# not sourced from human raters — only relative comparisons between model
# configs are meaningful, not absolute EQ calibration.
EQ_SCENARIOS = [
    {
        "id": "eq01",
        "situation": (
            "David just found out his closest friend has been spreading rumors about him "
            "at work for months. When confronted, the friend laughed it off and said "
            "\"everyone does it, don't be so sensitive.\" David has been quiet all evening."
        ),
        "emotions": ["Anger", "Betrayal", "Sadness", "Confusion"],
        "reference": [7, 9, 6, 4]
    },
    {
        "id": "eq02",
        "situation": (
            "Maria spent six months preparing a presentation for the board. During the "
            "meeting, her manager took credit for all her ideas and presented them as his "
            "own. Afterward, he thanked her for 'helping out with the slides.'"
        ),
        "emotions": ["Frustration", "Resentment", "Humiliation", "Determination"],
        "reference": [8, 8, 5, 6]
    },
    {
        "id": "eq03",
        "situation": (
            "After 20 years of marriage, Tom's wife tells him she wants a divorce. "
            "She says she still cares about him but hasn't been happy for years. "
            "Tom had no idea anything was wrong."
        ),
        "emotions": ["Shock", "Grief", "Self-doubt", "Denial"],
        "reference": [9, 7, 6, 7]
    },
    {
        "id": "eq04",
        "situation": (
            "A young doctor must tell parents that their child's surgery was successful "
            "but there will be permanent limitations. The parents are already celebrating "
            "in the waiting room."
        ),
        "emotions": ["Relief", "Anxiety", "Empathy", "Professional_detachment"],
        "reference": [4, 7, 8, 5]
    },
    {
        "id": "eq05",
        "situation": (
            "Chen receives a prestigious award at a ceremony. As he walks to the stage, "
            "he sees his estranged father in the audience - the man who abandoned the "
            "family when Chen was twelve."
        ),
        "emotions": ["Pride", "Anger", "Longing", "Anxiety"],
        "reference": [7, 5, 6, 6]
    },
    {
        "id": "eq06",
        "situation": (
            "A retired teacher learns that a former student, who she failed years ago "
            "and who dropped out, has become extremely successful. The student publicly "
            "credits 'proving my teacher wrong' as their motivation."
        ),
        "emotions": ["Guilt", "Pride", "Defensiveness", "Amusement"],
        "reference": [5, 4, 6, 3]
    },
    {
        "id": "eq07",
        "situation": (
            "Sophie finds out she's been accepted to her dream university on the same "
            "day her best friend receives a rejection from the same school. Her friend "
            "calls to congratulate her, voice cracking."
        ),
        "emotions": ["Joy", "Guilt", "Empathy", "Awkwardness"],
        "reference": [7, 6, 8, 7]
    },
    {
        "id": "eq08",
        "situation": (
            "A firefighter rescues a child from a burning building. Weeks later, he "
            "wakes up screaming from nightmares about the ones he couldn't save in "
            "a previous fire. His partner asks if he's okay."
        ),
        "emotions": ["Satisfaction", "Trauma", "Vulnerability", "Shame"],
        "reference": [3, 8, 7, 5]
    },
    {
        "id": "eq09",
        "situation": (
            "An elderly woman's family surprises her with a birthday party. She smiles "
            "and thanks everyone, but keeps glancing at an empty chair - where her "
            "late husband always sat."
        ),
        "emotions": ["Gratitude", "Grief", "Loneliness", "Warmth"],
        "reference": [7, 7, 6, 6]
    },
    {
        "id": "eq10",
        "situation": (
            "A manager must lay off a team member who is also a close friend and a "
            "single parent. The company requires it due to budget cuts. HR is waiting "
            "for the paperwork."
        ),
        "emotions": ["Guilt", "Dread", "Helplessness", "Resentment"],
        "reference": [8, 8, 7, 5]
    },
    {
        "id": "eq11",
        "situation": (
            "James finds old love letters in the attic from his wife, written to "
            "someone else before they met. The letters are passionate and describe "
            "a depth of feeling he's not sure she's ever expressed toward him."
        ),
        "emotions": ["Jealousy", "Insecurity", "Curiosity", "Sadness"],
        "reference": [6, 7, 5, 4]
    },
    {
        "id": "eq12",
        "situation": (
            "A teenager confesses to her mother that she's been self-harming. The mother, "
            "who is a psychologist, realizes she completely missed the signs despite her "
            "professional training."
        ),
        "emotions": ["Fear", "Guilt", "Love", "Self_criticism"],
        "reference": [8, 8, 9, 7]
    },
    {
        "id": "eq13",
        "situation": (
            "A war veteran returns home after two years to find his dog waiting on the "
            "porch, much older and thinner. The dog recognizes him immediately and "
            "limps over, tail wagging."
        ),
        "emotions": ["Joy", "Guilt", "Tenderness", "Sorrow"],
        "reference": [8, 5, 9, 4]
    },
    {
        "id": "eq14",
        "situation": (
            "During a job interview, the candidate realizes the interviewer is someone "
            "she bullied badly in high school. The interviewer clearly recognizes her "
            "but proceeds professionally."
        ),
        "emotions": ["Shame", "Anxiety", "Admiration", "Regret"],
        "reference": [7, 8, 4, 7]
    },
    {
        "id": "eq15",
        "situation": (
            "A father watches his daughter's wedding, knowing he has a terminal diagnosis "
            "he hasn't shared with the family. He chose to wait until after the wedding "
            "to tell them."
        ),
        "emotions": ["Joy", "Grief", "Protectiveness", "Isolation"],
        "reference": [6, 8, 8, 7]
    },
    {
        "id": "eq16",
        "situation": (
            "Two siblings meet for the first time as adults after being separated in "
            "foster care as children. They look alike but have lived completely different "
            "lives. One is wealthy, the other struggles financially."
        ),
        "emotions": ["Wonder", "Resentment", "Hope", "Grief"],
        "reference": [7, 3, 7, 6]
    },
]
|
||||
|
||||
|
||||
def build_eq_prompt(scenario: dict) -> str:
    """Render the rating prompt for a single EQ scenario.

    The prompt asks for exactly four comma-separated 0-10 ratings in the
    order of scenario["emotions"], with no surrounding text.
    """
    listed = ", ".join(scenario["emotions"])
    pieces = [
        "Read the following situation and rate the emotional intensity that ",
        "the main character is likely feeling for each of the listed emotions. ",
        "Rate each emotion from 0 (not feeling it at all) to 10 (extremely intense).\n\n",
        f"Situation: {scenario['situation']}\n\n",
        f"Rate these emotions: {listed}\n\n",
        "Respond ONLY with the four numbers separated by commas, in the same order. ",
        "Example format: 5, 3, 8, 2\n",
        "Do not include any other text.",
    ]
    return "".join(pieces)
|
||||
|
||||
|
||||
def parse_eq_response(response: str, n_emotions: int = 4) -> list[float] | None:
|
||||
"""Extract emotion ratings from model response."""
|
||||
# Try to find comma-separated numbers
|
||||
numbers = re.findall(r'(\d+(?:\.\d+)?)', response)
|
||||
|
||||
if len(numbers) < n_emotions:
|
||||
return None
|
||||
|
||||
try:
|
||||
# Take the first n_emotions numbers found
|
||||
ratings = [float(numbers[i]) for i in range(n_emotions)]
|
||||
# Clamp to 0-10
|
||||
ratings = [max(0.0, min(10.0, r)) for r in ratings]
|
||||
return ratings
|
||||
except (ValueError, IndexError):
|
||||
return None
|
||||
|
||||
|
||||
def score_eq_response(reference: list[int], predicted: list[float]) -> float:
    """Score predicted emotion ratings against the reference (EQ-Bench v2 style).

    Small deviations (diff <= 4) are softened quadratically (diff^2 / 4, which
    meets the linear part exactly at diff=4); larger deviations count 1:1.
    Returns 100.0 for a perfect match, 0.0 for an unparseable or length-
    mismatched prediction, clamped at 0 for maximally wrong answers.
    """
    if predicted is None or len(predicted) != len(reference):
        return 0.0

    # Worst case: every rating is off by the full 0-10 scale.
    worst = 10.0 * len(reference)

    penalty = 0.0
    for ref, pred in zip(reference, predicted):
        diff = abs(ref - pred)
        penalty += (diff * diff) / 4.0 if diff <= 4 else diff

    # Map accumulated penalty [0, worst] onto a score of [100, ~0].
    return max(0.0, 100.0 * (1.0 - penalty / worst))
|
||||
|
||||
|
||||
# Convenience: pre-rendered (scenario, prompt) pairs for sweep runners.
EQ_PROMPTS = [(s, build_eq_prompt(s)) for s in EQ_SCENARIOS]


if __name__ == "__main__":
    # Smoke test: dump scenario metadata and sanity-check the scorer.
    print(f"EQ Probe: {len(EQ_SCENARIOS)} scenarios")
    print("=" * 60)

    # (Fix: the original used enumerate() but never used the index.)
    for scenario in EQ_SCENARIOS:
        print(f"\n[{scenario['id']}] Emotions: {scenario['emotions']}")
        print(f" Reference: {scenario['reference']}")
        prompt = build_eq_prompt(scenario)
        print(f" Prompt length: {len(prompt)} chars")

    # Test scoring
    print("\n\nScoring tests:")
    print(f" Perfect match: {score_eq_response([7, 9, 6, 4], [7, 9, 6, 4]):.1f}")
    print(f" All off by 1: {score_eq_response([7, 9, 6, 4], [8, 8, 7, 5]):.1f}")
    print(f" All off by 3: {score_eq_response([7, 9, 6, 4], [4, 6, 3, 1]):.1f}")
    print(f" All off by 5: {score_eq_response([7, 9, 6, 4], [2, 4, 1, 0]):.1f}")
    print(f" Worst case: {score_eq_response([7, 9, 6, 4], [0, 0, 0, 10]):.1f}")
    print(f" Unparseable: {score_eq_response([7, 9, 6, 4], None):.1f}")
|
||||
221
gguf_surgery.py
Normal file
221
gguf_surgery.py
Normal file
|
|
@ -0,0 +1,221 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
GGUF Layer Duplication Surgery
|
||||
|
||||
Reads a GGUF model file, duplicates transformer layers i..j-1 so they
|
||||
execute twice in the forward pass, and writes a new GGUF with the
|
||||
modified layer structure.
|
||||
|
||||
For a model with N layers, configuration (i, j) produces:
|
||||
layers 0..j-1, then layers i..j-1 again, then layers j..N-1
|
||||
Total layers: N + (j - i)
|
||||
|
||||
Tensor naming convention: blk.{layer_idx}.{tensor_name}
|
||||
Non-block tensors (token_embd, output_norm, output) are copied as-is.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import re
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
from tqdm import tqdm
|
||||
|
||||
import gguf
|
||||
from gguf import GGUFReader, GGUFWriter, GGUFValueType
|
||||
|
||||
|
||||
BLK_PATTERN = re.compile(r'^blk\.(\d+)\.(.+)$')
|
||||
|
||||
|
||||
def get_field_value(reader: GGUFReader, key: str):
    """Return the decoded value of metadata field *key*, or None if absent."""
    field = reader.get_field(key)
    return None if field is None else field.contents()
|
||||
|
||||
|
||||
def duplicate_layers(input_path: str, output_path: str, dup_start: int, dup_end: int, verbose: bool = False):
    """
    Create a new GGUF with layers dup_start..dup_end-1 duplicated.

    The new layer order is:
    Original layers 0..dup_end-1
    Duplicated layers dup_start..dup_end-1 (renumbered)
    Original layers dup_end..N-1 (renumbered)

    Args:
        input_path: source GGUF file to read.
        output_path: destination GGUF file to create.
        dup_start: first layer of the duplicated range (inclusive).
        dup_end: end of the duplicated range (exclusive).
        verbose: print architecture info, the full layer map, and output size.

    Raises:
        ValueError: if the architecture or block count cannot be read, or
            the (dup_start, dup_end) range is invalid for the model.
    """
    reader = GGUFReader(input_path, 'r')

    arch = get_field_value(reader, gguf.Keys.General.ARCHITECTURE)
    if arch is None:
        raise ValueError("Could not read architecture from GGUF")

    # The layer count lives under an architecture-specific key,
    # e.g. "llama.block_count".
    block_count_key = f'{arch}.block_count'
    orig_block_count = get_field_value(reader, block_count_key)
    if orig_block_count is None:
        raise ValueError(f"Could not read {block_count_key} from GGUF")

    n_dup = dup_end - dup_start
    new_block_count = orig_block_count + n_dup

    if verbose:
        print(f"Architecture: {arch}")
        print(f"Original layers: {orig_block_count}")
        print(f"Duplicating layers {dup_start}..{dup_end - 1} ({n_dup} layers)")
        print(f"New layer count: {new_block_count}")

    if dup_start < 0 or dup_end > orig_block_count or dup_start >= dup_end:
        raise ValueError(
            f"Invalid duplication range ({dup_start}, {dup_end}) "
            f"for model with {orig_block_count} layers"
        )

    # Build layer mapping: new_idx -> original_layer_idx
    # Phase 1: original 0..dup_end-1 keep their indices
    # Phase 2: duplicates of dup_start..dup_end-1 get indices dup_end..dup_end+n_dup-1
    # Phase 3: original dup_end..N-1 shift up by n_dup
    layer_map = {}

    for orig_idx in range(dup_end):
        layer_map[orig_idx] = orig_idx

    for k in range(n_dup):
        layer_map[dup_end + k] = dup_start + k

    for orig_idx in range(dup_end, orig_block_count):
        layer_map[orig_idx + n_dup] = orig_idx

    if verbose:
        print("Layer mapping (new -> orig):")
        for new_idx in sorted(layer_map.keys()):
            # Tag the second pass over the duplicated range.
            tag = " [DUP]" if (dup_end <= new_idx < dup_end + n_dup) else ""
            print(f" new {new_idx:3d} -> orig {layer_map[new_idx]:3d}{tag}")

    # Create writer
    writer = GGUFWriter(output_path, arch=arch, endianess=reader.endianess)

    # Preserve the source file's tensor-data alignment, when recorded.
    alignment = get_field_value(reader, gguf.Keys.General.ALIGNMENT)
    if alignment is not None:
        writer.data_alignment = alignment

    # Copy metadata, overriding block_count.
    # Skip the architecture key (the writer already set it) and the GGUF.*
    # virtual fields that describe the container itself.
    for field in reader.fields.values():
        if field.name == gguf.Keys.General.ARCHITECTURE or field.name.startswith('GGUF.'):
            continue

        val_type = field.types[0]
        # Arrays carry their element type as the last entry of field.types.
        sub_type = field.types[-1] if val_type == GGUFValueType.ARRAY else None

        if field.name == block_count_key:
            writer.add_key_value(field.name, new_block_count, val_type)
            if verbose:
                print(f"Modified {field.name}: {orig_block_count} -> {new_block_count}")
        else:
            val = field.contents()
            if val is not None:
                writer.add_key_value(field.name, val, val_type, sub_type=sub_type)

    # Organize tensors by type
    non_block_tensors = []
    block_tensors = {}  # orig_layer_idx -> [(suffix, tensor), ...]

    for tensor in reader.tensors:
        match = BLK_PATTERN.match(tensor.name)
        if match:
            layer_idx = int(match.group(1))
            suffix = match.group(2)
            if layer_idx not in block_tensors:
                block_tensors[layer_idx] = []
            block_tensors[layer_idx].append((suffix, tensor))
        else:
            non_block_tensors.append(tensor)

    # Split non-block tensors into pre-block and post-block.
    # Heuristic: anything with "output" in its name (output_norm, output)
    # is written after the blocks; everything else (e.g. token_embd) before.
    pre_block = []
    post_block = []
    for t in non_block_tensors:
        if 'output' in t.name:
            post_block.append(t)
        else:
            pre_block.append(t)

    # Add tensor infos in order and build write queue.
    # Duplicated layers reuse the ORIGINAL tensor data under new blk.N names,
    # so the data is written twice but never modified.
    total_bytes = 0
    block_write_order = []  # (new_name, original_tensor)

    for tensor in pre_block:
        writer.add_tensor_info(
            tensor.name, tensor.data.shape, tensor.data.dtype,
            tensor.data.nbytes, tensor.tensor_type
        )
        total_bytes += tensor.n_bytes

    for new_idx in range(new_block_count):
        orig_idx = layer_map[new_idx]
        if orig_idx not in block_tensors:
            # Best-effort: warn and keep going rather than abort mid-write.
            print(f"WARNING: No tensors for original layer {orig_idx}", file=sys.stderr)
            continue
        for suffix, tensor in block_tensors[orig_idx]:
            new_name = f"blk.{new_idx}.{suffix}"
            writer.add_tensor_info(
                new_name, tensor.data.shape, tensor.data.dtype,
                tensor.data.nbytes, tensor.tensor_type
            )
            total_bytes += tensor.n_bytes
            block_write_order.append((new_name, tensor))

    for tensor in post_block:
        writer.add_tensor_info(
            tensor.name, tensor.data.shape, tensor.data.dtype,
            tensor.data.nbytes, tensor.tensor_type
        )
        total_bytes += tensor.n_bytes

    # Write file. Tensor data below must be emitted in exactly the same
    # order the infos were added above.
    writer.write_header_to_file()
    writer.write_kv_data_to_file()
    writer.write_ti_data_to_file()

    bar = tqdm(desc="Writing GGUF", total=total_bytes, unit="B", unit_scale=True)

    for tensor in pre_block:
        writer.write_tensor_data(tensor.data)
        bar.update(tensor.n_bytes)

    for _, tensor in block_write_order:
        writer.write_tensor_data(tensor.data)
        bar.update(tensor.n_bytes)

    for tensor in post_block:
        writer.write_tensor_data(tensor.data)
        bar.update(tensor.n_bytes)

    bar.close()
    writer.close()

    if verbose:
        print(f"Done. Written to {output_path}")
        out_size = Path(output_path).stat().st_size / (1024**3)
        print(f"Output size: {out_size:.2f} GiB")
|
||||
|
||||
|
||||
def main():
    """CLI entry point for the layer-duplication surgery."""
    ap = argparse.ArgumentParser(
        description="Duplicate layers in a GGUF model (RYS method)"
    )
    ap.add_argument("input", help="Input GGUF file path")
    ap.add_argument("output", help="Output GGUF file path")
    ap.add_argument("-i", "--dup-start", type=int, required=True,
                    help="First layer to duplicate (inclusive)")
    ap.add_argument("-j", "--dup-end", type=int, required=True,
                    help="Last layer to duplicate (exclusive)")
    ap.add_argument("-v", "--verbose", action="store_true")
    args = ap.parse_args()

    duplicate_layers(args.input, args.output, args.dup_start, args.dup_end, args.verbose)


if __name__ == "__main__":
    main()
|
||||
248
layer_path.py
Normal file
248
layer_path.py
Normal file
|
|
@ -0,0 +1,248 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Explicit Layer Path GGUF Surgery
|
||||
|
||||
You provide the exact sequence of layers the model should execute.
|
||||
No ambiguous range notation — just list the layers.
|
||||
|
||||
Examples:
|
||||
# Normal 40-layer model (identity, for testing)
|
||||
python layer_path.py model.gguf out.gguf -p 0,1,2,...,39
|
||||
|
||||
# Duplicate layers 13-16 once (same as RYS with i=13,j=17)
|
||||
python layer_path.py model.gguf out.gguf -p 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,13,14,15,16,17,18,...,39
|
||||
|
||||
# Repeat layer 13 four times
|
||||
python layer_path.py model.gguf out.gguf -p 0,1,...,12,13,13,13,13,14,15,...,39
|
||||
|
||||
# Triple-pass layers 13-16
|
||||
python layer_path.py model.gguf out.gguf -p 0,1,...,16,13,14,15,16,13,14,15,16,17,...,39
|
||||
|
||||
# Shorthand: use .. to fill in sequential ranges
|
||||
python layer_path.py model.gguf out.gguf -p 0..16,13,14,15,16,13,14,15,16,17..39
|
||||
|
||||
Usage:
|
||||
python layer_path.py input.gguf output.gguf -p "0..16,13,14,15,16,17..39" -v
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import re
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
from tqdm import tqdm
|
||||
|
||||
import gguf
|
||||
from gguf import GGUFReader, GGUFWriter, GGUFValueType
|
||||
|
||||
|
||||
BLK_PATTERN = re.compile(r'^blk\.(\d+)\.(.+)$')
|
||||
|
||||
|
||||
def get_field_value(reader, key):
    """Return the decoded contents of metadata field *key*, or None if missing."""
    field = reader.get_field(key)
    return None if field is None else field.contents()
|
||||
|
||||
|
||||
def parse_layer_path(path_str: str) -> list[int]:
    """
    Parse a layer path string into a list of layer indices.

    Supports:
    - Individual numbers: 0,1,2,13,13,14
    - Inclusive ranges with ..: 0..16 expands to 0,1,2,...,16
    - Mixed: 0..12,13,13,13,14..39

    Whitespace is ignored.

    Raises:
        ValueError: on a malformed range token or a descending range.
    """
    # Drop all whitespace, split on commas, ignore empty tokens.
    tokens = [t for t in path_str.replace(' ', '').split(',') if t]

    layers: list[int] = []
    for token in tokens:
        if '..' not in token:
            layers.append(int(token))
            continue
        # Range token: start..end (inclusive at both ends).
        bounds = token.split('..')
        if len(bounds) != 2:
            raise ValueError(f"Invalid range: '{token}'. Use 'start..end'")
        start = int(bounds[0])
        end = int(bounds[1])
        if start > end:
            raise ValueError(f"Invalid range: {start}..{end} (start > end)")
        layers.extend(range(start, end + 1))

    return layers
|
||||
|
||||
|
||||
def build_gguf_from_path(input_path: str, output_path: str,
                         layer_path: list[int], verbose: bool = False):
    """
    Create a new GGUF where the forward pass follows the given layer path.

    Args:
        input_path: source GGUF file to read.
        output_path: destination GGUF file to create.
        layer_path: explicit sequence of original layer indices to execute;
            position k in this list becomes new layer k.
        verbose: print mapping and progress details.

    Raises:
        ValueError: if a path entry is outside the model's layer range or a
            referenced layer has no tensors in the source file.
    """
    reader = GGUFReader(input_path, 'r')

    arch = get_field_value(reader, gguf.Keys.General.ARCHITECTURE)
    # The layer count lives under an architecture-specific key,
    # e.g. "llama.block_count".
    block_count_key = f'{arch}.block_count'
    orig_block_count = get_field_value(reader, block_count_key)

    # Validate all layer indices
    for idx in layer_path:
        if idx < 0 or idx >= orig_block_count:
            raise ValueError(
                f"Layer {idx} out of range (model has {orig_block_count} layers, 0..{orig_block_count-1})"
            )

    new_block_count = len(layer_path)

    if verbose:
        print(f"Architecture: {arch}")
        print(f"Original layers: {orig_block_count}")
        print(f"New layer count: {new_block_count}")
        print(f"Layer path: {layer_path}")

        # Show which layers are repeated
        from collections import Counter
        counts = Counter(layer_path)
        repeated = {k: v for k, v in counts.items() if v > 1}
        if repeated:
            print(f"Repeated layers: {dict(sorted(repeated.items()))}")
        else:
            print("No repeated layers (just a reorder)")

    # layer_map: new_position -> original_layer_index
    layer_map = {new_idx: orig_idx for new_idx, orig_idx in enumerate(layer_path)}

    # Create writer
    writer = GGUFWriter(output_path, arch=arch, endianess=reader.endianess)

    # Preserve the source file's tensor-data alignment, when recorded.
    alignment = get_field_value(reader, gguf.Keys.General.ALIGNMENT)
    if alignment is not None:
        writer.data_alignment = alignment

    # Copy metadata, override block count. Skip the architecture key (the
    # writer already set it) and GGUF.* container fields.
    for field in reader.fields.values():
        if field.name == gguf.Keys.General.ARCHITECTURE or field.name.startswith('GGUF.'):
            continue
        val_type = field.types[0]
        # Arrays carry their element type as the last entry of field.types.
        sub_type = field.types[-1] if val_type == GGUFValueType.ARRAY else None
        if field.name == block_count_key:
            writer.add_key_value(field.name, new_block_count, val_type)
        else:
            val = field.contents()
            if val is not None:
                writer.add_key_value(field.name, val, val_type, sub_type=sub_type)

    # Organize tensors by layer
    non_block_tensors = []
    block_tensors = {}  # orig_layer_idx -> [(suffix, tensor), ...]

    for tensor in reader.tensors:
        match = BLK_PATTERN.match(tensor.name)
        if match:
            layer_idx = int(match.group(1))
            suffix = match.group(2)
            if layer_idx not in block_tensors:
                block_tensors[layer_idx] = []
            block_tensors[layer_idx].append((suffix, tensor))
        else:
            non_block_tensors.append(tensor)

    # Heuristic split: "output"-named tensors (output_norm, output) go after
    # the blocks; everything else (e.g. token_embd) goes before.
    pre_block = [t for t in non_block_tensors if 'output' not in t.name]
    post_block = [t for t in non_block_tensors if 'output' in t.name]

    # Add tensor infos and build write order. Repeated layers reuse the
    # ORIGINAL tensor data under new blk.N names — data is duplicated on
    # disk, never modified.
    total_bytes = 0
    block_write_order = []

    for tensor in pre_block:
        writer.add_tensor_info(tensor.name, tensor.data.shape, tensor.data.dtype,
                               tensor.data.nbytes, tensor.tensor_type)
        total_bytes += tensor.n_bytes

    for new_idx in range(new_block_count):
        orig_idx = layer_map[new_idx]
        if orig_idx not in block_tensors:
            raise ValueError(f"No tensors found for original layer {orig_idx}")
        for suffix, tensor in block_tensors[orig_idx]:
            new_name = f"blk.{new_idx}.{suffix}"
            writer.add_tensor_info(new_name, tensor.data.shape, tensor.data.dtype,
                                   tensor.data.nbytes, tensor.tensor_type)
            total_bytes += tensor.n_bytes
            block_write_order.append(tensor)

    for tensor in post_block:
        writer.add_tensor_info(tensor.name, tensor.data.shape, tensor.data.dtype,
                               tensor.data.nbytes, tensor.tensor_type)
        total_bytes += tensor.n_bytes

    # Write. Tensor data below must follow exactly the same order in which
    # the tensor infos were added above.
    writer.write_header_to_file()
    writer.write_kv_data_to_file()
    writer.write_ti_data_to_file()

    bar = tqdm(desc="Writing GGUF", total=total_bytes, unit="B", unit_scale=True)

    for tensor in pre_block:
        writer.write_tensor_data(tensor.data)
        bar.update(tensor.n_bytes)

    for tensor in block_write_order:
        writer.write_tensor_data(tensor.data)
        bar.update(tensor.n_bytes)

    for tensor in post_block:
        writer.write_tensor_data(tensor.data)
        bar.update(tensor.n_bytes)

    bar.close()
    writer.close()

    if verbose:
        out_size = Path(output_path).stat().st_size / (1024**3)
        print(f"Done. Output: {out_size:.2f} GiB")
|
||||
|
||||
|
||||
def main():
    """CLI entry point: parse a layer-path spec and rebuild the GGUF.

    The -p/--path argument is an explicit layer execution path; range
    syntax like "0..16" is expanded by parse_layer_path before the
    model is rewritten by build_gguf_from_path.
    """
    parser = argparse.ArgumentParser(
        description="Build GGUF with explicit layer execution path",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Duplicate layers 13-16 once (RYS style)
  %(prog)s model.gguf out.gguf -p "0..16,13,14,15,16,17..39"

  # Triple-pass layers 13-16
  %(prog)s model.gguf out.gguf -p "0..16,13,14,15,16,13,14,15,16,17..39"

  # Repeat just layer 13 four times
  %(prog)s model.gguf out.gguf -p "0..12,13,13,13,13,14..39"

  # Skip layer 5 entirely
  %(prog)s model.gguf out.gguf -p "0..4,6..39"
"""
    )
    parser.add_argument("input", help="Input GGUF file")
    parser.add_argument("output", help="Output GGUF file")
    parser.add_argument("-p", "--path", required=True,
                        help="Layer execution path (e.g. '0..16,13,14,15,16,17..39')")
    parser.add_argument("-v", "--verbose", action="store_true")
    args = parser.parse_args()

    # Expand range syntax into an explicit list of original-layer indices.
    layer_path = parse_layer_path(args.path)
    print(f"Model: {args.input}")
    print(f"Output: {args.output}")
    print(f"Layer path ({len(layer_path)} layers): {layer_path}")

    build_gguf_from_path(args.input, args.output, layer_path, args.verbose)


if __name__ == "__main__":
    main()
|
||||
155
math_probe.py
Normal file
155
math_probe.py
Normal file
|
|
@ -0,0 +1,155 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Math Probe for RYS Layer Duplication Sweep
|
||||
|
||||
Hard arithmetic questions where the model must guess the answer
|
||||
without chain-of-thought. Scored with Ng's partial-credit function.
|
||||
"""
|
||||
|
||||
import json
|
||||
import math
|
||||
import random
|
||||
|
||||
|
||||
def calculate_score(actual, estimate):
    """
    Ng's partial-credit scoring function for numeric estimates.

    The two values are compared digit-string-wise: the shorter digit
    string is right-padded with zeros to the longer one's length, the
    relative difference of the padded values is taken, and a correction
    factor proportional to the amount of padding reduces the score.

    Returns a float in [0, 1]; 1.0 is an exact match, 0.0 a miss
    (including unparseable input or a sign mismatch).
    """
    try:
        actual_int = int(actual)
        estimate_int = int(estimate)
    except (ValueError, OverflowError):
        return 0.0

    # A sign mismatch is a categorical miss, not a near-miss.
    if (actual_int < 0) != (estimate_int < 0):
        return 0.0

    # Work on magnitudes: the original padding/relative-diff formula
    # assumed non-negative values (a negative denominator could clamp a
    # wildly wrong estimate to a perfect 1.0 score).
    actual_str = str(abs(actual_int))
    estimate_str = str(abs(estimate_int))

    max_length = max(len(actual_str), len(estimate_str))
    actual_padded = actual_str.ljust(max_length, "0")
    estimate_padded = estimate_str.ljust(max_length, "0")
    padding_size = max_length - min(len(actual_str), len(estimate_str))

    a = int(actual_padded)
    e = int(estimate_padded)

    if max(a, e) == 0:
        return 0.0

    relative_diff = abs(a - e) / max(a, e)
    correction_factor = 1 - (padding_size / max_length)
    score = (1 - relative_diff) * correction_factor

    return max(0.0, min(score, 1.0))
|
||||
|
||||
|
||||
def generate_math_questions(seed=42):
    """
    Build a fixed set of hard arithmetic questions with known answers.

    Four families of four questions each — cube roots, six-digit
    products, square roots, and cube-root-times-multiplier — drawn from
    a seeded RNG so every run with the same seed yields the same set.

    Returns a list of (question_text, correct_answer) tuples.
    """
    rng = random.Random(seed)
    suffix = "Answer with just the number, no explanation."
    questions = []

    # Cube roots of large perfect cubes.
    for _ in range(4):
        base = rng.randint(20000, 50000)
        questions.append(
            (f"What is the cube root of {base ** 3}? {suffix}", base)
        )

    # Products of two six-digit numbers.
    for _ in range(4):
        left = rng.randint(100000, 999999)
        right = rng.randint(100000, 999999)
        questions.append(
            (f"What is {left} multiplied by {right}? {suffix}", left * right)
        )

    # Square roots of large perfect squares.
    for _ in range(4):
        base = rng.randint(50000, 200000)
        questions.append(
            (f"What is the square root of {base ** 2}? {suffix}", base)
        )

    # Mixed: cube root followed by a multiplication.
    for _ in range(4):
        base = rng.randint(100, 999)
        factor = rng.randint(10, 99)
        questions.append(
            (
                f"What is the cube root of {base ** 3}, multiplied by {factor}? {suffix}",
                base * factor,
            )
        )

    return questions
|
||||
|
||||
|
||||
def parse_number_from_response(response: str) -> int | None:
|
||||
"""
|
||||
Extract the first integer from a model response.
|
||||
Handles common LLM quirks: commas in numbers, trailing text, etc.
|
||||
"""
|
||||
import re
|
||||
|
||||
# Clean up common formatting
|
||||
text = response.strip()
|
||||
|
||||
# Try to find a number (possibly with commas)
|
||||
# Match negative or positive integers, possibly with commas
|
||||
patterns = [
|
||||
r'[-+]?[\d,]+', # numbers with optional commas
|
||||
]
|
||||
|
||||
for pattern in patterns:
|
||||
matches = re.findall(pattern, text)
|
||||
if matches:
|
||||
# Take the first/longest match
|
||||
num_str = max(matches, key=len)
|
||||
num_str = num_str.replace(',', '')
|
||||
try:
|
||||
return int(num_str)
|
||||
except ValueError:
|
||||
continue
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def score_math_response(question_answer: int, response: str) -> float:
    """Score one math response against its known answer (0-1 partial credit)."""
    estimate = parse_number_from_response(response)
    return 0.0 if estimate is None else calculate_score(question_answer, estimate)
|
||||
|
||||
|
||||
# Pre-generated questions for consistency across runs: every sweep
# configuration is probed with the identical seeded question set.
MATH_QUESTIONS = generate_math_questions(seed=42)


if __name__ == "__main__":
    # Print the questions and answers for verification
    print("Math Probe Questions:")
    print("=" * 60)
    for i, (q, a) in enumerate(MATH_QUESTIONS):
        print(f"\n[{i+1}] {q}")
        print(f" Answer: {a}")

    # Test the scoring function on known cases (exact / partial / miss).
    print("\n\nScoring tests:")
    print(f" Exact match: {calculate_score(4302459, 4302459):.4f}")
    print(f" Missing digit: {calculate_score(4302459, 430245):.4f}")
    print(f" One digit off: {calculate_score(123456789, 123356789):.4f}")
    print(f" Way off: {calculate_score(4302459, 9999999):.4f}")
    print(f" Zero vs nonzero: {calculate_score(4302459, 0):.4f}")
|
||||
198
multi_repeat.py
Normal file
198
multi_repeat.py
Normal file
|
|
@ -0,0 +1,198 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Multi-repeat GGUF surgery.
|
||||
|
||||
Duplicates a block of layers N times total (default 3 = original + 2 copies).
|
||||
|
||||
For (i=13, j=17, repeats=3) on a 40-layer model:
|
||||
0..16, 13..16, 13..16, 17..39 = 48 layers total
|
||||
The block 13-16 executes 3 times.
|
||||
|
||||
Usage:
|
||||
python multi_repeat.py \
|
||||
/path/to/model.gguf \
|
||||
/dev/shm/rys/triple_13_17.gguf \
|
||||
-i 13 -j 17 -n 3 -v
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import re
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
from tqdm import tqdm
|
||||
|
||||
import gguf
|
||||
from gguf import GGUFReader, GGUFWriter, GGUFValueType
|
||||
|
||||
BLK_PATTERN = re.compile(r'^blk\.(\d+)\.(.+)$')
|
||||
|
||||
|
||||
def get_field_value(reader, key):
    """Return the decoded value of a GGUF metadata field, or None if absent."""
    field = reader.get_field(key)
    return None if field is None else field.contents()
|
||||
|
||||
|
||||
def multi_repeat_layers(input_path, output_path, dup_start, dup_end, n_repeats, verbose=False):
    """
    Rewrite a GGUF so the layer block [dup_start, dup_end) executes
    n_repeats times in total.

    For (13, 17, 3) on a 40-layer model the execution path becomes
    0..16, 13..16, 13..16, 17..39 (48 layers): the original pass counts
    as repeat #1 and the extra copies are inserted right after it.
    Duplicated layers reuse the same underlying tensor data, so the
    output file grows by the size of the copied block's tensors.

    Args:
        input_path: source GGUF file.
        output_path: destination GGUF file (fully rewritten).
        dup_start: first layer of the repeated block (inclusive).
        dup_end: end of the repeated block (exclusive).
        n_repeats: total times the block executes (>= 1).
        verbose: print the layer map, execution path, and final size.
    """
    reader = GGUFReader(input_path, 'r')

    arch = get_field_value(reader, gguf.Keys.General.ARCHITECTURE)
    block_count_key = f'{arch}.block_count'
    orig_block_count = get_field_value(reader, block_count_key)

    n_block = dup_end - dup_start
    extra_copies = n_repeats - 1  # original pass counts as 1
    new_block_count = orig_block_count + (n_block * extra_copies)

    if verbose:
        print(f"Architecture: {arch}")
        print(f"Original layers: {orig_block_count}")
        print(f"Block: layers {dup_start}..{dup_end - 1} ({n_block} layers)")
        print(f"Repeats: {n_repeats}x (original + {extra_copies} copies)")
        print(f"New layer count: {new_block_count}")

    # Build layer map: new layer index -> original layer index.
    # Phase 1: original 0..dup_end-1
    # Phase 2..N: copies of dup_start..dup_end-1
    # Phase last: original dup_end..orig-1 (shifted)
    layer_map = {}

    # Phase 1: original layers up to dup_end
    for idx in range(dup_end):
        layer_map[idx] = idx

    # Phase 2+: extra copies
    offset = dup_end
    for copy in range(extra_copies):
        for k in range(n_block):
            layer_map[offset + k] = dup_start + k
        offset += n_block

    # Phase last: remaining original layers shifted
    for orig_idx in range(dup_end, orig_block_count):
        layer_map[orig_idx + (n_block * extra_copies)] = orig_idx

    assert len(layer_map) == new_block_count

    if verbose:
        # Pretty-print the path as contiguous runs, e.g. [0..16] [13..16].
        path = [layer_map[i] for i in range(new_block_count)]
        print(f"Execution path ({len(path)} layers):")
        i = 0
        while i < len(path):
            run_start = path[i]
            run_end = run_start
            j = i + 1
            while j < len(path) and path[j] == run_end + 1:
                run_end = path[j]
                j += 1
            if run_start == run_end:
                print(f" [{run_start}]")
            else:
                print(f" [{run_start}..{run_end}]")
            i = j

    # Create writer
    writer = GGUFWriter(output_path, arch=arch, endianess=reader.endianess)

    alignment = get_field_value(reader, gguf.Keys.General.ALIGNMENT)
    if alignment is not None:
        writer.data_alignment = alignment

    # Copy metadata, patching only the block count. Architecture is set
    # by the writer's constructor, and GGUF.* keys are file-structure
    # fields the writer regenerates itself.
    for field in reader.fields.values():
        if field.name == gguf.Keys.General.ARCHITECTURE or field.name.startswith('GGUF.'):
            continue
        val_type = field.types[0]
        sub_type = field.types[-1] if val_type == GGUFValueType.ARRAY else None
        if field.name == block_count_key:
            writer.add_key_value(field.name, new_block_count, val_type)
        else:
            val = field.contents()
            if val is not None:
                writer.add_key_value(field.name, val, val_type, sub_type=sub_type)

    # Organize tensors: per-layer ("blk.N.*") vs everything else.
    non_block_tensors = []
    block_tensors = {}
    for tensor in reader.tensors:
        match = BLK_PATTERN.match(tensor.name)
        if match:
            layer_idx = int(match.group(1))
            suffix = match.group(2)
            if layer_idx not in block_tensors:
                block_tensors[layer_idx] = []
            block_tensors[layer_idx].append((suffix, tensor))
        else:
            non_block_tensors.append(tensor)

    # Non-block tensors whose names contain 'output' (output norm /
    # output head) go after the layer stack; everything else before it.
    pre_block = [t for t in non_block_tensors if 'output' not in t.name]
    post_block = [t for t in non_block_tensors if 'output' in t.name]

    total_bytes = 0
    block_write_order = []

    for tensor in pre_block:
        writer.add_tensor_info(tensor.name, tensor.data.shape, tensor.data.dtype,
                               tensor.data.nbytes, tensor.tensor_type)
        total_bytes += tensor.n_bytes

    # Register every (possibly duplicated) layer under its new index;
    # copies reference the same source tensor object.
    for new_idx in range(new_block_count):
        orig_idx = layer_map[new_idx]
        for suffix, tensor in block_tensors[orig_idx]:
            new_name = f"blk.{new_idx}.{suffix}"
            writer.add_tensor_info(new_name, tensor.data.shape, tensor.data.dtype,
                                   tensor.data.nbytes, tensor.tensor_type)
            total_bytes += tensor.n_bytes
            block_write_order.append(tensor)

    for tensor in post_block:
        writer.add_tensor_info(tensor.name, tensor.data.shape, tensor.data.dtype,
                               tensor.data.nbytes, tensor.tensor_type)
        total_bytes += tensor.n_bytes

    # Write header/metadata/tensor-info, then raw tensor data in the
    # exact order the tensor infos were registered above.
    writer.write_header_to_file()
    writer.write_kv_data_to_file()
    writer.write_ti_data_to_file()

    bar = tqdm(desc="Writing GGUF", total=total_bytes, unit="B", unit_scale=True)

    for tensor in pre_block:
        writer.write_tensor_data(tensor.data)
        bar.update(tensor.n_bytes)
    for tensor in block_write_order:
        writer.write_tensor_data(tensor.data)
        bar.update(tensor.n_bytes)
    for tensor in post_block:
        writer.write_tensor_data(tensor.data)
        bar.update(tensor.n_bytes)

    bar.close()
    writer.close()

    if verbose:
        size = Path(output_path).stat().st_size / (1024**3)
        print(f"Written: {output_path} ({size:.2f} GiB)")
|
||||
|
||||
|
||||
def main():
    """CLI entry point: validate arguments, then run the layer duplication."""
    parser = argparse.ArgumentParser(description="Multi-repeat layer duplication")
    parser.add_argument("input", help="Input GGUF")
    parser.add_argument("output", help="Output GGUF")
    parser.add_argument("-i", "--dup-start", type=int, required=True)
    parser.add_argument("-j", "--dup-end", type=int, required=True)
    parser.add_argument("-n", "--repeats", type=int, default=3,
                        help="Total times the block executes (default: 3)")
    parser.add_argument("-v", "--verbose", action="store_true")
    args = parser.parse_args()

    # Fail fast on nonsensical ranges instead of producing a corrupt GGUF.
    if args.dup_start < 0 or args.dup_end <= args.dup_start:
        parser.error(f"invalid block: --dup-start {args.dup_start} must be >= 0 "
                     f"and strictly less than --dup-end {args.dup_end}")
    if args.repeats < 1:
        parser.error(f"--repeats must be >= 1 (got {args.repeats})")

    multi_repeat_layers(args.input, args.output,
                        args.dup_start, args.dup_end,
                        args.repeats, args.verbose)


if __name__ == "__main__":
    main()
|
||||
282
reasoning_probe.py
Normal file
282
reasoning_probe.py
Normal file
|
|
@ -0,0 +1,282 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Reasoning Probe for RYS Layer Duplication Sweep
|
||||
|
||||
BBH-style questions: causal judgement, date understanding, logical
|
||||
deduction, navigation, plus basic math word problems.
|
||||
All answers are short (Yes/No, a letter, or a number).
|
||||
"""
|
||||
|
||||
import re
|
||||
|
||||
|
||||
# ─── Causal Judgement (Yes/No) ─────────────────────────────────────
|
||||
|
||||
CAUSAL_QUESTIONS = [
|
||||
{
|
||||
"prompt": (
|
||||
"How would a typical person answer each of the following questions about causation?\n"
|
||||
"Frank T., had an ongoing dispute with his neighbor over a stretch of land and one day "
|
||||
"decided to shoot his neighbor in the body. Frank T. had no experience with guns, his "
|
||||
"hand slipped on the barrel of the gun, and the shot went wild. Nonetheless, the bullet "
|
||||
"bounced off a large boulder several feet away and hit the neighbor's body, causing "
|
||||
"significant injury. Did Frank T. intentionally shoot his neighbor in the body?\n"
|
||||
"Options:\n- Yes\n- No\n\n"
|
||||
"Think step by step, then answer Yes or No."
|
||||
),
|
||||
"answer": "no",
|
||||
},
|
||||
{
|
||||
"prompt": (
|
||||
"How would a typical person answer each of the following questions about causation?\n"
|
||||
"Suzy and Billy are working on a project that is very important for our nation's security. "
|
||||
"The boss tells them both: \"Be sure that you are here at exactly 9 am. It is absolutely "
|
||||
"essential that you arrive at that time.\" Both Billy and Suzy arrive at 9 am. As it happens, "
|
||||
"there was a motion detector installed in the room where they arrived. The motion detector "
|
||||
"was set up to be triggered if at least one person appeared in the room at the same time. "
|
||||
"So the motion detector went off. Did Billy cause the motion detector to go off?\n"
|
||||
"Options:\n- Yes\n- No\n\n"
|
||||
"Think step by step, then answer Yes or No."
|
||||
),
|
||||
"answer": "yes",
|
||||
},
|
||||
{
|
||||
"prompt": (
|
||||
"How would a typical person answer each of the following questions about causation?\n"
|
||||
"George and his sister Lena reunite at their parents' house for Thanksgiving. Whereas "
|
||||
"George just got into medical school, Lena is unhappy in her marriage and recently lost "
|
||||
"her job. Over the course of the day, George and Lena get into a number of heated arguments. "
|
||||
"Later in the afternoon they play a game of darts. They split the first two games, and the "
|
||||
"third game is close until the end. Who will win comes down to George's last shot. If he "
|
||||
"hits a high point region, he wins; if he hits a low point region, Lena wins. George thinks "
|
||||
"of the difficult time Lena is having, and he really wants to let her win. He aims the dart "
|
||||
"at the low point region. He sets up his shot and the dart lands in the low point region. "
|
||||
"After his shot, Lena wins the game and is very happy. Did George hit the low point region "
|
||||
"intentionally?\n"
|
||||
"Options:\n- Yes\n- No\n\n"
|
||||
"Think step by step, then answer Yes or No."
|
||||
),
|
||||
"answer": "yes",
|
||||
},
|
||||
]
|
||||
|
||||
# ─── Date Understanding (multiple choice letter) ──────────────────
|
||||
|
||||
DATE_QUESTIONS = [
|
||||
{
|
||||
"prompt": (
|
||||
"Today is Christmas Eve of 1937. What is the date 10 days ago in MM/DD/YYYY?\n"
|
||||
"Options:\n"
|
||||
"(A) 12/14/2026\n(B) 12/14/1950\n(C) 12/14/2007\n"
|
||||
"(D) 12/14/1937\n(E) 07/14/1938\n(F) 12/14/1988\n\n"
|
||||
"Think step by step, then give your answer as a single letter."
|
||||
),
|
||||
"answer": "d",
|
||||
},
|
||||
{
|
||||
"prompt": (
|
||||
"Tomorrow is 11/12/2019. What is the date one year ago from today in MM/DD/YYYY?\n"
|
||||
"Options:\n"
|
||||
"(A) 09/04/2018\n(B) 11/11/2018\n(C) 08/25/2018\n"
|
||||
"(D) 11/02/2018\n(E) 11/04/2018\n\n"
|
||||
"Think step by step, then give your answer as a single letter."
|
||||
),
|
||||
"answer": "b",
|
||||
},
|
||||
{
|
||||
"prompt": (
|
||||
"Jane and John married on Jan 2, 1958. It is their 5-year anniversary today. "
|
||||
"What is the date tomorrow in MM/DD/YYYY?\n"
|
||||
"Options:\n"
|
||||
"(A) 01/11/1961\n(B) 01/03/1963\n(C) 01/18/1961\n"
|
||||
"(D) 10/14/1960\n(E) 01/03/1982\n(F) 12/03/1960\n\n"
|
||||
"Think step by step, then give your answer as a single letter."
|
||||
),
|
||||
"answer": "b",
|
||||
},
|
||||
]
|
||||
|
||||
# ─── Logical Deduction (multiple choice letter) ───────────────────
|
||||
|
||||
LOGIC_QUESTIONS = [
|
||||
{
|
||||
"prompt": (
|
||||
"The following paragraphs each describe a set of three objects arranged in a fixed order. "
|
||||
"The statements are logically consistent within each paragraph.\n"
|
||||
"In a golf tournament, there were three golfers: Amy, Eli, and Eve. "
|
||||
"Eve finished above Amy. Eli finished below Amy.\n"
|
||||
"Options:\n"
|
||||
"(A) Amy finished last\n(B) Eli finished last\n(C) Eve finished last\n\n"
|
||||
"Think step by step, then give your answer as a single letter."
|
||||
),
|
||||
"answer": "b",
|
||||
},
|
||||
{
|
||||
"prompt": (
|
||||
"The following paragraphs each describe a set of three objects arranged in a fixed order. "
|
||||
"The statements are logically consistent within each paragraph.\n"
|
||||
"On a shelf, there are three books: a white book, a green book, and an orange book. "
|
||||
"The green book is to the right of the white book. The orange book is the rightmost.\n"
|
||||
"Options:\n"
|
||||
"(A) The white book is the leftmost\n(B) The green book is the leftmost\n"
|
||||
"(C) The orange book is the leftmost\n\n"
|
||||
"Think step by step, then give your answer as a single letter."
|
||||
),
|
||||
"answer": "a",
|
||||
},
|
||||
{
|
||||
"prompt": (
|
||||
"The following paragraphs each describe a set of three objects arranged in a fixed order. "
|
||||
"The statements are logically consistent within each paragraph.\n"
|
||||
"On a shelf, there are three books: a red book, a gray book, and a white book. "
|
||||
"The white book is to the left of the gray book. The red book is the second from the left.\n"
|
||||
"Options:\n"
|
||||
"(A) The red book is the leftmost\n(B) The gray book is the leftmost\n"
|
||||
"(C) The white book is the leftmost\n\n"
|
||||
"Think step by step, then give your answer as a single letter."
|
||||
),
|
||||
"answer": "c",
|
||||
},
|
||||
]
|
||||
|
||||
# ─── Navigation (Yes/No) ──────────────────────────────────────────
|
||||
|
||||
NAV_QUESTIONS = [
|
||||
{
|
||||
"prompt": (
|
||||
"If you follow these instructions, do you return to the starting point? "
|
||||
"Turn left. Turn around. Turn left. Take 7 steps. Take 2 steps. Take 4 steps. Take 8 steps.\n"
|
||||
"Options:\n- Yes\n- No\n\n"
|
||||
"Think step by step, then answer Yes or No."
|
||||
),
|
||||
"answer": "no",
|
||||
},
|
||||
{
|
||||
"prompt": (
|
||||
"If you follow these instructions, do you return to the starting point? "
|
||||
"Turn around. Take 1 step. Take 6 steps. Turn around. Take 6 steps. Take 9 steps. Take 1 step.\n"
|
||||
"Options:\n- Yes\n- No\n\n"
|
||||
"Think step by step, then answer Yes or No."
|
||||
),
|
||||
"answer": "no",
|
||||
},
|
||||
{
|
||||
"prompt": (
|
||||
"If you follow these instructions, do you return to the starting point? "
|
||||
"Always face forward. Take 2 steps right. Take 9 steps left. Take 7 steps right.\n"
|
||||
"Options:\n- Yes\n- No\n\n"
|
||||
"Think step by step, then answer Yes or No."
|
||||
),
|
||||
"answer": "yes",
|
||||
},
|
||||
]
|
||||
|
||||
# ─── GSM8K Word Problems (number answer) ──────────────────────────
|
||||
|
||||
GSM_QUESTIONS = [
|
||||
{
|
||||
"prompt": (
|
||||
"There are 15 trees in the grove. Grove workers will plant trees in the grove today. "
|
||||
"After they are done, there will be 21 trees. How many trees did the grove workers plant today?\n\n"
|
||||
"Solve step by step. End with 'The answer is [NUMBER]'."
|
||||
),
|
||||
"answer": "6",
|
||||
},
|
||||
{
|
||||
"prompt": (
|
||||
"If there are 3 cars in the parking lot and 2 more cars arrive, how many cars are in the parking lot?\n\n"
|
||||
"Solve step by step. End with 'The answer is [NUMBER]'."
|
||||
),
|
||||
"answer": "5",
|
||||
},
|
||||
{
|
||||
"prompt": (
|
||||
"Leah had 32 chocolates and her sister had 42. If they ate 35, how many pieces do they have left in total?\n\n"
|
||||
"Solve step by step. End with 'The answer is [NUMBER]'."
|
||||
),
|
||||
"answer": "39",
|
||||
},
|
||||
{
|
||||
"prompt": (
|
||||
"Jason had 20 lollipops. He gave Denny some lollipops. Now Jason has 12 lollipops. "
|
||||
"How many lollipops did Jason give to Denny?\n\n"
|
||||
"Solve step by step. End with 'The answer is [NUMBER]'."
|
||||
),
|
||||
"answer": "8",
|
||||
},
|
||||
{
|
||||
"prompt": (
|
||||
"Shawn has five toys. For Christmas, he got two toys each from his mom and dad. "
|
||||
"How many toys does he have now?\n\n"
|
||||
"Solve step by step. End with 'The answer is [NUMBER]'."
|
||||
),
|
||||
"answer": "9",
|
||||
},
|
||||
]
|
||||
|
||||
# ─── All questions with types ─────────────────────────────────────
|
||||
|
||||
# Flatten every question bank into one list, tagging each entry with
# its category so results can be broken down per category later.
ALL_REASONING = [
    {**question, "type": category}
    for category, bank in (
        ("causal", CAUSAL_QUESTIONS),
        ("date", DATE_QUESTIONS),
        ("logic", LOGIC_QUESTIONS),
        ("nav", NAV_QUESTIONS),
        ("gsm", GSM_QUESTIONS),
    )
    for question in bank
]

# Alias for sweep.py import
REASONING_QUESTIONS = ALL_REASONING
|
||||
|
||||
|
||||
def extract_final_answer(response: str) -> str:
    """
    Pull the final short answer out of a chain-of-thought response.

    Tries, in order: an explicit "the answer is X" clause, a bare
    Yes/No word on the last line, a (possibly parenthesized) option
    letter A-F on the last line, and finally the whole last line.
    """
    # Accept end-of-string as a terminator too, so "The answer is 42"
    # with no trailing punctuation still matches.
    match = re.search(r'the answer is\s+(.+?)(?:[\.\!\n\r]|$)', response, re.IGNORECASE)
    if match:
        return match.group(1).strip()

    last_line = response.split('\n')[-1]
    match = re.search(r'\b(yes|no)\b', last_line, re.IGNORECASE)
    if match:
        return match.group(1).strip()

    match = re.search(r'\(?([A-F])\)?', last_line)
    if match:
        return match.group(1).strip()

    return response.strip().split('\n')[-1].strip()
|
||||
|
||||
|
||||
def score_reasoning_response(question: dict, response: str) -> float:
    """Binary-score one reasoning question: 1.0 if correct, else 0.0."""
    if response is None:
        return 0.0

    def _norm(text: str) -> str:
        # Reduce to bare lowercase alphanumerics for comparison.
        return re.sub(r'[^a-z0-9]', '', text.lower().strip())

    final = _norm(extract_final_answer(response))
    correct = _norm(question["answer"])

    if final == correct:
        return 1.0

    # Lenient fallback for Yes/No questions: accept the word anywhere on
    # the last line, provided the opposite word is not also present.
    if correct in ("yes", "no"):
        last_line = response.strip().split('\n')[-1].lower()
        if correct in last_line and (correct == "yes") != ("no" in last_line):
            return 1.0

    return 0.0
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Smoke check: report question counts per category and show samples.
    print(f"Reasoning Probe: {len(ALL_REASONING)} questions")
    print(f" Causal: {len(CAUSAL_QUESTIONS)}")
    print(f" Date: {len(DATE_QUESTIONS)}")
    print(f" Logic: {len(LOGIC_QUESTIONS)}")
    print(f" Nav: {len(NAV_QUESTIONS)}")
    print(f" GSM: {len(GSM_QUESTIONS)}")

    print("\nSample questions:")
    for q in ALL_REASONING[:2]:
        print(f"\n [{q['type']}] answer={q['answer']}")
        print(f" {q['prompt'][:80]}...")
|
||||
447
sweep.py
Normal file
447
sweep.py
Normal file
|
|
@ -0,0 +1,447 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
RYS Layer Duplication Sweep
|
||||
|
||||
Orchestrates the search for optimal layer duplication configuration:
|
||||
1. Generate modified GGUF with duplicated layers
|
||||
2. Start llama-server with the modified model
|
||||
3. Run math + EQ probes
|
||||
4. Score and record results
|
||||
5. Print live results table
|
||||
6. Kill server, repeat
|
||||
|
||||
Usage:
|
||||
python sweep.py \
|
||||
--model /path/to/model.gguf \
|
||||
--llama-server /path/to/llama-server \
|
||||
--tmpdir /dev/shm/rys \
|
||||
--results results.jsonl
|
||||
|
||||
The sweep strategy:
|
||||
Pass 1: 8-layer blocks at stride 4 across the middle
|
||||
Pass 2: Refine within the hot zone with smaller blocks
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import signal
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
import requests
|
||||
|
||||
from gguf_surgery import duplicate_layers
|
||||
from math_probe import MATH_QUESTIONS, score_math_response
|
||||
from eq_probe import EQ_SCENARIOS, build_eq_prompt, parse_eq_response, score_eq_response
|
||||
from reasoning_probe import REASONING_QUESTIONS, score_reasoning_response
|
||||
|
||||
|
||||
# Server config
|
||||
DEFAULT_PORT = 8099
|
||||
SERVER_STARTUP_TIMEOUT = 120 # seconds
|
||||
REQUEST_TIMEOUT = 60 # seconds per completion
|
||||
|
||||
|
||||
def wait_for_server(port: int, timeout: int = SERVER_STARTUP_TIMEOUT) -> bool:
    """Poll llama-server's /health endpoint until it reports ok or we time out.

    Returns True once the endpoint answers 200 with {"status": "ok"};
    False if the deadline passes first.
    """
    url = f"http://127.0.0.1:{port}/health"
    deadline = time.time() + timeout
    while time.time() < deadline:
        try:
            resp = requests.get(url, timeout=2)
        except (requests.ConnectionError, requests.Timeout):
            resp = None
        if resp is not None and resp.status_code == 200:
            if resp.json().get("status") == "ok":
                return True
        time.sleep(1)
    return False
|
||||
|
||||
|
||||
def start_server(llama_server_path: str, model_path: str, port: int,
                 extra_args: list[str] = None) -> subprocess.Popen:
    """Start llama-server and return the process handle.

    Server stdout/stderr are redirected to /tmp/rys_server_<port>.log.
    The open file handle and its path are stashed on the Popen object
    as private attributes (_log_file, _log_path) so stop_server() and
    dump_server_log() can find them later.
    """
    cmd = [
        llama_server_path,
        "-m", model_path,
        "--port", str(port),
        "-c", "4096",  # small context for probe eval
        "-ngl", "99",  # offload all layers to GPU
        "--flash-attn", "on",
        "--cache-type-k", "q8_0",
        "--cache-type-v", "q8_0",
        "--no-warmup",
        "-np", "1",  # single slot
    ]
    if extra_args:
        cmd.extend(extra_args)

    print(f" [CMD] {' '.join(cmd)}", flush=True)

    # Let server output go to a log file so we can debug without pipe deadlocks
    log_path = Path(f"/tmp/rys_server_{port}.log")
    log_file = open(log_path, "w")
    proc = subprocess.Popen(cmd, stdout=log_file, stderr=subprocess.STDOUT)
    proc._log_file = log_file  # keep reference so it doesn't get GC'd
    proc._log_path = log_path
    print(f" [PID] Server started as PID {proc.pid}, log: {log_path}", flush=True)
    return proc
|
||||
|
||||
|
||||
def stop_server(proc: subprocess.Popen):
    """Terminate the server (SIGKILL after 10s grace) and close its log file."""
    if proc.poll() is None:
        proc.terminate()
        try:
            proc.wait(timeout=10)
        except subprocess.TimeoutExpired:
            proc.kill()
            proc.wait()
    # Close the log file opened by start_server(), if one was attached.
    log_file = getattr(proc, '_log_file', None)
    if log_file is not None:
        log_file.close()
|
||||
|
||||
|
||||
def dump_server_log(proc: subprocess.Popen, tail_lines: int = 30):
    """Print the tail of the server's log file to stderr for debugging."""
    log_path = getattr(proc, '_log_path', None)
    if log_path is None or not log_path.exists():
        return
    lines = log_path.read_text().splitlines()
    print(f" --- Server log (last {tail_lines} lines) ---", file=sys.stderr)
    for line in lines[-tail_lines:]:
        print(f" | {line}", file=sys.stderr)
    print(f" --- End server log ---", file=sys.stderr)
|
||||
|
||||
|
||||
def query_model(prompt: str, port: int, max_tokens: int = 64) -> str | None:
    """Send a single-turn chat completion to llama-server.

    Uses temperature 0 so probe runs are deterministic. Returns the
    assistant message content, or None on non-200 status, connection
    failure, or timeout (each logged to stderr).
    """
    url = f"http://127.0.0.1:{port}/v1/chat/completions"

    payload = {
        # NOTE(review): placeholder model name — llama-server appears to
        # ignore it for a single loaded model; confirm if that changes.
        "model": "test",
        "messages": [
            {"role": "user", "content": prompt}
        ],
        "max_tokens": max_tokens,
        "temperature": 0.0,
    }

    try:
        r = requests.post(url, json=payload, timeout=REQUEST_TIMEOUT)
        if r.status_code == 200:
            data = r.json()
            return data["choices"][0]["message"]["content"]
        else:
            print(f" [WARN] Server returned {r.status_code}", file=sys.stderr)
            return None
    except (requests.ConnectionError, requests.Timeout) as e:
        print(f" [WARN] Request failed: {e}", file=sys.stderr)
        return None
|
||||
|
||||
|
||||
def run_math_probe(port: int) -> float:
    """Ask every math question; return the mean partial-credit score (0-1)."""
    scores = []
    for question, answer in MATH_QUESTIONS:
        reply = query_model(question, port, max_tokens=48)
        scores.append(0.0 if reply is None else score_math_response(answer, reply))
    return sum(scores) / len(scores) if scores else 0.0
|
||||
|
||||
|
||||
def run_eq_probe(port: int) -> float:
    """Run every EQ scenario; return the mean score (0-100 scale)."""
    scores = []
    for scenario in EQ_SCENARIOS:
        reply = query_model(build_eq_prompt(scenario), port, max_tokens=48)
        if reply is None:
            scores.append(0.0)
            continue
        predicted = parse_eq_response(reply, len(scenario["emotions"]))
        scores.append(score_eq_response(scenario["reference"], predicted))
    return sum(scores) / len(scores) if scores else 0.0
|
||||
|
||||
|
||||
def run_reasoning_probe(port: int) -> dict:
    """Run all reasoning questions; return per-category and overall accuracy.

    Returns {"categories": {type: mean}, "overall": mean} with means in [0, 1].
    """
    by_category: dict[str, list[float]] = {}
    for question in REASONING_QUESTIONS:
        bucket = by_category.setdefault(question["type"], [])
        reply = query_model(question["prompt"], port, max_tokens=512)
        bucket.append(score_reasoning_response(question, reply))

    cat_scores = {
        cat: (sum(scores) / len(scores) if scores else 0.0)
        for cat, scores in by_category.items()
    }

    all_scores = [s for scores in by_category.values() for s in scores]
    overall = sum(all_scores) / len(all_scores) if all_scores else 0.0

    return {"categories": cat_scores, "overall": overall}
|
||||
|
||||
|
||||
def run_evaluation(port: int) -> dict:
    """Run the math, EQ and reasoning probes and collect their scores.

    Probes run in a fixed order (math, EQ, reasoning) against the server on
    *port*; the returned dict flattens the reasoning result into an overall
    score plus the per-category breakdown.
    """
    math_score = run_math_probe(port)
    eq_score = run_eq_probe(port)
    reasoning = run_reasoning_probe(port)
    result = {
        "math_score": math_score,
        "eq_score": eq_score,
        "reasoning_score": reasoning["overall"],
        "reasoning_cats": reasoning["categories"],
    }
    return result
def print_results_table(results: list[dict], baseline: dict | None = None):
|
||||
"""Print a live-updating results table."""
|
||||
print("\n" + "=" * 105)
|
||||
print(f"{'Config':>12} {'Layers':>8} {'Math':>8} {'EQ':>8} {'Reason':>8} "
|
||||
f"{'Math Δ':>8} {'EQ Δ':>8} {'Reas Δ':>8} {'Combined Δ':>11}")
|
||||
print("-" * 105)
|
||||
|
||||
if baseline:
|
||||
brs = baseline.get('reasoning_score', 0)
|
||||
print(f"{'BASELINE':>12} {'0':>8} "
|
||||
f"{baseline['math_score']:>8.4f} {baseline['eq_score']:>8.2f} {brs:>8.2%} "
|
||||
f"{'---':>8} {'---':>8} {'---':>8} {'---':>11}")
|
||||
print("-" * 105)
|
||||
|
||||
for r in results:
|
||||
config = f"({r['dup_start']},{r['dup_end']})"
|
||||
n_dup = r['dup_end'] - r['dup_start']
|
||||
rs = r.get('reasoning_score', 0)
|
||||
|
||||
if baseline:
|
||||
math_delta = r['math_score'] - baseline['math_score']
|
||||
eq_delta = r['eq_score'] - baseline['eq_score']
|
||||
reas_delta = rs - baseline.get('reasoning_score', 0)
|
||||
# Combined: weight EQ and reasoning more than math
|
||||
combined = eq_delta + (reas_delta * 100)
|
||||
math_d = f"{math_delta:>+8.4f}"
|
||||
eq_d = f"{eq_delta:>+8.2f}"
|
||||
reas_d = f"{reas_delta:>+8.2%}"
|
||||
comb_d = f"{combined:>+11.2f}"
|
||||
else:
|
||||
math_d = eq_d = reas_d = comb_d = "---"
|
||||
|
||||
print(f"{config:>12} {n_dup:>8} "
|
||||
f"{r['math_score']:>8.4f} {r['eq_score']:>8.2f} {rs:>8.2%} "
|
||||
f"{math_d} {eq_d} {reas_d} {comb_d}")
|
||||
|
||||
print("=" * 105)
|
||||
sys.stdout.flush()
|
||||
|
||||
|
||||
def generate_sweep_configs(n_layers: int, block_sizes: list[int],
|
||||
start_min: int = 4, start_max: int = None,
|
||||
stride: int = 4) -> list[tuple[int, int]]:
|
||||
"""
|
||||
Generate (dup_start, dup_end) configs for the sweep.
|
||||
|
||||
Args:
|
||||
n_layers: Total layers in the model
|
||||
block_sizes: List of block sizes to try (e.g., [8])
|
||||
start_min: Earliest layer to start duplication
|
||||
start_max: Latest layer to start (default: n_layers - max(block_sizes) - 4)
|
||||
stride: Step between start positions
|
||||
"""
|
||||
if start_max is None:
|
||||
start_max = n_layers - max(block_sizes) - 4
|
||||
|
||||
configs = []
|
||||
for bs in block_sizes:
|
||||
for start in range(start_min, start_max + 1, stride):
|
||||
end = start + bs
|
||||
if end <= n_layers:
|
||||
configs.append((start, end))
|
||||
|
||||
return configs
|
||||
|
||||
|
||||
def main():
    """CLI entry point: sweep layer-duplication configs over a GGUF model.

    For each (dup_start, dup_end) config: generate a modified GGUF into a
    tmpfs directory, launch llama-server on it, run the math/EQ/reasoning
    probes, append the scores to a JSONL results file, then tear down the
    server and delete the modified GGUF. Results are appended incrementally
    so an interrupted sweep can resume (already-done configs are skipped).
    """
    parser = argparse.ArgumentParser(description="RYS Layer Duplication Sweep")
    parser.add_argument("--model", required=True, help="Path to input GGUF model")
    parser.add_argument("--llama-server", required=True, help="Path to llama-server binary")
    parser.add_argument("--tmpdir", default="/dev/shm/rys",
                        help="Temp directory for modified GGUFs (use tmpfs/RAM)")
    parser.add_argument("--results", default="rys_results.jsonl",
                        help="Output results file (JSONL)")
    parser.add_argument("--port", type=int, default=DEFAULT_PORT)
    parser.add_argument("--block-sizes", type=int, nargs="+", default=[8],
                        help="Block sizes to sweep (default: 8)")
    parser.add_argument("--stride", type=int, default=4,
                        help="Stride between start positions (default: 4)")
    parser.add_argument("--start-min", type=int, default=4,
                        help="Earliest layer to start duplication")
    parser.add_argument("--start-max", type=int, default=None,
                        help="Latest layer to start duplication")
    parser.add_argument("--skip-baseline", action="store_true",
                        help="Skip baseline run (use if already in results)")
    # REMAINDER swallows everything after --server-args, hence "must be last".
    parser.add_argument("--server-args", nargs=argparse.REMAINDER, default=[],
                        help="Extra args to pass to llama-server (must be last)")
    args = parser.parse_args()

    model_path = Path(args.model).resolve()
    tmpdir = Path(args.tmpdir)
    tmpdir.mkdir(parents=True, exist_ok=True)

    results_path = Path(args.results)
    results = []
    baseline = None

    # Load existing results if resuming
    if results_path.exists():
        with open(results_path) as f:
            for line in f:
                line = line.strip()
                if line:
                    entry = json.loads(line)
                    if entry.get("is_baseline"):
                        baseline = entry
                    else:
                        results.append(entry)
        print(f"Loaded {len(results)} existing results + baseline={baseline is not None}")

    # Run baseline (unmodified model)
    if not args.skip_baseline and baseline is None:
        print("\n>>> Running BASELINE evaluation...")
        proc = start_server(args.llama_server, str(model_path), args.port, args.server_args)
        try:
            if not wait_for_server(args.port):
                print("ERROR: Server failed to start for baseline", file=sys.stderr)
                dump_server_log(proc)
                stop_server(proc)
                # Baseline is required for deltas, so a failed start is fatal.
                sys.exit(1)

            print(" Server ready. Running probes...")
            eval_result = run_evaluation(args.port)
            # dup_start/dup_end of -1 mark the baseline sentinel entry.
            baseline = {
                "is_baseline": True,
                "dup_start": -1,
                "dup_end": -1,
                "math_score": eval_result["math_score"],
                "eq_score": eval_result["eq_score"],
                "reasoning_score": eval_result["reasoning_score"],
                "reasoning_cats": eval_result.get("reasoning_cats", {}),
                "timestamp": datetime.now().isoformat(),
            }

            # Append (not overwrite) so existing sweep entries are preserved.
            with open(results_path, "a") as f:
                f.write(json.dumps(baseline) + "\n")

            brs = baseline['reasoning_score']
            print(f" Baseline: math={baseline['math_score']:.4f} eq={baseline['eq_score']:.2f} reasoning={brs:.2%}")
        finally:
            stop_server(proc)

    # Get model layer count from the GGUF metadata
    from gguf import GGUFReader
    reader = GGUFReader(str(model_path), 'r')
    arch_field = reader.get_field('general.architecture')
    arch = arch_field.contents()
    # Layer-count key is architecture-specific, e.g. "llama.block_count".
    block_count_field = reader.get_field(f'{arch}.block_count')
    n_layers = block_count_field.contents()
    print(f"\nModel: {model_path.name}")
    print(f"Architecture: {arch}, Layers: {n_layers}")

    # Generate sweep configurations
    configs = generate_sweep_configs(
        n_layers=n_layers,
        block_sizes=args.block_sizes,
        start_min=args.start_min,
        start_max=args.start_max,
        stride=args.stride,
    )

    # Filter out already-completed configs
    done = {(r["dup_start"], r["dup_end"]) for r in results}
    configs = [(s, e) for s, e in configs if (s, e) not in done]

    print(f"Configs to test: {len(configs)}")
    if configs:
        print(f" Range: ({configs[0][0]},{configs[0][1]}) to ({configs[-1][0]},{configs[-1][1]})")

    print_results_table(results, baseline)

    for idx, (dup_start, dup_end) in enumerate(configs):
        n_dup = dup_end - dup_start
        config_str = f"({dup_start},{dup_end})"
        print(f"\n>>> [{idx+1}/{len(configs)}] Testing config {config_str} "
              f"(+{n_dup} layers)...")

        # Generate modified GGUF
        modified_path = tmpdir / f"rys_{dup_start}_{dup_end}.gguf"
        print(f" Generating modified GGUF...")
        try:
            duplicate_layers(
                str(model_path), str(modified_path),
                dup_start, dup_end, verbose=False
            )
        except Exception as e:
            # A failed generation only skips this config, not the sweep.
            print(f" ERROR generating GGUF: {e}", file=sys.stderr)
            continue

        # Start server with modified model
        print(f" Starting server...")
        proc = start_server(
            args.llama_server, str(modified_path), args.port, args.server_args
        )

        try:
            if not wait_for_server(args.port):
                print(f" ERROR: Server failed to start for {config_str}", file=sys.stderr)
                dump_server_log(proc)
                print(f" Check server log above for details.", file=sys.stderr)
                # continue still runs the finally block below (stop + cleanup).
                continue

            print(f" Server ready. Running probes...")
            eval_result = run_evaluation(args.port)

            entry = {
                "dup_start": dup_start,
                "dup_end": dup_end,
                "n_dup_layers": n_dup,
                "math_score": eval_result["math_score"],
                "eq_score": eval_result["eq_score"],
                "reasoning_score": eval_result["reasoning_score"],
                "reasoning_cats": eval_result.get("reasoning_cats", {}),
                "timestamp": datetime.now().isoformat(),
            }

            results.append(entry)

            # Append to results file
            with open(results_path, "a") as f:
                f.write(json.dumps(entry) + "\n")

            print_results_table(results, baseline)

        finally:
            stop_server(proc)

            # Clean up modified GGUF to free tmpfs space
            # NOTE(review): cleanup placed inside finally so the early
            # `continue` on server failure still frees tmpfs — confirm against
            # the original indentation.
            if modified_path.exists():
                modified_path.unlink()
                print(f" Cleaned up {modified_path.name}")

    print("\n\nSweep complete!")
    print_results_table(results, baseline)
# Script entry point: run the full layer-duplication sweep from the CLI.
if __name__ == "__main__":
    main()
114
visualize.py
Normal file
114
visualize.py
Normal file
|
|
@ -0,0 +1,114 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Visualize RYS sweep results.
|
||||
Reads the JSONL output from sweep.py, prints ranked table and bar chart.
|
||||
"""
|
||||
|
||||
import json
|
||||
import sys
|
||||
|
||||
|
||||
def load_results(path: str):
    """Parse a sweep JSONL file into (baseline, results).

    Lines flagged with "is_baseline" populate the first element (last one
    wins, None if absent); every other non-blank line is collected into the
    results list in file order.
    """
    baseline = None
    entries = []
    with open(path) as fh:
        for raw in fh:
            raw = raw.strip()
            if not raw:
                continue
            record = json.loads(raw)
            if record.get("is_baseline"):
                baseline = record
            else:
                entries.append(record)
    return baseline, entries
def print_ranked(baseline, results):
    """Print results ranked by combined delta (math delta x 100 + EQ delta).

    Mutates each result dict in place, adding "math_delta", "eq_delta" and
    "combined" keys. Prints a warning and returns early without a baseline.
    """
    if not baseline:
        print("No baseline found in results!")
        return

    # Annotate every entry with its deltas versus the baseline.
    for entry in results:
        entry["math_delta"] = entry["math_score"] - baseline["math_score"]
        entry["eq_delta"] = entry["eq_score"] - baseline["eq_score"]
        entry["combined"] = (entry["math_delta"] * 100) + entry["eq_delta"]

    ranked = sorted(results, key=lambda e: e["combined"], reverse=True)

    print(f"\nBaseline: math={baseline['math_score']:.4f} eq={baseline['eq_score']:.2f}")
    print()
    print(f"{'Rank':>4} {'Config':>12} {'Dup':>4} "
          f"{'Math':>8} {'EQ':>8} "
          f"{'Math Δ':>9} {'EQ Δ':>8} {'Combined':>10}")
    print("-" * 80)

    for rank, entry in enumerate(ranked, start=1):
        label = f"({entry['dup_start']},{entry['dup_end']})"
        dup_count = entry['dup_end'] - entry['dup_start']

        # Marker: "+" improved, "!" clearly degraded, " " roughly neutral.
        if entry["combined"] > 0:
            flag = "+"
        elif entry["combined"] < -5:
            flag = "!"
        else:
            flag = " "

        print(f"{rank:>4} {label:>12} {dup_count:>4} "
              f"{entry['math_score']:>8.4f} {entry['eq_score']:>8.2f} "
              f"{entry['math_delta']:>+9.4f} {entry['eq_delta']:>+8.2f} "
              f"{entry['combined']:>+10.2f} {flag}")

    if ranked:
        top = ranked[0]
        bottom = ranked[-1]
        print()
        print(f"Best: ({top['dup_start']},{top['dup_end']}) combined={top['combined']:+.2f}")
        print(f"Worst: ({bottom['dup_start']},{bottom['dup_end']}) combined={bottom['combined']:+.2f}")
def print_bar_chart(baseline, results):
    """Render combined deltas as an ASCII bar chart ordered by dup start.

    Mutates each result dict in place, adding a "combined" key. Does nothing
    when either the baseline or the results list is missing/empty.
    """
    if not (baseline and results):
        return

    for entry in results:
        d_math = entry["math_score"] - baseline["math_score"]
        d_eq = entry["eq_score"] - baseline["eq_score"]
        entry["combined"] = (d_math * 100) + d_eq

    by_start = sorted(results, key=lambda e: e["dup_start"])

    # Scale bars by the largest magnitude; fall back to 1 to avoid div-by-zero.
    scale = max(abs(e["combined"]) for e in by_start) or 1

    width = 20
    print(f"\nCombined delta (baseline = |):")
    print(f"{'Config':>12} {'negative':<{width}}|{'positive':<{width}}")

    for entry in by_start:
        label = f"({entry['dup_start']},{entry['dup_end']})"
        value = entry["combined"]
        length = int(abs(value) / scale * width)

        if value >= 0:
            bar = " " * width + "|" + "#" * length
        else:
            bar = " " * (width - length) + "=" * length + "|"

        print(f"{label:>12} {bar} {value:+.2f}")
# Script entry point: expects one argument, the path to a sweep results
# JSONL file; prints the ranked table followed by the bar chart.
if __name__ == "__main__":
    if len(sys.argv) < 2:
        print(f"Usage: {sys.argv[0]} <results.jsonl>")
        sys.exit(1)

    baseline, results = load_results(sys.argv[1])
    print_ranked(baseline, results)
    print_bar_chart(baseline, results)
Loading…
Add table
Add a link
Reference in a new issue