mirror of
https://github.com/alainnothere/llm-circuit-finder.git
synced 2026-04-24 20:56:21 +02:00
Add files via upload
This commit is contained in:
parent
9ddf9fafd9
commit
8b68afbd3e
9 changed files with 2148 additions and 0 deletions
216
compare_eval.py
Normal file
216
compare_eval.py
Normal file
|
|
@ -0,0 +1,216 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Compare lm_eval results across multiple runs.
|
||||
|
||||
Reads the results JSON files from lm_eval output directories and
|
||||
prints a side-by-side comparison table.
|
||||
|
||||
Usage:
|
||||
python compare_eval.py ./eval_base ./eval_rys_balanced ./eval_rys_triple
|
||||
python compare_eval.py ./eval_*
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def load_results(folder: str) -> dict:
    """Load the lm_eval results dict from an output folder (or a JSON file).

    Searches *folder* recursively for a ``*results*.json`` file and loads the
    newest one. Returns ``{}`` (after a warning on stderr) when nothing
    matching can be found.
    """
    root = Path(folder)

    # lm_eval writes its results JSON somewhere under the output directory,
    # with a run-dependent filename — search recursively for it.
    found = list(root.glob("**/*results*.json"))

    if not found:
        # The caller may have pointed us directly at a results JSON file.
        if root.suffix == '.json' and root.is_file():
            with open(root) as fh:
                return json.load(fh)
        print(f"WARNING: No results JSON found in {root}", file=sys.stderr)
        return {}

    # Re-runs can leave several candidates behind; prefer the most recent.
    newest = max(found, key=lambda p: p.stat().st_mtime)
    with open(newest) as fh:
        return json.load(fh)
|
||||
|
||||
|
||||
def extract_metrics(data: dict) -> dict:
    """Extract per-task metric entries from an lm_eval results dict.

    Args:
        data: parsed lm_eval results JSON (expects a top-level "results" key).

    Returns:
        Mapping "task|metric|filter" -> {"task", "metric", "filter", "value"}
        for every numeric metric whose key carries one of the known lm_eval
        filter suffixes. "alias" entries are skipped.
    """
    # lm_eval metric keys are formatted "metric_name,filter_name".
    filter_suffixes = (",none", ",flexible-extract", ",strict-match", ",get-answer")

    metrics = {}
    for task_name, task_data in data.get("results", {}).items():
        for key, value in task_data.items():
            # endswith accepts a tuple — replaces the original or-chain.
            if not key.endswith(filter_suffixes):
                continue
            # Split "metric_name,filter_name" at the last comma.
            metric, _, filter_name = key.rpartition(",")
            # Only numeric values are comparable; "alias" is display metadata.
            # (The original also built an unused display_name here — removed.)
            if isinstance(value, (int, float)) and metric != "alias":
                metrics[f"{task_name}|{metric}|{filter_name}"] = {
                    "task": task_name,
                    "metric": metric,
                    "filter": filter_name,
                    "value": value,
                }
    return metrics
|
||||
|
||||
|
||||
def get_display_name(label: str, metric_info: dict) -> str:
    """Build a human-readable row label from a metric-info record.

    *label* is accepted for interface compatibility; the display name is
    derived entirely from metric_info's "task", "metric" and "filter" keys.
    """
    # Abbreviate the long BBH chain-of-thought task prefix.
    task = metric_info["task"].replace("bbh_cot_fewshot_", "bbh/")

    filt = metric_info["filter"]
    # Show a meaningful filter name when present; otherwise fall back to
    # the metric name.
    if filt and filt not in ("none", "get-answer"):
        return f"{task} [{filt}]"
    return f"{task} [{metric_info['metric']}]"
|
||||
|
||||
|
||||
def compare(folders: list[str], names: list[str] | None = None):
    """Print a side-by-side comparison table of lm_eval runs.

    Args:
        folders: lm_eval output directories (or results JSON files).
        names: display name per run; defaults to the folder basenames.

    With more than one run, a trailing column shows last-minus-first deltas
    and a per-run average row is appended after the table.
    """
    if names is None:
        names = [Path(f).name for f in folders]

    max_name_len = max(len(n) for n in names)

    # Load every run's metrics, keyed by display name.
    all_metrics = {}
    for name, folder in zip(names, folders):
        all_metrics[name] = extract_metrics(load_results(folder))

    # Union of metric labels across all runs, in stable sorted order.
    all_labels = set()
    for metrics in all_metrics.values():
        all_labels.update(metrics.keys())
    sorted_labels = sorted(all_labels)

    col_width = 10
    name_col = max(45, max_name_len)

    header = f"{'Metric':<{name_col}}"
    for name in names:
        header += f" {name:>{col_width}}"
    if len(names) > 1:
        header += f" {'Δ(last-first)':>{col_width+2}}"

    print()
    print("=" * len(header))
    print("lm_eval Results Comparison")
    print("=" * len(header))
    print(header)
    print("-" * len(header))

    prev_task = None
    for label in sorted_labels:
        # Skip noise rows up front (the original computed the display name
        # first and threw it away): stderr values and alias metadata.
        if "stderr" in label.lower() or "alias" in label.lower():
            continue

        # Display info comes from the first run that has this metric.
        metric_info = None
        for metrics in all_metrics.values():
            if label in metrics:
                metric_info = metrics[label]
                break
        if metric_info is None:
            continue

        display = get_display_name(label, metric_info)

        # Blank line between task groups for readability.
        current_task = metric_info["task"]
        if prev_task and current_task != prev_task:
            print()
        prev_task = current_task

        row = f"{display:<{name_col}}"

        values = []
        for name in names:
            metrics = all_metrics[name]
            if label in metrics:
                val = metrics[label]["value"]
                values.append(val)
                if isinstance(val, float):
                    row += f" {val:>{col_width}.4f}"
                else:
                    row += f" {val:>{col_width}}"
            else:
                # This run is missing the metric entirely.
                values.append(None)
                row += f" {'---':>{col_width}}"

        # Delta column (last - first), only when both endpoints exist.
        # The original had identical > 0 and < 0 branches — merged here.
        if len(names) > 1 and values[0] is not None and values[-1] is not None:
            delta = values[-1] - values[0]
            if delta:
                row += f" {delta:>+{col_width}.4f}"
            else:
                row += f" {'0':>{col_width+1}}"
        elif len(names) > 1:
            row += f" {'---':>{col_width+1}}"

        print(row)

    print("=" * len(header))

    # Summary: average across all (non-stderr) metric values per run.
    print(f"\n{'Average (all metrics)':<{name_col}}", end="")
    avgs = []
    for name in names:
        metrics = all_metrics[name]
        vals = [m["value"] for m in metrics.values()
                if isinstance(m["value"], (int, float)) and "stderr" not in m["metric"]]
        if vals:
            avg = sum(vals) / len(vals)
            avgs.append(avg)
            print(f" {avg:>{col_width}.4f}", end="")
        else:
            avgs.append(None)
            print(f" {'---':>{col_width}}", end="")

    if len(avgs) > 1 and avgs[0] is not None and avgs[-1] is not None:
        delta = avgs[-1] - avgs[0]
        print(f" {delta:>+{col_width}.4f}", end="")
    print()
|
||||
|
||||
|
||||
def main():
    """CLI entry point: parse arguments and print the comparison table."""
    ap = argparse.ArgumentParser(
        description="Compare lm_eval results across runs",
        epilog="Example: python compare_eval.py ./eval_base ./eval_rys",
    )
    ap.add_argument("folders", nargs="+", help="lm_eval output directories or JSON files")
    ap.add_argument("--names", nargs="+", help="Display names for each run (default: folder names)")
    args = ap.parse_args()

    # An omitted --names arrives as None (or empty), which compare() treats
    # as "use the folder basenames".
    compare(args.folders, args.names if args.names else None)


if __name__ == "__main__":
    main()
|
||||
267
eq_probe.py
Normal file
267
eq_probe.py
Normal file
|
|
@ -0,0 +1,267 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
EQ Probe for RYS Layer Duplication Sweep
|
||||
|
||||
Simplified EQ-Bench-style scenarios. Each presents a social situation
|
||||
and asks the model to rate the intensity of 4 emotions (0-10).
|
||||
Scored by comparing to reference answers using EQ-Bench v2 scoring.
|
||||
|
||||
The reference answers were crafted to represent reasonable human consensus.
|
||||
What matters for the sweep is relative performance between configs,
|
||||
not absolute calibration.
|
||||
"""
|
||||
|
||||
import json
|
||||
import re
|
||||
import math
|
||||
|
||||
|
||||
# Each scenario: situation text, 4 emotions to rate, reference scores (0-10)
# NOTE(review): the reference scores are hand-crafted (see module docstring),
# not sourced from human raters — only relative comparisons between model
# configs are meaningful, not absolute EQ calibration.
EQ_SCENARIOS = [
    {
        "id": "eq01",
        "situation": (
            "David just found out his closest friend has been spreading rumors about him "
            "at work for months. When confronted, the friend laughed it off and said "
            "\"everyone does it, don't be so sensitive.\" David has been quiet all evening."
        ),
        "emotions": ["Anger", "Betrayal", "Sadness", "Confusion"],
        "reference": [7, 9, 6, 4]
    },
    {
        "id": "eq02",
        "situation": (
            "Maria spent six months preparing a presentation for the board. During the "
            "meeting, her manager took credit for all her ideas and presented them as his "
            "own. Afterward, he thanked her for 'helping out with the slides.'"
        ),
        "emotions": ["Frustration", "Resentment", "Humiliation", "Determination"],
        "reference": [8, 8, 5, 6]
    },
    {
        "id": "eq03",
        "situation": (
            "After 20 years of marriage, Tom's wife tells him she wants a divorce. "
            "She says she still cares about him but hasn't been happy for years. "
            "Tom had no idea anything was wrong."
        ),
        "emotions": ["Shock", "Grief", "Self-doubt", "Denial"],
        "reference": [9, 7, 6, 7]
    },
    {
        "id": "eq04",
        "situation": (
            "A young doctor must tell parents that their child's surgery was successful "
            "but there will be permanent limitations. The parents are already celebrating "
            "in the waiting room."
        ),
        "emotions": ["Relief", "Anxiety", "Empathy", "Professional_detachment"],
        "reference": [4, 7, 8, 5]
    },
    {
        "id": "eq05",
        "situation": (
            "Chen receives a prestigious award at a ceremony. As he walks to the stage, "
            "he sees his estranged father in the audience - the man who abandoned the "
            "family when Chen was twelve."
        ),
        "emotions": ["Pride", "Anger", "Longing", "Anxiety"],
        "reference": [7, 5, 6, 6]
    },
    {
        "id": "eq06",
        "situation": (
            "A retired teacher learns that a former student, who she failed years ago "
            "and who dropped out, has become extremely successful. The student publicly "
            "credits 'proving my teacher wrong' as their motivation."
        ),
        "emotions": ["Guilt", "Pride", "Defensiveness", "Amusement"],
        "reference": [5, 4, 6, 3]
    },
    {
        "id": "eq07",
        "situation": (
            "Sophie finds out she's been accepted to her dream university on the same "
            "day her best friend receives a rejection from the same school. Her friend "
            "calls to congratulate her, voice cracking."
        ),
        "emotions": ["Joy", "Guilt", "Empathy", "Awkwardness"],
        "reference": [7, 6, 8, 7]
    },
    {
        "id": "eq08",
        "situation": (
            "A firefighter rescues a child from a burning building. Weeks later, he "
            "wakes up screaming from nightmares about the ones he couldn't save in "
            "a previous fire. His partner asks if he's okay."
        ),
        "emotions": ["Satisfaction", "Trauma", "Vulnerability", "Shame"],
        "reference": [3, 8, 7, 5]
    },
    {
        "id": "eq09",
        "situation": (
            "An elderly woman's family surprises her with a birthday party. She smiles "
            "and thanks everyone, but keeps glancing at an empty chair - where her "
            "late husband always sat."
        ),
        "emotions": ["Gratitude", "Grief", "Loneliness", "Warmth"],
        "reference": [7, 7, 6, 6]
    },
    {
        "id": "eq10",
        "situation": (
            "A manager must lay off a team member who is also a close friend and a "
            "single parent. The company requires it due to budget cuts. HR is waiting "
            "for the paperwork."
        ),
        "emotions": ["Guilt", "Dread", "Helplessness", "Resentment"],
        "reference": [8, 8, 7, 5]
    },
    {
        "id": "eq11",
        "situation": (
            "James finds old love letters in the attic from his wife, written to "
            "someone else before they met. The letters are passionate and describe "
            "a depth of feeling he's not sure she's ever expressed toward him."
        ),
        "emotions": ["Jealousy", "Insecurity", "Curiosity", "Sadness"],
        "reference": [6, 7, 5, 4]
    },
    {
        "id": "eq12",
        "situation": (
            "A teenager confesses to her mother that she's been self-harming. The mother, "
            "who is a psychologist, realizes she completely missed the signs despite her "
            "professional training."
        ),
        "emotions": ["Fear", "Guilt", "Love", "Self_criticism"],
        "reference": [8, 8, 9, 7]
    },
    {
        "id": "eq13",
        "situation": (
            "A war veteran returns home after two years to find his dog waiting on the "
            "porch, much older and thinner. The dog recognizes him immediately and "
            "limps over, tail wagging."
        ),
        "emotions": ["Joy", "Guilt", "Tenderness", "Sorrow"],
        "reference": [8, 5, 9, 4]
    },
    {
        "id": "eq14",
        "situation": (
            "During a job interview, the candidate realizes the interviewer is someone "
            "she bullied badly in high school. The interviewer clearly recognizes her "
            "but proceeds professionally."
        ),
        "emotions": ["Shame", "Anxiety", "Admiration", "Regret"],
        "reference": [7, 8, 4, 7]
    },
    {
        "id": "eq15",
        "situation": (
            "A father watches his daughter's wedding, knowing he has a terminal diagnosis "
            "he hasn't shared with the family. He chose to wait until after the wedding "
            "to tell them."
        ),
        "emotions": ["Joy", "Grief", "Protectiveness", "Isolation"],
        "reference": [6, 8, 8, 7]
    },
    {
        "id": "eq16",
        "situation": (
            "Two siblings meet for the first time as adults after being separated in "
            "foster care as children. They look alike but have lived completely different "
            "lives. One is wealthy, the other struggles financially."
        ),
        "emotions": ["Wonder", "Resentment", "Hope", "Grief"],
        "reference": [7, 3, 7, 6]
    },
]
|
||||
|
||||
|
||||
def build_eq_prompt(scenario: dict) -> str:
    """Render the rating prompt for a single EQ scenario.

    The prompt asks for exactly four comma-separated 0-10 ratings in the
    order of scenario["emotions"], with no surrounding text.
    """
    listed = ", ".join(scenario["emotions"])
    pieces = [
        "Read the following situation and rate the emotional intensity that ",
        "the main character is likely feeling for each of the listed emotions. ",
        "Rate each emotion from 0 (not feeling it at all) to 10 (extremely intense).\n\n",
        f"Situation: {scenario['situation']}\n\n",
        f"Rate these emotions: {listed}\n\n",
        "Respond ONLY with the four numbers separated by commas, in the same order. ",
        "Example format: 5, 3, 8, 2\n",
        "Do not include any other text.",
    ]
    return "".join(pieces)
|
||||
|
||||
|
||||
def parse_eq_response(response: str, n_emotions: int = 4) -> list[float] | None:
|
||||
"""Extract emotion ratings from model response."""
|
||||
# Try to find comma-separated numbers
|
||||
numbers = re.findall(r'(\d+(?:\.\d+)?)', response)
|
||||
|
||||
if len(numbers) < n_emotions:
|
||||
return None
|
||||
|
||||
try:
|
||||
# Take the first n_emotions numbers found
|
||||
ratings = [float(numbers[i]) for i in range(n_emotions)]
|
||||
# Clamp to 0-10
|
||||
ratings = [max(0.0, min(10.0, r)) for r in ratings]
|
||||
return ratings
|
||||
except (ValueError, IndexError):
|
||||
return None
|
||||
|
||||
|
||||
def score_eq_response(reference: list[int], predicted: list[float]) -> float:
    """Score predicted emotion ratings against the reference (EQ-Bench v2 style).

    Small deviations (diff <= 4) are softened quadratically (diff^2 / 4, which
    meets the linear part exactly at diff=4); larger deviations count 1:1.
    Returns 100.0 for a perfect match, 0.0 for an unparseable or length-
    mismatched prediction, clamped at 0 for maximally wrong answers.
    """
    if predicted is None or len(predicted) != len(reference):
        return 0.0

    # Worst case: every rating is off by the full 0-10 scale.
    worst = 10.0 * len(reference)

    penalty = 0.0
    for ref, pred in zip(reference, predicted):
        diff = abs(ref - pred)
        penalty += (diff * diff) / 4.0 if diff <= 4 else diff

    # Map accumulated penalty [0, worst] onto a score of [100, ~0].
    return max(0.0, 100.0 * (1.0 - penalty / worst))
|
||||
|
||||
|
||||
# Convenience: pre-rendered (scenario, prompt) pairs for sweep runners.
EQ_PROMPTS = [(s, build_eq_prompt(s)) for s in EQ_SCENARIOS]


if __name__ == "__main__":
    # Smoke test: dump scenario metadata and sanity-check the scorer.
    print(f"EQ Probe: {len(EQ_SCENARIOS)} scenarios")
    print("=" * 60)

    # (Fix: the original used enumerate() but never used the index.)
    for scenario in EQ_SCENARIOS:
        print(f"\n[{scenario['id']}] Emotions: {scenario['emotions']}")
        print(f" Reference: {scenario['reference']}")
        prompt = build_eq_prompt(scenario)
        print(f" Prompt length: {len(prompt)} chars")

    # Test scoring
    print("\n\nScoring tests:")
    print(f" Perfect match: {score_eq_response([7, 9, 6, 4], [7, 9, 6, 4]):.1f}")
    print(f" All off by 1: {score_eq_response([7, 9, 6, 4], [8, 8, 7, 5]):.1f}")
    print(f" All off by 3: {score_eq_response([7, 9, 6, 4], [4, 6, 3, 1]):.1f}")
    print(f" All off by 5: {score_eq_response([7, 9, 6, 4], [2, 4, 1, 0]):.1f}")
    print(f" Worst case: {score_eq_response([7, 9, 6, 4], [0, 0, 0, 10]):.1f}")
    print(f" Unparseable: {score_eq_response([7, 9, 6, 4], None):.1f}")
|
||||
221
gguf_surgery.py
Normal file
221
gguf_surgery.py
Normal file
|
|
@ -0,0 +1,221 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
GGUF Layer Duplication Surgery
|
||||
|
||||
Reads a GGUF model file, duplicates transformer layers i..j-1 so they
|
||||
execute twice in the forward pass, and writes a new GGUF with the
|
||||
modified layer structure.
|
||||
|
||||
For a model with N layers, configuration (i, j) produces:
|
||||
layers 0..j-1, then layers i..j-1 again, then layers j..N-1
|
||||
Total layers: N + (j - i)
|
||||
|
||||
Tensor naming convention: blk.{layer_idx}.{tensor_name}
|
||||
Non-block tensors (token_embd, output_norm, output) are copied as-is.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import re
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
from tqdm import tqdm
|
||||
|
||||
import gguf
|
||||
from gguf import GGUFReader, GGUFWriter, GGUFValueType
|
||||
|
||||
|
||||
BLK_PATTERN = re.compile(r'^blk\.(\d+)\.(.+)$')
|
||||
|
||||
|
||||
def get_field_value(reader: GGUFReader, key: str):
    """Return the decoded value of metadata field *key*, or None if absent."""
    field = reader.get_field(key)
    return None if field is None else field.contents()
|
||||
|
||||
|
||||
def duplicate_layers(input_path: str, output_path: str, dup_start: int, dup_end: int, verbose: bool = False):
    """
    Create a new GGUF with layers dup_start..dup_end-1 duplicated.

    The new layer order is:
    Original layers 0..dup_end-1
    Duplicated layers dup_start..dup_end-1 (renumbered)
    Original layers dup_end..N-1 (renumbered)

    Args:
        input_path: source GGUF file to read.
        output_path: destination GGUF file to create.
        dup_start: first layer of the duplicated range (inclusive).
        dup_end: end of the duplicated range (exclusive).
        verbose: print architecture info, the full layer map, and output size.

    Raises:
        ValueError: if the architecture or block count cannot be read, or
            the (dup_start, dup_end) range is invalid for the model.
    """
    reader = GGUFReader(input_path, 'r')

    arch = get_field_value(reader, gguf.Keys.General.ARCHITECTURE)
    if arch is None:
        raise ValueError("Could not read architecture from GGUF")

    # The layer count lives under an architecture-specific key,
    # e.g. "llama.block_count".
    block_count_key = f'{arch}.block_count'
    orig_block_count = get_field_value(reader, block_count_key)
    if orig_block_count is None:
        raise ValueError(f"Could not read {block_count_key} from GGUF")

    n_dup = dup_end - dup_start
    new_block_count = orig_block_count + n_dup

    if verbose:
        print(f"Architecture: {arch}")
        print(f"Original layers: {orig_block_count}")
        print(f"Duplicating layers {dup_start}..{dup_end - 1} ({n_dup} layers)")
        print(f"New layer count: {new_block_count}")

    if dup_start < 0 or dup_end > orig_block_count or dup_start >= dup_end:
        raise ValueError(
            f"Invalid duplication range ({dup_start}, {dup_end}) "
            f"for model with {orig_block_count} layers"
        )

    # Build layer mapping: new_idx -> original_layer_idx
    # Phase 1: original 0..dup_end-1 keep their indices
    # Phase 2: duplicates of dup_start..dup_end-1 get indices dup_end..dup_end+n_dup-1
    # Phase 3: original dup_end..N-1 shift up by n_dup
    layer_map = {}

    for orig_idx in range(dup_end):
        layer_map[orig_idx] = orig_idx

    for k in range(n_dup):
        layer_map[dup_end + k] = dup_start + k

    for orig_idx in range(dup_end, orig_block_count):
        layer_map[orig_idx + n_dup] = orig_idx

    if verbose:
        print("Layer mapping (new -> orig):")
        for new_idx in sorted(layer_map.keys()):
            # Tag the second pass over the duplicated range.
            tag = " [DUP]" if (dup_end <= new_idx < dup_end + n_dup) else ""
            print(f" new {new_idx:3d} -> orig {layer_map[new_idx]:3d}{tag}")

    # Create writer
    writer = GGUFWriter(output_path, arch=arch, endianess=reader.endianess)

    # Preserve the source file's tensor-data alignment, when recorded.
    alignment = get_field_value(reader, gguf.Keys.General.ALIGNMENT)
    if alignment is not None:
        writer.data_alignment = alignment

    # Copy metadata, overriding block_count.
    # Skip the architecture key (the writer already set it) and the GGUF.*
    # virtual fields that describe the container itself.
    for field in reader.fields.values():
        if field.name == gguf.Keys.General.ARCHITECTURE or field.name.startswith('GGUF.'):
            continue

        val_type = field.types[0]
        # Arrays carry their element type as the last entry of field.types.
        sub_type = field.types[-1] if val_type == GGUFValueType.ARRAY else None

        if field.name == block_count_key:
            writer.add_key_value(field.name, new_block_count, val_type)
            if verbose:
                print(f"Modified {field.name}: {orig_block_count} -> {new_block_count}")
        else:
            val = field.contents()
            if val is not None:
                writer.add_key_value(field.name, val, val_type, sub_type=sub_type)

    # Organize tensors by type
    non_block_tensors = []
    block_tensors = {}  # orig_layer_idx -> [(suffix, tensor), ...]

    for tensor in reader.tensors:
        match = BLK_PATTERN.match(tensor.name)
        if match:
            layer_idx = int(match.group(1))
            suffix = match.group(2)
            if layer_idx not in block_tensors:
                block_tensors[layer_idx] = []
            block_tensors[layer_idx].append((suffix, tensor))
        else:
            non_block_tensors.append(tensor)

    # Split non-block tensors into pre-block and post-block.
    # Heuristic: anything with "output" in its name (output_norm, output)
    # is written after the blocks; everything else (e.g. token_embd) before.
    pre_block = []
    post_block = []
    for t in non_block_tensors:
        if 'output' in t.name:
            post_block.append(t)
        else:
            pre_block.append(t)

    # Add tensor infos in order and build write queue.
    # Duplicated layers reuse the ORIGINAL tensor data under new blk.N names,
    # so the data is written twice but never modified.
    total_bytes = 0
    block_write_order = []  # (new_name, original_tensor)

    for tensor in pre_block:
        writer.add_tensor_info(
            tensor.name, tensor.data.shape, tensor.data.dtype,
            tensor.data.nbytes, tensor.tensor_type
        )
        total_bytes += tensor.n_bytes

    for new_idx in range(new_block_count):
        orig_idx = layer_map[new_idx]
        if orig_idx not in block_tensors:
            # Best-effort: warn and keep going rather than abort mid-write.
            print(f"WARNING: No tensors for original layer {orig_idx}", file=sys.stderr)
            continue
        for suffix, tensor in block_tensors[orig_idx]:
            new_name = f"blk.{new_idx}.{suffix}"
            writer.add_tensor_info(
                new_name, tensor.data.shape, tensor.data.dtype,
                tensor.data.nbytes, tensor.tensor_type
            )
            total_bytes += tensor.n_bytes
            block_write_order.append((new_name, tensor))

    for tensor in post_block:
        writer.add_tensor_info(
            tensor.name, tensor.data.shape, tensor.data.dtype,
            tensor.data.nbytes, tensor.tensor_type
        )
        total_bytes += tensor.n_bytes

    # Write file. Tensor data below must be emitted in exactly the same
    # order the infos were added above.
    writer.write_header_to_file()
    writer.write_kv_data_to_file()
    writer.write_ti_data_to_file()

    bar = tqdm(desc="Writing GGUF", total=total_bytes, unit="B", unit_scale=True)

    for tensor in pre_block:
        writer.write_tensor_data(tensor.data)
        bar.update(tensor.n_bytes)

    for _, tensor in block_write_order:
        writer.write_tensor_data(tensor.data)
        bar.update(tensor.n_bytes)

    for tensor in post_block:
        writer.write_tensor_data(tensor.data)
        bar.update(tensor.n_bytes)

    bar.close()
    writer.close()

    if verbose:
        print(f"Done. Written to {output_path}")
        out_size = Path(output_path).stat().st_size / (1024**3)
        print(f"Output size: {out_size:.2f} GiB")
|
||||
|
||||
|
||||
def main():
    """CLI entry point for the layer-duplication surgery."""
    ap = argparse.ArgumentParser(
        description="Duplicate layers in a GGUF model (RYS method)"
    )
    ap.add_argument("input", help="Input GGUF file path")
    ap.add_argument("output", help="Output GGUF file path")
    ap.add_argument("-i", "--dup-start", type=int, required=True,
                    help="First layer to duplicate (inclusive)")
    ap.add_argument("-j", "--dup-end", type=int, required=True,
                    help="Last layer to duplicate (exclusive)")
    ap.add_argument("-v", "--verbose", action="store_true")
    args = ap.parse_args()

    duplicate_layers(args.input, args.output, args.dup_start, args.dup_end, args.verbose)


if __name__ == "__main__":
    main()
|
||||
248
layer_path.py
Normal file
248
layer_path.py
Normal file
|
|
@ -0,0 +1,248 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Explicit Layer Path GGUF Surgery
|
||||
|
||||
You provide the exact sequence of layers the model should execute.
|
||||
No ambiguous range notation — just list the layers.
|
||||
|
||||
Examples:
|
||||
# Normal 40-layer model (identity, for testing)
|
||||
python layer_path.py model.gguf out.gguf -p 0,1,2,...,39
|
||||
|
||||
# Duplicate layers 13-16 once (same as RYS with i=13,j=17)
|
||||
python layer_path.py model.gguf out.gguf -p 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,13,14,15,16,17,18,...,39
|
||||
|
||||
# Repeat layer 13 four times
|
||||
python layer_path.py model.gguf out.gguf -p 0,1,...,12,13,13,13,13,14,15,...,39
|
||||
|
||||
# Triple-pass layers 13-16
|
||||
python layer_path.py model.gguf out.gguf -p 0,1,...,16,13,14,15,16,13,14,15,16,17,...,39
|
||||
|
||||
# Shorthand: use .. to fill in sequential ranges
|
||||
python layer_path.py model.gguf out.gguf -p 0..16,13,14,15,16,13,14,15,16,17..39
|
||||
|
||||
Usage:
|
||||
python layer_path.py input.gguf output.gguf -p "0..16,13,14,15,16,17..39" -v
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import re
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
from tqdm import tqdm
|
||||
|
||||
import gguf
|
||||
from gguf import GGUFReader, GGUFWriter, GGUFValueType
|
||||
|
||||
|
||||
BLK_PATTERN = re.compile(r'^blk\.(\d+)\.(.+)$')
|
||||
|
||||
|
||||
def get_field_value(reader, key):
    """Return the decoded contents of metadata field *key*, or None if missing."""
    field = reader.get_field(key)
    return None if field is None else field.contents()
|
||||
|
||||
|
||||
def parse_layer_path(path_str: str) -> list[int]:
    """
    Parse a layer path string into a list of layer indices.

    Supports:
    - Individual numbers: 0,1,2,13,13,14
    - Inclusive ranges with ..: 0..16 expands to 0,1,2,...,16
    - Mixed: 0..12,13,13,13,14..39

    Whitespace is ignored.

    Raises:
        ValueError: on a malformed range token or a descending range.
    """
    # Drop all whitespace, split on commas, ignore empty tokens.
    tokens = [t for t in path_str.replace(' ', '').split(',') if t]

    layers: list[int] = []
    for token in tokens:
        if '..' not in token:
            layers.append(int(token))
            continue
        # Range token: start..end (inclusive at both ends).
        bounds = token.split('..')
        if len(bounds) != 2:
            raise ValueError(f"Invalid range: '{token}'. Use 'start..end'")
        start = int(bounds[0])
        end = int(bounds[1])
        if start > end:
            raise ValueError(f"Invalid range: {start}..{end} (start > end)")
        layers.extend(range(start, end + 1))

    return layers
|
||||
|
||||
|
||||
def build_gguf_from_path(input_path: str, output_path: str,
                         layer_path: list[int], verbose: bool = False):
    """
    Create a new GGUF where the forward pass follows the given layer path.

    Args:
        input_path: source GGUF file to read.
        output_path: destination GGUF file to create.
        layer_path: explicit sequence of original layer indices to execute;
            position k in this list becomes new layer k.
        verbose: print mapping and progress details.

    Raises:
        ValueError: if a path entry is outside the model's layer range or a
            referenced layer has no tensors in the source file.
    """
    reader = GGUFReader(input_path, 'r')

    arch = get_field_value(reader, gguf.Keys.General.ARCHITECTURE)
    # The layer count lives under an architecture-specific key,
    # e.g. "llama.block_count".
    block_count_key = f'{arch}.block_count'
    orig_block_count = get_field_value(reader, block_count_key)

    # Validate all layer indices
    for idx in layer_path:
        if idx < 0 or idx >= orig_block_count:
            raise ValueError(
                f"Layer {idx} out of range (model has {orig_block_count} layers, 0..{orig_block_count-1})"
            )

    new_block_count = len(layer_path)

    if verbose:
        print(f"Architecture: {arch}")
        print(f"Original layers: {orig_block_count}")
        print(f"New layer count: {new_block_count}")
        print(f"Layer path: {layer_path}")

        # Show which layers are repeated
        from collections import Counter
        counts = Counter(layer_path)
        repeated = {k: v for k, v in counts.items() if v > 1}
        if repeated:
            print(f"Repeated layers: {dict(sorted(repeated.items()))}")
        else:
            print("No repeated layers (just a reorder)")

    # layer_map: new_position -> original_layer_index
    layer_map = {new_idx: orig_idx for new_idx, orig_idx in enumerate(layer_path)}

    # Create writer
    writer = GGUFWriter(output_path, arch=arch, endianess=reader.endianess)

    # Preserve the source file's tensor-data alignment, when recorded.
    alignment = get_field_value(reader, gguf.Keys.General.ALIGNMENT)
    if alignment is not None:
        writer.data_alignment = alignment

    # Copy metadata, override block count. Skip the architecture key (the
    # writer already set it) and GGUF.* container fields.
    for field in reader.fields.values():
        if field.name == gguf.Keys.General.ARCHITECTURE or field.name.startswith('GGUF.'):
            continue
        val_type = field.types[0]
        # Arrays carry their element type as the last entry of field.types.
        sub_type = field.types[-1] if val_type == GGUFValueType.ARRAY else None
        if field.name == block_count_key:
            writer.add_key_value(field.name, new_block_count, val_type)
        else:
            val = field.contents()
            if val is not None:
                writer.add_key_value(field.name, val, val_type, sub_type=sub_type)

    # Organize tensors by layer
    non_block_tensors = []
    block_tensors = {}  # orig_layer_idx -> [(suffix, tensor), ...]

    for tensor in reader.tensors:
        match = BLK_PATTERN.match(tensor.name)
        if match:
            layer_idx = int(match.group(1))
            suffix = match.group(2)
            if layer_idx not in block_tensors:
                block_tensors[layer_idx] = []
            block_tensors[layer_idx].append((suffix, tensor))
        else:
            non_block_tensors.append(tensor)

    # Heuristic split: "output"-named tensors (output_norm, output) go after
    # the blocks; everything else (e.g. token_embd) goes before.
    pre_block = [t for t in non_block_tensors if 'output' not in t.name]
    post_block = [t for t in non_block_tensors if 'output' in t.name]

    # Add tensor infos and build write order. Repeated layers reuse the
    # ORIGINAL tensor data under new blk.N names — data is duplicated on
    # disk, never modified.
    total_bytes = 0
    block_write_order = []

    for tensor in pre_block:
        writer.add_tensor_info(tensor.name, tensor.data.shape, tensor.data.dtype,
                               tensor.data.nbytes, tensor.tensor_type)
        total_bytes += tensor.n_bytes

    for new_idx in range(new_block_count):
        orig_idx = layer_map[new_idx]
        if orig_idx not in block_tensors:
            raise ValueError(f"No tensors found for original layer {orig_idx}")
        for suffix, tensor in block_tensors[orig_idx]:
            new_name = f"blk.{new_idx}.{suffix}"
            writer.add_tensor_info(new_name, tensor.data.shape, tensor.data.dtype,
                                   tensor.data.nbytes, tensor.tensor_type)
            total_bytes += tensor.n_bytes
            block_write_order.append(tensor)

    for tensor in post_block:
        writer.add_tensor_info(tensor.name, tensor.data.shape, tensor.data.dtype,
                               tensor.data.nbytes, tensor.tensor_type)
        total_bytes += tensor.n_bytes

    # Write. Tensor data below must follow exactly the same order in which
    # the tensor infos were added above.
    writer.write_header_to_file()
    writer.write_kv_data_to_file()
    writer.write_ti_data_to_file()

    bar = tqdm(desc="Writing GGUF", total=total_bytes, unit="B", unit_scale=True)

    for tensor in pre_block:
        writer.write_tensor_data(tensor.data)
        bar.update(tensor.n_bytes)

    for tensor in block_write_order:
        writer.write_tensor_data(tensor.data)
        bar.update(tensor.n_bytes)

    for tensor in post_block:
        writer.write_tensor_data(tensor.data)
        bar.update(tensor.n_bytes)

    bar.close()
    writer.close()

    if verbose:
        out_size = Path(output_path).stat().st_size / (1024**3)
        print(f"Done. Output: {out_size:.2f} GiB")
|
||||
|
||||
|
||||
def main():
    """CLI entry point: parse a layer-path spec and rebuild the GGUF.

    The -p/--path argument is an explicit layer execution path; range
    syntax like "0..16" is expanded by parse_layer_path before the
    model is rewritten by build_gguf_from_path.
    """
    parser = argparse.ArgumentParser(
        description="Build GGUF with explicit layer execution path",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Duplicate layers 13-16 once (RYS style)
  %(prog)s model.gguf out.gguf -p "0..16,13,14,15,16,17..39"

  # Triple-pass layers 13-16
  %(prog)s model.gguf out.gguf -p "0..16,13,14,15,16,13,14,15,16,17..39"

  # Repeat just layer 13 four times
  %(prog)s model.gguf out.gguf -p "0..12,13,13,13,13,14..39"

  # Skip layer 5 entirely
  %(prog)s model.gguf out.gguf -p "0..4,6..39"
"""
    )
    parser.add_argument("input", help="Input GGUF file")
    parser.add_argument("output", help="Output GGUF file")
    parser.add_argument("-p", "--path", required=True,
                        help="Layer execution path (e.g. '0..16,13,14,15,16,17..39')")
    parser.add_argument("-v", "--verbose", action="store_true")
    args = parser.parse_args()

    # Expand range syntax into an explicit list of original-layer indices.
    layer_path = parse_layer_path(args.path)
    print(f"Model: {args.input}")
    print(f"Output: {args.output}")
    print(f"Layer path ({len(layer_path)} layers): {layer_path}")

    build_gguf_from_path(args.input, args.output, layer_path, args.verbose)


if __name__ == "__main__":
    main()
|
||||
155
math_probe.py
Normal file
155
math_probe.py
Normal file
|
|
@ -0,0 +1,155 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Math Probe for RYS Layer Duplication Sweep
|
||||
|
||||
Hard arithmetic questions where the model must guess the answer
|
||||
without chain-of-thought. Scored with Ng's partial-credit function.
|
||||
"""
|
||||
|
||||
import json
|
||||
import math
|
||||
import random
|
||||
|
||||
|
||||
def calculate_score(actual, estimate):
    """
    Ng's partial-credit scoring function for numeric estimates.

    The two values are compared digit-string-wise: the shorter digit
    string is right-padded with zeros to the longer one's length, the
    relative difference of the padded values is taken, and a correction
    factor proportional to the amount of padding reduces the score.

    Returns a float in [0, 1]; 1.0 is an exact match, 0.0 a miss
    (including unparseable input or a sign mismatch).
    """
    try:
        actual_int = int(actual)
        estimate_int = int(estimate)
    except (ValueError, OverflowError):
        return 0.0

    # A sign mismatch is a categorical miss, not a near-miss.
    if (actual_int < 0) != (estimate_int < 0):
        return 0.0

    # Work on magnitudes: the original padding/relative-diff formula
    # assumed non-negative values (a negative denominator could clamp a
    # wildly wrong estimate to a perfect 1.0 score).
    actual_str = str(abs(actual_int))
    estimate_str = str(abs(estimate_int))

    max_length = max(len(actual_str), len(estimate_str))
    actual_padded = actual_str.ljust(max_length, "0")
    estimate_padded = estimate_str.ljust(max_length, "0")
    padding_size = max_length - min(len(actual_str), len(estimate_str))

    a = int(actual_padded)
    e = int(estimate_padded)

    if max(a, e) == 0:
        return 0.0

    relative_diff = abs(a - e) / max(a, e)
    correction_factor = 1 - (padding_size / max_length)
    score = (1 - relative_diff) * correction_factor

    return max(0.0, min(score, 1.0))
|
||||
|
||||
|
||||
def generate_math_questions(seed=42):
    """
    Build a fixed set of hard arithmetic questions with known answers.

    Four families of four questions each — cube roots, six-digit
    products, square roots, and cube-root-times-multiplier — drawn from
    a seeded RNG so every run with the same seed yields the same set.

    Returns a list of (question_text, correct_answer) tuples.
    """
    rng = random.Random(seed)
    suffix = "Answer with just the number, no explanation."
    questions = []

    # Cube roots of large perfect cubes.
    for _ in range(4):
        base = rng.randint(20000, 50000)
        questions.append(
            (f"What is the cube root of {base ** 3}? {suffix}", base)
        )

    # Products of two six-digit numbers.
    for _ in range(4):
        left = rng.randint(100000, 999999)
        right = rng.randint(100000, 999999)
        questions.append(
            (f"What is {left} multiplied by {right}? {suffix}", left * right)
        )

    # Square roots of large perfect squares.
    for _ in range(4):
        base = rng.randint(50000, 200000)
        questions.append(
            (f"What is the square root of {base ** 2}? {suffix}", base)
        )

    # Mixed: cube root followed by a multiplication.
    for _ in range(4):
        base = rng.randint(100, 999)
        factor = rng.randint(10, 99)
        questions.append(
            (
                f"What is the cube root of {base ** 3}, multiplied by {factor}? {suffix}",
                base * factor,
            )
        )

    return questions
|
||||
|
||||
|
||||
def parse_number_from_response(response: str) -> int | None:
|
||||
"""
|
||||
Extract the first integer from a model response.
|
||||
Handles common LLM quirks: commas in numbers, trailing text, etc.
|
||||
"""
|
||||
import re
|
||||
|
||||
# Clean up common formatting
|
||||
text = response.strip()
|
||||
|
||||
# Try to find a number (possibly with commas)
|
||||
# Match negative or positive integers, possibly with commas
|
||||
patterns = [
|
||||
r'[-+]?[\d,]+', # numbers with optional commas
|
||||
]
|
||||
|
||||
for pattern in patterns:
|
||||
matches = re.findall(pattern, text)
|
||||
if matches:
|
||||
# Take the first/longest match
|
||||
num_str = max(matches, key=len)
|
||||
num_str = num_str.replace(',', '')
|
||||
try:
|
||||
return int(num_str)
|
||||
except ValueError:
|
||||
continue
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def score_math_response(question_answer: int, response: str) -> float:
    """Score one math response against its known answer (0-1 partial credit)."""
    estimate = parse_number_from_response(response)
    return 0.0 if estimate is None else calculate_score(question_answer, estimate)
|
||||
|
||||
|
||||
# Pre-generated questions for consistency across runs: every sweep
# configuration is probed with the identical seeded question set.
MATH_QUESTIONS = generate_math_questions(seed=42)


if __name__ == "__main__":
    # Print the questions and answers for verification
    print("Math Probe Questions:")
    print("=" * 60)
    for i, (q, a) in enumerate(MATH_QUESTIONS):
        print(f"\n[{i+1}] {q}")
        print(f" Answer: {a}")

    # Test the scoring function on known cases (exact / partial / miss).
    print("\n\nScoring tests:")
    print(f" Exact match: {calculate_score(4302459, 4302459):.4f}")
    print(f" Missing digit: {calculate_score(4302459, 430245):.4f}")
    print(f" One digit off: {calculate_score(123456789, 123356789):.4f}")
    print(f" Way off: {calculate_score(4302459, 9999999):.4f}")
    print(f" Zero vs nonzero: {calculate_score(4302459, 0):.4f}")
|
||||
198
multi_repeat.py
Normal file
198
multi_repeat.py
Normal file
|
|
@ -0,0 +1,198 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Multi-repeat GGUF surgery.
|
||||
|
||||
Duplicates a block of layers N times total (default 3 = original + 2 copies).
|
||||
|
||||
For (i=13, j=17, repeats=3) on a 40-layer model:
|
||||
0..16, 13..16, 13..16, 17..39 = 48 layers total
|
||||
The block 13-16 executes 3 times.
|
||||
|
||||
Usage:
|
||||
python multi_repeat.py \
|
||||
/path/to/model.gguf \
|
||||
/dev/shm/rys/triple_13_17.gguf \
|
||||
-i 13 -j 17 -n 3 -v
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import re
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
from tqdm import tqdm
|
||||
|
||||
import gguf
|
||||
from gguf import GGUFReader, GGUFWriter, GGUFValueType
|
||||
|
||||
BLK_PATTERN = re.compile(r'^blk\.(\d+)\.(.+)$')
|
||||
|
||||
|
||||
def get_field_value(reader, key):
    """Return the decoded value of a GGUF metadata field, or None if absent."""
    field = reader.get_field(key)
    return None if field is None else field.contents()
|
||||
|
||||
|
||||
def multi_repeat_layers(input_path, output_path, dup_start, dup_end, n_repeats, verbose=False):
    """
    Rewrite a GGUF so the layer block [dup_start, dup_end) executes
    n_repeats times in total.

    For (13, 17, 3) on a 40-layer model the execution path becomes
    0..16, 13..16, 13..16, 17..39 (48 layers): the original pass counts
    as repeat #1 and the extra copies are inserted right after it.
    Duplicated layers reuse the same underlying tensor data, so the
    output file grows by the size of the copied block's tensors.

    Args:
        input_path: source GGUF file.
        output_path: destination GGUF file (fully rewritten).
        dup_start: first layer of the repeated block (inclusive).
        dup_end: end of the repeated block (exclusive).
        n_repeats: total times the block executes (>= 1).
        verbose: print the layer map, execution path, and final size.
    """
    reader = GGUFReader(input_path, 'r')

    arch = get_field_value(reader, gguf.Keys.General.ARCHITECTURE)
    block_count_key = f'{arch}.block_count'
    orig_block_count = get_field_value(reader, block_count_key)

    n_block = dup_end - dup_start
    extra_copies = n_repeats - 1  # original pass counts as 1
    new_block_count = orig_block_count + (n_block * extra_copies)

    if verbose:
        print(f"Architecture: {arch}")
        print(f"Original layers: {orig_block_count}")
        print(f"Block: layers {dup_start}..{dup_end - 1} ({n_block} layers)")
        print(f"Repeats: {n_repeats}x (original + {extra_copies} copies)")
        print(f"New layer count: {new_block_count}")

    # Build layer map: new layer index -> original layer index.
    # Phase 1: original 0..dup_end-1
    # Phase 2..N: copies of dup_start..dup_end-1
    # Phase last: original dup_end..orig-1 (shifted)
    layer_map = {}

    # Phase 1: original layers up to dup_end
    for idx in range(dup_end):
        layer_map[idx] = idx

    # Phase 2+: extra copies
    offset = dup_end
    for copy in range(extra_copies):
        for k in range(n_block):
            layer_map[offset + k] = dup_start + k
        offset += n_block

    # Phase last: remaining original layers shifted
    for orig_idx in range(dup_end, orig_block_count):
        layer_map[orig_idx + (n_block * extra_copies)] = orig_idx

    assert len(layer_map) == new_block_count

    if verbose:
        # Pretty-print the path as contiguous runs, e.g. [0..16] [13..16].
        path = [layer_map[i] for i in range(new_block_count)]
        print(f"Execution path ({len(path)} layers):")
        i = 0
        while i < len(path):
            run_start = path[i]
            run_end = run_start
            j = i + 1
            while j < len(path) and path[j] == run_end + 1:
                run_end = path[j]
                j += 1
            if run_start == run_end:
                print(f" [{run_start}]")
            else:
                print(f" [{run_start}..{run_end}]")
            i = j

    # Create writer
    writer = GGUFWriter(output_path, arch=arch, endianess=reader.endianess)

    alignment = get_field_value(reader, gguf.Keys.General.ALIGNMENT)
    if alignment is not None:
        writer.data_alignment = alignment

    # Copy metadata, patching only the block count. Architecture is set
    # by the writer's constructor, and GGUF.* keys are file-structure
    # fields the writer regenerates itself.
    for field in reader.fields.values():
        if field.name == gguf.Keys.General.ARCHITECTURE or field.name.startswith('GGUF.'):
            continue
        val_type = field.types[0]
        sub_type = field.types[-1] if val_type == GGUFValueType.ARRAY else None
        if field.name == block_count_key:
            writer.add_key_value(field.name, new_block_count, val_type)
        else:
            val = field.contents()
            if val is not None:
                writer.add_key_value(field.name, val, val_type, sub_type=sub_type)

    # Organize tensors: per-layer ("blk.N.*") vs everything else.
    non_block_tensors = []
    block_tensors = {}
    for tensor in reader.tensors:
        match = BLK_PATTERN.match(tensor.name)
        if match:
            layer_idx = int(match.group(1))
            suffix = match.group(2)
            if layer_idx not in block_tensors:
                block_tensors[layer_idx] = []
            block_tensors[layer_idx].append((suffix, tensor))
        else:
            non_block_tensors.append(tensor)

    # Non-block tensors whose names contain 'output' (output norm /
    # output head) go after the layer stack; everything else before it.
    pre_block = [t for t in non_block_tensors if 'output' not in t.name]
    post_block = [t for t in non_block_tensors if 'output' in t.name]

    total_bytes = 0
    block_write_order = []

    for tensor in pre_block:
        writer.add_tensor_info(tensor.name, tensor.data.shape, tensor.data.dtype,
                               tensor.data.nbytes, tensor.tensor_type)
        total_bytes += tensor.n_bytes

    # Register every (possibly duplicated) layer under its new index;
    # copies reference the same source tensor object.
    for new_idx in range(new_block_count):
        orig_idx = layer_map[new_idx]
        for suffix, tensor in block_tensors[orig_idx]:
            new_name = f"blk.{new_idx}.{suffix}"
            writer.add_tensor_info(new_name, tensor.data.shape, tensor.data.dtype,
                                   tensor.data.nbytes, tensor.tensor_type)
            total_bytes += tensor.n_bytes
            block_write_order.append(tensor)

    for tensor in post_block:
        writer.add_tensor_info(tensor.name, tensor.data.shape, tensor.data.dtype,
                               tensor.data.nbytes, tensor.tensor_type)
        total_bytes += tensor.n_bytes

    # Write header/metadata/tensor-info, then raw tensor data in the
    # exact order the tensor infos were registered above.
    writer.write_header_to_file()
    writer.write_kv_data_to_file()
    writer.write_ti_data_to_file()

    bar = tqdm(desc="Writing GGUF", total=total_bytes, unit="B", unit_scale=True)

    for tensor in pre_block:
        writer.write_tensor_data(tensor.data)
        bar.update(tensor.n_bytes)
    for tensor in block_write_order:
        writer.write_tensor_data(tensor.data)
        bar.update(tensor.n_bytes)
    for tensor in post_block:
        writer.write_tensor_data(tensor.data)
        bar.update(tensor.n_bytes)

    bar.close()
    writer.close()

    if verbose:
        size = Path(output_path).stat().st_size / (1024**3)
        print(f"Written: {output_path} ({size:.2f} GiB)")
|
||||
|
||||
|
||||
def main():
    """CLI entry point: validate arguments, then run the layer duplication."""
    parser = argparse.ArgumentParser(description="Multi-repeat layer duplication")
    parser.add_argument("input", help="Input GGUF")
    parser.add_argument("output", help="Output GGUF")
    parser.add_argument("-i", "--dup-start", type=int, required=True)
    parser.add_argument("-j", "--dup-end", type=int, required=True)
    parser.add_argument("-n", "--repeats", type=int, default=3,
                        help="Total times the block executes (default: 3)")
    parser.add_argument("-v", "--verbose", action="store_true")
    args = parser.parse_args()

    # Fail fast on nonsensical ranges instead of producing a corrupt GGUF.
    if args.dup_start < 0 or args.dup_end <= args.dup_start:
        parser.error(f"invalid block: --dup-start {args.dup_start} must be >= 0 "
                     f"and strictly less than --dup-end {args.dup_end}")
    if args.repeats < 1:
        parser.error(f"--repeats must be >= 1 (got {args.repeats})")

    multi_repeat_layers(args.input, args.output,
                        args.dup_start, args.dup_end,
                        args.repeats, args.verbose)


if __name__ == "__main__":
    main()
|
||||
282
reasoning_probe.py
Normal file
282
reasoning_probe.py
Normal file
|
|
@ -0,0 +1,282 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Reasoning Probe for RYS Layer Duplication Sweep
|
||||
|
||||
BBH-style questions: causal judgement, date understanding, logical
|
||||
deduction, navigation, plus basic math word problems.
|
||||
All answers are short (Yes/No, a letter, or a number).
|
||||
"""
|
||||
|
||||
import re
|
||||
|
||||
|
||||
# ─── Causal Judgement (Yes/No) ─────────────────────────────────────
|
||||
|
||||
CAUSAL_QUESTIONS = [
|
||||
{
|
||||
"prompt": (
|
||||
"How would a typical person answer each of the following questions about causation?\n"
|
||||
"Frank T., had an ongoing dispute with his neighbor over a stretch of land and one day "
|
||||
"decided to shoot his neighbor in the body. Frank T. had no experience with guns, his "
|
||||
"hand slipped on the barrel of the gun, and the shot went wild. Nonetheless, the bullet "
|
||||
"bounced off a large boulder several feet away and hit the neighbor's body, causing "
|
||||
"significant injury. Did Frank T. intentionally shoot his neighbor in the body?\n"
|
||||
"Options:\n- Yes\n- No\n\n"
|
||||
"Think step by step, then answer Yes or No."
|
||||
),
|
||||
"answer": "no",
|
||||
},
|
||||
{
|
||||
"prompt": (
|
||||
"How would a typical person answer each of the following questions about causation?\n"
|
||||
"Suzy and Billy are working on a project that is very important for our nation's security. "
|
||||
"The boss tells them both: \"Be sure that you are here at exactly 9 am. It is absolutely "
|
||||
"essential that you arrive at that time.\" Both Billy and Suzy arrive at 9 am. As it happens, "
|
||||
"there was a motion detector installed in the room where they arrived. The motion detector "
|
||||
"was set up to be triggered if at least one person appeared in the room at the same time. "
|
||||
"So the motion detector went off. Did Billy cause the motion detector to go off?\n"
|
||||
"Options:\n- Yes\n- No\n\n"
|
||||
"Think step by step, then answer Yes or No."
|
||||
),
|
||||
"answer": "yes",
|
||||
},
|
||||
{
|
||||
"prompt": (
|
||||
"How would a typical person answer each of the following questions about causation?\n"
|
||||
"George and his sister Lena reunite at their parents' house for Thanksgiving. Whereas "
|
||||
"George just got into medical school, Lena is unhappy in her marriage and recently lost "
|
||||
"her job. Over the course of the day, George and Lena get into a number of heated arguments. "
|
||||
"Later in the afternoon they play a game of darts. They split the first two games, and the "
|
||||
"third game is close until the end. Who will win comes down to George's last shot. If he "
|
||||
"hits a high point region, he wins; if he hits a low point region, Lena wins. George thinks "
|
||||
"of the difficult time Lena is having, and he really wants to let her win. He aims the dart "
|
||||
"at the low point region. He sets up his shot and the dart lands in the low point region. "
|
||||
"After his shot, Lena wins the game and is very happy. Did George hit the low point region "
|
||||
"intentionally?\n"
|
||||
"Options:\n- Yes\n- No\n\n"
|
||||
"Think step by step, then answer Yes or No."
|
||||
),
|
||||
"answer": "yes",
|
||||
},
|
||||
]
|
||||
|
||||
# ─── Date Understanding (multiple choice letter) ──────────────────
|
||||
|
||||
DATE_QUESTIONS = [
|
||||
{
|
||||
"prompt": (
|
||||
"Today is Christmas Eve of 1937. What is the date 10 days ago in MM/DD/YYYY?\n"
|
||||
"Options:\n"
|
||||
"(A) 12/14/2026\n(B) 12/14/1950\n(C) 12/14/2007\n"
|
||||
"(D) 12/14/1937\n(E) 07/14/1938\n(F) 12/14/1988\n\n"
|
||||
"Think step by step, then give your answer as a single letter."
|
||||
),
|
||||
"answer": "d",
|
||||
},
|
||||
{
|
||||
"prompt": (
|
||||
"Tomorrow is 11/12/2019. What is the date one year ago from today in MM/DD/YYYY?\n"
|
||||
"Options:\n"
|
||||
"(A) 09/04/2018\n(B) 11/11/2018\n(C) 08/25/2018\n"
|
||||
"(D) 11/02/2018\n(E) 11/04/2018\n\n"
|
||||
"Think step by step, then give your answer as a single letter."
|
||||
),
|
||||
"answer": "b",
|
||||
},
|
||||
{
|
||||
"prompt": (
|
||||
"Jane and John married on Jan 2, 1958. It is their 5-year anniversary today. "
|
||||
"What is the date tomorrow in MM/DD/YYYY?\n"
|
||||
"Options:\n"
|
||||
"(A) 01/11/1961\n(B) 01/03/1963\n(C) 01/18/1961\n"
|
||||
"(D) 10/14/1960\n(E) 01/03/1982\n(F) 12/03/1960\n\n"
|
||||
"Think step by step, then give your answer as a single letter."
|
||||
),
|
||||
"answer": "b",
|
||||
},
|
||||
]
|
||||
|
||||
# ─── Logical Deduction (multiple choice letter) ───────────────────
|
||||
|
||||
LOGIC_QUESTIONS = [
|
||||
{
|
||||
"prompt": (
|
||||
"The following paragraphs each describe a set of three objects arranged in a fixed order. "
|
||||
"The statements are logically consistent within each paragraph.\n"
|
||||
"In a golf tournament, there were three golfers: Amy, Eli, and Eve. "
|
||||
"Eve finished above Amy. Eli finished below Amy.\n"
|
||||
"Options:\n"
|
||||
"(A) Amy finished last\n(B) Eli finished last\n(C) Eve finished last\n\n"
|
||||
"Think step by step, then give your answer as a single letter."
|
||||
),
|
||||
"answer": "b",
|
||||
},
|
||||
{
|
||||
"prompt": (
|
||||
"The following paragraphs each describe a set of three objects arranged in a fixed order. "
|
||||
"The statements are logically consistent within each paragraph.\n"
|
||||
"On a shelf, there are three books: a white book, a green book, and an orange book. "
|
||||
"The green book is to the right of the white book. The orange book is the rightmost.\n"
|
||||
"Options:\n"
|
||||
"(A) The white book is the leftmost\n(B) The green book is the leftmost\n"
|
||||
"(C) The orange book is the leftmost\n\n"
|
||||
"Think step by step, then give your answer as a single letter."
|
||||
),
|
||||
"answer": "a",
|
||||
},
|
||||
{
|
||||
"prompt": (
|
||||
"The following paragraphs each describe a set of three objects arranged in a fixed order. "
|
||||
"The statements are logically consistent within each paragraph.\n"
|
||||
"On a shelf, there are three books: a red book, a gray book, and a white book. "
|
||||
"The white book is to the left of the gray book. The red book is the second from the left.\n"
|
||||
"Options:\n"
|
||||
"(A) The red book is the leftmost\n(B) The gray book is the leftmost\n"
|
||||
"(C) The white book is the leftmost\n\n"
|
||||
"Think step by step, then give your answer as a single letter."
|
||||
),
|
||||
"answer": "c",
|
||||
},
|
||||
]
|
||||
|
||||
# ─── Navigation (Yes/No) ──────────────────────────────────────────
|
||||
|
||||
NAV_QUESTIONS = [
|
||||
{
|
||||
"prompt": (
|
||||
"If you follow these instructions, do you return to the starting point? "
|
||||
"Turn left. Turn around. Turn left. Take 7 steps. Take 2 steps. Take 4 steps. Take 8 steps.\n"
|
||||
"Options:\n- Yes\n- No\n\n"
|
||||
"Think step by step, then answer Yes or No."
|
||||
),
|
||||
"answer": "no",
|
||||
},
|
||||
{
|
||||
"prompt": (
|
||||
"If you follow these instructions, do you return to the starting point? "
|
||||
"Turn around. Take 1 step. Take 6 steps. Turn around. Take 6 steps. Take 9 steps. Take 1 step.\n"
|
||||
"Options:\n- Yes\n- No\n\n"
|
||||
"Think step by step, then answer Yes or No."
|
||||
),
|
||||
"answer": "no",
|
||||
},
|
||||
{
|
||||
"prompt": (
|
||||
"If you follow these instructions, do you return to the starting point? "
|
||||
"Always face forward. Take 2 steps right. Take 9 steps left. Take 7 steps right.\n"
|
||||
"Options:\n- Yes\n- No\n\n"
|
||||
"Think step by step, then answer Yes or No."
|
||||
),
|
||||
"answer": "yes",
|
||||
},
|
||||
]
|
||||
|
||||
# ─── GSM8K Word Problems (number answer) ──────────────────────────
|
||||
|
||||
GSM_QUESTIONS = [
|
||||
{
|
||||
"prompt": (
|
||||
"There are 15 trees in the grove. Grove workers will plant trees in the grove today. "
|
||||
"After they are done, there will be 21 trees. How many trees did the grove workers plant today?\n\n"
|
||||
"Solve step by step. End with 'The answer is [NUMBER]'."
|
||||
),
|
||||
"answer": "6",
|
||||
},
|
||||
{
|
||||
"prompt": (
|
||||
"If there are 3 cars in the parking lot and 2 more cars arrive, how many cars are in the parking lot?\n\n"
|
||||
"Solve step by step. End with 'The answer is [NUMBER]'."
|
||||
),
|
||||
"answer": "5",
|
||||
},
|
||||
{
|
||||
"prompt": (
|
||||
"Leah had 32 chocolates and her sister had 42. If they ate 35, how many pieces do they have left in total?\n\n"
|
||||
"Solve step by step. End with 'The answer is [NUMBER]'."
|
||||
),
|
||||
"answer": "39",
|
||||
},
|
||||
{
|
||||
"prompt": (
|
||||
"Jason had 20 lollipops. He gave Denny some lollipops. Now Jason has 12 lollipops. "
|
||||
"How many lollipops did Jason give to Denny?\n\n"
|
||||
"Solve step by step. End with 'The answer is [NUMBER]'."
|
||||
),
|
||||
"answer": "8",
|
||||
},
|
||||
{
|
||||
"prompt": (
|
||||
"Shawn has five toys. For Christmas, he got two toys each from his mom and dad. "
|
||||
"How many toys does he have now?\n\n"
|
||||
"Solve step by step. End with 'The answer is [NUMBER]'."
|
||||
),
|
||||
"answer": "9",
|
||||
},
|
||||
]
|
||||
|
||||
# ─── All questions with types ─────────────────────────────────────
|
||||
|
||||
# Flatten every question bank into one list, tagging each entry with
# its category so results can be broken down per category later.
ALL_REASONING = [
    {**question, "type": category}
    for category, bank in (
        ("causal", CAUSAL_QUESTIONS),
        ("date", DATE_QUESTIONS),
        ("logic", LOGIC_QUESTIONS),
        ("nav", NAV_QUESTIONS),
        ("gsm", GSM_QUESTIONS),
    )
    for question in bank
]

# Alias for sweep.py import
REASONING_QUESTIONS = ALL_REASONING
|
||||
|
||||
|
||||
def extract_final_answer(response: str) -> str:
    """
    Pull the final short answer out of a chain-of-thought response.

    Tries, in order: an explicit "the answer is X" clause, a bare
    Yes/No word on the last line, a (possibly parenthesized) option
    letter A-F on the last line, and finally the whole last line.
    """
    # Accept end-of-string as a terminator too, so "The answer is 42"
    # with no trailing punctuation still matches.
    match = re.search(r'the answer is\s+(.+?)(?:[\.\!\n\r]|$)', response, re.IGNORECASE)
    if match:
        return match.group(1).strip()

    last_line = response.split('\n')[-1]
    match = re.search(r'\b(yes|no)\b', last_line, re.IGNORECASE)
    if match:
        return match.group(1).strip()

    match = re.search(r'\(?([A-F])\)?', last_line)
    if match:
        return match.group(1).strip()

    return response.strip().split('\n')[-1].strip()
|
||||
|
||||
|
||||
def score_reasoning_response(question: dict, response: str) -> float:
    """Binary-score one reasoning question: 1.0 if correct, else 0.0."""
    if response is None:
        return 0.0

    def _norm(text: str) -> str:
        # Reduce to bare lowercase alphanumerics for comparison.
        return re.sub(r'[^a-z0-9]', '', text.lower().strip())

    final = _norm(extract_final_answer(response))
    correct = _norm(question["answer"])

    if final == correct:
        return 1.0

    # Lenient fallback for Yes/No questions: accept the word anywhere on
    # the last line, provided the opposite word is not also present.
    if correct in ("yes", "no"):
        last_line = response.strip().split('\n')[-1].lower()
        if correct in last_line and (correct == "yes") != ("no" in last_line):
            return 1.0

    return 0.0
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Smoke check: report question counts per category and show samples.
    print(f"Reasoning Probe: {len(ALL_REASONING)} questions")
    print(f" Causal: {len(CAUSAL_QUESTIONS)}")
    print(f" Date: {len(DATE_QUESTIONS)}")
    print(f" Logic: {len(LOGIC_QUESTIONS)}")
    print(f" Nav: {len(NAV_QUESTIONS)}")
    print(f" GSM: {len(GSM_QUESTIONS)}")

    print("\nSample questions:")
    for q in ALL_REASONING[:2]:
        print(f"\n [{q['type']}] answer={q['answer']}")
        print(f" {q['prompt'][:80]}...")
|
||||
447
sweep.py
Normal file
447
sweep.py
Normal file
|
|
@ -0,0 +1,447 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
RYS Layer Duplication Sweep
|
||||
|
||||
Orchestrates the search for optimal layer duplication configuration:
|
||||
1. Generate modified GGUF with duplicated layers
|
||||
2. Start llama-server with the modified model
|
||||
3. Run math + EQ probes
|
||||
4. Score and record results
|
||||
5. Print live results table
|
||||
6. Kill server, repeat
|
||||
|
||||
Usage:
|
||||
python sweep.py \
|
||||
--model /path/to/model.gguf \
|
||||
--llama-server /path/to/llama-server \
|
||||
--tmpdir /dev/shm/rys \
|
||||
--results results.jsonl
|
||||
|
||||
The sweep strategy:
|
||||
Pass 1: 8-layer blocks at stride 4 across the middle
|
||||
Pass 2: Refine within the hot zone with smaller blocks
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import signal
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
import requests
|
||||
|
||||
from gguf_surgery import duplicate_layers
|
||||
from math_probe import MATH_QUESTIONS, score_math_response
|
||||
from eq_probe import EQ_SCENARIOS, build_eq_prompt, parse_eq_response, score_eq_response
|
||||
from reasoning_probe import REASONING_QUESTIONS, score_reasoning_response
|
||||
|
||||
|
||||
# Server config
|
||||
DEFAULT_PORT = 8099
|
||||
SERVER_STARTUP_TIMEOUT = 120 # seconds
|
||||
REQUEST_TIMEOUT = 60 # seconds per completion
|
||||
|
||||
|
||||
def wait_for_server(port: int, timeout: int = SERVER_STARTUP_TIMEOUT) -> bool:
    """Poll llama-server's /health endpoint until it reports ok or we time out.

    Returns True once the endpoint answers 200 with {"status": "ok"};
    False if the deadline passes first.
    """
    url = f"http://127.0.0.1:{port}/health"
    deadline = time.time() + timeout
    while time.time() < deadline:
        try:
            resp = requests.get(url, timeout=2)
        except (requests.ConnectionError, requests.Timeout):
            resp = None
        if resp is not None and resp.status_code == 200:
            if resp.json().get("status") == "ok":
                return True
        time.sleep(1)
    return False
|
||||
|
||||
|
||||
def start_server(llama_server_path: str, model_path: str, port: int,
                 extra_args: list[str] = None) -> subprocess.Popen:
    """Start llama-server and return the process handle.

    Server stdout/stderr are redirected to /tmp/rys_server_<port>.log.
    The open file handle and its path are stashed on the Popen object
    as private attributes (_log_file, _log_path) so stop_server() and
    dump_server_log() can find them later.
    """
    cmd = [
        llama_server_path,
        "-m", model_path,
        "--port", str(port),
        "-c", "4096",  # small context for probe eval
        "-ngl", "99",  # offload all layers to GPU
        "--flash-attn", "on",
        "--cache-type-k", "q8_0",
        "--cache-type-v", "q8_0",
        "--no-warmup",
        "-np", "1",  # single slot
    ]
    if extra_args:
        cmd.extend(extra_args)

    print(f" [CMD] {' '.join(cmd)}", flush=True)

    # Let server output go to a log file so we can debug without pipe deadlocks
    log_path = Path(f"/tmp/rys_server_{port}.log")
    log_file = open(log_path, "w")
    proc = subprocess.Popen(cmd, stdout=log_file, stderr=subprocess.STDOUT)
    proc._log_file = log_file  # keep reference so it doesn't get GC'd
    proc._log_path = log_path
    print(f" [PID] Server started as PID {proc.pid}, log: {log_path}", flush=True)
    return proc
|
||||
|
||||
|
||||
def stop_server(proc: subprocess.Popen):
    """Terminate the server (SIGKILL after 10s grace) and close its log file."""
    if proc.poll() is None:
        proc.terminate()
        try:
            proc.wait(timeout=10)
        except subprocess.TimeoutExpired:
            proc.kill()
            proc.wait()
    # Close the log file opened by start_server(), if one was attached.
    log_file = getattr(proc, '_log_file', None)
    if log_file is not None:
        log_file.close()
|
||||
|
||||
|
||||
def dump_server_log(proc: subprocess.Popen, tail_lines: int = 30):
    """Print the tail of the server's log file to stderr for debugging."""
    log_path = getattr(proc, '_log_path', None)
    if log_path is None or not log_path.exists():
        return
    lines = log_path.read_text().splitlines()
    print(f" --- Server log (last {tail_lines} lines) ---", file=sys.stderr)
    for line in lines[-tail_lines:]:
        print(f" | {line}", file=sys.stderr)
    print(f" --- End server log ---", file=sys.stderr)
|
||||
|
||||
|
||||
def query_model(prompt: str, port: int, max_tokens: int = 64) -> str | None:
    """Send a single-turn chat completion to llama-server.

    Uses temperature 0 so probe runs are deterministic. Returns the
    assistant message content, or None on non-200 status, connection
    failure, or timeout (each logged to stderr).
    """
    url = f"http://127.0.0.1:{port}/v1/chat/completions"

    payload = {
        # NOTE(review): placeholder model name — llama-server appears to
        # ignore it for a single loaded model; confirm if that changes.
        "model": "test",
        "messages": [
            {"role": "user", "content": prompt}
        ],
        "max_tokens": max_tokens,
        "temperature": 0.0,
    }

    try:
        r = requests.post(url, json=payload, timeout=REQUEST_TIMEOUT)
        if r.status_code == 200:
            data = r.json()
            return data["choices"][0]["message"]["content"]
        else:
            print(f" [WARN] Server returned {r.status_code}", file=sys.stderr)
            return None
    except (requests.ConnectionError, requests.Timeout) as e:
        print(f" [WARN] Request failed: {e}", file=sys.stderr)
        return None
|
||||
|
||||
|
||||
def run_math_probe(port: int) -> float:
    """Ask every math question; return the mean partial-credit score (0-1)."""
    scores = []
    for question, answer in MATH_QUESTIONS:
        reply = query_model(question, port, max_tokens=48)
        scores.append(0.0 if reply is None else score_math_response(answer, reply))
    return sum(scores) / len(scores) if scores else 0.0
|
||||
|
||||
|
||||
def run_eq_probe(port: int) -> float:
    """Run every EQ scenario; return the mean score (0-100 scale)."""
    scores = []
    for scenario in EQ_SCENARIOS:
        reply = query_model(build_eq_prompt(scenario), port, max_tokens=48)
        if reply is None:
            scores.append(0.0)
            continue
        predicted = parse_eq_response(reply, len(scenario["emotions"]))
        scores.append(score_eq_response(scenario["reference"], predicted))
    return sum(scores) / len(scores) if scores else 0.0
|
||||
|
||||
|
||||
def run_reasoning_probe(port: int) -> dict:
    """Run all reasoning questions; return per-category and overall accuracy.

    Returns {"categories": {type: mean}, "overall": mean} with means in [0, 1].
    """
    by_category: dict[str, list[float]] = {}
    for question in REASONING_QUESTIONS:
        bucket = by_category.setdefault(question["type"], [])
        reply = query_model(question["prompt"], port, max_tokens=512)
        bucket.append(score_reasoning_response(question, reply))

    cat_scores = {
        cat: (sum(scores) / len(scores) if scores else 0.0)
        for cat, scores in by_category.items()
    }

    all_scores = [s for scores in by_category.values() for s in scores]
    overall = sum(all_scores) / len(all_scores) if all_scores else 0.0

    return {"categories": cat_scores, "overall": overall}
|
||||
|
||||
|
||||
def run_evaluation(port: int) -> dict:
    """Run the math, EQ and reasoning probes and collect their scores.

    Probes run in a fixed order (math, EQ, reasoning) against the server on
    *port*; the returned dict flattens the reasoning result into an overall
    score plus the per-category breakdown.
    """
    math_score = run_math_probe(port)
    eq_score = run_eq_probe(port)
    reasoning = run_reasoning_probe(port)
    result = {
        "math_score": math_score,
        "eq_score": eq_score,
        "reasoning_score": reasoning["overall"],
        "reasoning_cats": reasoning["categories"],
    }
    return result
def print_results_table(results: list[dict], baseline: dict | None = None):
|
||||
"""Print a live-updating results table."""
|
||||
print("\n" + "=" * 105)
|
||||
print(f"{'Config':>12} {'Layers':>8} {'Math':>8} {'EQ':>8} {'Reason':>8} "
|
||||
f"{'Math Δ':>8} {'EQ Δ':>8} {'Reas Δ':>8} {'Combined Δ':>11}")
|
||||
print("-" * 105)
|
||||
|
||||
if baseline:
|
||||
brs = baseline.get('reasoning_score', 0)
|
||||
print(f"{'BASELINE':>12} {'0':>8} "
|
||||
f"{baseline['math_score']:>8.4f} {baseline['eq_score']:>8.2f} {brs:>8.2%} "
|
||||
f"{'---':>8} {'---':>8} {'---':>8} {'---':>11}")
|
||||
print("-" * 105)
|
||||
|
||||
for r in results:
|
||||
config = f"({r['dup_start']},{r['dup_end']})"
|
||||
n_dup = r['dup_end'] - r['dup_start']
|
||||
rs = r.get('reasoning_score', 0)
|
||||
|
||||
if baseline:
|
||||
math_delta = r['math_score'] - baseline['math_score']
|
||||
eq_delta = r['eq_score'] - baseline['eq_score']
|
||||
reas_delta = rs - baseline.get('reasoning_score', 0)
|
||||
# Combined: weight EQ and reasoning more than math
|
||||
combined = eq_delta + (reas_delta * 100)
|
||||
math_d = f"{math_delta:>+8.4f}"
|
||||
eq_d = f"{eq_delta:>+8.2f}"
|
||||
reas_d = f"{reas_delta:>+8.2%}"
|
||||
comb_d = f"{combined:>+11.2f}"
|
||||
else:
|
||||
math_d = eq_d = reas_d = comb_d = "---"
|
||||
|
||||
print(f"{config:>12} {n_dup:>8} "
|
||||
f"{r['math_score']:>8.4f} {r['eq_score']:>8.2f} {rs:>8.2%} "
|
||||
f"{math_d} {eq_d} {reas_d} {comb_d}")
|
||||
|
||||
print("=" * 105)
|
||||
sys.stdout.flush()
|
||||
|
||||
|
||||
def generate_sweep_configs(n_layers: int, block_sizes: list[int],
|
||||
start_min: int = 4, start_max: int = None,
|
||||
stride: int = 4) -> list[tuple[int, int]]:
|
||||
"""
|
||||
Generate (dup_start, dup_end) configs for the sweep.
|
||||
|
||||
Args:
|
||||
n_layers: Total layers in the model
|
||||
block_sizes: List of block sizes to try (e.g., [8])
|
||||
start_min: Earliest layer to start duplication
|
||||
start_max: Latest layer to start (default: n_layers - max(block_sizes) - 4)
|
||||
stride: Step between start positions
|
||||
"""
|
||||
if start_max is None:
|
||||
start_max = n_layers - max(block_sizes) - 4
|
||||
|
||||
configs = []
|
||||
for bs in block_sizes:
|
||||
for start in range(start_min, start_max + 1, stride):
|
||||
end = start + bs
|
||||
if end <= n_layers:
|
||||
configs.append((start, end))
|
||||
|
||||
return configs
|
||||
|
||||
|
||||
def main():
    """CLI entry point: sweep layer-duplication configs over a GGUF model.

    For each (dup_start, dup_end) config: generate a modified GGUF into a
    tmpfs directory, launch llama-server on it, run the math/EQ/reasoning
    probes, append the scores to a JSONL results file, then tear down the
    server and delete the modified GGUF. Results are appended incrementally
    so an interrupted sweep can resume (already-done configs are skipped).
    """
    parser = argparse.ArgumentParser(description="RYS Layer Duplication Sweep")
    parser.add_argument("--model", required=True, help="Path to input GGUF model")
    parser.add_argument("--llama-server", required=True, help="Path to llama-server binary")
    parser.add_argument("--tmpdir", default="/dev/shm/rys",
                        help="Temp directory for modified GGUFs (use tmpfs/RAM)")
    parser.add_argument("--results", default="rys_results.jsonl",
                        help="Output results file (JSONL)")
    parser.add_argument("--port", type=int, default=DEFAULT_PORT)
    parser.add_argument("--block-sizes", type=int, nargs="+", default=[8],
                        help="Block sizes to sweep (default: 8)")
    parser.add_argument("--stride", type=int, default=4,
                        help="Stride between start positions (default: 4)")
    parser.add_argument("--start-min", type=int, default=4,
                        help="Earliest layer to start duplication")
    parser.add_argument("--start-max", type=int, default=None,
                        help="Latest layer to start duplication")
    parser.add_argument("--skip-baseline", action="store_true",
                        help="Skip baseline run (use if already in results)")
    # REMAINDER swallows everything after --server-args, hence "must be last".
    parser.add_argument("--server-args", nargs=argparse.REMAINDER, default=[],
                        help="Extra args to pass to llama-server (must be last)")
    args = parser.parse_args()

    model_path = Path(args.model).resolve()
    tmpdir = Path(args.tmpdir)
    tmpdir.mkdir(parents=True, exist_ok=True)

    results_path = Path(args.results)
    results = []
    baseline = None

    # Load existing results if resuming
    if results_path.exists():
        with open(results_path) as f:
            for line in f:
                line = line.strip()
                if line:
                    entry = json.loads(line)
                    if entry.get("is_baseline"):
                        baseline = entry
                    else:
                        results.append(entry)
        print(f"Loaded {len(results)} existing results + baseline={baseline is not None}")

    # Run baseline (unmodified model)
    if not args.skip_baseline and baseline is None:
        print("\n>>> Running BASELINE evaluation...")
        proc = start_server(args.llama_server, str(model_path), args.port, args.server_args)
        try:
            if not wait_for_server(args.port):
                print("ERROR: Server failed to start for baseline", file=sys.stderr)
                dump_server_log(proc)
                stop_server(proc)
                # Baseline is required for deltas, so a failed start is fatal.
                sys.exit(1)

            print(" Server ready. Running probes...")
            eval_result = run_evaluation(args.port)
            # dup_start/dup_end of -1 mark the baseline sentinel entry.
            baseline = {
                "is_baseline": True,
                "dup_start": -1,
                "dup_end": -1,
                "math_score": eval_result["math_score"],
                "eq_score": eval_result["eq_score"],
                "reasoning_score": eval_result["reasoning_score"],
                "reasoning_cats": eval_result.get("reasoning_cats", {}),
                "timestamp": datetime.now().isoformat(),
            }

            # Append (not overwrite) so existing sweep entries are preserved.
            with open(results_path, "a") as f:
                f.write(json.dumps(baseline) + "\n")

            brs = baseline['reasoning_score']
            print(f" Baseline: math={baseline['math_score']:.4f} eq={baseline['eq_score']:.2f} reasoning={brs:.2%}")
        finally:
            stop_server(proc)

    # Get model layer count from the GGUF metadata
    from gguf import GGUFReader
    reader = GGUFReader(str(model_path), 'r')
    arch_field = reader.get_field('general.architecture')
    arch = arch_field.contents()
    # Layer-count key is architecture-specific, e.g. "llama.block_count".
    block_count_field = reader.get_field(f'{arch}.block_count')
    n_layers = block_count_field.contents()
    print(f"\nModel: {model_path.name}")
    print(f"Architecture: {arch}, Layers: {n_layers}")

    # Generate sweep configurations
    configs = generate_sweep_configs(
        n_layers=n_layers,
        block_sizes=args.block_sizes,
        start_min=args.start_min,
        start_max=args.start_max,
        stride=args.stride,
    )

    # Filter out already-completed configs
    done = {(r["dup_start"], r["dup_end"]) for r in results}
    configs = [(s, e) for s, e in configs if (s, e) not in done]

    print(f"Configs to test: {len(configs)}")
    if configs:
        print(f" Range: ({configs[0][0]},{configs[0][1]}) to ({configs[-1][0]},{configs[-1][1]})")

    print_results_table(results, baseline)

    for idx, (dup_start, dup_end) in enumerate(configs):
        n_dup = dup_end - dup_start
        config_str = f"({dup_start},{dup_end})"
        print(f"\n>>> [{idx+1}/{len(configs)}] Testing config {config_str} "
              f"(+{n_dup} layers)...")

        # Generate modified GGUF
        modified_path = tmpdir / f"rys_{dup_start}_{dup_end}.gguf"
        print(f" Generating modified GGUF...")
        try:
            duplicate_layers(
                str(model_path), str(modified_path),
                dup_start, dup_end, verbose=False
            )
        except Exception as e:
            # A failed generation only skips this config, not the sweep.
            print(f" ERROR generating GGUF: {e}", file=sys.stderr)
            continue

        # Start server with modified model
        print(f" Starting server...")
        proc = start_server(
            args.llama_server, str(modified_path), args.port, args.server_args
        )

        try:
            if not wait_for_server(args.port):
                print(f" ERROR: Server failed to start for {config_str}", file=sys.stderr)
                dump_server_log(proc)
                print(f" Check server log above for details.", file=sys.stderr)
                # continue still runs the finally block below (stop + cleanup).
                continue

            print(f" Server ready. Running probes...")
            eval_result = run_evaluation(args.port)

            entry = {
                "dup_start": dup_start,
                "dup_end": dup_end,
                "n_dup_layers": n_dup,
                "math_score": eval_result["math_score"],
                "eq_score": eval_result["eq_score"],
                "reasoning_score": eval_result["reasoning_score"],
                "reasoning_cats": eval_result.get("reasoning_cats", {}),
                "timestamp": datetime.now().isoformat(),
            }

            results.append(entry)

            # Append to results file
            with open(results_path, "a") as f:
                f.write(json.dumps(entry) + "\n")

            print_results_table(results, baseline)

        finally:
            stop_server(proc)

            # Clean up modified GGUF to free tmpfs space
            # NOTE(review): cleanup placed inside finally so the early
            # `continue` on server failure still frees tmpfs — confirm against
            # the original indentation.
            if modified_path.exists():
                modified_path.unlink()
                print(f" Cleaned up {modified_path.name}")

    print("\n\nSweep complete!")
    print_results_table(results, baseline)
# Script entry point: run the full layer-duplication sweep from the CLI.
if __name__ == "__main__":
    main()
114
visualize.py
Normal file
114
visualize.py
Normal file
|
|
@ -0,0 +1,114 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Visualize RYS sweep results.
|
||||
Reads the JSONL output from sweep.py, prints ranked table and bar chart.
|
||||
"""
|
||||
|
||||
import json
|
||||
import sys
|
||||
|
||||
|
||||
def load_results(path: str):
    """Parse a sweep JSONL file into (baseline, results).

    Lines flagged with "is_baseline" populate the first element (last one
    wins, None if absent); every other non-blank line is collected into the
    results list in file order.
    """
    baseline = None
    entries = []
    with open(path) as fh:
        for raw in fh:
            raw = raw.strip()
            if not raw:
                continue
            record = json.loads(raw)
            if record.get("is_baseline"):
                baseline = record
            else:
                entries.append(record)
    return baseline, entries
def print_ranked(baseline, results):
    """Print results ranked by combined delta (math delta x 100 + EQ delta).

    Mutates each result dict in place, adding "math_delta", "eq_delta" and
    "combined" keys. Prints a warning and returns early without a baseline.
    """
    if not baseline:
        print("No baseline found in results!")
        return

    # Annotate every entry with its deltas versus the baseline.
    for entry in results:
        entry["math_delta"] = entry["math_score"] - baseline["math_score"]
        entry["eq_delta"] = entry["eq_score"] - baseline["eq_score"]
        entry["combined"] = (entry["math_delta"] * 100) + entry["eq_delta"]

    ranked = sorted(results, key=lambda e: e["combined"], reverse=True)

    print(f"\nBaseline: math={baseline['math_score']:.4f} eq={baseline['eq_score']:.2f}")
    print()
    print(f"{'Rank':>4} {'Config':>12} {'Dup':>4} "
          f"{'Math':>8} {'EQ':>8} "
          f"{'Math Δ':>9} {'EQ Δ':>8} {'Combined':>10}")
    print("-" * 80)

    for rank, entry in enumerate(ranked, start=1):
        label = f"({entry['dup_start']},{entry['dup_end']})"
        dup_count = entry['dup_end'] - entry['dup_start']

        # Marker: "+" improved, "!" clearly degraded, " " roughly neutral.
        if entry["combined"] > 0:
            flag = "+"
        elif entry["combined"] < -5:
            flag = "!"
        else:
            flag = " "

        print(f"{rank:>4} {label:>12} {dup_count:>4} "
              f"{entry['math_score']:>8.4f} {entry['eq_score']:>8.2f} "
              f"{entry['math_delta']:>+9.4f} {entry['eq_delta']:>+8.2f} "
              f"{entry['combined']:>+10.2f} {flag}")

    if ranked:
        top = ranked[0]
        bottom = ranked[-1]
        print()
        print(f"Best: ({top['dup_start']},{top['dup_end']}) combined={top['combined']:+.2f}")
        print(f"Worst: ({bottom['dup_start']},{bottom['dup_end']}) combined={bottom['combined']:+.2f}")
def print_bar_chart(baseline, results):
    """Render combined deltas as an ASCII bar chart ordered by dup start.

    Mutates each result dict in place, adding a "combined" key. Does nothing
    when either the baseline or the results list is missing/empty.
    """
    if not (baseline and results):
        return

    for entry in results:
        d_math = entry["math_score"] - baseline["math_score"]
        d_eq = entry["eq_score"] - baseline["eq_score"]
        entry["combined"] = (d_math * 100) + d_eq

    by_start = sorted(results, key=lambda e: e["dup_start"])

    # Scale bars by the largest magnitude; fall back to 1 to avoid div-by-zero.
    scale = max(abs(e["combined"]) for e in by_start) or 1

    width = 20
    print(f"\nCombined delta (baseline = |):")
    print(f"{'Config':>12} {'negative':<{width}}|{'positive':<{width}}")

    for entry in by_start:
        label = f"({entry['dup_start']},{entry['dup_end']})"
        value = entry["combined"]
        length = int(abs(value) / scale * width)

        if value >= 0:
            bar = " " * width + "|" + "#" * length
        else:
            bar = " " * (width - length) + "=" * length + "|"

        print(f"{label:>12} {bar} {value:+.2f}")
# Script entry point: expects one argument, the path to a sweep results
# JSONL file; prints the ranked table followed by the bar chart.
if __name__ == "__main__":
    if len(sys.argv) < 2:
        print(f"Usage: {sys.argv[0]} <results.jsonl>")
        sys.exit(1)

    baseline, results = load_results(sys.argv[1])
    print_ranked(baseline, results)
    print_bar_chart(baseline, results)
Loading…
Add table
Add a link
Reference in a new issue