nyx/tests/eval_corpus/budget.toml

37 lines
1.6 KiB
TOML
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# Phase 31: ratchet values set to the headline targets.
#
# These are the published acceptance numbers behind the dynamic-verification
# overhaul (see `docs/dynamic.md` "Headline metrics"). The ratchet schedule
# from Phase 29 collapsed into a single target row: every (cap, lang) cell is
# now gated against the same headline thresholds. Per-cell carve-outs were
# dropped in Phase 31; if a cell still misses these thresholds in practice,
# it shows up as a per-cell `FAIL` in `report.py` and as a Gate 1 failure in
# `scripts/m7_ship_gate.sh`, which is the intended forcing function for the
# remaining engine follow-ups tracked in `.pitboss/play/deferred.md`.
#
# Wall-clock cost (<= 2x static-only) is enforced separately by Gate 3 of
# `scripts/m7_ship_gate.sh` against `benches/fixtures/`; it is not a per-cell
# budget knob and has no entry in this file.
#
# Schema:
#
# [default]
# unsupported_rate = 0.20 # max(Unsupported / total) per cell
# false_confirmed_rate = 0.02 # max(wrong / Confirmed) per cap
# repro_stability = 0.95 # min(stable / Confirmed) per cell
# ratchet_deadline = "..." # informational only; every cell is already gated at the headline values
#
# [[cell]]
# cap = "..."
# lang = "..."
# <overrides as above>
#
# `cap` matches `tabulate.py`'s _CAP_BIT_TABLE / _CAP_RULE_TABLE labels.
# `lang` matches the ext_map values (`python`, `javascript`, …).
# A wildcard `"*"` matches any cell that does not have an exact entry.
[default]
# Headline thresholds. Per the header comment, every (cap, lang) cell is gated
# against these same values.

# Upper bound on Unsupported / total, checked per cell.
unsupported_rate = 0.20
# Upper bound on wrong / Confirmed, checked per cap.
false_confirmed_rate = 0.02
# Lower bound on stable / Confirmed, checked per cell.
repro_stability = 0.95
# Informational only, per the schema notes.
# NOTE(review): stored as a quoted string, as the schema shows; confirm what
# the loader expects before switching to a native TOML date.
ratchet_deadline = "2026-05-15"