mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-09 19:45:13 +02:00
30 lines
998 B
TOML
30 lines
998 B
TOML
# Eval corpus budget.
#
# `report.py` enforces these values when `run.sh` or `run_full.sh` passes
# `--budget`. Each (cap, lang) cell uses the default row unless a specific
# override appears below.
#
# Wall-clock cost is measured separately from this per-cell budget.
#
# Schema:
#
#   [default]
#   unsupported_rate = 0.20 # max(Unsupported / total) per cell
#   false_confirmed_rate = 0.02 # max(wrong / Confirmed) per cap
#   repro_stability = 0.95 # min(stable / Confirmed) per cell
#   ratchet_deadline = "..." # informational; cells already at headline
#
#   [[cell]]
#   cap = "..."
#   lang = "..."
#   <overrides as above>
#
# `cap` matches `tabulate.py`'s _CAP_BIT_TABLE / _CAP_RULE_TABLE labels.
# `lang` matches the ext_map values (`python`, `javascript`, …).
# A wildcard `"*"` matches any cell that does not have an exact entry.

# Baseline budget: applies to every (cap, lang) cell that has no more
# specific override.
[default]
# Ceiling on Unsupported / total, evaluated per cell.
unsupported_rate = 0.20
# Ceiling on wrong / Confirmed, evaluated per cap.
false_confirmed_rate = 0.02
# Floor on stable / Confirmed, evaluated per cell.
repro_stability = 0.95
# Informational only. Kept as a quoted string, as in the documented schema.
ratchet_deadline = "2026-05-15"