chore: remove stale scheduled_tasks.lock file

This commit is contained in:
elipeter 2026-05-31 21:18:38 -05:00
parent a5929bb169
commit 2a4d49b68b
12 changed files with 1059 additions and 21 deletions

View file

@ -6,6 +6,8 @@
# scripts/m7_ship_gate.sh # every gate
# scripts/m7_ship_gate.sh --gates 3,6 # only gates 3 + 6
# scripts/m7_ship_gate.sh --sets owasp # Java OWASP corpus only
# scripts/m7_ship_gate.sh --sets jsts # NodeGoat + Juice Shop only
# scripts/m7_ship_gate.sh --sets nodegoat # one JS/TS corpus only
#
# Gate map (kept in sync with .pitboss/play/plan.md track M.7):
# Gate 1: Static-only scan is green on `tests/benchmark/corpus`.
@ -26,13 +28,22 @@
# R.0) added the precision/recall/budget ratchet. The corpus is
# *not* checked into the repo; the gate skips with a clear message
# when `NYX_OWASP_CORPUS` does not point at a real checkout.
# Gate 7: JS/TS real-corpus acceptance (Track R.1 / Phase 28). OWASP
# NodeGoat (Express, .js) + OWASP Juice Shop (TypeScript, .ts)
# `--verify` against the committed ground truth. Same shape as
# Gate 6: wall-clock budget + the per-(cap,lang) budget in
# tests/eval_corpus/budget.toml hard-enforced; per-cap
# confirmed-rate / precision / recall published report-only
# (NYX_JSTS_FLOOR_CAPS empty by default). Each corpus row
# self-skips unless its NYX_NODEGOAT_CORPUS / NYX_JUICESHOP_CORPUS
# points at a real checkout.
# Strict mode: exit on unhandled command failure (-e), on expansion of an
# unset variable (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail
# Repo root is the parent of the directory holding this script; the script
# changes into it so the rest of the run starts from there.
REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
cd "${REPO_ROOT}"
# Default gate selection as a comma-separated list of gate numbers. A later
# assignment overrides an earlier one, so the effective default is the last
# GATES= line.
GATES="1,2,3,4,5,6"
GATES="1,2,3,4,5,6,7"
# Real-corpus selector; empty means no `--sets` value is in effect
# (presumably filled in by the argument loop that follows; confirm there).
SETS=""
while [[ $# -gt 0 ]]; do
@ -56,10 +67,15 @@ while [[ $# -gt 0 ]]; do
esac
done
# When `--sets owasp` is passed CI only wants Gate 6.
# (The `owasp` arm of the `case` below assigns the same value, so the two
# checks agree.)
if [[ "${SETS}" == "owasp" ]]; then
GATES="6"
fi
# `--sets` lets CI run a single real-corpus gate. `owasp` -> Gate 6;
# `jsts` (both JS/TS corpora) / `nodegoat` / `juiceshop` -> Gate 7, with the
# corpus name passed through so Gate 7 runs only the requested row.
# Any recognised non-empty value replaces the current GATES selection; an
# unrecognised value is reported on stderr and the script exits with status 2.
case "${SETS}" in
owasp) GATES="6" ;;
jsts|nodegoat|juiceshop) GATES="7" ;;
"") ;; # no --sets: run the requested --gates
*) echo "unknown --sets: ${SETS}" >&2; exit 2 ;;
esac
want_gate() {
[[ ",${GATES}," == *",$1,"* ]]
@ -292,6 +308,162 @@ PY
echo " PASS"
}
# ── Gate 7: JS/TS real-corpus acceptance (NodeGoat + Juice Shop) ──────────────
# Phase 28 (Track R.1) mirror of Gate 6 for the JS/TS corpora. Same
# wall-clock split (10 min dev reference / 15 min CI) and the same
# report-only-by-default floor policy: NYX_JSTS_FLOOR_CAPS is empty, so the
# per-cap confirmed-rate / precision / recall numbers are published but gate
# nothing, while the per-(cap,lang) budget (unsupported_rate,
# false_confirmed_rate) is hard-enforced. Promote a cap into the floor set
# once it starts Confirming end to end.
#
# Every tunable below can be overridden through the NYX_JSTS_* environment
# variable named in its default expansion.

# Wall-clock limit for one corpus scan, in seconds (900 s = 15 min). Used both
# as the scan timeout and as the ceiling the measured elapsed time is checked
# against.
GATE7_WALLCLOCK_BUDGET="${NYX_JSTS_WALLCLOCK_BUDGET_SECONDS:-900}"
# Per-cap minimums handed to report.py as --min-confirmed-rate /
# --min-precision / --min-recall. They are only passed when GATE7_FLOOR_CAPS
# is non-empty.
GATE7_CONFIRMED_RATE_TARGET="${NYX_JSTS_CONFIRMED_RATE_TARGET:-0.40}"
GATE7_PRECISION_TARGET="${NYX_JSTS_PRECISION_TARGET:-0.85}"
GATE7_RECALL_TARGET="${NYX_JSTS_RECALL_TARGET:-0.40}"
# Value forwarded to report.py as --floor-caps; empty (the default) keeps the
# per-cap numbers report-only. NOTE(review): the expected list format is
# defined by report.py, not visible here.
GATE7_FLOOR_CAPS="${NYX_JSTS_FLOOR_CAPS:-}"
# Path of the per-cell budget file forwarded to report.py as --budget.
GATE7_BUDGET="${NYX_JSTS_BUDGET:-${REPO_ROOT}/tests/eval_corpus/budget.toml}"
# Run one real-corpus `--verify` row: scan under a wall-clock guard,
# tabulate against the committed ground truth, enforce the per-cell budget,
# publish (or, when floor caps are set, enforce) the per-cap floors.
#
# Arguments:
#   $1 label   $2 corpus dir   $3 ground-truth json
# Globals read:
#   REPO_ROOT, TMPDIR, GATE7_WALLCLOCK_BUDGET, GATE7_BUDGET, GATE7_FLOOR_CAPS,
#   GATE7_CONFIRMED_RATE_TARGET, GATE7_PRECISION_TARGET, GATE7_RECALL_TARGET
# Files written:
#   /tmp/m7_gate7_<label>_{scan.json,results.json,wallclock.txt}
# Returns 0 on pass, 1 on fail. Caller decides skip.
_gate7_run_corpus() {
  local label="$1" corpus="$2" gt="$3"
  local scan_report="/tmp/m7_gate7_${label}_scan.json"
  local results_report="/tmp/m7_gate7_${label}_results.json"
  local wallclock_report="/tmp/m7_gate7_${label}_wallclock.txt"
  local gate_home="${TMPDIR:-/tmp}/nyx_m7_gate7_${label}_home"
  local gate_build_pool="${TMPDIR:-/tmp}/nyx_m7_gate7_${label}_build_pool"
  local wallclock
  local nyx_exit=0

  mkdir -p "${gate_home}" "${gate_build_pool}"
  # Drop reports from a previous run so a stale file can never satisfy the
  # checks below.
  rm -f "${scan_report}" "${results_report}" "${wallclock_report}"

  # The scan runs with a private HOME and build pool. The Python wrapper
  # applies the timeout, redirects the scan's stdout into the report file and
  # records the elapsed time. `|| nyx_exit=$?` captures the status without
  # tripping (or toggling) errexit.
  # Wrapper exit codes: 124 = timed out, 127 = scan binary not found,
  # 126 = any other launch / I/O error, otherwise the scan's own status.
  HOME="${gate_home}" \
  NYX_BUILD_POOL_DIR="${gate_build_pool}" \
    python3 - "${GATE7_WALLCLOCK_BUDGET}" "${scan_report}" "${wallclock_report}" \
      "${REPO_ROOT}/target/release/nyx" scan \
      --verify \
      --index off \
      --format json \
      --quiet \
      "${corpus}" <<'PY' || nyx_exit=$?
import subprocess
import sys
import time

budget = float(sys.argv[1])
scan_report = sys.argv[2]
wallclock_report = sys.argv[3]
cmd = sys.argv[4:]

start = time.monotonic()
rc = 0
try:
    with open(scan_report, "wb") as out:
        completed = subprocess.run(cmd, stdout=out, timeout=budget)
    rc = completed.returncode
except subprocess.TimeoutExpired:
    rc = 124
except OSError as exc:
    # Without this the traceback would exit 1, which the gate accepts as a
    # valid scan status; use the shell's "cannot execute" codes instead.
    print(f"scan launch failed: {exc}", file=sys.stderr)
    rc = 127 if isinstance(exc, FileNotFoundError) else 126
finally:
    elapsed = time.monotonic() - start
    with open(wallclock_report, "w") as f:
        f.write(f"{elapsed:.1f}\n")
sys.exit(rc)
PY

  # If the wrapper never wrote the timing file, fall back to the budget value
  # so the message below still has a number to print.
  wallclock="$(cat "${wallclock_report}" 2>/dev/null || printf "%s" "${GATE7_WALLCLOCK_BUDGET}")"
  echo " ${label} verify wall-clock: ${wallclock}s (budget ${GATE7_WALLCLOCK_BUDGET}s)"

  if [[ ${nyx_exit} -eq 124 ]]; then
    echo " FAIL: ${label} scan exceeded wall-clock budget"
    return 1
  fi
  # Exit statuses 0 and 1 are both accepted as a completed scan; anything
  # else is a failure.
  if [[ ${nyx_exit} -ne 0 && ${nyx_exit} -ne 1 ]]; then
    echo " FAIL: ${label} scan exited ${nyx_exit}"
    return 1
  fi
  if [[ ! -s "${scan_report}" ]]; then
    echo " FAIL: ${label} scan produced no JSON report"
    return 1
  fi
  # Floating-point comparison of elapsed time against the budget (bash
  # arithmetic is integer-only, hence awk).
  awk -v w="${wallclock}" -v b="${GATE7_WALLCLOCK_BUDGET}" \
    'BEGIN { if (w+0 > b+0) exit 1 }' \
    || { echo " FAIL: ${label} wall-clock exceeds budget"; return 1; }

  # tabulate.py appends to an existing results file, so seed it with an
  # empty JSON array.
  echo "[]" > "${results_report}"
  python3 "${REPO_ROOT}/tests/eval_corpus/tabulate.py" \
    --label "${label}" \
    --scan "${scan_report}" \
    --ground-truth "${gt}" \
    --append "${results_report}" \
    || { echo " FAIL: ${label} result tabulation failed"; return 1; }

  local -a report_args=(
    --results "${results_report}"
    --budget "${GATE7_BUDGET}"
  )
  if [[ -n "${GATE7_FLOOR_CAPS}" ]]; then
    report_args+=(
      --floor-caps "${GATE7_FLOOR_CAPS}"
      --min-confirmed-rate "${GATE7_CONFIRMED_RATE_TARGET}"
      --min-precision "${GATE7_PRECISION_TARGET}"
      --min-recall "${GATE7_RECALL_TARGET}"
    )
    echo " enforcing per-cap floors (confirmed >= ${GATE7_CONFIRMED_RATE_TARGET}, precision >= ${GATE7_PRECISION_TARGET}, recall >= ${GATE7_RECALL_TARGET}) on: ${GATE7_FLOOR_CAPS}"
  else
    echo " per-cap confirmed/precision/recall: report-only (NYX_JSTS_FLOOR_CAPS unset)"
  fi
  python3 "${REPO_ROOT}/tests/eval_corpus/report.py" "${report_args[@]}" \
    || { echo " FAIL: ${label} per-cell budget exceeded or a gated per-cap floor missed"; return 1; }
  return 0
}
# Gate 7 entry point: build the release binary, then run `_gate7_run_corpus`
# once per configured JS/TS corpus row.
#
# Globals read:
#   SETS (empty or "jsts" = every row; a corpus name = only that row),
#   REPO_ROOT, and the NYX_NODEGOAT_CORPUS / NYX_JUICESHOP_CORPUS environment
#   variables, which name the corpus checkouts.
# Returns:
#   0 when every row that ran passed, or when no row could run (skip);
#   1 when at least one row failed.
gate_7_jsts_scale() {
  echo "── Gate 7: JS/TS real-corpus (NodeGoat + Juice Shop) verify acceptance ──"
  cargo build --release --quiet --features dynamic

  # name : env var holding the corpus dir : committed ground-truth file
  local -a rows=(
    "nodegoat:NYX_NODEGOAT_CORPUS:nodegoat.json"
    "juiceshop:NYX_JUICESHOP_CORPUS:juiceshop.json"
  )
  local any_ran=0 any_failed=0
  # Loop variables are function-local so nothing leaks into the caller's scope.
  local row name envvar gtfile corpus

  for row in "${rows[@]}"; do
    IFS=: read -r name envvar gtfile <<<"${row}"

    # When --sets names a single corpus, only run that row.
    if [[ -n "${SETS}" && "${SETS}" != "jsts" && "${SETS}" != "${name}" ]]; then
      continue
    fi

    # Indirect expansion: read the variable whose name is stored in envvar.
    corpus="${!envvar:-}"
    if [[ -z "${corpus}" || ! -d "${corpus}" ]]; then
      echo " SKIP ${name}: set ${envvar} to a checkout to run this row."
      continue
    fi

    any_ran=1
    echo " ── ${name} (${corpus}) ──"
    # Keep going after a failure so every configured row reports its result.
    if _gate7_run_corpus "${name}" "${corpus}" \
      "${REPO_ROOT}/tests/eval_corpus/ground_truth/${gtfile}"; then
      echo " PASS ${name}"
    else
      any_failed=1
    fi
  done

  if [[ ${any_ran} -eq 0 ]]; then
    echo " SKIP: no JS/TS corpus configured (set NYX_NODEGOAT_CORPUS / NYX_JUICESHOP_CORPUS)."
    echo " (Gate 7 is Phase 28's headline acceptance for the JS/TS real corpora.)"
    return 0
  fi
  [[ ${any_failed} -eq 0 ]] || return 1
  echo " PASS"
}
# ── Driver ────────────────────────────────────────────────────────────────────
declare -a FAILED=()
@ -310,6 +482,7 @@ run_gate 3 verify_ratio
run_gate 4 sarif_schema
run_gate 5 layering
run_gate 6 owasp_scale
run_gate 7 jsts_scale
if [[ ${#FAILED[@]} -gt 0 ]]; then
echo