nyx/scripts/m7_ship_gate.sh

287 lines
11 KiB
Bash
Executable file
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env bash
# m7_ship_gate.sh — milestone-7 ship gates.
#
# Each gate runs as an isolated function so CI can call a subset:
#
# scripts/m7_ship_gate.sh # every gate
# scripts/m7_ship_gate.sh --gates 3,6 # only gates 3 + 6
# scripts/m7_ship_gate.sh --sets owasp # Java OWASP corpus only
#
# Gate map (kept in sync with .pitboss/play/plan.md track M.7):
# Gate 1: Static-only scan is green on `tests/benchmark/corpus`.
# Gate 2: `cargo nextest run --features dynamic` is green.
# Gate 3: With-verify / static-only wall-clock ratio ≤ 1.5× on
# `benches/fixtures/`. Phase 22 had relaxed this to ≤ 2×
# while only `javac` had a warm daemon; Phase 23 lands the
# cross-lang build pools (shared caches for Node/Python/PHP/
# Ruby/Go/Rust/C/C++), so the bar is tightened back to ≤ 1.5×.
# Gate 4: SARIF schema validation on every dynamic verdict variant.
# Gate 5: Layering boundary test green.
# Gate 6: Java OWASP Benchmark v1.2 `--verify` wall-clock ≤ 15 min on
# CI / ≤ 10 min on the dev reference machine, confirmed-rate
# ≥ 40% per cap. Added Phase 22 as the headline acceptance
# for the warm `javac` daemon. The corpus is *not* checked
# into the repo; the gate skips with a clear message when
# `NYX_OWASP_CORPUS` does not point at a real checkout.
set -euo pipefail
REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
cd "${REPO_ROOT}"
GATES="1,2,3,4,5,6"
SETS=""
while [[ $# -gt 0 ]]; do
case "$1" in
--gates)
GATES="$2"
shift 2
;;
--sets)
SETS="$2"
shift 2
;;
-h | --help)
sed -n '2,/^$/p' "${BASH_SOURCE[0]}"
exit 0
;;
*)
echo "unknown flag: $1" >&2
exit 2
;;
esac
done
# When `--sets owasp` is passed CI only wants Gate 6.
if [[ "${SETS}" == "owasp" ]]; then
GATES="6"
fi
want_gate() {
[[ ",${GATES}," == *",$1,"* ]]
}
# ── Gate 1 ────────────────────────────────────────────────────────────────────
gate_1_static_corpus() {
echo "── Gate 1: static-only scan on tests/benchmark/corpus ──"
if [[ ! -d "${REPO_ROOT}/tests/benchmark/corpus" ]]; then
echo " SKIP: tests/benchmark/corpus not present"
return 0
fi
cargo run --release --quiet -- scan \
--format json \
"${REPO_ROOT}/tests/benchmark/corpus" > /tmp/m7_gate1.json
echo " PASS: static scan completed"
}
# ── Gate 2 ────────────────────────────────────────────────────────────────────
gate_2_dynamic_tests() {
echo "── Gate 2: cargo nextest run --features dynamic ──"
cargo nextest run --features dynamic
# The real-toolchain build-pool perf benches (dynamic_*_build_pool +
# dynamic_java_compile_pool) are #[ignore]d so the default inner-loop
# suite stays hermetic + fast: no cargo/go/cc/c++/npm/pip/composer/
# bundle/javac spawns. Run them explicitly here so CI still exercises
# the warm-pool compile path end to end. They self-skip when a
# toolchain is missing, so a toolchain-less CI row stays green.
cargo nextest run --features dynamic --run-ignored ignored-only \
-E 'binary(~build_pool) | binary(~compile_pool)'
echo " PASS: dynamic test suite green"
}
# ── Gate 3: with-verify / static-only ratio ───────────────────────────────────
# Phase 23 target: ratio ≤ 1.5×, now that the cross-lang build pools
# give every shipped language a warm cache (was ≤ 2× under Phase 22).
GATE3_RATIO_TARGET="${GATE3_RATIO_TARGET:-1.5}"
gate_3_verify_ratio() {
echo "── Gate 3: with-verify / static-only ratio on benches/fixtures/ ──"
local fixtures="${REPO_ROOT}/benches/fixtures"
if [[ ! -d "${fixtures}" ]]; then
echo " SKIP: ${fixtures} not present"
return 0
fi
# Phase 23: the warm build pools are what buy the ≤ 1.5× ratio, so
# make sure they are on for both scans even if the caller's env
# disabled them. Default is already ON for every shipped language.
export NYX_DYNAMIC_BUILD_POOL="java=1,node=1,python=1,php=1,ruby=1,go=1,rust=1,c=1,cpp=1"
local static_seconds verify_seconds
static_seconds="$(time_scan "${fixtures}" 0)"
verify_seconds="$(time_scan "${fixtures}" 1)"
local ratio
ratio="$(awk -v v="${verify_seconds}" -v s="${static_seconds}" \
'BEGIN { if (s <= 0) { print "inf"; exit } printf "%.3f", v / s }')"
echo " static-only wall-clock: ${static_seconds}s"
echo " with-verify wall-clock: ${verify_seconds}s"
echo " ratio: ${ratio} (target ≤ ${GATE3_RATIO_TARGET})"
awk -v r="${ratio}" -v t="${GATE3_RATIO_TARGET}" \
'BEGIN { if (r+0 > t+0) exit 1 }' \
|| { echo " FAIL: ratio exceeds target"; return 1; }
echo " PASS"
}
# Print wall-clock seconds for a single scan run.
# $1 = path to scan
# $2 = 0 for static-only, 1 for --verify
time_scan() {
local path="$1" verify="$2"
local args=("--format" "json")
if [[ "${verify}" == "1" ]]; then
args+=("--verify")
fi
args+=("${path}")
local start end
start="$(python3 -c 'import time;print(time.monotonic())')"
cargo run --release --quiet --features dynamic -- scan "${args[@]}" > /dev/null
end="$(python3 -c 'import time;print(time.monotonic())')"
awk -v a="${start}" -v b="${end}" 'BEGIN { printf "%.3f", b - a }'
}
# ── Gate 4 ────────────────────────────────────────────────────────────────────
gate_4_sarif_schema() {
echo "── Gate 4: SARIF schema validation ──"
cargo nextest run --features dynamic --test sarif_dynamic_verdict_tests
echo " PASS"
}
# ── Gate 5 ────────────────────────────────────────────────────────────────────
gate_5_layering() {
echo "── Gate 5: dynamic layering boundary ──"
cargo nextest run --features dynamic --test dynamic_layering
echo " PASS"
}
# ── Gate 6: Java OWASP-scale ratio ────────────────────────────────────────────
# Phase 22 + Phase 27 jointly own this gate. The wall-clock budgets
# are split: 10 min on the dev reference (M1 macOS w/ JDK 21) and 15
# min in CI. Override `NYX_OWASP_WALLCLOCK_BUDGET_SECONDS` to tighten.
GATE6_WALLCLOCK_BUDGET="${NYX_OWASP_WALLCLOCK_BUDGET_SECONDS:-900}"
GATE6_CONFIRMED_RATE_TARGET="${NYX_OWASP_CONFIRMED_RATE_TARGET:-0.40}"
gate_6_owasp_scale() {
echo "── Gate 6: Java OWASP Benchmark v1.2 verify wall-clock + confirmed-rate ──"
local corpus="${NYX_OWASP_CORPUS:-}"
if [[ -z "${corpus}" || ! -d "${corpus}" ]]; then
echo " SKIP: set NYX_OWASP_CORPUS to a v1.2 checkout to run this gate."
echo " (Gate 6 is Phase 22's headline acceptance for the warm javac daemon.)"
return 0
fi
local scan_report="/tmp/m7_gate6_scan.json"
local results_report="/tmp/m7_gate6_results.json"
local wallclock_report="/tmp/m7_gate6_wallclock.txt"
local gate_home="${TMPDIR:-/tmp}/nyx_m7_gate6_home"
local gate_build_pool="${TMPDIR:-/tmp}/nyx_m7_gate6_build_pool"
local wallclock
cargo build --release --quiet --features dynamic
mkdir -p "${gate_home}" "${gate_build_pool}"
rm -f "${scan_report}" "${results_report}" "${wallclock_report}"
set +e
HOME="${gate_home}" \
NYX_BUILD_POOL_DIR="${gate_build_pool}" \
python3 - "${GATE6_WALLCLOCK_BUDGET}" "${scan_report}" "${wallclock_report}" \
"${REPO_ROOT}/target/release/nyx" scan \
--verify \
--index off \
--format json \
--quiet \
"${corpus}" <<'PY'
import subprocess
import sys
import time
budget = float(sys.argv[1])
scan_report = sys.argv[2]
wallclock_report = sys.argv[3]
cmd = sys.argv[4:]
start = time.monotonic()
rc = 0
try:
with open(scan_report, "wb") as out:
completed = subprocess.run(cmd, stdout=out, timeout=budget)
rc = completed.returncode
except subprocess.TimeoutExpired:
rc = 124
finally:
elapsed = time.monotonic() - start
with open(wallclock_report, "w") as f:
f.write(f"{elapsed:.1f}\n")
sys.exit(rc)
PY
local nyx_exit=$?
set -e
wallclock="$(cat "${wallclock_report}" 2>/dev/null || printf "%s" "${GATE6_WALLCLOCK_BUDGET}")"
echo " OWASP verify wall-clock: ${wallclock}s (budget ${GATE6_WALLCLOCK_BUDGET}s)"
if [[ ${nyx_exit} -eq 124 ]]; then
echo " FAIL: nyx scan exceeded wall-clock budget"
return 1
fi
if [[ ${nyx_exit} -ne 0 && ${nyx_exit} -ne 1 ]]; then
echo " FAIL: nyx scan exited ${nyx_exit}"
return 1
fi
if [[ ! -s "${scan_report}" ]]; then
echo " FAIL: nyx scan produced no JSON report"
return 1
fi
awk -v w="${wallclock}" -v b="${GATE6_WALLCLOCK_BUDGET}" \
'BEGIN { if (w+0 > b+0) exit 1 }' \
|| { echo " FAIL: wall-clock exceeds budget"; return 1; }
echo "[]" > "${results_report}"
python3 "${REPO_ROOT}/tests/eval_corpus/tabulate.py" \
--label owasp \
--scan "${scan_report}" \
--ground-truth "${REPO_ROOT}/tests/eval_corpus/ground_truth/owasp_benchmark_v1.2.json" \
--append "${results_report}" \
|| { echo " FAIL: OWASP result tabulation failed"; return 1; }
python3 "${REPO_ROOT}/tests/eval_corpus/report.py" \
--results "${results_report}" \
--min-confirmed-rate "${GATE6_CONFIRMED_RATE_TARGET}" \
|| { echo " FAIL: confirmed-rate below ${GATE6_CONFIRMED_RATE_TARGET}"; return 1; }
echo " PASS"
}
# ── Driver ────────────────────────────────────────────────────────────────────
declare -a FAILED=()
run_gate() {
local idx="$1" name="$2"
if want_gate "${idx}"; then
if ! "gate_${idx}_${name}"; then
FAILED+=("${idx}")
fi
fi
}
run_gate 1 static_corpus
run_gate 2 dynamic_tests
run_gate 3 verify_ratio
run_gate 4 sarif_schema
run_gate 5 layering
run_gate 6 owasp_scale
if [[ ${#FAILED[@]} -gt 0 ]]; then
echo
echo "FAILED gates: ${FAILED[*]}"
exit 1
fi
echo
echo "All requested gates passed."