# mirror of https://github.com/elicpeter/nyx.git
# synced 2026-06-09 19:45:13 +02:00
# 287 lines, 11 KiB, Bash, Executable file
#!/usr/bin/env bash
# m7_ship_gate.sh — milestone-7 ship gates.
#
# Each gate runs as an isolated function so CI can call a subset:
#
#   scripts/m7_ship_gate.sh                 # every gate
#   scripts/m7_ship_gate.sh --gates 3,6     # only gates 3 + 6
#   scripts/m7_ship_gate.sh --sets owasp    # Java OWASP corpus only
#
# Gate map (kept in sync with .pitboss/play/plan.md track M.7):
#   Gate 1: Static-only scan is green on `tests/benchmark/corpus`.
#   Gate 2: `cargo nextest run --features dynamic` is green.
#   Gate 3: With-verify / static-only wall-clock ratio ≤ 1.5× on
#           `benches/fixtures/`. Phase 22 had relaxed this to ≤ 2×
#           while only `javac` had a warm daemon; Phase 23 lands the
#           cross-lang build pools (shared caches for Node/Python/PHP/
#           Ruby/Go/Rust/C/C++), so the bar is tightened back to ≤ 1.5×.
#   Gate 4: SARIF schema validation on every dynamic verdict variant.
#   Gate 5: Layering boundary test green.
#   Gate 6: Java OWASP Benchmark v1.2 `--verify` wall-clock ≤ 15 min on
#           CI / ≤ 10 min on the dev reference machine, confirmed-rate
#           ≥ 40% per cap. Added Phase 22 as the headline acceptance
#           for the warm `javac` daemon. The corpus is *not* checked
#           into the repo; the gate skips with a clear message when
#           `NYX_OWASP_CORPUS` does not point at a real checkout.

# Strict mode: abort on unhandled command failures, on use of unset
# variables, and when any stage of a pipeline fails.
set -euo pipefail

# The repository root is the parent of the directory holding this script.
# Run from there so the relative paths used by cargo are stable.
REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
cd "${REPO_ROOT}"

# GATES: comma-separated gate ids to run (replaced by --gates).
# SETS:  optional corpus-set shortcut (filled in by --sets).
GATES="1,2,3,4,5,6"
SETS=""

#######################################
# Parse command-line flags into the GATES / SETS globals.
# Globals:   GATES (written), SETS (written)
# Arguments: the script's command-line arguments
# Outputs:   usage text on -h/--help (stdout); diagnostics on stderr
# Exits:     0 after printing help; 2 on an unknown flag or a flag that
#            is missing its value
#######################################
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --gates)
        # Without this check a trailing `--gates` would read an unbound
        # "$2" and die with a cryptic `set -u` error instead of a usage one.
        if [[ $# -lt 2 ]]; then
          echo "missing value for $1" >&2
          exit 2
        fi
        GATES="$2"
        shift 2
        ;;
      --sets)
        if [[ $# -lt 2 ]]; then
          echo "missing value for $1" >&2
          exit 2
        fi
        SETS="$2"
        shift 2
        ;;
      -h | --help)
        # Print this file from line 2 through the first blank line.
        sed -n '2,/^$/p' "${BASH_SOURCE[0]}"
        exit 0
        ;;
      *)
        echo "unknown flag: $1" >&2
        exit 2
        ;;
    esac
  done

  # When `--sets owasp` is passed CI only wants Gate 6. This is applied
  # after the loop, so it wins over any --gates value regardless of order.
  if [[ "${SETS}" == "owasp" ]]; then
    GATES="6"
  fi
}

parse_args "$@"

#######################################
# Report whether a gate id was requested.
# Globals:   GATES (read) - comma-separated list of gate ids
# Arguments: $1 - gate id to look up
# Returns:   0 when $1 is in the list, 1 otherwise
#######################################
want_gate() {
  # Drop whitespace so a list written as "3, 6" still matches.
  local requested="${GATES//[[:space:]]/}"
  # Wrapping both sides in commas makes this a whole-token match, so
  # "16" never satisfies a lookup for "1" or "6".
  [[ ",${requested}," == *",$1,"* ]]
}

# ── Gate 1 ────────────────────────────────────────────────────────────────────

#######################################
# Gate 1: run a static-only scan over tests/benchmark/corpus.
# Globals:   REPO_ROOT (read)
# Outputs:   progress lines on stdout; JSON report at /tmp/m7_gate1.json
# Returns:   0 on pass or skip (corpus directory absent), 1 on failure
#######################################
gate_1_static_corpus() {
  echo "── Gate 1: static-only scan on tests/benchmark/corpus ──"
  local corpus="${REPO_ROOT}/tests/benchmark/corpus"
  if [[ ! -d "${corpus}" ]]; then
    echo " SKIP: tests/benchmark/corpus not present"
    return 0
  fi

  # The driver calls gates as `if ! gate_…`, and Bash ignores `set -e`
  # inside a function called that way. The exit status therefore has to be
  # checked by hand, or a failed scan would fall through to PASS.
  local scan_rc=0
  cargo run --release --quiet -- scan \
    --format json \
    "${corpus}" > /tmp/m7_gate1.json || scan_rc=$?

  # Gate 6 in this script tolerates exit status 1 from `nyx scan`, so the
  # same rule is used here. Presumably 1 means "findings reported" —
  # TODO confirm against the CLI.
  if [[ ${scan_rc} -ne 0 && ${scan_rc} -ne 1 ]]; then
    echo " FAIL: static scan exited ${scan_rc}"
    return 1
  fi
  echo " PASS: static scan completed"
}

# ── Gate 2 ────────────────────────────────────────────────────────────────────

#######################################
# Gate 2: run the dynamic test suite, then the ignored build-pool benches.
# Outputs:   progress lines on stdout
# Returns:   0 when both nextest invocations succeed, 1 otherwise
#######################################
gate_2_dynamic_tests() {
  echo "── Gate 2: cargo nextest run --features dynamic ──"
  # The driver calls gates as `if ! gate_…`, and Bash ignores `set -e`
  # inside a function called that way, so each run is checked explicitly.
  cargo nextest run --features dynamic \
    || { echo " FAIL: dynamic test suite failed"; return 1; }

  # The real-toolchain build-pool perf benches (dynamic_*_build_pool +
  # dynamic_java_compile_pool) are #[ignore]d so the default inner-loop
  # suite stays hermetic + fast: no cargo/go/cc/c++/npm/pip/composer/
  # bundle/javac spawns. Run them explicitly here so CI still exercises
  # the warm-pool compile path end to end. They self-skip when a
  # toolchain is missing, so a toolchain-less CI row stays green.
  cargo nextest run --features dynamic --run-ignored ignored-only \
    -E 'binary(~build_pool) | binary(~compile_pool)' \
    || { echo " FAIL: build-pool benches failed"; return 1; }

  echo " PASS: dynamic test suite green"
}

# ── Gate 3: with-verify / static-only ratio ───────────────────────────────────

# Phase 23 target: ratio ≤ 1.5×, now that the cross-lang build pools
# give every shipped language a warm cache (was ≤ 2× under Phase 22).
GATE3_RATIO_TARGET="${GATE3_RATIO_TARGET:-1.5}"

#######################################
# Gate 3: compare with-verify and static-only scan wall-clock times on
# benches/fixtures and fail when the ratio exceeds the target.
# Globals:   REPO_ROOT (read), GATE3_RATIO_TARGET (read),
#            NYX_DYNAMIC_BUILD_POOL (exported; stays set afterwards)
# Outputs:   timings, ratio and verdict on stdout
# Returns:   0 on pass or skip (fixtures directory absent), 1 on failure
#######################################
gate_3_verify_ratio() {
  echo "── Gate 3: with-verify / static-only ratio on benches/fixtures/ ──"
  local fixtures="${REPO_ROOT}/benches/fixtures"
  if [[ ! -d "${fixtures}" ]]; then
    echo " SKIP: ${fixtures} not present"
    return 0
  fi

  # Phase 23: the warm build pools are what buy the ≤ 1.5× ratio, so
  # make sure they are on for both scans even if the caller's env
  # disabled them. Default is already ON for every shipped language.
  export NYX_DYNAMIC_BUILD_POOL="java=1,node=1,python=1,php=1,ruby=1,go=1,rust=1,c=1,cpp=1"

  # The driver calls gates as `if ! gate_…`, and Bash ignores `set -e`
  # inside a function called that way. A failed or empty timing has to be
  # rejected by hand rather than fed into the ratio.
  local static_seconds verify_seconds
  static_seconds="$(time_scan "${fixtures}" 0)" \
    || { echo " FAIL: static-only scan failed"; return 1; }
  verify_seconds="$(time_scan "${fixtures}" 1)" \
    || { echo " FAIL: with-verify scan failed"; return 1; }
  if [[ -z "${static_seconds}" || -z "${verify_seconds}" ]]; then
    echo " FAIL: could not measure scan wall-clock"
    return 1
  fi

  local ratio
  ratio="$(awk -v v="${verify_seconds}" -v s="${static_seconds}" \
    'BEGIN { if (s <= 0) { print "inf"; exit } printf "%.3f", v / s }')"

  echo " static-only wall-clock: ${static_seconds}s"
  echo " with-verify wall-clock: ${verify_seconds}s"
  echo " ratio: ${ratio} (target ≤ ${GATE3_RATIO_TARGET})"

  # gawk converts the string "inf" to 0 in arithmetic, so the comparison
  # below would wave an unmeasurable ratio through. Reject it up front.
  if [[ "${ratio}" == "inf" ]]; then
    echo " FAIL: static-only wall-clock is not positive"
    return 1
  fi

  awk -v r="${ratio}" -v t="${GATE3_RATIO_TARGET}" \
    'BEGIN { if (r+0 > t+0) exit 1 }' \
    || { echo " FAIL: ratio exceeds target"; return 1; }
  echo " PASS"
}

#######################################
# Print wall-clock seconds for a single scan run.
# Arguments: $1 = path to scan
#            $2 = 0 for static-only, 1 for --verify
# Outputs:   elapsed seconds with three decimals on stdout (no newline);
#            diagnostics on stderr
# Returns:   0 when the scan exited 0 or 1, 1 otherwise
#######################################
time_scan() {
  local path="$1" verify="$2"
  local -a scan_args=("--format" "json")
  if [[ "${verify}" == "1" ]]; then
    scan_args+=("--verify")
  fi
  scan_args+=("${path}")

  # Build outside the timed window. Otherwise whichever scan runs first
  # would absorb compile time and skew the ratio the caller computes.
  # stdout is redirected because the caller captures it as the timing.
  cargo build --release --quiet --features dynamic >&2 || return 1

  local start end scan_rc=0
  start="$(python3 -c 'import time;print(time.monotonic())')" || return 1
  cargo run --release --quiet --features dynamic -- scan "${scan_args[@]}" \
    > /dev/null || scan_rc=$?
  end="$(python3 -c 'import time;print(time.monotonic())')" || return 1

  # Gate 6 in this script tolerates exit status 1 from `nyx scan`, so the
  # same rule is used here. Presumably 1 means "findings reported" —
  # TODO confirm against the CLI. Anything else is not a valid measurement.
  if [[ ${scan_rc} -ne 0 && ${scan_rc} -ne 1 ]]; then
    echo "time_scan: scan exited ${scan_rc}" >&2
    return 1
  fi

  awk -v a="${start}" -v b="${end}" 'BEGIN { printf "%.3f", b - a }'
}

# ── Gate 4 ────────────────────────────────────────────────────────────────────

#######################################
# Gate 4: run the SARIF dynamic-verdict schema tests.
# Outputs:   progress lines on stdout
# Returns:   0 when the nextest run succeeds, 1 otherwise
#######################################
gate_4_sarif_schema() {
  echo "── Gate 4: SARIF schema validation ──"
  # The driver calls gates as `if ! gate_…`, and Bash ignores `set -e`
  # inside a function called that way. Without this check a failing test
  # run would still reach the PASS line.
  cargo nextest run --features dynamic --test sarif_dynamic_verdict_tests \
    || { echo " FAIL: SARIF schema tests failed"; return 1; }
  echo " PASS"
}

# ── Gate 5 ────────────────────────────────────────────────────────────────────

#######################################
# Gate 5: run the dynamic layering boundary test.
# Outputs:   progress lines on stdout
# Returns:   0 when the nextest run succeeds, 1 otherwise
#######################################
gate_5_layering() {
  echo "── Gate 5: dynamic layering boundary ──"
  # The driver calls gates as `if ! gate_…`, and Bash ignores `set -e`
  # inside a function called that way. Without this check a failing test
  # run would still reach the PASS line.
  cargo nextest run --features dynamic --test dynamic_layering \
    || { echo " FAIL: layering boundary test failed"; return 1; }
  echo " PASS"
}

# ── Gate 6: Java OWASP-scale ratio ────────────────────────────────────────────

# Phase 22 + Phase 27 jointly own this gate. The wall-clock budgets
# are split: 10 min on the dev reference (M1 macOS w/ JDK 21) and 15
# min in CI. Override `NYX_OWASP_WALLCLOCK_BUDGET_SECONDS` to tighten.
GATE6_WALLCLOCK_BUDGET="${NYX_OWASP_WALLCLOCK_BUDGET_SECONDS:-900}"
GATE6_CONFIRMED_RATE_TARGET="${NYX_OWASP_CONFIRMED_RATE_TARGET:-0.40}"

#######################################
# Gate 6: run `nyx scan --verify` over the OWASP Benchmark checkout under a
# wall-clock budget, then tabulate the results and enforce the minimum
# confirmed-rate.
# Globals:   REPO_ROOT, NYX_OWASP_CORPUS, GATE6_WALLCLOCK_BUDGET,
#            GATE6_CONFIRMED_RATE_TARGET, TMPDIR (all read)
# Outputs:   progress lines on stdout; reports under /tmp/m7_gate6_*
# Returns:   0 on pass or skip (corpus not configured), 1 on failure
#######################################
gate_6_owasp_scale() {
  echo "── Gate 6: Java OWASP Benchmark v1.2 verify wall-clock + confirmed-rate ──"
  local corpus="${NYX_OWASP_CORPUS:-}"
  if [[ -z "${corpus}" || ! -d "${corpus}" ]]; then
    echo " SKIP: set NYX_OWASP_CORPUS to a v1.2 checkout to run this gate."
    echo " (Gate 6 is Phase 22's headline acceptance for the warm javac daemon.)"
    return 0
  fi

  local scan_report="/tmp/m7_gate6_scan.json"
  local results_report="/tmp/m7_gate6_results.json"
  local wallclock_report="/tmp/m7_gate6_wallclock.txt"
  local gate_home="${TMPDIR:-/tmp}/nyx_m7_gate6_home"
  local gate_build_pool="${TMPDIR:-/tmp}/nyx_m7_gate6_build_pool"
  local wallclock
  local nyx_exit=0

  # The driver calls gates as `if ! gate_…`, and Bash ignores `set -e`
  # inside a function called that way, so a broken build is reported here
  # instead of surfacing later as a missing report.
  cargo build --release --quiet --features dynamic \
    || { echo " FAIL: cargo build failed"; return 1; }
  mkdir -p "${gate_home}" "${gate_build_pool}"
  rm -f "${scan_report}" "${results_report}" "${wallclock_report}"

  # The Python wrapper runs the scan with a hard timeout, writes the
  # scan's stdout to the report file, records the elapsed seconds, and
  # exits 124 on timeout or with the scan's own status otherwise.
  # `|| nyx_exit=$?` captures that status without toggling `set -e`.
  HOME="${gate_home}" \
  NYX_BUILD_POOL_DIR="${gate_build_pool}" \
  python3 - "${GATE6_WALLCLOCK_BUDGET}" "${scan_report}" "${wallclock_report}" \
    "${REPO_ROOT}/target/release/nyx" scan \
    --verify \
    --index off \
    --format json \
    --quiet \
    "${corpus}" <<'PY' || nyx_exit=$?
import subprocess
import sys
import time

budget = float(sys.argv[1])
scan_report = sys.argv[2]
wallclock_report = sys.argv[3]
cmd = sys.argv[4:]
start = time.monotonic()
rc = 0
try:
    with open(scan_report, "wb") as out:
        completed = subprocess.run(cmd, stdout=out, timeout=budget)
    rc = completed.returncode
except subprocess.TimeoutExpired:
    rc = 124
finally:
    elapsed = time.monotonic() - start
    with open(wallclock_report, "w") as f:
        f.write(f"{elapsed:.1f}\n")
sys.exit(rc)
PY

  # If the wrapper never wrote a timing, assume the whole budget was used.
  wallclock="$(cat "${wallclock_report}" 2>/dev/null || printf "%s" "${GATE6_WALLCLOCK_BUDGET}")"

  echo " OWASP verify wall-clock: ${wallclock}s (budget ${GATE6_WALLCLOCK_BUDGET}s)"

  if [[ ${nyx_exit} -eq 124 ]]; then
    echo " FAIL: nyx scan exceeded wall-clock budget"
    return 1
  fi
  # Exit status 1 is tolerated alongside 0; anything else is an error.
  if [[ ${nyx_exit} -ne 0 && ${nyx_exit} -ne 1 ]]; then
    echo " FAIL: nyx scan exited ${nyx_exit}"
    return 1
  fi
  if [[ ! -s "${scan_report}" ]]; then
    echo " FAIL: nyx scan produced no JSON report"
    return 1
  fi

  awk -v w="${wallclock}" -v b="${GATE6_WALLCLOCK_BUDGET}" \
    'BEGIN { if (w+0 > b+0) exit 1 }' \
    || { echo " FAIL: wall-clock exceeds budget"; return 1; }

  # Start from an empty results array that tabulate.py appends to.
  echo "[]" > "${results_report}"
  python3 "${REPO_ROOT}/tests/eval_corpus/tabulate.py" \
    --label owasp \
    --scan "${scan_report}" \
    --ground-truth "${REPO_ROOT}/tests/eval_corpus/ground_truth/owasp_benchmark_v1.2.json" \
    --append "${results_report}" \
    || { echo " FAIL: OWASP result tabulation failed"; return 1; }

  python3 "${REPO_ROOT}/tests/eval_corpus/report.py" \
    --results "${results_report}" \
    --min-confirmed-rate "${GATE6_CONFIRMED_RATE_TARGET}" \
    || { echo " FAIL: confirmed-rate below ${GATE6_CONFIRMED_RATE_TARGET}"; return 1; }
  echo " PASS"
}

# ── Driver ────────────────────────────────────────────────────────────────────

# Ids of the gates that ran and returned non-zero.
declare -a FAILED=()

#######################################
# Run one gate if it was requested and record a failure.
# Globals:   FAILED (appended to)
# Arguments: $1 - gate id, $2 - gate name; together they select the
#            function gate_<id>_<name>
# Returns:   always 0; failures are reported through FAILED
#
# NOTE: calling the gate under `if !` makes Bash ignore `set -e` for the
# whole gate body, so a gate only fails through its own return status.
#######################################
run_gate() {
  local idx="$1" name="$2"
  want_gate "${idx}" || return 0
  if ! "gate_${idx}_${name}"; then
    FAILED+=("${idx}")
  fi
}

# Validate the selection before running anything. want_gate silently
# ignores ids it does not know, so a typo such as `--gates 7` (or an empty
# list) would otherwise run nothing and still print
# "All requested gates passed."
GATES="${GATES//[[:space:]]/}"
declare -a REQUESTED_GATES=()
IFS=',' read -r -a REQUESTED_GATES <<< "${GATES}"
if [[ ${#REQUESTED_GATES[@]} -eq 0 ]]; then
  echo "no gates requested" >&2
  exit 2
fi
for gate_id in "${REQUESTED_GATES[@]}"; do
  case "${gate_id}" in
    1 | 2 | 3 | 4 | 5 | 6) ;;
    *)
      echo "unknown gate id: '${gate_id}'" >&2
      exit 2
      ;;
  esac
done

# Run every gate in order; run_gate skips the ones not requested and
# records failures in FAILED.
run_gate 1 static_corpus
run_gate 2 dynamic_tests
run_gate 3 verify_ratio
run_gate 4 sarif_schema
run_gate 5 layering
run_gate 6 owasp_scale

# Summarise: exit 1 if any gate failed, otherwise report success.
if [[ ${#FAILED[@]} -gt 0 ]]; then
  echo
  echo "FAILED gates: ${FAILED[*]}"
  exit 1
fi
echo
echo "All requested gates passed."