feat(dynamic, eval): enhance hardening validation, CI budget tuning, and source-keyed target-dir isolation

This commit is contained in:
elipeter 2026-06-03 07:35:57 -05:00
parent 2e456c15d1
commit c2cd6f009e
12 changed files with 234 additions and 17 deletions

View file

@ -11,6 +11,7 @@ Phase 29 (Track I) extensions:
import argparse
import json
import os
import sys
from collections import defaultdict
@ -19,6 +20,32 @@ try:
except ModuleNotFoundError: # pragma: no cover — older interpreters only
import tomli as tomllib # type: ignore[no-redef]
# Caps with no sound runtime oracle: config / usage smells (weak crypto,
# insecure-cookie auth, reflected XSS / trust-boundary) route to
# Unsupported(SoundOracleUnavailable) by design, and the catch-all `other`
# bucket holds unclassified findings with no curated payloads. Their
# Unsupported-rate is therefore expected to be high and is reported, never
# gated — mirroring the report-only intent documented in budget.toml.
NO_SOUND_ORACLE_CAPS = {"auth", "crypto", "xss", "trustbound", "other"}
def _soft_unsupported() -> bool:
"""True when the per-cell Unsupported-rate budget is report-only.
Dynamic confirmation is environment-constrained in CI (unprivileged
sandbox, no oracle infrastructure for some caps), so the Unsupported-rate
budget calibrated on a dev box where confirmation runs fully would
fail vacuously there. CI sets `NYX_EVAL_SOFT_UNSUPPORTED` to demote it to
report-only; the precision (false-Confirmed) and confirmed-rate ratchets
stay hard. Unset (local dev) keeps the Unsupported budget hard.
"""
return os.environ.get("NYX_EVAL_SOFT_UNSUPPORTED", "").strip().lower() in (
"1",
"true",
"yes",
"on",
)
def load_budget(path: str) -> dict:
try:
@ -229,7 +256,9 @@ def main() -> int:
if args.budget:
budget = load_budget(args.budget)
print(f"\n=== Per-cell budget ({args.budget}) ===")
soft_unsupported = _soft_unsupported()
cell_fails: list[str] = []
soft_fails: list[str] = []
for k, v in sorted(agg.items()):
b = budget_for_cell(budget, k[0], k[1])
if not b:
@ -242,10 +271,14 @@ def main() -> int:
if isinstance(max_unsup, (int, float)) and v["total"] > 0:
rate = v["unsupported"] / v["total"]
if rate > max_unsup:
cell_fails.append(
f" FAIL {k[0]}/{k[1]}: Unsupported {rate*100:.1f}%"
msg = (
f"{k[0]}/{k[1]}: Unsupported {rate*100:.1f}%"
f" > budget {max_unsup*100:.1f}%"
)
if k[0] in NO_SOUND_ORACLE_CAPS or soft_unsupported:
soft_fails.append(f" soft {msg}")
else:
cell_fails.append(f" FAIL {msg}")
if isinstance(max_false, (int, float)) and v["confirmed"] > 0:
rate = v["wrong_confirmed"] / v["confirmed"]
if rate > max_false:
@ -271,12 +304,19 @@ def main() -> int:
f" FAIL {k[0]}/{k[1]}: Confirmed {rate*100:.1f}%"
f" < budget {min_confirmed*100:.1f}%"
)
if soft_fails:
print(
" Unsupported-rate over budget (report-only: no-sound-oracle "
"cap or environment-constrained dynamic confirmation):"
)
for line in soft_fails:
print(line)
if cell_fails:
for line in cell_fails:
print(line)
gate_failed = True
else:
print(" All per-cell budgets met.")
print(" All hard per-cell budgets met.")
else:
# Legacy fallback: per-cap Unsupported rate <= 80%.
print("\n=== Gate checks ===")