mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-21 20:18:06 +02:00
feat(eval-corpus): add Track R.2 polyglot corpora (RailsGoat, DVWA, DVPWA, gosec, RustSec) with curated manifests, negative controls, and CI validation
This commit is contained in:
parent
2a4d49b68b
commit
e0833537e4
20 changed files with 1181 additions and 53 deletions
|
|
@ -362,15 +362,34 @@ def main() -> int:
|
|||
default="",
|
||||
help="path to budget.toml (per-(cap,lang) thresholds)",
|
||||
)
|
||||
p.add_argument(
|
||||
"--lang",
|
||||
default="",
|
||||
help=(
|
||||
"comma-separated language allowlist (python, javascript, php, "
|
||||
"ruby, go, rust, ...). When set, only findings AND ground-truth "
|
||||
"entries whose source language is in the list are tabulated; "
|
||||
"everything else is dropped before tallying. Used by the Phase 29 "
|
||||
"polyglot corpora (Track R.2) to scope a single-language corpus to "
|
||||
"its target language so incidental third-party assets in other "
|
||||
"languages — e.g. the vendored JavaScript a Rails or aiohttp app "
|
||||
"bundles — do not pollute that corpus's per-cap metrics. Empty = "
|
||||
"no language filter (every finding tabulated, the OWASP/JSTS "
|
||||
"default)."
|
||||
),
|
||||
)
|
||||
p.add_argument(
|
||||
"--diff",
|
||||
default="",
|
||||
help="path to a previous results JSON; fail on monotonic-improvement regression",
|
||||
)
|
||||
args = p.parse_args()
|
||||
lang_filter = {l.strip() for l in args.lang.split(",") if l.strip()}
|
||||
|
||||
scan_data = load_json(args.scan)
|
||||
findings = scan_data if isinstance(scan_data, list) else scan_data.get("findings", [])
|
||||
if lang_filter:
|
||||
findings = [f for f in findings if lang_of(f) in lang_filter]
|
||||
|
||||
# ── Manual-triage stamping (Phase 31 follow-up) ───────────────────────
|
||||
# Cross-reference Confirmed rows against a manual-triage file before
|
||||
|
|
@ -463,6 +482,10 @@ def main() -> int:
|
|||
# Ground truth format: list of {"path": ..., "line": ..., "cap": ..., "vuln": bool}
|
||||
gt_true: list[dict] = []
|
||||
for entry in gt if isinstance(gt, list) else []:
|
||||
# Honour the same language scope as the findings filter so recall
|
||||
# is measured only over the corpus's target language.
|
||||
if lang_filter and lang_of(entry) not in lang_filter:
|
||||
continue
|
||||
if entry.get("vuln"):
|
||||
gt_true.append({
|
||||
"path": entry.get("path", ""),
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue