mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-18 20:15:14 +02:00
introduce ground-truth converters for OWASP and SARD datasets
This commit is contained in:
parent
e62fddb82a
commit
5909fa8c5d
14 changed files with 16779 additions and 369 deletions
|
|
@ -104,6 +104,7 @@ mod parity_tests {
|
|||
},
|
||||
project_root: None,
|
||||
db_path: None,
|
||||
verify_all_confidence: false,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -116,6 +117,7 @@ mod parity_tests {
|
|||
},
|
||||
project_root: None,
|
||||
db_path: None,
|
||||
verify_all_confidence: false,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -58,17 +58,15 @@ mod escape_tests {
|
|||
backend: SandboxBackend::Docker,
|
||||
env_passthrough: vec![],
|
||||
output_limit: 65536,
|
||||
oob_listener: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Minimal no-op payload (escape scripts ignore NYX_PAYLOAD).
|
||||
fn noop_payload() -> nyx_scanner::dynamic::corpus::Payload {
|
||||
nyx_scanner::dynamic::corpus::Payload {
|
||||
bytes: b"",
|
||||
label: "escape-noop",
|
||||
oracle: nyx_scanner::dynamic::corpus::Oracle::ExitStatus(1),
|
||||
is_benign: true,
|
||||
}
|
||||
/// Minimal no-op payload bytes (escape scripts ignore NYX_PAYLOAD).
|
||||
/// `sandbox::run` takes `&[u8]` directly; the CuratedPayload struct lives
|
||||
/// one level up in the runner.
|
||||
fn noop_payload() -> &'static [u8] {
|
||||
b""
|
||||
}
|
||||
|
||||
/// Copy a directory tree into a destination (creating it if needed).
|
||||
|
|
|
|||
16442
tests/eval_corpus/ground_truth/owasp_benchmark_v1.2.json
Normal file
16442
tests/eval_corpus/ground_truth/owasp_benchmark_v1.2.json
Normal file
File diff suppressed because it is too large
Load diff
97
tests/eval_corpus/owasp_gt_convert.py
Normal file
97
tests/eval_corpus/owasp_gt_convert.py
Normal file
|
|
@ -0,0 +1,97 @@
|
|||
#!/usr/bin/env python3
|
||||
"""Convert OWASP Benchmark v1.2 expectedresults-*.csv into nyx ground-truth JSON.
|
||||
|
||||
Source: `expectedresults-1.2beta.csv` shipped in the BenchmarkJava repo.
|
||||
Output: list of `{path, line, cap, vuln}` records, where:
|
||||
- `path` is the absolute path to the BenchmarkTest*.java under --corpus-dir.
|
||||
- `line` is 0 (CSV does not pin a line; tabulate treats a ground-truth line of 0 as matching any finding line, so LINE_TOLERANCE is not applied).
|
||||
- `cap` is a nyx cap label mapped from the OWASP category column.
|
||||
- `vuln` is True for `real vulnerability == true`, else False.
|
||||
|
||||
Usage:
|
||||
tests/eval_corpus/owasp_gt_convert.py \\
|
||||
--corpus-dir ~/.cache/nyx/eval_corpus/owasp_benchmark_v1.2 \\
|
||||
--output tests/eval_corpus/ground_truth/owasp_benchmark_v1.2.json
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import csv
|
||||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# OWASP Benchmark category column -> nyx cap label written to the ground-truth
# records. Rows whose category is not listed here are counted as skipped.
OWASP_TO_NYX_CAP = {
    "cmdi": "cmdi",
    "crypto": "crypto",
    "hash": "crypto",
    "ldapi": "ldap_injection",
    "pathtraver": "path_traversal",
    "securecookie": "auth",
    "sqli": "sqli",
    "trustbound": "xss",
    "weakrand": "crypto",
    "xpathi": "xpath_injection",
    "xss": "xss",
}


def main() -> int:
    """Convert the expected-results CSV into a ground-truth JSON file.

    Reads `--corpus-dir`, `--output` and the optional `--csv` override from
    the command line, emits one `{path, line, cap, vuln}` record per CSV row
    whose category is mapped and whose `<name>.java` exists in the testcode
    directory, then prints a short summary.

    Returns:
        0 on success; 1 when the CSV or the Java testcode directory is missing.
    """
    p = argparse.ArgumentParser()
    p.add_argument("--corpus-dir", required=True,
                   help="Path to BenchmarkJava clone root.")
    p.add_argument("--output", required=True,
                   help="Output ground-truth JSON path.")
    p.add_argument("--csv", default="",
                   help="Override CSV path (default: <corpus-dir>/expectedresults-1.2beta.csv).")
    args = p.parse_args()

    corpus = Path(args.corpus_dir).expanduser().resolve()
    # Expand "~" in the override too, so it behaves like the other path arguments.
    if args.csv:
        csv_path = Path(args.csv).expanduser()
    else:
        csv_path = corpus / "expectedresults-1.2beta.csv"
    if not csv_path.exists():
        print(f"error: csv not found: {csv_path}", file=sys.stderr)
        return 1

    java_root = corpus / "src" / "main" / "java" / "org" / "owasp" / "benchmark" / "testcode"
    if not java_root.is_dir():
        print(f"error: java testcode dir not found: {java_root}", file=sys.stderr)
        return 1

    records: list[dict] = []
    skipped = 0
    # newline="" lets the csv module do its own line-ending handling; the
    # explicit encoding keeps parsing independent of the host locale.
    with open(csv_path, newline="", encoding="utf-8") as f:
        reader = csv.reader(f)
        # Drop the first row, which is assumed to be the header.
        next(reader, None)
        for row in reader:
            # Blank or truncated rows carry no usable record.
            if len(row) < 3:
                continue
            name, category, real_vuln = row[0].strip(), row[1].strip(), row[2].strip().lower()
            cap = OWASP_TO_NYX_CAP.get(category)
            if cap is None:
                skipped += 1
                continue
            java_file = java_root / f"{name}.java"
            if not java_file.exists():
                skipped += 1
                continue
            records.append({
                "path": str(java_file),
                # No line number is read from the CSV, so every record uses 0.
                "line": 0,
                "cap": cap,
                "vuln": real_vuln == "true",
            })

    out = Path(args.output).expanduser().resolve()
    out.parent.mkdir(parents=True, exist_ok=True)
    with open(out, "w", encoding="utf-8") as f:
        json.dump(records, f, indent=2)

    vuln_count = sum(1 for r in records if r["vuln"])
    print(f"wrote {len(records)} records to {out}")
    print(f"  vulns:    {vuln_count}")
    print(f"  non-vuln: {len(records) - vuln_count}")
    print(f"  skipped:  {skipped}")
    return 0


if __name__ == "__main__":
    sys.exit(main())
|
||||
|
|
@ -147,7 +147,23 @@ fi
|
|||
# ── Emit summary table ────────────────────────────────────────────────────────
|
||||
info ""
|
||||
info "Results written to: $RESULTS_JSON"
|
||||
python3 "${SCRIPT_DIR}/report.py" --results "$RESULTS_JSON" \
|
||||
|| { info "report.py not available; raw results at $RESULTS_JSON"; exit 0; }
|
||||
|
||||
[[ -n "$OUTPUT_DIR" ]] && cp "$RESULTS_JSON" "${OUTPUT_DIR}/eval_results.json"
|
||||
|
||||
if [[ ! -f "${SCRIPT_DIR}/report.py" ]]; then
|
||||
info "report.py not available; raw results at $RESULTS_JSON"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
set +e
|
||||
python3 "${SCRIPT_DIR}/report.py" --results "$RESULTS_JSON"
|
||||
REPORT_RC=$?
|
||||
set -e
|
||||
# Propagate gate-fail (exit 2). Treat other non-zero as setup error (exit 1).
|
||||
if [[ $REPORT_RC -eq 2 ]]; then
|
||||
exit 2
|
||||
elif [[ $REPORT_RC -ne 0 ]]; then
|
||||
info "report.py crashed (exit $REPORT_RC); raw results at $RESULTS_JSON"
|
||||
exit 1
|
||||
fi
|
||||
exit 0
|
||||
|
|
|
|||
134
tests/eval_corpus/sard_gt_convert.py
Normal file
134
tests/eval_corpus/sard_gt_convert.py
Normal file
|
|
@ -0,0 +1,134 @@
|
|||
#!/usr/bin/env python3
|
||||
"""Convert NIST SARD manifest XML into nyx ground-truth JSON.
|
||||
|
||||
SARD ships per-test-case `manifest.xml` files alongside source. Each
|
||||
`<testcase>` lists one or more `<file path="…">` entries with optional
|
||||
`<flaw line="…" name="CWE-XXX_…"/>` children.
|
||||
|
||||
Output schema (consumed by tabulate.py):
|
||||
list of {"path", "line", "cap", "vuln"} records.
|
||||
|
||||
Usage:
|
||||
tests/eval_corpus/sard_gt_convert.py \\
|
||||
--corpus-dir ~/.cache/nyx/eval_corpus/nist_sard \\
|
||||
--output tests/eval_corpus/ground_truth/nist_sard.json
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import re
|
||||
import sys
|
||||
import xml.etree.ElementTree as ET
|
||||
from pathlib import Path
|
||||
|
||||
CWE_TO_NYX_CAP = {
|
||||
"20": "validation",
|
||||
"22": "path_traversal",
|
||||
"78": "cmdi",
|
||||
"79": "xss",
|
||||
"89": "sqli",
|
||||
"90": "ldap_injection",
|
||||
"91": "xpath_injection",
|
||||
"94": "cmdi",
|
||||
"113": "header_injection",
|
||||
"117": "header_injection",
|
||||
"190": "memory",
|
||||
"200": "data_exfil",
|
||||
"287": "auth",
|
||||
"295": "crypto",
|
||||
"311": "crypto",
|
||||
"327": "crypto",
|
||||
"328": "crypto",
|
||||
"330": "crypto",
|
||||
"352": "auth",
|
||||
"434": "path_traversal",
|
||||
"476": "memory",
|
||||
"502": "deserialize",
|
||||
"601": "redirect",
|
||||
"611": "xxe",
|
||||
"643": "xpath_injection",
|
||||
"798": "crypto",
|
||||
"918": "ssrf",
|
||||
}
|
||||
|
||||
CWE_RE = re.compile(r"CWE[-_](\d+)", re.IGNORECASE)
|
||||
|
||||
|
||||
def cap_for_flaw(name: str) -> str | None:
|
||||
m = CWE_RE.search(name or "")
|
||||
if not m:
|
||||
return None
|
||||
return CWE_TO_NYX_CAP.get(m.group(1))
|
||||
|
||||
|
||||
def main() -> int:
    """Walk every manifest.xml under the corpus and write ground-truth JSON.

    For each `<file>` inside a `<testcase>` whose path exists on disk:
    a file with no `<flaw>`/`<mixed>` elements yields one non-vulnerable
    record (`cap` "other", `line` 0); otherwise each flaw whose name maps to
    a cap yields one vulnerable record at the flaw's line. Reads
    `--corpus-dir` and `--output` from the command line and prints a summary.

    Returns:
        0 on success; 1 when the corpus directory does not exist.
    """
    p = argparse.ArgumentParser()
    p.add_argument("--corpus-dir", required=True)
    p.add_argument("--output", required=True)
    args = p.parse_args()

    root = Path(args.corpus_dir).expanduser().resolve()
    if not root.is_dir():
        print(f"error: corpus dir not found: {root}", file=sys.stderr)
        return 1

    records: list[dict] = []
    skipped_files = 0
    skipped_caps = 0

    # Sort the manifests so the emitted JSON is reproducible; directory
    # traversal order is otherwise filesystem-dependent.
    for manifest in sorted(root.rglob("manifest.xml")):
        try:
            tree = ET.parse(manifest)
        except ET.ParseError as e:
            # A malformed manifest is reported and skipped, not fatal.
            print(f"warn: parse failed {manifest}: {e}", file=sys.stderr)
            continue
        for tc in tree.iter("testcase"):
            for fnode in tc.iter("file"):
                rel = fnode.get("path") or ""
                if not rel:
                    continue
                # File paths are resolved relative to the manifest's directory.
                abs_path = (manifest.parent / rel).resolve()
                if not abs_path.exists():
                    skipped_files += 1
                    continue
                flaws = list(fnode.iter("flaw")) + list(fnode.iter("mixed"))
                if not flaws:
                    # No flaw annotations: record the file as a known negative.
                    records.append({
                        "path": str(abs_path),
                        "line": 0,
                        "cap": "other",
                        "vuln": False,
                    })
                    continue
                for flaw in flaws:
                    cap = cap_for_flaw(flaw.get("name", ""))
                    if cap is None:
                        skipped_caps += 1
                        continue
                    # A missing, empty or non-numeric line attribute becomes 0.
                    try:
                        line = int(flaw.get("line", "0") or 0)
                    except ValueError:
                        line = 0
                    records.append({
                        "path": str(abs_path),
                        "line": line,
                        "cap": cap,
                        "vuln": True,
                    })

    out = Path(args.output).expanduser().resolve()
    out.parent.mkdir(parents=True, exist_ok=True)
    # Explicit encoding keeps the output independent of the host locale.
    with open(out, "w", encoding="utf-8") as f:
        json.dump(records, f, indent=2)

    vuln_count = sum(1 for r in records if r["vuln"])
    print(f"wrote {len(records)} records to {out}")
    print(f"  vulns:           {vuln_count}")
    print(f"  non-vuln:        {len(records) - vuln_count}")
    print(f"  skipped (file):  {skipped_files}")
    print(f"  skipped (cap):   {skipped_caps}")
    return 0


if __name__ == "__main__":
    sys.exit(main())
|
||||
|
|
@ -19,25 +19,46 @@ from pathlib import Path
|
|||
|
||||
LINE_TOLERANCE = 5
|
||||
|
||||
_CAP_PREFIX_TABLE = [
|
||||
("taint.path_traversal", "path_traversal"),
|
||||
("taint.sql", "sqli"),
|
||||
("taint.xss", "xss"),
|
||||
("taint.ssrf", "ssrf"),
|
||||
("taint.cmdi", "cmdi"),
|
||||
("taint.deserialize", "deserialize"),
|
||||
("taint.redirect", "redirect"),
|
||||
("taint.xxe", "xxe"),
|
||||
# Bitflag positions for Cap (src/labels/mod.rs). Sink bits map to a cap label.
|
||||
_CAP_BIT_TABLE = [
|
||||
(1 << 5, "path_traversal"), # FILE_IO
|
||||
(1 << 6, "fmt_string"),
|
||||
(1 << 7, "sqli"), # SQL_QUERY
|
||||
(1 << 8, "deserialize"),
|
||||
(1 << 9, "ssrf"),
|
||||
(1 << 10, "cmdi"), # CODE_EXEC
|
||||
(1 << 11, "crypto"),
|
||||
(1 << 12, "unauthorized_id"),
|
||||
(1 << 13, "data_exfil"),
|
||||
(1 << 14, "ldap_injection"),
|
||||
(1 << 15, "xpath_injection"),
|
||||
(1 << 16, "header_injection"),
|
||||
(1 << 17, "redirect"), # OPEN_REDIRECT
|
||||
(1 << 18, "xss"), # SSTI (template_injection); also covers XSS sinks
|
||||
(1 << 19, "xxe"),
|
||||
(1 << 20, "prototype_pollution"),
|
||||
]
|
||||
|
||||
# Substring → cap lookup for rule IDs. Order matters: most specific first.
|
||||
_CAP_RULE_TABLE = [
|
||||
("path_traversal", "path_traversal"),
|
||||
("sqli", "sqli"),
|
||||
("xss", "xss"),
|
||||
("ssrf", "ssrf"),
|
||||
("cmdi", "cmdi"),
|
||||
("deserialize", "deserialize"),
|
||||
("redirect", "redirect"),
|
||||
("xxe", "xxe"),
|
||||
("auth", "auth"),
|
||||
("taint", "taint"),
|
||||
("sql", "sqli"),
|
||||
("xss", "xss"),
|
||||
("ssrf", "ssrf"),
|
||||
("cmdi", "cmdi"),
|
||||
("cmd_exec", "cmdi"),
|
||||
("code_exec", "cmdi"),
|
||||
("deser", "deserialize"),
|
||||
("unserialize", "deserialize"),
|
||||
("redirect", "redirect"),
|
||||
("xxe", "xxe"),
|
||||
("template", "xss"),
|
||||
("auth", "auth"),
|
||||
("memory", "memory"),
|
||||
("crypto", "crypto"),
|
||||
("data-exfil", "data_exfil"),
|
||||
("data_exfil", "data_exfil"),
|
||||
("header", "header_injection"),
|
||||
]
|
||||
|
||||
|
||||
|
|
@ -47,9 +68,18 @@ def load_json(path: str) -> object:
|
|||
|
||||
|
||||
def cap_of(finding: dict) -> str:
|
||||
rule = finding.get("rule_id", "").lower()
|
||||
for prefix, cap in _CAP_PREFIX_TABLE:
|
||||
if rule.startswith(prefix):
|
||||
# 1. Prefer evidence.sink_caps bitmask — the engine's own classification.
|
||||
ev = finding.get("evidence", {}) or {}
|
||||
sink_caps = ev.get("sink_caps")
|
||||
if isinstance(sink_caps, int) and sink_caps:
|
||||
for bit, name in _CAP_BIT_TABLE:
|
||||
if sink_caps & bit:
|
||||
return name
|
||||
# 2. Fall back to rule id substring (e.g. py.cmdi.os_system, java.deser.readobject).
|
||||
rid = (finding.get("id") or "").lower()
|
||||
head = rid.split(" ", 1)[0]
|
||||
for needle, cap in _CAP_RULE_TABLE:
|
||||
if needle in head:
|
||||
return cap
|
||||
return "other"
|
||||
|
||||
|
|
@ -122,8 +152,9 @@ def main() -> int:
|
|||
for idx, gt_entry in enumerate(gt_true):
|
||||
if (gt_entry["path"] == f_path
|
||||
and gt_entry["cap"] == f_cap
|
||||
and abs(gt_entry["line"] - f_line) <= LINE_TOLERANCE
|
||||
and idx not in matched_gt):
|
||||
and idx not in matched_gt
|
||||
and (gt_entry["line"] == 0
|
||||
or abs(gt_entry["line"] - f_line) <= LINE_TOLERANCE)):
|
||||
matched_idx = idx
|
||||
break
|
||||
if matched_idx is not None:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue