mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-09 19:45:13 +02:00
[pitboss/grind] deferred session-0001 (20260517T044708Z-e058)
This commit is contained in:
parent
6189c4a4c5
commit
3d51a3d8ae
4 changed files with 264 additions and 2 deletions
|
|
@ -379,11 +379,22 @@ impl LangEmitter for CEmitter {
|
|||
|
||||
/// Phase 26 — C chain-step harness.
|
||||
///
|
||||
/// Splices the C probe shim ([`probe_shim`]) ahead of a minimal driver
|
||||
/// that reads `NYX_PREV_OUTPUT` and forwards it on stdout. The shim's
|
||||
/// static functions (`__nyx_probe`, `__nyx_install_crash_guard`,
|
||||
/// `__nyx_stub_sql_record`, `__nyx_stub_http_record`) become callable
|
||||
/// from a future sink-rewrite pass without bringing in another
|
||||
/// translation unit. Unreferenced shim helpers stay quiet under
|
||||
/// default `cc` flags — `-Wunused-function` is not on the warning
|
||||
/// baseline so dead helpers do not fail the build.
|
||||
///
|
||||
/// Shell-wraps `cc` + run so the compiled binary actually executes after
|
||||
/// the build completes — `ChainStepHarness.command` models a single
|
||||
/// process, so the build-then-run sequence must collapse to one `sh -c`.
|
||||
fn chain_step(prev_output: Option<&[u8]>) -> ChainStepHarness {
|
||||
let source = "#include <stdio.h>\n#include <stdlib.h>\n\nint main(void) {\n const char *prev = getenv(\"NYX_PREV_OUTPUT\");\n if (prev) fputs(prev, stdout);\n return 0;\n}\n".to_owned();
|
||||
let shim = probe_shim();
|
||||
let driver = "\nint main(void) {\n const char *prev = getenv(\"NYX_PREV_OUTPUT\");\n if (prev) fputs(prev, stdout);\n return 0;\n}\n";
|
||||
let source = format!("{shim}{driver}");
|
||||
ChainStepHarness {
|
||||
source,
|
||||
filename: "step.c".to_owned(),
|
||||
|
|
@ -853,4 +864,54 @@ mod tests {
|
|||
let mk = h.extra_files.iter().find(|(n, _)| n == "Makefile").expect("Makefile must be staged");
|
||||
assert!(mk.1.contains("nyx_harness: main.c entry.c"));
|
||||
}
|
||||
|
||||
#[test]
fn chain_step_splices_probe_shim_for_composite_reverify() {
    // Phase 26 follow-up. The C chain step is expected to put the probe
    // shim in front of the driver so that a step ending at a sink can use
    // the `__nyx_probe` channel directly. Checked, in this order:
    //   1. the shim banner and `__nyx_install_crash_guard` are in the source,
    //   2. the banner appears before `int main(void)`,
    //   3. the file name and the single `sh -c` build-then-run command,
    //   4. `prev_output` is forwarded through `extra_env`,
    //   5. no companion files are staged.
    const SHIM_BANNER: &str = "__nyx_probe shim (Phase 06";

    let harness = chain_step(Some(b"prev-output"));
    let generated = &harness.source;

    // (1) Shim content must be part of the generated translation unit.
    assert!(
        generated.contains(SHIM_BANNER),
        "probe_shim banner missing from chain step source",
    );
    assert!(
        generated.contains("static void __nyx_install_crash_guard("),
        "install_crash_guard missing from chain step source",
    );

    // (2) Ordering: the shim text has to come ahead of the driver's `main`.
    let shim_pos = generated.find(SHIM_BANNER).expect("shim banner");
    let main_pos = generated.find("int main(void)").expect("main fn");
    assert!(
        shim_pos < main_pos,
        "shim must be spliced before int main: shim={shim_pos} main={main_pos}",
    );

    // (3) Build and run stay collapsed into one shell invocation.
    assert_eq!(harness.filename, "step.c");
    let expected_command: Vec<String> = ["sh", "-c", "cc step.c -o step && ./step"]
        .iter()
        .map(|part| (*part).to_owned())
        .collect();
    assert_eq!(harness.command, expected_command);

    // (4) The previous step's output travels via the environment.
    let prev_output_forwarded = harness
        .extra_env
        .iter()
        .any(|(key, value)| key == ChainStepHarness::PREV_OUTPUT_ENV && value == "prev-output");
    assert!(
        prev_output_forwarded,
        "prev_output must be threaded through extra_env, got {:?}",
        harness.extra_env,
    );

    // (5) Nothing besides the single source file is staged.
    assert!(
        harness.extra_files.is_empty(),
        "C chain step needs no companion build manifest; `cc` is self-sufficient",
    );
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -332,10 +332,18 @@ impl LangEmitter for CppEmitter {
|
|||
|
||||
/// Phase 26 — C++ chain-step harness.
|
||||
///
|
||||
/// Splices the C++ probe shim ([`probe_shim`]) ahead of a minimal driver
|
||||
/// that reads `NYX_PREV_OUTPUT` and forwards it on stdout. Same
|
||||
/// rationale as the C sibling: the inline shim helpers become callable
|
||||
/// from a future sink-rewrite pass without a separate translation unit;
|
||||
/// unreferenced inline functions stay quiet under default `c++` flags.
|
||||
///
|
||||
/// Shell-wraps `c++` + run so the compiled binary actually executes
|
||||
/// after the build completes (see C-side commentary for the rationale).
|
||||
fn chain_step(prev_output: Option<&[u8]>) -> ChainStepHarness {
|
||||
let source = "#include <cstdio>\n#include <cstdlib>\n\nint main() {\n const char *prev = std::getenv(\"NYX_PREV_OUTPUT\");\n if (prev) std::fputs(prev, stdout);\n return 0;\n}\n".to_owned();
|
||||
let shim = probe_shim();
|
||||
let driver = "\nint main() {\n const char *prev = std::getenv(\"NYX_PREV_OUTPUT\");\n if (prev) std::fputs(prev, stdout);\n return 0;\n}\n";
|
||||
let source = format!("{shim}{driver}");
|
||||
ChainStepHarness {
|
||||
source,
|
||||
filename: "step.cpp".to_owned(),
|
||||
|
|
@ -725,4 +733,52 @@ mod tests {
|
|||
let mk = h.extra_files.iter().find(|(n, _)| n == "CMakeLists.txt").expect("CMakeLists.txt must be staged");
|
||||
assert!(mk.1.contains("add_executable(nyx_harness main.cpp)"));
|
||||
}
|
||||
|
||||
#[test]
fn chain_step_splices_probe_shim_for_composite_reverify() {
    // Phase 26 follow-up for the C++ emitter: the chain step must carry the
    // probe shim ahead of the driver so a sink-terminated step can use the
    // `__nyx_probe` channel. The assertions cover the shim banner, the
    // `__nyx_install_crash_guard` helper, banner-before-`int main()` order,
    // the `step.cpp` file name, the one-shot `sh -c` command, the
    // `extra_env` hand-off of `prev_output`, and the absence of extra files.
    let banner = "__nyx_probe shim (Phase 06";
    let built = chain_step(Some(b"prev-output"));
    let text = &built.source;

    // The shim and its crash-guard helper have to be present at all.
    assert!(
        text.contains(banner),
        "probe_shim banner missing from chain step source",
    );
    assert!(
        text.contains("inline void __nyx_install_crash_guard("),
        "install_crash_guard missing from chain step source",
    );

    // The shim must precede the driver entry point.
    let shim_pos = text.find(banner).expect("shim banner");
    let main_pos = text.find("int main()").expect("main fn");
    assert!(
        shim_pos < main_pos,
        "shim must be spliced before int main: shim={shim_pos} main={main_pos}",
    );

    // Compile and execute remain a single shell command.
    assert_eq!(built.filename, "step.cpp");
    let want_command: Vec<String> = vec![
        String::from("sh"),
        String::from("-c"),
        String::from("c++ step.cpp -o step && ./step"),
    ];
    assert_eq!(built.command, want_command);

    // `prev_output` is expected under the PREV_OUTPUT_ENV key.
    let env_has_prev = built
        .extra_env
        .iter()
        .any(|(name, val)| name == ChainStepHarness::PREV_OUTPUT_ENV && val == "prev-output");
    assert!(
        env_has_prev,
        "prev_output must be threaded through extra_env, got {:?}",
        built.extra_env,
    );

    // No build manifest or other companion file is staged.
    assert!(
        built.extra_files.is_empty(),
        "C++ chain step needs no companion build manifest; `c++` is self-sufficient",
    );
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -317,6 +317,19 @@ def main() -> int:
|
|||
p.add_argument("--ground-truth", default="", help="ground truth JSON")
|
||||
p.add_argument("--inhouse", action="store_true")
|
||||
p.add_argument("--append", required=True, help="results accumulator JSON")
|
||||
p.add_argument(
|
||||
"--manual-triage",
|
||||
default="",
|
||||
help=(
|
||||
"path to a manual-triage JSON file (list of "
|
||||
"{path, line, cap, vuln: bool}). Confirmed findings matching a "
|
||||
"`vuln: false` entry are stamped with `wrong: true` before "
|
||||
"tabulation so the per-cell False-Confirmed budget becomes "
|
||||
"non-vacuous without depending on the host's `nyx verify-feedback` "
|
||||
"log. Matching uses LINE_TOLERANCE (=5) — line == 0 in the triage "
|
||||
"entry matches any line."
|
||||
),
|
||||
)
|
||||
p.add_argument(
|
||||
"--budget",
|
||||
default="",
|
||||
|
|
@ -332,6 +345,47 @@ def main() -> int:
|
|||
scan_data = load_json(args.scan)
|
||||
findings = scan_data if isinstance(scan_data, list) else scan_data.get("findings", [])
|
||||
|
||||
# ── Manual-triage stamping (Phase 31 follow-up) ───────────────────────
|
||||
# Cross-reference Confirmed rows against a manual-triage file before
|
||||
# tabulation. Each `vuln: false` entry whose `(path, cap)` matches a
|
||||
# Confirmed finding (with LINE_TOLERANCE, or any line when triage
|
||||
# entry's `line == 0`) stamps `wrong: true` on the finding's
|
||||
# `dynamic_verdict`, which the existing wrong_confirmed counter picks
|
||||
# up below. Decouples the False-Confirmed budget from the host-local
|
||||
# `nyx verify-feedback` log so CI on a fresh eval corpus can still
|
||||
# gate the headline target.
|
||||
if args.manual_triage and Path(args.manual_triage).exists():
|
||||
triage = load_json(args.manual_triage)
|
||||
not_vuln: list[dict] = []
|
||||
for entry in triage if isinstance(triage, list) else []:
|
||||
if entry.get("vuln") is False:
|
||||
not_vuln.append({
|
||||
"path": entry.get("path", ""),
|
||||
"line": entry.get("line", 0),
|
||||
"cap": entry.get("cap", ""),
|
||||
})
|
||||
used: set[int] = set()
|
||||
for f in findings:
|
||||
ev = f.get("evidence") or {}
|
||||
dv = ev.get("dynamic_verdict") or {}
|
||||
if dv.get("status") != "Confirmed":
|
||||
continue
|
||||
f_path = f.get("path", "")
|
||||
f_line = f.get("line", 0)
|
||||
f_cap = cap_of(f)
|
||||
for idx, entry in enumerate(not_vuln):
|
||||
if idx in used:
|
||||
continue
|
||||
if (entry["path"] == f_path
|
||||
and entry["cap"] == f_cap
|
||||
and (entry["line"] == 0
|
||||
or abs(entry["line"] - f_line) <= LINE_TOLERANCE)):
|
||||
used.add(idx)
|
||||
dv["wrong"] = True
|
||||
ev["dynamic_verdict"] = dv
|
||||
f["evidence"] = ev
|
||||
break
|
||||
|
||||
# Per-cell tallies: {(cap, lang): {tp, fp, fn, unsupported, confirmed,
|
||||
# wrong_confirmed, stable_replays, total}}
|
||||
cells: dict[tuple[str, str], dict] = defaultdict(
|
||||
|
|
|
|||
|
|
@ -199,6 +199,95 @@ def test_diff_passes_on_improvement(tmp: Path) -> None:
|
|||
assert "no regressions" in proc.stdout, proc.stdout
|
||||
|
||||
|
||||
def test_manual_triage_stamps_wrong_confirmed(tmp: Path) -> None:
    # Phase 31 follow-up. With --manual-triage, Confirmed findings are
    # cross-referenced against {path, line, cap, vuln: false} entries and
    # stamped `wrong: true`, which feeds the per-cell wrong_confirmed
    # counter without needing the host's verify-feedback log.
    #
    # Fixture: two Confirmed SQL findings in app.py, at lines 10 and 100.
    # The single triage entry sits at line 12, i.e. within LINE_TOLERANCE=5
    # of line 10 only, so exactly one of the two Confirmed rows is expected
    # to be counted as wrong.
    scan_path = tmp / "scan.json"
    triage_path = tmp / "triage.json"
    results_path = tmp / "results.json"

    confirmed_rows = [
        python_finding(SINK_BIT_SQL, "app.py", line_no, "Confirmed")
        for line_no in (10, 100)
    ]
    write_json(scan_path, {"findings": confirmed_rows})
    write_json(
        triage_path,
        [{"path": "app.py", "line": 12, "cap": "sqli", "vuln": False}],
    )
    # The accumulator starts out as an empty list.
    write_json(results_path, [])

    argv = [
        "--label", "triage-test",
        "--scan", str(scan_path),
        "--inhouse",
        "--append", str(results_path),
        "--manual-triage", str(triage_path),
    ]
    proc = run_tabulate(*argv)
    assert proc.returncode == 0, (
        f"manual-triage run must succeed without budget, got {proc.returncode}\n"
        f"stdout: {proc.stdout}\nstderr: {proc.stderr}"
    )

    # Index the most recently appended run's cells by (cap, lang).
    latest_run = json.loads(results_path.read_text())[-1]
    cells = {}
    for cell in latest_run["cells"]:
        cells[(cell["cap"], cell["lang"])] = cell

    sqli_py = cells.get(("sqli", "python"))
    assert sqli_py is not None, f"expected sqli/python cell, got {list(cells)}"
    assert sqli_py["confirmed"] == 2, sqli_py
    assert sqli_py["wrong_confirmed"] == 1, (
        "exactly one Confirmed finding must be stamped wrong via the triage match; "
        f"got {sqli_py}"
    )
|
||||
|
||||
|
||||
def test_manual_triage_ignores_vuln_true_entries(tmp: Path) -> None:
    # Triage entries with `vuln: true` are ground-truth-positive markers,
    # not False-Confirmed evidence. --manual-triage must leave them alone
    # so a real Confirmed-on-vuln-true row does not get downgraded.
    #
    # Fixture: one Confirmed SQL finding at app.py:10 and one triage entry
    # at the same path/line/cap, but with `vuln: True`. The finding must
    # still count as Confirmed and must not count as wrong_confirmed.
    scan = tmp / "scan.json"
    write_json(
        scan,
        {
            "findings": [
                python_finding(SINK_BIT_SQL, "app.py", 10, "Confirmed"),
            ]
        },
    )
    triage = tmp / "triage.json"
    write_json(
        triage,
        [
            {"path": "app.py", "line": 10, "cap": "sqli", "vuln": True},
        ],
    )
    append = tmp / "results.json"
    write_json(append, [])
    proc = run_tabulate(
        "--label", "triage-true-test",
        "--scan", str(scan),
        "--inhouse",
        "--append", str(append),
        "--manual-triage", str(triage),
    )
    # Surface the subprocess output on failure, like the sibling
    # stamps-wrong test does, instead of a bare AssertionError.
    assert proc.returncode == 0, (
        f"manual-triage run with vuln:true entries must succeed, got {proc.returncode}\n"
        f"stdout: {proc.stdout}\nstderr: {proc.stderr}"
    )
    results = json.loads(append.read_text())
    cells = {(c["cap"], c["lang"]): c for c in results[-1]["cells"]}
    # A missing cell should report which cells exist rather than raise KeyError.
    sqli_py = cells.get(("sqli", "python"))
    assert sqli_py is not None, f"expected sqli/python cell, got {list(cells)}"
    assert sqli_py["confirmed"] == 1, sqli_py
    assert sqli_py["wrong_confirmed"] == 0, (
        f"vuln:true triage rows must not stamp wrong; got {sqli_py}"
    )
|
||||
|
||||
|
||||
def test_budget_malformed_exits_3(tmp: Path) -> None:
|
||||
bad = tmp / "bad.toml"
|
||||
bad.write_text("[default]\nunsupported_rate = not_a_number\n")
|
||||
|
|
@ -226,6 +315,8 @@ def main() -> int:
|
|||
test_budget_fails_when_unsupported_exceeds,
|
||||
test_diff_fails_on_regression,
|
||||
test_diff_passes_on_improvement,
|
||||
test_manual_triage_stamps_wrong_confirmed,
|
||||
test_manual_triage_ignores_vuln_true_entries,
|
||||
test_budget_malformed_exits_3,
|
||||
):
|
||||
sub = tmp / fn.__name__
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue