[pitboss/grind] deferred session-0001 (20260517T044708Z-e058)

This commit is contained in:
pitboss 2026-05-17 00:05:12 -05:00
parent 6189c4a4c5
commit 3d51a3d8ae
4 changed files with 264 additions and 2 deletions

View file

@ -379,11 +379,22 @@ impl LangEmitter for CEmitter {
/// Phase 26 — C chain-step harness.
///
/// Splices the C probe shim ([`probe_shim`]) ahead of a minimal driver
/// that reads `NYX_PREV_OUTPUT` and forwards it on stdout. The shim's
/// static functions (`__nyx_probe`, `__nyx_install_crash_guard`,
/// `__nyx_stub_sql_record`, `__nyx_stub_http_record`) become callable
/// from a future sink-rewrite pass without bringing in another
/// translation unit. Unreferenced shim helpers stay quiet under
/// default `cc` flags — `-Wunused-function` is not on the warning
/// baseline so dead helpers do not fail the build.
///
/// Shell-wraps `cc` + run so the compiled binary actually executes after
/// the build completes — `ChainStepHarness.command` models a single
/// process, so the build-then-run sequence must collapse to one `sh -c`.
fn chain_step(prev_output: Option<&[u8]>) -> ChainStepHarness {
let source = "#include <stdio.h>\n#include <stdlib.h>\n\nint main(void) {\n const char *prev = getenv(\"NYX_PREV_OUTPUT\");\n if (prev) fputs(prev, stdout);\n return 0;\n}\n".to_owned();
let shim = probe_shim();
let driver = "\nint main(void) {\n const char *prev = getenv(\"NYX_PREV_OUTPUT\");\n if (prev) fputs(prev, stdout);\n return 0;\n}\n";
let source = format!("{shim}{driver}");
ChainStepHarness {
source,
filename: "step.c".to_owned(),
@ -853,4 +864,54 @@ mod tests {
let mk = h.extra_files.iter().find(|(n, _)| n == "Makefile").expect("Makefile must be staged");
assert!(mk.1.contains("nyx_harness: main.c entry.c"));
}
#[test]
fn chain_step_splices_probe_shim_for_composite_reverify() {
    // Phase 26 follow-up: the C chain step carries the probe shim in
    // front of its driver. Checked here, in order:
    //   1. the shim banner and the crash-guard definition are present
    //      in the generated source,
    //   2. the banner lands before `int main(void)`,
    //   3. the file name and the single `sh -c` build-then-run command,
    //   4. `prev_output` is forwarded through `extra_env`,
    //   5. no companion files are staged.
    const SHIM_BANNER: &str = "__nyx_probe shim (Phase 06";
    const CRASH_GUARD_DEF: &str = "static void __nyx_install_crash_guard(";

    let harness = chain_step(Some(b"prev-output"));
    let generated = &harness.source;

    assert!(
        generated.contains(SHIM_BANNER),
        "probe_shim banner missing from chain step source",
    );
    assert!(
        generated.contains(CRASH_GUARD_DEF),
        "install_crash_guard missing from chain step source",
    );

    // Ordering check: the shim text has to come before the driver's entry point.
    let banner_at = generated.find(SHIM_BANNER).expect("shim banner");
    let driver_at = generated.find("int main(void)").expect("main fn");
    assert!(
        banner_at < driver_at,
        "shim must be spliced before int main: shim={shim_pos} main={main_pos}",
        shim_pos = banner_at,
        main_pos = driver_at,
    );

    assert_eq!(harness.filename, "step.c");

    // Build and run must stay collapsed into one shell invocation.
    let expected_command: Vec<String> = ["sh", "-c", "cc step.c -o step && ./step"]
        .iter()
        .map(|part| (*part).to_owned())
        .collect();
    assert_eq!(harness.command, expected_command);

    let prev_output_forwarded = harness
        .extra_env
        .iter()
        .any(|(key, value)| key == ChainStepHarness::PREV_OUTPUT_ENV && value == "prev-output");
    assert!(
        prev_output_forwarded,
        "prev_output must be threaded through extra_env, got {:?}",
        harness.extra_env,
    );

    assert!(
        harness.extra_files.is_empty(),
        "C chain step needs no companion build manifest; `cc` is self-sufficient",
    );
}
}

View file

@ -332,10 +332,18 @@ impl LangEmitter for CppEmitter {
/// Phase 26 — C++ chain-step harness.
///
/// Splices the C++ probe shim ([`probe_shim`]) ahead of a minimal driver
/// that reads `NYX_PREV_OUTPUT` and forwards it on stdout. Same
/// rationale as the C sibling: the inline shim helpers become callable
/// from a future sink-rewrite pass without a separate translation unit;
/// unreferenced inline functions stay quiet under default `c++` flags.
///
/// Shell-wraps `c++` + run so the compiled binary actually executes
/// after the build completes (see C-side commentary for the rationale).
fn chain_step(prev_output: Option<&[u8]>) -> ChainStepHarness {
let source = "#include <cstdio>\n#include <cstdlib>\n\nint main() {\n const char *prev = std::getenv(\"NYX_PREV_OUTPUT\");\n if (prev) std::fputs(prev, stdout);\n return 0;\n}\n".to_owned();
let shim = probe_shim();
let driver = "\nint main() {\n const char *prev = std::getenv(\"NYX_PREV_OUTPUT\");\n if (prev) std::fputs(prev, stdout);\n return 0;\n}\n";
let source = format!("{shim}{driver}");
ChainStepHarness {
source,
filename: "step.cpp".to_owned(),
@ -725,4 +733,52 @@ mod tests {
let mk = h.extra_files.iter().find(|(n, _)| n == "CMakeLists.txt").expect("CMakeLists.txt must be staged");
assert!(mk.1.contains("add_executable(nyx_harness main.cpp)"));
}
#[test]
fn chain_step_splices_probe_shim_for_composite_reverify() {
    // Phase 26 follow-up: the C++ chain step carries the probe shim in
    // front of its driver. Checked here, in order:
    //   1. the shim banner and the crash-guard definition are present
    //      in the generated source,
    //   2. the banner lands before `int main()`,
    //   3. the file name and the single `sh -c` build-then-run command,
    //   4. `prev_output` is forwarded through `extra_env`,
    //   5. no companion files are staged.
    const SHIM_BANNER: &str = "__nyx_probe shim (Phase 06";
    const CRASH_GUARD_DEF: &str = "inline void __nyx_install_crash_guard(";

    let harness = chain_step(Some(b"prev-output"));
    let generated = &harness.source;

    assert!(
        generated.contains(SHIM_BANNER),
        "probe_shim banner missing from chain step source",
    );
    assert!(
        generated.contains(CRASH_GUARD_DEF),
        "install_crash_guard missing from chain step source",
    );

    // Ordering check: the shim text has to come before the driver's entry point.
    let banner_at = generated.find(SHIM_BANNER).expect("shim banner");
    let driver_at = generated.find("int main()").expect("main fn");
    assert!(
        banner_at < driver_at,
        "shim must be spliced before int main: shim={shim_pos} main={main_pos}",
        shim_pos = banner_at,
        main_pos = driver_at,
    );

    assert_eq!(harness.filename, "step.cpp");

    // Build and run must stay collapsed into one shell invocation.
    let expected_command: Vec<String> = ["sh", "-c", "c++ step.cpp -o step && ./step"]
        .iter()
        .map(|part| (*part).to_owned())
        .collect();
    assert_eq!(harness.command, expected_command);

    let prev_output_forwarded = harness
        .extra_env
        .iter()
        .any(|(key, value)| key == ChainStepHarness::PREV_OUTPUT_ENV && value == "prev-output");
    assert!(
        prev_output_forwarded,
        "prev_output must be threaded through extra_env, got {:?}",
        harness.extra_env,
    );

    assert!(
        harness.extra_files.is_empty(),
        "C++ chain step needs no companion build manifest; `c++` is self-sufficient",
    );
}
}

View file

@ -317,6 +317,19 @@ def main() -> int:
p.add_argument("--ground-truth", default="", help="ground truth JSON")
p.add_argument("--inhouse", action="store_true")
p.add_argument("--append", required=True, help="results accumulator JSON")
p.add_argument(
"--manual-triage",
default="",
help=(
"path to a manual-triage JSON file (list of "
"{path, line, cap, vuln: bool}). Confirmed findings matching a "
"`vuln: false` entry are stamped with `wrong: true` before "
"tabulation so the per-cell False-Confirmed budget becomes "
"non-vacuous without depending on the host's `nyx verify-feedback` "
"log. Matching uses LINE_TOLERANCE (=5) — line == 0 in the triage "
"entry matches any line."
),
)
p.add_argument(
"--budget",
default="",
@ -332,6 +345,47 @@ def main() -> int:
scan_data = load_json(args.scan)
findings = scan_data if isinstance(scan_data, list) else scan_data.get("findings", [])
# ── Manual-triage stamping (Phase 31 follow-up) ───────────────────────
# Cross-reference Confirmed rows against a manual-triage file before
# tabulation. Each `vuln: false` entry whose `(path, cap)` matches a
# Confirmed finding (with LINE_TOLERANCE, or any line when triage
# entry's `line == 0`) stamps `wrong: true` on the finding's
# `dynamic_verdict`, which the existing wrong_confirmed counter picks
# up below. Decouples the False-Confirmed budget from the host-local
# `nyx verify-feedback` log so CI on a fresh eval corpus can still
# gate the headline target.
if args.manual_triage and Path(args.manual_triage).exists():
triage = load_json(args.manual_triage)
not_vuln: list[dict] = []
for entry in triage if isinstance(triage, list) else []:
if entry.get("vuln") is False:
not_vuln.append({
"path": entry.get("path", ""),
"line": entry.get("line", 0),
"cap": entry.get("cap", ""),
})
used: set[int] = set()
for f in findings:
ev = f.get("evidence") or {}
dv = ev.get("dynamic_verdict") or {}
if dv.get("status") != "Confirmed":
continue
f_path = f.get("path", "")
f_line = f.get("line", 0)
f_cap = cap_of(f)
for idx, entry in enumerate(not_vuln):
if idx in used:
continue
if (entry["path"] == f_path
and entry["cap"] == f_cap
and (entry["line"] == 0
or abs(entry["line"] - f_line) <= LINE_TOLERANCE)):
used.add(idx)
dv["wrong"] = True
ev["dynamic_verdict"] = dv
f["evidence"] = ev
break
# Per-cell tallies: {(cap, lang): {tp, fp, fn, unsupported, confirmed,
# wrong_confirmed, stable_replays, total}}
cells: dict[tuple[str, str], dict] = defaultdict(

View file

@ -199,6 +199,95 @@ def test_diff_passes_on_improvement(tmp: Path) -> None:
assert "no regressions" in proc.stdout, proc.stdout
def test_manual_triage_stamps_wrong_confirmed(tmp: Path) -> None:
    # Phase 31 follow-up: with --manual-triage, a Confirmed finding that
    # lines up with a `vuln: false` triage entry is expected to be counted
    # as wrong_confirmed, independent of the host's verify-feedback log.
    #
    # Fixture: two Confirmed SQL findings in app.py (lines 10 and 100) and
    # one triage entry at line 12. Line 10 is within LINE_TOLERANCE (=5)
    # of the entry; line 100 is not, so the expected tally is
    # 2 confirmed / 1 wrong_confirmed.
    confirmed_lines = (10, 100)
    scan_path = tmp / "scan.json"
    scan_payload = {
        "findings": [
            python_finding(SINK_BIT_SQL, "app.py", line_no, "Confirmed")
            for line_no in confirmed_lines
        ]
    }
    write_json(scan_path, scan_payload)

    triage_path = tmp / "triage.json"
    triage_entries = [
        {"path": "app.py", "line": 12, "cap": "sqli", "vuln": False},
    ]
    write_json(triage_path, triage_entries)

    # The accumulator starts out as an empty list and is appended to by the run.
    results_path = tmp / "results.json"
    write_json(results_path, [])

    cli_args = [
        "--label", "triage-test",
        "--scan", str(scan_path),
        "--inhouse",
        "--append", str(results_path),
        "--manual-triage", str(triage_path),
    ]
    proc = run_tabulate(*cli_args)
    assert proc.returncode == 0, (
        f"manual-triage run must succeed without budget, got {proc.returncode}\n"
        f"stdout: {proc.stdout}\nstderr: {proc.stderr}"
    )

    # Index the cells of the most recent result row by (cap, lang).
    results = json.loads(results_path.read_text())
    cells = {}
    for cell in results[-1]["cells"]:
        cells[(cell["cap"], cell["lang"])] = cell

    sqli_py = cells.get(("sqli", "python"))
    assert sqli_py is not None, f"expected sqli/python cell, got {list(cells)}"
    assert sqli_py["confirmed"] == 2, sqli_py
    assert sqli_py["wrong_confirmed"] == 1, (
        "exactly one Confirmed finding must be stamped wrong via the triage match; "
        f"got {sqli_py}"
    )
def test_manual_triage_ignores_vuln_true_entries(tmp: Path) -> None:
    # A `vuln: true` triage entry marks a ground-truth positive, not
    # False-Confirmed evidence. Even when it sits on exactly the same
    # path/line/cap as a Confirmed finding, the finding must not be
    # counted as wrong_confirmed.
    scan_path = tmp / "scan.json"
    scan_payload = {
        "findings": [
            python_finding(SINK_BIT_SQL, "app.py", 10, "Confirmed"),
        ]
    }
    write_json(scan_path, scan_payload)

    triage_path = tmp / "triage.json"
    triage_entries = [
        {"path": "app.py", "line": 10, "cap": "sqli", "vuln": True},
    ]
    write_json(triage_path, triage_entries)

    # The accumulator starts out as an empty list and is appended to by the run.
    results_path = tmp / "results.json"
    write_json(results_path, [])

    cli_args = [
        "--label", "triage-true-test",
        "--scan", str(scan_path),
        "--inhouse",
        "--append", str(results_path),
        "--manual-triage", str(triage_path),
    ]
    proc = run_tabulate(*cli_args)
    assert proc.returncode == 0

    # Index the cells of the most recent result row by (cap, lang).
    results = json.loads(results_path.read_text())
    cells = {}
    for cell in results[-1]["cells"]:
        cells[(cell["cap"], cell["lang"])] = cell

    sqli_py = cells[("sqli", "python")]
    assert sqli_py["confirmed"] == 1
    assert sqli_py["wrong_confirmed"] == 0, (
        f"vuln:true triage rows must not stamp wrong; got {sqli_py}"
    )
def test_budget_malformed_exits_3(tmp: Path) -> None:
bad = tmp / "bad.toml"
bad.write_text("[default]\nunsupported_rate = not_a_number\n")
@ -226,6 +315,8 @@ def main() -> int:
test_budget_fails_when_unsupported_exceeds,
test_diff_fails_on_regression,
test_diff_passes_on_improvement,
test_manual_triage_stamps_wrong_confirmed,
test_manual_triage_ignores_vuln_true_entries,
test_budget_malformed_exits_3,
):
sub = tmp / fn.__name__