[pitboss/grind] deferred session-0001 (20260517T044708Z-e058)

2026-07-27 21:51:03 +02:00 · 2026-05-17 00:05:12 -05:00 · 2026-05-17 00:05:12 -05:00 · 3d51a3d8ae
commit 3d51a3d8ae
parent 6189c4a4c5
4 changed files with 264 additions and 2 deletions
--- a/src/dynamic/lang/c.rs
+++ b/src/dynamic/lang/c.rs
@ -379,11 +379,22 @@ impl LangEmitter for CEmitter {

 /// Phase 26 — C chain-step harness.
 ///
+/// Splices the C probe shim ([`probe_shim`]) ahead of a minimal driver
+/// that reads `NYX_PREV_OUTPUT` and forwards it on stdout.  The shim's
+/// static functions (`__nyx_probe`, `__nyx_install_crash_guard`,
+/// `__nyx_stub_sql_record`, `__nyx_stub_http_record`) become callable
+/// from a future sink-rewrite pass without bringing in another
+/// translation unit.  Unreferenced shim helpers stay quiet under
+/// default `cc` flags — `-Wunused-function` is enabled by `-Wall`, which
+/// the build command does not pass, so dead helpers do not fail the build.
+///
 /// Shell-wraps `cc` + run so the compiled binary actually executes after
 /// the build completes — `ChainStepHarness.command` models a single
 /// process, so the build-then-run sequence must collapse to one `sh -c`.
 fn chain_step(prev_output: Option<&[u8]>) -> ChainStepHarness {
-    let source = "#include <stdio.h>\n#include <stdlib.h>\n\nint main(void) {\n    const char *prev = getenv(\"NYX_PREV_OUTPUT\");\n    if (prev) fputs(prev, stdout);\n    return 0;\n}\n".to_owned();
+    let shim = probe_shim();
+    let driver = "\nint main(void) {\n    const char *prev = getenv(\"NYX_PREV_OUTPUT\");\n    if (prev) fputs(prev, stdout);\n    return 0;\n}\n";
+    let source = format!("{shim}{driver}");
    ChainStepHarness {
        source,
        filename: "step.c".to_owned(),
@ -853,4 +864,54 @@ mod tests {
        let mk = h.extra_files.iter().find(|(n, _)| n == "Makefile").expect("Makefile must be staged");
        assert!(mk.1.contains("nyx_harness: main.c entry.c"));
    }
+
+    #[test]
+    fn chain_step_splices_probe_shim_for_composite_reverify() {
+        // Phase 26 follow-up: C chain_step now splices the probe shim
+        // ahead of the driver so a chain step that terminates at a sink
+        // can drive the `__nyx_probe` channel directly.  Asserts the
+        // shim banner is present and lands before `int main`, that the
+        // `__nyx_install_crash_guard` definition is present in the spliced
+        // source, that `prev_output` rides through `extra_env`, and
+        // that the build-then-run command stays in one `sh -c` so the
+        // sandbox sees a single process.
+        let step = chain_step(Some(b"prev-output"));
+        assert!(
+            step.source.contains("__nyx_probe shim (Phase 06"),
+            "probe_shim banner missing from chain step source",
+        );
+        assert!(
+            step.source.contains("static void __nyx_install_crash_guard("),
+            "install_crash_guard missing from chain step source",
+        );
+        let shim_pos = step
+            .source
+            .find("__nyx_probe shim (Phase 06")
+            .expect("shim banner");
+        let main_pos = step.source.find("int main(void)").expect("main fn");
+        assert!(
+            shim_pos < main_pos,
+            "shim must be spliced before int main: shim={shim_pos} main={main_pos}",
+        );
+        assert_eq!(step.filename, "step.c");
+        assert_eq!(
+            step.command,
+            vec![
+                "sh".to_owned(),
+                "-c".to_owned(),
+                "cc step.c -o step && ./step".to_owned(),
+            ],
+        );
+        assert!(
+            step.extra_env
+                .iter()
+                .any(|(k, v)| k == ChainStepHarness::PREV_OUTPUT_ENV && v == "prev-output"),
+            "prev_output must be threaded through extra_env, got {:?}",
+            step.extra_env,
+        );
+        assert!(
+            step.extra_files.is_empty(),
+            "C chain step needs no companion build manifest; `cc` is self-sufficient",
+        );
+    }
 }
--- a/src/dynamic/lang/cpp.rs
+++ b/src/dynamic/lang/cpp.rs
@ -332,10 +332,18 @@ impl LangEmitter for CppEmitter {

 /// Phase 26 — C++ chain-step harness.
 ///
+/// Splices the C++ probe shim ([`probe_shim`]) ahead of a minimal driver
+/// that reads `NYX_PREV_OUTPUT` and forwards it on stdout.  Same
+/// rationale as the C sibling: the inline shim helpers become callable
+/// from a future sink-rewrite pass without a separate translation unit;
+/// unreferenced inline functions stay quiet under default `c++` flags.
+///
 /// Shell-wraps `c++` + run so the compiled binary actually executes
 /// after the build completes (see C-side commentary for the rationale).
 fn chain_step(prev_output: Option<&[u8]>) -> ChainStepHarness {
-    let source = "#include <cstdio>\n#include <cstdlib>\n\nint main() {\n    const char *prev = std::getenv(\"NYX_PREV_OUTPUT\");\n    if (prev) std::fputs(prev, stdout);\n    return 0;\n}\n".to_owned();
+    let shim = probe_shim();
+    let driver = "\nint main() {\n    const char *prev = std::getenv(\"NYX_PREV_OUTPUT\");\n    if (prev) std::fputs(prev, stdout);\n    return 0;\n}\n";
+    let source = format!("{shim}{driver}");
    ChainStepHarness {
        source,
        filename: "step.cpp".to_owned(),
@ -725,4 +733,52 @@ mod tests {
        let mk = h.extra_files.iter().find(|(n, _)| n == "CMakeLists.txt").expect("CMakeLists.txt must be staged");
        assert!(mk.1.contains("add_executable(nyx_harness main.cpp)"));
    }
+
+    #[test]
+    fn chain_step_splices_probe_shim_for_composite_reverify() {
+        // Phase 26 follow-up: C++ chain_step now splices the probe shim
+        // ahead of the driver so a chain step that terminates at a sink
+        // can drive the `__nyx_probe` channel directly.  Asserts the shim
+        // banner is present and lands before `int main()`, the inline
+        // `__nyx_install_crash_guard` definition is present, `prev_output`
+        // rides through `extra_env`, and build-then-run stays one `sh -c`.
+        let step = chain_step(Some(b"prev-output"));
+        assert!(
+            step.source.contains("__nyx_probe shim (Phase 06"),
+            "probe_shim banner missing from chain step source",
+        );
+        assert!(
+            step.source.contains("inline void __nyx_install_crash_guard("),
+            "install_crash_guard missing from chain step source",
+        );
+        let shim_pos = step
+            .source
+            .find("__nyx_probe shim (Phase 06")
+            .expect("shim banner");
+        let main_pos = step.source.find("int main()").expect("main fn");
+        assert!(
+            shim_pos < main_pos,
+            "shim must be spliced before int main: shim={shim_pos} main={main_pos}",
+        );
+        assert_eq!(step.filename, "step.cpp");
+        assert_eq!(
+            step.command,
+            vec![
+                "sh".to_owned(),
+                "-c".to_owned(),
+                "c++ step.cpp -o step && ./step".to_owned(),
+            ],
+        );
+        assert!(
+            step.extra_env
+                .iter()
+                .any(|(k, v)| k == ChainStepHarness::PREV_OUTPUT_ENV && v == "prev-output"),
+            "prev_output must be threaded through extra_env, got {:?}",
+            step.extra_env,
+        );
+        assert!(
+            step.extra_files.is_empty(),
+            "C++ chain step needs no companion build manifest; `c++` is self-sufficient",
+        );
+    }
 }
--- a/tests/eval_corpus/tabulate.py
+++ b/tests/eval_corpus/tabulate.py
@ -317,6 +317,19 @@ def main() -> int:
    p.add_argument("--ground-truth", default="", help="ground truth JSON")
    p.add_argument("--inhouse", action="store_true")
    p.add_argument("--append", required=True, help="results accumulator JSON")
+    p.add_argument(
+        "--manual-triage",
+        default="",
+        help=(
+            "path to a manual-triage JSON file (list of "
+            "{path, line, cap, vuln: bool}).  Confirmed findings matching a "
+            "`vuln: false` entry are stamped with `wrong: true` before "
+            "tabulation so the per-cell False-Confirmed budget becomes "
+            "non-vacuous without depending on the host's `nyx verify-feedback` "
+            "log.  Matching uses LINE_TOLERANCE (=5) — line == 0 in the triage "
+            "entry matches any line."
+        ),
+    )
    p.add_argument(
        "--budget",
        default="",
@ -332,6 +345,47 @@ def main() -> int:
    scan_data = load_json(args.scan)
    findings = scan_data if isinstance(scan_data, list) else scan_data.get("findings", [])

+    # ── Manual-triage stamping (Phase 31 follow-up) ───────────────────────
+    # Cross-reference Confirmed rows against a manual-triage file before
+    # tabulation.  A `vuln: false` entry matches a Confirmed finding with
+    # equal `path` and `cap` whose line is within LINE_TOLERANCE (any line
+    # when the entry's `line == 0`); each entry stamps at most one finding.
+    # A match sets `wrong: true` on the finding's `dynamic_verdict`, which
+    # the existing wrong_confirmed counter picks up below.  This decouples
+    # the False-Confirmed budget from the host-local `nyx verify-feedback`
+    # log so CI on a fresh eval corpus can still gate the headline target.
+    if args.manual_triage and Path(args.manual_triage).exists():
+        triage = load_json(args.manual_triage)
+        not_vuln: list[dict] = []
+        for entry in triage if isinstance(triage, list) else []:
+            if entry.get("vuln") is False:
+                not_vuln.append({
+                    "path": entry.get("path", ""),
+                    "line": entry.get("line", 0),
+                    "cap": entry.get("cap", ""),
+                })
+        used: set[int] = set()
+        for f in findings:
+            ev = f.get("evidence") or {}
+            dv = ev.get("dynamic_verdict") or {}
+            if dv.get("status") != "Confirmed":
+                continue
+            f_path = f.get("path", "")
+            f_line = f.get("line", 0)
+            f_cap = cap_of(f)
+            for idx, entry in enumerate(not_vuln):
+                if idx in used:
+                    continue
+                if (entry["path"] == f_path
+                        and entry["cap"] == f_cap
+                        and (entry["line"] == 0
+                             or abs(entry["line"] - f_line) <= LINE_TOLERANCE)):
+                    used.add(idx)
+                    dv["wrong"] = True
+                    ev["dynamic_verdict"] = dv
+                    f["evidence"] = ev
+                    break
+
    # Per-cell tallies: {(cap, lang): {tp, fp, fn, unsupported, confirmed,
    # wrong_confirmed, stable_replays, total}}
    cells: dict[tuple[str, str], dict] = defaultdict(
--- a/tests/eval_corpus/test_tabulate_regression.py
+++ b/tests/eval_corpus/test_tabulate_regression.py
@ -199,6 +199,95 @@ def test_diff_passes_on_improvement(tmp: Path) -> None:
    assert "no regressions" in proc.stdout, proc.stdout


+def test_manual_triage_stamps_wrong_confirmed(tmp: Path) -> None:
+    # Phase 31 follow-up: --manual-triage should cross-reference Confirmed
+    # findings against a list of {path, line, cap, vuln: false} entries
+    # and stamp `wrong: true` so the per-cell wrong_confirmed counter
+    # becomes non-vacuous without the host's verify-feedback log.
+    #
+    # Confirmed at line 10 matches the triage's vuln:false at line 12
+    # (within LINE_TOLERANCE=5).  Confirmed at line 100 does not match
+    # any triage entry, so wrong_confirmed stays at 1 / 2 Confirmed.
+    scan = tmp / "scan.json"
+    write_json(
+        scan,
+        {
+            "findings": [
+                python_finding(SINK_BIT_SQL, "app.py", 10, "Confirmed"),
+                python_finding(SINK_BIT_SQL, "app.py", 100, "Confirmed"),
+            ]
+        },
+    )
+    triage = tmp / "triage.json"
+    write_json(
+        triage,
+        [
+            {"path": "app.py", "line": 12, "cap": "sqli", "vuln": False},
+        ],
+    )
+    append = tmp / "results.json"
+    write_json(append, [])
+    proc = run_tabulate(
+        "--label", "triage-test",
+        "--scan", str(scan),
+        "--inhouse",
+        "--append", str(append),
+        "--manual-triage", str(triage),
+    )
+    assert proc.returncode == 0, (
+        f"manual-triage run must succeed without budget, got {proc.returncode}\n"
+        f"stdout: {proc.stdout}\nstderr: {proc.stderr}"
+    )
+    results = json.loads(append.read_text())
+    cells = {(c["cap"], c["lang"]): c for c in results[-1]["cells"]}
+    sqli_py = cells.get(("sqli", "python"))
+    assert sqli_py is not None, f"expected sqli/python cell, got {list(cells)}"
+    assert sqli_py["confirmed"] == 2, sqli_py
+    assert sqli_py["wrong_confirmed"] == 1, (
+        "exactly one Confirmed finding must be stamped wrong via the triage match; "
+        f"got {sqli_py}"
+    )
+
+
+def test_manual_triage_ignores_vuln_true_entries(tmp: Path) -> None:
+    # Triage entries with `vuln: true` are ground-truth-positive markers,
+    # not False-Confirmed evidence.  --manual-triage must leave them alone
+    # so a real Confirmed-on-vuln-true row does not get downgraded.
+    scan = tmp / "scan.json"
+    write_json(
+        scan,
+        {
+            "findings": [
+                python_finding(SINK_BIT_SQL, "app.py", 10, "Confirmed"),
+            ]
+        },
+    )
+    triage = tmp / "triage.json"
+    write_json(
+        triage,
+        [
+            {"path": "app.py", "line": 10, "cap": "sqli", "vuln": True},
+        ],
+    )
+    append = tmp / "results.json"
+    write_json(append, [])
+    proc = run_tabulate(
+        "--label", "triage-true-test",
+        "--scan", str(scan),
+        "--inhouse",
+        "--append", str(append),
+        "--manual-triage", str(triage),
+    )
+    assert proc.returncode == 0
+    results = json.loads(append.read_text())
+    cells = {(c["cap"], c["lang"]): c for c in results[-1]["cells"]}
+    sqli_py = cells[("sqli", "python")]
+    assert sqli_py["confirmed"] == 1
+    assert sqli_py["wrong_confirmed"] == 0, (
+        f"vuln:true triage rows must not stamp wrong; got {sqli_py}"
+    )
+
+
 def test_budget_malformed_exits_3(tmp: Path) -> None:
    bad = tmp / "bad.toml"
    bad.write_text("[default]\nunsupported_rate = not_a_number\n")
@ -226,6 +315,8 @@ def main() -> int:
            test_budget_fails_when_unsupported_exceeds,
            test_diff_fails_on_regression,
            test_diff_passes_on_improvement,
+            test_manual_triage_stamps_wrong_confirmed,
+            test_manual_triage_ignores_vuln_true_entries,
            test_budget_malformed_exits_3,
        ):
            sub = tmp / fn.__name__