[pitboss/grind] deferred session-0016 (20260517T044708Z-e058)

2026-06-09 19:45:13 +02:00 · 2026-05-17 05:50:56 -05:00 · 2026-05-17 05:50:56 -05:00 · 5b90a67f5c
commit 5b90a67f5c
parent e66b35f355
1 changed files with 189 additions and 18 deletions
--- a/src/chain/reverify.rs
+++ b/src/chain/reverify.rs
@ -52,12 +52,15 @@
 use crate::chain::finding::{ChainFinding, ChainSeverity};
 use crate::commands::scan::Diag;
 use crate::dynamic::build_sandbox::dispatch_prepare;
-use crate::dynamic::harness;
+use crate::dynamic::harness::{self, BuiltHarness};
+use crate::dynamic::lang;
+use crate::dynamic::sandbox;
 use crate::dynamic::spec::HarnessSpec;
 use crate::dynamic::verify::VerifyOptions;
 use crate::evidence::{InconclusiveReason, UnsupportedReason, VerifyResult, VerifyStatus};
 use crate::surface::SurfaceMap;
 use std::collections::HashMap;
+use std::path::PathBuf;

 /// Outcome of composite re-verification for a single chain.
 ///
@ -184,27 +187,42 @@ pub trait CompositeReverifier {
 /// The composite-harness composer walks `chain.members`, derives one
 /// [`HarnessSpec`] per member via [`chain_step_specs`], drives each
 /// derived spec through [`harness::build`] + [`dispatch_prepare`] so
-/// the per-language build cost is amortised against the on-disk caches
-/// before the live sandbox-run pass lands, and (in a future session)
-/// will call [`crate::dynamic::lang::compose_chain_step`] per step to
-/// assemble a per-step harness with `NYX_PREV_OUTPUT` threading.
+/// the per-language build cost is amortised against the on-disk caches,
+/// then runs each step sequentially through [`sandbox::run`] with the
+/// previous step's stdout threaded into the next step via
+/// [`crate::dynamic::lang::ChainStepHarness::PREV_OUTPUT_ENV`].
 ///
 /// Today the default reverifier surfaces
-/// `Inconclusive(BackendInsufficient)` when invoked, but the `detail`
-/// field reports both the spec-derivation coverage AND the per-step
-/// build coverage (`derived N/M`, `built B/N`, `cache_hit=H`,
-/// `build_ms=T`, `build_errors=E`) so operators (and the
-/// [`reverify_top_chains`] caller) can see the build-cost coverage
-/// before the live execution path lands.  Callers that need a
+/// `Inconclusive(BackendInsufficient)` when invoked.  The `detail`
+/// field reports spec-derivation, per-step build coverage, AND
+/// per-step run coverage so operators (and the [`reverify_top_chains`]
+/// caller) can see how far down the live execution path the chain
+/// got: `derived N/M`, `built B/N (cache_hit=H, build_ms=T,
+/// build_errors=E)`, `ran S/B (sandbox_errors=SE, timeouts=TO,
+/// nonzero_exits=NE, final_sink_hit=F)`.  Callers that need a
 /// deterministic outcome (tests, CI) use [`reverify_chain_with`] with
 /// a stubbed reverifier.
 ///
+/// The verdict stays `Inconclusive` even on a fully-successful run
+/// pass because today's per-language [`lang::compose_chain_step`]
+/// shims echo `NYX_PREV_OUTPUT` to stdout but do not yet invoke the
+/// chain's terminal sink — the sink-rewrite pass that wires the final
+/// step's probe call lands separately.  Once that pass arrives, the
+/// `final_sink_hit=true` branch will flip the verdict to `Confirmed`.
+///
+/// Languages whose [`dispatch_prepare`] returns `Unsupported`
+/// (Ruby today) are counted under `build_errors` and skipped from the
+/// run loop; their `compose_chain_step` source is never staged.
+///
 /// Workdir lifetime: every per-step build is content-addressed by
 /// [`HarnessSpec::spec_hash`] under `/tmp/nyx-harness/{spec_hash}`,
 /// and the per-language `prepare_*` caches under the host's
 /// `ProjectDirs` cache root are keyed on `(lockfile_hash,
 /// toolchain_id, language)`.  Repeated calls with the same specs are
-/// idempotent — no per-call growth on disk.
+/// idempotent — no per-call growth on disk.  The chain-step source
+/// (`step.py`, `step.sh`, etc.) is written into the same workdir
+/// alongside the harness source; filenames are distinct so they do
+/// not collide with [`harness::build`] output for the same spec_hash.
 pub struct DefaultCompositeReverifier;

 impl CompositeReverifier for DefaultCompositeReverifier {
@ -226,15 +244,17 @@ impl CompositeReverifier for DefaultCompositeReverifier {

        // Sub-task (b) main of the Phase 26 live-execution split:
        // drive each derived spec through the per-language build
-        // pipeline so the per-step cache state is visible before
-        // sub-task (c) lands the live sandbox::run chain.  Failures
-        // are counted, not propagated — the outer verdict stays
-        // `Inconclusive(BackendInsufficient)` until (c) lands.
+        // pipeline so each step's interpreter / compile artefact is
+        // staged in its content-addressed workdir before the run
+        // pass.  Failures are counted, not propagated — the outer
+        // verdict stays `Inconclusive(BackendInsufficient)` until
+        // the sink-rewrite pass lands.
        let profile = opts.sandbox.process_hardening;
        let mut built = 0usize;
        let mut cache_hits = 0usize;
        let mut total_build_ms: u128 = 0;
        let mut build_errors = 0usize;
+        let mut built_steps: Vec<(PathBuf, &HarnessSpec)> = Vec::with_capacity(derived);
        for spec in &derived_specs {
            match harness::build(spec) {
                Ok(built_harness) => {
@ -246,6 +266,7 @@ impl CompositeReverifier for DefaultCompositeReverifier {
                            }
                            total_build_ms = total_build_ms
                                .saturating_add(result.duration.as_millis());
+                            built_steps.push((built_harness.workdir, spec));
                        }
                        Err(_) => build_errors += 1,
                    }
@ -254,10 +275,21 @@ impl CompositeReverifier for DefaultCompositeReverifier {
            }
        }

+        // Sub-task (c) of the Phase 26 live-execution split:
+        // sequentially run each built chain-step harness through
+        // `sandbox::run`, threading the previous step's stdout into
+        // the next step via `NYX_PREV_OUTPUT`.  The final step's
+        // `sink_hit` is captured for the detail field; today it stays
+        // false because `compose_chain_step` does not yet rewrite the
+        // chain's terminal sink.
+        let (steps_run, sandbox_errors, steps_timeout, nonzero_exits, final_sink_hit) =
+            run_chain_steps(&built_steps, &opts.sandbox);
+
        let detail = format!(
-            "composite chain re-verification not yet wired for live runs; \
+            "composite chain re-verification: live runs collect step coverage; \
             derived {derived}/{total} harness specs; \
-             built {built}/{derived} (cache_hit={cache_hits}, build_ms={total_build_ms}, build_errors={build_errors})"
+             built {built}/{derived} (cache_hit={cache_hits}, build_ms={total_build_ms}, build_errors={build_errors}); \
+             ran {steps_run}/{built} (sandbox_errors={sandbox_errors}, timeouts={steps_timeout}, nonzero_exits={nonzero_exits}, final_sink_hit={final_sink_hit})"
        );
        VerifyResult {
            finding_id,
@ -279,6 +311,102 @@ impl CompositeReverifier for DefaultCompositeReverifier {
    }
 }

+/// Phase 26 sub-task (c): sequentially run each built chain step
+/// through [`sandbox::run`] with `NYX_PREV_OUTPUT` threading.
+///
+/// Returns `(steps_run, sandbox_errors, timeouts, nonzero_exits,
+/// final_sink_hit)`.  The final step's [`sandbox::SandboxOutcome::sink_hit`]
+/// is captured for the verdict's `detail` field (sub-task (d)); today
+/// the per-language [`lang::compose_chain_step`] sources echo
+/// `NYX_PREV_OUTPUT` to stdout without invoking the chain's terminal
+/// sink, so `final_sink_hit` stays `false` until the sink-rewrite
+/// pass lands.
+///
+/// A sandbox error aborts the rest of the chain — a step whose source
+/// files cannot be staged, or whose [`sandbox::run`] call fails, has no
+/// useful `stdout` to thread into the next step.  Non-zero exits (a
+/// missing exit code counts as one) and timeouts are recorded but do
+/// not stop the chain: that step's stdout is still threaded forward
+/// so partial-success chains keep collecting coverage.
+///
+/// `base_opts` is cloned per step; the per-step clone overlays the
+/// chain-step's `extra_env` (typically the single `NYX_PREV_OUTPUT`
+/// binding) on top of any caller-provided extras and drops the
+/// per-finding `stub_harness` because chain-step harnesses do not
+/// drive boundary stubs.
+fn run_chain_steps(
+    built_steps: &[(PathBuf, &HarnessSpec)],
+    base_opts: &sandbox::SandboxOptions,
+) -> (usize, usize, usize, usize, bool) {
+    let mut steps_run = 0usize;
+    let mut sandbox_errors = 0usize;
+    let mut steps_timeout = 0usize;
+    let mut nonzero_exits = 0usize;
+    let mut final_sink_hit = false;
+    let mut prev_output: Option<Vec<u8>> = None;
+    let last_idx = built_steps.len().saturating_sub(1);
+    for (idx, (workdir, spec)) in built_steps.iter().enumerate() {
+        let step = lang::compose_chain_step(spec.lang, prev_output.as_deref());
+
+        let step_path = workdir.join(&step.filename);
+        if let Some(parent) = step_path.parent() {
+            let _ = std::fs::create_dir_all(parent);
+        }
+        if std::fs::write(&step_path, step.source.as_bytes()).is_err() {
+            sandbox_errors += 1;
+            break;
+        }
+        let mut extra_files_failed = false;
+        for (rel, content) in &step.extra_files {
+            let dest = workdir.join(rel);
+            if let Some(parent) = dest.parent() {
+                let _ = std::fs::create_dir_all(parent);
+            }
+            if std::fs::write(&dest, content.as_bytes()).is_err() {
+                extra_files_failed = true;
+                break;
+            }
+        }
+        if extra_files_failed {
+            sandbox_errors += 1;
+            break;
+        }
+
+        let mut step_opts = base_opts.clone();
+        step_opts.extra_env.extend(step.extra_env.iter().cloned());
+        step_opts.stub_harness = None;
+
+        let step_built = BuiltHarness {
+            workdir: workdir.clone(),
+            command: step.command.clone(),
+            env: vec![],
+            source: step.source.clone(),
+            entry_source: String::new(),
+        };
+
+        match sandbox::run(&step_built, b"", &step_opts) {
+            Ok(outcome) => {
+                steps_run += 1;
+                if outcome.timed_out {
+                    steps_timeout += 1;
+                }
+                if outcome.exit_code.unwrap_or(-1) != 0 {
+                    nonzero_exits += 1;
+                }
+                if idx == last_idx {
+                    final_sink_hit = outcome.sink_hit;
+                }
+                prev_output = Some(outcome.stdout);
+            }
+            Err(_) => {
+                sandbox_errors += 1;
+                break;
+            }
+        }
+    }
+    (steps_run, sandbox_errors, steps_timeout, nonzero_exits, final_sink_hit)
+}
+
 /// Phase 26 — Track G.3: drive composite dynamic re-verification for
 /// one chain.
 ///
@ -595,6 +723,49 @@ mod tests {
        );
    }

+    #[test]
+    fn default_reverifier_detail_reports_run_coverage_with_no_built_steps() {
+        // No diags → 0/N derived → 0/0 built → 0/0 ran.  Verifies the
+        // run-coverage segment of the detail string is well-formed
+        // even when the chain-step run loop is never entered.
+        let mut chain = mk_chain(0xCD, ChainSeverity::Medium, ImpactCategory::InfoDisclosure);
+        let surface = SurfaceMap::new();
+        let opts = VerifyOptions::default();
+        let result = reverify_chain(&mut chain, &[], &surface, &opts);
+        let detail = result.verdict.detail.as_deref().expect("detail populated");
+        assert!(
+            detail.contains("ran 0/0"),
+            "detail must report 0/0 ran when no specs built; got {detail:?}"
+        );
+        assert!(
+            detail.contains("sandbox_errors=0"),
+            "detail must zero sandbox_errors when no runs attempted; got {detail:?}"
+        );
+        assert!(
+            detail.contains("timeouts=0"),
+            "detail must zero timeouts when no runs attempted; got {detail:?}"
+        );
+        assert!(
+            detail.contains("nonzero_exits=0"),
+            "detail must zero nonzero_exits when no runs attempted; got {detail:?}"
+        );
+        assert!(
+            detail.contains("final_sink_hit=false"),
+            "detail must stamp final_sink_hit=false when no runs attempted; got {detail:?}"
+        );
+    }
+
+    #[test]
+    fn run_chain_steps_with_empty_input_is_a_no_op() {
+        // Locks the contract that the run loop is a no-op when no
+        // steps built — the run-coverage detail segment is wholly a
+        // function of the (steps_run, sandbox_errors, timeouts,
+        // nonzero_exits, final_sink_hit) tuple this helper returns.
+        let opts = sandbox::SandboxOptions::default();
+        let result = run_chain_steps(&[], &opts);
+        assert_eq!(result, (0, 0, 0, 0, false));
+    }
+
    #[test]
    fn chain_step_specs_reports_no_flow_steps_for_missing_diag() {
        let chain = mk_chain(7, ChainSeverity::Medium, ImpactCategory::InfoDisclosure);