[pitboss/grind] deferred session-0001 (20260516T052512Z-20f8)

This commit is contained in:
pitboss 2026-05-16 00:52:57 -05:00
parent 93e4764e3c
commit 7a2f82c2ab
8 changed files with 262 additions and 28 deletions

View file

@ -314,12 +314,20 @@ impl LangEmitter for CEmitter {
}
/// Phase 26 — C chain-step harness.
///
/// Shell-wraps `cc` + run so the compiled binary actually executes after
/// the build completes — `ChainStepHarness.command` models a single
/// process, so the build-then-run sequence must collapse to one `sh -c`.
fn chain_step(prev_output: Option<&[u8]>) -> ChainStepHarness {
let source = "#include <stdio.h>\n#include <stdlib.h>\n\nint main(void) {\n const char *prev = getenv(\"NYX_PREV_OUTPUT\");\n if (prev) fputs(prev, stdout);\n return 0;\n}\n".to_owned();
ChainStepHarness {
source,
filename: "step.c".to_owned(),
command: vec!["cc".to_owned(), "step.c".to_owned(), "-o".to_owned(), "step".to_owned()],
command: vec![
"sh".to_owned(),
"-c".to_owned(),
"cc step.c -o step && ./step".to_owned(),
],
extra_env: prev_output
.map(|bytes| {
vec![(
@ -356,6 +364,7 @@ pub fn emit(spec: &HarnessSpec) -> Result<HarnessSource, UnsupportedReason> {
/// Generate the harness `main.c` for the resolved shape.
fn generate_main_c(spec: &HarnessSpec, shape: CShape) -> String {
let invocation = invoke_for_shape(spec, shape);
let (entry_open, entry_close) = entry_include_guards(spec);
format!(
r#"/* Nyx dynamic harness — auto-generated, do not edit (Phase 16 — CShape::{shape:?}). */
@ -370,8 +379,8 @@ fn generate_main_c(spec: &HarnessSpec, shape: CShape) -> String {
* compilation unit. */
static char *nyx_payload(void);
#include "entry.c"
{entry_open}#include "entry.c"
{entry_close}
int main(int argc, char *argv[]) {{
(void)argc; (void)argv;
char *payload = nyx_payload();
@ -430,11 +439,33 @@ static char *nyx_payload(void) {{
"#,
shape = shape,
invocation = invocation,
entry_open = entry_open,
entry_close = entry_close,
)
}
/// Builds the pair of preprocessor lines that the generated harness places
/// directly before and directly after `#include "entry.c"`.
///
/// When the spec's entry symbol is literally `main` (a real CLI under
/// Track B), the pair is a `#define` / `#undef` that renames the entry
/// source's `int main(...)` to `__nyx_entry_main(...)`. Without the rename
/// the entry's `main` collides with the harness's own `main` at link time.
///
/// For any other entry name — e.g. fixtures that already expose
/// `nyx_entry_main` under `tests/dynamic_fixtures/c/main_argv/` — both
/// strings are empty.
///
/// Returns `(before_include, after_include)`.
fn entry_include_guards(spec: &HarnessSpec) -> (&'static str, &'static str) {
    const RENAME_MAIN: &str = "#define main __nyx_entry_main\n";
    const RESTORE_MAIN: &str = "#undef main\n";

    match spec.entry_name.as_str() {
        "main" => (RENAME_MAIN, RESTORE_MAIN),
        _ => ("", ""),
    }
}
fn invoke_for_shape(spec: &HarnessSpec, shape: CShape) -> String {
let entry_fn = &spec.entry_name;
let entry_fn: &str = if spec.entry_name == "main" {
"__nyx_entry_main"
} else {
spec.entry_name.as_str()
};
match shape {
CShape::FreeFn => match &spec.payload_slot {
PayloadSlot::EnvVar(name) => format!(
@ -450,14 +481,15 @@ fn invoke_for_shape(spec: &HarnessSpec, shape: CShape) -> String {
)
}
CShape::MainArgv => {
// Rename the user-supplied entry to `nyx_entry_main` via macro so
// it does not collide with the harness `main` symbol when the
// entry source defines `int main(...)`. Fixture authors should
// expose the entry as a function named in `spec.entry_name`.
//
// Heap-allocate `new_argv` so a future `PayloadSlot::Argv(n)` with
// `n >= 6` cannot overrun a fixed stack array. Slots: 1
// ("nyx_harness") + pad + 1 (payload) + 1 (NULL terminator).
//
// When `spec.entry_name == "main"` the entry's `int main(...)` is
// renamed to `__nyx_entry_main` via the preprocessor guards on
// `#include "entry.c"`, and the call site below targets that
// renamed symbol. Fixtures that already expose a non-`main`
// entry symbol are called by name unchanged.
let pad = match &spec.payload_slot {
PayloadSlot::Argv(n) => *n,
_ => 0,
@ -607,6 +639,40 @@ mod tests {
assert!(h6.source.contains("free(new_argv);"));
}
#[test]
fn emit_main_argv_renames_main_when_entry_named_main() {
    // Track B CLI case: the entry symbol is literally `main` and the entry
    // source defines `int main(int argc, char *argv[])`. The emitted harness
    // must rename it, otherwise it clashes with the harness's own `main`
    // when linking.
    let cli_spec = {
        let mut s = make_spec(PayloadSlot::Argv(0));
        s.entry_kind = EntryKind::CliSubcommand;
        s.entry_name = "main".into();
        s
    };
    let harness = emit(&cli_spec).unwrap();
    let src = &harness.source;

    assert!(
        src.contains("#define main __nyx_entry_main"),
        "rename guard missing from emitted source",
    );
    assert!(
        src.contains("#undef main"),
        "undef guard missing — harness `int main(...)` definition follows the include",
    );
    assert!(
        src.contains("__nyx_entry_main(new_argc, new_argv)"),
        "harness call site must target the renamed symbol",
    );
    // The harness still has to define a genuine `main` of its own.
    assert!(src.contains("int main(int argc, char *argv[])"));

    // Fixture-style entries that are not named `main` get no guards and are
    // called by their own name.
    let fixture_spec = {
        let mut s = make_spec(PayloadSlot::Argv(0));
        s.entry_kind = EntryKind::CliSubcommand;
        s.entry_name = "nyx_entry_main".into();
        s
    };
    let fixture_harness = emit(&fixture_spec).unwrap();
    let fixture_src = &fixture_harness.source;

    assert!(!fixture_src.contains("#define main"));
    assert!(!fixture_src.contains("#undef main"));
    assert!(fixture_src.contains("nyx_entry_main(new_argc, new_argv)"));
}
#[test]
fn emit_libfuzzer_shape_passes_bytes() {
let mut spec = make_spec(PayloadSlot::Param(0));

View file

@ -287,12 +287,19 @@ impl LangEmitter for CppEmitter {
}
/// Phase 26 — C++ chain-step harness.
///
/// Shell-wraps `c++` + run so the compiled binary actually executes
/// after the build completes (see C-side commentary for the rationale).
fn chain_step(prev_output: Option<&[u8]>) -> ChainStepHarness {
let source = "#include <cstdio>\n#include <cstdlib>\n\nint main() {\n const char *prev = std::getenv(\"NYX_PREV_OUTPUT\");\n if (prev) std::fputs(prev, stdout);\n return 0;\n}\n".to_owned();
ChainStepHarness {
source,
filename: "step.cpp".to_owned(),
command: vec!["c++".to_owned(), "step.cpp".to_owned(), "-o".to_owned(), "step".to_owned()],
command: vec![
"sh".to_owned(),
"-c".to_owned(),
"c++ step.cpp -o step && ./step".to_owned(),
],
extra_env: prev_output
.map(|bytes| {
vec![(
@ -328,6 +335,7 @@ pub fn emit(spec: &HarnessSpec) -> Result<HarnessSource, UnsupportedReason> {
fn generate_main_cpp(spec: &HarnessSpec, shape: CppShape) -> String {
let invocation = invoke_for_shape(spec, shape);
let (entry_open, entry_close) = entry_include_guards(spec);
format!(
r#"// Nyx dynamic harness — auto-generated, do not edit (Phase 16 — CppShape::{shape:?}).
@ -341,8 +349,8 @@ fn generate_main_cpp(spec: &HarnessSpec, shape: CppShape) -> String {
static std::string nyx_payload();
#include "entry.cpp"
{entry_open}#include "entry.cpp"
{entry_close}
int main(int argc, char *argv[]) {{
(void)argc; (void)argv;
std::string payload = nyx_payload();
@ -390,11 +398,29 @@ static std::string nyx_payload() {{
"#,
shape = shape,
invocation = invocation,
entry_open = entry_open,
entry_close = entry_close,
)
}
/// Returns the `(before_include, after_include)` preprocessor lines that
/// wrap `#include "entry.cpp"` in the generated harness.
///
/// If the spec's entry symbol is exactly `main`, the pair renames the entry
/// source's `int main(...)` to `__nyx_entry_main(...)` for the duration of
/// the include, so the user's `main` does not collide with the harness's
/// own `main` at link time. The C emitter applies the same scheme.
///
/// Every other entry name yields two empty strings.
fn entry_include_guards(spec: &HarnessSpec) -> (&'static str, &'static str) {
    // Only an entry literally named `main` needs the rename.
    if spec.entry_name != "main" {
        return ("", "");
    }
    ("#define main __nyx_entry_main\n", "#undef main\n")
}
fn invoke_for_shape(spec: &HarnessSpec, shape: CppShape) -> String {
let entry_fn = &spec.entry_name;
let entry_fn: &str = if spec.entry_name == "main" {
"__nyx_entry_main"
} else {
spec.entry_name.as_str()
};
match shape {
CppShape::FreeFn => match &spec.payload_slot {
PayloadSlot::EnvVar(name) => format!(
@ -539,6 +565,35 @@ mod tests {
assert!(h.source.contains("nyx_entry_main(static_cast<int>(argv_storage.size()), new_argv.data())"));
}
#[test]
fn emit_main_argv_renames_main_when_entry_named_main() {
    // Expected call expression with the callee name left off, so the same
    // suffix can be checked for both the renamed and the fixture entry.
    const CALL_ARGS: &str = "(static_cast<int>(argv_storage.size()), new_argv.data())";

    // Track B CLI case: the entry symbol is literally `main`. The emitted
    // harness must rename the entry's `int main(...)`, otherwise it clashes
    // with the harness's own `main` when linking.
    let cli_spec = {
        let mut s = make_spec(PayloadSlot::Argv(0));
        s.entry_kind = EntryKind::CliSubcommand;
        s.entry_name = "main".into();
        s
    };
    let harness = emit(&cli_spec).unwrap();
    let src = &harness.source;

    assert!(
        src.contains("#define main __nyx_entry_main"),
        "rename guard missing",
    );
    assert!(src.contains("#undef main"), "undef guard missing");
    assert!(
        src.contains(&format!("__nyx_entry_main{CALL_ARGS}")),
        "harness call site must target the renamed symbol",
    );
    assert!(src.contains("int main(int argc, char *argv[])"));

    // Fixture-style entries that are not named `main` get no guards and are
    // called by their own name.
    let fixture_spec = {
        let mut s = make_spec(PayloadSlot::Argv(0));
        s.entry_kind = EntryKind::CliSubcommand;
        s.entry_name = "nyx_entry_main".into();
        s
    };
    let fixture_harness = emit(&fixture_spec).unwrap();
    let fixture_src = &fixture_harness.source;

    assert!(!fixture_src.contains("#define main"));
    assert!(!fixture_src.contains("#undef main"));
    assert!(fixture_src.contains(&format!("nyx_entry_main{CALL_ARGS}")));
}
#[test]
fn emit_cmake_in_extra_files() {
let spec = make_spec(PayloadSlot::Param(0));

View file

@ -83,16 +83,23 @@ impl LangEmitter for JavaEmitter {
/// Phase 26 — Java chain-step harness.
///
/// Emits a `Step.java` class whose `main` reads `NYX_PREV_OUTPUT` and
/// forwards it on stdout. The Java probe shim is class-level and
/// requires `System`/`java.io.*` imports the chain step already pulls in
/// implicitly; wiring the full shim is tracked alongside the Phase 14
/// emitter follow-up about probe shim splicing.
/// forwards it on stdout. The command shell-wraps `javac` + `java` so
/// the step actually runs after the build step completes (the
/// `ChainStepHarness.command` slot models a single process). The Java
/// probe shim is class-level and requires `System` / `java.io.*` imports
/// the chain step already pulls in implicitly; wiring the full shim is
/// tracked alongside the Phase 14 emitter follow-up about probe shim
/// splicing.
fn chain_step(prev_output: Option<&[u8]>) -> ChainStepHarness {
let source = "public class Step {\n public static void main(String[] args) {\n String prev = System.getenv(\"NYX_PREV_OUTPUT\");\n if (prev == null) prev = \"\";\n System.out.print(prev);\n }\n}\n".to_owned();
ChainStepHarness {
source,
filename: "Step.java".to_owned(),
command: vec!["java".to_owned(), "Step".to_owned()],
command: vec![
"sh".to_owned(),
"-c".to_owned(),
"javac Step.java && java Step".to_owned(),
],
extra_env: prev_output
.map(|bytes| {
vec![(

View file

@ -403,10 +403,21 @@ pub fn emit(spec: &HarnessSpec, is_typescript: bool) -> Result<HarnessSource, Un
pub fn chain_step(prev_output: Option<&[u8]>, is_typescript: bool) -> ChainStepHarness {
let probe = probe_shim();
let driver = "\nprocess.stdout.write(process.env.NYX_PREV_OUTPUT || '');\n";
// The chain-step source is plain JavaScript even under the TypeScript
// emitter — the probe shim uses no TS-specific syntax. We therefore keep
// the `.ts` filename (so the workdir reflects which emitter produced the
// step) but copy it to a `.js` sibling and run that instead. Running
// `node step.ts` directly fails on stock Node releases that predate type
// stripping (first available behind `--experimental-strip-types` in
// 22.6), and this avoids depending on `tsx` / `ts-node` being installed
// on the host.
let (filename, command) = if is_typescript {
(
"step.ts".to_owned(),
vec!["node".to_owned(), "step.ts".to_owned()],
vec![
"sh".to_owned(),
"-c".to_owned(),
"cp step.ts step.js && node step.js".to_owned(),
],
)
} else {
(

View file

@ -78,10 +78,17 @@ impl LangEmitter for RustEmitter {
/// via the standard emit path.
fn chain_step(prev_output: Option<&[u8]>) -> ChainStepHarness {
let source = "use std::env;\nuse std::io::{self, Write};\n\nfn main() {\n let prev = env::var(\"NYX_PREV_OUTPUT\").unwrap_or_default();\n let _ = io::stdout().write_all(prev.as_bytes());\n}\n".to_owned();
// Shell-wrap build + run so the step actually executes the compiled binary.
// `ChainStepHarness.command` models a single process; without the wrap the
// step ends after `rustc` exits and the next chain member sees no output.
ChainStepHarness {
source,
filename: "step.rs".to_owned(),
command: vec!["rustc".to_owned(), "step.rs".to_owned(), "-o".to_owned(), "step".to_owned()],
command: vec![
"sh".to_owned(),
"-c".to_owned(),
"rustc step.rs -o step && ./step".to_owned(),
],
extra_env: prev_output
.map(|bytes| {
vec![(

View file

@ -218,6 +218,37 @@ impl Scrubber {
text.to_owned()
}
}
/// Redacts a raw sink-side payload capture.
///
/// If `redact::contains_secret` finds nothing in `bytes`, the result is a
/// plain copy of the input. Otherwise the result is a placeholder of
/// exactly `bytes.len()` bytes, built by repeating the hex form of the
/// input's blake3 digest. The same input therefore always yields the same
/// placeholder, and payload length is preserved for downstream forensic
/// tooling that keys on it (e.g. corpus-promote diffing, the
/// `events.jsonl` size budget).
///
/// Background (deferred Phase 28 follow-up): the textual scrubber already
/// covered `env_snapshot` / `cwd` / `args_repr` / `callee`, while
/// `ProbeWitness::payload_bytes` was passed through raw because curated
/// corpus payloads are deterministic literals known not to contain
/// credentials. Real-world Track B sinks can surface attacker-controlled
/// bytes that do contain credentials, so this method routes that path
/// through the same regex set as everything else.
pub fn scrub_bytes(&self, bytes: &[u8]) -> Vec<u8> {
    if redact::contains_secret(bytes) {
        // Content is swapped for a fixed-vocabulary marker (hex digits)
        // derived from a one-way hash of the original.
        let digest = blake3::hash(bytes).to_hex();
        let alphabet = digest.as_bytes();
        debug_assert!(!alphabet.is_empty(), "blake3 hex digest is never empty");
        // Tile the digest across the original length.
        alphabet.iter().copied().cycle().take(bytes.len()).collect()
    } else {
        bytes.to_vec()
    }
}
}
/// Hash a matched secret into the `<scrubbed-hash:<prefix>>` shape.
@ -562,6 +593,47 @@ mod tests {
assert_ne!(a, b);
}
#[test]
fn scrub_bytes_passes_through_clean_payload() {
    // A payload with no credential-shaped token must come back verbatim.
    let clean: &[u8] = b"<script>NYX_XSS_CONFIRMED</script>";
    let scrubbed = Scrubber::project_default().scrub_bytes(clean);
    assert_eq!(scrubbed.as_slice(), clean);
}
#[test]
fn scrub_bytes_replaces_credential_payload_same_length() {
    // A payload embedding a credential-shaped token must be replaced by a
    // same-length, all-hex placeholder that no longer contains the token.
    let token: &[u8] = b"AKIAFAKETEST00000000";
    let payload: &[u8] = b"username=admin&token=AKIAFAKETEST00000000&action=login";

    let scrubbed = Scrubber::project_default().scrub_bytes(payload);

    assert_eq!(scrubbed.len(), payload.len(), "same-length contract");
    assert!(scrubbed.windows(token.len()).all(|window| window != token));
    assert!(scrubbed.iter().all(u8::is_ascii_hexdigit));
}
#[test]
fn scrub_bytes_is_deterministic() {
    // Scrubbing the same secret-bearing payload twice yields identical bytes.
    let scrubber = Scrubber::project_default();
    let payload = b"AKIAFAKETEST00000000 payload tail";
    let first = scrubber.scrub_bytes(payload);
    let second = scrubber.scrub_bytes(payload);
    assert_eq!(first, second);
}
#[test]
fn scrub_bytes_differs_for_different_inputs() {
    // Two distinct secret-bearing payloads must not share a placeholder.
    let scrubber = Scrubber::project_default();
    let from_zeros = scrubber.scrub_bytes(b"AKIAFAKETEST00000000 alpha");
    let from_ones = scrubber.scrub_bytes(b"AKIAFAKETEST11111111 alpha");
    assert_ne!(from_zeros, from_ones);
}
#[test]
fn scrub_bytes_handles_empty() {
    // An empty payload scrubs to an empty vector.
    let scrubber = Scrubber::project_default();
    let scrubbed = scrubber.scrub_bytes(b"");
    assert_eq!(scrubbed, Vec::<u8>::new());
}
#[test]
fn scrub_is_deterministic_btree() {
// Same iterator yields the same map; BTreeMap guarantees iteration order.

View file

@ -185,10 +185,12 @@ impl ProbeWitness {
/// the host-side constructor cannot accidentally produce an
/// unscrubbed / unbounded witness. Every textual field
/// (`env_snapshot` values, `cwd`, each `args_repr` entry) is routed
/// through the scrubber before the witness is serialised; the
/// `payload_bytes` field is left as raw bytes because the curated
/// payload corpus is checked into the repo and grepping it is the
/// only reliable forensic signal for triage.
/// through the scrubber before the witness is serialised, and the
/// truncated `payload_bytes` slice is routed through the
/// byte-aware [`crate::dynamic::policy::Scrubber::scrub_bytes`] so
/// real-world payloads carrying credential tokens are replaced with
/// a deterministic same-length placeholder while curated corpus
/// payloads pass through unchanged.
pub fn from_inputs<I, S>(
env: I,
cwd: impl Into<String>,
@ -211,10 +213,12 @@ impl ProbeWitness {
.collect();
let scrubbed_callee = scrubber.scrub_string(&callee.into());
let scrubbed_cwd = scrubber.scrub_string(&cwd.into());
let truncated = policy::truncate_payload_bytes(payload);
let scrubbed_payload = scrubber.scrub_bytes(truncated);
Self {
env_snapshot,
cwd: scrubbed_cwd,
payload_bytes: policy::truncate_payload_bytes(payload).to_vec(),
payload_bytes: scrubbed_payload,
callee: scrubbed_callee,
args_repr: scrubbed_args,
}