mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-09 19:45:13 +02:00
[pitboss/grind] deferred session-0001 (20260516T052512Z-20f8)
This commit is contained in:
parent
93e4764e3c
commit
7a2f82c2ab
8 changed files with 262 additions and 28 deletions
|
|
@ -314,12 +314,20 @@ impl LangEmitter for CEmitter {
|
|||
}
|
||||
|
||||
/// Phase 26 — C chain-step harness.
|
||||
///
|
||||
/// Shell-wraps `cc` + run so the compiled binary actually executes after
|
||||
/// the build completes — `ChainStepHarness.command` models a single
|
||||
/// process, so the build-then-run sequence must collapse to one `sh -c`.
|
||||
fn chain_step(prev_output: Option<&[u8]>) -> ChainStepHarness {
|
||||
let source = "#include <stdio.h>\n#include <stdlib.h>\n\nint main(void) {\n const char *prev = getenv(\"NYX_PREV_OUTPUT\");\n if (prev) fputs(prev, stdout);\n return 0;\n}\n".to_owned();
|
||||
ChainStepHarness {
|
||||
source,
|
||||
filename: "step.c".to_owned(),
|
||||
command: vec!["cc".to_owned(), "step.c".to_owned(), "-o".to_owned(), "step".to_owned()],
|
||||
command: vec![
|
||||
"sh".to_owned(),
|
||||
"-c".to_owned(),
|
||||
"cc step.c -o step && ./step".to_owned(),
|
||||
],
|
||||
extra_env: prev_output
|
||||
.map(|bytes| {
|
||||
vec![(
|
||||
|
|
@ -356,6 +364,7 @@ pub fn emit(spec: &HarnessSpec) -> Result<HarnessSource, UnsupportedReason> {
|
|||
/// Generate the harness `main.c` for the resolved shape.
|
||||
fn generate_main_c(spec: &HarnessSpec, shape: CShape) -> String {
|
||||
let invocation = invoke_for_shape(spec, shape);
|
||||
let (entry_open, entry_close) = entry_include_guards(spec);
|
||||
|
||||
format!(
|
||||
r#"/* Nyx dynamic harness — auto-generated, do not edit (Phase 16 — CShape::{shape:?}). */
|
||||
|
|
@ -370,8 +379,8 @@ fn generate_main_c(spec: &HarnessSpec, shape: CShape) -> String {
|
|||
* compilation unit. */
|
||||
static char *nyx_payload(void);
|
||||
|
||||
#include "entry.c"
|
||||
|
||||
{entry_open}#include "entry.c"
|
||||
{entry_close}
|
||||
int main(int argc, char *argv[]) {{
|
||||
(void)argc; (void)argv;
|
||||
char *payload = nyx_payload();
|
||||
|
|
@ -430,11 +439,33 @@ static char *nyx_payload(void) {{
|
|||
"#,
|
||||
shape = shape,
|
||||
invocation = invocation,
|
||||
entry_open = entry_open,
|
||||
entry_close = entry_close,
|
||||
)
|
||||
}
|
||||
|
||||
/// Preprocessor wrapper around `#include "entry.c"` that renames the user's
|
||||
/// `int main(...)` to `__nyx_entry_main(...)` when the spec's entry symbol IS
|
||||
/// `main` (i.e. a real CLI under Track B). Without this, the entry's `main`
|
||||
/// collides with the harness's own `main` at link time.
|
||||
///
|
||||
/// Fixture authors who already expose a non-`main` entry name (e.g.
|
||||
/// `nyx_entry_main` under `tests/dynamic_fixtures/c/main_argv/`) get
|
||||
/// empty guards.
|
||||
fn entry_include_guards(spec: &HarnessSpec) -> (&'static str, &'static str) {
|
||||
if spec.entry_name == "main" {
|
||||
("#define main __nyx_entry_main\n", "#undef main\n")
|
||||
} else {
|
||||
("", "")
|
||||
}
|
||||
}
|
||||
|
||||
fn invoke_for_shape(spec: &HarnessSpec, shape: CShape) -> String {
|
||||
let entry_fn = &spec.entry_name;
|
||||
let entry_fn: &str = if spec.entry_name == "main" {
|
||||
"__nyx_entry_main"
|
||||
} else {
|
||||
spec.entry_name.as_str()
|
||||
};
|
||||
match shape {
|
||||
CShape::FreeFn => match &spec.payload_slot {
|
||||
PayloadSlot::EnvVar(name) => format!(
|
||||
|
|
@ -450,14 +481,15 @@ fn invoke_for_shape(spec: &HarnessSpec, shape: CShape) -> String {
|
|||
)
|
||||
}
|
||||
CShape::MainArgv => {
|
||||
// Rename the user-supplied entry to `nyx_entry_main` via macro so
|
||||
// it does not collide with the harness `main` symbol when the
|
||||
// entry source defines `int main(...)`. Fixture authors should
|
||||
// expose the entry as a function named in `spec.entry_name`.
|
||||
//
|
||||
// Heap-allocate `new_argv` so a future `PayloadSlot::Argv(n)` with
|
||||
// `n >= 6` cannot overrun a fixed stack array. Slots: 1
|
||||
// ("nyx_harness") + pad + 1 (payload) + 1 (NULL terminator).
|
||||
//
|
||||
// When `spec.entry_name == "main"` the entry's `int main(...)` is
|
||||
// renamed to `__nyx_entry_main` via the preprocessor guards on
|
||||
// `#include "entry.c"`, and the call site below targets that
|
||||
// renamed symbol. Fixtures that already expose a non-`main`
|
||||
// entry symbol are called by name unchanged.
|
||||
let pad = match &spec.payload_slot {
|
||||
PayloadSlot::Argv(n) => *n,
|
||||
_ => 0,
|
||||
|
|
@ -607,6 +639,40 @@ mod tests {
|
|||
assert!(h6.source.contains("free(new_argv);"));
|
||||
}
|
||||
|
||||
#[test]
fn emit_main_argv_renames_main_when_entry_named_main() {
    // Emits a harness for a `CliSubcommand` spec using `PayloadSlot::Argv(0)`
    // and the given entry symbol.
    let emit_cli_harness = |entry_symbol: &'static str| {
        let mut spec = make_spec(PayloadSlot::Argv(0));
        spec.entry_kind = EntryKind::CliSubcommand;
        spec.entry_name = entry_symbol.into();
        emit(&spec).unwrap()
    };

    // Track B CLI case: the entry symbol IS `main`, and the entry source
    // defines `int main(int argc, char *argv[])`. The emitted harness has to
    // rename it, otherwise it clashes with the harness `main` at link time.
    let renamed = emit_cli_harness("main");
    let renamed_src = &renamed.source;
    assert!(
        renamed_src.contains("#define main __nyx_entry_main"),
        "rename guard missing from emitted source",
    );
    assert!(
        renamed_src.contains("#undef main"),
        "undef guard missing — harness `int main(...)` definition follows the include",
    );
    assert!(
        renamed_src.contains("__nyx_entry_main(new_argc, new_argv)"),
        "harness call site must target the renamed symbol",
    );
    // The harness keeps its own, un-renamed entry point.
    assert!(renamed_src.contains("int main(int argc, char *argv[])"));

    // Fixture case: a non-`main` entry name gets no guards and is called
    // by its own name.
    let fixture = emit_cli_harness("nyx_entry_main");
    let fixture_src = &fixture.source;
    assert!(!fixture_src.contains("#define main"));
    assert!(!fixture_src.contains("#undef main"));
    assert!(fixture_src.contains("nyx_entry_main(new_argc, new_argv)"));
}
|
||||
|
||||
#[test]
|
||||
fn emit_libfuzzer_shape_passes_bytes() {
|
||||
let mut spec = make_spec(PayloadSlot::Param(0));
|
||||
|
|
|
|||
|
|
@ -287,12 +287,19 @@ impl LangEmitter for CppEmitter {
|
|||
}
|
||||
|
||||
/// Phase 26 — C++ chain-step harness.
|
||||
///
|
||||
/// Shell-wraps `c++` + run so the compiled binary actually executes
|
||||
/// after the build completes (see C-side commentary for the rationale).
|
||||
fn chain_step(prev_output: Option<&[u8]>) -> ChainStepHarness {
|
||||
let source = "#include <cstdio>\n#include <cstdlib>\n\nint main() {\n const char *prev = std::getenv(\"NYX_PREV_OUTPUT\");\n if (prev) std::fputs(prev, stdout);\n return 0;\n}\n".to_owned();
|
||||
ChainStepHarness {
|
||||
source,
|
||||
filename: "step.cpp".to_owned(),
|
||||
command: vec!["c++".to_owned(), "step.cpp".to_owned(), "-o".to_owned(), "step".to_owned()],
|
||||
command: vec![
|
||||
"sh".to_owned(),
|
||||
"-c".to_owned(),
|
||||
"c++ step.cpp -o step && ./step".to_owned(),
|
||||
],
|
||||
extra_env: prev_output
|
||||
.map(|bytes| {
|
||||
vec![(
|
||||
|
|
@ -328,6 +335,7 @@ pub fn emit(spec: &HarnessSpec) -> Result<HarnessSource, UnsupportedReason> {
|
|||
|
||||
fn generate_main_cpp(spec: &HarnessSpec, shape: CppShape) -> String {
|
||||
let invocation = invoke_for_shape(spec, shape);
|
||||
let (entry_open, entry_close) = entry_include_guards(spec);
|
||||
|
||||
format!(
|
||||
r#"// Nyx dynamic harness — auto-generated, do not edit (Phase 16 — CppShape::{shape:?}).
|
||||
|
|
@ -341,8 +349,8 @@ fn generate_main_cpp(spec: &HarnessSpec, shape: CppShape) -> String {
|
|||
|
||||
static std::string nyx_payload();
|
||||
|
||||
#include "entry.cpp"
|
||||
|
||||
{entry_open}#include "entry.cpp"
|
||||
{entry_close}
|
||||
int main(int argc, char *argv[]) {{
|
||||
(void)argc; (void)argv;
|
||||
std::string payload = nyx_payload();
|
||||
|
|
@ -390,11 +398,29 @@ static std::string nyx_payload() {{
|
|||
"#,
|
||||
shape = shape,
|
||||
invocation = invocation,
|
||||
entry_open = entry_open,
|
||||
entry_close = entry_close,
|
||||
)
|
||||
}
|
||||
|
||||
/// Preprocessor guards that rename the entry source's `int main(...)` to
|
||||
/// `__nyx_entry_main(...)` when the spec entry symbol IS `main`. Mirrors
|
||||
/// the C-side fix; without it the user's `main` collides with the harness's
|
||||
/// own `main` at link time.
|
||||
fn entry_include_guards(spec: &HarnessSpec) -> (&'static str, &'static str) {
|
||||
if spec.entry_name == "main" {
|
||||
("#define main __nyx_entry_main\n", "#undef main\n")
|
||||
} else {
|
||||
("", "")
|
||||
}
|
||||
}
|
||||
|
||||
fn invoke_for_shape(spec: &HarnessSpec, shape: CppShape) -> String {
|
||||
let entry_fn = &spec.entry_name;
|
||||
let entry_fn: &str = if spec.entry_name == "main" {
|
||||
"__nyx_entry_main"
|
||||
} else {
|
||||
spec.entry_name.as_str()
|
||||
};
|
||||
match shape {
|
||||
CppShape::FreeFn => match &spec.payload_slot {
|
||||
PayloadSlot::EnvVar(name) => format!(
|
||||
|
|
@ -539,6 +565,35 @@ mod tests {
|
|||
assert!(h.source.contains("nyx_entry_main(static_cast<int>(argv_storage.size()), new_argv.data())"));
|
||||
}
|
||||
|
||||
#[test]
fn emit_main_argv_renames_main_when_entry_named_main() {
    // Emits a harness for a `CliSubcommand` spec using `PayloadSlot::Argv(0)`
    // and the given entry symbol.
    let emit_cli_harness = |entry_symbol: &'static str| {
        let mut spec = make_spec(PayloadSlot::Argv(0));
        spec.entry_kind = EntryKind::CliSubcommand;
        spec.entry_name = entry_symbol.into();
        emit(&spec).unwrap()
    };

    // Track B CLI case: the entry symbol IS `main`. The entry's
    // `int main(...)` must be renamed or it clashes with the harness `main`
    // at link time.
    let renamed = emit_cli_harness("main");
    let renamed_src = &renamed.source;
    assert!(
        renamed_src.contains("#define main __nyx_entry_main"),
        "rename guard missing",
    );
    assert!(renamed_src.contains("#undef main"), "undef guard missing");
    assert!(
        renamed_src
            .contains("__nyx_entry_main(static_cast<int>(argv_storage.size()), new_argv.data())"),
        "harness call site must target the renamed symbol",
    );
    // The harness keeps its own, un-renamed entry point.
    assert!(renamed_src.contains("int main(int argc, char *argv[])"));

    // Fixture case: a non-`main` entry name gets no guards and is called
    // by its own name.
    let fixture = emit_cli_harness("nyx_entry_main");
    let fixture_src = &fixture.source;
    assert!(!fixture_src.contains("#define main"));
    assert!(!fixture_src.contains("#undef main"));
    assert!(fixture_src
        .contains("nyx_entry_main(static_cast<int>(argv_storage.size()), new_argv.data())"));
}
|
||||
|
||||
#[test]
|
||||
fn emit_cmake_in_extra_files() {
|
||||
let spec = make_spec(PayloadSlot::Param(0));
|
||||
|
|
|
|||
|
|
@ -83,16 +83,23 @@ impl LangEmitter for JavaEmitter {
|
|||
/// Phase 26 — Java chain-step harness.
|
||||
///
|
||||
/// Emits a `Step.java` class whose `main` reads `NYX_PREV_OUTPUT` and
|
||||
/// forwards it on stdout. The Java probe shim is class-level and
|
||||
/// requires `System`/`java.io.*` imports the chain step already pulls in
|
||||
/// implicitly; wiring the full shim is tracked alongside the Phase 14
|
||||
/// emitter follow-up about probe shim splicing.
|
||||
/// forwards it on stdout. The command shell-wraps `javac` + `java` so
|
||||
/// the step actually runs after the build step completes (the
|
||||
/// `ChainStepHarness.command` slot models a single process). The Java
|
||||
/// probe shim is class-level and requires `System` / `java.io.*` imports
|
||||
/// the chain step already pulls in implicitly; wiring the full shim is
|
||||
/// tracked alongside the Phase 14 emitter follow-up about probe shim
|
||||
/// splicing.
|
||||
fn chain_step(prev_output: Option<&[u8]>) -> ChainStepHarness {
|
||||
let source = "public class Step {\n public static void main(String[] args) {\n String prev = System.getenv(\"NYX_PREV_OUTPUT\");\n if (prev == null) prev = \"\";\n System.out.print(prev);\n }\n}\n".to_owned();
|
||||
ChainStepHarness {
|
||||
source,
|
||||
filename: "Step.java".to_owned(),
|
||||
command: vec!["java".to_owned(), "Step".to_owned()],
|
||||
command: vec![
|
||||
"sh".to_owned(),
|
||||
"-c".to_owned(),
|
||||
"javac Step.java && java Step".to_owned(),
|
||||
],
|
||||
extra_env: prev_output
|
||||
.map(|bytes| {
|
||||
vec![(
|
||||
|
|
|
|||
|
|
@ -403,10 +403,21 @@ pub fn emit(spec: &HarnessSpec, is_typescript: bool) -> Result<HarnessSource, Un
|
|||
pub fn chain_step(prev_output: Option<&[u8]>, is_typescript: bool) -> ChainStepHarness {
|
||||
let probe = probe_shim();
|
||||
let driver = "\nprocess.stdout.write(process.env.NYX_PREV_OUTPUT || '');\n";
|
||||
// The chain-step source is pure JS even under the TypeScript emitter
|
||||
// — the probe shim uses no TS-specific syntax — so we keep the `.ts`
|
||||
// filename intent (so the workdir reflects which emitter produced
|
||||
// the step) but stage a `.js` sibling and run that. Without this,
|
||||
// `node step.ts` fails on stock Node before 22.6 (the
|
||||
// `--experimental-strip-types` flag) and on any host that has not
|
||||
// installed `tsx` / `ts-node`.
|
||||
let (filename, command) = if is_typescript {
|
||||
(
|
||||
"step.ts".to_owned(),
|
||||
vec!["node".to_owned(), "step.ts".to_owned()],
|
||||
vec![
|
||||
"sh".to_owned(),
|
||||
"-c".to_owned(),
|
||||
"cp step.ts step.js && node step.js".to_owned(),
|
||||
],
|
||||
)
|
||||
} else {
|
||||
(
|
||||
|
|
|
|||
|
|
@ -78,10 +78,17 @@ impl LangEmitter for RustEmitter {
|
|||
/// via the standard emit path.
|
||||
fn chain_step(prev_output: Option<&[u8]>) -> ChainStepHarness {
|
||||
let source = "use std::env;\nuse std::io::{self, Write};\n\nfn main() {\n let prev = env::var(\"NYX_PREV_OUTPUT\").unwrap_or_default();\n let _ = io::stdout().write_all(prev.as_bytes());\n}\n".to_owned();
|
||||
// Shell-wrap build + run so the step actually executes the compiled binary.
|
||||
// `ChainStepHarness.command` models a single process; without the wrap the
|
||||
// step ends after `rustc` exits and the next chain member sees no output.
|
||||
ChainStepHarness {
|
||||
source,
|
||||
filename: "step.rs".to_owned(),
|
||||
command: vec!["rustc".to_owned(), "step.rs".to_owned(), "-o".to_owned(), "step".to_owned()],
|
||||
command: vec![
|
||||
"sh".to_owned(),
|
||||
"-c".to_owned(),
|
||||
"rustc step.rs -o step && ./step".to_owned(),
|
||||
],
|
||||
extra_env: prev_output
|
||||
.map(|bytes| {
|
||||
vec![(
|
||||
|
|
|
|||
|
|
@ -218,6 +218,37 @@ impl Scrubber {
|
|||
text.to_owned()
|
||||
}
|
||||
}
|
||||
|
||||
/// Scrub raw bytes from a sink-side payload capture. Returns the
|
||||
/// input unchanged when no project secret pattern matches; on a hit,
|
||||
/// returns a deterministic same-length placeholder derived from the
|
||||
/// blake3 digest of the input so downstream forensic tooling that
|
||||
/// keys on payload length (e.g. corpus-promote diffing) keeps its
|
||||
/// invariants.
|
||||
///
|
||||
/// The deferred Phase 28 follow-up flagged this gap: the textual
|
||||
/// scrubber already covers `env_snapshot` / `cwd` / `args_repr` /
|
||||
/// `callee`, but `ProbeWitness::payload_bytes` was passed through
|
||||
/// raw because curated corpus payloads are deterministic literals
|
||||
/// known not to contain credentials. Real-world Track B sinks can
|
||||
/// surface attacker-controlled bytes that contain credentials, and
|
||||
/// this routes that path through the same regex set as everything
|
||||
/// else.
|
||||
pub fn scrub_bytes(&self, bytes: &[u8]) -> Vec<u8> {
|
||||
if !redact::contains_secret(bytes) {
|
||||
return bytes.to_vec();
|
||||
}
|
||||
// Same-length deterministic placeholder: tile the input's blake3
|
||||
// hex digest across `bytes.len()`. Length is preserved so any
|
||||
// downstream tooling that asserts on payload length (the
|
||||
// `events.jsonl` size budget, the corpus-promote diff) keeps
|
||||
// working; content is replaced with a fixed-vocabulary marker
|
||||
// derived from a one-way hash of the original.
|
||||
let digest = blake3::hash(bytes).to_hex();
|
||||
let hex = digest.as_bytes();
|
||||
debug_assert!(!hex.is_empty(), "blake3 hex digest is never empty");
|
||||
(0..bytes.len()).map(|i| hex[i % hex.len()]).collect()
|
||||
}
|
||||
}
|
||||
|
||||
/// Hash a matched secret into the `<scrubbed-hash:<prefix>>` shape.
|
||||
|
|
@ -562,6 +593,47 @@ mod tests {
|
|||
assert_ne!(a, b);
|
||||
}
|
||||
|
||||
#[test]
fn scrub_bytes_passes_through_clean_payload() {
    // A payload with no credential-shaped token must come back byte-for-byte.
    let clean_payload: &[u8] = b"<script>NYX_XSS_CONFIRMED</script>";
    let scrubbed = Scrubber::project_default().scrub_bytes(clean_payload);
    assert_eq!(scrubbed, clean_payload.to_vec());
}
|
||||
|
||||
#[test]
fn scrub_bytes_replaces_credential_payload_same_length() {
    let secret_token: &[u8] = b"AKIAFAKETEST00000000";
    let payload: &[u8] = b"username=admin&token=AKIAFAKETEST00000000&action=login";

    let scrubbed = Scrubber::project_default().scrub_bytes(payload);

    // Length is preserved ...
    assert_eq!(scrubbed.len(), payload.len(), "same-length contract");
    // ... the token no longer appears anywhere in the output ...
    assert!(!scrubbed
        .windows(secret_token.len())
        .any(|window| window == secret_token));
    // ... and the placeholder consists of hex digits only.
    assert!(scrubbed.iter().all(|byte| byte.is_ascii_hexdigit()));
}
|
||||
|
||||
#[test]
fn scrub_bytes_is_deterministic() {
    // Scrubbing the same secret-bearing payload twice yields the same bytes.
    let scrubber = Scrubber::project_default();
    let payload: &[u8] = b"AKIAFAKETEST00000000 payload tail";
    let first_pass = scrubber.scrub_bytes(payload);
    let second_pass = scrubber.scrub_bytes(payload);
    assert_eq!(first_pass, second_pass);
}
|
||||
|
||||
#[test]
fn scrub_bytes_differs_for_different_inputs() {
    // Two payloads that differ only in the token must not share a placeholder.
    let scrubber = Scrubber::project_default();
    let from_zeros = scrubber.scrub_bytes(b"AKIAFAKETEST00000000 alpha");
    let from_ones = scrubber.scrub_bytes(b"AKIAFAKETEST11111111 alpha");
    assert_ne!(from_zeros, from_ones);
}
|
||||
|
||||
#[test]
fn scrub_bytes_handles_empty() {
    // An empty payload scrubs to an empty vector.
    let scrubbed = Scrubber::project_default().scrub_bytes(b"");
    assert_eq!(scrubbed, Vec::<u8>::new());
}
|
||||
|
||||
#[test]
|
||||
fn scrub_is_deterministic_btree() {
|
||||
// Same iterator yields the same map; BTreeMap guarantees iteration order.
|
||||
|
|
|
|||
|
|
@ -185,10 +185,12 @@ impl ProbeWitness {
|
|||
/// the host-side constructor cannot accidentally produce an
|
||||
/// unscrubbed / unbounded witness. Every textual field
|
||||
/// (`env_snapshot` values, `cwd`, each `args_repr` entry) is routed
|
||||
/// through the scrubber before the witness is serialised; the
|
||||
/// `payload_bytes` field is left as raw bytes because the curated
|
||||
/// payload corpus is checked into the repo and grepping it is the
|
||||
/// only reliable forensic signal for triage.
|
||||
/// through the scrubber before the witness is serialised, and the
|
||||
/// truncated `payload_bytes` slice is routed through the
|
||||
/// byte-aware [`crate::dynamic::policy::Scrubber::scrub_bytes`] so
|
||||
/// real-world payloads carrying credential tokens are replaced with
|
||||
/// a deterministic same-length placeholder while curated corpus
|
||||
/// payloads pass through unchanged.
|
||||
pub fn from_inputs<I, S>(
|
||||
env: I,
|
||||
cwd: impl Into<String>,
|
||||
|
|
@ -211,10 +213,12 @@ impl ProbeWitness {
|
|||
.collect();
|
||||
let scrubbed_callee = scrubber.scrub_string(&callee.into());
|
||||
let scrubbed_cwd = scrubber.scrub_string(&cwd.into());
|
||||
let truncated = policy::truncate_payload_bytes(payload);
|
||||
let scrubbed_payload = scrubber.scrub_bytes(truncated);
|
||||
Self {
|
||||
env_snapshot,
|
||||
cwd: scrubbed_cwd,
|
||||
payload_bytes: policy::truncate_payload_bytes(payload).to_vec(),
|
||||
payload_bytes: scrubbed_payload,
|
||||
callee: scrubbed_callee,
|
||||
args_repr: scrubbed_args,
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue