nyx/fuzz/dynamic_corpus/src/main.rs

337 lines
12 KiB
Rust

//! Dynamic corpus mutation fuzzer.
//!
//! Seeds from [`nyx_scanner::dynamic::corpus::payloads_for`], mutates bytes,
//! runs against an instrumented fixture harness, and writes candidates to
//! `fuzz-discovered/{spec_hash}/` when `sink_hit && oracle_fired`.
//!
//! # Usage
//!
//! ```text
//! # Run against the SSRF corpus with an OOB listener
//! cargo run -p nyx-dynamic-corpus -- run \
//! --cap ssrf \
//! --spec-hash 0123456789abcdef \
//! --output ../../fuzz-discovered \
//! --iterations 1000 \
//! --harness-cmd "python3 tests/dynamic_fixtures/ssrf_harness.py"
//! ```
//!
//! Discovered candidates land in `{output}/{spec_hash}/` with a JSON
//! provenance sidecar (see §16.1 / §16.4 rationale for manual review gate).
use nyx_scanner::dynamic::corpus::{
audit_marker_collisions, materialise_bytes, payloads_for, CuratedPayload, Oracle,
PayloadProvenance, CORPUS_VERSION,
};
use nyx_scanner::dynamic::rand::SpecRng;
use nyx_scanner::labels::Cap;
use std::collections::HashSet;
use std::path::{Path, PathBuf};
/// CLI entry point: dispatches on the first positional argument.
///
/// Supported subcommands are `run`, `audit-markers`, and `list-caps`.
/// With no subcommand, or an unrecognised one, a message is printed to
/// stderr and the process exits with status 1.
fn main() {
    let args: Vec<String> = std::env::args().collect();
    let Some(command) = args.get(1) else {
        // argv may legitimately be empty on some platforms, so never index
        // `args[0]` unconditionally just to print the usage banner.
        let program = args
            .first()
            .map(String::as_str)
            .unwrap_or("nyx-dynamic-corpus");
        eprintln!("Usage: {} <command>", program);
        eprintln!("Commands:");
        eprintln!(
            " run --cap <cap> --spec-hash <hash> [--output <dir>] [--iterations <n>] [--harness-cmd <cmd>]"
        );
        eprintln!(" audit-markers");
        eprintln!(" list-caps");
        std::process::exit(1);
    };
    match command.as_str() {
        "audit-markers" => cmd_audit_markers(),
        "list-caps" => cmd_list_caps(),
        // Everything after the subcommand name is handed to `run` as flags.
        "run" => cmd_run(&args[2..]),
        _ => {
            eprintln!("Unknown command: {}", command);
            std::process::exit(1);
        }
    }
}
/// `audit-markers` subcommand.
///
/// Calls `audit_marker_collisions` and reports the outcome: a single OK line
/// on stdout when the returned list is empty, otherwise one stderr line per
/// collision followed by exit status 1.
fn cmd_audit_markers() {
    let found = audit_marker_collisions();

    // Failure path first so the success case reads as the fall-through.
    if !found.is_empty() {
        eprintln!("FAIL: {} marker collision(s) detected:", found.len());
        for (cap, label, other_cap) in &found {
            eprintln!(" {cap}/{label} marker appears in {other_cap} payload bytes");
        }
        std::process::exit(1);
    }

    println!(
        "OK: no marker collisions detected (corpus_version={})",
        CORPUS_VERSION
    );
}
/// `list-caps` subcommand.
///
/// Prints every capability this tool knows by name, the number of payloads
/// `payloads_for` returns for it, and one line per payload with its label,
/// benign/vuln classification, and `oob_nonce_slot` flag.
fn cmd_list_caps() {
    let known_caps = [
        ("sql_query", Cap::SQL_QUERY),
        ("code_exec", Cap::CODE_EXEC),
        ("file_io", Cap::FILE_IO),
        ("ssrf", Cap::SSRF),
        ("html_escape", Cap::HTML_ESCAPE),
    ];

    println!("Supported caps (corpus_version={}):", CORPUS_VERSION);
    for &(cap_label, cap_flag) in &known_caps {
        let entries = payloads_for(cap_flag);
        println!(" {cap_label}: {} payload(s)", entries.len());
        for entry in entries {
            let class = if entry.is_benign { "benign" } else { "vuln" };
            println!(
                " - {} [{}] oob_nonce_slot={}",
                entry.label, class, entry.oob_nonce_slot
            );
        }
    }
}
/// `run` subcommand: mutate corpus seeds and persist interesting candidates.
///
/// Flags (looked up with `get_arg`):
/// - `--cap <name>` (required): resolved through `parse_cap`.
/// - `--spec-hash <hash>` (required): names the output sub-directory and
///   seeds the RNG.
/// - `--output <dir>`: defaults to `fuzz-discovered`.
/// - `--iterations <n>`: defaults to 1000; a value that is not a valid `u64`
///   is rejected instead of being silently replaced by the default.
/// - `--harness-cmd <cmd>`: when present each candidate is judged by
///   `run_candidate_against_harness`; otherwise by
///   `is_structurally_interesting`.
///
/// Each interesting candidate is written to `{output}/{spec_hash}/` together
/// with a `.json` provenance sidecar. The process exits with status 1 on bad
/// arguments, an unknown cap, an empty payload set, or an output directory
/// that cannot be created.
fn cmd_run(args: &[String]) {
    let cap_name = get_arg(args, "--cap").unwrap_or_else(|| {
        eprintln!("--cap required");
        std::process::exit(1);
    });
    let spec_hash = get_arg(args, "--spec-hash").unwrap_or_else(|| {
        eprintln!("--spec-hash required");
        std::process::exit(1);
    });
    let output_dir = get_arg(args, "--output").unwrap_or_else(|| "fuzz-discovered".to_owned());
    // A typo such as `--iterations 10k` must not quietly become 1000 runs.
    let iterations: u64 = match get_arg(args, "--iterations") {
        None => 1000,
        Some(raw) => raw.parse::<u64>().unwrap_or_else(|e| {
            eprintln!("Invalid --iterations value {raw:?}: {e}");
            std::process::exit(1);
        }),
    };
    let harness_cmd = get_arg(args, "--harness-cmd");

    let cap = parse_cap(&cap_name).unwrap_or_else(|| {
        eprintln!("Unknown cap: {cap_name}. Use list-caps to see supported caps.");
        std::process::exit(1);
    });
    let payloads = payloads_for(cap);
    if payloads.is_empty() {
        eprintln!("No payloads for cap {cap_name}");
        std::process::exit(1);
    }

    let out_path = PathBuf::from(&output_dir).join(&spec_hash);
    std::fs::create_dir_all(&out_path).unwrap_or_else(|e| {
        eprintln!("Cannot create output dir {}: {e}", out_path.display());
        std::process::exit(1);
    });
    println!(
        "Dynamic corpus fuzzer: cap={cap_name} spec_hash={spec_hash} \
         iterations={iterations} output={}",
        out_path.display()
    );

    let mut discovered = 0u64;
    let mut seen: HashSet<Vec<u8>> = HashSet::new();

    // Seed the fuzzer from the corpus payloads (vulnerable, non-OOB only).
    let corpus: Vec<Vec<u8>> = payloads
        .iter()
        .filter(|p| !p.is_benign && !p.oob_nonce_slot)
        .map(|p| p.bytes.to_vec())
        .collect();
    if corpus.is_empty() {
        println!("No static seed payloads for {cap_name} (all are OOB or benign). Skipping.");
        return;
    }

    // Deterministic RNG keyed on the spec hash so two runs against the
    // same fixture produce identical candidate streams. The Phase 27
    // events.jsonl replay invariant + Phase 28 repro bundle hermeticity
    // contract both require the verifier (and any fuzzer feeding it) to
    // be reproducible from inputs alone — no host entropy mixed in.
    let mut rng = SpecRng::seeded(&spec_hash);

    for iter in 0..iterations {
        let seed = &corpus[rng.gen_range(corpus.len())];
        let candidate = mutate_bytes(seed, &mut rng);

        // `insert` reports whether the value was new, so one hash lookup
        // covers both the duplicate check and the bookkeeping.
        if !seen.insert(candidate.clone()) {
            continue;
        }

        let interesting = match harness_cmd.as_deref() {
            Some(cmd) => run_candidate_against_harness(&candidate, cmd, payloads),
            // Headless mode: check heuristically whether the candidate is
            // structurally plausible for the cap (bypass the subprocess cost).
            None => is_structurally_interesting(&candidate, cap),
        };
        if !interesting {
            continue;
        }

        // The file name draws from the same RNG, keeping names reproducible.
        let filename = format!("candidate-{:016x}", rng.next_u64());
        let candidate_path = out_path.join(&filename);
        if let Err(e) = std::fs::write(&candidate_path, &candidate) {
            // Keep fuzzing, but do not count or annotate a candidate that
            // never reached the disk.
            eprintln!("Failed to write candidate: {e}");
            continue;
        }
        discovered += 1;

        // Write provenance sidecar.
        let sidecar = serde_json::json!({
            "source": "InternalFuzzer",
            "references": [format!("fuzzer-run-{}", iter)],
            "since_corpus_version": CORPUS_VERSION,
            "spec_hash": spec_hash,
            "cap": cap_name,
            "bytes_hex": hex_encode(&candidate),
        });
        let sidecar_path = out_path.join(format!("{filename}.json"));
        if let Err(e) = std::fs::write(&sidecar_path, sidecar.to_string()) {
            eprintln!(
                "Failed to write provenance sidecar {}: {e}",
                sidecar_path.display()
            );
        }
        println!(" [+] iter={iter} candidate={filename}");
    }

    println!(
        "Done: {iterations} iterations, {discovered} candidates written to {}",
        out_path.display()
    );
}
// ── Helpers ──────────────────────────────────────────────────────────────────
/// Returns the value that follows the first occurrence of flag `name`.
///
/// Yields `None` when the flag is absent or when it is the final element and
/// therefore has nothing after it.
fn get_arg(args: &[String], name: &str) -> Option<String> {
    let mut remaining = args.iter();
    // Advance the iterator just past the flag; the next item is its value.
    remaining.find(|arg| arg.as_str() == name)?;
    remaining.next().cloned()
}
/// Maps a user-supplied capability name or alias to its `Cap` flag.
///
/// Matching is ASCII case-insensitive. Unrecognised names yield `None`.
fn parse_cap(name: &str) -> Option<Cap> {
    let normalised = name.to_ascii_lowercase();
    let cap = match normalised.as_str() {
        "sql_query" | "sqli" | "sql" => Cap::SQL_QUERY,
        "code_exec" | "cmdi" | "rce" => Cap::CODE_EXEC,
        "file_io" | "path_traversal" | "lfi" => Cap::FILE_IO,
        "ssrf" => Cap::SSRF,
        "html_escape" | "xss" => Cap::HTML_ESCAPE,
        _ => return None,
    };
    Some(cap)
}
/// Produces one mutated copy of `input` using a strategy drawn from `rng`.
///
/// An empty input is returned unchanged without consuming any randomness.
/// Otherwise one of five strategies is chosen (`next_u64() % 5`): XOR a byte
/// with a non-zero mask, insert a byte, delete a byte (only when more than
/// one byte remains), append a fixed token, or overwrite bytes in place with
/// a fixed token truncated to fit.
fn mutate_bytes(input: &[u8], rng: &mut SpecRng) -> Vec<u8> {
    // Tokens appended verbatim by strategy 3.
    const APPEND_TOKENS: [&[u8]; 11] = [
        b"'", b"\"", b";", b"--", b" OR 1=1", b"<script>", b"../", b"\x00", b"{{", b"|", b"`",
    ];
    // Tokens written over existing bytes by the fallback strategy.
    const OVERWRITE_TOKENS: [&[u8]; 7] = [b"'", b"\"", b"<", b">", b"%00", b"../", b"//"];

    if input.is_empty() {
        return Vec::new();
    }
    let mut mutated = input.to_vec();

    let strategy = rng.next_u64() % 5;
    match strategy {
        0 => {
            // `| 1` keeps the mask non-zero so the byte always changes.
            let at = rng.gen_range(mutated.len());
            let mask = (rng.next_u64() as u8) | 1;
            mutated[at] ^= mask;
        }
        1 => {
            // Position may equal the length, i.e. insert at the very end.
            let at = rng.gen_range(mutated.len() + 1);
            let byte = rng.next_u64() as u8;
            mutated.insert(at, byte);
        }
        2 if mutated.len() > 1 => {
            let at = rng.gen_range(mutated.len());
            mutated.remove(at);
        }
        // A single-byte buffer is never deleted down to empty.
        2 => {}
        3 => {
            let token = APPEND_TOKENS[rng.gen_range(APPEND_TOKENS.len())];
            mutated.extend_from_slice(token);
        }
        _ => {
            let start = rng.gen_range(mutated.len());
            let token = OVERWRITE_TOKENS[rng.gen_range(OVERWRITE_TOKENS.len())];
            // Clip the token so the overwrite never grows the buffer.
            let stop = (start + token.len()).min(mutated.len());
            mutated[start..stop].copy_from_slice(&token[..stop - start]);
        }
    }
    mutated
}
/// Cheap syntactic screen used when no harness command is supplied.
///
/// Checks `cap` against SQL_QUERY, CODE_EXEC, FILE_IO and HTML_ESCAPE in that
/// order and applies the first matching family's byte/substring test to
/// `candidate`. Any other capability yields `false`.
fn is_structurally_interesting(candidate: &[u8], cap: Cap) -> bool {
    // Lossy decode is deferred so the byte-only CODE_EXEC path skips it.
    let as_text = || String::from_utf8_lossy(candidate);

    if cap.contains(Cap::SQL_QUERY) {
        let text = as_text();
        return text.contains('\'')
            || text.contains("--")
            || text.to_ascii_uppercase().contains("UNION");
    }
    if cap.contains(Cap::CODE_EXEC) {
        return candidate
            .iter()
            .any(|&byte| matches!(byte, b';' | b'|' | b'`'));
    }
    if cap.contains(Cap::FILE_IO) {
        let text = as_text();
        return text.contains("../") || text.contains("/etc/");
    }
    if cap.contains(Cap::HTML_ESCAPE) {
        let text = as_text();
        return text.contains('<') || text.contains('>');
    }
    false
}
/// Run a candidate against an external harness subprocess.
///
/// `harness_cmd` is split on whitespace into a program and its arguments (no
/// shell quoting is interpreted). The candidate is passed base64-encoded in
/// the `NYX_PAYLOAD_B64` environment variable.
///
/// Returns `true` only when the harness output (stdout followed by stderr)
/// contains both the `__NYX_SINK_HIT__` sentinel and the marker of the first
/// vulnerable, non-OOB payload whose oracle is `Oracle::OutputContains`.
///
/// Returns `false` without spawning anything when `harness_cmd` is blank or
/// no usable (non-empty) oracle marker exists, since the result could never
/// be `true` in those cases. Also returns `false` if the process cannot be
/// run.
fn run_candidate_against_harness(
    candidate: &[u8],
    harness_cmd: &str,
    payloads: &[CuratedPayload],
) -> bool {
    /// Byte-wise substring search. An empty needle never matches; this also
    /// avoids `slice::windows(0)`, which panics.
    fn contains_bytes(haystack: &[u8], needle: &[u8]) -> bool {
        !needle.is_empty() && haystack.windows(needle.len()).any(|w| w == needle)
    }

    const SINK_SENTINEL: &[u8] = b"__NYX_SINK_HIT__";

    let mut parts = harness_cmd.split_whitespace();
    let Some(program) = parts.next() else {
        return false;
    };

    let oracle_marker = payloads
        .iter()
        .filter(|p| !p.is_benign && !p.oob_nonce_slot)
        .find_map(|p| {
            if let Oracle::OutputContains(m) = &p.oracle {
                Some(*m)
            } else {
                None
            }
        });
    // Without a non-empty marker the oracle can never fire, so skip the
    // (comparatively expensive) subprocess entirely.
    let Some(marker) = oracle_marker else {
        return false;
    };
    if marker.is_empty() {
        return false;
    }

    let b64 = base64_encode(candidate);
    let output = std::process::Command::new(program)
        .args(parts)
        .env("NYX_PAYLOAD_B64", &b64)
        .output();
    let Ok(out) = output else { return false };

    // Search stdout and stderr as one stream, stdout first.
    let mut combined = out.stdout;
    combined.extend_from_slice(&out.stderr);

    contains_bytes(&combined, SINK_SENTINEL) && contains_bytes(&combined, marker.as_bytes())
}
/// Encodes `data` as lowercase hexadecimal, two characters per byte.
fn hex_encode(data: &[u8]) -> String {
    const DIGITS: &[u8; 16] = b"0123456789abcdef";

    let mut hex = String::with_capacity(data.len() * 2);
    for &byte in data {
        // High nibble first, then low nibble.
        hex.push(DIGITS[usize::from(byte >> 4)] as char);
        hex.push(DIGITS[usize::from(byte & 0x0f)] as char);
    }
    hex
}
/// Encodes `data` with the standard base64 alphabet (`A-Z a-z 0-9 + /`),
/// padding the final group with `=` so the output length is a multiple of 4.
fn base64_encode(data: &[u8]) -> String {
    const TABLE: &[u8; 64] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

    // Extracts the 6-bit group starting at bit `shift` of a 24-bit word.
    let sextet = |word: u32, shift: u32| TABLE[((word >> shift) & 0x3f) as usize] as char;

    let mut encoded = String::with_capacity((data.len() + 2) / 3 * 4);
    for group in data.chunks(3) {
        match *group {
            [a, b, c] => {
                let word = (u32::from(a) << 16) | (u32::from(b) << 8) | u32::from(c);
                encoded.push(sextet(word, 18));
                encoded.push(sextet(word, 12));
                encoded.push(sextet(word, 6));
                encoded.push(sextet(word, 0));
            }
            [a, b] => {
                let word = (u32::from(a) << 16) | (u32::from(b) << 8);
                encoded.push(sextet(word, 18));
                encoded.push(sextet(word, 12));
                encoded.push(sextet(word, 6));
                encoded.push('=');
            }
            [a] => {
                let word = u32::from(a) << 16;
                encoded.push(sextet(word, 18));
                encoded.push(sextet(word, 12));
                encoded.push_str("==");
            }
            _ => unreachable!("chunks(3) yields between one and three bytes"),
        }
    }
    encoded
}