// Mirror of https://github.com/elicpeter/nyx.git
// Synced 2026-06-09 19:45:13 +02:00 (280 lines, 9.7 KiB, Rust).
//! Deterministic seeded RNG for the dynamic layer (Phase 30 — Track C
//! determinism audit).
//!
//! Every randomness source in [`crate::dynamic`] must route through
//! [`SpecRng`] so identical inputs (spec hash + corpus version) produce
//! identical sandbox runs. Non-determinism inside the verifier breaks
//! the Phase 27 `events.jsonl` replay invariant, the Phase 28 repro
//! bundle hermeticity contract, and the Phase 29 per-cell budget gates.
//!
//! The implementation is intentionally minimal:
//!
//! * No external RNG crate — blake3 is the project's hashing primitive
//!   and an extra `rand`/`rand_chacha` dep would expand the supply-chain
//!   surface for no gain.
//! * Output stream is a SHAKE-style hash chain: every 32-byte block is
//!   `blake3(seed || counter_le)`, with the counter incremented after
//!   each block. Throughput is dwarfed by sandbox / build cost so any
//!   added cycles compared to a CSPRNG do not show up in
//!   `benches/dynamic_bench.rs`.
//! * No global or thread-local state — callers thread the [`SpecRng`]
//!   explicitly so a fork in control flow always produces a fresh,
//!   reproducible substream. Mutation fuzzers can clone the RNG before
//!   forking to keep both branches reproducible.
//!
//! # Audit gate
//!
//! `scripts/check_no_unseeded_rand.sh` greps `src/dynamic/` for the
//! banned non-deterministic APIs (`rand::thread_rng`, `OsRng`,
//! `from_entropy`, `getrandom::getrandom`, `Uuid::new_v4`, `fastrand`).
//! Any match exits the script non-zero so CI catches regressions before
//! they land. The seccomp policy file is allowed to mention
//! `"getrandom"` because that string is a syscall name, not a Rust API
//! call; the audit script's regex filters that case out.

use blake3::Hasher;

/// Length, in bytes, of the seed mixed into every block of the RNG stream.
///
/// 32 bytes is the full blake3 output width; using anything smaller would
/// lose entropy if a caller passes a longer spec hash.
const SEED_BYTES: usize = 32;

/// Width, in bytes, of a single hash-chain block.
///
/// Matches blake3's natural output length so a digest never has to be
/// truncated or extended.
const BLOCK_BYTES: usize = 32;

/// Deterministic pseudo-random number generator keyed by a spec hash.
|
||
///
|
||
/// Construct via [`SpecRng::seeded`] (the standard entry point used by
|
||
/// every verifier call site) or [`SpecRng::from_seed_bytes`] (for tests
|
||
/// that need to pin the seed independently of a spec).
|
||
///
|
||
/// The same seed always produces the same byte stream, so any consumer
|
||
/// inside [`crate::dynamic`] that needs randomness (mutation fuzzer
|
||
/// payload choice, environment variable jitter, stub port jitter, …)
|
||
/// gets a reproducible roll without leaking host entropy into the
|
||
/// verdict.
|
||
#[derive(Debug, Clone)]
|
||
pub struct SpecRng {
|
||
seed: [u8; SEED_BYTES],
|
||
counter: u64,
|
||
buf: [u8; BLOCK_BYTES],
|
||
buf_pos: usize,
|
||
}
|
||
|
||
impl SpecRng {
|
||
/// Seed an RNG from a spec hash hex string.
|
||
///
|
||
/// The hex prefix is hashed with blake3 to normalise it to 32 bytes
|
||
/// — callers may pass the short 16-hex-char spec hash (the form
|
||
/// stamped onto [`crate::dynamic::spec::HarnessSpec::spec_hash`])
|
||
/// or a longer derivation; both produce a full-width seed.
|
||
pub fn seeded(spec_hash: &str) -> Self {
|
||
let mut h = Hasher::new();
|
||
h.update(b"nyx.dynamic.rand.v1\0");
|
||
h.update(spec_hash.as_bytes());
|
||
let mut seed = [0u8; SEED_BYTES];
|
||
seed.copy_from_slice(h.finalize().as_bytes());
|
||
Self::from_seed_bytes(seed)
|
||
}
|
||
|
||
/// Seed from raw bytes. Exposed for tests that need a known seed
|
||
/// without round-tripping through a spec hash.
|
||
pub fn from_seed_bytes(seed: [u8; SEED_BYTES]) -> Self {
|
||
Self {
|
||
seed,
|
||
counter: 0,
|
||
buf: [0u8; BLOCK_BYTES],
|
||
buf_pos: BLOCK_BYTES,
|
||
}
|
||
}
|
||
|
||
/// Refill the internal buffer with the next block of the hash
|
||
/// chain. Called lazily as bytes are consumed.
|
||
fn refill(&mut self) {
|
||
let mut h = Hasher::new();
|
||
h.update(&self.seed);
|
||
h.update(&self.counter.to_le_bytes());
|
||
let digest = h.finalize();
|
||
self.buf.copy_from_slice(digest.as_bytes());
|
||
self.counter = self.counter.wrapping_add(1);
|
||
self.buf_pos = 0;
|
||
}
|
||
|
||
/// Fill `out` with deterministic pseudo-random bytes.
|
||
pub fn fill_bytes(&mut self, out: &mut [u8]) {
|
||
let mut written = 0;
|
||
while written < out.len() {
|
||
if self.buf_pos == BLOCK_BYTES {
|
||
self.refill();
|
||
}
|
||
let take = (out.len() - written).min(BLOCK_BYTES - self.buf_pos);
|
||
out[written..written + take]
|
||
.copy_from_slice(&self.buf[self.buf_pos..self.buf_pos + take]);
|
||
self.buf_pos += take;
|
||
written += take;
|
||
}
|
||
}
|
||
|
||
/// Draw the next `u64` from the stream. Used by the rejection
|
||
/// loop in [`Self::gen_range`].
|
||
pub fn next_u64(&mut self) -> u64 {
|
||
let mut buf = [0u8; 8];
|
||
self.fill_bytes(&mut buf);
|
||
u64::from_le_bytes(buf)
|
||
}
|
||
|
||
/// Draw a `u32`. Convenience for callers picking among small
|
||
/// alternatives (payload variants, env mutation slots).
|
||
pub fn next_u32(&mut self) -> u32 {
|
||
(self.next_u64() & 0xFFFF_FFFF) as u32
|
||
}
|
||
|
||
/// Sample a `usize` uniformly in `[0, upper)`. Panics when
|
||
/// `upper == 0` because the request is meaningless; callers should
|
||
/// guard zero-length slices.
|
||
///
|
||
/// Uses rejection sampling against the largest multiple of `upper`
|
||
/// that fits in a `u64` so the distribution is exactly uniform —
|
||
/// modulo-bias would otherwise nudge the corpus picker toward
|
||
/// low-indexed payloads.
|
||
pub fn gen_range(&mut self, upper: usize) -> usize {
|
||
assert!(upper > 0, "SpecRng::gen_range upper bound must be > 0");
|
||
let upper_u64 = upper as u64;
|
||
let zone = u64::MAX - (u64::MAX % upper_u64);
|
||
loop {
|
||
let candidate = self.next_u64();
|
||
if candidate < zone {
|
||
return (candidate % upper_u64) as usize;
|
||
}
|
||
}
|
||
}
|
||
|
||
/// Pick one element from `slice`. Returns `None` only when the
|
||
/// slice is empty so callers can use `?` for empty-corpus paths.
|
||
pub fn choose<'a, T>(&mut self, slice: &'a [T]) -> Option<&'a T> {
|
||
if slice.is_empty() {
|
||
None
|
||
} else {
|
||
Some(&slice[self.gen_range(slice.len())])
|
||
}
|
||
}
|
||
|
||
/// In-place Fisher–Yates shuffle. Useful for the mutation fuzzer
|
||
/// when iterating a payload list in a reproducible order without
|
||
/// pre-sorting in caller code.
|
||
pub fn shuffle<T>(&mut self, slice: &mut [T]) {
|
||
for i in (1..slice.len()).rev() {
|
||
let j = self.gen_range(i + 1);
|
||
slice.swap(i, j);
|
||
}
|
||
}
|
||
}
|
||
|
||
#[cfg(test)]
mod tests {
    //! Unit tests pinning the determinism contract of [`SpecRng`]: equal
    //! seeds yield equal streams, reads are continuous across block
    //! boundaries, and the sampling helpers are reproducible.

    use super::*;

    /// Two generators built from the same spec hash emit identical bytes.
    #[test]
    fn same_seed_produces_same_stream() {
        let mut a = SpecRng::seeded("deadbeefcafebabe");
        let mut b = SpecRng::seeded("deadbeefcafebabe");
        let mut buf_a = [0u8; 64];
        let mut buf_b = [0u8; 64];
        a.fill_bytes(&mut buf_a);
        b.fill_bytes(&mut buf_b);
        assert_eq!(buf_a, buf_b);
    }

    /// Distinct spec hashes lead to distinct output.
    #[test]
    fn different_seeds_diverge() {
        let mut a = SpecRng::seeded("aaaa");
        let mut b = SpecRng::seeded("bbbb");
        assert_ne!(a.next_u64(), b.next_u64());
    }

    #[test]
    fn fill_bytes_crosses_block_boundary() {
        // 80 > BLOCK_BYTES (32) — exercises the refill loop and proves
        // stream continuity across block transitions.
        let mut rng = SpecRng::seeded("boundary");
        let mut a = vec![0u8; 80];
        rng.fill_bytes(&mut a);

        let mut rng2 = SpecRng::seeded("boundary");
        let mut b1 = vec![0u8; 32];
        let mut b2 = vec![0u8; 48];
        rng2.fill_bytes(&mut b1);
        rng2.fill_bytes(&mut b2);

        let concat = [b1, b2].concat();
        assert_eq!(a, concat);
    }

    /// Every sample from `gen_range(7)` lies in `[0, 7)`.
    #[test]
    fn gen_range_stays_in_bounds() {
        let mut rng = SpecRng::seeded("range");
        for _ in 0..1000 {
            let v = rng.gen_range(7);
            assert!(v < 7);
        }
    }

    /// A zero upper bound is rejected with the documented assertion
    /// message; matching on the message keeps an unrelated panic from
    /// satisfying this test.
    #[test]
    #[should_panic(expected = "SpecRng::gen_range upper bound must be > 0")]
    fn gen_range_zero_panics() {
        let mut rng = SpecRng::seeded("range");
        rng.gen_range(0);
    }

    /// Choosing from an empty slice yields `None` instead of panicking.
    #[test]
    fn choose_empty_returns_none() {
        let mut rng = SpecRng::seeded("choose");
        let empty: [u32; 0] = [];
        assert!(rng.choose(&empty).is_none());
    }

    /// Identically seeded generators make the same sequence of picks.
    #[test]
    fn choose_is_reproducible() {
        let items = [10u32, 20, 30, 40, 50];
        let mut a = SpecRng::seeded("pick");
        let mut b = SpecRng::seeded("pick");
        for _ in 0..16 {
            assert_eq!(a.choose(&items), b.choose(&items));
        }
    }

    /// Identically seeded generators produce the same permutation.
    #[test]
    fn shuffle_is_reproducible() {
        let mut v1: Vec<u32> = (0..20).collect();
        let mut v2 = v1.clone();
        let mut a = SpecRng::seeded("shuffle");
        let mut b = SpecRng::seeded("shuffle");
        a.shuffle(&mut v1);
        b.shuffle(&mut v2);
        assert_eq!(v1, v2);
    }

    #[test]
    fn clone_forks_substream_reproducibly() {
        // Cloning at any point must produce identical streams from
        // both halves — required so a fuzzer fork (try-this-mutation
        // vs try-that) is hermetic.
        let mut rng = SpecRng::seeded("fork");
        rng.next_u32();
        let mut a = rng.clone();
        let mut b = rng.clone();
        let mut buf_a = [0u8; 48];
        let mut buf_b = [0u8; 48];
        a.fill_bytes(&mut buf_a);
        b.fill_bytes(&mut buf_b);
        assert_eq!(buf_a, buf_b);
    }

    /// Raw-seed construction is as deterministic as the spec-hash path.
    #[test]
    fn from_seed_bytes_is_deterministic() {
        let seed = [7u8; SEED_BYTES];
        let mut a = SpecRng::from_seed_bytes(seed);
        let mut b = SpecRng::from_seed_bytes(seed);
        assert_eq!(a.next_u64(), b.next_u64());
    }
}