nyx/src/dynamic/rand.rs

280 lines
9.7 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

//! Deterministic seeded RNG for the dynamic layer (Phase 30 — Track C
//! determinism audit).
//!
//! Every randomness source in [`crate::dynamic`] must route through
//! [`SpecRng`] so identical inputs (spec hash + corpus version) produce
//! identical sandbox runs. Non-determinism inside the verifier breaks
//! the Phase 27 `events.jsonl` replay invariant, the Phase 28 repro
//! bundle hermeticity contract, and the Phase 29 per-cell budget gates.
//!
//! The implementation is intentionally minimal:
//!
//! * No external RNG crate — blake3 is the project's hashing primitive
//! and an extra `rand`/`rand_chacha` dep would expand the supply-chain
//! surface for no gain.
//! * Output stream is a SHAKE-style hash chain in counter mode: every
//! 32-byte block is `blake3(seed || counter_le)`, with the counter
//! incremented after each block (no block feeds into the next).
//! RNG throughput is dwarfed by sandbox / build cost, so any cycles
//! added relative to a dedicated CSPRNG do not show up in
//! `benches/dynamic_bench.rs`.
//! * No global or thread-local state — callers thread the [`SpecRng`]
//! explicitly so a fork in control flow always works from a
//! reproducible stream. Cloning copies the full generator state, so
//! mutation fuzzers can clone the RNG before forking and both branches
//! then draw the same, reproducible sequence.
//!
//! # Audit gate
//!
//! `scripts/check_no_unseeded_rand.sh` greps `src/dynamic/` for the
//! banned non-deterministic APIs (`rand::thread_rng`, `OsRng`,
//! `from_entropy`, `getrandom::getrandom`, `Uuid::new_v4`, `fastrand`).
//! Any match exits the script non-zero so CI catches regressions before
//! they land. The seccomp policy file is allowed to mention
//! `"getrandom"` because that string is a syscall name, not a Rust API
//! call; the audit script's regex filters that case out.
use blake3::Hasher;
/// Length of the seed mixed into every block of the RNG stream. 32
/// bytes = full blake3 output width; using anything smaller would lose
/// entropy if a caller passes a longer spec hash.
///
/// Must stay equal to the blake3 digest length: `SpecRng::seeded` copies
/// the digest into the seed array with `copy_from_slice`, which panics
/// when the two lengths differ.
const SEED_BYTES: usize = 32;
/// Width of a single hash-chain block. Matches blake3's natural output
/// length so we never have to truncate or extend.
///
/// Must stay equal to the blake3 digest length: `SpecRng::refill` copies
/// the digest into the block buffer with `copy_from_slice`, which panics
/// when the two lengths differ.
const BLOCK_BYTES: usize = 32;
/// Deterministic pseudo-random number generator keyed by a spec hash.
///
/// Construct via [`SpecRng::seeded`] (the standard entry point used by
/// every verifier call site) or [`SpecRng::from_seed_bytes`] (for tests
/// that need to pin the seed independently of a spec).
///
/// The same seed always produces the same byte stream, so any consumer
/// inside [`crate::dynamic`] that needs randomness (mutation fuzzer
/// payload choice, environment variable jitter, stub port jitter, …)
/// gets a reproducible roll without leaking host entropy into the
/// verdict.
///
/// `Clone` copies the complete generator state (seed, counter, buffered
/// block and read offset), so a clone continues with exactly the bytes
/// the original would have produced next.
#[derive(Debug, Clone)]
pub struct SpecRng {
/// Key material hashed together with `counter` to derive each block.
seed: [u8; SEED_BYTES],
/// Index of the next block to derive; incremented (wrapping) by every
/// refill.
counter: u64,
/// Most recently derived block of output bytes.
buf: [u8; BLOCK_BYTES],
/// Offset of the first unconsumed byte in `buf`. A value of
/// `BLOCK_BYTES` means the block is exhausted and the next read must
/// refill first; new generators start in that state.
buf_pos: usize,
}
impl SpecRng {
    /// Build an RNG from a spec hash string.
    ///
    /// The string is hashed with blake3 behind a fixed, NUL-terminated
    /// domain tag, so any input length — the short 16-hex-char spec
    /// hash stamped onto
    /// [`crate::dynamic::spec::HarnessSpec::spec_hash`] or a longer
    /// derivation — is normalised to a full 32-byte seed.
    pub fn seeded(spec_hash: &str) -> Self {
        let digest = Hasher::new()
            .update(b"nyx.dynamic.rand.v1\0")
            .update(spec_hash.as_bytes())
            .finalize();
        let seed: [u8; SEED_BYTES] = *digest.as_bytes();
        Self::from_seed_bytes(seed)
    }

    /// Build an RNG directly from raw seed bytes. Intended for tests
    /// that want a known seed without going through a spec hash.
    ///
    /// The generator starts with an exhausted buffer, so the first draw
    /// derives block 0.
    pub fn from_seed_bytes(seed: [u8; SEED_BYTES]) -> Self {
        Self {
            seed,
            counter: 0,
            buf: [0u8; BLOCK_BYTES],
            // "Exhausted" marker: forces a refill before the first read.
            buf_pos: BLOCK_BYTES,
        }
    }

    /// Derive the next block, `blake3(seed || counter_le)`, into the
    /// internal buffer, advance the counter and rewind the read offset.
    /// Invoked lazily by [`Self::fill_bytes`] once the current block has
    /// been fully consumed.
    fn refill(&mut self) {
        let block = Hasher::new()
            .update(&self.seed)
            .update(&self.counter.to_le_bytes())
            .finalize();
        self.buf = *block.as_bytes();
        self.counter = self.counter.wrapping_add(1);
        self.buf_pos = 0;
    }

    /// Overwrite all of `out` with the next bytes of the deterministic
    /// stream. An empty `out` consumes nothing.
    pub fn fill_bytes(&mut self, out: &mut [u8]) {
        let mut cursor = 0;
        while cursor < out.len() {
            if self.buf_pos == BLOCK_BYTES {
                self.refill();
            }
            // Copy as much as both the request and the current block allow.
            let pending = &self.buf[self.buf_pos..];
            let n = pending.len().min(out.len() - cursor);
            out[cursor..cursor + n].copy_from_slice(&pending[..n]);
            self.buf_pos += n;
            cursor += n;
        }
    }

    /// Draw the next eight stream bytes as a little-endian `u64`. This
    /// is the primitive behind the rejection loop in
    /// [`Self::gen_range`].
    pub fn next_u64(&mut self) -> u64 {
        let mut raw = [0u8; 8];
        self.fill_bytes(&mut raw);
        u64::from_le_bytes(raw)
    }

    /// Draw a `u32`. Convenience for callers picking among small
    /// alternatives (payload variants, env mutation slots).
    ///
    /// Consumes a full `u64` from the stream and keeps its low 32 bits.
    pub fn next_u32(&mut self) -> u32 {
        // Truncating cast: retains exactly the low 32 bits.
        self.next_u64() as u32
    }

    /// Sample a `usize` uniformly from `[0, upper)`.
    ///
    /// Draws are rejected until one falls below the largest multiple of
    /// `upper` that fits in a `u64`, so the result carries no modulo
    /// bias — a biased pick would otherwise nudge the corpus picker
    /// toward low-indexed payloads.
    ///
    /// # Panics
    ///
    /// Panics when `upper == 0`, because the request is meaningless;
    /// callers should guard zero-length slices.
    pub fn gen_range(&mut self, upper: usize) -> usize {
        assert!(upper > 0, "SpecRng::gen_range upper bound must be > 0");
        let bound = upper as u64;
        // Everything at or above `limit` belongs to the incomplete tail
        // of the u64 range and is discarded.
        let limit = u64::MAX - u64::MAX % bound;
        loop {
            let draw = self.next_u64();
            if draw >= limit {
                continue;
            }
            return (draw % bound) as usize;
        }
    }

    /// Pick one element of `slice` uniformly at random. Yields `None`
    /// only for an empty slice (without consuming any of the stream),
    /// so callers can use `?` on empty-corpus paths.
    pub fn choose<'a, T>(&mut self, slice: &'a [T]) -> Option<&'a T> {
        match slice.len() {
            0 => None,
            len => slice.get(self.gen_range(len)),
        }
    }

    /// In-place Fisher–Yates shuffle. Lets the mutation fuzzer walk a
    /// payload list in a reproducible order without pre-sorting in
    /// caller code. Slices with fewer than two elements are left alone
    /// and consume nothing from the stream.
    pub fn shuffle<T>(&mut self, slice: &mut [T]) {
        let mut i = slice.len();
        while i > 1 {
            i -= 1;
            // Swap position `i` with a uniformly chosen slot in `0..=i`.
            let j = self.gen_range(i + 1);
            slice.swap(i, j);
        }
    }
}
#[cfg(test)]
mod tests {
    //! Unit tests pinning the determinism contract of [`SpecRng`]: equal
    //! seeds give equal streams, reads are position-independent, and the
    //! helper samplers stay in range and reproducible.

    use super::*;

    #[test]
    fn same_seed_produces_same_stream() {
        let mut a = SpecRng::seeded("deadbeefcafebabe");
        let mut b = SpecRng::seeded("deadbeefcafebabe");
        let mut buf_a = [0u8; 64];
        let mut buf_b = [0u8; 64];
        a.fill_bytes(&mut buf_a);
        b.fill_bytes(&mut buf_b);
        assert_eq!(buf_a, buf_b);
    }

    #[test]
    fn different_seeds_diverge() {
        let mut a = SpecRng::seeded("aaaa");
        let mut b = SpecRng::seeded("bbbb");
        assert_ne!(a.next_u64(), b.next_u64());
    }

    #[test]
    fn fill_bytes_crosses_block_boundary() {
        // 80 > BLOCK_BYTES (32) — exercises the refill loop and proves
        // stream continuity across block transitions.
        let mut rng = SpecRng::seeded("boundary");
        let mut a = vec![0u8; 80];
        rng.fill_bytes(&mut a);
        let mut rng2 = SpecRng::seeded("boundary");
        let mut b1 = vec![0u8; 32];
        let mut b2 = vec![0u8; 48];
        rng2.fill_bytes(&mut b1);
        rng2.fill_bytes(&mut b2);
        let mut concat = b1.clone();
        concat.extend_from_slice(&b2);
        assert_eq!(a, concat);
    }

    #[test]
    fn empty_fill_does_not_advance_stream() {
        // A zero-length request must not consume anything, otherwise a
        // caller with an empty buffer would silently shift every later
        // draw.
        let mut touched = SpecRng::seeded("empty");
        let mut fresh = SpecRng::seeded("empty");
        touched.fill_bytes(&mut []);
        assert_eq!(touched.next_u64(), fresh.next_u64());
    }

    #[test]
    fn next_u32_is_low_half_of_next_u64() {
        // Pins how much of the stream a `u32` draw consumes: changing it
        // would shift every subsequent draw and break replays.
        let mut a = SpecRng::seeded("narrow");
        let mut b = SpecRng::seeded("narrow");
        for _ in 0..8 {
            assert_eq!(a.next_u32(), b.next_u64() as u32);
        }
    }

    #[test]
    fn gen_range_stays_in_bounds() {
        let mut rng = SpecRng::seeded("range");
        for _ in 0..1000 {
            let v = rng.gen_range(7);
            assert!(v < 7);
        }
    }

    #[test]
    fn gen_range_one_is_always_zero() {
        let mut rng = SpecRng::seeded("range");
        for _ in 0..32 {
            assert_eq!(rng.gen_range(1), 0);
        }
    }

    #[test]
    // `expected` keeps the test from passing on an unrelated panic.
    #[should_panic(expected = "upper bound must be > 0")]
    fn gen_range_zero_panics() {
        let mut rng = SpecRng::seeded("range");
        rng.gen_range(0);
    }

    #[test]
    fn choose_empty_returns_none() {
        let mut rng = SpecRng::seeded("choose");
        let empty: [u32; 0] = [];
        assert!(rng.choose(&empty).is_none());
    }

    #[test]
    fn choose_is_reproducible() {
        let items = [10u32, 20, 30, 40, 50];
        let mut a = SpecRng::seeded("pick");
        let mut b = SpecRng::seeded("pick");
        for _ in 0..16 {
            assert_eq!(a.choose(&items), b.choose(&items));
        }
    }

    #[test]
    fn shuffle_is_reproducible() {
        let mut v1: Vec<u32> = (0..20).collect();
        let mut v2 = v1.clone();
        let mut a = SpecRng::seeded("shuffle");
        let mut b = SpecRng::seeded("shuffle");
        a.shuffle(&mut v1);
        b.shuffle(&mut v2);
        assert_eq!(v1, v2);
    }

    #[test]
    fn shuffle_is_a_permutation() {
        // Shuffling may reorder elements but must never drop or
        // duplicate one.
        let original: Vec<u32> = (0..20).collect();
        let mut shuffled = original.clone();
        SpecRng::seeded("shuffle").shuffle(&mut shuffled);
        shuffled.sort_unstable();
        assert_eq!(shuffled, original);
    }

    #[test]
    fn shuffle_handles_trivial_slices() {
        let mut rng = SpecRng::seeded("shuffle");
        let mut empty: [u32; 0] = [];
        rng.shuffle(&mut empty);
        let mut single = [42u32];
        rng.shuffle(&mut single);
        assert_eq!(single, [42]);
    }

    #[test]
    fn clone_forks_substream_reproducibly() {
        // Cloning at any point must produce identical streams from
        // both halves — required so a fuzzer fork (try-this-mutation
        // vs try-that) is hermetic.
        let mut rng = SpecRng::seeded("fork");
        rng.next_u32();
        let mut a = rng.clone();
        let mut b = rng.clone();
        let mut buf_a = [0u8; 48];
        let mut buf_b = [0u8; 48];
        a.fill_bytes(&mut buf_a);
        b.fill_bytes(&mut buf_b);
        assert_eq!(buf_a, buf_b);
        // The parent itself must continue with the very same bytes, i.e.
        // cloning neither consumes nor perturbs the stream.
        let mut buf_parent = [0u8; 48];
        rng.fill_bytes(&mut buf_parent);
        assert_eq!(buf_a, buf_parent);
    }

    #[test]
    fn from_seed_bytes_is_deterministic() {
        let seed = [7u8; SEED_BYTES];
        let mut a = SpecRng::from_seed_bytes(seed);
        let mut b = SpecRng::from_seed_bytes(seed);
        assert_eq!(a.next_u64(), b.next_u64());
    }
}