mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-06 19:35:13 +02:00
217 lines
8.7 KiB
Rust
217 lines
8.7 KiB
Rust
//! Phase 30 (Track N.0) — oracle library consolidation + canary uniqueness
//! audit.
//!
//! Tracks J.1–J.9 seeded their probe-based oracles with a single fixed
//! sentinel string (`__nyx_canary`). Phase 30 replaces it with a per-spec
//! [`Canary`] derived from the finding's `spec_hash`, substituted at run time
//! into the payload bytes, the harness's `NYX_CANARY` environment, and the
//! oracle match. This test is the build-time guard the plan calls for: it
//!
//! 1. enumerates every `ProbePredicate` carried by the const corpus and
//!    asserts each canary-bearing predicate uses exactly
//!    [`Canary::PLACEHOLDER`] (a new ad-hoc literal fails the build);
//! 2. asserts the runtime [`Canary`] clears the 128-bit entropy floor, is
//!    deterministic within a process, and is collision-free across a large
//!    spec-hash sweep (so distinct findings — and therefore the eval corpora
//!    — never share a canary); and
//! 3. classifies *every* `ProbePredicate` variant with an exhaustive match,
//!    so adding a new variant without classifying it as canary-bearing or
//!    structural fails to compile here.
//!
//! Run with `cargo nextest run --features dynamic --test oracle_canary_audit`.
|
||
|
||
#![cfg(feature = "dynamic")]
|
||
|
||
use std::collections::HashSet;
|
||
|
||
use nyx_scanner::dynamic::corpus::CORPUS;
|
||
use nyx_scanner::dynamic::oracle::{Canary, Oracle, ProbePredicate};
|
||
|
||
/// Classify a predicate as canary-bearing (returns its stored canary token)
|
||
/// or structural (returns `None`).
|
||
///
|
||
/// The match is intentionally exhaustive with no `_` arm: a new
|
||
/// `ProbePredicate` variant added to the library forces a classification
|
||
/// decision here, which is the Phase 30 guard that "CI fails the build if a
|
||
/// new ad-hoc canary lands". Structural predicates carry header names,
|
||
/// allowlists, thresholds, or needles — intentionally low-entropy, public
|
||
/// values that are *not* secret sentinels and must not be treated as
|
||
/// canaries.
|
||
fn canary_token(p: &ProbePredicate) -> Option<&str> {
|
||
match p {
|
||
// The one secret-sentinel predicate: its `canary` is the property a
|
||
// prototype-pollution sink writes onto `Object.prototype` and the
|
||
// oracle matches against the drained probe.
|
||
ProbePredicate::PrototypeCanaryTouched { canary } => Some(canary),
|
||
|
||
// Structural predicates — no secret sentinel.
|
||
ProbePredicate::ArgContains { .. }
|
||
| ProbePredicate::ArgEquals { .. }
|
||
| ProbePredicate::AnyArgContains(_)
|
||
| ProbePredicate::CalleeEquals(_)
|
||
| ProbePredicate::MinArgs(_)
|
||
| ProbePredicate::StubEventMatches { .. }
|
||
| ProbePredicate::DeserializeGadgetInvoked { .. }
|
||
| ProbePredicate::TemplateEvalEqual { .. }
|
||
| ProbePredicate::XxeEntityExpanded { .. }
|
||
| ProbePredicate::HeaderInjected { .. }
|
||
| ProbePredicate::HeaderSmuggledInWire { .. }
|
||
| ProbePredicate::RedirectHostNotIn { .. }
|
||
| ProbePredicate::WeakKeyEntropy { .. }
|
||
| ProbePredicate::IdorBoundaryCrossed
|
||
| ProbePredicate::OutboundHostNotIn { .. }
|
||
| ProbePredicate::QueryResultCountGreaterThan { .. }
|
||
| ProbePredicate::JsonParseExcessiveDepth { .. } => None,
|
||
}
|
||
}
|
||
|
||
/// Visit every `ProbePredicate` the corpus carries — both the active
|
||
/// `Oracle::SinkProbe { predicates }` slice and the parallel
|
||
/// `CuratedPayload::probe_predicates` slice — for every `(cap, lang)` entry.
|
||
fn for_each_corpus_predicate(
|
||
mut visit: impl FnMut(&str /*label*/, &[u8] /*bytes*/, &ProbePredicate),
|
||
) {
|
||
for &(_cap, _lang, slice) in CORPUS.entries {
|
||
for payload in slice {
|
||
if let Oracle::SinkProbe { predicates } = &payload.oracle {
|
||
for p in *predicates {
|
||
visit(payload.label, payload.bytes, p);
|
||
}
|
||
}
|
||
for p in payload.probe_predicates {
|
||
visit(payload.label, payload.bytes, p);
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
/// Audits every canary-bearing predicate in the corpus.
///
/// Two properties are required of each one: its stored token must be exactly
/// [`Canary::PLACEHOLDER`] (no ad-hoc literals), and the bytes of the payload
/// that owns it must contain that placeholder, otherwise the runner's
/// run-time substitution would have no token to rewrite.
#[test]
fn corpus_canaries_use_placeholder_and_are_substitutable() {
    let placeholder = Canary::PLACEHOLDER.as_bytes();
    let mut inspected = 0usize;

    for_each_corpus_predicate(|label, bytes, p| {
        // Structural predicates carry no canary and are out of scope here.
        if let Some(token) = canary_token(p) {
            inspected += 1;

            assert_eq!(
                token,
                Canary::PLACEHOLDER,
                "payload {label:?} carries an ad-hoc canary literal {token:?}; \
                 canary-bearing predicates must use Canary::PLACEHOLDER so the \
                 runner can substitute a per-spec canary",
            );

            // Plain byte-substring search for the placeholder in the payload.
            assert!(
                bytes
                    .windows(placeholder.len())
                    .any(|window| window == placeholder),
                "payload {label:?} carries a PrototypeCanaryTouched predicate but \
                 its bytes do not embed Canary::PLACEHOLDER ({:?}); run-time \
                 substitution would have nothing to rewrite and the harness trap \
                 would never match",
                Canary::PLACEHOLDER,
            );
        }
    });

    // The prototype-pollution and json_parse slices are expected to
    // contribute canary predicates. A count of zero therefore means the
    // corpus walk silently stopped finding them and the audit checked nothing.
    assert!(
        inspected > 0,
        "expected at least one canary-bearing predicate in the corpus",
    );
}
|
||
|
||
/// Checks the shape and repeatability of a generated canary.
///
/// The raw canary must be 32 bytes, its rendered form 64 lowercase-hex
/// characters (comfortably above the 128-bit floor), and generating it twice
/// for the same spec hash must give the same value — the runner derives it
/// once for the harness environment and once for the oracle, and the two
/// must agree.
#[test]
fn canary_entropy_and_determinism() {
    const SPEC_HASH: &str = "spec-hash-under-audit";

    // Compile-time guard on the advertised entropy.
    const { assert!(Canary::ENTROPY_BITS >= 128) };

    let raw = Canary::generate(SPEC_HASH);
    assert_eq!(raw.len(), 32, "canary is 256 bits of BLAKE3 output");

    let hex = Canary::render(&raw);
    assert_eq!(hex.len(), 64, "render encodes all 32 bytes as hex");
    // Each hex character carries four bits.
    assert!(
        hex.len() * 4 >= 128,
        "rendered canary must carry at least 128 bits",
    );
    let is_lowercase_hex = hex
        .bytes()
        .all(|b| matches!(b, b'0'..=b'9' | b'a'..=b'f'));
    assert!(
        is_lowercase_hex,
        "rendered canary must be lowercase hex (safe as a JSON key / JS \
         property / header token): {hex}",
    );

    // Same input, same process: both the raw and the rendered forms repeat.
    assert_eq!(raw, Canary::generate(SPEC_HASH));
    assert_eq!(Canary::for_spec(SPEC_HASH), Canary::for_spec(SPEC_HASH));

    // A derived canary must never equal the fixed placeholder sentinel.
    assert_ne!(Canary::for_spec("anything"), Canary::PLACEHOLDER);
}
|
||
|
||
/// Distinct findings get distinct canaries: a large sweep of distinct
/// `spec_hash` values produces no collisions. This is the "no oracle
/// collision in any of the eval corpora" guarantee — every finding in a run
/// has a unique `spec_hash`, hence a unique canary, hence one finding's probe
/// record can never satisfy another's oracle.
#[test]
fn canary_is_collision_free_across_spec_hash_sweep() {
    /// Number of distinct spec hashes swept.
    const SWEEP: usize = 50_000;

    let mut seen = HashSet::with_capacity(SWEEP);
    for i in 0..SWEEP {
        // Real spec hashes are 16 hex chars, so the sweep uses exactly that
        // shape: the counter rendered as zero-padded lowercase hex. Only this
        // one shape is exercised; no longer inputs are generated here.
        let spec_hash = format!("{i:016x}");
        let canary = Canary::for_spec(&spec_hash);
        // `insert` returns false when the value is already present, i.e. an
        // earlier spec hash produced the same canary.
        assert!(
            seen.insert(canary),
            "canary collision at spec_hash {spec_hash}",
        );
    }

    // Compared in `usize` so the set size is never narrowed by a cast.
    assert_eq!(
        seen.len(),
        SWEEP,
        "every spec_hash produced a unique canary"
    );
}
|
||
|
||
/// Coarse check that `Canary::generate` really mixes its input.
///
/// Over 512 different inputs, each of the 32 output byte positions must show
/// at least one value below `0x40` and at least one value of `0xc0` or above.
/// A position that never leaves one range is treated as stuck, which is what
/// a zero-fill (rather than wired-up BLAKE3 mixing) would look like.
#[test]
fn canary_byte_positions_are_not_stuck() {
    let mut low_seen = [false; 32];
    let mut high_seen = [false; 32];

    for sample in 0..512u32 {
        let digest = Canary::generate(&format!("stuck-check-{sample}"));
        for (pos, &byte) in digest.iter().enumerate() {
            // Flags are sticky: once a range has been observed it stays set.
            low_seen[pos] |= byte < 0x40;
            high_seen[pos] |= byte >= 0xc0;
        }
    }

    for (pos, (low, high)) in low_seen.iter().zip(high_seen.iter()).enumerate() {
        assert!(
            *low && *high,
            "byte position {pos} looks stuck (low={}, high={})",
            low,
            high,
        );
    }
}
|