mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-12 19:55:14 +02:00
feat(dynamic): replace fixed canary with per-spec cryptographically-random canary for enhanced security
This commit is contained in:
parent
e0833537e4
commit
6bca7a7c97
10 changed files with 569 additions and 28 deletions
|
|
@ -10,9 +10,9 @@
|
|||
//! (`NYX_JSON_DEEP` vs `NYX_JSON_SHALLOW`) picks the branch.
|
||||
|
||||
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
|
||||
use crate::dynamic::oracle::ProbePredicate;
|
||||
use crate::dynamic::oracle::{Canary, ProbePredicate};
|
||||
|
||||
const CANARY: &str = "__nyx_canary";
|
||||
const CANARY: &str = Canary::PLACEHOLDER;
|
||||
const MAX_DEPTH: u32 = 64;
|
||||
|
||||
pub const PAYLOADS: &[CuratedPayload] = &[
|
||||
|
|
|
|||
|
|
@ -5,9 +5,9 @@
|
|||
//! vulnerable and benign attempts.
|
||||
|
||||
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
|
||||
use crate::dynamic::oracle::ProbePredicate;
|
||||
use crate::dynamic::oracle::{Canary, ProbePredicate};
|
||||
|
||||
const CANARY: &str = "__nyx_canary";
|
||||
const CANARY: &str = Canary::PLACEHOLDER;
|
||||
const MAX_DEPTH: u32 = 64;
|
||||
|
||||
pub const PAYLOADS: &[CuratedPayload] = &[
|
||||
|
|
|
|||
|
|
@ -9,9 +9,9 @@
|
|||
//! (`NYX_JSON_DEEP` vs `NYX_JSON_SHALLOW`) picks the branch.
|
||||
|
||||
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
|
||||
use crate::dynamic::oracle::ProbePredicate;
|
||||
use crate::dynamic::oracle::{Canary, ProbePredicate};
|
||||
|
||||
const CANARY: &str = "__nyx_canary";
|
||||
const CANARY: &str = Canary::PLACEHOLDER;
|
||||
const MAX_DEPTH: u32 = 64;
|
||||
|
||||
pub const PAYLOADS: &[CuratedPayload] = &[
|
||||
|
|
|
|||
|
|
@ -22,9 +22,9 @@
|
|||
//! in the payload bytes.
|
||||
|
||||
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
|
||||
use crate::dynamic::oracle::ProbePredicate;
|
||||
use crate::dynamic::oracle::{Canary, ProbePredicate};
|
||||
|
||||
const CANARY: &str = "__nyx_canary";
|
||||
const CANARY: &str = Canary::PLACEHOLDER;
|
||||
|
||||
pub const PAYLOADS: &[CuratedPayload] = &[
|
||||
CuratedPayload {
|
||||
|
|
|
|||
|
|
@ -8,9 +8,9 @@
|
|||
//! fixtures the static-analysis side consumes).
|
||||
|
||||
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
|
||||
use crate::dynamic::oracle::ProbePredicate;
|
||||
use crate::dynamic::oracle::{Canary, ProbePredicate};
|
||||
|
||||
const CANARY: &str = "__nyx_canary";
|
||||
const CANARY: &str = Canary::PLACEHOLDER;
|
||||
|
||||
pub const PAYLOADS: &[CuratedPayload] = &[
|
||||
CuratedPayload {
|
||||
|
|
|
|||
|
|
@ -2635,7 +2635,13 @@ pub fn emit_prototype_pollution_harness(_spec: &HarnessSpec) -> HarnessSource {
|
|||
r#"// Nyx dynamic harness — PROTOTYPE_POLLUTION canary trap (Phase 10 / Track J.8).
|
||||
{shim}
|
||||
|
||||
const NYX_PP_CANARY = '__nyx_canary';
|
||||
// Per-spec canary (Phase 30 / Track N.0): the runner derives a
|
||||
// cryptographically-random, per-`spec_hash` canary and passes it in via
|
||||
// NYX_CANARY, substituting the same value into the payload bytes and the
|
||||
// oracle match. The '__nyx_canary' fallback keeps this source a
|
||||
// deterministic function of the spec (cache-safe) and preserves the
|
||||
// legacy behaviour for any path that does not set the env var.
|
||||
const NYX_PP_CANARY = process.env.NYX_CANARY || '__nyx_canary';
|
||||
|
||||
function nyxPrototypePollutionProbe(value) {{
|
||||
const p = process.env.NYX_PROBE_PATH;
|
||||
|
|
|
|||
|
|
@ -342,10 +342,15 @@ pub enum ProbePredicate {
|
|||
/// [`Self::RedirectHostNotIn`] — evaluated across every drained
|
||||
/// probe rather than against a single record.
|
||||
PrototypeCanaryTouched {
|
||||
/// Canary property name the harness installed on
|
||||
/// `Object.prototype` (typically `"__nyx_canary"`). Compared
|
||||
/// case-sensitively against
|
||||
/// Canary property name, compared case-sensitively against
|
||||
/// [`ProbeKind::PrototypePollution::property`].
|
||||
///
|
||||
/// The const corpus stores only [`Canary::PLACEHOLDER`] here; at
|
||||
/// run time [`oracle_fired_full`] is handed the per-spec
|
||||
/// [`Canary`] the runner substituted into the payload bytes and
|
||||
/// the harness's `NYX_CANARY` environment, and matches against
|
||||
/// that instead — so this field is the low-entropy placeholder,
|
||||
/// never the value actually compared in production.
|
||||
canary: &'static str,
|
||||
},
|
||||
/// Phase 11 (Track J.9): CRYPTO weak-key entropy predicate.
|
||||
|
|
@ -521,12 +526,43 @@ pub fn oracle_fired(oracle: &Oracle, outcome: &SandboxOutcome, probes: &[SinkPro
|
|||
/// scope. See [`Oracle::StubEvent`] for the semantics of the new
|
||||
/// branch and [`ProbePredicate::StubEventMatches`] for the new
|
||||
/// `Oracle::SinkProbe` cross-cutting predicate.
|
||||
#[allow(deprecated)]
|
||||
///
|
||||
/// Thin wrapper over [`oracle_fired_full`] with no per-spec canary —
|
||||
/// every [`ProbePredicate::PrototypeCanaryTouched`] matches against the
|
||||
/// const corpus's stored [`Canary::PLACEHOLDER`] token. Production
|
||||
/// callers in the runner use [`oracle_fired_full`] with the per-spec
|
||||
/// canary; this entry point is preserved for tests and pre-Phase-30
|
||||
/// callers.
|
||||
pub fn oracle_fired_with_stubs(
|
||||
oracle: &Oracle,
|
||||
outcome: &SandboxOutcome,
|
||||
probes: &[SinkProbe],
|
||||
stub_events: &[StubEvent],
|
||||
) -> bool {
|
||||
oracle_fired_full(oracle, outcome, probes, stub_events, None)
|
||||
}
|
||||
|
||||
/// Phase 30 (Track N.0): evaluate an oracle with the per-spec
|
||||
/// verification [`Canary`] threaded in.
|
||||
///
|
||||
/// When `canary` is `Some`, every
|
||||
/// [`ProbePredicate::PrototypeCanaryTouched`] matches the drained probe's
|
||||
/// `property` against the runtime canary the runner derived from the
|
||||
/// finding's `spec_hash` and substituted into the payload bytes + the
|
||||
/// harness's `NYX_CANARY` environment — rather than the const corpus's
|
||||
/// low-entropy [`Canary::PLACEHOLDER`] token. Keying the match on a
|
||||
/// per-spec value means a probe record left over from one finding's run
|
||||
/// (or ambient harness output that happens to mention the historical
|
||||
/// `__nyx_canary` sentinel) can never satisfy a different finding's
|
||||
/// oracle. `None` keeps the placeholder-match path for unit tests and
|
||||
/// any caller that has not derived a per-spec canary.
|
||||
#[allow(deprecated)]
|
||||
pub fn oracle_fired_full(
|
||||
oracle: &Oracle,
|
||||
outcome: &SandboxOutcome,
|
||||
probes: &[SinkProbe],
|
||||
stub_events: &[StubEvent],
|
||||
canary: Option<&str>,
|
||||
) -> bool {
|
||||
match oracle {
|
||||
Oracle::SinkProbe { predicates } => {
|
||||
|
|
@ -635,9 +671,9 @@ pub fn oracle_fired_with_stubs(
|
|||
// [`ProbeKind::PrototypePollution`] record whose
|
||||
// `property` matches the canary name.
|
||||
let canary_ok = cross.iter().all(|p| match p {
|
||||
ProbePredicate::PrototypeCanaryTouched { canary } => {
|
||||
probes_satisfy_prototype_canary(probes, canary)
|
||||
}
|
||||
ProbePredicate::PrototypeCanaryTouched {
|
||||
canary: placeholder,
|
||||
} => probes_satisfy_prototype_canary(probes, canary.unwrap_or(placeholder)),
|
||||
_ => true,
|
||||
});
|
||||
if !canary_ok {
|
||||
|
|
@ -1212,6 +1248,140 @@ pub fn probe_crash_signal(probe: &SinkProbe) -> Option<Signal> {
|
|||
}
|
||||
}
|
||||
|
||||
/// Per-spec verification canary (Phase 30 — Track N.0).
|
||||
///
|
||||
/// Tracks J.1–J.9 (phases 03–11) seeded their probe-based oracles with a
|
||||
/// single fixed sentinel string, `__nyx_canary`: the *same* low-entropy
|
||||
/// token appeared in every spec's payload bytes, every prototype-pollution
|
||||
/// harness's setter trap, and every
|
||||
/// [`ProbePredicate::PrototypeCanaryTouched`] in the const corpus. A fixed
|
||||
/// token is wrong on three counts the plan calls out: it is (a) not
|
||||
/// cryptographically random, (b) not collision-resistant against ambient
|
||||
/// harness output (anything that prints `__nyx_canary` matches), and (c) not
|
||||
/// per-spec — a probe record left in a reused workdir from one finding's run
|
||||
/// could satisfy a different finding's oracle.
|
||||
///
|
||||
/// `Canary` replaces it with a value derived per finding from the finding's
|
||||
/// [`spec_hash`](crate::dynamic::spec::HarnessSpec::spec_hash) and a
|
||||
/// process-global run nonce. The const corpus carries only the
|
||||
/// [`PLACEHOLDER`](Canary::PLACEHOLDER) token; the runner computes the real
|
||||
/// canary once per spec via [`generate`](Canary::generate) +
|
||||
/// [`render`](Canary::render) and substitutes it into (1) the payload bytes,
|
||||
/// (2) the harness's `NYX_CANARY` environment variable, and (3) the oracle
|
||||
/// match (threaded through [`oracle_fired_full`]). All three agree on the
|
||||
/// same per-spec value at run time while the corpus source stays
|
||||
/// `const`-declarable.
|
||||
///
|
||||
/// The verdict never depends on the canary's *value* — only on whether the
|
||||
/// pollution reached it — so deriving it from a fresh run nonce does not
|
||||
/// break the engine's rerun-determinism contract (identical inputs still
|
||||
/// produce identical verdicts).
|
||||
pub struct Canary;
|
||||
|
||||
impl Canary {
|
||||
/// Placeholder token embedded in the const corpus: payload byte
|
||||
/// literals, the `canary` field of
|
||||
/// [`ProbePredicate::PrototypeCanaryTouched`], and the per-language
|
||||
/// harness's `NYX_CANARY` fallback. Substituted with a per-spec
|
||||
/// [`render`](Canary::render)ed value at run time.
|
||||
///
|
||||
/// Kept byte-for-byte equal to the historical `__nyx_canary` sentinel so
|
||||
/// legacy fixtures, the harness env fallback, and the colocated unit
|
||||
/// tests that exercise the placeholder-match path keep resolving. The
|
||||
/// Phase 30 audit (`tests/oracle_canary_audit.rs`) asserts every
|
||||
/// canary-bearing predicate in the corpus uses exactly this constant, so
|
||||
/// a new ad-hoc literal fails the build.
|
||||
pub const PLACEHOLDER: &'static str = "__nyx_canary";
|
||||
|
||||
/// Bits of entropy a [`render`](Canary::render)ed canary carries.
|
||||
///
|
||||
/// [`generate`](Canary::generate) returns 32 bytes and `render` encodes
|
||||
/// every byte, so a rendered canary is 256 bits — comfortably above the
|
||||
/// 128-bit floor the Phase 30 audit enforces.
|
||||
pub const ENTROPY_BITS: u32 = 256;
|
||||
|
||||
/// Derive a 32-byte canary for the finding identified by `spec_hash`.
|
||||
///
|
||||
/// `BLAKE3("nyx.dynamic.canary.v1" ‖ run_nonce ‖ spec_hash)`. The
|
||||
/// [`run_nonce`] is a process-global value seeded once from the OS
|
||||
/// CSPRNG (mixed with time + pid as a fallback), so two runs of the same
|
||||
/// spec draw different canaries and a stale probe record cannot satisfy a
|
||||
/// later run. Keying on `spec_hash` gives every finding in a single run
|
||||
/// a distinct canary, so one finding's canary can never collide with
|
||||
/// another's. Deterministic within a process — the audit relies on this.
|
||||
pub fn generate(spec_hash: &str) -> [u8; 32] {
|
||||
let mut h = blake3::Hasher::new();
|
||||
h.update(b"nyx.dynamic.canary.v1\0");
|
||||
h.update(&run_nonce());
|
||||
h.update(b"\0");
|
||||
h.update(spec_hash.as_bytes());
|
||||
*h.finalize().as_bytes()
|
||||
}
|
||||
|
||||
/// Render a generated canary as a 64-character lowercase-hex token.
|
||||
///
|
||||
/// Hex keeps the canary safe to embed verbatim as a JSON object key, a
|
||||
/// JavaScript property name, and a header / filter token without
|
||||
/// escaping. Every byte is encoded, so the token carries the full
|
||||
/// [`ENTROPY_BITS`](Canary::ENTROPY_BITS).
|
||||
pub fn render(bytes: &[u8; 32]) -> String {
|
||||
let mut s = String::with_capacity(bytes.len() * 2);
|
||||
for b in bytes {
|
||||
s.push(char::from_digit((b >> 4) as u32, 16).unwrap());
|
||||
s.push(char::from_digit((b & 0x0f) as u32, 16).unwrap());
|
||||
}
|
||||
s
|
||||
}
|
||||
|
||||
/// Convenience: the per-spec canary already rendered to its run-time
|
||||
/// string form. Equivalent to `render(&generate(spec_hash))`.
|
||||
pub fn for_spec(spec_hash: &str) -> String {
|
||||
Self::render(&Self::generate(spec_hash))
|
||||
}
|
||||
}
|
||||
|
||||
/// Process-global run nonce backing [`Canary::generate`].
|
||||
///
|
||||
/// Seeded once, lazily, from the OS CSPRNG (`/dev/urandom` on Unix) mixed
|
||||
/// with the wall clock, pid, and a counter so the value is fresh per process
|
||||
/// but stable within it. The fallback mixing guarantees a non-repeating seed
|
||||
/// even when no CSPRNG source is reachable.
|
||||
fn run_nonce() -> [u8; 32] {
|
||||
use std::sync::OnceLock;
|
||||
static RUN_NONCE: OnceLock<[u8; 32]> = OnceLock::new();
|
||||
*RUN_NONCE.get_or_init(|| {
|
||||
let mut h = blake3::Hasher::new();
|
||||
h.update(b"nyx.dynamic.run_nonce.v1\0");
|
||||
let mut os = [0u8; 32];
|
||||
if read_os_entropy(&mut os) {
|
||||
h.update(&os);
|
||||
}
|
||||
// Always mix time + pid + a counter so a missing or blocked CSPRNG
|
||||
// still yields a fresh, non-repeating seed.
|
||||
if let Ok(d) = std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH) {
|
||||
h.update(&d.as_nanos().to_le_bytes());
|
||||
}
|
||||
h.update(&(std::process::id() as u64).to_le_bytes());
|
||||
static CTR: std::sync::atomic::AtomicU64 = std::sync::atomic::AtomicU64::new(0);
|
||||
let c = CTR.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
|
||||
h.update(&c.to_le_bytes());
|
||||
*h.finalize().as_bytes()
|
||||
})
|
||||
}
|
||||
|
||||
/// Fill `buf` from the OS CSPRNG.
///
/// On Unix targets this opens `/dev/urandom` and reads exactly `buf.len()`
/// bytes, returning `true` only when both the open and the read succeed. On
/// every other target it returns `false` without touching `buf`, and the
/// caller is expected to fall back to its other seed material.
fn read_os_entropy(buf: &mut [u8]) -> bool {
    #[cfg(unix)]
    let filled = std::fs::File::open("/dev/urandom")
        .and_then(|mut urandom| std::io::Read::read_exact(&mut urandom, buf))
        .is_ok();

    #[cfg(not(unix))]
    let filled = {
        let _ = buf;
        false
    };

    filled
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
|
@ -1829,4 +1999,93 @@ mod tests {
|
|||
let probes = vec![header_emit_probe("Set-Cookie", "noise")];
|
||||
assert!(!oracle_fired(&oracle, &outcome(), &probes));
|
||||
}
|
||||
|
||||
// ── Phase 30 (Track N.0): per-spec canary ───────────────────────────
|
||||
|
||||
#[test]
fn canary_generate_is_deterministic_within_process() {
    // Raw bytes: two derivations from one spec hash must agree.
    let spec_hash = "deadbeefcafe0001";
    let (first, second) = (Canary::generate(spec_hash), Canary::generate(spec_hash));
    assert_eq!(
        first, second,
        "same spec_hash must yield the same canary in-process"
    );

    // The rendered string form must agree as well.
    let rendered = Canary::for_spec("h");
    assert_eq!(rendered, Canary::for_spec("h"));
}
|
||||
|
||||
#[test]
fn canary_render_is_64_lowercase_hex() {
    // Both operands are constants, so the entropy floor is checked at
    // compile time: a regression fails the build instead of a test run.
    const _: () = assert!(Canary::ENTROPY_BITS >= 128);

    let bytes = Canary::generate("spec-hash-xyz");
    assert_eq!(bytes.len(), 32, "canary is 32 bytes / 256 bits");

    let r = Canary::render(&bytes);
    assert_eq!(r.len(), 64, "render encodes every byte as two hex digits");
    assert!(
        r.bytes()
            .all(|b| b.is_ascii_hexdigit() && !b.is_ascii_uppercase()),
        "render must be lowercase hex: {r}",
    );
    // Each hex character carries four bits.
    assert!(r.len() * 4 >= 128, "rendered canary clears the 128-bit floor");
}
|
||||
|
||||
#[test]
fn canary_distinct_spec_hashes_yield_distinct_canaries() {
    // Spot check on two hand-picked hashes.
    let (left, right) = (Canary::for_spec("aaaa"), Canary::for_spec("bbbb"));
    assert_ne!(left, right);

    // Sweep 4096 distinct 16-hex-digit hashes; `HashSet::insert` returns
    // `false` on a repeat, so any collision trips the assertion.
    let mut seen = std::collections::HashSet::new();
    for sh in (0..4096u32).map(|i| format!("{i:016x}")) {
        let canary = Canary::for_spec(&sh);
        assert!(seen.insert(canary), "canary collision at spec_hash {sh}",);
    }
}
|
||||
|
||||
#[test]
fn oracle_full_canary_override_matches_runtime_property_not_placeholder() {
    // The predicate stores only the placeholder, as the const corpus does.
    let oracle = Oracle::SinkProbe {
        predicates: &[ProbePredicate::PrototypeCanaryTouched {
            canary: Canary::PLACEHOLDER,
        }],
    };

    // The drained probe reports the per-spec canary as the touched property.
    let runtime = Canary::for_spec("phase30-spec");
    let probes = vec![prototype_pollution_probe(&runtime, "pwned")];

    let fired_with_override = oracle_fired_full(&oracle, &outcome(), &probes, &[], Some(&runtime));
    let fired_with_placeholder = oracle_fired_full(&oracle, &outcome(), &probes, &[], None);

    // Override supplied: the probe's property equals the runtime canary.
    assert!(fired_with_override);
    // No override: the match falls back to the placeholder, which the
    // probe's property does not equal, so the oracle stays silent.
    assert!(!fired_with_placeholder);
}
|
||||
|
||||
#[test]
fn oracle_full_canary_override_rejects_stale_placeholder_probe() {
    let oracle = Oracle::SinkProbe {
        predicates: &[ProbePredicate::PrototypeCanaryTouched {
            canary: Canary::PLACEHOLDER,
        }],
    };

    // A probe that still reports the historical `__nyx_canary` sentinel as
    // the touched property.
    let stale_probes = vec![prototype_pollution_probe(Canary::PLACEHOLDER, "pwned")];

    // With a per-spec canary threaded in, that stale property must not
    // satisfy the oracle.
    let runtime = Canary::for_spec("phase30-spec-2");
    let fired = oracle_fired_full(&oracle, &outcome(), &stale_probes, &[], Some(&runtime));
    assert!(!fired);
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -326,9 +326,12 @@ pub enum ProbeKind {
|
|||
/// `PrototypePollution` probe.
|
||||
PrototypePollution {
|
||||
/// Property name the host attempted to set on
|
||||
/// `Object.prototype` — always `"__nyx_canary"` for Phase 10
|
||||
/// but parametrised so future per-sink canaries reuse the
|
||||
/// kind without proliferating variants.
|
||||
/// `Object.prototype`. Pre-Phase-30 this was always the fixed
|
||||
/// `"__nyx_canary"` sentinel; Phase 30 (Track N.0) feeds the
|
||||
/// harness a per-spec [`crate::dynamic::oracle::Canary`] via the
|
||||
/// `NYX_CANARY` environment variable, so this carries the
|
||||
/// cryptographically-random per-finding token the trap was
|
||||
/// installed under.
|
||||
property: String,
|
||||
/// Stringified value the host attempted to bind. Echoed
|
||||
/// verbatim so repro tooling can pin the exact payload bytes
|
||||
|
|
|
|||
|
|
@ -13,7 +13,7 @@ use crate::dynamic::corpus::{
|
|||
use crate::dynamic::differential;
|
||||
use crate::dynamic::harness::{self, HarnessError};
|
||||
use crate::dynamic::middleware_demotion;
|
||||
use crate::dynamic::oracle::{Oracle, oracle_fired_with_stubs, probe_crash_signal};
|
||||
use crate::dynamic::oracle::{Canary, Oracle, oracle_fired_full, probe_crash_signal};
|
||||
use crate::dynamic::probe::{ProbeChannel, SinkProbe};
|
||||
use crate::dynamic::sandbox::{self, SandboxBackend, SandboxError, SandboxOptions, SandboxOutcome};
|
||||
use crate::dynamic::spec::HarnessSpec;
|
||||
|
|
@ -463,6 +463,21 @@ pub fn run_spec(spec: &HarnessSpec, opts: &SandboxOptions) -> Result<RunOutcome,
|
|||
}
|
||||
let probe_channel: Option<Arc<ProbeChannel>> = effective_opts.probe_channel.clone();
|
||||
|
||||
// ── Phase 30 (Track N.0): per-spec verification canary ──────────────
|
||||
// Derive a cryptographically-random, per-`spec_hash` canary, hand it to
|
||||
// the harness via `NYX_CANARY` (the prototype-pollution setter trap and
|
||||
// any future per-spec sentinel read it from the environment), and thread
|
||||
// it into the oracle match below. Each payload's bytes have the const
|
||||
// corpus's `Canary::PLACEHOLDER` token rewritten to this value, so the
|
||||
// harness trap, the polluted property name, and the oracle all agree on
|
||||
// a token unique to this finding — a stale probe from another run (or
|
||||
// ambient output mentioning the historical `__nyx_canary` sentinel) can
|
||||
// never satisfy this run's oracle.
|
||||
let run_canary = Canary::for_spec(&spec.spec_hash);
|
||||
effective_opts
|
||||
.extra_env
|
||||
.push(("NYX_CANARY".to_string(), run_canary.clone()));
|
||||
|
||||
// Run only vuln (non-benign) payloads in the main loop.
|
||||
let vuln_payloads: Vec<&Payload> = payloads.iter().filter(|p| !p.is_benign).collect();
|
||||
|
||||
|
|
@ -510,6 +525,9 @@ pub fn run_spec(spec: &HarnessSpec, opts: &SandboxOptions) -> Result<RunOutcome,
|
|||
} else {
|
||||
(None, payload.bytes.to_vec())
|
||||
};
|
||||
// Phase 30: rewrite the corpus canary placeholder to this run's
|
||||
// per-spec canary so the harness trap + oracle agree on it.
|
||||
let effective_bytes = substitute_canary_bytes(effective_bytes, &run_canary);
|
||||
|
||||
// Clear the probe channel before each payload so the oracle's
|
||||
// drained records belong unambiguously to this run.
|
||||
|
|
@ -577,8 +595,13 @@ pub fn run_spec(spec: &HarnessSpec, opts: &SandboxOptions) -> Result<RunOutcome,
|
|||
.map(|h| h.drain_all())
|
||||
.unwrap_or_default();
|
||||
|
||||
let vuln_fired =
|
||||
oracle_fired_with_stubs(&payload.oracle, &outcome, &vuln_probes, &vuln_stub_events);
|
||||
let vuln_fired = oracle_fired_full(
|
||||
&payload.oracle,
|
||||
&outcome,
|
||||
&vuln_probes,
|
||||
&vuln_stub_events,
|
||||
Some(&run_canary),
|
||||
);
|
||||
let sink_hit = outcome.sink_hit;
|
||||
trace_record(
|
||||
trace_handle.as_ref(),
|
||||
|
|
@ -708,9 +731,12 @@ pub fn run_spec(spec: &HarnessSpec, opts: &SandboxOptions) -> Result<RunOutcome,
|
|||
// signal and falls through to `NotConfirmed`.
|
||||
}
|
||||
Some(benign) => {
|
||||
let benign_bytes = materialise_bytes(benign, None)
|
||||
.map(|b| b.into_owned())
|
||||
.unwrap_or_default();
|
||||
let benign_bytes = substitute_canary_bytes(
|
||||
materialise_bytes(benign, None)
|
||||
.map(|b| b.into_owned())
|
||||
.unwrap_or_default(),
|
||||
&run_canary,
|
||||
);
|
||||
if let Some(ch) = &probe_channel {
|
||||
let _ = ch.clear();
|
||||
}
|
||||
|
|
@ -725,11 +751,12 @@ pub fn run_spec(spec: &HarnessSpec, opts: &SandboxOptions) -> Result<RunOutcome,
|
|||
.as_ref()
|
||||
.map(|h| h.drain_all())
|
||||
.unwrap_or_default();
|
||||
let benign_fired = oracle_fired_with_stubs(
|
||||
let benign_fired = oracle_fired_full(
|
||||
&benign.oracle,
|
||||
&benign_outcome,
|
||||
&benign_probes,
|
||||
&benign_stub_events,
|
||||
Some(&run_canary),
|
||||
);
|
||||
|
||||
if is_confirm_candidate {
|
||||
|
|
@ -850,6 +877,38 @@ fn uses_docker_backend(opts: &SandboxOptions) -> bool {
|
|||
}
|
||||
}
|
||||
|
||||
/// Rewrite every occurrence of [`Canary::PLACEHOLDER`] in `bytes` to the
|
||||
/// per-spec `canary` (Phase 30 — Track N.0).
|
||||
///
|
||||
/// Const corpus payloads embed the placeholder token; the runner swaps in
|
||||
/// the finding's per-spec canary before the harness runs so the polluted
|
||||
/// property name matches the trap the harness installed from `NYX_CANARY`
|
||||
/// and the oracle's per-spec match. A cheap no-op for the vast majority of
|
||||
/// payloads — those that never mention the placeholder return their input
|
||||
/// buffer unchanged without reallocating.
|
||||
fn substitute_canary_bytes(bytes: Vec<u8>, canary: &str) -> Vec<u8> {
|
||||
let needle = Canary::PLACEHOLDER.as_bytes();
|
||||
if needle.is_empty()
|
||||
|| needle.len() > bytes.len()
|
||||
|| !bytes.windows(needle.len()).any(|w| w == needle)
|
||||
{
|
||||
return bytes;
|
||||
}
|
||||
let repl = canary.as_bytes();
|
||||
let mut out = Vec::with_capacity(bytes.len());
|
||||
let mut i = 0;
|
||||
while i < bytes.len() {
|
||||
if bytes[i..].starts_with(needle) {
|
||||
out.extend_from_slice(repl);
|
||||
i += needle.len();
|
||||
} else {
|
||||
out.push(bytes[i]);
|
||||
i += 1;
|
||||
}
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
/// Generate a random 16-character hex nonce for OOB callback tracking.
|
||||
fn generate_nonce() -> String {
|
||||
use std::time::{SystemTime, UNIX_EPOCH};
|
||||
|
|
|
|||
214
tests/oracle_canary_audit.rs
Normal file
214
tests/oracle_canary_audit.rs
Normal file
|
|
@ -0,0 +1,214 @@
|
|||
//! Phase 30 (Track N.0) — oracle library consolidation + canary uniqueness
|
||||
//! audit.
|
||||
//!
|
||||
//! Tracks J.1–J.9 seeded their probe-based oracles with a single fixed
|
||||
//! sentinel string (`__nyx_canary`). Phase 30 replaces it with a per-spec
|
||||
//! [`Canary`] derived from the finding's `spec_hash`, substituted at run time
|
||||
//! into the payload bytes, the harness's `NYX_CANARY` environment, and the
|
||||
//! oracle match. This test is the build-time guard the plan calls for: it
|
||||
//!
|
||||
//! 1. enumerates every `ProbePredicate` carried by the const corpus and
|
||||
//! asserts each canary-bearing predicate uses exactly
|
||||
//! [`Canary::PLACEHOLDER`] (a new ad-hoc literal fails the build);
|
||||
//! 2. asserts the runtime [`Canary`] clears the 128-bit entropy floor, is
|
||||
//! deterministic within a process, and is collision-free across a large
|
||||
//! spec-hash sweep (so distinct findings — and therefore the eval corpora
|
||||
//! — never share a canary); and
|
||||
//! 3. classifies *every* `ProbePredicate` variant with an exhaustive match,
|
||||
//! so adding a new variant without classifying it as canary-bearing or
|
||||
//! structural fails to compile here.
|
||||
//!
|
||||
//! `cargo nextest run --features dynamic --test oracle_canary_audit`.
|
||||
|
||||
#![cfg(feature = "dynamic")]
|
||||
|
||||
use std::collections::HashSet;
|
||||
|
||||
use nyx_scanner::dynamic::corpus::CORPUS;
|
||||
use nyx_scanner::dynamic::oracle::{Canary, Oracle, ProbePredicate};
|
||||
|
||||
/// Classify a predicate as canary-bearing (returns its stored canary token)
|
||||
/// or structural (returns `None`).
|
||||
///
|
||||
/// The match is intentionally exhaustive with no `_` arm: a new
|
||||
/// `ProbePredicate` variant added to the library forces a classification
|
||||
/// decision here, which is the Phase 30 guard that "CI fails the build if a
|
||||
/// new ad-hoc canary lands". Structural predicates carry header names,
|
||||
/// allowlists, thresholds, or needles — intentionally low-entropy, public
|
||||
/// values that are *not* secret sentinels and must not be treated as
|
||||
/// canaries.
|
||||
fn canary_token(p: &ProbePredicate) -> Option<&str> {
|
||||
match p {
|
||||
// The one secret-sentinel predicate: its `canary` is the property a
|
||||
// prototype-pollution sink writes onto `Object.prototype` and the
|
||||
// oracle matches against the drained probe.
|
||||
ProbePredicate::PrototypeCanaryTouched { canary } => Some(canary),
|
||||
|
||||
// Structural predicates — no secret sentinel.
|
||||
ProbePredicate::ArgContains { .. }
|
||||
| ProbePredicate::ArgEquals { .. }
|
||||
| ProbePredicate::AnyArgContains(_)
|
||||
| ProbePredicate::CalleeEquals(_)
|
||||
| ProbePredicate::MinArgs(_)
|
||||
| ProbePredicate::StubEventMatches { .. }
|
||||
| ProbePredicate::DeserializeGadgetInvoked { .. }
|
||||
| ProbePredicate::TemplateEvalEqual { .. }
|
||||
| ProbePredicate::XxeEntityExpanded { .. }
|
||||
| ProbePredicate::HeaderInjected { .. }
|
||||
| ProbePredicate::HeaderSmuggledInWire { .. }
|
||||
| ProbePredicate::RedirectHostNotIn { .. }
|
||||
| ProbePredicate::WeakKeyEntropy { .. }
|
||||
| ProbePredicate::IdorBoundaryCrossed
|
||||
| ProbePredicate::OutboundHostNotIn { .. }
|
||||
| ProbePredicate::QueryResultCountGreaterThan { .. }
|
||||
| ProbePredicate::JsonParseExcessiveDepth { .. } => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Visit every `ProbePredicate` the corpus carries — both the active
|
||||
/// `Oracle::SinkProbe { predicates }` slice and the parallel
|
||||
/// `CuratedPayload::probe_predicates` slice — for every `(cap, lang)` entry.
|
||||
fn for_each_corpus_predicate(mut visit: impl FnMut(&str /*label*/, &[u8] /*bytes*/, &ProbePredicate)) {
|
||||
for &(_cap, _lang, slice) in CORPUS.entries {
|
||||
for payload in slice {
|
||||
if let Oracle::SinkProbe { predicates } = &payload.oracle {
|
||||
for p in *predicates {
|
||||
visit(payload.label, payload.bytes, p);
|
||||
}
|
||||
}
|
||||
for p in payload.probe_predicates {
|
||||
visit(payload.label, payload.bytes, p);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Every canary-bearing predicate in the corpus must store exactly
/// [`Canary::PLACEHOLDER`], and the payload that owns it must contain that
/// placeholder somewhere in its bytes. The walk must also find at least one
/// such predicate, otherwise the audit would pass vacuously.
#[test]
fn corpus_canaries_use_placeholder_and_are_substitutable() {
    let needle = Canary::PLACEHOLDER.as_bytes();
    let mut canary_predicates = 0usize;

    for_each_corpus_predicate(|label, bytes, p| {
        // Structural predicates are outside this audit's scope.
        if let Some(token) = canary_token(p) {
            canary_predicates += 1;

            assert_eq!(
                token,
                Canary::PLACEHOLDER,
                "payload {label:?} carries an ad-hoc canary literal {token:?}; \
canary-bearing predicates must use Canary::PLACEHOLDER so the \
runner can substitute a per-spec canary",
            );

            let embedded = bytes.windows(needle.len()).any(|w| w == needle);
            assert!(
                embedded,
                "payload {label:?} carries a PrototypeCanaryTouched predicate but \
its bytes do not embed Canary::PLACEHOLDER ({:?}); run-time \
substitution would have nothing to rewrite and the harness trap \
would never match",
                Canary::PLACEHOLDER,
            );
        }
    });

    // Guard against a corpus walk that silently visits nothing relevant.
    assert!(
        canary_predicates > 0,
        "expected at least one canary-bearing predicate in the corpus",
    );
}
|
||||
|
||||
/// A generated canary is 32 bytes; its rendered form is 64 lowercase-hex
/// characters, clears the 128-bit floor, is reproducible within a process,
/// and differs from the fixed placeholder sentinel.
#[test]
fn canary_entropy_and_determinism() {
    // Both operands are constants, so this is enforced at compile time: a
    // value below the floor fails the build of this test target rather than
    // a test run.
    const _: () = assert!(
        Canary::ENTROPY_BITS >= 128,
        "Canary::ENTROPY_BITS must clear the 128-bit floor",
    );

    let bytes = Canary::generate("spec-hash-under-audit");
    assert_eq!(bytes.len(), 32, "canary is 256 bits of BLAKE3 output");

    let rendered = Canary::render(&bytes);
    assert_eq!(rendered.len(), 64, "render encodes all 32 bytes as hex");
    // Each hex character carries four bits.
    assert!(
        rendered.len() * 4 >= 128,
        "rendered canary must carry at least 128 bits",
    );
    assert!(
        rendered
            .bytes()
            .all(|b| b.is_ascii_hexdigit() && !b.is_ascii_uppercase()),
        "rendered canary must be lowercase hex (safe as a JSON key / JS \
property / header token): {rendered}",
    );

    // Deterministic within the process, in both raw and rendered form.
    assert_eq!(bytes, Canary::generate("spec-hash-under-audit"));
    assert_eq!(
        Canary::for_spec("spec-hash-under-audit"),
        Canary::for_spec("spec-hash-under-audit"),
    );

    // Not a fixed string: the rendered canary differs from the historical
    // placeholder sentinel.
    assert_ne!(Canary::for_spec("anything"), Canary::PLACEHOLDER);
}
|
||||
|
||||
/// Distinct `spec_hash` values must map to distinct canaries: a large sweep
/// over two input shapes produces no repeated canary.
///
/// Each index is rendered both as a 16-hex-digit string and as a
/// 64-hex-digit string, so the sweep covers inputs of more than one length.
/// The two shapes can never be equal to each other (their lengths differ),
/// so the sweep feeds `2 * n` distinct inputs in total.
#[test]
fn canary_is_collision_free_across_spec_hash_sweep() {
    let n = 50_000u32;
    let mut seen = HashSet::new();

    for i in 0..n {
        // NOTE(review): 16 hex chars is taken to be the real spec-hash shape
        // — confirm against `HarnessSpec::spec_hash`.
        for spec_hash in [format!("{i:016x}"), format!("{i:064x}")] {
            let canary = Canary::for_spec(&spec_hash);
            // `insert` returns `false` when the canary was already present.
            assert!(
                seen.insert(canary),
                "canary collision at spec_hash {spec_hash}",
            );
        }
    }

    let expected_unique = 2 * usize::try_from(n).expect("sweep size fits in usize");
    assert_eq!(
        seen.len(),
        expected_unique,
        "every spec_hash produced a unique canary"
    );
}
|
||||
|
||||
/// Coarse sanity check on `generate`'s output: over 512 samples, each of the
/// 32 byte positions must show at least one value below `0x40` and at least
/// one value at or above `0xc0`. A position that never does (for example a
/// constant fill) fails the test.
#[test]
fn canary_byte_positions_are_not_stuck() {
    let mut saw_low = [false; 32];
    let mut saw_high = [false; 32];

    for sample in (0..512u32).map(|i| Canary::generate(&format!("stuck-check-{i}"))) {
        for (pos, &byte) in sample.iter().enumerate() {
            saw_low[pos] |= byte < 0x40;
            saw_high[pos] |= byte >= 0xc0;
        }
    }

    for (pos, (low, high)) in saw_low.iter().zip(&saw_high).enumerate() {
        assert!(
            *low && *high,
            "byte position {pos} looks stuck (low={}, high={})",
            low,
            high,
        );
    }
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue