From 36c8bf52dfb19bf15e74c0dc5f08670619e7acff Mon Sep 17 00:00:00 2001 From: pitboss Date: Fri, 15 May 2026 20:17:07 -0500 Subject: [PATCH] =?UTF-8?q?[pitboss]=20phase=2030:=20Cross-cutting=20?= =?UTF-8?q?=E2=80=94=20Determinism=20audit,=20`VerifyTrace`=20observabilit?= =?UTF-8?q?y,=20`policy.rs`=20deny=20rules?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- scripts/check_no_unseeded_rand.sh | 90 ++++++++++ src/dynamic/mod.rs | 2 + src/dynamic/policy.rs | 221 +++++++++++++++++++++++ src/dynamic/rand.rs | 280 ++++++++++++++++++++++++++++++ src/dynamic/repro.rs | 14 ++ src/dynamic/runner.rs | 66 +++++++ src/dynamic/sandbox/mod.rs | 9 + src/dynamic/trace.rs | 226 ++++++++++++++++++++++++ src/dynamic/verify.rs | 92 ++++++++++ src/evidence.rs | 17 ++ src/fmt.rs | 3 + tests/determinism_audit.rs | 175 +++++++++++++++++++ tests/policy_deny.rs | 226 ++++++++++++++++++++++++ 13 files changed, 1421 insertions(+) create mode 100755 scripts/check_no_unseeded_rand.sh create mode 100644 src/dynamic/rand.rs create mode 100644 src/dynamic/trace.rs create mode 100644 tests/determinism_audit.rs create mode 100644 tests/policy_deny.rs diff --git a/scripts/check_no_unseeded_rand.sh b/scripts/check_no_unseeded_rand.sh new file mode 100755 index 00000000..bd44d3d1 --- /dev/null +++ b/scripts/check_no_unseeded_rand.sh @@ -0,0 +1,90 @@ +#!/usr/bin/env bash +# Phase 30 — Track C: determinism audit gate. +# +# Greps `src/dynamic/` for non-deterministic RNG APIs. Anything inside +# the dynamic verifier must route through `crate::dynamic::rand::SpecRng` +# so identical inputs produce identical sandbox runs; the Phase 27 +# `events.jsonl` replay invariant and the Phase 28 repro bundle +# hermeticity contract both depend on it. +# +# Exits 0 on a clean tree, 1 when any banned API surfaces. CI wires +# this into the dynamic workflow so a regression fails the build before +# it ships. + +set -euo pipefail + +ROOT="$(cd "$(dirname "$0")/.." 
&& pwd)" +DYN_DIR="$ROOT/src/dynamic" + +if [[ ! -d "$DYN_DIR" ]]; then + echo "audit: src/dynamic/ missing at $DYN_DIR" >&2 + exit 2 +fi + +# Banned patterns: any real call site of a non-deterministic RNG API. +# +# Each pattern is a Rust-token shape we expect to never appear inside +# src/dynamic/ once Phase 30 lands. The seccomp policy file (which +# names the "getrandom" syscall as a string literal) is excluded +# because its mention is a syscall name, not a Rust API call — the +# string-literal regex below matches the bare token, and the seccomp +# files spell it inside quotes that look identical, so we exclude the +# seccomp subtree explicitly. +PATTERNS=( + 'rand::thread_rng' + 'thread_rng\s*\(' + 'rand::random' + 'OsRng' + 'from_entropy' + 'getrandom::getrandom' + 'Uuid::new_v4' + 'uuid::Uuid::new_v4' + 'fastrand' + 'nanoid' +) + +EXCLUDE_PATHS=( + "$DYN_DIR/sandbox/seccomp" + "$DYN_DIR/rand.rs" +) + +# Use `git grep` when inside a git repo (respects .gitignore), fall +# back to `grep -r` otherwise. Either way the exclusion list is +# applied via a post-filter rather than a pathspec. Note that +# `git grep` only searches tracked files, so a new file is audited +# only once it has been added to the index; the `grep -r` fallback +# scans every `*.rs` file on disk. 
+if git -C "$ROOT" rev-parse --is-inside-work-tree >/dev/null 2>&1; then + HITS="$(git -C "$ROOT" grep -nE "$(IFS='|'; echo "${PATTERNS[*]}")" -- 'src/dynamic/**/*.rs' 'src/dynamic/*.rs' || true)" +else + HITS="$(grep -rnE "$(IFS='|'; echo "${PATTERNS[*]}")" --include='*.rs' "$DYN_DIR" || true)" +fi + +if [[ -z "$HITS" ]]; then + echo "audit: src/dynamic/ is free of unseeded RNG APIs" + exit 0 +fi + +FILTERED="" +while IFS= read -r line; do + [[ -z "$line" ]] && continue + path="${line%%:*}" + skip=0 + for ex in "${EXCLUDE_PATHS[@]}"; do + case "$path" in + "$ex"*|"${ex#$ROOT/}"*) skip=1; break ;; + esac + done + if [[ $skip -eq 0 ]]; then + FILTERED+="$line"$'\n' + fi +done <<< "$HITS" + +if [[ -z "${FILTERED//[$' \t\n\r']/}" ]]; then + echo "audit: src/dynamic/ is free of unseeded RNG APIs" + exit 0 +fi + +echo "audit: banned RNG APIs surfaced inside src/dynamic/" >&2 +echo "$FILTERED" >&2 +echo >&2 +echo "Replace with crate::dynamic::rand::SpecRng::seeded(&spec.spec_hash)." >&2 +exit 1 diff --git a/src/dynamic/mod.rs b/src/dynamic/mod.rs index 69b810b0..d59a9e01 100644 --- a/src/dynamic/mod.rs +++ b/src/dynamic/mod.rs @@ -76,6 +76,7 @@ pub mod oob; pub mod oracle; pub mod policy; pub mod probe; +pub mod rand; pub mod repro; pub mod report; pub mod runner; @@ -84,6 +85,7 @@ pub mod spec; pub mod stubs; pub mod telemetry; pub mod toolchain; +pub mod trace; pub mod verify; pub use report::{VerifyResult, VerifyStatus}; diff --git a/src/dynamic/policy.rs b/src/dynamic/policy.rs index 09a5fa58..c78f0c06 100644 --- a/src/dynamic/policy.rs +++ b/src/dynamic/policy.rs @@ -228,6 +228,227 @@ fn hash_token(secret: &str) -> String { format!("{SCRUB_HASH_PREFIX}{prefix}>") } +/// Outcome of [`evaluate`]. +/// +/// Either `Allow` (let the verifier execute the finding) or `Deny` with +/// the rule that fired and an evidence excerpt that triage can quote in +/// the audit log. 
/// `Deny` is the second security layer above the
/// per-witness [`Scrubber`]: the scrubber redacts already-captured
/// bytes, while `Deny` short-circuits execution before the sandbox ever
/// loads the payload, so the credential never touches the harness in
/// the first place.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum PolicyDecision {
    /// Finding cleared every deny rule; the verifier may proceed.
    Allow,
    /// Finding matched a deny rule.
    Deny {
        /// Stable rule identifier — one of [`DenyRule::CREDENTIALS`],
        /// [`DenyRule::PRIVATE_KEY`], [`DenyRule::PRODUCTION_ENDPOINT`].
        rule: &'static str,
        /// Short text excerpt of the offending field, scrubbed via
        /// [`Scrubber::scrub_string`] and cut to at most 120 chars
        /// (a trailing `…` is appended when the text was truncated), so
        /// an operator can identify *why* the deny fired without having
        /// to re-derive the match.
        excerpt: String,
    },
}

impl PolicyDecision {
    /// Returns `true` for [`PolicyDecision::Deny`], `false` for
    /// [`PolicyDecision::Allow`].
    ///
    /// Convenience accessor; lets call sites branch on the boolean
    /// outcome before unpacking the typed reason.
    #[must_use]
    pub fn is_deny(&self) -> bool {
        matches!(self, PolicyDecision::Deny { .. })
    }
}

/// Rule-name constants exposed for the
/// [`crate::evidence::InconclusiveReason::PolicyDeniedDynamic`] field
/// and for tests that need to assert *which* deny rule fired. Strings
/// rather than an enum so they read identically in JSON output, audit
/// logs, and the `Display` impl on `InconclusiveReason`.
pub struct DenyRule;

impl DenyRule {
    /// Finding mentions a credential-shaped token (AWS key, GitHub /
    /// Slack / OpenAI token, `password=` query string, `Bearer`
    /// header) — re-uses the project-wide secret regex set via
    /// [`crate::utils::redact::contains_secret`].
    pub const CREDENTIALS: &'static str = "credentials";
    /// Finding mentions a private key (PEM block opener, OpenSSH
    /// private key block, base64-shaped key payload).
    pub const PRIVATE_KEY: &'static str = "private-key";
    /// Finding's path or evidence references a production endpoint
    /// (e.g. `api.prod.example.com`, `*.production.*`,
    /// `*-prod.amazonaws.com`). Conservative: matched against the
    /// short list in [`PROD_ENDPOINT_REGEXES`].
    pub const PRODUCTION_ENDPOINT: &'static str = "production-endpoint";
}

/// Substrings that mark a [`DenyRule::PRIVATE_KEY`] hit on their own,
/// independent of the [`crate::utils::redact`] regex set. The redact
/// regex covers the `-----BEGIN ... PRIVATE KEY-----` shape; the
/// literals below add coverage for evidence-snippet excerpts where the
/// trailing newline has been stripped (a common occurrence in CLI
/// output that gets folded into a one-line `notes` entry).
///
/// Every needle is lowercase because the matcher lowercases the text
/// before the substring test.
const PRIVATE_KEY_LITERALS: &[&str] = &[
    "-----begin rsa private key",
    "-----begin openssh private key",
    "-----begin ec private key",
    "-----begin private key",
    // PKCS#8 encrypted keys use their own PEM label; "-----begin private key"
    // is not a substring of it, so it needs a dedicated needle.
    "-----begin encrypted private key",
    "-----begin dsa private key",
    "-----begin pgp private key",
    // NOTE(review): `ssh-rsa AAAA…` / `ssh-ed25519 AAAA…` looks like the
    // shape of an OpenSSH *public* key line — confirm these two needles are
    // meant to trip the private-key rule.
    "ssh-rsa aaaa",
    "ssh-ed25519 aaaa",
];

/// Substrings that mark a [`DenyRule::PRODUCTION_ENDPOINT`] hit.
///
/// Conservative starter set of the host-name shapes most security teams
/// ban from a dynamic re-execution sandbox. Despite the `_REGEXES`
/// suffix these are plain substrings, not regular expressions: the
/// matcher lowercases each text collected from the finding and tests
/// `contains` against every needle, which avoids pulling a regex
/// dependency into the dynamic crate.
///
/// Each needle carries its surrounding `.` / `-` separators (for
/// example `".production."`, `"-prod.amazonaws.com"`), so ordinary
/// words that merely contain `prod` or `production` — `reproduction`,
/// `productionalize` — do not match on their own.
const PROD_ENDPOINT_REGEXES: &[&str] = &[
    "api.prod.",
    "api-prod.",
    ".production.",
    "-production.",
    "-prod.amazonaws.com",
    "prod.example.com",
    "prod-api.",
    "prod-db.",
    "prod-cluster.",
];

// Evaluate `diag` against the cross-cutting security deny list.
+/// +/// Walks the finding's id, path, message, evidence notes, flow-step +/// snippets, and the `SpanEvidence` snippets for source/sink/guard/ +/// sanitizer entries. Each text is fed to three predicates in turn +/// — [`DenyRule::CREDENTIALS`] (via [`crate::utils::redact::contains_secret`]), +/// [`DenyRule::PRIVATE_KEY`] (via [`PRIVATE_KEY_LITERALS`]), +/// [`DenyRule::PRODUCTION_ENDPOINT`] (via [`PROD_ENDPOINT_REGEXES`]). +/// The first match wins and the verifier short-circuits to +/// [`crate::evidence::InconclusiveReason::PolicyDeniedDynamic`]. +/// +/// Multiple rules matching the same evidence pick private-key first +/// (most precise — PEM blocks also satisfy the credentials regex set, +/// so private-key is checked first to avoid burying the precise label +/// under a generic one), credentials second, production-endpoint +/// third — the ordering surfaces the most actionable rule label given +/// the leak shape. +pub fn evaluate(diag: &crate::commands::scan::Diag) -> PolicyDecision { + let texts = collect_diag_texts(diag); + for text in &texts { + if let Some(hit) = match_text(text) { + return PolicyDecision::Deny { + rule: hit.0, + excerpt: excerpt_with_scrubber(hit.1), + }; + } + } + PolicyDecision::Allow +} + +fn collect_diag_texts(diag: &crate::commands::scan::Diag) -> Vec { + let mut out: Vec = Vec::new(); + if !diag.id.is_empty() { + out.push(diag.id.clone()); + } + if !diag.path.is_empty() { + out.push(diag.path.clone()); + } + if let Some(msg) = diag.message.as_ref() { + out.push(msg.clone()); + } + if let Some(ev) = diag.evidence.as_ref() { + for note in &ev.notes { + out.push(note.clone()); + } + if let Some(exp) = ev.explanation.as_ref() { + out.push(exp.clone()); + } + for s in [&ev.source, &ev.sink] { + if let Some(span) = s.as_ref() { + out.push(span.path.clone()); + if let Some(sn) = span.snippet.as_ref() { + out.push(sn.clone()); + } + } + } + for span in ev.guards.iter().chain(ev.sanitizers.iter()) { + if let Some(sn) = 
span.snippet.as_ref() { + out.push(sn.clone()); + } + } + for step in &ev.flow_steps { + if !step.file.is_empty() { + out.push(step.file.clone()); + } + if let Some(sn) = step.snippet.as_ref() { + out.push(sn.clone()); + } + if let Some(callee) = step.callee.as_ref() { + out.push(callee.clone()); + } + } + } + out +} + +/// Match a single text against the deny set. Returns +/// `Some((rule_name, matched_text))` on hit, `None` otherwise. Matched +/// text is the original text (not the rule needle) so the excerpt +/// surfaced on the verdict shows the operator *which* field caused the +/// refusal, not just the rule that fired. +fn match_text(text: &str) -> Option<(&'static str, &str)> { + if text.is_empty() { + return None; + } + let lower = text.to_ascii_lowercase(); + // Private-key literals checked first: PEM blocks also satisfy the + // generic credentials regex set in [`crate::utils::redact`], so a + // PEM hit would otherwise misclassify as `credentials`. Surfacing + // the more precise rule lets operators triage the leak shape from + // the verdict alone. + if PRIVATE_KEY_LITERALS.iter().any(|n| lower.contains(*n)) { + return Some((DenyRule::PRIVATE_KEY, text)); + } + if redact::contains_secret(text.as_bytes()) { + return Some((DenyRule::CREDENTIALS, text)); + } + if PROD_ENDPOINT_REGEXES.iter().any(|n| lower.contains(*n)) { + return Some((DenyRule::PRODUCTION_ENDPOINT, text)); + } + None +} + +/// Build a short excerpt suitable for embedding in a +/// [`crate::evidence::InconclusiveReason::PolicyDeniedDynamic`]. +/// +/// Routes the text through [`Scrubber::scrub_string`] first so the +/// excerpt itself cannot leak the credential, then truncates to 120 +/// `chars` to keep the audit log compact. Truncation walks +/// codepoints (not bytes) because PROD_ENDPOINT hits pass through the +/// scrubber unchanged — a long file-path or snippet with non-ASCII +/// content (e.g. 
Unicode in a source comment) would otherwise panic +/// the verifier on a mid-codepoint byte slice. +fn excerpt_with_scrubber(text: &str) -> String { + let scrubbed = Scrubber::project_default().scrub_string(text); + let mut indices = scrubbed.char_indices(); + match indices.nth(120) { + None => scrubbed, + Some((cut, _)) => format!("{}…", &scrubbed[..cut]), + } +} + /// Truncate `bytes` to at most [`PAYLOAD_CAPTURE_LIMIT_BYTES`]. /// /// Head-keeping: the prefix the sink reads first is retained; the tail is diff --git a/src/dynamic/rand.rs b/src/dynamic/rand.rs new file mode 100644 index 00000000..955eb237 --- /dev/null +++ b/src/dynamic/rand.rs @@ -0,0 +1,280 @@ +//! Deterministic seeded RNG for the dynamic layer (Phase 30 — Track C +//! determinism audit). +//! +//! Every randomness source in [`crate::dynamic`] must route through +//! [`SpecRng`] so identical inputs (spec hash + corpus version) produce +//! identical sandbox runs. Non-determinism inside the verifier breaks +//! the Phase 27 `events.jsonl` replay invariant, the Phase 28 repro +//! bundle hermeticity contract, and the Phase 29 per-cell budget gates. +//! +//! The implementation is intentionally minimal: +//! +//! * No external RNG crate — blake3 is the project's hashing primitive +//! and an extra `rand`/`rand_chacha` dep would expand the supply-chain +//! surface for no gain. +//! * Output stream is a SHAKE-style hash chain: every 32-byte block is +//! `blake3(seed || counter_le)`, with the counter incremented after +//! each block. Throughput is dwarfed by sandbox / build cost so any +//! added cycles compared to a CSPRNG do not show up in +//! `benches/dynamic_bench.rs`. +//! * No `Send`/thread-local state — callers thread the [`SpecRng`] +//! explicitly so a fork in control flow always produces a fresh, +//! reproducible substream. Mutation fuzzers can clone the RNG before +//! forking to keep both branches reproducible. +//! +//! # Audit gate +//! +//! 
`scripts/check_no_unseeded_rand.sh` greps `src/dynamic/` for the +//! banned non-deterministic APIs (`rand::thread_rng`, `OsRng`, +//! `from_entropy`, `getrandom::getrandom`, `Uuid::new_v4`, `fastrand`). +//! Any match exits the script non-zero so CI catches regressions before +//! they land. The seccomp policy file is allowed to mention +//! `"getrandom"` because that string is a syscall name, not a Rust API +//! call; the audit script's regex filters that case out. + +use blake3::Hasher; + +/// Length of the seed mixed into every block of the RNG stream. 32 +/// bytes = full blake3 output width; using anything smaller would lose +/// entropy if a caller passes a longer spec hash. +const SEED_BYTES: usize = 32; + +/// Width of a single hash-chain block. Matches blake3's natural output +/// length so we never have to truncate or extend. +const BLOCK_BYTES: usize = 32; + +/// Deterministic pseudo-random number generator keyed by a spec hash. +/// +/// Construct via [`SpecRng::seeded`] (the standard entry point used by +/// every verifier call site) or [`SpecRng::from_seed_bytes`] (for tests +/// that need to pin the seed independently of a spec). +/// +/// The same seed always produces the same byte stream, so any consumer +/// inside [`crate::dynamic`] that needs randomness (mutation fuzzer +/// payload choice, environment variable jitter, stub port jitter, …) +/// gets a reproducible roll without leaking host entropy into the +/// verdict. +#[derive(Debug, Clone)] +pub struct SpecRng { + seed: [u8; SEED_BYTES], + counter: u64, + buf: [u8; BLOCK_BYTES], + buf_pos: usize, +} + +impl SpecRng { + /// Seed an RNG from a spec hash hex string. + /// + /// The hex prefix is hashed with blake3 to normalise it to 32 bytes + /// — callers may pass the short 16-hex-char spec hash (the form + /// stamped onto [`crate::dynamic::spec::HarnessSpec::spec_hash`]) + /// or a longer derivation; both produce a full-width seed. 
+ pub fn seeded(spec_hash: &str) -> Self { + let mut h = Hasher::new(); + h.update(b"nyx.dynamic.rand.v1\0"); + h.update(spec_hash.as_bytes()); + let mut seed = [0u8; SEED_BYTES]; + seed.copy_from_slice(h.finalize().as_bytes()); + Self::from_seed_bytes(seed) + } + + /// Seed from raw bytes. Exposed for tests that need a known seed + /// without round-tripping through a spec hash. + pub fn from_seed_bytes(seed: [u8; SEED_BYTES]) -> Self { + Self { + seed, + counter: 0, + buf: [0u8; BLOCK_BYTES], + buf_pos: BLOCK_BYTES, + } + } + + /// Refill the internal buffer with the next block of the hash + /// chain. Called lazily as bytes are consumed. + fn refill(&mut self) { + let mut h = Hasher::new(); + h.update(&self.seed); + h.update(&self.counter.to_le_bytes()); + let digest = h.finalize(); + self.buf.copy_from_slice(digest.as_bytes()); + self.counter = self.counter.wrapping_add(1); + self.buf_pos = 0; + } + + /// Fill `out` with deterministic pseudo-random bytes. + pub fn fill_bytes(&mut self, out: &mut [u8]) { + let mut written = 0; + while written < out.len() { + if self.buf_pos == BLOCK_BYTES { + self.refill(); + } + let take = (out.len() - written).min(BLOCK_BYTES - self.buf_pos); + out[written..written + take] + .copy_from_slice(&self.buf[self.buf_pos..self.buf_pos + take]); + self.buf_pos += take; + written += take; + } + } + + /// Draw the next `u64` from the stream. Used by the rejection + /// loop in [`Self::gen_range`]. + pub fn next_u64(&mut self) -> u64 { + let mut buf = [0u8; 8]; + self.fill_bytes(&mut buf); + u64::from_le_bytes(buf) + } + + /// Draw a `u32`. Convenience for callers picking among small + /// alternatives (payload variants, env mutation slots). + pub fn next_u32(&mut self) -> u32 { + (self.next_u64() & 0xFFFF_FFFF) as u32 + } + + /// Sample a `usize` uniformly in `[0, upper)`. Panics when + /// `upper == 0` because the request is meaningless; callers should + /// guard zero-length slices. 
+ /// + /// Uses rejection sampling against the largest multiple of `upper` + /// that fits in a `u64` so the distribution is exactly uniform — + /// modulo-bias would otherwise nudge the corpus picker toward + /// low-indexed payloads. + pub fn gen_range(&mut self, upper: usize) -> usize { + assert!(upper > 0, "SpecRng::gen_range upper bound must be > 0"); + let upper_u64 = upper as u64; + let zone = u64::MAX - (u64::MAX % upper_u64); + loop { + let candidate = self.next_u64(); + if candidate < zone { + return (candidate % upper_u64) as usize; + } + } + } + + /// Pick one element from `slice`. Returns `None` only when the + /// slice is empty so callers can use `?` for empty-corpus paths. + pub fn choose<'a, T>(&mut self, slice: &'a [T]) -> Option<&'a T> { + if slice.is_empty() { + None + } else { + Some(&slice[self.gen_range(slice.len())]) + } + } + + /// In-place Fisher–Yates shuffle. Useful for the mutation fuzzer + /// when iterating a payload list in a reproducible order without + /// pre-sorting in caller code. + pub fn shuffle(&mut self, slice: &mut [T]) { + for i in (1..slice.len()).rev() { + let j = self.gen_range(i + 1); + slice.swap(i, j); + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn same_seed_produces_same_stream() { + let mut a = SpecRng::seeded("deadbeefcafebabe"); + let mut b = SpecRng::seeded("deadbeefcafebabe"); + let mut buf_a = [0u8; 64]; + let mut buf_b = [0u8; 64]; + a.fill_bytes(&mut buf_a); + b.fill_bytes(&mut buf_b); + assert_eq!(buf_a, buf_b); + } + + #[test] + fn different_seeds_diverge() { + let mut a = SpecRng::seeded("aaaa"); + let mut b = SpecRng::seeded("bbbb"); + assert_ne!(a.next_u64(), b.next_u64()); + } + + #[test] + fn fill_bytes_crosses_block_boundary() { + // 80 > BLOCK_BYTES (32) — exercises the refill loop and proves + // stream continuity across block transitions. 
+ let mut rng = SpecRng::seeded("boundary"); + let mut a = vec![0u8; 80]; + rng.fill_bytes(&mut a); + let mut rng2 = SpecRng::seeded("boundary"); + let mut b1 = vec![0u8; 32]; + let mut b2 = vec![0u8; 48]; + rng2.fill_bytes(&mut b1); + rng2.fill_bytes(&mut b2); + let mut concat = b1.clone(); + concat.extend_from_slice(&b2); + assert_eq!(a, concat); + } + + #[test] + fn gen_range_stays_in_bounds() { + let mut rng = SpecRng::seeded("range"); + for _ in 0..1000 { + let v = rng.gen_range(7); + assert!(v < 7); + } + } + + #[test] + #[should_panic] + fn gen_range_zero_panics() { + let mut rng = SpecRng::seeded("range"); + rng.gen_range(0); + } + + #[test] + fn choose_empty_returns_none() { + let mut rng = SpecRng::seeded("choose"); + let empty: [u32; 0] = []; + assert!(rng.choose(&empty).is_none()); + } + + #[test] + fn choose_is_reproducible() { + let items = [10u32, 20, 30, 40, 50]; + let mut a = SpecRng::seeded("pick"); + let mut b = SpecRng::seeded("pick"); + for _ in 0..16 { + assert_eq!(a.choose(&items), b.choose(&items)); + } + } + + #[test] + fn shuffle_is_reproducible() { + let mut v1: Vec = (0..20).collect(); + let mut v2 = v1.clone(); + let mut a = SpecRng::seeded("shuffle"); + let mut b = SpecRng::seeded("shuffle"); + a.shuffle(&mut v1); + b.shuffle(&mut v2); + assert_eq!(v1, v2); + } + + #[test] + fn clone_forks_substream_reproducibly() { + // Cloning at any point must produce identical streams from + // both halves — required so a fuzzer fork (try-this-mutation + // vs try-that) is hermetic. 
+ let mut rng = SpecRng::seeded("fork"); + rng.next_u32(); + let mut a = rng.clone(); + let mut b = rng.clone(); + let mut buf_a = [0u8; 48]; + let mut buf_b = [0u8; 48]; + a.fill_bytes(&mut buf_a); + b.fill_bytes(&mut buf_b); + assert_eq!(buf_a, buf_b); + } + + #[test] + fn from_seed_bytes_is_deterministic() { + let seed = [7u8; SEED_BYTES]; + let mut a = SpecRng::from_seed_bytes(seed); + let mut b = SpecRng::from_seed_bytes(seed); + assert_eq!(a.next_u64(), b.next_u64()); + } +} diff --git a/src/dynamic/repro.rs b/src/dynamic/repro.rs index a9e0844c..300da090 100644 --- a/src/dynamic/repro.rs +++ b/src/dynamic/repro.rs @@ -22,6 +22,7 @@ //! expected/ //! outcome.json (redacted SandboxOutcome) //! verdict.json +//! trace.jsonl (Phase 30 — VerifyTrace, when attached) //! reproduce.sh //! docker_pull.sh (Phase 28 — present when toolchain pinned) //! README.md @@ -185,6 +186,19 @@ pub fn write( // expected/verdict.json write_json(&root.join("expected").join("verdict.json"), verdict)?; + // expected/trace.jsonl — Phase 30 (Track C observability). Records + // the verifier's per-stage timeline so a repro replay can compare + // sandbox runs against the canonical sequence. Omitted when no + // trace was attached to the sandbox options, which keeps direct + // `sandbox::run` callers (parity fixtures, unit tests) free of + // bundle-shape changes. 
+ if let Some(trace) = opts.trace.as_ref() { + fs::write( + root.join("expected").join("trace.jsonl"), + trace.to_jsonl().as_bytes(), + )?; + } + // toolchain.lock (Phase 28 — Track H.3, repro hermeticity) let lock = build_toolchain_lock(spec, &root)?; write_json(&root.join("toolchain.lock"), &lock)?; diff --git a/src/dynamic/runner.rs b/src/dynamic/runner.rs index e7b8a5a5..112c8dba 100644 --- a/src/dynamic/runner.rs +++ b/src/dynamic/runner.rs @@ -16,10 +16,38 @@ use crate::dynamic::probe::{ProbeChannel, SinkProbe}; use crate::dynamic::stubs::StubEvent; use crate::dynamic::sandbox::{self, SandboxBackend, SandboxError, SandboxOptions, SandboxOutcome}; use crate::dynamic::spec::HarnessSpec; +use crate::dynamic::trace::{TraceStage, VerifyTrace}; use crate::evidence::{DifferentialOutcome, DifferentialVerdict}; use crate::symbol::Lang; use std::sync::Arc; +/// Record a trace event on the caller's [`VerifyTrace`] handle if one +/// was attached to [`SandboxOptions::trace`]. No-op otherwise — keeps +/// every direct `crate::dynamic::sandbox::run` caller (tests, parity +/// fixtures) free of trace boilerplate. +fn trace_record(trace: Option<&Arc>, stage: TraceStage, detail: Option) { + if let Some(t) = trace { + t.record(stage, detail); + } +} + +/// Short, stable variant tag used in [`TraceStage::SandboxStarted`] +/// details so a trace line names the oracle without dumping the full +/// `Debug` repr (which includes payload-specific `predicates` slices). +#[allow(deprecated)] +fn oracle_short_name(oracle: &Oracle) -> &'static str { + match oracle { + Oracle::SinkProbe { .. } => "SinkProbe", + Oracle::SinkCrash { .. } => "SinkCrash", + Oracle::OutputContains(_) => "OutputContains", + Oracle::Crash => "Crash", + Oracle::OobCallback { .. } => "OobCallback", + Oracle::FileEscape => "FileEscape", + Oracle::ExitStatus(_) => "ExitStatus", + Oracle::StubEvent { .. } => "StubEvent", + } +} + /// Max harness-build attempts before giving up. 
const MAX_BUILD_ATTEMPTS: u32 = 2; @@ -91,6 +119,13 @@ pub fn run_spec(spec: &HarnessSpec, opts: &SandboxOptions) -> Result Result Result Result>, } /// Phase 17 (Track E.1): selects which subset of the Linux process- @@ -284,6 +292,7 @@ impl Default for SandboxOptions { stub_harness: None, seccomp_caps: 0, process_hardening: ProcessHardeningProfile::Standard, + trace: None, } } } diff --git a/src/dynamic/trace.rs b/src/dynamic/trace.rs new file mode 100644 index 00000000..74e7ae83 --- /dev/null +++ b/src/dynamic/trace.rs @@ -0,0 +1,226 @@ +//! Verify-pipeline trace (Phase 30 — Track C observability). +//! +//! [`VerifyTrace`] is a structured, deterministic record of every stage +//! a single [`crate::dynamic::verify::verify_finding`] call walks +//! through. Two uses: +//! +//! 1. **`--verbose` stderr stream** — when +//! [`crate::dynamic::verify::VerifyOptions::trace_verbose`] is set the +//! verifier prints each event to stderr as it fires. Operators see +//! where a run stalled or which payload triggered without re-running +//! under a debugger. +//! 2. **Repro bundle serialisation** — the trace is emitted into the +//! Phase 28 repro bundle as `expected/trace.jsonl` so a replay knows +//! the canonical sequence its run is expected to mirror. Together +//! with the Phase 27 `events.jsonl` log this gives a forensic +//! "what did the verifier do?" picture that does not require +//! re-running the binary. +//! +//! # Determinism contract +//! +//! `TraceEvent` deliberately omits wall-clock timestamps and durations +//! so two runs of the same finding produce a byte-identical sequence. +//! The Phase 30 acceptance test (`tests/determinism_audit.rs`) runs the +//! verifier 10× on a fixed input and asserts every serialised trace is +//! identical. Elapsed-time annotations are still useful for the +//! stderr printer; they are computed inline at print time from +//! `Instant::now()` and never persisted. 
+ +use serde::{Deserialize, Serialize}; +use std::sync::Mutex; + +/// Distinct stages emitted by the verifier. The names match the Phase +/// 30 spec literal so audit logs grep for `oracle_observed` / +/// `verdict` directly. +/// +/// Serialised as snake_case strings so the on-disk trace reads cleanly +/// in `jq` without a string-versus-enum decoder. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum TraceStage { + SpecStarted, + SpecDone, + BuildStarted, + BuildDone, + SandboxStarted, + OracleWait, + OracleObserved, + Verdict, +} + +impl TraceStage { + /// Stable label used by the stderr printer. Lowercase, no + /// punctuation, so a CI log scan can grep `^[T] oracle_observed` + /// straightforwardly. + pub fn as_str(&self) -> &'static str { + match self { + Self::SpecStarted => "spec_started", + Self::SpecDone => "spec_done", + Self::BuildStarted => "build_started", + Self::BuildDone => "build_done", + Self::SandboxStarted => "sandbox_started", + Self::OracleWait => "oracle_wait", + Self::OracleObserved => "oracle_observed", + Self::Verdict => "verdict", + } + } +} + +/// One row of a [`VerifyTrace`]. +/// +/// `sequence` is the per-trace ordinal — explicit rather than implicit +/// in `Vec` order because the JSON-lines format on disk lets each line +/// stand alone (operators may sort / filter externally). `detail` is +/// a short, human-friendly free-form note (payload label, build attempt +/// counter, …); kept under 200 chars by callers. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct TraceEvent { + pub sequence: u32, + pub stage: TraceStage, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub detail: Option, +} + +/// Ordered record of every stage the verifier walks through. 
+/// +/// Append via [`VerifyTrace::record`] (thread-safe; protected by an +/// internal `Mutex` so the sandbox/runner thread and the verifier can +/// share the same handle). Read deterministically via +/// [`VerifyTrace::events`]. +#[derive(Debug, Default)] +pub struct VerifyTrace { + inner: Mutex, +} + +#[derive(Debug, Default)] +struct TraceInner { + events: Vec, + next_sequence: u32, +} + +impl VerifyTrace { + /// Fresh, empty trace. Cheap — no allocation until the first event. + pub fn new() -> Self { + Self::default() + } + + /// Append `stage` with optional `detail`. Lock-poisoning is treated + /// as a no-op so a panicking caller does not corrupt downstream + /// traces; the trace is observability, not load-bearing state. + pub fn record(&self, stage: TraceStage, detail: Option) { + let Ok(mut inner) = self.inner.lock() else { + return; + }; + let sequence = inner.next_sequence; + inner.next_sequence = sequence.wrapping_add(1); + inner.events.push(TraceEvent { + sequence, + stage, + detail, + }); + } + + /// Snapshot the recorded events in append order. Clones the vec so + /// the caller can serialise / drain without holding the lock; the + /// allocation is negligible compared to the rest of a verifier run. + pub fn events(&self) -> Vec { + match self.inner.lock() { + Ok(g) => g.events.clone(), + Err(_) => Vec::new(), + } + } + + /// Serialise the trace as a JSON-lines string. Each line is a + /// single [`TraceEvent`] so the file is greppable and tolerant of + /// truncation (any prefix is still valid JSON-lines). + pub fn to_jsonl(&self) -> String { + let events = self.events(); + let mut out = String::with_capacity(events.len() * 80); + for ev in &events { + // `serde_json::to_string` cannot fail for the field types + // here (`u32`, fixed enum, optional `String`). 
+ if let Ok(line) = serde_json::to_string(ev) { + out.push_str(&line); + out.push('\n'); + } + } + out + } + + /// Best-effort stderr print of every recorded event, prefixed with + /// `[T]` so a tail of a verify log can find trace rows quickly. + /// Called when [`crate::dynamic::verify::VerifyOptions::trace_verbose`] + /// is set. Print failures are silently ignored because trace + /// output is observability, not a verdict input. + pub fn print_to_stderr(&self) { + use std::io::Write; + let events = self.events(); + let mut err = std::io::stderr().lock(); + for ev in &events { + let detail = ev.detail.as_deref().unwrap_or(""); + let _ = writeln!(err, "[T] {} {} {}", ev.sequence, ev.stage.as_str(), detail); + } + let _ = err.flush(); + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn record_assigns_monotonic_sequences() { + let t = VerifyTrace::new(); + t.record(TraceStage::SpecStarted, None); + t.record(TraceStage::SpecDone, Some("py.cmdi.os_system".to_owned())); + t.record(TraceStage::Verdict, Some("Confirmed".to_owned())); + let events = t.events(); + assert_eq!(events.len(), 3); + assert_eq!(events[0].sequence, 0); + assert_eq!(events[1].sequence, 1); + assert_eq!(events[2].sequence, 2); + assert_eq!(events[0].stage, TraceStage::SpecStarted); + assert_eq!(events[2].stage, TraceStage::Verdict); + } + + #[test] + fn jsonl_is_deterministic_for_same_sequence() { + let a = VerifyTrace::new(); + a.record(TraceStage::SpecStarted, None); + a.record(TraceStage::Verdict, Some("NotConfirmed".to_owned())); + let b = VerifyTrace::new(); + b.record(TraceStage::SpecStarted, None); + b.record(TraceStage::Verdict, Some("NotConfirmed".to_owned())); + assert_eq!(a.to_jsonl(), b.to_jsonl()); + } + + #[test] + fn jsonl_round_trips_through_serde() { + let t = VerifyTrace::new(); + t.record(TraceStage::SandboxStarted, Some("payload=sqli-tautology".to_owned())); + t.record(TraceStage::OracleObserved, Some("fired=true".to_owned())); + let jsonl = t.to_jsonl(); + let 
mut parsed = Vec::new(); + for line in jsonl.lines() { + let ev: TraceEvent = serde_json::from_str(line).expect("trace line should parse"); + parsed.push(ev); + } + assert_eq!(parsed.len(), 2); + assert_eq!(parsed[0].stage, TraceStage::SandboxStarted); + assert_eq!(parsed[1].stage, TraceStage::OracleObserved); + } + + #[test] + fn stage_as_str_matches_spec_names() { + // Phase 30 spec literal: the verifier stage names must serialise + // to these exact tokens so audit grep queries stay stable. + assert_eq!(TraceStage::SpecStarted.as_str(), "spec_started"); + assert_eq!(TraceStage::SpecDone.as_str(), "spec_done"); + assert_eq!(TraceStage::BuildStarted.as_str(), "build_started"); + assert_eq!(TraceStage::BuildDone.as_str(), "build_done"); + assert_eq!(TraceStage::SandboxStarted.as_str(), "sandbox_started"); + assert_eq!(TraceStage::OracleWait.as_str(), "oracle_wait"); + assert_eq!(TraceStage::OracleObserved.as_str(), "oracle_observed"); + assert_eq!(TraceStage::Verdict.as_str(), "verdict"); + } +} diff --git a/src/dynamic/verify.rs b/src/dynamic/verify.rs index 4a64d589..3c7e7b0f 100644 --- a/src/dynamic/verify.rs +++ b/src/dynamic/verify.rs @@ -66,6 +66,11 @@ pub struct VerifyOptions { /// event emitted from the verify pipeline. Default `keep_all` so unit /// tests and embedded callers do not silently lose records. pub telemetry_policy: SamplingPolicy, + /// Phase 30 (Track C observability): when `true` the verifier prints + /// every recorded [`crate::dynamic::trace::TraceEvent`] to stderr at + /// end-of-verify. Wired to the future `--verbose` CLI flag; off by + /// default so non-interactive scans stay quiet. 
+ pub trace_verbose: bool, } impl VerifyOptions { @@ -121,6 +126,7 @@ impl VerifyOptions { callgraph: None, refuse_filesystem_confirm, telemetry_policy: SamplingPolicy::from_config(&config.telemetry), + trace_verbose: false, } } } @@ -387,6 +393,61 @@ fn derivation_failure_hint(diag: &Diag) -> String { pub fn verify_finding(diag: &Diag, opts: &VerifyOptions) -> VerifyResult { let finding_id = format!("{:016x}", diag.stable_hash); + // Phase 30 (Track C observability): one trace per finding, threaded + // into [`SandboxOptions`] so the runner can append `build_*` / + // `sandbox_started` / `oracle_*` stages from inside `run_spec`. + let trace = Arc::new(crate::dynamic::trace::VerifyTrace::new()); + trace.record( + crate::dynamic::trace::TraceStage::SpecStarted, + Some(format!("rule={} path={}", diag.id, diag.path)), + ); + + // Phase 30 §C — cross-cutting policy deny rules. Findings whose + // static metadata mentions credentials, private keys, or production + // endpoint regexes are refused up front: the sandbox is never + // started and no payload is materialised, so a leaked secret cannot + // round-trip through the harness even if the deny rule is wrong. + // The verifier returns `Inconclusive(PolicyDeniedDynamic)` so the + // operator sees *why* dynamic execution was skipped without losing + // the static finding from the report. + if let crate::dynamic::policy::PolicyDecision::Deny { rule, excerpt } = + crate::dynamic::policy::evaluate(diag) + { + trace.record( + crate::dynamic::trace::TraceStage::Verdict, + Some(format!("policy_denied rule={rule}")), + ); + if opts.trace_verbose { + trace.print_to_stderr(); + } + let inconclusive_reason = InconclusiveReason::PolicyDeniedDynamic { + rule: rule.to_owned(), + excerpt: excerpt.clone(), + }; + // Emit telemetry so the Phase 27 events log records the deny — + // operators triaging refusals need it on the wire even though + // the sandbox never ran. 
+ let tel_event = TelemetryEvent::no_spec( + diag, + VerifyStatus::Inconclusive, + Some(inconclusive_reason.clone()), + ); + telemetry::emit_with_policy(&tel_event, &opts.telemetry_policy); + return VerifyResult { + finding_id, + status: VerifyStatus::Inconclusive, + triggered_payload: None, + reason: None, + inconclusive_reason: Some(inconclusive_reason), + detail: Some(format!( + "dynamic execution refused by policy rule {rule}" + )), + attempts: vec![], + toolchain_match: None, + differential: None, + }; + } + let spec = match HarnessSpec::from_finding_full( diag, opts.verify_all_confidence, @@ -395,6 +456,13 @@ pub fn verify_finding(diag: &Diag, opts: &VerifyOptions) -> VerifyResult { ) { Ok(s) => s, Err(reason) => { + trace.record( + crate::dynamic::trace::TraceStage::Verdict, + Some(format!("spec_derivation_failed reason={reason:?}")), + ); + if opts.trace_verbose { + trace.print_to_stderr(); + } return spec_derivation_failed_verdict( finding_id, diag, @@ -403,6 +471,13 @@ pub fn verify_finding(diag: &Diag, opts: &VerifyOptions) -> VerifyResult { ); } }; + trace.record( + crate::dynamic::trace::TraceStage::SpecDone, + Some(format!( + "spec_hash={} lang={:?} entry_kind={:?}", + spec.spec_hash, spec.lang, spec.entry_kind + )), + ); // Pre-flight gate: surface a structured `Inconclusive(EntryKindUnsupported)` // up-front when the spec's [`EntryKind`] is not in the lang emitter's @@ -545,6 +620,11 @@ pub fn verify_finding(diag: &Diag, opts: &VerifyOptions) -> VerifyResult { if !stub_harness.is_empty() { sandbox_opts.stub_harness = Some(Arc::clone(&stub_harness)); } + // Phase 30: hand the runner an `Arc` clone so it can append + // `build_*` / `sandbox_started` / `oracle_*` stages from inside + // `run_spec`. The verifier still owns the trace for verdict-stage + // appending after `run_spec` returns. 
+ sandbox_opts.trace = Some(Arc::clone(&trace)); let start = Instant::now(); let result = run_spec(&spec, &sandbox_opts); @@ -589,9 +669,21 @@ pub fn verify_finding(diag: &Diag, opts: &VerifyOptions) -> VerifyResult { ); telemetry::emit_with_policy(&event, &opts.telemetry_policy); + // Phase 30 — verdict is the terminal trace stage. Recorded after + // cache insert + telemetry so the trace reflects the full pipeline + // the operator just saw run. + trace.record( + crate::dynamic::trace::TraceStage::Verdict, + Some(format!("status={:?}", verdict.status)), + ); + if opts.trace_verbose { + trace.print_to_stderr(); + } + verdict } + fn build_verdict( finding_id: &str, spec: &HarnessSpec, diff --git a/src/evidence.rs b/src/evidence.rs index b4e00427..682b2503 100644 --- a/src/evidence.rs +++ b/src/evidence.rs @@ -326,6 +326,19 @@ pub enum InconclusiveReason { backend: String, oracle_kind: String, }, + /// Phase 30 §C — the dynamic policy module refused to execute a + /// finding whose static metadata mentions credentials, private + /// keys, or a production endpoint regex. The second security + /// layer above the existing + /// [`crate::dynamic::policy::Scrubber`] forensic redaction: even a + /// successful confirmation is unsafe to obtain when the payload + /// would have to mention or transmit live secrets. Carries the + /// rule name that fired (`credentials`, `private-key`, + /// `production-endpoint`) and an evidence excerpt for triage. 
+ PolicyDeniedDynamic { + rule: String, + excerpt: String, + }, } impl fmt::Display for InconclusiveReason { @@ -386,6 +399,10 @@ impl fmt::Display for InconclusiveReason { f, "{backend} backend cannot enforce isolation for {oracle_kind} oracle" ), + Self::PolicyDeniedDynamic { rule, excerpt } => write!( + f, + "dynamic execution refused by policy rule {rule} (matched: {excerpt})" + ), } } } diff --git a/src/fmt.rs b/src/fmt.rs index f064f3d7..ca1cf915 100644 --- a/src/fmt.rs +++ b/src/fmt.rs @@ -612,6 +612,9 @@ fn format_inconclusive_reason(r: &crate::evidence::InconclusiveReason) -> String InconclusiveReason::BackendInsufficient { backend, oracle_kind } => { format!("backend {backend} cannot enforce {oracle_kind} oracle") } + InconclusiveReason::PolicyDeniedDynamic { rule, .. } => { + format!("dynamic execution refused by policy ({rule})") + } } } diff --git a/tests/determinism_audit.rs b/tests/determinism_audit.rs new file mode 100644 index 00000000..c86c8666 --- /dev/null +++ b/tests/determinism_audit.rs @@ -0,0 +1,175 @@ +//! Phase 30 (Track C — determinism): run the verifier 10× on the same +//! input and assert byte-identical [`VerifyTrace`] output across runs, +//! plus byte-identical telemetry records once wall-clock fields are +//! stripped. +//! +//! The test deliberately drives the policy-deny short-circuit so it +//! does not depend on a working language toolchain, a sandbox backend, +//! or a populated payload corpus. That path emits exactly the same +//! pipeline events ([`SpecStarted`], [`Verdict`]) every run, and +//! emits a single telemetry record whose only non-deterministic field +//! is the wall-clock `ts` timestamp. Stripping `ts` gives a stable +//! envelope the test can compare directly. 
+
+#![cfg(feature = "dynamic")]
+
+use nyx_scanner::commands::scan::Diag;
+use nyx_scanner::dynamic::telemetry::{self, SamplingPolicy};
+use nyx_scanner::dynamic::verify::{verify_finding, VerifyOptions};
+use nyx_scanner::evidence::{Confidence, Evidence, VerifyStatus};
+use nyx_scanner::patterns::{FindingCategory, Severity};
+use serde_json::Value;
+use std::collections::BTreeSet;
+use std::sync::{Mutex, MutexGuard};
+
+const RUN_COUNT: usize = 10;
+
+/// Serialises the tests in this file around the process-wide telemetry
+/// environment variables. The test harness runs the tests of one binary
+/// on parallel threads and both tests below emit telemetry through
+/// `verify_finding`; without the lock, records from the excerpt test can
+/// land in the log the telemetry test is counting, and the `unsafe`
+/// environment mutations would race with concurrent readers.
+static ENV_LOCK: Mutex<()> = Mutex::new(());
+
+/// Take [`ENV_LOCK`], recovering the guard when an earlier test panicked
+/// while holding it so one failure does not cascade into the other test.
+fn env_guard() -> MutexGuard<'static, ()> {
+    ENV_LOCK.lock().unwrap_or_else(|poisoned| poisoned.into_inner())
+}
+
+/// Build a finding whose evidence note contains an AWS-key-shaped token,
+/// so `verify_finding` takes the policy-deny short-circuit. Only
+/// `stable_hash` varies between callers; every other field is fixed.
+fn deny_diag(stable_hash: u64) -> Diag {
+    let mut ev = Evidence::default();
+    // Triggers the credentials deny rule via the AWS-key regex from
+    // `crate::utils::redact::contains_secret`. The deny rule fires
+    // deterministically because the rule lookup table is `const`.
+    ev.notes = vec!["secret=AKIAFAKEDETERM00000000".to_owned()];
+    Diag {
+        path: "src/handler.py".to_owned(),
+        line: 42,
+        col: 0,
+        severity: Severity::High,
+        id: "py.cmdi.os_system".to_owned(),
+        category: FindingCategory::Security,
+        path_validated: false,
+        guard_kind: None,
+        message: None,
+        labels: vec![],
+        confidence: Some(Confidence::High),
+        evidence: Some(ev),
+        rank_score: None,
+        rank_reason: None,
+        suppressed: false,
+        suppression: None,
+        rollup: None,
+        finding_id: String::new(),
+        alternative_finding_ids: vec![],
+        stable_hash,
+    }
+}
+
+/// Strip every non-deterministic field from a parsed telemetry record
+/// and re-serialise. Phase 30 acceptance explicitly excludes wall-clock
+/// timestamps; `ts` is the only such field today. Future additions
+/// belong in this filter so the canonical "what does deterministic
+/// telemetry look like?" surface lives in one place.
+fn strip_volatile_fields(line: &str) -> String {
+    let mut value: Value = serde_json::from_str(line).expect("telemetry line should be JSON");
+    if let Some(obj) = value.as_object_mut() {
+        obj.remove("ts");
+        // `duration_ms` is zero on the no-sandbox deny path, but strip
+        // it defensively so the audit stays correct if a future code
+        // path stamps a non-zero duration before the verdict short-
+        // circuits.
+        obj.remove("duration_ms");
+    }
+    serde_json::to_string(&value).expect("re-serialisation cannot fail")
+}
+
+#[test]
+fn ten_runs_produce_byte_identical_telemetry_minus_timestamps() {
+    // Held for the whole test: the env vars below are process-wide.
+    let _env = env_guard();
+    let tmp = tempfile::TempDir::new().expect("tempdir");
+    let log = tmp.path().join("events.jsonl");
+    // Pin the telemetry log to the temp file and ensure the
+    // `NYX_NO_TELEMETRY` opt-out is not set in this process.
+    // SAFETY: `ENV_LOCK` is held, so the other test in this file cannot
+    // read or write the environment while it is being mutated.
+    unsafe {
+        std::env::set_var("NYX_TELEMETRY_PATH", &log);
+        std::env::remove_var("NYX_NO_TELEMETRY");
+    }
+
+    let diag = deny_diag(0x0123_4567_89ab_cdef);
+
+    let mut opts = VerifyOptions::default();
+    opts.telemetry_policy = SamplingPolicy::keep_all();
+    opts.trace_verbose = false;
+
+    let mut verdict_jsons: BTreeSet<String> = BTreeSet::new();
+    for _ in 0..RUN_COUNT {
+        let result = verify_finding(&diag, &opts);
+        assert_eq!(result.status, VerifyStatus::Inconclusive);
+        // Serialise the whole result and compare the JSON strings; the
+        // deny path stamps no timestamps, so byte-for-byte equality is
+        // the contract.
+        verdict_jsons.insert(
+            serde_json::to_string(&result)
+                .expect("VerifyResult serialises"),
+        );
+    }
+    assert_eq!(
+        verdict_jsons.len(),
+        1,
+        "VerifyResult must be byte-identical across {RUN_COUNT} runs, got {} distinct",
+        verdict_jsons.len()
+    );
+
+    // Read the telemetry log; expect RUN_COUNT lines, all identical
+    // once the volatile fields (`ts`, `duration_ms`) are removed.
+    let parsed = telemetry::read_events(&log).expect("events.jsonl should parse");
+    assert_eq!(
+        parsed.len(),
+        RUN_COUNT,
+        "expected {RUN_COUNT} telemetry records, got {}",
+        parsed.len()
+    );
+    let stripped: BTreeSet<String> = parsed
+        .iter()
+        .map(|v| {
+            // round-trip through string so the strip path matches
+            // what the on-disk reader does.
+            let line = serde_json::to_string(v).expect("re-serialise");
+            strip_volatile_fields(&line)
+        })
+        .collect();
+    assert_eq!(
+        stripped.len(),
+        1,
+        "telemetry records must be byte-identical (sans ts/duration_ms) across {RUN_COUNT} runs, got {} distinct: {:?}",
+        stripped.len(),
+        stripped
+    );
+
+    // Cleanup: leaving the env var pointing at the (about-to-be-deleted)
+    // tempdir would poison sibling tests that share this process.
+    // SAFETY: `ENV_LOCK` is still held; see the `set_var` block above.
+    unsafe {
+        std::env::remove_var("NYX_TELEMETRY_PATH");
+    }
+}
+
+#[test]
+fn policy_deny_excerpt_is_stable_across_runs() {
+    // The PolicyDeniedDynamic verdict carries an excerpt scrubbed via
+    // the blake3-keyed `Scrubber`. blake3 is deterministic, so the
+    // excerpt should be byte-identical across runs. Independent
+    // assertion from the telemetry-determinism test because the
+    // scrubber-hash path is a separate determinism contract worth
+    // pinning on its own.
+    //
+    // `verify_finding` also emits a telemetry record on the deny path,
+    // so hold the env lock to keep those records out of the log the
+    // telemetry test is counting.
+    let _env = env_guard();
+    let diag = deny_diag(0xfeed_face_0123_4567);
+    let opts = VerifyOptions::default();
+
+    let mut excerpts: BTreeSet<String> = BTreeSet::new();
+    for _ in 0..RUN_COUNT {
+        let result = verify_finding(&diag, &opts);
+        match result
+            .inconclusive_reason
+            .expect("expected PolicyDeniedDynamic on deny path")
+        {
+            nyx_scanner::evidence::InconclusiveReason::PolicyDeniedDynamic {
+                excerpt,
+                ..
+ } => { + excerpts.insert(excerpt); + } + other => panic!("expected PolicyDeniedDynamic, got {other:?}"), + } + } + assert_eq!( + excerpts.len(), + 1, + "scrubbed excerpt must be deterministic across {RUN_COUNT} runs, got {excerpts:?}" + ); +} diff --git a/tests/policy_deny.rs b/tests/policy_deny.rs new file mode 100644 index 00000000..b0b656a2 --- /dev/null +++ b/tests/policy_deny.rs @@ -0,0 +1,226 @@ +//! Phase 30 (Track C — security): coverage for +//! [`crate::dynamic::policy::evaluate`] deny rules. +//! +//! One test per [`DenyRule`] variant (`credentials`, `private-key`, +//! `production-endpoint`) plus an allow-path assertion and an end-to- +//! end check that [`verify_finding`] short-circuits to +//! [`InconclusiveReason::PolicyDeniedDynamic`] without invoking the +//! sandbox. + +#![cfg(feature = "dynamic")] + +use nyx_scanner::commands::scan::Diag; +use nyx_scanner::dynamic::policy::{self, DenyRule, PolicyDecision}; +use nyx_scanner::dynamic::verify::{verify_finding, VerifyOptions}; +use nyx_scanner::evidence::{ + Confidence, Evidence, FlowStep, FlowStepKind, InconclusiveReason, SpanEvidence, VerifyStatus, +}; +use nyx_scanner::patterns::{FindingCategory, Severity}; + +fn empty_diag() -> Diag { + Diag { + path: "src/app.py".to_owned(), + line: 10, + col: 0, + severity: Severity::High, + id: "py.cmdi.os_system".to_owned(), + category: FindingCategory::Security, + path_validated: false, + guard_kind: None, + message: None, + labels: vec![], + confidence: Some(Confidence::High), + evidence: Some(Evidence::default()), + rank_score: None, + rank_reason: None, + suppressed: false, + suppression: None, + rollup: None, + finding_id: String::new(), + alternative_finding_ids: vec![], + stable_hash: 0xdeadbeefcafebabe, + } +} + +fn flow_step_with_snippet(snippet: &str) -> FlowStep { + FlowStep { + step: 1, + kind: FlowStepKind::Source, + file: "src/app.py".to_owned(), + line: 4, + col: 0, + snippet: Some(snippet.to_owned()), + variable: None, + callee: None, + 
function: None, + is_cross_file: false, + } +} + +fn span_with_snippet(snippet: &str) -> SpanEvidence { + SpanEvidence { + path: "src/app.py".to_owned(), + line: 4, + col: 0, + kind: "source".to_owned(), + snippet: Some(snippet.to_owned()), + } +} + +#[test] +fn allow_returns_for_diag_without_secrets() { + let diag = empty_diag(); + assert!(matches!(policy::evaluate(&diag), PolicyDecision::Allow)); +} + +#[test] +fn credentials_rule_fires_on_aws_key_in_flow_step_snippet() { + let mut diag = empty_diag(); + let mut ev = Evidence::default(); + ev.flow_steps = vec![flow_step_with_snippet( + "key=AKIAFAKETEST00000000", + )]; + diag.evidence = Some(ev); + match policy::evaluate(&diag) { + PolicyDecision::Deny { rule, excerpt } => { + assert_eq!(rule, DenyRule::CREDENTIALS); + assert!( + !excerpt.contains("AKIAFAKETEST00000000"), + "excerpt must scrub the raw token, got {excerpt:?}" + ); + } + other => panic!("expected Deny(credentials), got {other:?}"), + } +} + +#[test] +fn credentials_rule_fires_on_bearer_header_note() { + let mut diag = empty_diag(); + let mut ev = Evidence::default(); + ev.notes = vec!["Authorization: Bearer sk-test-abc123def456".to_owned()]; + diag.evidence = Some(ev); + let decision = policy::evaluate(&diag); + assert!(decision.is_deny(), "expected Deny, got {decision:?}"); +} + +#[test] +fn private_key_rule_fires_on_pem_block_in_snippet() { + let mut diag = empty_diag(); + let mut ev = Evidence::default(); + ev.source = Some(span_with_snippet( + "-----BEGIN OPENSSH PRIVATE KEY-----", + )); + diag.evidence = Some(ev); + match policy::evaluate(&diag) { + PolicyDecision::Deny { rule, .. 
} => { + assert_eq!(rule, DenyRule::PRIVATE_KEY); + } + other => panic!("expected Deny(private-key), got {other:?}"), + } +} + +#[test] +fn private_key_rule_fires_on_rsa_pem_in_note() { + let mut diag = empty_diag(); + let mut ev = Evidence::default(); + ev.notes = vec!["-----BEGIN RSA PRIVATE KEY-----".to_owned()]; + diag.evidence = Some(ev); + match policy::evaluate(&diag) { + PolicyDecision::Deny { rule, .. } => { + assert_eq!(rule, DenyRule::PRIVATE_KEY); + } + other => panic!("expected Deny(private-key), got {other:?}"), + } +} + +#[test] +fn production_endpoint_rule_fires_on_path_containing_prod_subdomain() { + let mut diag = empty_diag(); + diag.path = "src/clients/api.prod.example.com_client.py".to_owned(); + let decision = policy::evaluate(&diag); + match decision { + PolicyDecision::Deny { rule, .. } => { + assert_eq!(rule, DenyRule::PRODUCTION_ENDPOINT); + } + other => panic!("expected Deny(production-endpoint), got {other:?}"), + } +} + +#[test] +fn production_endpoint_rule_fires_on_flow_step_callee() { + let mut diag = empty_diag(); + diag.path = "src/app.py".to_owned(); + let mut ev = Evidence::default(); + ev.flow_steps = vec![FlowStep { + step: 1, + kind: FlowStepKind::Call, + file: "src/app.py".to_owned(), + line: 4, + col: 0, + snippet: None, + variable: None, + callee: Some("requests.get(\"https://api-prod.example.com/v1\")".to_owned()), + function: None, + is_cross_file: false, + }]; + diag.evidence = Some(ev); + let decision = policy::evaluate(&diag); + assert!(decision.is_deny(), "expected Deny, got {decision:?}"); +} + +#[test] +fn credentials_rule_fires_before_other_rules() { + // A diag that matches BOTH credentials (regex) and production-endpoint + // (substring) must surface the credentials rule — credentials are + // higher-blast-radius and a leaked token would dwarf an exposed prod + // endpoint name. Order asserted by the policy.evaluate impl. 
+ let mut diag = empty_diag(); + let mut ev = Evidence::default(); + ev.notes = vec![ + "deploying key=AKIAFAKETEST00000000 to api.prod.example.com".to_owned(), + ]; + diag.evidence = Some(ev); + match policy::evaluate(&diag) { + PolicyDecision::Deny { rule, .. } => { + assert_eq!(rule, DenyRule::CREDENTIALS); + } + other => panic!("expected credentials to win, got {other:?}"), + } +} + +#[test] +fn verify_finding_short_circuits_without_sandbox() { + // Route the verifier through the deny path and confirm it returns + // `Inconclusive(PolicyDeniedDynamic)` without ever starting a + // sandbox. The diag deliberately carries a credential; without + // the deny gate a real run would go on to build a Python harness + // and touch the filesystem (and fail where python3 is missing). + // Instead we observe an immediate verdict with no attempts and + // no toolchain match. + let mut diag = empty_diag(); + let mut ev = Evidence::default(); + ev.notes = vec!["password=hunter2-supersecret-test".to_owned()]; + diag.evidence = Some(ev); + + let result = verify_finding(&diag, &VerifyOptions::default()); + + assert_eq!(result.status, VerifyStatus::Inconclusive); + let reason = result + .inconclusive_reason + .expect("PolicyDeniedDynamic must populate inconclusive_reason"); + match reason { + InconclusiveReason::PolicyDeniedDynamic { rule, excerpt } => { + assert_eq!(rule, DenyRule::CREDENTIALS); + assert!( + !excerpt.contains("hunter2-supersecret-test"), + "excerpt must scrub the raw secret, got {excerpt:?}" + ); + } + other => panic!("expected PolicyDeniedDynamic, got {other:?}"), + } + assert!( + result.attempts.is_empty(), + "sandbox must not have run; attempts should be empty" + ); + assert!(result.toolchain_match.is_none()); +}