[pitboss] phase 30: Cross-cutting — Determinism audit, VerifyTrace observability, policy.rs deny rules

This commit is contained in:
pitboss 2026-05-15 20:17:07 -05:00
parent b56c19ef64
commit 36c8bf52df
13 changed files with 1421 additions and 0 deletions

View file

@ -76,6 +76,7 @@ pub mod oob;
pub mod oracle;
pub mod policy;
pub mod probe;
pub mod rand;
pub mod repro;
pub mod report;
pub mod runner;
@ -84,6 +85,7 @@ pub mod spec;
pub mod stubs;
pub mod telemetry;
pub mod toolchain;
pub mod trace;
pub mod verify;
pub use report::{VerifyResult, VerifyStatus};

View file

@ -228,6 +228,227 @@ fn hash_token(secret: &str) -> String {
format!("{SCRUB_HASH_PREFIX}{prefix}>")
}
/// Verdict produced by [`evaluate`].
///
/// `Allow` lets the verifier go on to execute the finding. `Deny`
/// carries the rule that fired together with an evidence excerpt that
/// triage can quote in the audit log. `Deny` is the second security
/// layer above the per-witness [`Scrubber`]: the scrubber redacts bytes
/// that were already captured, whereas `Deny` stops execution before
/// the sandbox ever loads the payload, so the credential never reaches
/// the harness at all.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum PolicyDecision {
    /// No deny rule matched; the verifier may proceed.
    Allow,
    /// A deny rule matched the finding.
    Deny {
        /// Stable rule identifier — one of [`DenyRule::CREDENTIALS`],
        /// [`DenyRule::PRIVATE_KEY`], [`DenyRule::PRODUCTION_ENDPOINT`].
        rule: &'static str,
        /// Short excerpt (at most 120 chars, passed through
        /// [`Scrubber::scrub_string`]) of the offending field, so an
        /// operator can see *why* the deny fired without re-deriving
        /// the match.
        excerpt: String,
    },
}

impl PolicyDecision {
    /// Returns `true` when this decision is a [`PolicyDecision::Deny`].
    ///
    /// Lets call sites branch on the boolean outcome before unpacking
    /// the typed reason.
    pub fn is_deny(&self) -> bool {
        match self {
            PolicyDecision::Allow => false,
            PolicyDecision::Deny { .. } => true,
        }
    }
}
/// Rule-name constants exposed for the
/// [`crate::evidence::InconclusiveReason::PolicyDeniedDynamic`] field
/// and for tests that need to assert *which* deny rule fired. Strings
/// rather than an enum so they read identically in JSON output, audit
/// logs, and the `Display` impl on `InconclusiveReason`.
///
/// `DenyRule` itself is a unit struct used purely as a namespace for
/// the associated constants below; it carries no data.
pub struct DenyRule;

impl DenyRule {
    /// Finding mentions a credential-shaped token (AWS key, GitHub /
    /// Slack / OpenAI token, `password=` query string, `Bearer`
    /// header) — re-uses the project-wide secret regex set via
    /// [`crate::utils::redact::contains_secret`].
    pub const CREDENTIALS: &'static str = "credentials";
    /// Finding mentions a private key (PEM block opener, OpenSSH
    /// private key block, base64-shaped key payload). Matched against
    /// the lowercase substrings in [`PRIVATE_KEY_LITERALS`].
    pub const PRIVATE_KEY: &'static str = "private-key";
    /// Finding's path or evidence references a production endpoint
    /// (e.g. `api.prod.example.com`, `*.production.*`,
    /// `*-prod.amazonaws.com`). Conservative: matched against the
    /// short substring list in [`PROD_ENDPOINT_REGEXES`].
    pub const PRODUCTION_ENDPOINT: &'static str = "production-endpoint";
}
/// Substrings that mark a [`DenyRule::PRIVATE_KEY`] hit on their own,
/// independent of the [`crate::utils::redact`] regex set. The redact
/// regex covers the `-----BEGIN ... PRIVATE KEY-----` shape; the
/// literals below add coverage for evidence-snippet excerpts where the
/// trailing newline has been stripped (a common occurrence in CLI
/// output that gets folded into a one-line `notes` entry).
///
/// Every entry must be lowercase: [`match_text`] compares them against
/// an ASCII-lowercased copy of the candidate text.
///
/// The PKCS#8 `ENCRYPTED PRIVATE KEY` opener is listed explicitly
/// because `"-----begin private key"` is not a substring of
/// `"-----begin encrypted private key"`; without it such a block would
/// never receive the precise private-key label.
///
/// NOTE(review): `ssh-rsa AAAA…` / `ssh-ed25519 AAAA…` is the OpenSSH
/// *public*-key line format. The entries are kept as-is — confirm that
/// denying them under the private-key rule is intended.
const PRIVATE_KEY_LITERALS: &[&str] = &[
    "-----begin rsa private key",
    "-----begin openssh private key",
    "-----begin ec private key",
    "-----begin private key",
    "-----begin encrypted private key",
    "-----begin dsa private key",
    "-----begin pgp private key",
    "ssh-rsa aaaa",
    "ssh-ed25519 aaaa",
];
/// Substrings that mark a [`DenyRule::PRODUCTION_ENDPOINT`] hit.
///
/// Conservative starter set: the endpoint shapes most security teams
/// ban from a dynamic re-execution sandbox. Despite the `_REGEXES`
/// suffix these are plain substrings, not regular expressions — a full
/// regex engine would allow more permissive shapes but at the cost of
/// a dependency the dynamic crate does not currently pull in.
///
/// [`match_text`] compares each entry against an ASCII-lowercased copy
/// of every text collected from the finding, so matching is ASCII
/// case-insensitive and every entry here must be lowercase.
///
/// Each entry is anchored by a `.` or `-` delimiter (e.g.
/// `".production."`, `"-prod.amazonaws.com"`), so ordinary words such
/// as `reproduction` or `productionalize` do not match on their own.
const PROD_ENDPOINT_REGEXES: &[&str] = &[
    "api.prod.",
    "api-prod.",
    ".production.",
    "-production.",
    "-prod.amazonaws.com",
    "prod.example.com",
    "prod-api.",
    "prod-db.",
    "prod-cluster.",
];
/// Run `diag` through the cross-cutting security deny list.
///
/// Every text gathered by [`collect_diag_texts`] (id, path, message,
/// evidence notes, span snippets, flow-step fields) is handed to
/// [`match_text`] in collection order. The first text that matches any
/// rule decides the outcome: the verifier short-circuits to
/// [`crate::evidence::InconclusiveReason::PolicyDeniedDynamic`] with
/// that rule and a scrubbed excerpt of the text.
///
/// Within a single text the rules are tried private-key first (most
/// precise — PEM blocks also satisfy the credentials regex set, so the
/// precise label must not be buried under the generic one), then
/// credentials, then production-endpoint.
///
/// Returns [`PolicyDecision::Allow`] when no text matches.
pub fn evaluate(diag: &crate::commands::scan::Diag) -> PolicyDecision {
    let candidates = collect_diag_texts(diag);
    let first_hit = candidates
        .iter()
        .find_map(|candidate| match_text(candidate.as_str()));
    match first_hit {
        Some((rule, matched)) => PolicyDecision::Deny {
            rule,
            excerpt: excerpt_with_scrubber(matched),
        },
        None => PolicyDecision::Allow,
    }
}
/// Gather every text field of `diag` that the deny rules inspect.
///
/// Order matters because [`evaluate`] stops at the first matching
/// text: id, path, message, then (when evidence is present) notes,
/// explanation, source/sink path + snippet, guard and sanitizer
/// snippets, and finally each flow step's file, snippet, and callee.
/// Empty `id`, `path`, and flow-step `file` values are skipped.
fn collect_diag_texts(diag: &crate::commands::scan::Diag) -> Vec<String> {
    let mut texts: Vec<String> = Vec::new();

    if !diag.id.is_empty() {
        texts.push(diag.id.clone());
    }
    if !diag.path.is_empty() {
        texts.push(diag.path.clone());
    }
    if let Some(message) = &diag.message {
        texts.push(message.clone());
    }

    // Findings without evidence contribute only the top-level fields.
    let Some(evidence) = diag.evidence.as_ref() else {
        return texts;
    };

    texts.extend(evidence.notes.iter().cloned());
    if let Some(explanation) = &evidence.explanation {
        texts.push(explanation.clone());
    }

    // Source and sink spans contribute their path as well as the snippet.
    for endpoint in [&evidence.source, &evidence.sink] {
        let Some(span) = endpoint.as_ref() else {
            continue;
        };
        texts.push(span.path.clone());
        texts.extend(span.snippet.iter().cloned());
    }

    // Guards and sanitizers contribute snippets only.
    let guard_like = evidence.guards.iter().chain(evidence.sanitizers.iter());
    texts.extend(guard_like.filter_map(|span| span.snippet.clone()));

    for step in &evidence.flow_steps {
        if !step.file.is_empty() {
            texts.push(step.file.clone());
        }
        texts.extend(step.snippet.iter().cloned());
        texts.extend(step.callee.iter().cloned());
    }

    texts
}
/// Test one text against the deny set.
///
/// Returns `Some((rule_name, text))` for the first rule that matches
/// and `None` when nothing matches or `text` is empty. The second tuple
/// element is the original text rather than the rule needle, so the
/// excerpt surfaced on the verdict shows the operator *which* field
/// caused the refusal, not just the rule that fired.
fn match_text(text: &str) -> Option<(&'static str, &str)> {
    if text.is_empty() {
        return None;
    }

    // Needle lists are lowercase, so compare against a case-folded copy.
    let folded = text.to_ascii_lowercase();
    let contains_any = |needles: &[&str]| needles.iter().any(|needle| folded.contains(*needle));

    // Private-key literals go first: PEM blocks also satisfy the generic
    // credentials regex set in [`crate::utils::redact`], so a PEM hit
    // would otherwise be labelled `credentials`. The more precise rule
    // lets operators triage the leak shape from the verdict alone.
    let rule = if contains_any(PRIVATE_KEY_LITERALS) {
        DenyRule::PRIVATE_KEY
    } else if redact::contains_secret(text.as_bytes()) {
        DenyRule::CREDENTIALS
    } else if contains_any(PROD_ENDPOINT_REGEXES) {
        DenyRule::PRODUCTION_ENDPOINT
    } else {
        return None;
    };

    Some((rule, text))
}
/// Build a short excerpt suitable for embedding in a
/// [`crate::evidence::InconclusiveReason::PolicyDeniedDynamic`].
///
/// The text is first routed through [`Scrubber::scrub_string`] so the
/// excerpt itself cannot leak the credential, then cut down to at most
/// 120 `char`s to keep the audit log compact.
///
/// The cut point is found by walking codepoints (not bytes):
/// production-endpoint hits pass through the scrubber unchanged, so a
/// long file path or snippet with non-ASCII content (e.g. Unicode in a
/// source comment) would otherwise panic the verifier on a
/// mid-codepoint byte slice. The string is truncated in place, which
/// avoids re-allocating the excerpt.
fn excerpt_with_scrubber(text: &str) -> String {
    // Maximum number of `char`s retained in an excerpt.
    const EXCERPT_MAX_CHARS: usize = 120;

    let mut scrubbed = Scrubber::project_default().scrub_string(text);
    // Byte offset of the first char past the limit, if the text is longer.
    let cut = scrubbed
        .char_indices()
        .nth(EXCERPT_MAX_CHARS)
        .map(|(offset, _)| offset);
    if let Some(offset) = cut {
        // `offset` comes from `char_indices`, so it is a char boundary.
        scrubbed.truncate(offset);
    }
    scrubbed
}
/// Truncate `bytes` to at most [`PAYLOAD_CAPTURE_LIMIT_BYTES`].
///
/// Head-keeping: the prefix the sink reads first is retained; the tail is

280
src/dynamic/rand.rs Normal file
View file

@ -0,0 +1,280 @@
//! Deterministic seeded RNG for the dynamic layer (Phase 30 — Track C
//! determinism audit).
//!
//! Every randomness source in [`crate::dynamic`] must route through
//! [`SpecRng`] so identical inputs (spec hash + corpus version) produce
//! identical sandbox runs. Non-determinism inside the verifier breaks
//! the Phase 27 `events.jsonl` replay invariant, the Phase 28 repro
//! bundle hermeticity contract, and the Phase 29 per-cell budget gates.
//!
//! The implementation is intentionally minimal:
//!
//! * No external RNG crate — blake3 is the project's hashing primitive
//! and an extra `rand`/`rand_chacha` dep would expand the supply-chain
//! surface for no gain.
//! * Output stream is a counter-mode hash construction: every 32-byte
//! block is `blake3(seed || counter_le)`, with the counter incremented
//! after each block, so a block depends only on the seed and its index
//! rather than on the previous block. Throughput is dwarfed by
//! sandbox / build cost so any added cycles compared to a dedicated
//! CSPRNG do not show up in `benches/dynamic_bench.rs`.
//! * No global or thread-local state — callers pass the [`SpecRng`]
//! explicitly, so every consumer's draws are reproducible. Mutation
//! fuzzers can clone the RNG before forking: each clone continues with
//! the identical stream from the clone point, which keeps both
//! branches reproducible.
//!
//! # Audit gate
//!
//! `scripts/check_no_unseeded_rand.sh` greps `src/dynamic/` for the
//! banned non-deterministic APIs (`rand::thread_rng`, `OsRng`,
//! `from_entropy`, `getrandom::getrandom`, `Uuid::new_v4`, `fastrand`).
//! Any match exits the script non-zero so CI catches regressions before
//! they land. The seccomp policy file is allowed to mention
//! `"getrandom"` because that string is a syscall name, not a Rust API
//! call; the audit script's regex filters that case out.
use blake3::Hasher;
/// Length of the seed mixed into every block of the RNG stream. 32
/// bytes = full blake3 output width; using anything smaller would lose
/// entropy if a caller passes a longer spec hash.
const SEED_BYTES: usize = 32;

/// Width of a single output block. Matches blake3's natural output
/// length so we never have to truncate or extend: `SpecRng::refill`
/// copies each digest straight into a `BLOCK_BYTES`-sized buffer.
const BLOCK_BYTES: usize = 32;
/// Deterministic pseudo-random number generator keyed by a spec hash.
///
/// Construct via [`SpecRng::seeded`] (the standard entry point used by
/// every verifier call site) or [`SpecRng::from_seed_bytes`] (for tests
/// that need to pin the seed independently of a spec).
///
/// The same seed always produces the same byte stream, so any consumer
/// inside [`crate::dynamic`] that needs randomness (mutation fuzzer
/// payload choice, environment variable jitter, stub port jitter, …)
/// gets a reproducible roll without leaking host entropy into the
/// verdict.
///
/// `Clone` copies the full generator state, so a clone continues with
/// exactly the same stream as the original from the clone point.
#[derive(Debug, Clone)]
pub struct SpecRng {
    // Seed hashed into every block; never changes after construction.
    seed: [u8; SEED_BYTES],
    // Index of the next block to generate; bumped (wrapping) on refill.
    counter: u64,
    // Most recently generated block of output bytes.
    buf: [u8; BLOCK_BYTES],
    // Offset of the next unread byte in `buf`. Equal to `BLOCK_BYTES`
    // when the block is exhausted (also the initial state), which
    // forces a refill before the next read.
    buf_pos: usize,
}
impl SpecRng {
    /// Build an RNG keyed by a spec hash string.
    ///
    /// The string is hashed with blake3, prefixed by the
    /// `nyx.dynamic.rand.v1\0` domain tag, which normalises it to a
    /// full 32-byte seed. Input of any length is accepted — for
    /// example the short hex hash stored on
    /// [`crate::dynamic::spec::HarnessSpec::spec_hash`] or a longer
    /// derivation.
    pub fn seeded(spec_hash: &str) -> Self {
        let mut keyer = Hasher::new();
        keyer.update(b"nyx.dynamic.rand.v1\0");
        keyer.update(spec_hash.as_bytes());
        Self::from_seed_bytes(*keyer.finalize().as_bytes())
    }

    /// Build an RNG directly from raw seed bytes. Exposed for tests
    /// that want a known seed without going through a spec hash.
    pub fn from_seed_bytes(seed: [u8; SEED_BYTES]) -> Self {
        Self {
            seed,
            counter: 0,
            buf: [0u8; BLOCK_BYTES],
            // Start "exhausted" so the first read triggers a refill.
            buf_pos: BLOCK_BYTES,
        }
    }

    /// Generate the next block, `blake3(seed || counter_le)`, into the
    /// internal buffer and advance the counter. Called lazily as bytes
    /// are consumed.
    fn refill(&mut self) {
        let mut block_hasher = Hasher::new();
        block_hasher.update(&self.seed);
        block_hasher.update(&self.counter.to_le_bytes());
        self.buf = *block_hasher.finalize().as_bytes();
        self.buf_pos = 0;
        self.counter = self.counter.wrapping_add(1);
    }

    /// Fill `out` with deterministic pseudo-random bytes. An empty
    /// `out` consumes nothing from the stream.
    pub fn fill_bytes(&mut self, out: &mut [u8]) {
        let mut remaining = out;
        while !remaining.is_empty() {
            if self.buf_pos == BLOCK_BYTES {
                self.refill();
            }
            let unread = &self.buf[self.buf_pos..];
            let take = unread.len().min(remaining.len());
            // `mem::take` moves the slice out so its tail can be
            // re-assigned to `remaining` without a borrow conflict.
            let (head, tail) = std::mem::take(&mut remaining).split_at_mut(take);
            head.copy_from_slice(&unread[..take]);
            self.buf_pos += take;
            remaining = tail;
        }
    }

    /// Draw the next `u64` (little-endian over 8 stream bytes). Used
    /// by the rejection loop in [`Self::gen_range`].
    pub fn next_u64(&mut self) -> u64 {
        let mut raw = [0u8; 8];
        self.fill_bytes(&mut raw);
        u64::from_le_bytes(raw)
    }

    /// Draw a `u32`: the low 32 bits of a full [`Self::next_u64`]
    /// draw, so each call consumes 8 stream bytes. Convenience for
    /// callers picking among small alternatives (payload variants, env
    /// mutation slots).
    pub fn next_u32(&mut self) -> u32 {
        let wide = self.next_u64();
        (wide & u64::from(u32::MAX)) as u32
    }

    /// Sample a `usize` uniformly from `[0, upper)`.
    ///
    /// Uses rejection sampling against the largest multiple of `upper`
    /// that fits in a `u64`, so the distribution is exactly uniform —
    /// modulo bias would otherwise nudge the corpus picker toward
    /// low-indexed payloads.
    ///
    /// # Panics
    ///
    /// Panics when `upper == 0` because the request is meaningless;
    /// callers should guard zero-length slices.
    pub fn gen_range(&mut self, upper: usize) -> usize {
        assert!(upper > 0, "SpecRng::gen_range upper bound must be > 0");
        let bound = upper as u64;
        let zone = u64::MAX - (u64::MAX % bound);
        loop {
            let draw = self.next_u64();
            if draw >= zone {
                // Outside the unbiased zone: discard and redraw.
                continue;
            }
            return (draw % bound) as usize;
        }
    }

    /// Pick one element of `slice`. Returns `None` only when the slice
    /// is empty, so callers can use `?` on empty-corpus paths; an empty
    /// slice consumes nothing from the stream.
    pub fn choose<'a, T>(&mut self, slice: &'a [T]) -> Option<&'a T> {
        match slice.len() {
            0 => None,
            len => Some(&slice[self.gen_range(len)]),
        }
    }

    /// In-place Fisher–Yates shuffle. Useful for the mutation fuzzer
    /// when iterating a payload list in a reproducible order without
    /// pre-sorting in caller code.
    pub fn shuffle<T>(&mut self, slice: &mut [T]) {
        // Walk the unshuffled prefix length from `len` down to 2; the
        // last element of each prefix swaps with a uniformly chosen
        // position inside it.
        for prefix_len in (2..=slice.len()).rev() {
            let pick = self.gen_range(prefix_len);
            slice.swap(prefix_len - 1, pick);
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Pull `len` bytes out of `rng` into a fresh vector.
    fn drain(rng: &mut SpecRng, len: usize) -> Vec<u8> {
        let mut out = vec![0u8; len];
        rng.fill_bytes(&mut out);
        out
    }

    #[test]
    fn same_seed_produces_same_stream() {
        let first = drain(&mut SpecRng::seeded("deadbeefcafebabe"), 64);
        let second = drain(&mut SpecRng::seeded("deadbeefcafebabe"), 64);
        assert_eq!(first, second);
    }

    #[test]
    fn different_seeds_diverge() {
        let from_a = SpecRng::seeded("aaaa").next_u64();
        let from_b = SpecRng::seeded("bbbb").next_u64();
        assert_ne!(from_a, from_b);
    }

    #[test]
    fn fill_bytes_crosses_block_boundary() {
        // 80 > BLOCK_BYTES (32): one large read must equal the
        // concatenation of two smaller reads, proving the stream is
        // continuous across refills.
        let whole = drain(&mut SpecRng::seeded("boundary"), 80);

        let mut split_rng = SpecRng::seeded("boundary");
        let head = drain(&mut split_rng, 32);
        let tail = drain(&mut split_rng, 48);

        assert_eq!(whole, [head, tail].concat());
    }

    #[test]
    fn gen_range_stays_in_bounds() {
        let mut rng = SpecRng::seeded("range");
        assert!((0..1000).all(|_| rng.gen_range(7) < 7));
    }

    #[test]
    #[should_panic]
    fn gen_range_zero_panics() {
        SpecRng::seeded("range").gen_range(0);
    }

    #[test]
    fn choose_empty_returns_none() {
        let mut rng = SpecRng::seeded("choose");
        assert!(rng.choose::<u32>(&[]).is_none());
    }

    #[test]
    fn choose_is_reproducible() {
        let items = [10u32, 20, 30, 40, 50];
        let mut left = SpecRng::seeded("pick");
        let mut right = SpecRng::seeded("pick");
        for _ in 0..16 {
            assert_eq!(left.choose(&items), right.choose(&items));
        }
    }

    #[test]
    fn shuffle_is_reproducible() {
        let mut first: Vec<u32> = (0..20).collect();
        let mut second = first.clone();
        SpecRng::seeded("shuffle").shuffle(&mut first);
        SpecRng::seeded("shuffle").shuffle(&mut second);
        assert_eq!(first, second);
    }

    #[test]
    fn clone_forks_substream_reproducibly() {
        // Clones taken at any point must yield identical streams —
        // required so a fuzzer fork (try-this-mutation vs try-that) is
        // hermetic.
        let mut parent = SpecRng::seeded("fork");
        parent.next_u32();
        let mut fork_a = parent.clone();
        let mut fork_b = parent.clone();
        assert_eq!(drain(&mut fork_a, 48), drain(&mut fork_b, 48));
    }

    #[test]
    fn from_seed_bytes_is_deterministic() {
        let seed = [7u8; SEED_BYTES];
        let first = SpecRng::from_seed_bytes(seed).next_u64();
        let second = SpecRng::from_seed_bytes(seed).next_u64();
        assert_eq!(first, second);
    }
}

View file

@ -22,6 +22,7 @@
//! expected/
//! outcome.json (redacted SandboxOutcome)
//! verdict.json
//! trace.jsonl (Phase 30 — VerifyTrace, when attached)
//! reproduce.sh
//! docker_pull.sh (Phase 28 — present when toolchain pinned)
//! README.md
@ -185,6 +186,19 @@ pub fn write(
// expected/verdict.json
write_json(&root.join("expected").join("verdict.json"), verdict)?;
// expected/trace.jsonl — Phase 30 (Track C observability). Records
// the verifier's per-stage timeline so a repro replay can compare
// sandbox runs against the canonical sequence. Omitted when no
// trace was attached to the sandbox options, which keeps direct
// `sandbox::run` callers (parity fixtures, unit tests) free of
// bundle-shape changes.
if let Some(trace) = opts.trace.as_ref() {
fs::write(
root.join("expected").join("trace.jsonl"),
trace.to_jsonl().as_bytes(),
)?;
}
// toolchain.lock (Phase 28 — Track H.3, repro hermeticity)
let lock = build_toolchain_lock(spec, &root)?;
write_json(&root.join("toolchain.lock"), &lock)?;

View file

@ -16,10 +16,38 @@ use crate::dynamic::probe::{ProbeChannel, SinkProbe};
use crate::dynamic::stubs::StubEvent;
use crate::dynamic::sandbox::{self, SandboxBackend, SandboxError, SandboxOptions, SandboxOutcome};
use crate::dynamic::spec::HarnessSpec;
use crate::dynamic::trace::{TraceStage, VerifyTrace};
use crate::evidence::{DifferentialOutcome, DifferentialVerdict};
use crate::symbol::Lang;
use std::sync::Arc;
/// Append `stage` (with optional `detail`) to the caller's
/// [`VerifyTrace`] when one was attached to [`SandboxOptions::trace`].
///
/// Does nothing when `trace` is `None`, which keeps every direct
/// `crate::dynamic::sandbox::run` caller (tests, parity fixtures) free
/// of trace boilerplate.
fn trace_record(trace: Option<&Arc<VerifyTrace>>, stage: TraceStage, detail: Option<String>) {
    let Some(handle) = trace else {
        return;
    };
    handle.record(stage, detail);
}
/// Short, stable variant tag used in [`TraceStage::SandboxStarted`]
/// details so a trace line names the oracle without dumping the full
/// `Debug` repr (which includes payload-specific `predicates` slices).
///
/// The match is exhaustive with no wildcard arm, so adding an `Oracle`
/// variant forces this table to be updated at compile time.
// NOTE(review): `allow(deprecated)` presumably exists because at least
// one matched `Oracle` variant is marked `#[deprecated]`; the enum
// definition is not visible here — confirm and name the variant.
#[allow(deprecated)]
fn oracle_short_name(oracle: &Oracle) -> &'static str {
    match oracle {
        Oracle::SinkProbe { .. } => "SinkProbe",
        Oracle::SinkCrash { .. } => "SinkCrash",
        Oracle::OutputContains(_) => "OutputContains",
        Oracle::Crash => "Crash",
        Oracle::OobCallback { .. } => "OobCallback",
        Oracle::FileEscape => "FileEscape",
        Oracle::ExitStatus(_) => "ExitStatus",
        Oracle::StubEvent { .. } => "StubEvent",
    }
}
/// Max harness-build attempts before giving up.
///
/// NOTE(review): presumably bounds the build-retry loop in `run_spec`;
/// the loop body is not visible in this view — confirm.
const MAX_BUILD_ATTEMPTS: u32 = 2;
@ -91,6 +119,13 @@ pub fn run_spec(spec: &HarnessSpec, opts: &SandboxOptions) -> Result<RunOutcome,
return Err(RunError::NoPayloadsForCap);
}
let trace_handle = opts.trace.as_ref().cloned();
trace_record(
trace_handle.as_ref(),
TraceStage::BuildStarted,
Some(format!("lang={:?} spec_hash={}", spec.lang, spec.spec_hash)),
);
// Build harness with retry.
const BACKOFF: [u64; 1] = [1];
let mut build_attempts = 0u32;
@ -265,6 +300,12 @@ pub fn run_spec(spec: &HarnessSpec, opts: &SandboxOptions) -> Result<RunOutcome,
}
}
trace_record(
trace_handle.as_ref(),
TraceStage::BuildDone,
Some(format!("attempts={build_attempts}")),
);
let harness_source = harness.source.clone();
let entry_source = harness.entry_source.clone();
@ -317,7 +358,25 @@ pub fn run_spec(spec: &HarnessSpec, opts: &SandboxOptions) -> Result<RunOutcome,
let _ = ch.clear();
}
trace_record(
trace_handle.as_ref(),
TraceStage::SandboxStarted,
Some(format!(
"attempt={i} payload={} oracle={}",
payload.label,
oracle_short_name(&payload.oracle)
)),
);
let mut outcome = sandbox::run(&harness, &effective_bytes, &effective_opts)?;
trace_record(
trace_handle.as_ref(),
TraceStage::OracleWait,
Some(format!(
"attempt={i} exit_code={:?} timed_out={}",
outcome.exit_code, outcome.timed_out
)),
);
// For OOB payloads, check the nonce listener and update the outcome flag.
if let (Some(nonce), Some(listener)) = (&oob_nonce, effective_opts.oob_listener()) {
@ -348,6 +407,13 @@ pub fn run_spec(spec: &HarnessSpec, opts: &SandboxOptions) -> Result<RunOutcome,
&vuln_stub_events,
);
let sink_hit = outcome.sink_hit;
trace_record(
trace_handle.as_ref(),
TraceStage::OracleObserved,
Some(format!(
"attempt={i} fired={vuln_fired} sink_hit={sink_hit}"
)),
);
// Phase 08 §C.4: a process-level crash with no matching sink-site
// Crash probe is an "unrelated abort" (setup code, harness build,

View file

@ -232,6 +232,14 @@ pub struct SandboxOptions {
/// process backend. See [`ProcessHardeningProfile`] for the per-
/// variant primitive matrix.
pub process_hardening: ProcessHardeningProfile,
/// Phase 30 (Track C observability): optional [`VerifyTrace`] handle
/// the runner appends pipeline stages to (`build_started`,
/// `build_done`, `sandbox_started`, `oracle_wait`, `oracle_observed`).
/// `None` keeps the runner silent — sandbox-level callers that do
/// not want a trace pay zero cost. Held as `Arc` so the verifier
/// can clone the same trace across attempt loops in
/// [`crate::dynamic::runner::run_spec`] without copying events.
pub trace: Option<Arc<crate::dynamic::trace::VerifyTrace>>,
}
/// Phase 17 (Track E.1): selects which subset of the Linux process-
@ -284,6 +292,7 @@ impl Default for SandboxOptions {
stub_harness: None,
seccomp_caps: 0,
process_hardening: ProcessHardeningProfile::Standard,
trace: None,
}
}
}

226
src/dynamic/trace.rs Normal file
View file

@ -0,0 +1,226 @@
//! Verify-pipeline trace (Phase 30 — Track C observability).
//!
//! [`VerifyTrace`] is a structured, deterministic record of every stage
//! a single [`crate::dynamic::verify::verify_finding`] call walks
//! through. Two uses:
//!
//! 1. **`--verbose` stderr stream** — when
//! [`crate::dynamic::verify::VerifyOptions::trace_verbose`] is set the
//! verifier prints every recorded event to stderr via
//! [`VerifyTrace::print_to_stderr`] once the verify call has recorded
//! its verdict stage. Operators see how far a run progressed and
//! which payload triggered without re-running under a debugger.
//! 2. **Repro bundle serialisation** — the trace is emitted into the
//! Phase 28 repro bundle as `expected/trace.jsonl` so a replay knows
//! the canonical sequence its run is expected to mirror. Together
//! with the Phase 27 `events.jsonl` log this gives a forensic
//! "what did the verifier do?" picture that does not require
//! re-running the binary.
//!
//! # Determinism contract
//!
//! `TraceEvent` deliberately omits wall-clock timestamps and durations
//! so two runs of the same finding produce a byte-identical sequence.
//! The Phase 30 acceptance test (`tests/determinism_audit.rs`) runs the
//! verifier 10× on a fixed input and asserts every serialised trace is
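//! identical. Elapsed-time annotations may still be useful for a
//! stderr printer, but they must be computed at print time (e.g. from
//! `Instant::now()`) and never persisted; the current
//! [`VerifyTrace::print_to_stderr`] emits only sequence, stage, and
//! detail.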
use serde::{Deserialize, Serialize};
use std::sync::Mutex;
/// Distinct stages emitted by the verifier. The names match the Phase
/// 30 spec literal so audit logs grep for `oracle_observed` /
/// `verdict` directly.
///
/// Serialised as snake_case strings so the on-disk trace reads cleanly
/// in `jq` without a string-versus-enum decoder.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum TraceStage {
    /// Recorded at the top of `verify_finding`, with the rule id and
    /// path of the finding.
    SpecStarted,
    /// Recorded after the harness spec was derived successfully, with
    /// its spec hash, language, and entry kind.
    SpecDone,
    /// Recorded by `run_spec` before the harness build is attempted.
    BuildStarted,
    /// Recorded by `run_spec` after the build, with the attempt count.
    BuildDone,
    /// Recorded before each `sandbox::run` call, with the attempt
    /// index, payload label, and oracle name.
    SandboxStarted,
    /// Recorded right after `sandbox::run` returns, with the exit code
    /// and timeout flag.
    OracleWait,
    /// Recorded after the oracle was evaluated, with the fired /
    /// sink-hit flags.
    OracleObserved,
    /// Terminal stage: recorded on a policy deny, on spec-derivation
    /// failure, and with the final verdict status.
    Verdict,
}
impl TraceStage {
/// Stable label used by the stderr printer. Lowercase, no
/// punctuation, so a CI log scan can grep `^[T] oracle_observed`
/// straightforwardly.
pub fn as_str(&self) -> &'static str {
match self {
Self::SpecStarted => "spec_started",
Self::SpecDone => "spec_done",
Self::BuildStarted => "build_started",
Self::BuildDone => "build_done",
Self::SandboxStarted => "sandbox_started",
Self::OracleWait => "oracle_wait",
Self::OracleObserved => "oracle_observed",
Self::Verdict => "verdict",
}
}
}
/// One row of a [`VerifyTrace`].
///
/// `sequence` is the per-trace ordinal — explicit rather than implicit
/// in `Vec` order because the JSON-lines format on disk lets each line
/// stand alone (operators may sort / filter externally). `detail` is
/// a short, human-friendly free-form note (payload label, build attempt
/// counter, …); kept under 200 chars by callers.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct TraceEvent {
    /// Zero-based position of this event within its trace, assigned by
    /// [`VerifyTrace::record`].
    pub sequence: u32,
    /// Pipeline stage this event marks.
    pub stage: TraceStage,
    /// Optional free-form note. Left out of the serialised JSON when
    /// `None`, and read back as `None` when the key is absent.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub detail: Option<String>,
}
/// Ordered record of every stage the verifier walks through.
///
/// Append via [`VerifyTrace::record`] (thread-safe; protected by an
/// internal `Mutex` so the sandbox/runner thread and the verifier can
/// share the same handle). Read deterministically via
/// [`VerifyTrace::events`].
#[derive(Debug, Default)]
pub struct VerifyTrace {
    // Event list plus the next sequence number, guarded together so an
    // append assigns its ordinal and pushes under a single lock.
    inner: Mutex<TraceInner>,
}
/// Mutable state behind the [`VerifyTrace`] mutex.
#[derive(Debug, Default)]
struct TraceInner {
    // Recorded events, in append order.
    events: Vec<TraceEvent>,
    // Ordinal handed to the next recorded event; starts at 0.
    next_sequence: u32,
}
impl VerifyTrace {
    /// Create an empty trace. Equivalent to [`Default::default`].
    pub fn new() -> Self {
        Self::default()
    }

    /// Append `stage` with an optional `detail` note, assigning it the
    /// next sequence number.
    ///
    /// A poisoned lock turns the call into a no-op so a panicking
    /// caller does not corrupt downstream traces; the trace is
    /// observability, not load-bearing state.
    pub fn record(&self, stage: TraceStage, detail: Option<String>) {
        if let Ok(mut state) = self.inner.lock() {
            let sequence = state.next_sequence;
            state.next_sequence = sequence.wrapping_add(1);
            state.events.push(TraceEvent {
                sequence,
                stage,
                detail,
            });
        }
    }

    /// Snapshot of the recorded events in append order.
    ///
    /// The vector is cloned so the caller can serialise or drain it
    /// without holding the lock. Returns an empty vector when the lock
    /// is poisoned.
    pub fn events(&self) -> Vec<TraceEvent> {
        self.inner
            .lock()
            .map(|state| state.events.clone())
            .unwrap_or_default()
    }

    /// Serialise the trace as a JSON-lines string: one [`TraceEvent`]
    /// per line, each terminated by `\n`, so the file is greppable and
    /// tolerant of truncation (any prefix is still valid JSON-lines).
    pub fn to_jsonl(&self) -> String {
        let events = self.events();
        let mut jsonl = String::with_capacity(events.len() * 80);
        // `serde_json::to_string` is not expected to fail for these
        // field types (`u32`, fixed enum, optional `String`); a failing
        // event is simply left out.
        let lines = events
            .iter()
            .filter_map(|event| serde_json::to_string(event).ok());
        for line in lines {
            jsonl.push_str(&line);
            jsonl.push('\n');
        }
        jsonl
    }

    /// Best-effort stderr dump of every recorded event as
    /// `[T] <sequence> <stage> <detail>`, so a tail of a verify log can
    /// find trace rows quickly.
    ///
    /// Called when
    /// [`crate::dynamic::verify::VerifyOptions::trace_verbose`] is set.
    /// Write failures are ignored because trace output is
    /// observability, not a verdict input.
    pub fn print_to_stderr(&self) {
        use std::io::Write;

        // Snapshot first so the trace lock is released before stderr
        // is locked.
        let snapshot = self.events();
        let mut sink = std::io::stderr().lock();
        for event in &snapshot {
            let note = event.detail.as_deref().unwrap_or_default();
            let _ = writeln!(sink, "[T] {} {} {}", event.sequence, event.stage.as_str(), note);
        }
        let _ = sink.flush();
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a trace by recording `rows` in order.
    fn trace_of(rows: &[(TraceStage, Option<&str>)]) -> VerifyTrace {
        let trace = VerifyTrace::new();
        for &(stage, detail) in rows {
            trace.record(stage, detail.map(str::to_owned));
        }
        trace
    }

    #[test]
    fn record_assigns_monotonic_sequences() {
        let events = trace_of(&[
            (TraceStage::SpecStarted, None),
            (TraceStage::SpecDone, Some("py.cmdi.os_system")),
            (TraceStage::Verdict, Some("Confirmed")),
        ])
        .events();

        assert_eq!(events.len(), 3);
        let sequences: Vec<u32> = events.iter().map(|ev| ev.sequence).collect();
        assert_eq!(sequences, [0, 1, 2]);
        assert_eq!(events[0].stage, TraceStage::SpecStarted);
        assert_eq!(events[2].stage, TraceStage::Verdict);
    }

    #[test]
    fn jsonl_is_deterministic_for_same_sequence() {
        let rows = [
            (TraceStage::SpecStarted, None),
            (TraceStage::Verdict, Some("NotConfirmed")),
        ];
        assert_eq!(trace_of(&rows).to_jsonl(), trace_of(&rows).to_jsonl());
    }

    #[test]
    fn jsonl_round_trips_through_serde() {
        let jsonl = trace_of(&[
            (TraceStage::SandboxStarted, Some("payload=sqli-tautology")),
            (TraceStage::OracleObserved, Some("fired=true")),
        ])
        .to_jsonl();

        let parsed: Vec<TraceEvent> = jsonl
            .lines()
            .map(|line| serde_json::from_str(line).expect("trace line should parse"))
            .collect();

        assert_eq!(parsed.len(), 2);
        assert_eq!(parsed[0].stage, TraceStage::SandboxStarted);
        assert_eq!(parsed[1].stage, TraceStage::OracleObserved);
    }

    #[test]
    fn stage_as_str_matches_spec_names() {
        // Phase 30 spec literal: the verifier stage names must map to
        // these exact tokens so audit grep queries stay stable.
        let expected = [
            (TraceStage::SpecStarted, "spec_started"),
            (TraceStage::SpecDone, "spec_done"),
            (TraceStage::BuildStarted, "build_started"),
            (TraceStage::BuildDone, "build_done"),
            (TraceStage::SandboxStarted, "sandbox_started"),
            (TraceStage::OracleWait, "oracle_wait"),
            (TraceStage::OracleObserved, "oracle_observed"),
            (TraceStage::Verdict, "verdict"),
        ];
        for (stage, label) in expected {
            assert_eq!(stage.as_str(), label);
        }
    }
}

View file

@ -66,6 +66,11 @@ pub struct VerifyOptions {
/// event emitted from the verify pipeline. Default `keep_all` so unit
/// tests and embedded callers do not silently lose records.
pub telemetry_policy: SamplingPolicy,
/// Phase 30 (Track C observability): when `true` the verifier prints
/// every recorded [`crate::dynamic::trace::TraceEvent`] to stderr at
/// end-of-verify. Wired to the future `--verbose` CLI flag; off by
/// default so non-interactive scans stay quiet.
pub trace_verbose: bool,
}
impl VerifyOptions {
@ -121,6 +126,7 @@ impl VerifyOptions {
callgraph: None,
refuse_filesystem_confirm,
telemetry_policy: SamplingPolicy::from_config(&config.telemetry),
trace_verbose: false,
}
}
}
@ -387,6 +393,61 @@ fn derivation_failure_hint(diag: &Diag) -> String {
pub fn verify_finding(diag: &Diag, opts: &VerifyOptions) -> VerifyResult {
let finding_id = format!("{:016x}", diag.stable_hash);
// Phase 30 (Track C observability): one trace per finding, threaded
// into [`SandboxOptions`] so the runner can append `build_*` /
// `sandbox_started` / `oracle_*` stages from inside `run_spec`.
let trace = Arc::new(crate::dynamic::trace::VerifyTrace::new());
trace.record(
crate::dynamic::trace::TraceStage::SpecStarted,
Some(format!("rule={} path={}", diag.id, diag.path)),
);
// Phase 30 §C — cross-cutting policy deny rules. Findings whose
// static metadata mentions credentials, private keys, or production
// endpoint regexes are refused up front: the sandbox is never
// started and no payload is materialised, so a leaked secret cannot
// round-trip through the harness even if the deny rule is wrong.
// The verifier returns `Inconclusive(PolicyDeniedDynamic)` so the
// operator sees *why* dynamic execution was skipped without losing
// the static finding from the report.
if let crate::dynamic::policy::PolicyDecision::Deny { rule, excerpt } =
crate::dynamic::policy::evaluate(diag)
{
trace.record(
crate::dynamic::trace::TraceStage::Verdict,
Some(format!("policy_denied rule={rule}")),
);
if opts.trace_verbose {
trace.print_to_stderr();
}
let inconclusive_reason = InconclusiveReason::PolicyDeniedDynamic {
rule: rule.to_owned(),
excerpt: excerpt.clone(),
};
// Emit telemetry so the Phase 27 events log records the deny —
// operators triaging refusals need it on the wire even though
// the sandbox never ran.
let tel_event = TelemetryEvent::no_spec(
diag,
VerifyStatus::Inconclusive,
Some(inconclusive_reason.clone()),
);
telemetry::emit_with_policy(&tel_event, &opts.telemetry_policy);
return VerifyResult {
finding_id,
status: VerifyStatus::Inconclusive,
triggered_payload: None,
reason: None,
inconclusive_reason: Some(inconclusive_reason),
detail: Some(format!(
"dynamic execution refused by policy rule {rule}"
)),
attempts: vec![],
toolchain_match: None,
differential: None,
};
}
let spec = match HarnessSpec::from_finding_full(
diag,
opts.verify_all_confidence,
@ -395,6 +456,13 @@ pub fn verify_finding(diag: &Diag, opts: &VerifyOptions) -> VerifyResult {
) {
Ok(s) => s,
Err(reason) => {
trace.record(
crate::dynamic::trace::TraceStage::Verdict,
Some(format!("spec_derivation_failed reason={reason:?}")),
);
if opts.trace_verbose {
trace.print_to_stderr();
}
return spec_derivation_failed_verdict(
finding_id,
diag,
@ -403,6 +471,13 @@ pub fn verify_finding(diag: &Diag, opts: &VerifyOptions) -> VerifyResult {
);
}
};
trace.record(
crate::dynamic::trace::TraceStage::SpecDone,
Some(format!(
"spec_hash={} lang={:?} entry_kind={:?}",
spec.spec_hash, spec.lang, spec.entry_kind
)),
);
// Pre-flight gate: surface a structured `Inconclusive(EntryKindUnsupported)`
// up-front when the spec's [`EntryKind`] is not in the lang emitter's
@ -545,6 +620,11 @@ pub fn verify_finding(diag: &Diag, opts: &VerifyOptions) -> VerifyResult {
if !stub_harness.is_empty() {
sandbox_opts.stub_harness = Some(Arc::clone(&stub_harness));
}
// Phase 30: hand the runner an `Arc` clone so it can append
// `build_*` / `sandbox_started` / `oracle_*` stages from inside
// `run_spec`. The verifier still owns the trace for verdict-stage
// appending after `run_spec` returns.
sandbox_opts.trace = Some(Arc::clone(&trace));
let start = Instant::now();
let result = run_spec(&spec, &sandbox_opts);
@ -589,9 +669,21 @@ pub fn verify_finding(diag: &Diag, opts: &VerifyOptions) -> VerifyResult {
);
telemetry::emit_with_policy(&event, &opts.telemetry_policy);
// Phase 30 — verdict is the terminal trace stage. Recorded after
// cache insert + telemetry so the trace reflects the full pipeline
// the operator just saw run.
trace.record(
crate::dynamic::trace::TraceStage::Verdict,
Some(format!("status={:?}", verdict.status)),
);
if opts.trace_verbose {
trace.print_to_stderr();
}
verdict
}
fn build_verdict(
finding_id: &str,
spec: &HarnessSpec,