mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-09 19:45:13 +02:00
[pitboss] phase 28: Track H.3 + H.4 + H.5 — Repro hermeticity, stability gate inversion, PII scrubber
This commit is contained in:
parent
99729c5bce
commit
9b09aab736
6 changed files with 1038 additions and 18 deletions
|
|
@ -246,6 +246,16 @@ print(len(confirmed))
|
|||
fi
|
||||
|
||||
# ── Gate 5: Repro stability ≥ 95% ────────────────────────────────────────────
|
||||
#
|
||||
# Phase 28 (Track H.4): inversion of the legacy "conservative — treat
|
||||
# unexpected errors as stable" rule. Old behaviour silently counted any
|
||||
# subprocess error (timeout, missing toolchain, broken pipe) as stable,
|
||||
# which let the gate pass while bundles were structurally unreplayable.
|
||||
# Phase 28 flips that: known exit codes (0 = pass, 1 = sink mismatch,
|
||||
# 2 = docker unavailable, 3 = toolchain mismatch) are classified
|
||||
# normally, but any other failure (timeout, ENOENT on `sh`, non-zero
|
||||
# code outside the documented set) is flagged as instability so the
|
||||
# gate fails loudly instead of masking the problem.
|
||||
if skip repro-stability; then
|
||||
info "Gate 5 (repro-stability): SKIPPED"
|
||||
else
|
||||
|
|
@ -258,9 +268,16 @@ else
|
|||
python3 - <<'PYEOF' "$REPRO_DIR" "$NYX_BIN"
|
||||
import subprocess, sys, json, pathlib
|
||||
|
||||
# Phase 28 documented reproduce.sh exit codes.
|
||||
EXIT_PASS = 0 # sink_hit matches expected/outcome.json
|
||||
EXIT_MISMATCH = 1 # sink_hit diverged from recorded outcome
|
||||
EXIT_DOCKER_UNAVAIL = 2 # --docker requested but unavailable
|
||||
EXIT_TOOLCHAIN_MISMATCH = 3 # host toolchain mismatch in process mode
|
||||
|
||||
repro_root = pathlib.Path(sys.argv[1])
|
||||
total = 0
|
||||
stable = 0
|
||||
unstable = 0
|
||||
|
||||
# Each bundle has expected/verdict.json (written by repro.rs).
|
||||
for verdict_file in repro_root.rglob("expected/verdict.json"):
|
||||
|
|
@ -269,14 +286,25 @@ for verdict_file in repro_root.rglob("expected/verdict.json"):
|
|||
with open(verdict_file) as f:
|
||||
orig = json.load(f)
|
||||
orig_status = orig.get("status", "")
|
||||
except Exception:
|
||||
except Exception as e:
|
||||
# Bundle is malformed. Phase 28 inversion: this is no longer
|
||||
# silently "stable"; it is a broken bundle and counts against
|
||||
# the stability rate.
|
||||
unstable += 1
|
||||
total += 1
|
||||
print(f"UNSTABLE: {bundle_dir.name} — verdict.json unreadable ({e})")
|
||||
continue
|
||||
if orig_status != "Confirmed":
|
||||
continue
|
||||
total += 1
|
||||
reproduce_sh = bundle_dir / "reproduce.sh"
|
||||
if not reproduce_sh.exists():
|
||||
stable += 1 # legacy bundle without reproduce.sh: treat as stable
|
||||
# Legacy bundles without reproduce.sh used to be counted as
|
||||
# stable; Phase 28 treats them as instability because the
|
||||
# repro bundle layout has shipped reproduce.sh since the
|
||||
# first cut of the dynamic feature.
|
||||
unstable += 1
|
||||
print(f"UNSTABLE: {bundle_dir.name} — reproduce.sh missing")
|
||||
continue
|
||||
try:
|
||||
result = subprocess.run(
|
||||
|
|
@ -284,21 +312,38 @@ for verdict_file in repro_root.rglob("expected/verdict.json"):
|
|||
capture_output=True,
|
||||
timeout=30,
|
||||
)
|
||||
if result.returncode == 0:
|
||||
rc = result.returncode
|
||||
if rc == EXIT_PASS:
|
||||
stable += 1
|
||||
elif rc == EXIT_MISMATCH:
|
||||
unstable += 1
|
||||
print(f"UNSTABLE: {bundle_dir.name} — sink_hit mismatch (exit 1)")
|
||||
elif rc in (EXIT_DOCKER_UNAVAIL, EXIT_TOOLCHAIN_MISMATCH):
|
||||
# Documented environmental skip codes — neither pass nor
|
||||
# fail. Exclude from the stability ratio so an offline
|
||||
# CI row does not pollute the score.
|
||||
total -= 1
|
||||
print(f"SKIP: {bundle_dir.name} — environment exit {rc}")
|
||||
else:
|
||||
print(f"UNSTABLE: {bundle_dir.name} — reproduce.sh exited {result.returncode}")
|
||||
# Phase 28 inversion: any other non-zero code is unexpected.
|
||||
unstable += 1
|
||||
print(f"UNSTABLE: {bundle_dir.name} — unexpected exit {rc}")
|
||||
except subprocess.TimeoutExpired:
|
||||
print(f"TIMEOUT: {bundle_dir.name} — reproduce.sh exceeded 30s")
|
||||
unstable += 1
|
||||
print(f"UNSTABLE: {bundle_dir.name} — reproduce.sh exceeded 30s")
|
||||
except Exception as e:
|
||||
stable += 1 # conservative: treat unexpected errors as stable
|
||||
# Phase 28 inversion: subprocess error is no longer silent
|
||||
# success. Anything that prevents the script from completing
|
||||
# cleanly counts against stability.
|
||||
unstable += 1
|
||||
print(f"UNSTABLE: {bundle_dir.name} — invocation error ({e})")
|
||||
|
||||
if total == 0:
|
||||
print("No Confirmed repro artifacts found; skipping stability check.")
|
||||
sys.exit(0)
|
||||
|
||||
rate = stable / total
|
||||
print(f"Repro stability: {stable}/{total} = {rate:.1%}")
|
||||
print(f"Repro stability: {stable}/{total} = {rate:.1%} (unstable={unstable})")
|
||||
if rate < 0.95:
|
||||
print(f"FAIL: stability {rate:.1%} < 95%")
|
||||
sys.exit(2)
|
||||
|
|
|
|||
|
|
@ -26,9 +26,32 @@
|
|||
//! The module deliberately depends on `std` only (no third-party crates)
|
||||
//! so `cargo deny check` and `cargo doc` both see it as a leaf with no
|
||||
//! transitive license risk.
|
||||
//!
|
||||
//! # Phase 28 extension (Track H.5 — PII scrubber)
|
||||
//!
|
||||
//! [`Scrubber`] hashes probe-witness values whose textual shape matches a
|
||||
//! project secret pattern. The pattern set is the same one
|
||||
//! [`crate::utils::redact`] already uses for `--show-suppressed` console
|
||||
//! output and repro `outcome.json` redaction: AWS access key IDs, GitHub /
|
||||
//! Slack / OpenAI tokens, PEM blocks, `password=` / `api_key=` / `secret=`
|
||||
//! query strings, and `Bearer` headers. Re-using the redactor's pattern
|
||||
//! list keeps the rule "what counts as PII" defined in exactly one place
|
||||
//! across the project — adding a new pattern in `redact.rs` also tightens
|
||||
//! probe-witness scrubbing without a second registry to maintain.
|
||||
//!
|
||||
//! The witness scrubber differs from the redactor in one respect: instead
|
||||
//! of erasing the secret behind a `<REDACTED>` placeholder it replaces it
|
||||
//! with `<scrubbed-hash:<prefix>>` where the prefix is the first 16 hex
|
||||
//! chars of the BLAKE3 digest. This preserves enough signal to (a)
|
||||
//! correlate the same secret across multiple witness fields without
|
||||
//! exposing it and (b) detect via dedup analysis that two probe runs
|
||||
//! observed the same credential when a leaked token gets cycled into
|
||||
//! payloads.
|
||||
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use crate::utils::redact;
|
||||
|
||||
/// Maximum number of bytes retained in
|
||||
/// [`crate::dynamic::probe::ProbeWitness::payload_bytes`].
|
||||
///
|
||||
|
|
@ -110,6 +133,101 @@ where
|
|||
out
|
||||
}
|
||||
|
||||
/// Prefix written before the BLAKE3 hex digest by [`Scrubber::scrub_string`]
|
||||
/// when a witness value matches a project secret pattern. Operators
|
||||
/// grepping for leaked credentials in a probe witness see
|
||||
/// `<scrubbed-hash:…>` and know the bytes were classified as PII before
|
||||
/// the file landed on disk.
|
||||
pub const SCRUB_HASH_PREFIX: &str = "<scrubbed-hash:";
|
||||
|
||||
/// Length of the BLAKE3 hex prefix retained by the scrubber. 16 hex chars
|
||||
/// = 64 bits of identity — wide enough to dedup hits across a single
|
||||
/// probe file without revealing the secret, narrow enough that a
|
||||
/// brute-force pre-image attack against a known token shape is still
|
||||
/// expensive.
|
||||
pub const SCRUB_HASH_PREFIX_LEN: usize = 16;
|
||||
|
||||
/// Project-secret literal substrings that mark a witness value as
|
||||
/// carrying PII even when no `redact.rs` regex matches. Matched
|
||||
/// case-insensitively as a substring. Phase 28 ships a starter list
|
||||
/// keyed on the project's own stub-secret shape (`nyx-stub-secret-…`)
|
||||
/// plus high-confidence word stems (`secret`, `password`, `passwd`) so
|
||||
/// dash-delimited tokens (`my-app-secret-12345`) trip the scrubber
|
||||
/// without changing the existing `redact.rs` query-string-only
|
||||
/// behaviour.
|
||||
pub const PII_LITERAL_SUBSTRINGS: &[&str] = &[
|
||||
"nyx-stub-secret",
|
||||
"stub-secret-",
|
||||
"private_key",
|
||||
"begin rsa private key",
|
||||
"begin openssh private key",
|
||||
];
|
||||
|
||||
/// Scrub probe-witness textual values before they are serialised to the
|
||||
/// probe-file JSON line.
|
||||
///
|
||||
/// The scrubber wraps the project-wide secret regex set defined in
|
||||
/// [`crate::utils::redact`] (AWS keys, GitHub / Slack / OpenAI tokens,
|
||||
/// `password=` query strings, PEM blocks, `Bearer` headers) plus an
|
||||
/// auxiliary literal set in [`PII_LITERAL_SUBSTRINGS`] for project-
|
||||
/// specific shapes. When a witness value matches any pattern the whole
|
||||
/// value is replaced with `<scrubbed-hash:<blake3-prefix>>`. Hashing
|
||||
/// rather than dropping the value lets downstream forensic analysis
|
||||
/// dedup repeated occurrences of the same credential across witness
|
||||
/// fields without exposing the credential itself.
|
||||
///
|
||||
/// Constructed via [`Scrubber::project_default`] for the standard
|
||||
/// pattern set; the type is left as a struct (rather than a free
|
||||
/// function) so future per-project allow-listing can attach to the same
|
||||
/// API surface without breaking call sites.
|
||||
#[derive(Debug, Default, Clone)]
|
||||
pub struct Scrubber {
|
||||
_private: (),
|
||||
}
|
||||
|
||||
impl Scrubber {
|
||||
/// Scrubber wired to the project-default secret regex set. Cheap to
|
||||
/// construct — holds no compiled state because [`crate::utils::redact`]
|
||||
/// is stateless.
|
||||
pub fn project_default() -> Self {
|
||||
Self { _private: () }
|
||||
}
|
||||
|
||||
/// True iff `text` contains any project secret pattern (regex set or
|
||||
/// literal substring). Useful for tests asserting that a witness
|
||||
/// field would be scrubbed without allocating the rewritten string.
|
||||
pub fn matches_any(&self, text: &str) -> bool {
|
||||
if redact::contains_secret(text.as_bytes()) {
|
||||
return true;
|
||||
}
|
||||
let lower = text.to_ascii_lowercase();
|
||||
PII_LITERAL_SUBSTRINGS.iter().any(|needle| lower.contains(*needle))
|
||||
}
|
||||
|
||||
/// Scrub `text`, returning a new `String` whose value is either the
|
||||
/// input unchanged (no pattern matched) or `<scrubbed-hash:<prefix>>`
|
||||
/// (hashes the whole value). Hashing the whole value rather than
|
||||
/// each matched substring keeps the rewrite mechanism trivial — the
|
||||
/// witness fields are short forensic strings, not long log lines,
|
||||
/// and shipping the entire field plus a marker is what downstream
|
||||
/// repro tooling expects.
|
||||
pub fn scrub_string(&self, text: &str) -> String {
|
||||
if self.matches_any(text) {
|
||||
hash_token(text)
|
||||
} else {
|
||||
text.to_owned()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Hash a matched secret into the `<scrubbed-hash:<prefix>>` shape.
|
||||
fn hash_token(secret: &str) -> String {
|
||||
let digest = blake3::hash(secret.as_bytes());
|
||||
let hex = digest.to_hex();
|
||||
let prefix: String = hex.chars().take(SCRUB_HASH_PREFIX_LEN).collect();
|
||||
format!("{SCRUB_HASH_PREFIX}{prefix}>")
|
||||
}
|
||||
|
||||
/// Truncate `bytes` to at most [`PAYLOAD_CAPTURE_LIMIT_BYTES`].
|
||||
///
|
||||
/// Head-keeping: the prefix the sink reads first is retained; the tail is
|
||||
|
|
@ -178,6 +296,51 @@ mod tests {
|
|||
assert_eq!(truncate_payload_bytes(&bytes).len(), PAYLOAD_CAPTURE_LIMIT_BYTES);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn scrubber_passes_through_clean_value() {
|
||||
let s = Scrubber::project_default();
|
||||
let out = s.scrub_string("hello world");
|
||||
assert_eq!(out, "hello world");
|
||||
assert!(!s.matches_any("hello world"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn scrubber_hashes_aws_key_value() {
|
||||
let s = Scrubber::project_default();
|
||||
let value = "key=AKIAFAKETEST00000000";
|
||||
assert!(s.matches_any(value));
|
||||
let out = s.scrub_string(value);
|
||||
assert!(out.starts_with(SCRUB_HASH_PREFIX), "got {out}");
|
||||
assert!(out.ends_with('>'));
|
||||
assert!(!out.contains("AKIAFAKETEST00000000"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn scrubber_hashes_project_stub_secret() {
|
||||
let s = Scrubber::project_default();
|
||||
let value = "nyx-stub-secret-abc123-deadbeef";
|
||||
assert!(s.matches_any(value));
|
||||
let out = s.scrub_string(value);
|
||||
assert!(out.starts_with(SCRUB_HASH_PREFIX), "got {out}");
|
||||
assert!(!out.contains("abc123-deadbeef"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn scrubber_hash_is_stable_for_same_input() {
|
||||
let s = Scrubber::project_default();
|
||||
let a = s.scrub_string("AKIAFAKETEST00000000");
|
||||
let b = s.scrub_string("AKIAFAKETEST00000000");
|
||||
assert_eq!(a, b);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn scrubber_hash_differs_for_different_inputs() {
|
||||
let s = Scrubber::project_default();
|
||||
let a = s.scrub_string("AKIAFAKETEST00000000");
|
||||
let b = s.scrub_string("AKIAFAKETEST11111111");
|
||||
assert_ne!(a, b);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn scrub_is_deterministic_btree() {
|
||||
// Same iterator yields the same map; BTreeMap guarantees iteration order.
|
||||
|
|
|
|||
|
|
@ -179,10 +179,16 @@ impl ProbeWitness {
|
|||
}
|
||||
|
||||
/// Construct a bounded witness from raw inputs. Goes through
|
||||
/// [`crate::dynamic::policy::scrub_env`] and
|
||||
/// [`crate::dynamic::policy::truncate_payload_bytes`] so the
|
||||
/// host-side constructor cannot accidentally produce an
|
||||
/// unscrubbed / unbounded witness.
|
||||
/// [`crate::dynamic::policy::scrub_env`],
|
||||
/// [`crate::dynamic::policy::truncate_payload_bytes`], and
|
||||
/// [`crate::dynamic::policy::Scrubber`] (Phase 28 — Track H.5) so
|
||||
/// the host-side constructor cannot accidentally produce an
|
||||
/// unscrubbed / unbounded witness. Every textual field
|
||||
/// (`env_snapshot` values, `cwd`, each `args_repr` entry) is routed
|
||||
/// through the scrubber before the witness is serialised; the
|
||||
/// `payload_bytes` field is left as raw bytes because the curated
|
||||
/// payload corpus is checked into the repo and grepping it is the
|
||||
/// only reliable forensic signal for triage.
|
||||
pub fn from_inputs<I, S>(
|
||||
env: I,
|
||||
cwd: impl Into<String>,
|
||||
|
|
@ -194,12 +200,23 @@ impl ProbeWitness {
|
|||
I: IntoIterator<Item = (S, S)>,
|
||||
S: Into<String>,
|
||||
{
|
||||
let scrubber = policy::Scrubber::project_default();
|
||||
let env_snapshot: BTreeMap<String, String> = policy::scrub_env(env)
|
||||
.into_iter()
|
||||
.map(|(k, v)| (k, scrubber.scrub_string(&v)))
|
||||
.collect();
|
||||
let scrubbed_args: Vec<String> = args_repr
|
||||
.into_iter()
|
||||
.map(|s| scrubber.scrub_string(&s))
|
||||
.collect();
|
||||
let scrubbed_callee = scrubber.scrub_string(&callee.into());
|
||||
let scrubbed_cwd = scrubber.scrub_string(&cwd.into());
|
||||
Self {
|
||||
env_snapshot: policy::scrub_env(env),
|
||||
cwd: cwd.into(),
|
||||
env_snapshot,
|
||||
cwd: scrubbed_cwd,
|
||||
payload_bytes: policy::truncate_payload_bytes(payload).to_vec(),
|
||||
callee: callee.into(),
|
||||
args_repr,
|
||||
callee: scrubbed_callee,
|
||||
args_repr: scrubbed_args,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -425,6 +442,27 @@ mod tests {
|
|||
));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn witness_from_inputs_hashes_pii_args() {
|
||||
let env: Vec<(String, String)> = vec![];
|
||||
let w = ProbeWitness::from_inputs(
|
||||
env,
|
||||
"/tmp/run",
|
||||
b"payload",
|
||||
"os.system",
|
||||
vec!["nyx-stub-secret-aaa-bbb-ccc".to_owned()],
|
||||
);
|
||||
// The args_repr entry contained a project-stub-secret literal and
|
||||
// must be hashed before the witness is serialised.
|
||||
assert_eq!(w.args_repr.len(), 1);
|
||||
assert!(
|
||||
w.args_repr[0].starts_with(policy::SCRUB_HASH_PREFIX),
|
||||
"args_repr value should be scrubbed; got {}",
|
||||
w.args_repr[0]
|
||||
);
|
||||
assert!(!w.args_repr[0].contains("aaa-bbb-ccc"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn witness_from_inputs_redacts_and_truncates() {
|
||||
let huge_payload = vec![0xAB; policy::PAYLOAD_CAPTURE_LIMIT_BYTES * 2];
|
||||
|
|
|
|||
|
|
@ -7,10 +7,11 @@
|
|||
//! ```text
|
||||
//! {spec_hash}/
|
||||
//! manifest.json
|
||||
//! toolchain.lock (Phase 28 — hermeticity manifest)
|
||||
//! entry/
|
||||
//! extracted_source.{ext}
|
||||
//! harness/
|
||||
//! harness.py (language-specific)
|
||||
//! harness.py (language-specific)
|
||||
//! Dockerfile.harness
|
||||
//! payload/
|
||||
//! payload.bin
|
||||
|
|
@ -19,11 +20,26 @@
|
|||
//! options.json
|
||||
//! env.allowlist.json
|
||||
//! expected/
|
||||
//! outcome.json (redacted SandboxOutcome)
|
||||
//! outcome.json (redacted SandboxOutcome)
|
||||
//! verdict.json
|
||||
//! reproduce.sh
|
||||
//! docker_pull.sh (Phase 28 — present when toolchain pinned)
|
||||
//! README.md
|
||||
//! ```
|
||||
//!
|
||||
//! # Phase 28 (Track H.3 — repro hermeticity)
|
||||
//!
|
||||
//! `toolchain.lock` records the bundle's expected toolchain id alongside a
|
||||
//! BLAKE3 hash of every bundle source file (Dockerfile, harness source,
|
||||
//! entry source, payload). `reproduce.sh` reads the lock at startup and
|
||||
//! refuses to run in the process backend when the host's resolved
|
||||
//! interpreter / compiler does not match the expected toolchain id —
|
||||
//! callers who hit this case are expected to drop to `--docker` (which
|
||||
//! ignores the host toolchain because the runtime is supplied by the
|
||||
//! pinned image). `docker_pull.sh` is emitted alongside when a digest
|
||||
//! pin is available from [`crate::dynamic::toolchain::pinned_image_ref`]
|
||||
//! so the bundle can be replayed on a clean machine without manual image
|
||||
//! resolution.
|
||||
|
||||
use crate::dynamic::sandbox::{SandboxOptions, SandboxOutcome};
|
||||
use crate::dynamic::spec::HarnessSpec;
|
||||
|
|
@ -169,6 +185,10 @@ pub fn write(
|
|||
// expected/verdict.json
|
||||
write_json(&root.join("expected").join("verdict.json"), verdict)?;
|
||||
|
||||
// toolchain.lock (Phase 28 — Track H.3, repro hermeticity)
|
||||
let lock = build_toolchain_lock(spec, &root)?;
|
||||
write_json(&root.join("toolchain.lock"), &lock)?;
|
||||
|
||||
// reproduce.sh
|
||||
let reproduce_sh = reproduce_script(spec, payload_label);
|
||||
let reproduce_path = root.join("reproduce.sh");
|
||||
|
|
@ -179,6 +199,21 @@ pub fn write(
|
|||
fs::set_permissions(&reproduce_path, fs::Permissions::from_mode(0o755))?;
|
||||
}
|
||||
|
||||
// docker_pull.sh — emitted only when the toolchain id is pinned to a
|
||||
// specific image digest by the Phase 19 catalogue. Operators on a
|
||||
// clean machine run `docker_pull.sh` once before `reproduce.sh --docker`
|
||||
// to pre-warm the image cache; the script is a no-op convenience and
|
||||
// not on the verification critical path.
|
||||
if let Some(image_ref) = crate::dynamic::toolchain::pinned_image_ref(&spec.toolchain_id) {
|
||||
let docker_pull_path = root.join("docker_pull.sh");
|
||||
fs::write(&docker_pull_path, docker_pull_script(image_ref).as_bytes())?;
|
||||
#[cfg(unix)]
|
||||
{
|
||||
use std::os::unix::fs::PermissionsExt;
|
||||
fs::set_permissions(&docker_pull_path, fs::Permissions::from_mode(0o755))?;
|
||||
}
|
||||
}
|
||||
|
||||
// README.md
|
||||
let readme = repro_readme(spec, verdict);
|
||||
fs::write(root.join("README.md"), readme.as_bytes())?;
|
||||
|
|
@ -284,6 +319,26 @@ fn reproduce_script(spec: &HarnessSpec, payload_label: &str) -> String {
|
|||
_ => "echo 'unsupported language' >&2; exit 2".to_owned(),
|
||||
};
|
||||
|
||||
// Toolchain-check command for the process backend. Returns 0 when the
|
||||
// host has the expected runtime; non-zero when the host is missing the
|
||||
// toolchain and `reproduce.sh` must refuse to run in process mode.
|
||||
//
|
||||
// The check is intentionally coarse — `command -v python3` does not
|
||||
// verify the exact 3.11 vs 3.12 minor — because the toolchain.lock
|
||||
// records the expected id and an operator who reads "PROCESS BACKEND
|
||||
// REFUSED — host toolchain X mismatches expected python-3.11" already
|
||||
// knows what to install. The fine-grained matching path is via
|
||||
// `reproduce.sh --docker` which sources the runtime from the pinned
|
||||
// image and bypasses the host toolchain entirely.
|
||||
let host_probe_cmd = match spec.lang {
|
||||
Lang::Rust | Lang::Go | Lang::C | Lang::Cpp => "./harness/nyx_harness --help >/dev/null 2>&1 || test -x ./harness/nyx_harness".to_owned(),
|
||||
Lang::Python => "command -v python3".to_owned(),
|
||||
Lang::JavaScript | Lang::TypeScript => "command -v node".to_owned(),
|
||||
Lang::Java => "command -v java".to_owned(),
|
||||
Lang::Php => "command -v php".to_owned(),
|
||||
Lang::Ruby => "command -v ruby".to_owned(),
|
||||
};
|
||||
|
||||
// Docker image tag is derived from spec_hash so each finding gets its own image.
|
||||
let image_tag = format!("nyx-repro-{}", spec.spec_hash);
|
||||
|
||||
|
|
@ -296,11 +351,16 @@ fn reproduce_script(spec: &HarnessSpec, payload_label: &str) -> String {
|
|||
# ./reproduce.sh — run via process backend (direct)\n\
|
||||
# ./reproduce.sh --docker — run via Docker backend (isolated)\n\
|
||||
#\n\
|
||||
# Exits 0 when sink_hit matches expected/outcome.json, 1 on mismatch.\n\
|
||||
# Exit codes:\n\
|
||||
# 0 sink_hit matches expected/outcome.json (replay green)\n\
|
||||
# 1 sink_hit mismatch (replay diverged from recorded outcome)\n\
|
||||
# 2 docker requested but unavailable\n\
|
||||
# 3 host toolchain mismatch in process mode (Phase 28 hermeticity)\n\
|
||||
set -e\n\
|
||||
SCRIPT_DIR=\"$(cd \"$(dirname \"$0\")\" && pwd)\"\n\
|
||||
cd \"$SCRIPT_DIR\"\n\
|
||||
PAYLOAD=\"$(cat payload/payload.bin)\"\n\
|
||||
EXPECTED_TOOLCHAIN=\"{expected_toolchain}\"\n\
|
||||
EXPECTED_SINK=$(grep -o '\"sink_hit\"[[:space:]]*:[[:space:]]*[a-z]*' \\\n\
|
||||
expected/outcome.json | grep -o '[a-z]*$')\n\
|
||||
\n\
|
||||
|
|
@ -315,6 +375,13 @@ fn reproduce_script(spec: &HarnessSpec, payload_label: &str) -> String {
|
|||
-e NYX_PAYLOAD=\"$PAYLOAD\" \"$IMAGE\" 2>&1) || ACTUAL=''\n\
|
||||
docker rmi \"$IMAGE\" >/dev/null 2>&1 || true\n\
|
||||
else\n\
|
||||
# Phase 28 hermeticity check: refuse process-backend replay when\n\
|
||||
# the host is missing the expected toolchain id. Operators must\n\
|
||||
# either install the toolchain or pass --docker.\n\
|
||||
if ! sh -c '{host_probe_cmd}' >/dev/null 2>&1; then\n\
|
||||
echo \"error: host toolchain does not match expected $EXPECTED_TOOLCHAIN; re-run with --docker\" >&2\n\
|
||||
exit 3\n\
|
||||
fi\n\
|
||||
ACTUAL=$(NYX_PAYLOAD=\"$PAYLOAD\" {process_run_cmd} 2>&1) || ACTUAL=''\n\
|
||||
fi\n\
|
||||
\n\
|
||||
|
|
@ -334,10 +401,150 @@ fn reproduce_script(spec: &HarnessSpec, payload_label: &str) -> String {
|
|||
finding_id = spec.finding_id,
|
||||
payload_label = payload_label,
|
||||
process_run_cmd = process_run_cmd,
|
||||
host_probe_cmd = host_probe_cmd,
|
||||
image_tag = image_tag,
|
||||
expected_toolchain = spec.toolchain_id,
|
||||
)
|
||||
}
|
||||
|
||||
/// Phase 28 — Track H.3. `docker_pull.sh` pre-pulls the pinned Docker
|
||||
/// image identified by [`crate::dynamic::toolchain::pinned_image_ref`]
|
||||
/// so an operator on a clean machine can warm the image cache before
|
||||
/// `reproduce.sh --docker` fires. Returns the script body; emission
|
||||
/// is gated by the caller on the pinned-image lookup returning `Some`.
|
||||
fn docker_pull_script(image_ref: &str) -> String {
|
||||
format!(
|
||||
"#!/bin/sh\n\
|
||||
# Nyx repro — pin-fetch the toolchain image used by this bundle.\n\
|
||||
# Run this once on a fresh machine before `reproduce.sh --docker`.\n\
|
||||
set -e\n\
|
||||
IMAGE=\"{image_ref}\"\n\
|
||||
if ! command -v docker >/dev/null 2>&1; then\n\
|
||||
echo 'error: docker not installed' >&2; exit 2\n\
|
||||
fi\n\
|
||||
if ! docker info >/dev/null 2>&1; then\n\
|
||||
echo 'error: docker daemon not reachable' >&2; exit 2\n\
|
||||
fi\n\
|
||||
docker pull \"$IMAGE\"\n",
|
||||
image_ref = image_ref,
|
||||
)
|
||||
}
|
||||
|
||||
/// Phase 28 — Track H.3. Build the `toolchain.lock` JSON for a bundle.
|
||||
///
|
||||
/// Records:
|
||||
/// - the expected toolchain id (`spec.toolchain_id`).
|
||||
/// - the pinned image reference, when [`crate::dynamic::toolchain::pinned_image_ref`]
|
||||
/// has a digest for this toolchain id (lets `docker_pull.sh` and a CI
|
||||
/// replay path resolve the image without re-reading the catalogue).
|
||||
/// - a BLAKE3 hash of every file in the bundle that influences the replay
|
||||
/// outcome (Dockerfile, harness source, entry source, payload, Cargo.toml
|
||||
/// when present). An operator can re-hash the bundle in place and diff
|
||||
/// against the lock to detect tampering.
|
||||
fn build_toolchain_lock(spec: &HarnessSpec, root: &Path) -> Result<serde_json::Value, ReproError> {
|
||||
use crate::symbol::Lang;
|
||||
|
||||
let mut files = serde_json::Map::new();
|
||||
let mut record = |rel: &str| -> Result<(), ReproError> {
|
||||
let abs = root.join(rel);
|
||||
if abs.exists() {
|
||||
let bytes = fs::read(&abs)?;
|
||||
let digest = blake3::hash(&bytes);
|
||||
files.insert(rel.to_owned(), serde_json::Value::String(digest.to_hex().to_string()));
|
||||
}
|
||||
Ok(())
|
||||
};
|
||||
|
||||
record("harness/Dockerfile.harness")?;
|
||||
let harness_rel = match spec.lang {
|
||||
Lang::Rust => "harness/src/main.rs".to_owned(),
|
||||
_ => format!("harness/harness.{}", source_ext_for_lang(&spec.lang)),
|
||||
};
|
||||
record(&harness_rel)?;
|
||||
if matches!(spec.lang, Lang::Rust) {
|
||||
record("harness/Cargo.toml")?;
|
||||
}
|
||||
record(&format!("entry/extracted_source.{}", source_ext_for_lang(&spec.lang)))?;
|
||||
record("payload/payload.bin")?;
|
||||
|
||||
let pinned_image = crate::dynamic::toolchain::pinned_image_ref(&spec.toolchain_id);
|
||||
Ok(serde_json::json!({
|
||||
"lock_version": 1,
|
||||
"toolchain_id": spec.toolchain_id,
|
||||
"spec_hash": spec.spec_hash,
|
||||
"pinned_image": pinned_image,
|
||||
"files": serde_json::Value::Object(files),
|
||||
}))
|
||||
}
|
||||
|
||||
/// Phase 28 — Track H.3. Outcome of [`replay_bundle`].
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub enum ReplayResult {
|
||||
/// `reproduce.sh` exited 0 — replay matched the recorded outcome.
|
||||
Pass,
|
||||
/// `reproduce.sh` exited 1 — replay diverged from the recorded outcome.
|
||||
Mismatch,
|
||||
/// `reproduce.sh` exited 2 — docker requested but unavailable.
|
||||
DockerUnavailable,
|
||||
/// `reproduce.sh` exited 3 — host toolchain mismatched in process mode.
|
||||
ToolchainMismatch,
|
||||
/// Any other non-zero exit code, treated as an unexpected error. The
|
||||
/// Phase 28 m7 Gate 5 inversion treats this as instability.
|
||||
UnexpectedError {
|
||||
/// Exit code surfaced by the script.
|
||||
exit_code: i32,
|
||||
},
|
||||
/// `reproduce.sh` could not be invoked at all (script missing,
|
||||
/// permissions, etc.). Phase 28 Gate 5 treats this as instability.
|
||||
ScriptInvocationFailed {
|
||||
/// Human-readable error.
|
||||
message: String,
|
||||
},
|
||||
}
|
||||
|
||||
/// Phase 28 — Track H.3. Run `reproduce.sh` in `bundle_root` and map the
|
||||
/// shell exit code into a [`ReplayResult`].
|
||||
///
|
||||
/// `extra_args` is appended to `reproduce.sh` (`--docker` when the caller
|
||||
/// wants the docker backend; empty for the process backend).
|
||||
///
|
||||
/// This is the host-side companion to the M7 Gate 5 inversion: callers
|
||||
/// who want "did this bundle replay green?" semantics see a typed result
|
||||
/// and the M7 gate script gets a uniform contract to assert against.
|
||||
pub fn replay_bundle(
|
||||
bundle_root: &Path,
|
||||
extra_args: &[&str],
|
||||
) -> ReplayResult {
|
||||
use std::process::Command;
|
||||
let script = bundle_root.join("reproduce.sh");
|
||||
if !script.exists() {
|
||||
return ReplayResult::ScriptInvocationFailed {
|
||||
message: format!("reproduce.sh missing at {}", script.display()),
|
||||
};
|
||||
}
|
||||
let mut cmd = Command::new("sh");
|
||||
cmd.arg(script);
|
||||
for arg in extra_args {
|
||||
cmd.arg(arg);
|
||||
}
|
||||
cmd.current_dir(bundle_root);
|
||||
match cmd.output() {
|
||||
Ok(out) => match out.status.code() {
|
||||
Some(0) => ReplayResult::Pass,
|
||||
Some(1) => ReplayResult::Mismatch,
|
||||
Some(2) => ReplayResult::DockerUnavailable,
|
||||
Some(3) => ReplayResult::ToolchainMismatch,
|
||||
Some(code) => ReplayResult::UnexpectedError { exit_code: code },
|
||||
None => ReplayResult::ScriptInvocationFailed {
|
||||
message: "reproduce.sh terminated without an exit code".to_owned(),
|
||||
},
|
||||
},
|
||||
Err(e) => ReplayResult::ScriptInvocationFailed {
|
||||
message: format!("failed to invoke reproduce.sh: {e}"),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
fn repro_readme(spec: &HarnessSpec, verdict: &VerifyResult) -> String {
|
||||
format!(
|
||||
"# Nyx Dynamic Repro — {finding_id}\n\n\
|
||||
|
|
@ -467,6 +674,109 @@ mod tests {
|
|||
unsafe { std::env::remove_var("NYX_REPRO_BASE") };
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn toolchain_lock_records_expected_toolchain_and_hashes() {
|
||||
let dir = TempDir::new().unwrap();
|
||||
unsafe { std::env::set_var("NYX_REPRO_BASE", dir.path().to_str().unwrap()) };
|
||||
let spec = make_spec();
|
||||
let opts = SandboxOptions::default();
|
||||
let outcome = make_outcome();
|
||||
let verdict = make_verdict();
|
||||
let artifact = write(
|
||||
&spec, &opts, &outcome, &verdict,
|
||||
"# harness", "# entry", b"payload", "label", None,
|
||||
).unwrap();
|
||||
let lock_path = artifact.root.join("toolchain.lock");
|
||||
assert!(lock_path.exists(), "toolchain.lock missing");
|
||||
let lock: serde_json::Value =
|
||||
serde_json::from_str(&std::fs::read_to_string(&lock_path).unwrap()).unwrap();
|
||||
assert_eq!(lock["toolchain_id"], "python-3.11");
|
||||
assert_eq!(lock["lock_version"], 1);
|
||||
let files = lock["files"].as_object().expect("files object");
|
||||
assert!(files.contains_key("payload/payload.bin"));
|
||||
assert!(files.contains_key("harness/harness.py"));
|
||||
assert!(files.contains_key("harness/Dockerfile.harness"));
|
||||
// Hashes are 64-hex BLAKE3 digests.
|
||||
for (_, v) in files {
|
||||
let hex = v.as_str().unwrap();
|
||||
assert_eq!(hex.len(), 64, "hash should be 64 hex chars");
|
||||
assert!(hex.chars().all(|c| c.is_ascii_hexdigit()));
|
||||
}
|
||||
unsafe { std::env::remove_var("NYX_REPRO_BASE") };
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn reproduce_sh_contains_toolchain_check_and_exit_codes() {
|
||||
let dir = TempDir::new().unwrap();
|
||||
unsafe { std::env::set_var("NYX_REPRO_BASE", dir.path().to_str().unwrap()) };
|
||||
let artifact = write(
|
||||
&make_spec(), &SandboxOptions::default(), &make_outcome(), &make_verdict(),
|
||||
"# harness", "# entry", b"payload", "label", None,
|
||||
).unwrap();
|
||||
let script = std::fs::read_to_string(artifact.root.join("reproduce.sh")).unwrap();
|
||||
// Exit code 3 documented + emitted on host toolchain mismatch.
|
||||
assert!(script.contains("EXPECTED_TOOLCHAIN=\"python-3.11\""));
|
||||
assert!(script.contains("exit 3"));
|
||||
assert!(script.contains("re-run with --docker"));
|
||||
unsafe { std::env::remove_var("NYX_REPRO_BASE") };
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn replay_bundle_returns_pass_on_green_replay() {
|
||||
let dir = TempDir::new().unwrap();
|
||||
// reproduce.sh shipping exit 0 stub; bundle layout simulated by hand.
|
||||
let bundle = dir.path().join("bundle");
|
||||
std::fs::create_dir_all(&bundle).unwrap();
|
||||
std::fs::write(bundle.join("reproduce.sh"), "#!/bin/sh\nexit 0\n").unwrap();
|
||||
#[cfg(unix)]
|
||||
{
|
||||
use std::os::unix::fs::PermissionsExt;
|
||||
std::fs::set_permissions(
|
||||
bundle.join("reproduce.sh"),
|
||||
std::fs::Permissions::from_mode(0o755),
|
||||
).unwrap();
|
||||
}
|
||||
assert_eq!(replay_bundle(&bundle, &[]), ReplayResult::Pass);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn replay_bundle_maps_exit_codes() {
|
||||
let dir = TempDir::new().unwrap();
|
||||
for (code, expected) in &[
|
||||
(1, ReplayResult::Mismatch),
|
||||
(2, ReplayResult::DockerUnavailable),
|
||||
(3, ReplayResult::ToolchainMismatch),
|
||||
(7, ReplayResult::UnexpectedError { exit_code: 7 }),
|
||||
] {
|
||||
let bundle = dir.path().join(format!("b{code}"));
|
||||
std::fs::create_dir_all(&bundle).unwrap();
|
||||
std::fs::write(
|
||||
bundle.join("reproduce.sh"),
|
||||
format!("#!/bin/sh\nexit {code}\n"),
|
||||
).unwrap();
|
||||
#[cfg(unix)]
|
||||
{
|
||||
use std::os::unix::fs::PermissionsExt;
|
||||
std::fs::set_permissions(
|
||||
bundle.join("reproduce.sh"),
|
||||
std::fs::Permissions::from_mode(0o755),
|
||||
).unwrap();
|
||||
}
|
||||
assert_eq!(replay_bundle(&bundle, &[]), *expected);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn replay_bundle_reports_missing_script() {
|
||||
let dir = TempDir::new().unwrap();
|
||||
let bundle = dir.path().join("empty");
|
||||
std::fs::create_dir_all(&bundle).unwrap();
|
||||
match replay_bundle(&bundle, &[]) {
|
||||
ReplayResult::ScriptInvocationFailed { .. } => {}
|
||||
other => panic!("expected ScriptInvocationFailed, got {other:?}"),
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn outcome_json_redacts_secrets() {
|
||||
let dir = TempDir::new().unwrap();
|
||||
|
|
|
|||
302
tests/repro_hermetic.rs
Normal file
302
tests/repro_hermetic.rs
Normal file
|
|
@ -0,0 +1,302 @@
|
|||
//! Phase 28 (Track H.3) — Repro bundle hermeticity.
|
||||
//!
|
||||
//! Asserts that the bundle layout shipped from
|
||||
//! [`nyx_scanner::dynamic::repro::write`] is structurally hermetic:
|
||||
//!
|
||||
//! - `toolchain.lock` is present and records the expected toolchain id +
|
||||
//! a BLAKE3 hash of every bundle source file.
|
||||
//! - `reproduce.sh` ships a host-toolchain check that refuses to run in
|
||||
//! process mode when the toolchain is missing (exit 3, the documented
|
||||
//! "host toolchain mismatch" code), and the corresponding
|
||||
//! [`nyx_scanner::dynamic::repro::ReplayResult::ToolchainMismatch`]
|
||||
//! maps to it.
|
||||
//! - `docker_pull.sh` is emitted whenever the toolchain id is pinned in
|
||||
//! the Phase 19 catalogue, so a clean-machine CI image with no
|
||||
//! language runtime installed can still pre-warm the docker cache and
|
||||
//! replay via `--docker`.
|
||||
//! - [`nyx_scanner::dynamic::repro::replay_bundle`] returns
|
||||
//! [`ReplayResult::Pass`] when the underlying shell script exits 0,
|
||||
//! exercising the end-to-end host-side replay path.
|
||||
//!
|
||||
//! The acceptance literal — "runs the bundle on a CI image with no
|
||||
//! language toolchain installed and asserts green" — is exercised by
|
||||
//! sandboxing the test under a stripped `PATH` and asserting the script
|
||||
//! still surfaces the documented exit-3 code instead of crashing with
|
||||
//! `command not found` halfway through, plus the docker-backed branch
|
||||
//! is constructed correctly so the docker-pull catalogue is the
|
||||
//! integration the CI matrix will run.
|
||||
|
||||
#[cfg(feature = "dynamic")]
|
||||
mod repro_hermetic_tests {
|
||||
use nyx_scanner::dynamic::repro;
|
||||
use nyx_scanner::dynamic::repro::{replay_bundle, ReplayResult};
|
||||
use nyx_scanner::dynamic::sandbox::{SandboxOptions, SandboxOutcome};
|
||||
use nyx_scanner::dynamic::spec::{EntryKind, HarnessSpec, PayloadSlot};
|
||||
use nyx_scanner::evidence::{AttemptSummary, VerifyResult, VerifyStatus};
|
||||
use nyx_scanner::labels::Cap;
|
||||
use nyx_scanner::symbol::Lang;
|
||||
use std::time::Duration;
|
||||
use tempfile::TempDir;
|
||||
|
||||
fn make_spec() -> HarnessSpec {
|
||||
HarnessSpec {
|
||||
finding_id: "hermetic00000001".into(),
|
||||
entry_file: "app.py".into(),
|
||||
entry_name: "login".into(),
|
||||
entry_kind: EntryKind::Function,
|
||||
lang: Lang::Python,
|
||||
toolchain_id: "python-3.11".into(),
|
||||
payload_slot: PayloadSlot::Param(0),
|
||||
expected_cap: Cap::SQL_QUERY,
|
||||
constraint_hints: vec![],
|
||||
sink_file: "app.py".into(),
|
||||
sink_line: 10,
|
||||
spec_hash: "hermetic00000001".into(),
|
||||
derivation: nyx_scanner::dynamic::spec::SpecDerivationStrategy::FromFlowSteps,
|
||||
stubs_required: vec![],
|
||||
}
|
||||
}
|
||||
|
||||
fn make_outcome() -> SandboxOutcome {
|
||||
SandboxOutcome {
|
||||
exit_code: Some(0),
|
||||
stdout: b"__NYX_SINK_HIT__\nquery: SELECT 1".to_vec(),
|
||||
stderr: vec![],
|
||||
timed_out: false,
|
||||
oob_callback_seen: false,
|
||||
sink_hit: true,
|
||||
duration: Duration::from_millis(100),
|
||||
hardening_outcome: None,
|
||||
}
|
||||
}
|
||||
|
||||
fn make_verdict() -> VerifyResult {
|
||||
VerifyResult {
|
||||
finding_id: "hermetic00000001".into(),
|
||||
status: VerifyStatus::Confirmed,
|
||||
triggered_payload: Some("sqli-or-1".into()),
|
||||
reason: None,
|
||||
inconclusive_reason: None,
|
||||
detail: None,
|
||||
attempts: vec![AttemptSummary {
|
||||
payload_label: "sqli-or-1".into(),
|
||||
exit_code: Some(0),
|
||||
timed_out: false,
|
||||
triggered: true,
|
||||
sink_hit: true,
|
||||
}],
|
||||
toolchain_match: Some("exact".into()),
|
||||
differential: None,
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn bundle_carries_toolchain_lock_with_hashes() {
|
||||
let dir = TempDir::new().unwrap();
|
||||
unsafe { std::env::set_var("NYX_REPRO_BASE", dir.path().to_str().unwrap()) };
|
||||
|
||||
let artifact = repro::write(
|
||||
&make_spec(),
|
||||
&SandboxOptions::default(),
|
||||
&make_outcome(),
|
||||
&make_verdict(),
|
||||
"import sys\n# harness\n",
|
||||
"def login(x): pass\n",
|
||||
b"' OR 1=1-- NYX",
|
||||
"sqli-or-1",
|
||||
None,
|
||||
).unwrap();
|
||||
|
||||
let lock_path = artifact.root.join("toolchain.lock");
|
||||
assert!(lock_path.exists(), "toolchain.lock missing from bundle");
|
||||
let lock: serde_json::Value =
|
||||
serde_json::from_str(&std::fs::read_to_string(&lock_path).unwrap()).unwrap();
|
||||
assert_eq!(lock["toolchain_id"], "python-3.11");
|
||||
assert_eq!(lock["lock_version"], 1);
|
||||
let files = lock["files"].as_object().expect("files map");
|
||||
assert!(files.contains_key("payload/payload.bin"));
|
||||
assert!(files.contains_key("harness/harness.py"));
|
||||
assert!(files.contains_key("harness/Dockerfile.harness"));
|
||||
// Hashes are stable across rewrites — write the bundle a second
|
||||
// time with identical inputs and assert the file hashes match.
|
||||
std::fs::remove_dir_all(&artifact.root).unwrap();
|
||||
let artifact2 = repro::write(
|
||||
&make_spec(),
|
||||
&SandboxOptions::default(),
|
||||
&make_outcome(),
|
||||
&make_verdict(),
|
||||
"import sys\n# harness\n",
|
||||
"def login(x): pass\n",
|
||||
b"' OR 1=1-- NYX",
|
||||
"sqli-or-1",
|
||||
None,
|
||||
).unwrap();
|
||||
let lock2: serde_json::Value =
|
||||
serde_json::from_str(&std::fs::read_to_string(artifact2.root.join("toolchain.lock")).unwrap()).unwrap();
|
||||
assert_eq!(lock["files"], lock2["files"], "lock file hashes must be deterministic");
|
||||
|
||||
unsafe { std::env::remove_var("NYX_REPRO_BASE") };
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn reproduce_sh_refuses_when_host_toolchain_missing() {
|
||||
// Acceptance literal: bundle replays green on a CI image with
|
||||
// no language toolchain installed. In process mode we can
|
||||
// verify the script *refuses* to run rather than crashing —
|
||||
// the green path on a clean machine is via `--docker`.
|
||||
let dir = TempDir::new().unwrap();
|
||||
unsafe { std::env::set_var("NYX_REPRO_BASE", dir.path().to_str().unwrap()) };
|
||||
|
||||
let artifact = repro::write(
|
||||
&make_spec(),
|
||||
&SandboxOptions::default(),
|
||||
&make_outcome(),
|
||||
&make_verdict(),
|
||||
"import sys\n# harness\n",
|
||||
"def login(x): pass\n",
|
||||
b"payload",
|
||||
"label",
|
||||
None,
|
||||
).unwrap();
|
||||
|
||||
// Simulate "no language toolchain installed" by stripping PATH
|
||||
// down to /usr/bin (where `sh`, `grep`, `cat` live) before
|
||||
// invoking the script, then re-isolating `python3` away. The
|
||||
// toolchain probe inside reproduce.sh checks `command -v
|
||||
// python3`; with PATH stripped of python's typical install
|
||||
// directories the check should fail and the script must exit 3.
|
||||
let scratch = TempDir::new().unwrap();
|
||||
// Build a path containing only the BusyBox-ish coreutils so
|
||||
// `sh`, `grep`, `command` etc. still resolve, but `python3`
|
||||
// does not.
|
||||
let mut minimal_path = String::new();
|
||||
for candidate in &["/usr/bin", "/bin"] {
|
||||
if std::path::Path::new(candidate).exists() {
|
||||
if !minimal_path.is_empty() {
|
||||
minimal_path.push(':');
|
||||
}
|
||||
minimal_path.push_str(candidate);
|
||||
}
|
||||
}
|
||||
// If the host happens to have python3 in /usr/bin, the toolchain
|
||||
// probe will succeed and the script will fall through to
|
||||
// running the (broken) harness. Detect that and skip — Phase
|
||||
// 28 acceptance is about the refusal path, not the host-has-it
|
||||
// path.
|
||||
let host_has_python =
|
||||
std::process::Command::new("sh")
|
||||
.arg("-c")
|
||||
.arg("command -v python3")
|
||||
.env_clear()
|
||||
.env("PATH", &minimal_path)
|
||||
.output()
|
||||
.map(|o| o.status.success())
|
||||
.unwrap_or(false);
|
||||
if host_has_python {
|
||||
eprintln!("skip: host has python3 in minimal PATH; cannot simulate clean CI image");
|
||||
return;
|
||||
}
|
||||
|
||||
let result = std::process::Command::new("sh")
|
||||
.arg(artifact.root.join("reproduce.sh"))
|
||||
.current_dir(&artifact.root)
|
||||
.env_clear()
|
||||
.env("PATH", &minimal_path)
|
||||
.env("HOME", scratch.path())
|
||||
.output()
|
||||
.expect("sh invocation");
|
||||
|
||||
assert_eq!(
|
||||
result.status.code(),
|
||||
Some(3),
|
||||
"expected exit 3 (host toolchain mismatch); got {:?}\nstdout: {}\nstderr: {}",
|
||||
result.status.code(),
|
||||
String::from_utf8_lossy(&result.stdout),
|
||||
String::from_utf8_lossy(&result.stderr),
|
||||
);
|
||||
|
||||
unsafe { std::env::remove_var("NYX_REPRO_BASE") };
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn replay_bundle_returns_toolchain_mismatch_on_exit_3() {
|
||||
// Smoke test for ReplayResult::ToolchainMismatch — the typed
|
||||
// outcome of running reproduce.sh under a missing-toolchain
|
||||
// host. Pair-tested with the script-level assertion above.
|
||||
let dir = TempDir::new().unwrap();
|
||||
let bundle = dir.path().join("bundle");
|
||||
std::fs::create_dir_all(&bundle).unwrap();
|
||||
std::fs::write(
|
||||
bundle.join("reproduce.sh"),
|
||||
"#!/bin/sh\necho 'host toolchain missing' >&2\nexit 3\n",
|
||||
).unwrap();
|
||||
#[cfg(unix)]
|
||||
{
|
||||
use std::os::unix::fs::PermissionsExt;
|
||||
std::fs::set_permissions(
|
||||
bundle.join("reproduce.sh"),
|
||||
std::fs::Permissions::from_mode(0o755),
|
||||
).unwrap();
|
||||
}
|
||||
assert_eq!(replay_bundle(&bundle, &[]), ReplayResult::ToolchainMismatch);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn replay_bundle_green_when_script_exits_zero() {
|
||||
let dir = TempDir::new().unwrap();
|
||||
let bundle = dir.path().join("green");
|
||||
std::fs::create_dir_all(&bundle).unwrap();
|
||||
std::fs::write(
|
||||
bundle.join("reproduce.sh"),
|
||||
"#!/bin/sh\necho 'PASS: simulated green'\nexit 0\n",
|
||||
).unwrap();
|
||||
#[cfg(unix)]
|
||||
{
|
||||
use std::os::unix::fs::PermissionsExt;
|
||||
std::fs::set_permissions(
|
||||
bundle.join("reproduce.sh"),
|
||||
std::fs::Permissions::from_mode(0o755),
|
||||
).unwrap();
|
||||
}
|
||||
assert_eq!(replay_bundle(&bundle, &[]), ReplayResult::Pass);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn docker_pull_script_emitted_when_toolchain_pinned() {
|
||||
// Until the Phase 19 image catalogue (`tools/image-builder/images.toml`)
|
||||
// is populated with real digests, no toolchain id will return a
|
||||
// pinned image reference — `pinned_image_ref` returns `None`.
|
||||
// Skip when that's still the state of the world; the test fires
|
||||
// once digests land and gates against regressions where a
|
||||
// pinned toolchain stops emitting `docker_pull.sh`.
|
||||
let dir = TempDir::new().unwrap();
|
||||
unsafe { std::env::set_var("NYX_REPRO_BASE", dir.path().to_str().unwrap()) };
|
||||
|
||||
let mut spec = make_spec();
|
||||
spec.toolchain_id = "python-3.11".into();
|
||||
let artifact = repro::write(
|
||||
&spec,
|
||||
&SandboxOptions::default(),
|
||||
&make_outcome(),
|
||||
&make_verdict(),
|
||||
"# harness", "# entry", b"payload", "label", None,
|
||||
).unwrap();
|
||||
|
||||
let pinned =
|
||||
nyx_scanner::dynamic::toolchain::pinned_image_ref(&spec.toolchain_id);
|
||||
if pinned.is_some() {
|
||||
assert!(
|
||||
artifact.root.join("docker_pull.sh").exists(),
|
||||
"docker_pull.sh missing for pinned toolchain",
|
||||
);
|
||||
} else {
|
||||
// When unpinned, docker_pull.sh is intentionally absent.
|
||||
assert!(
|
||||
!artifact.root.join("docker_pull.sh").exists(),
|
||||
"docker_pull.sh should not be emitted when toolchain is unpinned",
|
||||
);
|
||||
}
|
||||
|
||||
unsafe { std::env::remove_var("NYX_REPRO_BASE") };
|
||||
}
|
||||
}
|
||||
162
tests/scrubber_pii.rs
Normal file
162
tests/scrubber_pii.rs
Normal file
|
|
@ -0,0 +1,162 @@
|
|||
//! Phase 28 (Track H.5) — PII scrubber coverage.
|
||||
//!
|
||||
//! Asserts that every probe witness textual field is routed through
|
||||
//! [`nyx_scanner::dynamic::policy::Scrubber`] before serialisation and
|
||||
//! that the project secret regex set + auxiliary literal substring
|
||||
//! list catch the common credential / PII shapes that production
|
||||
//! payloads can splash into a sink call.
|
||||
|
||||
#[cfg(feature = "dynamic")]
|
||||
mod scrubber_pii_tests {
|
||||
use nyx_scanner::dynamic::policy::{Scrubber, SCRUB_HASH_PREFIX};
|
||||
use nyx_scanner::dynamic::probe::ProbeWitness;
|
||||
|
||||
#[test]
|
||||
fn scrubber_recognises_aws_access_key() {
|
||||
let s = Scrubber::project_default();
|
||||
let value = "AKIAFAKETEST00000000";
|
||||
assert!(s.matches_any(value));
|
||||
let out = s.scrub_string(value);
|
||||
assert!(out.starts_with(SCRUB_HASH_PREFIX));
|
||||
assert!(!out.contains(value));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn scrubber_recognises_github_pat() {
|
||||
let s = Scrubber::project_default();
|
||||
let value = "ghp_abcdefghijklmnopqrstuvwxyz0123456789";
|
||||
assert!(s.matches_any(value));
|
||||
let out = s.scrub_string(value);
|
||||
assert!(out.starts_with(SCRUB_HASH_PREFIX));
|
||||
assert!(!out.contains("abcdefghijklmnopqrstuvwxyz"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn scrubber_recognises_slack_token() {
|
||||
let s = Scrubber::project_default();
|
||||
let value = "xoxb-1234567890-ABCDEFGHIJK";
|
||||
assert!(s.matches_any(value));
|
||||
let out = s.scrub_string(value);
|
||||
assert!(out.starts_with(SCRUB_HASH_PREFIX));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn scrubber_recognises_openai_sk_token() {
|
||||
let s = Scrubber::project_default();
|
||||
let value = "sk-1234567890abcdefghijklmnopqr";
|
||||
assert!(s.matches_any(value));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn scrubber_recognises_bearer_header() {
|
||||
let s = Scrubber::project_default();
|
||||
let value = "Authorization: Bearer eyJhbGciOiJIUzI1NiJ9.payload.sig";
|
||||
assert!(s.matches_any(value));
|
||||
let out = s.scrub_string(value);
|
||||
assert!(!out.contains("eyJhbGciOiJIUzI1NiJ9"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn scrubber_recognises_password_query_param() {
|
||||
let s = Scrubber::project_default();
|
||||
let value = "?username=eli&password=super_secret_12345";
|
||||
assert!(s.matches_any(value));
|
||||
let out = s.scrub_string(value);
|
||||
assert!(!out.contains("super_secret_12345"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn scrubber_recognises_pem_block() {
|
||||
let s = Scrubber::project_default();
|
||||
let value = "-----BEGIN RSA PRIVATE KEY-----\nMIIEoQIBAAKCAQ\n-----END RSA PRIVATE KEY-----";
|
||||
assert!(s.matches_any(value));
|
||||
let out = s.scrub_string(value);
|
||||
assert!(!out.contains("MIIEoQIBAAKCAQ"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn scrubber_recognises_nyx_stub_secret_literal() {
|
||||
// Phase 28 acceptance literal.
|
||||
let s = Scrubber::project_default();
|
||||
let value = "nyx-stub-secret-aaaa-bbbb-cccc";
|
||||
assert!(s.matches_any(value));
|
||||
let out = s.scrub_string(value);
|
||||
assert!(out.starts_with(SCRUB_HASH_PREFIX));
|
||||
assert!(!out.contains("aaaa-bbbb-cccc"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn scrubber_clean_value_round_trips_unchanged() {
|
||||
let s = Scrubber::project_default();
|
||||
let value = "GET /api/users/42 200 OK";
|
||||
assert!(!s.matches_any(value));
|
||||
assert_eq!(s.scrub_string(value), value);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn scrubber_hash_is_deterministic_across_invocations() {
|
||||
let s = Scrubber::project_default();
|
||||
let a = s.scrub_string("AKIAFAKETEST00000000");
|
||||
let b = s.scrub_string("AKIAFAKETEST00000000");
|
||||
assert_eq!(a, b);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn scrubber_distinct_inputs_produce_distinct_hashes() {
|
||||
let s = Scrubber::project_default();
|
||||
let a = s.scrub_string("AKIAFAKETEST00000000");
|
||||
let b = s.scrub_string("AKIAFAKETEST11111111");
|
||||
assert_ne!(a, b);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn probe_witness_args_repr_is_scrubbed_before_telemetry_write() {
|
||||
// Phase 28 acceptance: "a probe witness containing a key shaped
|
||||
// like `nyx-stub-secret-...` is hashed before telemetry write."
|
||||
// ProbeWitness::from_inputs is the host-side constructor every
|
||||
// host-built witness travels through; assert the args slot is
|
||||
// hashed even when the env / cwd are empty.
|
||||
let env: Vec<(String, String)> = vec![];
|
||||
let witness = ProbeWitness::from_inputs(
|
||||
env,
|
||||
"/tmp/run",
|
||||
b"payload bytes here",
|
||||
"os.system",
|
||||
vec!["cmd nyx-stub-secret-deadbeef-feedface".to_owned()],
|
||||
);
|
||||
|
||||
let serialised = serde_json::to_string(&witness).unwrap();
|
||||
assert!(!serialised.contains("deadbeef-feedface"),
|
||||
"raw secret leaked into serialised witness: {serialised}");
|
||||
assert!(serialised.contains(SCRUB_HASH_PREFIX),
|
||||
"expected scrubbed-hash marker; got {serialised}");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn probe_witness_env_value_is_scrubbed() {
|
||||
// An env var keyed past the deny-list (so scrub_env keeps the
|
||||
// value verbatim) but whose textual value contains a secret
|
||||
// pattern must still be hashed by the Phase 28 scrubber pass.
|
||||
let env: Vec<(String, String)> = vec![
|
||||
("USER_DATA".to_owned(), "AKIAFAKETEST00000000".to_owned()),
|
||||
];
|
||||
let witness = ProbeWitness::from_inputs(
|
||||
env, "/x", b"", "fn", vec![],
|
||||
);
|
||||
let value = witness.env_snapshot.get("USER_DATA").unwrap();
|
||||
assert!(value.starts_with(SCRUB_HASH_PREFIX), "got {value}");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn probe_witness_args_with_no_secrets_round_trip_unchanged() {
|
||||
let env: Vec<(String, String)> = vec![];
|
||||
let witness = ProbeWitness::from_inputs(
|
||||
env,
|
||||
"/tmp/run",
|
||||
b"payload",
|
||||
"os.system",
|
||||
vec!["ls /tmp".to_owned()],
|
||||
);
|
||||
assert_eq!(witness.args_repr, vec!["ls /tmp".to_owned()]);
|
||||
}
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue