mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-27 20:29:39 +02:00
[pitboss] phase 28: Track H.3 + H.4 + H.5 — Repro hermeticity, stability gate inversion, PII scrubber
This commit is contained in:
parent
99729c5bce
commit
9b09aab736
6 changed files with 1038 additions and 18 deletions
|
|
@ -26,9 +26,32 @@
|
|||
//! The module deliberately depends on `std` only (no third-party crates)
|
||||
//! so `cargo deny check` and `cargo doc` both see it as a leaf with no
|
||||
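//! transitive license risk.
//!
//! NOTE(review): the Phase 28 scrubber in this module calls `blake3::hash`
//! and [`crate::utils::redact`], so the std-only statement above no longer
//! covers the whole module — confirm and update this paragraph.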
|
||||
//!
|
||||
//! # Phase 28 extension (Track H.5 — PII scrubber)
|
||||
//!
|
||||
//! [`Scrubber`] hashes probe-witness values whose textual shape matches a
|
||||
//! project secret pattern. The pattern set is the same one
|
||||
//! [`crate::utils::redact`] already uses for `--show-suppressed` console
|
||||
//! output and repro `outcome.json` redaction: AWS access key IDs, GitHub /
|
||||
//! Slack / OpenAI tokens, PEM blocks, `password=` / `api_key=` / `secret=`
|
||||
//! query strings, and `Bearer` headers. Re-using the redactor's pattern
|
||||
//! list keeps the rule "what counts as PII" defined in exactly one place
|
||||
//! across the project — adding a new pattern in `redact.rs` also tightens
|
||||
//! probe-witness scrubbing without a second registry to maintain.
|
||||
//!
|
||||
//! The witness scrubber differs from the redactor in how it rewrites a
//! match: instead of erasing the secret behind a `<REDACTED>` placeholder it
//! replaces the whole witness value with `<scrubbed-hash:<prefix>>`, where
//! the prefix is the first 16 hex chars of the BLAKE3 digest of that whole
//! value. This preserves enough signal to (a) correlate identical
//! secret-bearing values across multiple witness fields without exposing
//! them and (b) detect via dedup analysis that two probe runs observed the
//! same credential when a leaked token gets cycled into payloads. Because
//! the digest covers the entire field, the same secret embedded in
//! different surrounding text yields a different hash.
|
||||
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use crate::utils::redact;
|
||||
|
||||
/// Maximum number of bytes retained in
|
||||
/// [`crate::dynamic::probe::ProbeWitness::payload_bytes`].
|
||||
///
|
||||
|
|
@ -110,6 +133,101 @@ where
|
|||
out
|
||||
}
|
||||
|
||||
/// Opening marker of the token written by [`Scrubber::scrub_string`]
|
||||
/// when a witness value matches a project secret pattern; the BLAKE3 hex
|
||||
/// prefix and a closing `>` follow it. Operators grepping for leaked
|
||||
/// credentials in a probe witness see `<scrubbed-hash:…>` and know the
|
||||
/// value was classified as PII and replaced by a digest.
|
||||
pub const SCRUB_HASH_PREFIX: &str = "<scrubbed-hash:";
|
||||
|
||||
/// Length of the BLAKE3 hex prefix retained by the scrubber. 16 hex chars
|
||||
/// = 64 bits of identity — wide enough to dedup hits across a single
|
||||
/// probe file without storing the secret itself.
|
||||
/// NOTE(review): the digest is unkeyed, so truncation alone does not make
|
||||
/// guessing a low-entropy or known-shape token expensive — confirm threat model.
|
||||
pub const SCRUB_HASH_PREFIX_LEN: usize = 16;
|
||||
|
||||
/// Project-secret literal substrings that mark a witness value as
|
||||
/// carrying PII even when no `redact.rs` regex matches. Matched
|
||||
/// case-insensitively as a substring, so every entry must be lowercase.
|
||||
/// The list covers the project's own stub-secret shape
|
||||
/// (`nyx-stub-secret-…`), the `private_key` stem and RSA / OpenSSH PEM
|
||||
/// header text. NOTE(review): bare stems such as `secret`, `password`
|
||||
/// or `passwd` are NOT in the list, so a token like
|
||||
/// `my-app-secret-12345` is not scrubbed by this table — confirm intent.
|
||||
pub const PII_LITERAL_SUBSTRINGS: &[&str] = &[
|
||||
"nyx-stub-secret",
|
||||
"stub-secret-",
|
||||
"private_key",
|
||||
"begin rsa private key",
|
||||
"begin openssh private key",
|
||||
];
|
||||
|
||||
/// Scrub probe-witness textual values before they are serialised to the
|
||||
/// probe-file JSON line.
|
||||
///
|
||||
/// The scrubber wraps the project-wide secret regex set defined in
|
||||
/// [`crate::utils::redact`] (AWS keys, GitHub / Slack / OpenAI tokens,
|
||||
/// `password=` query strings, PEM blocks, `Bearer` headers) plus an
|
||||
/// auxiliary literal set in [`PII_LITERAL_SUBSTRINGS`] for project-
|
||||
/// specific shapes. When a witness value matches any pattern the whole
|
||||
/// value is replaced with `<scrubbed-hash:<blake3-prefix>>`. Hashing
|
||||
/// rather than dropping the value lets downstream forensic analysis
|
||||
/// dedup repeated occurrences of the same credential across witness
|
||||
/// fields without exposing the credential itself.
|
||||
///
|
||||
/// Constructed via [`Scrubber::project_default`] for the standard
|
||||
/// pattern set; the type is left as a struct (rather than a free
|
||||
/// function) so future per-project allow-listing can attach to the same
|
||||
/// API surface without breaking call sites.
|
||||
#[derive(Debug, Default, Clone)]
|
||||
pub struct Scrubber {
|
||||
_private: (),
|
||||
}
|
||||
|
||||
impl Scrubber {
|
||||
/// Scrubber wired to the project-default secret regex set. Cheap to
|
||||
/// construct — holds no compiled state because [`crate::utils::redact`]
|
||||
/// is stateless.
|
||||
pub fn project_default() -> Self {
|
||||
Self { _private: () }
|
||||
}
|
||||
|
||||
/// True iff `text` contains any project secret pattern (regex set or
|
||||
/// literal substring). Useful for tests asserting that a witness
|
||||
/// field would be scrubbed without allocating the rewritten string.
|
||||
pub fn matches_any(&self, text: &str) -> bool {
|
||||
if redact::contains_secret(text.as_bytes()) {
|
||||
return true;
|
||||
}
|
||||
let lower = text.to_ascii_lowercase();
|
||||
PII_LITERAL_SUBSTRINGS.iter().any(|needle| lower.contains(*needle))
|
||||
}
|
||||
|
||||
/// Scrub `text`, returning a new `String` whose value is either the
|
||||
/// input unchanged (no pattern matched) or `<scrubbed-hash:<prefix>>`
|
||||
/// (hashes the whole value). Hashing the whole value rather than
|
||||
/// each matched substring keeps the rewrite mechanism trivial — the
|
||||
/// witness fields are short forensic strings, not long log lines,
|
||||
/// and shipping the entire field plus a marker is what downstream
|
||||
/// repro tooling expects.
|
||||
pub fn scrub_string(&self, text: &str) -> String {
|
||||
if self.matches_any(text) {
|
||||
hash_token(text)
|
||||
} else {
|
||||
text.to_owned()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Hash a matched secret into the `<scrubbed-hash:<prefix>>` shape.
|
||||
fn hash_token(secret: &str) -> String {
|
||||
let digest = blake3::hash(secret.as_bytes());
|
||||
let hex = digest.to_hex();
|
||||
let prefix: String = hex.chars().take(SCRUB_HASH_PREFIX_LEN).collect();
|
||||
format!("{SCRUB_HASH_PREFIX}{prefix}>")
|
||||
}
|
||||
|
||||
/// Truncate `bytes` to at most [`PAYLOAD_CAPTURE_LIMIT_BYTES`].
|
||||
///
|
||||
/// Head-keeping: the prefix the sink reads first is retained; the tail is
|
||||
|
|
@ -178,6 +296,51 @@ mod tests {
|
|||
assert_eq!(truncate_payload_bytes(&bytes).len(), PAYLOAD_CAPTURE_LIMIT_BYTES);
|
||||
}
|
||||
|
||||
/// A value with no secret shape is returned verbatim and is not flagged.
#[test]
fn scrubber_passes_through_clean_value() {
    let clean = "hello world";
    let scrubber = Scrubber::project_default();
    assert_eq!(scrubber.scrub_string(clean), "hello world");
    assert!(!scrubber.matches_any(clean));
}
|
||||
|
||||
/// An AWS-access-key-shaped value is flagged and replaced by a hash marker
/// that no longer contains the key text.
#[test]
fn scrubber_hashes_aws_key_value() {
    let scrubber = Scrubber::project_default();
    let witness = "key=AKIAFAKETEST00000000";
    assert!(scrubber.matches_any(witness));

    let out = scrubber.scrub_string(witness);
    assert!(out.starts_with(SCRUB_HASH_PREFIX), "got {out}");
    assert!(out.ends_with('>'));
    assert!(!out.contains("AKIAFAKETEST00000000"));
}
|
||||
|
||||
/// The project's own stub-secret shape is caught by the literal table and
/// the secret tail does not survive in the output.
#[test]
fn scrubber_hashes_project_stub_secret() {
    let scrubber = Scrubber::project_default();
    let witness = "nyx-stub-secret-abc123-deadbeef";
    assert!(scrubber.matches_any(witness));

    let out = scrubber.scrub_string(witness);
    assert!(out.starts_with(SCRUB_HASH_PREFIX), "got {out}");
    assert!(!out.contains("abc123-deadbeef"));
}
|
||||
|
||||
/// Scrubbing the same secret twice yields the same hash marker.
///
/// The marker check matters: without it the test would also pass if the
/// value were never scrubbed, because pass-through is trivially stable.
#[test]
fn scrubber_hash_is_stable_for_same_input() {
    let s = Scrubber::project_default();
    let a = s.scrub_string("AKIAFAKETEST00000000");
    let b = s.scrub_string("AKIAFAKETEST00000000");
    assert!(a.starts_with(SCRUB_HASH_PREFIX), "value was not scrubbed: {a}");
    assert_eq!(a, b);
}
|
||||
|
||||
/// Two different secrets produce two different hash markers.
///
/// Both outputs are first checked to be markers: two distinct
/// pass-through strings would also be unequal, so `assert_ne!` alone
/// would not prove that hashing happened.
#[test]
fn scrubber_hash_differs_for_different_inputs() {
    let s = Scrubber::project_default();
    let a = s.scrub_string("AKIAFAKETEST00000000");
    let b = s.scrub_string("AKIAFAKETEST11111111");
    assert!(a.starts_with(SCRUB_HASH_PREFIX), "value was not scrubbed: {a}");
    assert!(b.starts_with(SCRUB_HASH_PREFIX), "value was not scrubbed: {b}");
    assert_ne!(a, b);
}
|
||||
|
||||
#[test]
|
||||
fn scrub_is_deterministic_btree() {
|
||||
// Same iterator yields the same map; BTreeMap guarantees iteration order.
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue