[pitboss] phase 30: Cross-cutting — Determinism audit, VerifyTrace observability, policy.rs deny rules

This commit is contained in:
pitboss 2026-05-15 20:17:07 -05:00
parent b56c19ef64
commit 36c8bf52df
13 changed files with 1421 additions and 0 deletions

175
tests/determinism_audit.rs Normal file
View file

@ -0,0 +1,175 @@
//! Phase 30 (Track C — determinism): run the verifier 10× on the same
//! input and assert byte-identical [`VerifyTrace`] output across runs,
//! plus byte-identical telemetry records once wall-clock fields are
//! stripped.
//!
//! The test deliberately drives the policy-deny short-circuit so it
//! does not depend on a working language toolchain, a sandbox backend,
//! or a populated payload corpus. That path emits exactly the same
//! pipeline events ([`SpecStarted`], [`Verdict`]) every run, and
//! emits a single telemetry record whose only non-deterministic field
//! is the wall-clock `ts` timestamp. Stripping `ts` gives a stable
//! envelope the test can compare directly.
#![cfg(feature = "dynamic")]
use nyx_scanner::commands::scan::Diag;
use nyx_scanner::dynamic::telemetry::{self, SamplingPolicy};
use nyx_scanner::dynamic::verify::{verify_finding, VerifyOptions};
use nyx_scanner::evidence::{Confidence, Evidence, VerifyStatus};
use nyx_scanner::patterns::{FindingCategory, Severity};
use serde_json::Value;
use std::collections::BTreeSet;
/// Number of times each determinism test in this file repeats the
/// verifier call before comparing the collected outputs.
const RUN_COUNT: usize = 10;
/// Builds the finding used by every test in this file.
///
/// The evidence carries a single note containing a fake AWS-style access
/// key, which the tests rely on to steer the verifier onto its policy-deny
/// path. `stable_hash` is copied verbatim into the returned [`Diag`]; every
/// other field is a fixed literal.
fn deny_diag(stable_hash: u64) -> Diag {
    // NOTE(review): the original author states this token matches the
    // AWS-key regex behind `utils::redact::contains_secret` and that the
    // deny-rule table is `const`; neither is visible from this file.
    let mut evidence = Evidence::default();
    evidence.notes = vec![String::from("secret=AKIAFAKEDETERM00000000")];

    Diag {
        path: String::from("src/handler.py"),
        line: 42,
        col: 0,
        severity: Severity::High,
        id: String::from("py.cmdi.os_system"),
        category: FindingCategory::Security,
        path_validated: false,
        guard_kind: None,
        message: None,
        labels: Vec::new(),
        confidence: Some(Confidence::High),
        evidence: Some(evidence),
        rank_score: None,
        rank_reason: None,
        suppressed: false,
        suppression: None,
        rollup: None,
        finding_id: String::new(),
        alternative_finding_ids: Vec::new(),
        stable_hash,
    }
}
/// Strip every non-deterministic field from a parsed telemetry record
/// and re-serialise. Phase 30 acceptance explicitly excludes wall-clock
/// timestamps; `ts` is the only such field today. Future additions
/// belong in this filter so the canonical "what does deterministic
/// telemetry look like?" surface lives in one place.
fn strip_volatile_fields(line: &str) -> String {
let mut value: Value = serde_json::from_str(line).expect("telemetry line should be JSON");
if let Some(obj) = value.as_object_mut() {
obj.remove("ts");
// `duration_ms` is zero on the no-sandbox deny path, but strip
// it defensively so the audit stays correct if a future code
// path stamps a non-zero duration before the verdict short-
// circuits.
obj.remove("duration_ms");
}
serde_json::to_string(&value).expect("re-serialisation cannot fail")
}
/// Runs the verifier `RUN_COUNT` times on the same deny-path finding and
/// checks two things: the serialised `VerifyResult` is identical on every
/// run, and the telemetry log holds exactly `RUN_COUNT` records that are
/// identical once `ts` / `duration_ms` are stripped.
#[test]
fn ten_runs_produce_byte_identical_telemetry_minus_timestamps() {
    /// Clears `NYX_TELEMETRY_PATH` when dropped.
    ///
    /// Using a guard instead of a trailing `remove_var` call means the
    /// variable is also cleared when one of the assertions below panics;
    /// otherwise it would keep pointing at the deleted temp dir and poison
    /// sibling tests that share this process.
    struct TelemetryPathGuard;

    impl Drop for TelemetryPathGuard {
        fn drop(&mut self) {
            // SAFETY: NOTE(review) — mutating the environment is only sound
            // while no other thread reads or writes it concurrently. The
            // default test harness is multi-threaded; confirm sibling tests
            // in this binary do not touch the environment at the same time.
            unsafe {
                std::env::remove_var("NYX_TELEMETRY_PATH");
            }
        }
    }

    let tmp = tempfile::TempDir::new().expect("tempdir");
    let log = tmp.path().join("events.jsonl");

    // Pin the telemetry log to the temp file and ensure the
    // `NYX_NO_TELEMETRY` opt-out is not set in this process.
    //
    // SAFETY: same caveat as in `TelemetryPathGuard::drop` — sound only if
    // no other thread accesses the environment concurrently (to confirm).
    unsafe {
        std::env::set_var("NYX_TELEMETRY_PATH", &log);
        std::env::remove_var("NYX_NO_TELEMETRY");
    }
    // Declared after `tmp`, so it is dropped first: the variable is cleared
    // before the temp dir it points into is deleted.
    let _telemetry_path_guard = TelemetryPathGuard;

    let diag = deny_diag(0x0123_4567_89ab_cdef);
    let mut opts = VerifyOptions::default();
    opts.telemetry_policy = SamplingPolicy::keep_all();
    opts.trace_verbose = false;

    let mut verdict_jsons: BTreeSet<String> = BTreeSet::new();
    for _ in 0..RUN_COUNT {
        let result = verify_finding(&diag, &opts);
        assert_eq!(result.status, VerifyStatus::Inconclusive);
        // Serialise the whole result: any run-to-run difference in a
        // serialised field shows up as a second distinct string in the set.
        // NOTE(review): fields that serde skips are not compared here; an
        // earlier comment implied `differential` is one of them — confirm.
        verdict_jsons.insert(
            serde_json::to_string(&result).expect("VerifyResult serialises"),
        );
    }
    assert_eq!(
        verdict_jsons.len(),
        1,
        "VerifyResult must be byte-identical across {RUN_COUNT} runs, got {} distinct",
        verdict_jsons.len()
    );

    // Read the telemetry log; expect RUN_COUNT lines, all identical once
    // the volatile fields are removed.
    let parsed = telemetry::read_events(&log).expect("events.jsonl should parse");
    assert_eq!(
        parsed.len(),
        RUN_COUNT,
        "expected {RUN_COUNT} telemetry records, got {}",
        parsed.len()
    );
    let stripped: BTreeSet<String> = parsed
        .iter()
        .map(|v| {
            // `strip_volatile_fields` works on a JSON line, so turn each
            // parsed record back into a string before stripping.
            let line = serde_json::to_string(v).expect("re-serialise");
            strip_volatile_fields(&line)
        })
        .collect();
    assert_eq!(
        stripped.len(),
        1,
        "telemetry records must be byte-identical (sans ts/duration_ms) across {RUN_COUNT} runs, got {} distinct: {:?}",
        stripped.len(),
        stripped
    );
    // `_telemetry_path_guard` removes `NYX_TELEMETRY_PATH` here on success
    // and during unwinding on failure.
}
/// Verifies the same deny-path finding `RUN_COUNT` times and checks that the
/// `excerpt` carried by `PolicyDeniedDynamic` is the same string every time.
///
/// Kept separate from the telemetry test so the excerpt's determinism is
/// pinned on its own. Per the original author's note the excerpt is
/// produced by a blake3-keyed `Scrubber`; that is not visible from here.
#[test]
fn policy_deny_excerpt_is_stable_across_runs() {
    let diag = deny_diag(0xfeed_face_0123_4567);
    let opts = VerifyOptions::default();

    // Collect one excerpt per run; duplicates collapse inside the set.
    let excerpts: BTreeSet<String> = (0..RUN_COUNT)
        .map(|_| {
            let result = verify_finding(&diag, &opts);
            let reason = result
                .inconclusive_reason
                .expect("expected PolicyDeniedDynamic on deny path");
            match reason {
                nyx_scanner::evidence::InconclusiveReason::PolicyDeniedDynamic {
                    excerpt,
                    ..
                } => excerpt,
                other => panic!("expected PolicyDeniedDynamic, got {other:?}"),
            }
        })
        .collect();

    assert_eq!(
        excerpts.len(),
        1,
        "scrubbed excerpt must be deterministic across {RUN_COUNT} runs, got {excerpts:?}"
    );
}

226
tests/policy_deny.rs Normal file
View file

@ -0,0 +1,226 @@
//! Phase 30 (Track C — security): coverage for
//! [`nyx_scanner::dynamic::policy::evaluate`] deny rules.
//!
//! One test per [`DenyRule`] variant (`credentials`, `private-key`,
//! `production-endpoint`) plus an allow-path assertion and an end-to-
//! end check that [`verify_finding`] short-circuits to
//! [`InconclusiveReason::PolicyDeniedDynamic`] without invoking the
//! sandbox.
#![cfg(feature = "dynamic")]
use nyx_scanner::commands::scan::Diag;
use nyx_scanner::dynamic::policy::{self, DenyRule, PolicyDecision};
use nyx_scanner::dynamic::verify::{verify_finding, VerifyOptions};
use nyx_scanner::evidence::{
Confidence, Evidence, FlowStep, FlowStepKind, InconclusiveReason, SpanEvidence, VerifyStatus,
};
use nyx_scanner::patterns::{FindingCategory, Severity};
/// Baseline finding shared by the tests in this file: a high-severity
/// Python finding whose evidence is `Evidence::default()`. Individual tests
/// override `path` or `evidence` to provoke a specific deny rule.
fn empty_diag() -> Diag {
    Diag {
        path: String::from("src/app.py"),
        line: 10,
        col: 0,
        severity: Severity::High,
        id: String::from("py.cmdi.os_system"),
        category: FindingCategory::Security,
        path_validated: false,
        guard_kind: None,
        message: None,
        labels: Vec::new(),
        confidence: Some(Confidence::High),
        evidence: Some(Evidence::default()),
        rank_score: None,
        rank_reason: None,
        suppressed: false,
        suppression: None,
        rollup: None,
        finding_id: String::new(),
        alternative_finding_ids: Vec::new(),
        stable_hash: 0xdead_beef_cafe_babe,
    }
}
/// Builds a `Source` flow step located at `src/app.py:4` whose only
/// populated optional field is `snippet`, set to a copy of the argument.
fn flow_step_with_snippet(snippet: &str) -> FlowStep {
    FlowStep {
        step: 1,
        kind: FlowStepKind::Source,
        file: String::from("src/app.py"),
        line: 4,
        col: 0,
        snippet: Some(String::from(snippet)),
        variable: None,
        callee: None,
        function: None,
        is_cross_file: false,
    }
}
/// Builds a span of kind `"source"` at `src/app.py:4` carrying a copy of
/// `snippet` as its snippet text.
fn span_with_snippet(snippet: &str) -> SpanEvidence {
    SpanEvidence {
        path: String::from("src/app.py"),
        line: 4,
        col: 0,
        kind: String::from("source"),
        snippet: Some(String::from(snippet)),
    }
}
/// The baseline finding contains nothing a deny rule should match, so the
/// policy is expected to return `Allow`.
#[test]
fn allow_returns_for_diag_without_secrets() {
    let diag = empty_diag();
    let decision = policy::evaluate(&diag);
    // Report the actual decision on failure, as the sibling tests do.
    assert!(
        matches!(decision, PolicyDecision::Allow),
        "expected Allow, got {decision:?}"
    );
}
/// A fake AWS-style key inside a flow-step snippet must yield a
/// `credentials` denial whose excerpt does not contain the raw token.
#[test]
fn credentials_rule_fires_on_aws_key_in_flow_step_snippet() {
    let mut evidence = Evidence::default();
    evidence.flow_steps = vec![flow_step_with_snippet("key=AKIAFAKETEST00000000")];
    let diag = Diag {
        evidence: Some(evidence),
        ..empty_diag()
    };

    let decision = policy::evaluate(&diag);
    match decision {
        PolicyDecision::Deny { rule, excerpt } => {
            assert_eq!(rule, DenyRule::CREDENTIALS);
            assert!(
                !excerpt.contains("AKIAFAKETEST00000000"),
                "excerpt must scrub the raw token, got {excerpt:?}"
            );
        }
        other => panic!("expected Deny(credentials), got {other:?}"),
    }
}
/// An `Authorization: Bearer …` header in the evidence notes must be
/// denied. Only the deny/allow outcome is checked, not which rule fired.
#[test]
fn credentials_rule_fires_on_bearer_header_note() {
    let mut evidence = Evidence::default();
    evidence.notes = vec![String::from("Authorization: Bearer sk-test-abc123def456")];
    let diag = Diag {
        evidence: Some(evidence),
        ..empty_diag()
    };

    let decision = policy::evaluate(&diag);
    assert!(decision.is_deny(), "expected Deny, got {decision:?}");
}
/// An OpenSSH PEM header in the evidence's source-span snippet must yield a
/// `private-key` denial.
#[test]
fn private_key_rule_fires_on_pem_block_in_snippet() {
    let mut evidence = Evidence::default();
    evidence.source = Some(span_with_snippet("-----BEGIN OPENSSH PRIVATE KEY-----"));
    let diag = Diag {
        evidence: Some(evidence),
        ..empty_diag()
    };

    let decision = policy::evaluate(&diag);
    match decision {
        PolicyDecision::Deny { rule, .. } => assert_eq!(rule, DenyRule::PRIVATE_KEY),
        other => panic!("expected Deny(private-key), got {other:?}"),
    }
}
/// An RSA PEM header in the evidence notes must yield a `private-key`
/// denial.
#[test]
fn private_key_rule_fires_on_rsa_pem_in_note() {
    let mut evidence = Evidence::default();
    evidence.notes = vec![String::from("-----BEGIN RSA PRIVATE KEY-----")];
    let diag = Diag {
        evidence: Some(evidence),
        ..empty_diag()
    };

    let decision = policy::evaluate(&diag);
    match decision {
        PolicyDecision::Deny { rule, .. } => assert_eq!(rule, DenyRule::PRIVATE_KEY),
        other => panic!("expected Deny(private-key), got {other:?}"),
    }
}
/// A finding whose file path embeds a `*.prod.*` host name must yield a
/// `production-endpoint` denial even though its evidence is empty.
#[test]
fn production_endpoint_rule_fires_on_path_containing_prod_subdomain() {
    let diag = Diag {
        path: String::from("src/clients/api.prod.example.com_client.py"),
        ..empty_diag()
    };

    match policy::evaluate(&diag) {
        PolicyDecision::Deny { rule, .. } => assert_eq!(rule, DenyRule::PRODUCTION_ENDPOINT),
        other => panic!("expected Deny(production-endpoint), got {other:?}"),
    }
}
/// A production-looking URL in a flow step's `callee` (with no snippet)
/// must be denied. Only the deny/allow outcome is checked, not which rule
/// fired.
#[test]
fn production_endpoint_rule_fires_on_flow_step_callee() {
    let call_step = FlowStep {
        step: 1,
        kind: FlowStepKind::Call,
        file: String::from("src/app.py"),
        line: 4,
        col: 0,
        snippet: None,
        variable: None,
        callee: Some(String::from(
            "requests.get(\"https://api-prod.example.com/v1\")",
        )),
        function: None,
        is_cross_file: false,
    };
    let mut evidence = Evidence::default();
    evidence.flow_steps = vec![call_step];

    // The path is set explicitly to an innocuous value so that only the
    // callee can be what triggers the denial.
    let diag = Diag {
        path: String::from("src/app.py"),
        evidence: Some(evidence),
        ..empty_diag()
    };

    let decision = policy::evaluate(&diag);
    assert!(decision.is_deny(), "expected Deny, got {decision:?}");
}
/// Pins rule precedence: when one note contains both a credential and a
/// production host name, the reported rule must be `credentials`.
///
/// Rationale given by the original author: a leaked token has a larger
/// blast radius than an exposed production endpoint name.
#[test]
fn credentials_rule_fires_before_other_rules() {
    let mut evidence = Evidence::default();
    evidence.notes = vec![String::from(
        "deploying key=AKIAFAKETEST00000000 to api.prod.example.com",
    )];
    let diag = Diag {
        evidence: Some(evidence),
        ..empty_diag()
    };

    let decision = policy::evaluate(&diag);
    match decision {
        PolicyDecision::Deny { rule, .. } => assert_eq!(rule, DenyRule::CREDENTIALS),
        other => panic!("expected credentials to win, got {other:?}"),
    }
}
/// End-to-end check that `verify_finding` stops at the policy gate.
///
/// The finding carries a password-like note, so the expected outcome is an
/// immediate `Inconclusive(PolicyDeniedDynamic)` verdict with the
/// `credentials` rule, a scrubbed excerpt, no recorded attempts and no
/// toolchain match. Empty `attempts` and a `None` toolchain match are the
/// observable signs used here that no sandbox run was started.
#[test]
fn verify_finding_short_circuits_without_sandbox() {
    let mut evidence = Evidence::default();
    evidence.notes = vec![String::from("password=hunter2-supersecret-test")];
    let diag = Diag {
        evidence: Some(evidence),
        ..empty_diag()
    };

    let opts = VerifyOptions::default();
    let result = verify_finding(&diag, &opts);
    assert_eq!(result.status, VerifyStatus::Inconclusive);

    match result
        .inconclusive_reason
        .expect("PolicyDeniedDynamic must populate inconclusive_reason")
    {
        InconclusiveReason::PolicyDeniedDynamic { rule, excerpt } => {
            assert_eq!(rule, DenyRule::CREDENTIALS);
            assert!(
                !excerpt.contains("hunter2-supersecret-test"),
                "excerpt must scrub the raw secret, got {excerpt:?}"
            );
        }
        other => panic!("expected PolicyDeniedDynamic, got {other:?}"),
    }

    assert!(
        result.attempts.is_empty(),
        "sandbox must not have run; attempts should be empty"
    );
    assert!(result.toolchain_match.is_none());
}