[pitboss] phase 30: Cross-cutting — Determinism audit, VerifyTrace observability, policy.rs deny rules

2026-06-15 20:05:13 +02:00 · 2026-05-15 20:17:07 -05:00 · 2026-05-15 20:17:07 -05:00 · 36c8bf52df
commit 36c8bf52df
parent b56c19ef64
13 changed files with 1421 additions and 0 deletions
--- a/tests/determinism_audit.rs
+++ b/tests/determinism_audit.rs
@ -0,0 +1,175 @@
+//! Phase 30 (Track C — determinism): run the verifier 10× on the same
+//! input and assert byte-identical [`VerifyTrace`] output across runs,
+//! plus byte-identical telemetry records once wall-clock fields are
+//! stripped.
+//!
+//! The test deliberately drives the policy-deny short-circuit so it
+//! does not depend on a working language toolchain, a sandbox backend,
+//! or a populated payload corpus.  That path emits exactly the same
+//! pipeline events ([`SpecStarted`], [`Verdict`]) every run, and
+//! emits a single telemetry record whose only non-deterministic field
+//! is the wall-clock `ts` timestamp.  Stripping `ts` gives a stable
+//! envelope the test can compare directly.
+
+#![cfg(feature = "dynamic")]
+
+use nyx_scanner::commands::scan::Diag;
+use nyx_scanner::dynamic::telemetry::{self, SamplingPolicy};
+use nyx_scanner::dynamic::verify::{verify_finding, VerifyOptions};
+use nyx_scanner::evidence::{Confidence, Evidence, VerifyStatus};
+use nyx_scanner::patterns::{FindingCategory, Severity};
+use serde_json::Value;
+use std::collections::BTreeSet;
+
+const RUN_COUNT: usize = 10;
+
+fn deny_diag(stable_hash: u64) -> Diag {
+    let mut ev = Evidence::default();
+    // Triggers the credentials deny rule via the AWS-key regex from
+    // `crate::utils::redact::contains_secret`.  The deny rule fires
+    // deterministically because the rule lookup table is `const`.
+    ev.notes = vec!["secret=AKIAFAKEDETERM00000000".to_owned()];
+    Diag {
+        path: "src/handler.py".to_owned(),
+        line: 42,
+        col: 0,
+        severity: Severity::High,
+        id: "py.cmdi.os_system".to_owned(),
+        category: FindingCategory::Security,
+        path_validated: false,
+        guard_kind: None,
+        message: None,
+        labels: vec![],
+        confidence: Some(Confidence::High),
+        evidence: Some(ev),
+        rank_score: None,
+        rank_reason: None,
+        suppressed: false,
+        suppression: None,
+        rollup: None,
+        finding_id: String::new(),
+        alternative_finding_ids: vec![],
+        stable_hash,
+    }
+}
+
+/// Strip every non-deterministic field from a parsed telemetry record
+/// and re-serialise.  Phase 30 acceptance explicitly excludes wall-clock
+/// timestamps, so `ts` is removed; `duration_ms` is removed defensively.
+/// Future additions belong in this filter so the canonical "what does
+/// deterministic telemetry look like?" surface lives in one place.
+fn strip_volatile_fields(line: &str) -> String {
+    let mut value: Value = serde_json::from_str(line).expect("telemetry line should be JSON");
+    if let Some(obj) = value.as_object_mut() {
+        obj.remove("ts");
+        // `duration_ms` is zero on the no-sandbox deny path, but strip
+        // it defensively so the audit stays correct if a future code
+        // path stamps a non-zero duration before the verdict short-
+        // circuits.
+        obj.remove("duration_ms");
+    }
+    serde_json::to_string(&value).expect("re-serialisation cannot fail")
+}
+
+#[test]
+fn ten_runs_produce_byte_identical_telemetry_minus_timestamps() {
+    let tmp = tempfile::TempDir::new().expect("tempdir");
+    let log = tmp.path().join("events.jsonl");
+    // Pin the telemetry log to the temp file and clear the `NYX_NO_TELEMETRY`
+    // opt-out.  NOTE(review): env is process-global; confirm no parallel test races this.
+    unsafe {
+        std::env::set_var("NYX_TELEMETRY_PATH", &log);
+        std::env::remove_var("NYX_NO_TELEMETRY");
+    }
+
+    let diag = deny_diag(0x0123_4567_89ab_cdef);
+
+    let mut opts = VerifyOptions::default();
+    opts.telemetry_policy = SamplingPolicy::keep_all();
+    opts.trace_verbose = false;
+
+    let mut verdict_jsons: BTreeSet<String> = BTreeSet::new();
+    for _ in 0..RUN_COUNT {
+        let result = verify_finding(&diag, &opts);
+        assert_eq!(result.status, VerifyStatus::Inconclusive);
+        // Compare serialised JSON, not the struct: fields serde omits
+        // (presumably `differential` and any future timestamped field)
+        // drop out, and byte-identical output is the contract.
+        verdict_jsons.insert(
+            serde_json::to_string(&result)
+                .expect("VerifyResult serialises"),
+        );
+    }
+    assert_eq!(
+        verdict_jsons.len(),
+        1,
+        "VerifyResult must be byte-identical across {RUN_COUNT} runs, got {} distinct",
+        verdict_jsons.len()
+    );
+
+    // Read the telemetry log; expect RUN_COUNT lines, all identical
+    // once `ts` and `duration_ms` are removed.
+    let parsed = telemetry::read_events(&log).expect("events.jsonl should parse");
+    assert_eq!(
+        parsed.len(),
+        RUN_COUNT,
+        "expected {RUN_COUNT} telemetry records, got {}",
+        parsed.len()
+    );
+    let stripped: BTreeSet<String> = parsed
+        .iter()
+        .map(|v| {
+            // round-trip through string so the strip path matches
+            // what the on-disk reader does.
+            let line = serde_json::to_string(v).expect("re-serialise");
+            strip_volatile_fields(&line)
+        })
+        .collect();
+    assert_eq!(
+        stripped.len(),
+        1,
+        "telemetry records must be byte-identical (sans ts/duration_ms) across {RUN_COUNT} runs, got {} distinct: {:?}",
+        stripped.len(),
+        stripped
+    );
+
+    // Cleanup: leaving the env var pointing at the (about-to-be-deleted)
+    // tempdir would poison sibling tests that share this process.
+    unsafe {
+        std::env::remove_var("NYX_TELEMETRY_PATH");
+    }
+}
+
+#[test]
+fn policy_deny_excerpt_is_stable_across_runs() {
+    // The PolicyDeniedDynamic verdict carries an excerpt scrubbed via
+    // the blake3-keyed `Scrubber`.  blake3 is deterministic, so the
+    // excerpt should be byte-identical across runs.  Independent
+    // assertion from the telemetry-determinism test because the
+    // scrubber-hash path is a separate determinism contract worth
+    // pinning on its own.
+    let diag = deny_diag(0xfeed_face_0123_4567);
+    let opts = VerifyOptions::default();
+
+    let mut excerpts: BTreeSet<String> = BTreeSet::new();
+    for _ in 0..RUN_COUNT {
+        let result = verify_finding(&diag, &opts);
+        match result
+            .inconclusive_reason
+            .expect("expected PolicyDeniedDynamic on deny path")
+        {
+            nyx_scanner::evidence::InconclusiveReason::PolicyDeniedDynamic {
+                excerpt,
+                ..
+            } => {
+                excerpts.insert(excerpt);
+            }
+            other => panic!("expected PolicyDeniedDynamic, got {other:?}"),
+        }
+    }
+    assert_eq!(
+        excerpts.len(),
+        1,
+        "scrubbed excerpt must be deterministic across {RUN_COUNT} runs, got {excerpts:?}"
+    );
+}
--- a/tests/policy_deny.rs
+++ b/tests/policy_deny.rs
@ -0,0 +1,226 @@
+//! Phase 30 (Track C — security): coverage for
+//! [`nyx_scanner::dynamic::policy::evaluate`] deny rules.
+//!
+//! One test per [`DenyRule`] variant (`credentials`, `private-key`,
+//! `production-endpoint`) plus an allow-path assertion and an end-to-
+//! end check that [`verify_finding`] short-circuits to
+//! [`InconclusiveReason::PolicyDeniedDynamic`] without invoking the
+//! sandbox.
+
+#![cfg(feature = "dynamic")]
+
+use nyx_scanner::commands::scan::Diag;
+use nyx_scanner::dynamic::policy::{self, DenyRule, PolicyDecision};
+use nyx_scanner::dynamic::verify::{verify_finding, VerifyOptions};
+use nyx_scanner::evidence::{
+    Confidence, Evidence, FlowStep, FlowStepKind, InconclusiveReason, SpanEvidence, VerifyStatus,
+};
+use nyx_scanner::patterns::{FindingCategory, Severity};
+
+fn empty_diag() -> Diag {
+    Diag {
+        path: "src/app.py".to_owned(),
+        line: 10,
+        col: 0,
+        severity: Severity::High,
+        id: "py.cmdi.os_system".to_owned(),
+        category: FindingCategory::Security,
+        path_validated: false,
+        guard_kind: None,
+        message: None,
+        labels: vec![],
+        confidence: Some(Confidence::High),
+        evidence: Some(Evidence::default()),
+        rank_score: None,
+        rank_reason: None,
+        suppressed: false,
+        suppression: None,
+        rollup: None,
+        finding_id: String::new(),
+        alternative_finding_ids: vec![],
+        stable_hash: 0xdeadbeefcafebabe,
+    }
+}
+
+fn flow_step_with_snippet(snippet: &str) -> FlowStep {
+    FlowStep {
+        step: 1,
+        kind: FlowStepKind::Source,
+        file: "src/app.py".to_owned(),
+        line: 4,
+        col: 0,
+        snippet: Some(snippet.to_owned()),
+        variable: None,
+        callee: None,
+        function: None,
+        is_cross_file: false,
+    }
+}
+
+fn span_with_snippet(snippet: &str) -> SpanEvidence {
+    SpanEvidence {
+        path: "src/app.py".to_owned(),
+        line: 4,
+        col: 0,
+        kind: "source".to_owned(),
+        snippet: Some(snippet.to_owned()),
+    }
+}
+
+#[test]
+fn allow_returns_for_diag_without_secrets() {
+    let diag = empty_diag();
+    assert!(matches!(policy::evaluate(&diag), PolicyDecision::Allow));
+}
+
+#[test]
+fn credentials_rule_fires_on_aws_key_in_flow_step_snippet() {
+    let mut diag = empty_diag();
+    let mut ev = Evidence::default();
+    ev.flow_steps = vec![flow_step_with_snippet(
+        "key=AKIAFAKETEST00000000",
+    )];
+    diag.evidence = Some(ev);
+    match policy::evaluate(&diag) {
+        PolicyDecision::Deny { rule, excerpt } => {
+            assert_eq!(rule, DenyRule::CREDENTIALS);
+            assert!(
+                !excerpt.contains("AKIAFAKETEST00000000"),
+                "excerpt must scrub the raw token, got {excerpt:?}"
+            );
+        }
+        other => panic!("expected Deny(credentials), got {other:?}"),
+    }
+}
+
+#[test]
+fn credentials_rule_fires_on_bearer_header_note() {
+    let mut diag = empty_diag();
+    let mut ev = Evidence::default();
+    ev.notes = vec!["Authorization: Bearer sk-test-abc123def456".to_owned()];
+    diag.evidence = Some(ev);
+    let decision = policy::evaluate(&diag);
+    assert!(decision.is_deny(), "expected Deny, got {decision:?}");
+}
+
+#[test]
+fn private_key_rule_fires_on_pem_block_in_snippet() {
+    let mut diag = empty_diag();
+    let mut ev = Evidence::default();
+    ev.source = Some(span_with_snippet(
+        "-----BEGIN OPENSSH PRIVATE KEY-----",
+    ));
+    diag.evidence = Some(ev);
+    match policy::evaluate(&diag) {
+        PolicyDecision::Deny { rule, .. } => {
+            assert_eq!(rule, DenyRule::PRIVATE_KEY);
+        }
+        other => panic!("expected Deny(private-key), got {other:?}"),
+    }
+}
+
+#[test]
+fn private_key_rule_fires_on_rsa_pem_in_note() {
+    let mut diag = empty_diag();
+    let mut ev = Evidence::default();
+    ev.notes = vec!["-----BEGIN RSA PRIVATE KEY-----".to_owned()];
+    diag.evidence = Some(ev);
+    match policy::evaluate(&diag) {
+        PolicyDecision::Deny { rule, .. } => {
+            assert_eq!(rule, DenyRule::PRIVATE_KEY);
+        }
+        other => panic!("expected Deny(private-key), got {other:?}"),
+    }
+}
+
+#[test]
+fn production_endpoint_rule_fires_on_path_containing_prod_subdomain() {
+    let mut diag = empty_diag();
+    diag.path = "src/clients/api.prod.example.com_client.py".to_owned();
+    let decision = policy::evaluate(&diag);
+    match decision {
+        PolicyDecision::Deny { rule, .. } => {
+            assert_eq!(rule, DenyRule::PRODUCTION_ENDPOINT);
+        }
+        other => panic!("expected Deny(production-endpoint), got {other:?}"),
+    }
+}
+
+#[test]
+fn production_endpoint_rule_fires_on_flow_step_callee() {
+    let mut diag = empty_diag();
+    diag.path = "src/app.py".to_owned();
+    let mut ev = Evidence::default();
+    ev.flow_steps = vec![FlowStep {
+        step: 1,
+        kind: FlowStepKind::Call,
+        file: "src/app.py".to_owned(),
+        line: 4,
+        col: 0,
+        snippet: None,
+        variable: None,
+        callee: Some("requests.get(\"https://api-prod.example.com/v1\")".to_owned()),
+        function: None,
+        is_cross_file: false,
+    }];
+    diag.evidence = Some(ev);
+    let decision = policy::evaluate(&diag);
+    assert!(decision.is_deny(), "expected Deny, got {decision:?}");
+}
+
+#[test]
+fn credentials_rule_fires_before_other_rules() {
+    // A diag that matches BOTH credentials (regex) and production-endpoint
+    // (substring) must surface the credentials rule — credentials are
+    // higher-blast-radius and a leaked token would dwarf an exposed prod
+    // endpoint name.  Order asserted by the policy.evaluate impl.
+    let mut diag = empty_diag();
+    let mut ev = Evidence::default();
+    ev.notes = vec![
+        "deploying key=AKIAFAKETEST00000000 to api.prod.example.com".to_owned(),
+    ];
+    diag.evidence = Some(ev);
+    match policy::evaluate(&diag) {
+        PolicyDecision::Deny { rule, .. } => {
+            assert_eq!(rule, DenyRule::CREDENTIALS);
+        }
+        other => panic!("expected credentials to win, got {other:?}"),
+    }
+}
+
+#[test]
+fn verify_finding_short_circuits_without_sandbox() {
+    // Route the verifier through the deny path and confirm it returns
+    // `Inconclusive(PolicyDeniedDynamic)` without ever starting a
+    // sandbox.  The diag deliberately carries a credential-shaped note
+    // so the credentials deny rule fires.  Had the verifier gone on to
+    // build a Python harness and launch the sandbox, the run would
+    // depend on python3 being available; instead we observe an
+    // immediate verdict with no recorded attempts.
+    let mut diag = empty_diag();
+    let mut ev = Evidence::default();
+    ev.notes = vec!["password=hunter2-supersecret-test".to_owned()];
+    diag.evidence = Some(ev);
+
+    let result = verify_finding(&diag, &VerifyOptions::default());
+
+    assert_eq!(result.status, VerifyStatus::Inconclusive);
+    let reason = result
+        .inconclusive_reason
+        .expect("PolicyDeniedDynamic must populate inconclusive_reason");
+    match reason {
+        InconclusiveReason::PolicyDeniedDynamic { rule, excerpt } => {
+            assert_eq!(rule, DenyRule::CREDENTIALS);
+            assert!(
+                !excerpt.contains("hunter2-supersecret-test"),
+                "excerpt must scrub the raw secret, got {excerpt:?}"
+            );
+        }
+        other => panic!("expected PolicyDeniedDynamic, got {other:?}"),
+    }
+    assert!(
+        result.attempts.is_empty(),
+        "sandbox must not have run; attempts should be empty"
+    );
+    assert!(result.toolchain_match.is_none());
+}