diff --git a/src/commands/scan.rs b/src/commands/scan.rs
index c71d6439..5b15c01f 100644
--- a/src/commands/scan.rs
+++ b/src/commands/scan.rs
@@ -192,6 +192,7 @@ fn is_zero_u64(v: &u64) -> bool {
     *v == 0
 }
 
+#[cfg(test)]
 impl Default for Diag {
     fn default() -> Self {
         Self {
@@ -4264,3 +4265,106 @@ mod prioritize_tests {
         assert_eq!(j1, j2, "same input should produce same output");
     }
 }
+
+#[cfg(test)]
+mod stable_hash_tests {
+    use super::*;
+    use crate::evidence::Evidence;
+    use crate::labels::Cap;
+    use crate::patterns::{FindingCategory, Severity};
+
+    /// Baseline finding that every test in this module starts from.
+    fn base_diag() -> Diag {
+        Diag {
+            path: "src/handler.rs".into(),
+            line: 42,
+            col: 5,
+            severity: Severity::High,
+            id: "taint-unsanitised-flow".into(),
+            category: FindingCategory::Security,
+            path_validated: false,
+            guard_kind: None,
+            message: None,
+            labels: vec![],
+            confidence: None,
+            evidence: Some(Evidence {
+                sink_caps: Cap::SQL_QUERY.bits(),
+                ..Default::default()
+            }),
+            rank_score: None,
+            rank_reason: None,
+            suppressed: false,
+            suppression: None,
+            rollup: None,
+            finding_id: String::new(),
+            alternative_finding_ids: vec![],
+            stable_hash: 0,
+        }
+    }
+
+    /// Asserts that applying `mutate` to the baseline finding changes its stable hash.
+    #[track_caller]
+    fn assert_hash_changes(mutate: impl FnOnce(&mut Diag)) {
+        let baseline = base_diag();
+        let mut altered = base_diag();
+        mutate(&mut altered);
+        assert_ne!(compute_stable_hash(&baseline), compute_stable_hash(&altered));
+    }
+
+    #[test]
+    fn compute_stable_hash_is_deterministic() {
+        let diag = base_diag();
+        let first = compute_stable_hash(&diag);
+        let second = compute_stable_hash(&diag);
+        assert_eq!(first, second);
+        assert_ne!(first, 0);
+    }
+
+    #[test]
+    fn compute_stable_hash_sensitive_to_rule_id() {
+        assert_hash_changes(|d| d.id = "taint-unsanitised-flow (source 5:1)".into());
+    }
+
+    #[test]
+    fn compute_stable_hash_sensitive_to_path() {
+        assert_hash_changes(|d| d.path = "src/other.rs".into());
+    }
+
+    #[test]
+    fn compute_stable_hash_sensitive_to_line() {
+        assert_hash_changes(|d| d.line = 43);
+    }
+
+    #[test]
+    fn compute_stable_hash_sensitive_to_col() {
+        assert_hash_changes(|d| d.col = 6);
+    }
+
+    #[test]
+    fn compute_stable_hash_sensitive_to_sink_caps() {
+        assert_hash_changes(|d| {
+            d.evidence = Some(Evidence {
+                sink_caps: Cap::CODE_EXEC.bits(),
+                ..Default::default()
+            });
+        });
+    }
+
+    #[test]
+    fn compute_stable_hash_collision_resistance() {
+        // (path, line) pairs that differ pairwise; rule id and column are held fixed.
+        let inputs = [("src/a.rs", 1), ("src/b.rs", 1), ("src/a.rs", 2)];
+        let [h1, h2, h3] = inputs.map(|(path, line)| {
+            compute_stable_hash(&Diag {
+                path: path.into(),
+                line,
+                col: 0,
+                id: "rule-x".into(),
+                ..base_diag()
+            })
+        });
+        assert_ne!(h1, h2);
+        assert_ne!(h1, h3);
+        assert_ne!(h2, h3);
+    }
+}
diff --git a/src/dynamic/corpus.rs b/src/dynamic/corpus.rs
index 050206b5..346e0b1e 100644
--- a/src/dynamic/corpus.rs
+++ b/src/dynamic/corpus.rs
@@ -71,6 +71,39 @@ pub enum Oracle {
 ///
 /// When adding a new `Cap` bit: add a row above, update this function, and
 /// bump [`CORPUS_VERSION`] if you add payload support.
+///
+/// Compile-time exhaustiveness guard: `CORPUS_SUPPORTED | CORPUS_UNSUPPORTED`
+/// must equal `Cap::all()`. Adding a new Cap bit without updating this table
+/// triggers a `const` assertion failure at build time.
+const CORPUS_SUPPORTED: u32 = Cap::SQL_QUERY.bits() + | Cap::CODE_EXEC.bits() + | Cap::FILE_IO.bits() + | Cap::SSRF.bits() + | Cap::HTML_ESCAPE.bits(); + +const CORPUS_UNSUPPORTED: u32 = Cap::ENV_VAR.bits() + | Cap::SHELL_ESCAPE.bits() + | Cap::URL_ENCODE.bits() + | Cap::JSON_PARSE.bits() + | Cap::FMT_STRING.bits() + | Cap::DESERIALIZE.bits() + | Cap::CRYPTO.bits() + | Cap::UNAUTHORIZED_ID.bits() + | Cap::DATA_EXFIL.bits() + | Cap::LDAP_INJECTION.bits() + | Cap::XPATH_INJECTION.bits() + | Cap::HEADER_INJECTION.bits() + | Cap::OPEN_REDIRECT.bits() + | Cap::SSTI.bits() + | Cap::XXE.bits() + | Cap::PROTOTYPE_POLLUTION.bits(); + +const _: () = assert!( + CORPUS_SUPPORTED | CORPUS_UNSUPPORTED == Cap::all().bits(), + "Cap bit missing from corpus coverage table; \ + add to CORPUS_SUPPORTED or CORPUS_UNSUPPORTED and update payloads_for", +); + pub fn payloads_for(cap: Cap) -> &'static [Payload] { if cap.contains(Cap::SQL_QUERY) { return SQLI; diff --git a/src/dynamic/spec.rs b/src/dynamic/spec.rs index 7f53be85..7a2da868 100644 --- a/src/dynamic/spec.rs +++ b/src/dynamic/spec.rs @@ -376,4 +376,87 @@ mod tests { let s2 = HarnessSpec::from_finding(&diag).unwrap(); assert_eq!(s1.spec_hash, s2.spec_hash); } + + fn base_spec() -> HarnessSpec { + use crate::labels::Cap; + let mut spec = HarnessSpec { + finding_id: "0000000000000000".into(), + entry_file: "src/handler.rs".into(), + entry_name: "process".into(), + entry_kind: EntryKind::Function, + lang: crate::symbol::Lang::Rust, + toolchain_id: "rust-stable".into(), + payload_slot: PayloadSlot::Param(0), + expected_cap: Cap::SQL_QUERY, + constraint_hints: vec![], + spec_hash: String::new(), + }; + spec.spec_hash = compute_spec_hash(&spec); + spec + } + + #[test] + fn spec_hash_flips_on_entry_file() { + let s1 = base_spec(); + let mut s2 = s1.clone(); + s2.entry_file = "src/other.rs".into(); + s2.spec_hash = compute_spec_hash(&s2); + assert_ne!(s1.spec_hash, s2.spec_hash, "entry_file mutation must change spec_hash"); + } + + 
#[test] + fn spec_hash_flips_on_entry_name() { + let s1 = base_spec(); + let mut s2 = s1.clone(); + s2.entry_name = "other_handler".into(); + s2.spec_hash = compute_spec_hash(&s2); + assert_ne!(s1.spec_hash, s2.spec_hash, "entry_name mutation must change spec_hash"); + } + + #[test] + fn spec_hash_flips_on_payload_slot() { + let s1 = base_spec(); + let mut s2 = s1.clone(); + s2.payload_slot = PayloadSlot::Param(1); + s2.spec_hash = compute_spec_hash(&s2); + assert_ne!(s1.spec_hash, s2.spec_hash, "payload_slot mutation must change spec_hash"); + + let mut s3 = s1.clone(); + s3.payload_slot = PayloadSlot::HttpBody; + s3.spec_hash = compute_spec_hash(&s3); + assert_ne!(s1.spec_hash, s3.spec_hash, "payload_slot tag change must change spec_hash"); + + let mut s4 = s1.clone(); + s4.payload_slot = PayloadSlot::EnvVar("NYX_INPUT".into()); + s4.spec_hash = compute_spec_hash(&s4); + assert_ne!(s1.spec_hash, s4.spec_hash, "EnvVar payload_slot must change spec_hash"); + } + + #[test] + fn spec_hash_flips_on_expected_cap() { + use crate::labels::Cap; + let s1 = base_spec(); + let mut s2 = s1.clone(); + s2.expected_cap = Cap::CODE_EXEC; + s2.spec_hash = compute_spec_hash(&s2); + assert_ne!(s1.spec_hash, s2.spec_hash, "expected_cap mutation must change spec_hash"); + } + + #[test] + fn spec_hash_flips_on_constraint_hints() { + let s1 = base_spec(); + let mut s2 = s1.clone(); + s2.constraint_hints = vec!["prefix:admin/".into()]; + s2.spec_hash = compute_spec_hash(&s2); + assert_ne!(s1.spec_hash, s2.spec_hash, "constraint_hints mutation must change spec_hash"); + } + + #[test] + fn spec_hash_flips_on_toolchain_id() { + let s1 = base_spec(); + let mut s2 = s1.clone(); + s2.toolchain_id = "rust-nightly".into(); + s2.spec_hash = compute_spec_hash(&s2); + assert_ne!(s1.spec_hash, s2.spec_hash, "toolchain_id mutation must change spec_hash"); + } } diff --git a/tests/dynamic_verify_e2e.rs b/tests/dynamic_verify_e2e.rs new file mode 100644 index 00000000..ab94edd4 --- /dev/null +++ 
b/tests/dynamic_verify_e2e.rs
@@ -0,0 +1,162 @@
+//! End-to-end integration tests for the `--verify` / `verify: true` path.
+//!
+//! Phase M1 ships no harness builder (`harness::build` returns `Unimplemented`),
+//! so a finding that gets as far as the builder comes back from `verify_finding`
+//! as `VerifyStatus::Unsupported` with `reason = BackendUnavailable`. The tests
+//! below pin three things:
+//!
+//! 1. `verify_finding` returns the expected `VerifyResult` shape.
+//! 2. The JSON serialization of `VerifyResult` contains the expected fields.
+//! 3. Findings rejected for other reasons (a cap without a payload corpus, a
+//!    confidence that is too low) report their own typed reason rather than
+//!    `BackendUnavailable`, so the code paths stay distinguishable.
+//!
+//! Everything is gated on `#[cfg(feature = "dynamic")]` because `verify_finding`
+//! lives in the `dynamic` module. Run `cargo nextest run --features dynamic` to
+//! exercise the tests.
+
+#[cfg(feature = "dynamic")]
+mod verify_e2e {
+    use nyx_scanner::commands::scan::Diag;
+    use nyx_scanner::dynamic::verify::{verify_finding, VerifyOptions};
+    use nyx_scanner::evidence::{
+        Confidence, Evidence, FlowStep, FlowStepKind, UnsupportedReason, VerifyStatus,
+    };
+    use nyx_scanner::labels::Cap;
+    use nyx_scanner::patterns::{FindingCategory, Severity};
+
+    /// Source step (step 1) at line 1 of `file`, attributed to `function`, variable `x`.
+    fn source_step(file: &str, function: &str) -> FlowStep {
+        FlowStep {
+            step: 1,
+            kind: FlowStepKind::Source,
+            file: file.into(),
+            line: 1,
+            col: 0,
+            snippet: None,
+            variable: Some("x".into()),
+            callee: None,
+            function: Some(function.into()),
+            is_cross_file: false,
+        }
+    }
+
+    /// Sink step (step 2) at line 10 of `file`, with no variable or function recorded.
+    fn sink_step(file: &str) -> FlowStep {
+        FlowStep {
+            step: 2,
+            kind: FlowStepKind::Sink,
+            file: file.into(),
+            line: 10,
+            col: 0,
+            snippet: None,
+            variable: None,
+            callee: None,
+            function: None,
+            is_cross_file: false,
+        }
+    }
+
+    /// High-severity, high-confidence taint finding in `src/handler.rs` whose
+    /// evidence carries a source-to-sink flow and `cap` as its sink capability.
+    fn taint_diag_with_cap(cap: Cap) -> Diag {
+        let handler = "src/handler.rs";
+        let evidence = Evidence {
+            flow_steps: vec![source_step(handler, "handle_request"), sink_step(handler)],
+            sink_caps: cap.bits(),
+            ..Default::default()
+        };
+        Diag {
+            path: handler.into(),
+            line: 10,
+            col: 0,
+            severity: Severity::High,
+            id: "taint-unsanitised-flow".into(),
+            category: FindingCategory::Security,
+            path_validated: false,
+            guard_kind: None,
+            message: None,
+            labels: vec![],
+            confidence: Some(Confidence::High),
+            evidence: Some(evidence),
+            rank_score: None,
+            rank_reason: None,
+            suppressed: false,
+            suppression: None,
+            rollup: None,
+            finding_id: String::new(),
+            alternative_finding_ids: vec![],
+            stable_hash: 0,
+        }
+    }
+
+    /// SQL_QUERY is a supported cap and the spec is derivable, so the finding
+    /// reaches `harness::build`; in phase M1 that returns `Unimplemented`, which
+    /// surfaces as `VerifyStatus::Unsupported` with `reason = BackendUnavailable`.
+    #[test]
+    fn verify_finding_with_supported_cap_returns_backend_unavailable() {
+        let options = VerifyOptions::default();
+        let finding = taint_diag_with_cap(Cap::SQL_QUERY);
+        let outcome = verify_finding(&finding, &options);
+
+        assert_eq!(outcome.status, VerifyStatus::Unsupported);
+        assert_eq!(outcome.reason, Some(UnsupportedReason::BackendUnavailable));
+        assert!(outcome.triggered_payload.is_none());
+        assert!(outcome.attempts.is_empty());
+    }
+
+    /// CRYPTO has no payload corpus, so the finding gets as far as `run_spec`,
+    /// which returns `RunError::NoPayloadsForCap`; the result is
+    /// `VerifyStatus::Unsupported` with `reason = NoPayloadsForCap`. Getting this
+    /// reason instead of `BackendUnavailable` shows the two paths are distinct.
+    #[test]
+    fn verify_finding_with_unsupported_cap_returns_no_payloads() {
+        let options = VerifyOptions::default();
+        let finding = taint_diag_with_cap(Cap::CRYPTO);
+        let outcome = verify_finding(&finding, &options);
+
+        assert_eq!(outcome.status, VerifyStatus::Unsupported);
+        assert_eq!(outcome.reason, Some(UnsupportedReason::NoPayloadsForCap));
+    }
+
+    /// A low-confidence finding is turned away before spec derivation and
+    /// reports `reason = ConfidenceTooLow`.
+    #[test]
+    fn verify_finding_low_confidence_returns_confidence_too_low() {
+        let options = VerifyOptions::default();
+        let finding = Diag {
+            confidence: Some(Confidence::Low),
+            ..taint_diag_with_cap(Cap::SQL_QUERY)
+        };
+        let outcome = verify_finding(&finding, &options);
+
+        assert_eq!(outcome.status, VerifyStatus::Unsupported);
+        assert_eq!(outcome.reason, Some(UnsupportedReason::ConfidenceTooLow));
+    }
+
+    /// JSON contract for the `BackendUnavailable` result: `status` and `reason`
+    /// are present, while `triggered_payload`, `detail` and `attempts` are left
+    /// out of the serialized object.
+    #[test]
+    fn verify_result_json_shape_backend_unavailable() {
+        let options = VerifyOptions::default();
+        let finding = taint_diag_with_cap(Cap::SQL_QUERY);
+        let outcome = verify_finding(&finding, &options);
+
+        let serialized = serde_json::to_string(&outcome).expect("VerifyResult must serialize");
+        let value: serde_json::Value =
+            serde_json::from_str(&serialized).expect("must be valid JSON");
+
+        assert_eq!(value["status"], "Unsupported");
+        assert_eq!(value["reason"], "BackendUnavailable");
+        let skipped = [
+            ("triggered_payload", "triggered_payload must be absent"),
+            ("detail", "detail must be absent"),
+            ("attempts", "attempts must be absent (empty vec skipped)"),
+        ];
+        for (key, complaint) in skipped {
+            assert!(value.get(key).is_none(), "{}", complaint);
+        }
+        assert!(value["finding_id"].is_string());
+    }
+}