From 4eccbd48b4a4f03a9dbd93b3e3453a7c1b593e3e Mon Sep 17 00:00:00 2001 From: pitboss Date: Thu, 14 May 2026 12:37:14 -0500 Subject: [PATCH] =?UTF-8?q?[pitboss]=20phase=2007:=20Track=20C.3=20?= =?UTF-8?q?=E2=80=94=20Differential=20confirmation=20enforcement?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/baseline.rs | 1 + src/dynamic/corpus.rs | 179 ++++++++++++++++++++++++--- src/dynamic/differential.rs | 141 +++++++++++++++++++++ src/dynamic/mod.rs | 1 + src/dynamic/repro.rs | 1 + src/dynamic/runner.rs | 89 +++++++++---- src/dynamic/verify.rs | 91 +++++++++++++- src/evidence.rs | 89 +++++++++++++ src/fmt.rs | 2 + src/rank.rs | 5 + tests/console_snapshot.rs | 4 + tests/fix_validation_e2e.rs | 2 + tests/go_fixtures.rs | 1 + tests/java_fixtures.rs | 1 + tests/js_fixtures.rs | 1 + tests/json_snapshot.rs | 3 + tests/oracle_differential.rs | 156 +++++++++++++++++++++++ tests/php_fixtures.rs | 1 + tests/repro_determinism.rs | 1 + tests/sarif_dynamic_verdict_tests.rs | 6 + 20 files changed, 734 insertions(+), 41 deletions(-) create mode 100644 src/dynamic/differential.rs create mode 100644 tests/oracle_differential.rs diff --git a/src/baseline.rs b/src/baseline.rs index b4473c4d..ec544705 100644 --- a/src/baseline.rs +++ b/src/baseline.rs @@ -445,6 +445,7 @@ mod tests { detail: None, attempts: vec![], toolchain_match: None, + differential: None, }), ..Default::default() }); diff --git a/src/dynamic/corpus.rs b/src/dynamic/corpus.rs index fb91f989..a01c7a26 100644 --- a/src/dynamic/corpus.rs +++ b/src/dynamic/corpus.rs @@ -44,7 +44,8 @@ pub use crate::dynamic::oracle::Oracle; /// | 1 | 2025-11-01 | Initial corpus (SQLi, CMDI, PATH_TRAV, SSRF, XSS) | /// | 2 | 2025-12-15 | SSRF OOB-variant added; oracle semantics tightened | /// | 3 | 2026-05-12 | Migrated to `CuratedPayload`; provenance + fixture_paths enforced; SSRF OOB-nonce slot added | -pub const CORPUS_VERSION: u32 = 3; +/// | 4 | 2026-05-14 | Phase 07: 
`benign_control` paired refs + benign payloads added to SQLI / CMDI / SSRF (file-scheme) | +pub const CORPUS_VERSION: u32 = 4; /// Where a payload originated. #[derive(Debug, Clone, Copy, PartialEq, Eq)] @@ -58,6 +59,18 @@ pub enum PayloadProvenance { ExternalReport, } +/// Reference from a vulnerable payload to its paired benign control. +/// +/// Resolved at call time by scanning the same cap's payload slice for an +/// `is_benign == true` entry whose `label` matches. Stored as `&'static +/// str` (rather than a back-pointer to [`CuratedPayload`]) so the corpus +/// tables stay `const`-declarable. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct PayloadRef { + /// Label of the benign-control entry inside the same cap's payload set. + pub label: &'static str, +} + /// A single payload entry in the curated corpus. /// /// Governs both static payload bytes (or an OOB-nonce template) and the @@ -99,6 +112,15 @@ pub struct CuratedPayload { /// path and has not been migrated to /// [`Oracle::SinkProbe`](crate::dynamic::oracle::Oracle::SinkProbe) yet. pub probe_predicates: &'static [ProbePredicate], + /// Paired benign-control payload inside the same cap's slice. + /// + /// `Some(PayloadRef)` on a vulnerable entry means the differential rule + /// (Phase 07, §4.1) compares this entry's oracle firing against the + /// referenced benign. `None` marks the entry as having no paired + /// control — the runner downgrades any would-be `Confirmed` to + /// [`crate::evidence::InconclusiveReason::NoBenignControl`]. + /// Always `None` on benign entries themselves. + pub benign_control: Option, } /// Backward-compatible type alias. @@ -187,6 +209,24 @@ pub fn benign_payload_for(cap: Cap) -> Option<&'static CuratedPayload> { payloads_for(cap).iter().find(|p| p.is_benign) } +/// Resolve a [`CuratedPayload::benign_control`] reference to the matching +/// benign entry inside the same cap's payload slice. 
+/// +/// Returns `None` when the vulnerable payload has no paired control +/// (`benign_control == None`) or when the named label is missing / +/// non-benign in the corpus. The runner treats the `None` result as +/// `NoControl` and downgrades the verdict to +/// [`crate::evidence::InconclusiveReason::NoBenignControl`]. +pub fn resolve_benign_control( + vuln_payload: &CuratedPayload, + cap: Cap, +) -> Option<&'static CuratedPayload> { + let r = vuln_payload.benign_control?; + payloads_for(cap) + .iter() + .find(|p| p.is_benign && p.label == r.label) +} + /// Materialise the effective bytes for a payload. /// /// For static payloads (`oob_nonce_slot == false`) returns the `bytes` slice @@ -367,6 +407,52 @@ mod tests { let p = SSRF_PAYLOADS.iter().find(|p| p.oob_nonce_slot).expect("must have OOB payload"); assert!(materialise_bytes(p, None).is_none(), "no OOB URL → None"); } + + #[test] + fn benign_control_refs_resolve_for_paired_caps() { + let cases: &[(Cap, &str, &str)] = &[ + (Cap::SQL_QUERY, "sqli-tautology", "sqli-benign"), + (Cap::SQL_QUERY, "sqli-union-nyx", "sqli-benign"), + (Cap::CODE_EXEC, "cmdi-echo-marker", "cmdi-benign"), + (Cap::FILE_IO, "path-traversal-passwd", "path-traversal-benign"), + (Cap::SSRF, "ssrf-file-scheme", "ssrf-benign"), + (Cap::HTML_ESCAPE, "xss-script-marker", "xss-benign-text"), + ]; + for (cap, vuln_label, benign_label) in cases { + let vuln = payloads_for(*cap) + .iter() + .find(|p| p.label == *vuln_label) + .unwrap_or_else(|| panic!("missing vuln payload {vuln_label} for {cap:?}")); + let resolved = resolve_benign_control(vuln, *cap) + .unwrap_or_else(|| panic!("missing benign control for {vuln_label}")); + assert_eq!(resolved.label, *benign_label); + assert!(resolved.is_benign, "resolved control must be marked benign"); + } + } + + #[test] + fn oob_payload_has_no_benign_control() { + let p = SSRF_PAYLOADS + .iter() + .find(|p| p.oob_nonce_slot) + .expect("OOB payload"); + assert!(p.benign_control.is_none(), "OOB-nonce payload is 
intentionally NoControl"); + assert!(resolve_benign_control(p, Cap::SSRF).is_none()); + } + + #[test] + fn benign_entries_are_terminal() { + let caps = [Cap::SQL_QUERY, Cap::CODE_EXEC, Cap::FILE_IO, Cap::SSRF, Cap::HTML_ESCAPE]; + for cap in caps { + for p in payloads_for(cap).iter().filter(|p| p.is_benign) { + assert!( + p.benign_control.is_none(), + "benign payload {} must not chain to another control", + p.label, + ); + } + } + } } // ── SQLi ───────────────────────────────────────────────────────────────────── @@ -384,6 +470,7 @@ const SQLI: &[CuratedPayload] = &[ fixture_paths: &["tests/benchmark/corpus/rust/sqli/sqli_rusqlite_format.rs"], oob_nonce_slot: false, probe_predicates: &[], + benign_control: Some(PayloadRef { label: "sqli-benign" }), }, CuratedPayload { bytes: b"' UNION SELECT 'NYX_SQL_CONFIRMED'--", @@ -396,25 +483,60 @@ const SQLI: &[CuratedPayload] = &[ fixture_paths: &["tests/benchmark/corpus/rust/sqli/sqli_rusqlite_format.rs"], oob_nonce_slot: false, probe_predicates: &[], + benign_control: Some(PayloadRef { label: "sqli-benign" }), + }, + // Benign control: ordinary value that should never produce the SQL marker. 
+ CuratedPayload { + bytes: b"benign_safe_sqli_NYX_BENIGN", + label: "sqli-benign", + oracle: Oracle::OutputContains("NYX_SQL_CONFIRMED"), + is_benign: true, + provenance: PayloadProvenance::Curated, + since_corpus_version: 4, + deprecated_at_corpus_version: None, + fixture_paths: &["tests/benchmark/corpus/rust/sqli/sqli_rusqlite_format.rs"], + oob_nonce_slot: false, + probe_predicates: &[], + benign_control: None, }, ]; // ── Command injection ───────────────────────────────────────────────────────── -const CMDI: &[CuratedPayload] = &[CuratedPayload { - bytes: b"; echo NYX_PWN_CMDI", - label: "cmdi-echo-marker", - oracle: Oracle::OutputContains("NYX_PWN_CMDI"), - is_benign: false, - provenance: PayloadProvenance::Curated, - since_corpus_version: 1, - deprecated_at_corpus_version: None, - fixture_paths: &[ - "tests/benchmark/corpus/rust/cmdi/cmdi_command.rs", - "tests/benchmark/corpus/rust/cmdi/cmdi_args.rs", - ], - oob_nonce_slot: false, - probe_predicates: &[], -}]; +const CMDI: &[CuratedPayload] = &[ + CuratedPayload { + bytes: b"; echo NYX_PWN_CMDI", + label: "cmdi-echo-marker", + oracle: Oracle::OutputContains("NYX_PWN_CMDI"), + is_benign: false, + provenance: PayloadProvenance::Curated, + since_corpus_version: 1, + deprecated_at_corpus_version: None, + fixture_paths: &[ + "tests/benchmark/corpus/rust/cmdi/cmdi_command.rs", + "tests/benchmark/corpus/rust/cmdi/cmdi_args.rs", + ], + oob_nonce_slot: false, + probe_predicates: &[], + benign_control: Some(PayloadRef { label: "cmdi-benign" }), + }, + // Benign control: plain text that should never produce the cmdi marker. 
+ CuratedPayload { + bytes: b"benign_safe_cmdi_NYX_BENIGN", + label: "cmdi-benign", + oracle: Oracle::OutputContains("NYX_PWN_CMDI"), + is_benign: true, + provenance: PayloadProvenance::Curated, + since_corpus_version: 4, + deprecated_at_corpus_version: None, + fixture_paths: &[ + "tests/benchmark/corpus/rust/cmdi/cmdi_command.rs", + "tests/benchmark/corpus/rust/cmdi/cmdi_args.rs", + ], + oob_nonce_slot: false, + probe_predicates: &[], + benign_control: None, + }, +]; // ── Path traversal ──────────────────────────────────────────────────────────── // Benign payload reads a known-safe file (Python's os module source path). @@ -435,6 +557,7 @@ const PATH_TRAV: &[CuratedPayload] = &[ ], oob_nonce_slot: false, probe_predicates: &[], + benign_control: Some(PayloadRef { label: "path-traversal-benign" }), }, CuratedPayload { bytes: b"benign_safe_file_that_does_not_exist_NYX_BENIGN", @@ -447,6 +570,7 @@ const PATH_TRAV: &[CuratedPayload] = &[ fixture_paths: &["tests/benchmark/corpus/rust/path_traversal/path_file_open.rs"], oob_nonce_slot: false, probe_predicates: &[], + benign_control: None, }, ]; @@ -473,6 +597,7 @@ const SSRF_PAYLOADS: &[CuratedPayload] = &[ fixture_paths: &["tests/benchmark/corpus/rust/ssrf/ssrf_reqwest.rs"], oob_nonce_slot: false, probe_predicates: &[], + benign_control: Some(PayloadRef { label: "ssrf-benign" }), }, CuratedPayload { // `bytes` is unused when `oob_nonce_slot = true`; the runner @@ -487,6 +612,26 @@ const SSRF_PAYLOADS: &[CuratedPayload] = &[ fixture_paths: &["tests/benchmark/corpus/rust/ssrf/ssrf_reqwest.rs"], oob_nonce_slot: true, probe_predicates: &[], + // OOB-nonce payloads are self-confirming via the listener; no + // benign counterpart is meaningful (a benign URL can never hit + // the nonce listener), so this entry sits at `NoControl`. + benign_control: None, + }, + // Benign control for the file-scheme SSRF variant. Fetched the same + // way as the vuln payload but cannot resolve to a body containing the + // `daemon:` marker. 
+ CuratedPayload { + bytes: b"benign_safe_ssrf_NYX_BENIGN", + label: "ssrf-benign", + oracle: Oracle::OutputContains("daemon:"), + is_benign: true, + provenance: PayloadProvenance::Curated, + since_corpus_version: 4, + deprecated_at_corpus_version: None, + fixture_paths: &["tests/benchmark/corpus/rust/ssrf/ssrf_reqwest.rs"], + oob_nonce_slot: false, + probe_predicates: &[], + benign_control: None, }, ]; @@ -505,6 +650,7 @@ const XSS: &[CuratedPayload] = &[ fixture_paths: &["tests/benchmark/corpus/rust/xss/axum_html/main.rs"], oob_nonce_slot: false, probe_predicates: &[], + benign_control: Some(PayloadRef { label: "xss-benign-text" }), }, CuratedPayload { bytes: b"Hello World", @@ -517,5 +663,6 @@ const XSS: &[CuratedPayload] = &[ fixture_paths: &["tests/benchmark/corpus/rust/xss/axum_html/main.rs"], oob_nonce_slot: false, probe_predicates: &[], + benign_control: None, }, ]; diff --git a/src/dynamic/differential.rs b/src/dynamic/differential.rs new file mode 100644 index 00000000..2c4f0ec3 --- /dev/null +++ b/src/dynamic/differential.rs @@ -0,0 +1,141 @@ +//! Differential confirmation rule for dynamic verification (Phase 07). +//! +//! `Confirmed` requires the vulnerable payload's oracle to fire **and** +//! the paired benign control's oracle to *not* fire (§4.1). This module +//! is the single source of truth for that rule. Everything else (runner, +//! verifier, tests) collapses to "look up paired benign + call +//! [`evaluate`]". +//! +//! # Rule table +//! +//! | vuln fires | benign fires | verdict | +//! |------------|--------------|-------------------------------| +//! | true | false | `Confirmed` | +//! | true | true | `OracleCollisionSuspected` | +//! | false | false | `NotConfirmed` | +//! | false | true | `ReversedDifferential` | +//! +//! "Fires" means [`crate::dynamic::oracle::oracle_fired`] returned `true` +//! against the run's [`SandboxOutcome`] + drained [`SinkProbe`] set — +//! invariant across `Oracle::OutputContains` and `Oracle::SinkProbe`. 
+ +use crate::dynamic::probe::SinkProbe; +use crate::evidence::{ + DifferentialOutcome, DifferentialProbeArg, DifferentialProbeRecord, DifferentialVerdict, +}; + +/// Apply the differential confirmation rule. +/// +/// `vuln_probe_fires` and `benign_probe_fires` are the boolean firing +/// results of [`crate::dynamic::oracle::oracle_fired`] for the +/// vulnerable payload and its paired benign control respectively. The +/// rule has no side effects and does not consult the raw probe trace — +/// callers attach those separately via [`DifferentialOutcome`] for +/// forensic display. +pub fn evaluate(vuln_probe_fires: bool, benign_probe_fires: bool) -> DifferentialVerdict { + match (vuln_probe_fires, benign_probe_fires) { + (true, false) => DifferentialVerdict::Confirmed, + (true, true) => DifferentialVerdict::OracleCollisionSuspected, + (false, false) => DifferentialVerdict::NotConfirmed, + (false, true) => DifferentialVerdict::ReversedDifferential, + } +} + +/// Build a [`DifferentialOutcome`] for inclusion in a +/// [`crate::evidence::VerifyResult`]. +/// +/// Translates the runner's native [`SinkProbe`] traces into the +/// feature-agnostic [`DifferentialProbeRecord`] shape stored on +/// `VerifyResult`. The verdict comes from [`evaluate`] applied to the +/// caller's already-computed firing booleans (the runner has them in +/// hand from the oracle call). 
+pub fn build_outcome( + vuln_label: &str, + vuln_probe_fires: bool, + vuln_probes: &[SinkProbe], + benign_label: &str, + benign_probe_fires: bool, + benign_probes: &[SinkProbe], +) -> DifferentialOutcome { + DifferentialOutcome { + verdict: evaluate(vuln_probe_fires, benign_probe_fires), + vuln_label: vuln_label.to_owned(), + benign_label: benign_label.to_owned(), + vuln_probes: vuln_probes.iter().map(sink_probe_to_record).collect(), + benign_probes: benign_probes.iter().map(sink_probe_to_record).collect(), + } +} + +fn sink_probe_to_record(p: &SinkProbe) -> DifferentialProbeRecord { + use crate::dynamic::probe::ProbeArg; + DifferentialProbeRecord { + sink_callee: p.sink_callee.clone(), + args: p + .args + .iter() + .map(|a| match a { + ProbeArg::String(s) => DifferentialProbeArg::String(s.clone()), + ProbeArg::Bytes(b) => DifferentialProbeArg::Bytes(b.clone()), + ProbeArg::Int(i) => DifferentialProbeArg::Int(*i), + }) + .collect(), + captured_at_ns: p.captured_at_ns, + payload_id: p.payload_id.clone(), + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn rule_a_both_fire_is_collision() { + assert_eq!(evaluate(true, true), DifferentialVerdict::OracleCollisionSuspected); + } + + #[test] + fn rule_b_only_vuln_fires_is_confirmed() { + assert_eq!(evaluate(true, false), DifferentialVerdict::Confirmed); + } + + #[test] + fn rule_c_neither_fires_is_not_confirmed() { + assert_eq!(evaluate(false, false), DifferentialVerdict::NotConfirmed); + } + + #[test] + fn rule_d_only_benign_fires_is_reversed() { + assert_eq!(evaluate(false, true), DifferentialVerdict::ReversedDifferential); + } + + #[test] + fn build_outcome_carries_both_traces() { + use crate::dynamic::probe::{ProbeArg, SinkProbe}; + let vuln = vec![SinkProbe { + sink_callee: "os.system".into(), + args: vec![ProbeArg::String("; echo X".into())], + captured_at_ns: 1, + payload_id: "cmdi-echo-marker".into(), + }]; + let benign = vec![SinkProbe { + sink_callee: "os.system".into(), + args: 
vec![ProbeArg::String("safe".into())], + captured_at_ns: 2, + payload_id: "cmdi-benign".into(), + }]; + let outcome = build_outcome( + "cmdi-echo-marker", + true, + &vuln, + "cmdi-benign", + false, + &benign, + ); + assert_eq!(outcome.verdict, DifferentialVerdict::Confirmed); + assert_eq!(outcome.vuln_label, "cmdi-echo-marker"); + assert_eq!(outcome.benign_label, "cmdi-benign"); + assert_eq!(outcome.vuln_probes.len(), 1); + assert_eq!(outcome.benign_probes.len(), 1); + assert_eq!(outcome.vuln_probes[0].sink_callee, "os.system"); + } +} diff --git a/src/dynamic/mod.rs b/src/dynamic/mod.rs index 0773e5df..35b2bc64 100644 --- a/src/dynamic/mod.rs +++ b/src/dynamic/mod.rs @@ -67,6 +67,7 @@ pub mod build_sandbox; pub mod corpus; +pub mod differential; pub mod harness; pub mod lang; pub mod mount_filter; diff --git a/src/dynamic/repro.rs b/src/dynamic/repro.rs index 9fb6c02a..60650c3e 100644 --- a/src/dynamic/repro.rs +++ b/src/dynamic/repro.rs @@ -424,6 +424,7 @@ mod tests { sink_hit: true, }], toolchain_match: Some("exact".into()), + differential: None, } } diff --git a/src/dynamic/runner.rs b/src/dynamic/runner.rs index 024467ec..5a7e8ac9 100644 --- a/src/dynamic/runner.rs +++ b/src/dynamic/runner.rs @@ -6,12 +6,16 @@ //! the result into a [`crate::dynamic::report::VerifyResult`]. 
use crate::dynamic::build_sandbox; -use crate::dynamic::corpus::{benign_payload_for, materialise_bytes, payloads_for, Payload}; +use crate::dynamic::corpus::{ + materialise_bytes, payloads_for, resolve_benign_control, Payload, +}; +use crate::dynamic::differential; use crate::dynamic::harness::{self, HarnessError}; use crate::dynamic::oracle::oracle_fired; use crate::dynamic::probe::{ProbeChannel, SinkProbe}; use crate::dynamic::sandbox::{self, SandboxBackend, SandboxError, SandboxOptions, SandboxOutcome}; use crate::dynamic::spec::HarnessSpec; +use crate::evidence::{DifferentialOutcome, DifferentialVerdict}; use crate::symbol::Lang; use std::sync::Arc; @@ -31,6 +35,18 @@ pub struct RunOutcome { /// Harness sources for repro artifacts. pub harness_source: String, pub entry_source: String, + /// Phase 07 differential-confirmation trace. Carries the verdict + + /// raw probe traces from both the vulnerable run and the paired + /// benign-control run when one was executed. `None` when no benign + /// control was available (the runner sets [`Self::no_benign_control`] + /// in that case) or when execution never reached the differential + /// step. + pub differential: Option, + /// `true` when a vuln payload tripped its oracle + sink-hit gate but + /// the matching [`crate::dynamic::corpus::CuratedPayload::benign_control`] + /// reference was `None` (or unresolved). The verifier maps this to + /// [`crate::evidence::InconclusiveReason::NoBenignControl`]. 
+ pub no_benign_control: bool, } #[derive(Debug)] @@ -219,11 +235,12 @@ pub fn run_spec(spec: &HarnessSpec, opts: &SandboxOptions) -> Result = payloads.iter().filter(|p| !p.is_benign).collect(); - let benign_payload = benign_payload_for(spec.expected_cap); let mut attempts = Vec::with_capacity(vuln_payloads.len()); let mut triggered_by = None; let mut oracle_collision = false; + let mut no_benign_control = false; + let mut differential_outcome: Option = None; for (i, payload) in vuln_payloads.iter().enumerate() { // Materialise payload bytes (OOB nonce-slot payloads generate a URL). @@ -263,35 +280,57 @@ pub fn run_spec(spec: &HarnessSpec, opts: &SandboxOptions) -> Result = probe_channel + let vuln_probes: Vec = probe_channel .as_ref() .map(|ch| ch.drain()) .unwrap_or_default(); - let fired = oracle_fired(&payload.oracle, &outcome, &probes); + let vuln_fired = oracle_fired(&payload.oracle, &outcome, &vuln_probes); let sink_hit = outcome.sink_hit; - let triggered = if fired && sink_hit { - // Full confirmation: oracle + probe both fired. - // Check differential: if benign payload also triggers oracle, downgrade. - if let Some(benign) = benign_payload { - let benign_bytes = materialise_bytes(benign, None) - .map(|b| b.into_owned()) - .unwrap_or_default(); - if let Some(ch) = &probe_channel { - let _ = ch.clear(); + // Differential rule (Phase 07, §4.1). Only when the vuln oracle + // fired *and* the in-harness sink-hit sentinel was observed do we + // consult the paired benign control. Oracle-fires-without-sink + // stays on the legacy `oracle_collision` path so the existing + // `Inconclusive(OracleCollisionSuspected)` semantics survive. 
+ let triggered = if vuln_fired && sink_hit { + match resolve_benign_control(payload, spec.expected_cap) { + None => { + no_benign_control = true; + false + } + Some(benign) => { + let benign_bytes = materialise_bytes(benign, None) + .map(|b| b.into_owned()) + .unwrap_or_default(); + if let Some(ch) = &probe_channel { + let _ = ch.clear(); + } + let benign_outcome = + sandbox::run(&harness, &benign_bytes, &effective_opts)?; + let benign_probes: Vec = probe_channel + .as_ref() + .map(|ch| ch.drain()) + .unwrap_or_default(); + let benign_fired = oracle_fired( + &benign.oracle, + &benign_outcome, + &benign_probes, + ); + let outcome_record = differential::build_outcome( + payload.label, + vuln_fired, + &vuln_probes, + benign.label, + benign_fired, + &benign_probes, + ); + let confirmed = outcome_record.verdict == DifferentialVerdict::Confirmed; + differential_outcome = Some(outcome_record); + confirmed } - let benign_outcome = sandbox::run(&harness, &benign_bytes, &effective_opts)?; - let benign_probes: Vec = probe_channel - .as_ref() - .map(|ch| ch.drain()) - .unwrap_or_default(); - let benign_fired = oracle_fired(&benign.oracle, &benign_outcome, &benign_probes); - !benign_fired - } else { - true } - } else if fired && !sink_hit { + } else if vuln_fired && !sink_hit { // Oracle fired but probe didn't — likely collision. 
oracle_collision = true; false @@ -302,7 +341,7 @@ pub fn run_spec(spec: &HarnessSpec, opts: &SandboxOptions) -> Result Result VerifyResult { detail: None, attempts: vec![], toolchain_match: None, + differential: None, }; } } @@ -524,6 +528,7 @@ fn build_verdict( detail: None, attempts: attempts.clone(), toolchain_match: Some(toolchain_match.to_owned()), + differential: run.differential.clone(), }, &run.harness_source, &run.entry_source, @@ -543,6 +548,7 @@ fn build_verdict( detail: Some(format!("repro write failed: {}", repro_result.unwrap_err())), attempts, toolchain_match: Some(toolchain_match.to_owned()), + differential: run.differential, }; } @@ -555,9 +561,82 @@ fn build_verdict( detail: None, attempts, toolchain_match: Some(toolchain_match.to_owned()), + differential: run.differential, + } + } else if run.no_benign_control { + // Phase 07 §4.1: vuln oracle + sink-hit fired but the + // paired benign control was missing. Downgrade to + // `Inconclusive(NoBenignControl)` rather than stamping + // `Confirmed` from a one-sided observation. + VerifyResult { + finding_id: finding_id.to_owned(), + status: VerifyStatus::Inconclusive, + triggered_payload: None, + reason: None, + inconclusive_reason: Some(InconclusiveReason::NoBenignControl), + detail: Some( + "vulnerable oracle fired but no paired benign control payload for differential confirmation".to_owned(), + ), + attempts, + toolchain_match: Some(toolchain_match.to_owned()), + differential: None, + } + } else if let Some(d) = run.differential.as_ref() { + // Differential ran but didn't produce `Confirmed`. Map + // the rule's verdict onto the corresponding inconclusive + // reason or fall through to `NotConfirmed`. 
+ match d.verdict { + crate::evidence::DifferentialVerdict::OracleCollisionSuspected => { + VerifyResult { + finding_id: finding_id.to_owned(), + status: VerifyStatus::Inconclusive, + triggered_payload: None, + reason: None, + inconclusive_reason: Some( + InconclusiveReason::OracleCollisionSuspected, + ), + detail: Some( + "differential rule: both vulnerable and benign payloads fired the oracle".to_owned(), + ), + attempts, + toolchain_match: Some(toolchain_match.to_owned()), + differential: run.differential, + } + } + crate::evidence::DifferentialVerdict::ReversedDifferential => { + VerifyResult { + finding_id: finding_id.to_owned(), + status: VerifyStatus::Inconclusive, + triggered_payload: None, + reason: None, + inconclusive_reason: Some( + InconclusiveReason::ReversedDifferential, + ), + detail: Some( + "differential rule: only the benign control fired the oracle".to_owned(), + ), + attempts, + toolchain_match: Some(toolchain_match.to_owned()), + differential: run.differential, + } + } + crate::evidence::DifferentialVerdict::Confirmed + | crate::evidence::DifferentialVerdict::NotConfirmed => VerifyResult { + finding_id: finding_id.to_owned(), + status: VerifyStatus::NotConfirmed, + triggered_payload: None, + reason: None, + inconclusive_reason: None, + detail: None, + attempts, + toolchain_match: Some(toolchain_match.to_owned()), + differential: run.differential, + }, } } else if run.oracle_collision { - // Oracle fired but probe didn't — likely collision. + // Oracle fired but the sink-hit sentinel did not — + // legacy single-payload collision path, predates the + // differential rule. 
VerifyResult { finding_id: finding_id.to_owned(), status: VerifyStatus::Inconclusive, @@ -567,6 +646,7 @@ fn build_verdict( detail: Some("oracle fired but sink-reachability probe did not".to_owned()), attempts, toolchain_match: Some(toolchain_match.to_owned()), + differential: None, } } else { VerifyResult { @@ -578,6 +658,7 @@ fn build_verdict( detail: None, attempts, toolchain_match: Some(toolchain_match.to_owned()), + differential: None, } } } @@ -590,6 +671,7 @@ fn build_verdict( detail: None, attempts: vec![], toolchain_match: None, + differential: None, }, Err(RunError::Harness(e)) => { // Defence-in-depth residual for `EntryKindUnsupported` from the @@ -631,6 +713,7 @@ fn build_verdict( detail, attempts: vec![], toolchain_match: None, + differential: None, } } Err(RunError::BuildFailed { stderr, attempts: build_att }) => VerifyResult { @@ -642,6 +725,7 @@ fn build_verdict( detail: Some(format!("build failed after {build_att} attempts: {stderr}")), attempts: vec![], toolchain_match: None, + differential: None, }, Err(RunError::Sandbox(e)) => VerifyResult { finding_id: finding_id.to_owned(), @@ -652,6 +736,7 @@ fn build_verdict( detail: Some(format!("sandbox failed: {e:?}")), attempts: vec![], toolchain_match: None, + differential: None, }, } } @@ -730,6 +815,7 @@ mod tests { detail: None, attempts: vec![], toolchain_match: Some("exact".to_owned()), + differential: None, }; // Insert. 
@@ -778,6 +864,7 @@ mod tests { detail: None, attempts: vec![], toolchain_match: Some("exact".to_owned()), + differential: None, }; insert_verdict_cache(&db_path, "spec_aaa", "hash_xyz", "", "python-3.11", &result); @@ -812,6 +899,7 @@ mod tests { detail: None, attempts: vec![], toolchain_match: None, + differential: None, }; insert_verdict_cache(db_path, "spec", "hash", "", "python-3", &result); assert!(!db_path.exists(), "insert must not create a new DB"); @@ -865,6 +953,7 @@ mod tests { detail: None, attempts: vec![], toolchain_match: Some("exact".to_owned()), + differential: None, }; // Insert directly with the old corpus_version bypassing the helper. diff --git a/src/evidence.rs b/src/evidence.rs index e2887658..85c0c130 100644 --- a/src/evidence.rs +++ b/src/evidence.rs @@ -298,6 +298,17 @@ pub enum InconclusiveReason { supported: Vec, hint: String, }, + /// The capability's corpus lacks a paired benign control payload, so + /// the differential-confirmation rule (§4.1) cannot be evaluated. + /// Downgrades the verdict from a would-be `Confirmed` because the + /// vulnerable-only firing might still be caused by a coincidental + /// oracle match (a benign control would rule that out). + NoBenignControl, + /// The differential rule observed `!vuln_probe_fires && benign_probe_fires`: + /// the benign control triggered the oracle but the vulnerable payload + /// did not. Surfaces a misconfigured corpus, a swapped pair, or an + /// oracle that fires unconditionally; never a valid `Confirmed`. + ReversedDifferential, } /// High-level outcome of a dynamic verification attempt. @@ -331,6 +342,76 @@ pub struct AttemptSummary { pub sink_hit: bool, } +/// Outcome of the Phase 07 differential confirmation rule. +/// +/// Reflects which side of the (vulnerable, benign-control) probe pair +/// fired the oracle. Stored on [`VerifyResult::differential`] so +/// operators can see the actual rule input that produced the verdict. 
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "PascalCase")] +pub enum DifferentialVerdict { + /// Vulnerable payload fired the oracle and the benign control did not. + Confirmed, + /// Both vulnerable and benign payloads fired the oracle — the oracle + /// cannot discriminate; downgrade to + /// [`InconclusiveReason::OracleCollisionSuspected`]. + OracleCollisionSuspected, + /// Neither payload fired. + NotConfirmed, + /// Only the benign payload fired (vulnerable did not). Surfaces a + /// misconfigured corpus or a swapped pair; downgrade to + /// [`InconclusiveReason::ReversedDifferential`]. + ReversedDifferential, +} + +/// Probe-arg snapshot stored on [`DifferentialOutcome`]. +/// +/// Mirrors `crate::dynamic::probe::ProbeArg` without depending on the +/// `dynamic` feature. The conversion is centralised in +/// `crate::dynamic::differential::build_outcome`. +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +#[serde(tag = "kind", content = "value")] +pub enum DifferentialProbeArg { + String(String), + Bytes(Vec), + Int(i64), +} + +/// One probe observation captured during a differential payload run. +/// +/// Mirrors `crate::dynamic::probe::SinkProbe` without depending on the +/// `dynamic` feature. Embedded inside +/// [`DifferentialOutcome::vuln_probes`] / +/// [`DifferentialOutcome::benign_probes`] for forensic review. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct DifferentialProbeRecord { + pub sink_callee: String, + pub args: Vec, + pub captured_at_ns: u64, + pub payload_id: String, +} + +/// Full record of a Phase 07 differential confirmation run. +/// +/// Captures the rule's verdict plus the raw probe traces from both the +/// vulnerable payload run and the benign-control run. Stored on +/// [`VerifyResult::differential`]. 
+#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct DifferentialOutcome { + pub verdict: DifferentialVerdict, + /// Label of the vulnerable payload (matches + /// [`AttemptSummary::payload_label`] for the same run). + pub vuln_label: String, + /// Label of the benign-control payload. + pub benign_label: String, + /// Probe records drained from the vulnerable run. + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub vuln_probes: Vec, + /// Probe records drained from the benign run. + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub benign_probes: Vec, +} + /// Result of a dynamic verification attempt for one finding. /// /// Always present when `config.scanner.verify` is true and the `dynamic` @@ -362,6 +443,14 @@ pub struct VerifyResult { /// `"exact"` = precise match; `"drift"` = closest approximation used. #[serde(default, skip_serializing_if = "Option::is_none")] pub toolchain_match: Option, + /// Phase 07 differential-confirmation trace. Present whenever the + /// verifier ran both a vulnerable payload and its paired benign + /// control (status `Confirmed` and the `OracleCollisionSuspected` / + /// `ReversedDifferential` Inconclusive paths). `None` for verdicts + /// that never reached the differential step (e.g. `NoPayloadsForCap`, + /// `BuildFailed`, `NoBenignControl`, `NotConfirmed` with vuln-only). 
+ #[serde(default, skip_serializing_if = "Option::is_none")] + pub differential: Option, } // ───────────────────────────────────────────────────────────────────────────── diff --git a/src/fmt.rs b/src/fmt.rs index 60393f50..f525c41b 100644 --- a/src/fmt.rs +++ b/src/fmt.rs @@ -538,6 +538,8 @@ fn format_inconclusive_reason(r: &crate::evidence::InconclusiveReason) -> String "entry kind {attempted} unsupported for {lang:?} (supported: {supported:?})" ) } + InconclusiveReason::NoBenignControl => "no benign control payload".to_string(), + InconclusiveReason::ReversedDifferential => "reversed differential".to_string(), } } diff --git a/src/rank.rs b/src/rank.rs index d3ae9c65..37ddccb6 100644 --- a/src/rank.rs +++ b/src/rank.rs @@ -1158,6 +1158,7 @@ mod tests { sink_hit: true, }], toolchain_match: Some("exact".into()), + differential: None, } } @@ -1177,6 +1178,7 @@ mod tests { sink_hit: false, }], toolchain_match: Some("exact".into()), + differential: None, } } @@ -1190,6 +1192,7 @@ mod tests { detail: None, attempts: vec![], toolchain_match: None, + differential: None, } } @@ -1203,6 +1206,7 @@ mod tests { detail: None, attempts: vec![], toolchain_match: None, + differential: None, } } @@ -1216,6 +1220,7 @@ mod tests { detail: None, attempts: vec![], toolchain_match: None, + differential: None, } } diff --git a/tests/console_snapshot.rs b/tests/console_snapshot.rs index d67a6f94..d9c01723 100644 --- a/tests/console_snapshot.rs +++ b/tests/console_snapshot.rs @@ -71,6 +71,7 @@ fn diag_with_verdict(status: VerifyStatus) -> Diag { sink_hit: true, }], toolchain_match: Some("exact".into()), + differential: None, }, VerifyStatus::NotConfirmed => VerifyResult { finding_id: "abc123".into(), @@ -87,6 +88,7 @@ fn diag_with_verdict(status: VerifyStatus) -> Diag { sink_hit: false, }], toolchain_match: Some("exact".into()), + differential: None, }, VerifyStatus::Unsupported => VerifyResult { finding_id: "abc123".into(), @@ -97,6 +99,7 @@ fn diag_with_verdict(status: VerifyStatus) 
-> Diag { detail: None, attempts: vec![], toolchain_match: None, + differential: None, }, VerifyStatus::Inconclusive => VerifyResult { finding_id: "abc123".into(), @@ -107,6 +110,7 @@ fn diag_with_verdict(status: VerifyStatus) -> Diag { detail: Some("build failed after 3 attempts: linker error".into()), attempts: vec![], toolchain_match: None, + differential: None, }, }; diff --git a/tests/fix_validation_e2e.rs b/tests/fix_validation_e2e.rs index 54e95bb5..0b38442b 100644 --- a/tests/fix_validation_e2e.rs +++ b/tests/fix_validation_e2e.rs @@ -52,6 +52,7 @@ fn set_verdict( detail: None, attempts: vec![], toolchain_match: None, + differential: None, }); } } @@ -164,6 +165,7 @@ fn new_confirmed_fails_no_new_confirmed_gate() { detail: None, attempts: vec![], toolchain_match: None, + differential: None, }); } } diff --git a/tests/go_fixtures.rs b/tests/go_fixtures.rs index e3274ad1..6fb87d6e 100644 --- a/tests/go_fixtures.rs +++ b/tests/go_fixtures.rs @@ -56,6 +56,7 @@ mod go_fixture_tests { detail: None, attempts: vec![], toolchain_match: None, + differential: None, }; } diff --git a/tests/java_fixtures.rs b/tests/java_fixtures.rs index 5e4426fb..d09cca93 100644 --- a/tests/java_fixtures.rs +++ b/tests/java_fixtures.rs @@ -56,6 +56,7 @@ mod java_fixture_tests { detail: None, attempts: vec![], toolchain_match: None, + differential: None, }; } diff --git a/tests/js_fixtures.rs b/tests/js_fixtures.rs index a45afcf2..fac4591e 100644 --- a/tests/js_fixtures.rs +++ b/tests/js_fixtures.rs @@ -59,6 +59,7 @@ mod js_fixture_tests { detail: None, attempts: vec![], toolchain_match: None, + differential: None, }; } diff --git a/tests/json_snapshot.rs b/tests/json_snapshot.rs index d289fe87..79043011 100644 --- a/tests/json_snapshot.rs +++ b/tests/json_snapshot.rs @@ -57,6 +57,7 @@ fn json_dynamic_verdict_confirmed_serialises_correctly() { sink_hit: true, }], toolchain_match: Some("exact".into()), + differential: None, }), ..Default::default() }); @@ -94,6 +95,7 @@ fn 
json_dynamic_verdict_not_confirmed_serialises_correctly() { detail: None, attempts: vec![], toolchain_match: Some("exact".into()), + differential: None, }), ..Default::default() }); @@ -156,6 +158,7 @@ fn json_unsupported_verdict_has_reason() { detail: None, attempts: vec![], toolchain_match: None, + differential: None, }), ..Default::default() }); diff --git a/tests/oracle_differential.rs b/tests/oracle_differential.rs new file mode 100644 index 00000000..9fc01140 --- /dev/null +++ b/tests/oracle_differential.rs @@ -0,0 +1,156 @@ +//! Phase 07 — differential confirmation rule (`differential::evaluate`). +//! +//! These tests pin the pure-function behaviour of the differential rule +//! (§4.1): given the (vulnerable, benign-control) oracle firing booleans +//! produce the right verdict. Each case has a matching paragraph in the +//! plan's acceptance criteria. +//! +//! The harness here does *not* spawn a sandbox — it exercises the rule +//! independently of payload corpus, sandbox availability, or per-language +//! toolchains. Integration coverage that runs both payloads end-to-end +//! lives in `tests/{python,rust}_fixtures.rs` and the golden harness from +//! Phase 05. 
+
+#![cfg(feature = "dynamic")]
+
+use nyx_scanner::dynamic::differential::{build_outcome, evaluate};
+use nyx_scanner::dynamic::probe::{ProbeArg, SinkProbe};
+use nyx_scanner::evidence::DifferentialVerdict;
+
+// ── Rule table ──────────────────────────────────────────────────────────────
+// Expected verdict of `evaluate(<vuln fired>, <benign fired>)`; (a)–(d) match the `case_*` tests.
+// | vuln fires | benign fires | verdict                       |
+// |------------|--------------|-------------------------------|
+// | true       | true         | OracleCollisionSuspected  (a) |
+// | true       | false        | Confirmed                 (b) |
+// | false      | false        | NotConfirmed              (c) |
+// | false      | true         | ReversedDifferential      (d) |
+
+#[test]
+fn case_a_both_fire_is_oracle_collision() {
+    assert_eq!(
+        evaluate(true, true),
+        DifferentialVerdict::OracleCollisionSuspected,
+        "both vulnerable and benign firing must downgrade to OracleCollisionSuspected"
+    );
+}
+
+#[test]
+fn case_b_only_vuln_fires_is_confirmed() {
+    assert_eq!(
+        evaluate(true, false),
+        DifferentialVerdict::Confirmed,
+        "vuln fires + benign silent is the canonical Confirmed shape"
+    );
+}
+
+#[test]
+fn case_c_neither_fires_is_not_confirmed() {
+    assert_eq!(
+        evaluate(false, false),
+        DifferentialVerdict::NotConfirmed,
+        "zero firings is plain NotConfirmed (nothing to triage)"
+    );
+}
+
+#[test]
+fn case_d_only_benign_fires_is_reversed_differential() {
+    assert_eq!(
+        evaluate(false, true),
+        DifferentialVerdict::ReversedDifferential,
+        "only-benign-fires surfaces a misconfigured corpus, never a real Confirmed"
+    );
+}
+
+// ── build_outcome plumbing ───────────────────────────────────────────────────
+//
+// `build_outcome` is what the runner actually calls — it stamps the verdict and converts
+// native [`SinkProbe`] records into the serde-stable shape stored on `VerifyResult`. The tests
+// below pin that conversion: labels, probe counts, and per-probe callee / payload ids.
+ +fn sample_probe(callee: &str, arg: &str, label: &str) -> SinkProbe { + SinkProbe { + sink_callee: callee.into(), + args: vec![ProbeArg::String(arg.into())], + captured_at_ns: 1, + payload_id: label.into(), + } +} + +#[test] +fn build_outcome_confirmed_carries_both_traces() { + let vuln = vec![sample_probe("os.system", "; echo NYX_PWN_CMDI", "cmdi-echo-marker")]; + let benign = vec![sample_probe("os.system", "benign_safe_cmdi", "cmdi-benign")]; + let outcome = build_outcome( + "cmdi-echo-marker", + true, + &vuln, + "cmdi-benign", + false, + &benign, + ); + assert_eq!(outcome.verdict, DifferentialVerdict::Confirmed); + assert_eq!(outcome.vuln_label, "cmdi-echo-marker"); + assert_eq!(outcome.benign_label, "cmdi-benign"); + assert_eq!(outcome.vuln_probes.len(), 1); + assert_eq!(outcome.benign_probes.len(), 1); + assert_eq!(outcome.vuln_probes[0].sink_callee, "os.system"); + assert_eq!(outcome.vuln_probes[0].payload_id, "cmdi-echo-marker"); + assert_eq!(outcome.benign_probes[0].payload_id, "cmdi-benign"); +} + +#[test] +fn build_outcome_oracle_collision_keeps_both_traces() { + let vuln = vec![sample_probe("os.system", "a", "v")]; + let benign = vec![sample_probe("os.system", "b", "b")]; + let outcome = build_outcome("v", true, &vuln, "b", true, &benign); + assert_eq!(outcome.verdict, DifferentialVerdict::OracleCollisionSuspected); + assert_eq!(outcome.vuln_probes.len(), 1); + assert_eq!(outcome.benign_probes.len(), 1); +} + +#[test] +fn build_outcome_not_confirmed_records_empty_traces() { + let outcome = build_outcome("v", false, &[], "b", false, &[]); + assert_eq!(outcome.verdict, DifferentialVerdict::NotConfirmed); + assert!(outcome.vuln_probes.is_empty()); + assert!(outcome.benign_probes.is_empty()); +} + +#[test] +fn build_outcome_reversed_records_benign_only_trace() { + let benign = vec![sample_probe("os.system", "x", "b")]; + let outcome = build_outcome("v", false, &[], "b", true, &benign); + assert_eq!(outcome.verdict, 
DifferentialVerdict::ReversedDifferential); + assert!(outcome.vuln_probes.is_empty()); + assert_eq!(outcome.benign_probes.len(), 1); +} + +// ── Serde stability ────────────────────────────────────────────────────────── +// +// `VerifyResult.differential` is part of the public verdict JSON shape +// (consumed by SARIF emitters, the React frontend, and the verdict cache). +// Pin the wire format. + +#[test] +fn differential_outcome_serialises_as_pascal_case_verdict() { + let outcome = build_outcome("v", true, &[], "b", false, &[]); + let json = serde_json::to_value(&outcome).expect("serialise"); + assert_eq!(json["verdict"], "Confirmed"); + assert_eq!(json["vuln_label"], "v"); + assert_eq!(json["benign_label"], "b"); +} + +#[test] +fn differential_verdict_round_trips_through_json() { + for v in [ + DifferentialVerdict::Confirmed, + DifferentialVerdict::OracleCollisionSuspected, + DifferentialVerdict::NotConfirmed, + DifferentialVerdict::ReversedDifferential, + ] { + let json = serde_json::to_string(&v).unwrap(); + let back: DifferentialVerdict = serde_json::from_str(&json).unwrap(); + assert_eq!(v, back); + } +} diff --git a/tests/php_fixtures.rs b/tests/php_fixtures.rs index 968a19b0..7276ce3c 100644 --- a/tests/php_fixtures.rs +++ b/tests/php_fixtures.rs @@ -56,6 +56,7 @@ mod php_fixture_tests { detail: None, attempts: vec![], toolchain_match: None, + differential: None, }; } diff --git a/tests/repro_determinism.rs b/tests/repro_determinism.rs index bd16d699..f7f3eec1 100644 --- a/tests/repro_determinism.rs +++ b/tests/repro_determinism.rs @@ -65,6 +65,7 @@ mod repro_determinism_tests { sink_hit: true, }], toolchain_match: Some("exact".into()), + differential: None, } } diff --git a/tests/sarif_dynamic_verdict_tests.rs b/tests/sarif_dynamic_verdict_tests.rs index a5649d02..d67914ba 100644 --- a/tests/sarif_dynamic_verdict_tests.rs +++ b/tests/sarif_dynamic_verdict_tests.rs @@ -73,6 +73,7 @@ fn sarif_confirmed_verdict_sets_partial_fingerprint() { sink_hit: true, 
}], toolchain_match: Some("exact".into()), + differential: None, }; let result = sarif_result(diag_with_verdict(verdict)); @@ -105,6 +106,7 @@ fn sarif_not_confirmed_verdict_sets_partial_fingerprint() { detail: None, attempts: vec![], toolchain_match: Some("exact".into()), + differential: None, }; let result = sarif_result(diag_with_verdict(verdict)); @@ -131,6 +133,7 @@ fn sarif_unsupported_verdict_sets_partial_fingerprint() { detail: None, attempts: vec![], toolchain_match: None, + differential: None, }; let result = sarif_result(diag_with_verdict(verdict)); @@ -162,6 +165,7 @@ fn sarif_inconclusive_verdict_sets_partial_fingerprint() { detail: Some("build failed after 3 attempts".into()), attempts: vec![], toolchain_match: None, + differential: None, }; let result = sarif_result(diag_with_verdict(verdict)); @@ -209,6 +213,7 @@ fn sarif_confirmed_verdict_nyx_dynamic_verdict_contains_triggered_payload() { detail: None, attempts: vec![], toolchain_match: Some("exact".into()), + differential: None, }; let result = sarif_result(diag_with_verdict(verdict)); @@ -239,6 +244,7 @@ fn sarif_all_four_statuses_produce_partial_fingerprint() { detail: None, attempts: vec![], toolchain_match: None, + differential: None, }; let result = sarif_result(diag_with_verdict(verdict));