diff --git a/Cargo.toml b/Cargo.toml index 61fcdb43..b6a8105d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,9 +6,9 @@ rust-version = "1.88" description = "A multi-language static analysis tool for detecting security vulnerabilities" license = "GPL-3.0-or-later" authors = ["Eli Peter "] -homepage = "https://github.com/elicpeter/nyx" +homepage = "https://nyxsec.dev/scanner" repository = "https://github.com/elicpeter/nyx" -documentation = "https://elicpeter.github.io/nyx/" +documentation = "https://nyxsec.dev/docs/nyx/" keywords = ["security", "vulnerability", "scanner", "static-analysis", "cli"] categories = ["security", "command-line-utilities", "development-tools", "parser-implementations", "text-processing"] readme = "README.md" diff --git a/frontend/src/api/types.ts b/frontend/src/api/types.ts index e53e8abc..71659e50 100644 --- a/frontend/src/api/types.ts +++ b/frontend/src/api/types.ts @@ -3,7 +3,12 @@ export type Confidence = 'Low' | 'Medium' | 'High'; export type FlowStepKind = 'source' | 'assignment' | 'call' | 'phi' | 'sink'; // Dynamic verification types (from src/evidence.rs VerifyStatus / VerifyResult) -export type VerifyStatus = 'Confirmed' | 'NotConfirmed' | 'Inconclusive' | 'Unsupported'; +export type VerifyStatus = + | 'Confirmed' + | 'PartiallyConfirmed' + | 'NotConfirmed' + | 'Inconclusive' + | 'Unsupported'; export interface AttemptSummary { payload_label: string; @@ -29,6 +34,7 @@ export interface VerifyResult { export interface DynamicVerificationSummary { total: number; confirmed: number; + partially_confirmed: number; not_confirmed: number; inconclusive: number; unsupported: number; diff --git a/frontend/src/components/VerdictBadge.tsx b/frontend/src/components/VerdictBadge.tsx index f6505f38..804bc18b 100644 --- a/frontend/src/components/VerdictBadge.tsx +++ b/frontend/src/components/VerdictBadge.tsx @@ -2,6 +2,7 @@ import type { VerifyResult, VerifyStatus } from '../api/types'; const STATUS_LABELS: Record = { Confirmed: 'Confirmed', + 
PartiallyConfirmed: 'Partially confirmed', NotConfirmed: 'Not confirmed', Inconclusive: 'Inconclusive', Unsupported: 'Unsupported', @@ -15,6 +16,10 @@ function verdictTooltip(verdict: VerifyResult): string { return triggered_payload ? `Confirmed via payload: ${triggered_payload}` : 'Dynamically confirmed exploitable'; + case 'PartiallyConfirmed': + return detail + ? `Partially confirmed (sink reached): ${detail}` + : 'Partially confirmed: sink reached but exploit chain did not complete'; case 'NotConfirmed': return (verdict.attempts?.length ?? 0) > 0 ? `Not confirmed after ${verdict.attempts?.length ?? 0} payload attempt(s)` diff --git a/frontend/src/components/overview/OverviewWidgets.tsx b/frontend/src/components/overview/OverviewWidgets.tsx index 4284cbe9..d962195a 100644 --- a/frontend/src/components/overview/OverviewWidgets.tsx +++ b/frontend/src/components/overview/OverviewWidgets.tsx @@ -244,13 +244,14 @@ export function ScannerQualityPanel({ const dynamic = quality.dynamic_verification ?? { total: 0, confirmed: 0, + partially_confirmed: 0, not_confirmed: 0, inconclusive: 0, unsupported: 0, }; const dynamicDetail = dynamic.total > 0 - ? `${dynamic.total.toLocaleString()} verdicts · ${dynamic.not_confirmed.toLocaleString()} not confirmed · ${dynamic.inconclusive.toLocaleString()} inconclusive · ${dynamic.unsupported.toLocaleString()} unsupported` + ? 
`${dynamic.total.toLocaleString()} verdicts · ${dynamic.partially_confirmed.toLocaleString()} partially confirmed · ${dynamic.not_confirmed.toLocaleString()} not confirmed · ${dynamic.inconclusive.toLocaleString()} inconclusive · ${dynamic.unsupported.toLocaleString()} unsupported` : 'no dynamic verdicts in latest scan'; const rows: Array<{ diff --git a/frontend/src/pages/FindingsPage.tsx b/frontend/src/pages/FindingsPage.tsx index 3e8cef1d..40dd9c61 100644 --- a/frontend/src/pages/FindingsPage.tsx +++ b/frontend/src/pages/FindingsPage.tsx @@ -31,6 +31,7 @@ function formatTriageState(state: string): string { function formatVerificationStatus(status: string): string { if (status === 'NotConfirmed') return 'Not confirmed'; + if (status === 'PartiallyConfirmed') return 'Partially confirmed'; return status || 'Unverified'; } diff --git a/frontend/src/styles/global.css b/frontend/src/styles/global.css index 41ab69fa..ace0dbef 100644 --- a/frontend/src/styles/global.css +++ b/frontend/src/styles/global.css @@ -2668,6 +2668,10 @@ tr.selected td { background: var(--success-bg); color: var(--success); } +.badge-dyn-partiallyconfirmed { + background: var(--conf-medium-bg); + color: var(--conf-medium); +} .badge-dyn-notconfirmed { background: var(--bg-secondary); color: var(--text-secondary); diff --git a/frontend/src/test/components/dynamicVerdictSection.test.tsx b/frontend/src/test/components/dynamicVerdictSection.test.tsx index 26ed1f3e..ddc43418 100644 --- a/frontend/src/test/components/dynamicVerdictSection.test.tsx +++ b/frontend/src/test/components/dynamicVerdictSection.test.tsx @@ -43,6 +43,19 @@ describe('DynamicVerdictSection', () => { ).toBeInTheDocument(); }); + it('renders PartiallyConfirmed badge', () => { + render( + , + ); + expect( + screen.getByTestId('verdict-badge-partiallyconfirmed'), + ).toBeInTheDocument(); + }); + it('does not crash when the API omits an empty attempts array', () => { render( { unmount(); for (const status of [ + 'PartiallyConfirmed', 
'NotConfirmed', 'Unsupported', 'Inconclusive', diff --git a/frontend/src/test/components/verdictBadge.test.tsx b/frontend/src/test/components/verdictBadge.test.tsx index 9a55c338..30f504d6 100644 --- a/frontend/src/test/components/verdictBadge.test.tsx +++ b/frontend/src/test/components/verdictBadge.test.tsx @@ -35,6 +35,21 @@ describe('VerdictBadge', () => { expect(badge.textContent).toContain('🔥'); }); + it('renders PartiallyConfirmed badge with amber class and no flame', () => { + render( + , + ); + const badge = screen.getByTestId('verdict-badge-partiallyconfirmed'); + expect(badge).toBeInTheDocument(); + expect(badge.className).toContain('badge-dyn-partiallyconfirmed'); + expect(badge.textContent).not.toContain('🔥'); + expect(badge.getAttribute('title')).toContain('sink reached'); + }); + it('renders NotConfirmed badge with correct class', () => { render(); const badge = screen.getByTestId('verdict-badge-notconfirmed'); @@ -107,9 +122,10 @@ describe('VerdictBadge', () => { expect(badge.textContent?.replace('🔥 ', '')).toBe('C'); }); - it('renders all four VerifyStatus variants without crashing', () => { + it('renders all five VerifyStatus variants without crashing', () => { const statuses: VerifyResult['status'][] = [ 'Confirmed', + 'PartiallyConfirmed', 'NotConfirmed', 'Unsupported', 'Inconclusive', diff --git a/src/baseline.rs b/src/baseline.rs index 1bf8ceef..d987e4f8 100644 --- a/src/baseline.rs +++ b/src/baseline.rs @@ -308,6 +308,10 @@ pub fn check_gate(diff: &VerdictDiff, gate: &str) -> bool { && matches!( e.current_status, Some(VerifyStatus::Confirmed) + // PartiallyConfirmed = sink still reachable at + // runtime, so a baseline-Confirmed finding that is + // now partial has NOT been resolved. 
+ | Some(VerifyStatus::PartiallyConfirmed) | Some(VerifyStatus::Inconclusive) | Some(VerifyStatus::Unsupported) ) @@ -323,6 +327,7 @@ pub fn check_gate(diff: &VerdictDiff, gate: &str) -> bool { fn status_str(s: Option<VerifyStatus>) -> &'static str { match s { Some(VerifyStatus::Confirmed) => "Confirmed", + Some(VerifyStatus::PartiallyConfirmed) => "PartiallyConfirmed", Some(VerifyStatus::NotConfirmed) => "NotConfirmed", Some(VerifyStatus::Inconclusive) => "Inconclusive", Some(VerifyStatus::Unsupported) => "Unsupported", diff --git a/src/chain/feasibility.rs b/src/chain/feasibility.rs index 8c1599cd..895ef44e 100644 --- a/src/chain/feasibility.rs +++ b/src/chain/feasibility.rs @@ -37,8 +37,11 @@ pub enum Feasibility { /// but where the static evidence is strong. InconclusiveHighConf, /// Everything else — no dynamic verification, dynamic verdict was - /// `NotConfirmed`/`Unsupported`, or dynamic was `Inconclusive` but - /// static confidence is not `High`. + /// `NotConfirmed`/`PartiallyConfirmed`/`Unsupported`, or dynamic was + /// `Inconclusive` but static confidence is not `High`. A + /// `PartiallyConfirmed` verdict proves only that the sink is reachable, + /// not that the exploit chain completes, so it stays conservative here: + /// it must not inflate a multi-hop path score. 
Unverified, } diff --git a/src/commands/scan.rs b/src/commands/scan.rs index 4b09fdef..13699ffe 100644 --- a/src/commands/scan.rs +++ b/src/commands/scan.rs @@ -242,6 +242,7 @@ pub fn compute_stable_hash(diag: &Diag) -> u64 { pub struct DynamicVerificationSummary { pub total: usize, pub confirmed: usize, + pub partially_confirmed: usize, pub not_confirmed: usize, pub inconclusive: usize, pub unsupported: usize, @@ -261,6 +262,9 @@ impl DynamicVerificationSummary { summary.total += 1; match verdict.status { crate::evidence::VerifyStatus::Confirmed => summary.confirmed += 1, + crate::evidence::VerifyStatus::PartiallyConfirmed => { + summary.partially_confirmed += 1 + } crate::evidence::VerifyStatus::NotConfirmed => summary.not_confirmed += 1, crate::evidence::VerifyStatus::Inconclusive => summary.inconclusive += 1, crate::evidence::VerifyStatus::Unsupported => summary.unsupported += 1, @@ -282,10 +286,11 @@ pub fn format_dynamic_verification_summary(summary: &DynamicVerificationSummary) "verdicts" }; format!( - "{} {} ({} confirmed, {} not confirmed, {} inconclusive, {} unsupported)", + "{} {} ({} confirmed, {} partially confirmed, {} not confirmed, {} inconclusive, {} unsupported)", summary.total, noun, summary.confirmed, + summary.partially_confirmed, summary.not_confirmed, summary.inconclusive, summary.unsupported diff --git a/src/dynamic/differential.rs b/src/dynamic/differential.rs index dd5eb430..5a6d5ecb 100644 --- a/src/dynamic/differential.rs +++ b/src/dynamic/differential.rs @@ -1,19 +1,23 @@ -//! Differential confirmation rule for dynamic verification (Phase 07). +//! Differential confirmation rule for dynamic verification (Phase 07 / 26). //! -//! `Confirmed` requires the vulnerable payload's oracle to fire **and** -//! the paired benign control's oracle to *not* fire (§4.1). This module -//! is the single source of truth for that rule. Everything else (runner, -//! verifier, tests) collapses to "look up paired benign + call -//! [`evaluate`]". +//! 
`Confirmed` requires **at least one** vulnerable payload's oracle to +//! fire **and every** paired benign control's oracle to *not* fire +//! (§4.1, extended for multi-payload aggregation in Phase 26). This +//! module is the single source of truth for that rule. Everything else +//! (runner, verifier, tests) collapses to "collect firing sets + call +//! [`evaluate_sets`]". //! -//! # Rule table +//! # Rule table (set aggregation) //! -//! | vuln fires | benign fires | verdict | -//! |------------|--------------|-------------------------------| -//! | true | false | `Confirmed` | -//! | true | true | `OracleCollisionSuspected` | -//! | false | false | `NotConfirmed` | -//! | false | true | `ReversedDifferential` | +//! | any vuln fires | any benign fires | verdict | +//! |----------------|------------------|----------------------------| +//! | true | false | `Confirmed` | +//! | true | true | `OracleCollisionSuspected` | +//! | false | false | `NotConfirmed` | +//! | false | true | `ReversedDifferential` | +//! +//! The scalar [`evaluate`] is the single-payload, single-control +//! specialisation of [`evaluate_sets`] and delegates to it. //! //! "Fires" means [`crate::dynamic::oracle::oracle_fired`] returned `true` //! against the run's [`SandboxOutcome`] + drained [`SinkProbe`] set — @@ -24,8 +28,33 @@ use crate::evidence::{ DifferentialOutcome, DifferentialProbeArg, DifferentialProbeRecord, DifferentialVerdict, }; -/// Apply the differential confirmation rule. +/// Apply the differential confirmation rule over **sets** of firing +/// results (Phase 26 multi-payload aggregation). /// +/// `vuln_fired` is one boolean per vulnerable payload attempt; +/// `benign_fired` is one boolean per paired benign control that actually +/// ran. 
Aggregation is "any vuln vs any benign" with global ambient-noise +/// scoring across the run: a *single* benign control firing anywhere +/// vetoes `Confirmed` (the oracle cannot discriminate), and a *single* +/// vulnerable payload firing is enough positive evidence. +/// +/// Empty slices behave as "nothing fired" on that side, so +/// `evaluate_sets(&[], &[])` is `NotConfirmed`. +pub fn evaluate_sets(vuln_fired: &[bool], benign_fired: &[bool]) -> DifferentialVerdict { + let any_vuln = vuln_fired.iter().any(|&b| b); + let any_benign = benign_fired.iter().any(|&b| b); + match (any_vuln, any_benign) { + (true, false) => DifferentialVerdict::Confirmed, + (true, true) => DifferentialVerdict::OracleCollisionSuspected, + (false, false) => DifferentialVerdict::NotConfirmed, + (false, true) => DifferentialVerdict::ReversedDifferential, + } +} + +/// Apply the differential confirmation rule to a single +/// (vulnerable, benign-control) pair. +/// +/// Single-element specialisation of [`evaluate_sets`]. /// `vuln_probe_fires` and `benign_probe_fires` are the boolean firing /// results of [`crate::dynamic::oracle::oracle_fired`] for the /// vulnerable payload and its paired benign control respectively. The @@ -33,12 +62,7 @@ use crate::evidence::{ /// callers attach those separately via [`DifferentialOutcome`] for /// forensic display. 
pub fn evaluate(vuln_probe_fires: bool, benign_probe_fires: bool) -> DifferentialVerdict { - match (vuln_probe_fires, benign_probe_fires) { - (true, false) => DifferentialVerdict::Confirmed, - (true, true) => DifferentialVerdict::OracleCollisionSuspected, - (false, false) => DifferentialVerdict::NotConfirmed, - (false, true) => DifferentialVerdict::ReversedDifferential, - } + evaluate_sets(&[vuln_probe_fires], &[benign_probe_fires]) } /// Build a [`DifferentialOutcome`] for inclusion in a @@ -139,6 +163,61 @@ mod tests { ); } + #[test] + fn sets_any_vuln_no_benign_is_confirmed() { + // One of several vuln payloads firing is enough; no benign fired. + assert_eq!( + evaluate_sets(&[false, true, false], &[false, false]), + DifferentialVerdict::Confirmed + ); + } + + #[test] + fn sets_one_benign_firing_vetoes_confirmed() { + // A single benign control firing anywhere downgrades to collision, + // even when a vuln payload also fired (global ambient-noise veto). + assert_eq!( + evaluate_sets(&[true, true], &[false, true, false]), + DifferentialVerdict::OracleCollisionSuspected + ); + } + + #[test] + fn sets_no_vuln_no_benign_is_not_confirmed() { + assert_eq!( + evaluate_sets(&[false, false], &[false]), + DifferentialVerdict::NotConfirmed + ); + } + + #[test] + fn sets_no_vuln_some_benign_is_reversed() { + assert_eq!( + evaluate_sets(&[false], &[true]), + DifferentialVerdict::ReversedDifferential + ); + } + + #[test] + fn sets_empty_is_not_confirmed() { + assert_eq!(evaluate_sets(&[], &[]), DifferentialVerdict::NotConfirmed); + } + + #[test] + fn sets_empty_benign_with_vuln_is_confirmed() { + // No benign control ran at all → no veto possible → Confirmed. 
+ assert_eq!(evaluate_sets(&[true], &[]), DifferentialVerdict::Confirmed); + } + + #[test] + fn scalar_evaluate_matches_singleton_sets() { + for &v in &[false, true] { + for &b in &[false, true] { + assert_eq!(evaluate(v, b), evaluate_sets(&[v], &[b])); + } + } + } + #[test] fn oob_self_confirmed_outcome_carries_only_vuln_trace() { use crate::dynamic::probe::{ProbeArg, ProbeKind, ProbeWitness, SinkProbe}; diff --git a/src/dynamic/runner.rs b/src/dynamic/runner.rs index 6265bd6d..b646b28b 100644 --- a/src/dynamic/runner.rs +++ b/src/dynamic/runner.rs @@ -59,10 +59,29 @@ const MAX_BUILD_ATTEMPTS: u32 = 2; pub struct RunOutcome { pub spec: HarnessSpec, pub attempts: Vec, - /// First attempt that fired the sink with `oracle_fired && sink_hit`. + /// Index into [`Self::attempts`] of the attempt the confirm verdict is + /// attributed to. Set by the Phase 26 set aggregation when + /// [`crate::dynamic::differential::evaluate_sets`] returns a + /// Confirmed-class verdict (any vuln payload fired the oracle + sink + /// while every paired benign control stayed clean), or when an + /// OOB-nonce payload self-confirmed. `None` otherwise. pub triggered_by: Option, /// Whether the oracle fired but the sink probe did not (oracle collision). pub oracle_collision: bool, + /// Phase 26: a vuln payload's in-harness sink-reachability probe fired + /// (`outcome.sink_hit`) but its oracle marker was never observed (no file + /// write / no OOB callback / output lacked the proof token), *and* the + /// paired benign control neither reached the sink nor fired its oracle. + /// The benign-control differential is the discriminator: it proves the + /// vuln input specifically drives the sink, ruling out safe code that + /// merely reaches the sink (e.g. array-form `exec` with inert + /// metacharacters, which the benign control also reaches). 
The verifier + /// maps this to [`crate::evidence::VerifyStatus::PartiallyConfirmed`]: the + /// sink is reachable under the vuln input but the exploit chain did not + /// complete. Never set when a Confirmed-class verdict or a colliding + /// differential was produced (those take precedence at the verify + /// boundary). + pub sink_reached_no_oracle: bool, /// Number of build attempts consumed. pub build_attempts: u32, /// Harness sources for repro artifacts. @@ -454,6 +473,24 @@ pub fn run_spec(spec: &HarnessSpec, opts: &SandboxOptions) -> Result = None; + // Phase 26 set aggregation, phase A: per-vuln-payload run record. + // Every vuln payload runs to completion (no early break) so the + // differential rule can aggregate across the whole set — a single + // benign control firing anywhere must be able to veto a `Confirmed`. + struct VulnRun { + /// Index into `vuln_payloads` (for benign-control resolution). + payload_index: usize, + /// Index into `attempts` (what `triggered_by` points at). + attempt_index: usize, + vuln_fired: bool, + sink_hit: bool, + oob_nonce_slot: bool, + oob_callback_seen: bool, + vuln_probes: Vec, + } + let mut vuln_runs: Vec = Vec::with_capacity(vuln_payloads.len()); + + // ── Phase A: run every vuln payload, record its firing signals ────── for (i, payload) in vuln_payloads.iter().enumerate() { // Materialise payload bytes (OOB nonce-slot payloads generate a URL). let (oob_nonce, effective_bytes) = if payload.oob_nonce_slot { @@ -480,11 +517,12 @@ pub fn run_spec(spec: &HarnessSpec, opts: &SandboxOptions) -> Result Result Result Result Result { - // Phase 05 OOB closure: OOB-nonce payloads with - // `benign_control = None` are structurally self- - // confirming when the listener observed the callback. - // A benign URL cannot hit a per-finding nonce, so the - // OOB observation is independent network-level - // evidence the sink fired. Skip the no-benign-control - // downgrade and emit - // [`DifferentialVerdict::ConfirmedProvenOob`]. 
- if payload.oob_nonce_slot && outcome.oob_callback_seen { - let mut outcome_record = differential::build_oob_self_confirmed_outcome( - payload.label, - &vuln_probes, - ); - middleware_demotion::apply_demotion( - &mut outcome_record, - spec.framework.as_ref(), - spec.lang, - ); - let confirmed = - middleware_demotion::is_triggering_verdict(outcome_record.verdict); - differential_outcome = Some(outcome_record); - confirmed - } else { - no_benign_control = true; - false - } - } - Some(benign) => { - let benign_bytes = materialise_bytes(benign, None) - .map(|b| b.into_owned()) - .unwrap_or_default(); - if let Some(ch) = &probe_channel { - let _ = ch.clear(); - } - let benign_outcome = sandbox::run(&harness, &benign_bytes, &effective_opts)?; - let benign_probes: Vec = probe_channel - .as_ref() - .map(|ch| ch.drain()) - .unwrap_or_default(); - let benign_stub_events: Vec = effective_opts - .stub_harness - .as_ref() - .map(|h| h.drain_all()) - .unwrap_or_default(); - let benign_fired = oracle_fired_with_stubs( - &benign.oracle, - &benign_outcome, - &benign_probes, - &benign_stub_events, + // Legacy single-payload collision: oracle fired without the + // in-harness sink-hit sentinel. Phase 26 partial-confirmation is + // deliberately NOT decided here: a vuln run that reaches the sink + // without firing its oracle is ambiguous — it could be a real engine + // gap (the vuln input drives the sink but the exploit chain could not + // be observed) or merely safe code that happens to reach the sink + // (e.g. array-form `exec` with inert metacharacters). The call is + // deferred to the differential check in Phase B, which compares the + // benign control's sink reachability. 
+ if vuln_fired && !sink_hit { + oracle_collision = true; + } + + let oob_callback_seen = outcome.oob_callback_seen; + attempts.push(Attempt { + payload_label: payload.label, + outcome, + oracle_fired: vuln_fired, + triggered: false, + }); + vuln_runs.push(VulnRun { + payload_index: i, + attempt_index, + vuln_fired, + sink_hit, + oob_nonce_slot: payload.oob_nonce_slot, + oob_callback_seen, + vuln_probes, + }); + } + + // ── Phase B: differential confirmation + partial-confirmation gate ── + // Two candidate classes drive a paired benign-control run: + // • confirm candidate — vuln oracle fired *and* the in-harness sink-hit + // sentinel was observed. Collected into the set aggregation (§4.1). + // • partial candidate — the sink-hit sentinel fired but the oracle did + // not. The benign control's sink reachability decides whether this is + // a real engine gap (`PartiallyConfirmed`) or safe code that merely + // reaches the sink (`NotConfirmed`). + // Oracle-fires-without-sink stays on the legacy `oracle_collision` path. + let mut vuln_fires: Vec<bool> = Vec::new(); + let mut benign_fires: Vec<bool> = Vec::new(); + // (attempt_index, differential outcome) per confirm candidate. + let mut candidates: Vec<(usize, DifferentialOutcome)> = Vec::new(); + // Phase 26: set when a partial candidate's vuln run reached the sink that + // its benign control did *not* — a sink-reachability differential proving + // the vuln input specifically drives the sink even though the exploit + // chain could not be observed completing. + let mut partial_signal = false; + + for vr in &vuln_runs { + let is_confirm_candidate = vr.vuln_fired && vr.sink_hit; + let is_partial_candidate = vr.sink_hit && !vr.vuln_fired; + if !is_confirm_candidate && !is_partial_candidate { + continue; + } + // The partial signal is a single bool; once established, skip further + // partial-only probing. Confirm candidates always run — the set + // aggregation needs every one. 
+ if is_partial_candidate && !is_confirm_candidate && partial_signal { + continue; + } + let payload = vuln_payloads[vr.payload_index]; + // Match the resolution scope to the payload-slice scope so a benign + // control declared in another language is still found when this run + // was driven off the lang-agnostic union (see `used_lang_slice`). + // When the run did use the per-language slice, the lang-aware + // resolver keeps a mismatched language from producing a Confirmed. + let resolved = if used_lang_slice { + resolve_benign_control_lang(payload, spec.expected_cap, spec.lang) + } else { + resolve_benign_control(payload, spec.expected_cap) + }; + match resolved { + None => { + // Phase 05 OOB closure: OOB-nonce payloads with + // `benign_control = None` are structurally self-confirming + // when the listener observed the callback. A benign URL + // cannot hit a per-finding nonce, so the OOB observation is + // independent network-level evidence the sink fired. Skip + // the no-benign-control downgrade and emit + // [`DifferentialVerdict::ConfirmedProvenOob`]. + if is_confirm_candidate && vr.oob_nonce_slot && vr.oob_callback_seen { + let mut outcome_record = differential::build_oob_self_confirmed_outcome( + payload.label, + &vr.vuln_probes, ); + middleware_demotion::apply_demotion( + &mut outcome_record, + spec.framework.as_ref(), + spec.lang, + ); + // No paired benign control runs, so this candidate + // contributes only to the vuln side of the set. + vuln_fires.push(true); + candidates.push((vr.attempt_index, outcome_record)); + } else if is_confirm_candidate { + no_benign_control = true; + } + // A partial candidate without a benign control cannot rule out + // "safe code that reaches the sink", so it raises no partial + // signal and falls through to `NotConfirmed`. 
+ } + Some(benign) => { + let benign_bytes = materialise_bytes(benign, None) + .map(|b| b.into_owned()) + .unwrap_or_default(); + if let Some(ch) = &probe_channel { + let _ = ch.clear(); + } + let benign_outcome = sandbox::run(&harness, &benign_bytes, &effective_opts)?; + let benign_sink_hit = benign_outcome.sink_hit; + let benign_probes: Vec = probe_channel + .as_ref() + .map(|ch| ch.drain()) + .unwrap_or_default(); + let benign_stub_events: Vec = effective_opts + .stub_harness + .as_ref() + .map(|h| h.drain_all()) + .unwrap_or_default(); + let benign_fired = oracle_fired_with_stubs( + &benign.oracle, + &benign_outcome, + &benign_probes, + &benign_stub_events, + ); + + if is_confirm_candidate { let mut outcome_record = differential::build_outcome( payload.label, - vuln_fired, - &vuln_probes, + vr.vuln_fired, + &vr.vuln_probes, benign.label, benign_fired, &benign_probes, ); // Phase 05 OOB closure: when an OOB-nonce payload also - // carries a paired benign control, promote - // `Confirmed` → `ConfirmedProvenOob` whenever the - // listener observed the per-finding nonce. The - // upgrade preserves the differential trace (benign - // run still recorded) and surfaces the stronger - // network-level evidence to operators. + // carries a paired benign control, promote `Confirmed` → + // `ConfirmedProvenOob` whenever the listener observed the + // per-finding nonce. The upgrade preserves the differential + // trace (benign run still recorded) and surfaces the + // stronger network-level evidence to operators. 
if outcome_record.verdict == DifferentialVerdict::Confirmed - && payload.oob_nonce_slot - && outcome.oob_callback_seen + && vr.oob_nonce_slot + && vr.oob_callback_seen { outcome_record.verdict = DifferentialVerdict::ConfirmedProvenOob; } @@ -661,30 +758,68 @@ pub fn run_spec(spec: &HarnessSpec, opts: &SandboxOptions) -> Result Result bool { - if matches!(status, VerifyStatus::Confirmed) && self.keep_all_confirmed { + if matches!( + status, + VerifyStatus::Confirmed | VerifyStatus::PartiallyConfirmed + ) && self.keep_all_confirmed + { + // PartiallyConfirmed is a low-volume, high-value triage signal + // (each is a candidate real engine gap), so it rides the same + // keep-all switch as Confirmed rather than being sampled away. return true; } if matches!(status, VerifyStatus::Inconclusive) && self.keep_all_inconclusive { @@ -389,6 +396,7 @@ pub fn emit_with_policy(event: &TelemetryEvent, policy: &SamplingPolicy) { fn parse_status(s: &str) -> Option { match s { "Confirmed" => Some(VerifyStatus::Confirmed), + "PartiallyConfirmed" => Some(VerifyStatus::PartiallyConfirmed), "NotConfirmed" => Some(VerifyStatus::NotConfirmed), "Inconclusive" => Some(VerifyStatus::Inconclusive), "Unsupported" => Some(VerifyStatus::Unsupported), diff --git a/src/dynamic/verify.rs b/src/dynamic/verify.rs index e3d86881..0a7f846a 100644 --- a/src/dynamic/verify.rs +++ b/src/dynamic/verify.rs @@ -987,9 +987,16 @@ fn build_verdict( if let Some(i) = run.triggered_by { let triggered_payload = run.attempts[i].payload_label.to_string(); + // Resolve repro bytes by label, not by index: OOB payloads + // skipped for lack of a listener leave `attempts` shorter + // than `vuln_payloads`, so a positional lookup can pull the + // wrong payload's bytes. The label is the stable key. 
let payloads = payloads_for(spec.expected_cap); - let vuln_payloads: Vec<_> = payloads.iter().filter(|p| !p.is_benign).collect(); - let payload_bytes = vuln_payloads.get(i).map(|p| p.bytes).unwrap_or(b""); + let payload_bytes = payloads + .iter() + .find(|p| !p.is_benign && p.label == triggered_payload) + .map(|p| p.bytes) + .unwrap_or(b""); let hardening_outcome = summarize_hardening(&run.attempts[i].outcome); // Emit repro artifact. @@ -1156,6 +1163,33 @@ fn build_verdict( hardening_outcome: None, }, } + } else if run.sink_reached_no_oracle { + // Phase 26: a vuln payload's in-harness sink-reachability + // probe fired but its oracle marker never did, and the run + // produced no Confirmed-class verdict and no colliding + // differential. The sink is reachable at runtime yet the + // exploit chain did not complete (no marker file written, + // no OOB callback observed, output lacked the proof token). + // Surface `PartiallyConfirmed` so engine work can ratchet on + // the real sink-reachability gap without overstating it as a + // confirmed exploit. No repro artifact is written: there is + // no proven exploit to reproduce. 
+ VerifyResult { + finding_id: finding_id.to_owned(), + status: VerifyStatus::PartiallyConfirmed, + triggered_payload: None, + reason: None, + inconclusive_reason: None, + detail: Some( + "sink-reachability probe fired but the oracle marker was not observed; exploit chain did not complete".to_owned(), + ), + attempts, + toolchain_match: Some(toolchain_match.to_owned()), + differential: None, + replay_stable: None, + wrong: None, + hardening_outcome: None, + } } else if run.oracle_collision { // Oracle fired but the sink-hit sentinel did not — // legacy single-payload collision path, predates the @@ -1735,4 +1769,141 @@ mod tests { "current corpus_version entry must be a cache hit" ); } + + fn partial_spec() -> HarnessSpec { + HarnessSpec { + finding_id: "deadbeefcafef00d".into(), + entry_file: "app.py".into(), + entry_name: "login".into(), + entry_kind: crate::dynamic::spec::EntryKind::Function, + lang: crate::symbol::Lang::Python, + toolchain_id: "python-3.11".into(), + payload_slot: crate::dynamic::spec::PayloadSlot::Param(0), + expected_cap: crate::labels::Cap::SQL_QUERY, + constraint_hints: vec![], + sink_file: "app.py".into(), + sink_line: 10, + spec_hash: "cafecafecafe0001".into(), + derivation: SpecDerivationStrategy::FromFlowSteps, + stubs_required: vec![], + framework: None, + java_toolchain: crate::dynamic::spec::JavaToolchain::default(), + } + } + + /// Phase 26: a vuln payload whose sink-reachability probe fired but whose + /// oracle marker never did — and no Confirmed-class verdict, no + /// differential outcome, no benign-control gap — must surface as + /// `PartiallyConfirmed`, carry no `triggered_payload`, and write no repro. 
+ #[test] + fn build_verdict_sink_reached_no_oracle_maps_to_partially_confirmed() { + use crate::dynamic::runner::{Attempt, RunOutcome}; + use crate::dynamic::sandbox::SandboxOutcome; + + let opts = VerifyOptions::from_config(&Config::default()); + let run = RunOutcome { + spec: partial_spec(), + attempts: vec![Attempt { + payload_label: "sqli-tautology", + outcome: SandboxOutcome { + exit_code: Some(0), + stdout: b"__NYX_SINK_HIT__".to_vec(), + stderr: Vec::new(), + timed_out: false, + oob_callback_seen: false, + sink_hit: true, + duration: std::time::Duration::ZERO, + hardening_outcome: None, + }, + oracle_fired: false, + triggered: false, + }], + triggered_by: None, + oracle_collision: false, + sink_reached_no_oracle: true, + build_attempts: 1, + harness_source: String::new(), + entry_source: String::new(), + differential: None, + no_benign_control: false, + unrelated_crash: false, + }; + + let verdict = build_verdict( + "deadbeefcafef00d", + &partial_spec(), + Ok(run), + "exact", + &opts, + std::time::Duration::ZERO, + ); + + assert_eq!(verdict.status, VerifyStatus::PartiallyConfirmed); + assert!( + verdict.triggered_payload.is_none(), + "PartiallyConfirmed must not claim a triggering payload" + ); + assert!( + verdict + .detail + .as_deref() + .unwrap_or_default() + .contains("sink-reachability probe fired"), + "detail must explain the sink reached but the chain did not complete: {:?}", + verdict.detail + ); + // The sink-hit attempt must survive into the surfaced attempt list. + assert_eq!(verdict.attempts.len(), 1); + assert!(verdict.attempts[0].sink_hit); + assert!(!verdict.attempts[0].triggered); + } + + /// Regression guard: a clean run (no sink hit, no oracle) must stay + /// `NotConfirmed` — the `PartiallyConfirmed` branch must not swallow the + /// ordinary negative case. 
+ #[test] + fn build_verdict_clean_run_stays_not_confirmed() { + use crate::dynamic::runner::{Attempt, RunOutcome}; + use crate::dynamic::sandbox::SandboxOutcome; + + let opts = VerifyOptions::from_config(&Config::default()); + let run = RunOutcome { + spec: partial_spec(), + attempts: vec![Attempt { + payload_label: "sqli-tautology", + outcome: SandboxOutcome { + exit_code: Some(0), + stdout: Vec::new(), + stderr: Vec::new(), + timed_out: false, + oob_callback_seen: false, + sink_hit: false, + duration: std::time::Duration::ZERO, + hardening_outcome: None, + }, + oracle_fired: false, + triggered: false, + }], + triggered_by: None, + oracle_collision: false, + sink_reached_no_oracle: false, + build_attempts: 1, + harness_source: String::new(), + entry_source: String::new(), + differential: None, + no_benign_control: false, + unrelated_crash: false, + }; + + let verdict = build_verdict( + "deadbeefcafef00d", + &partial_spec(), + Ok(run), + "exact", + &opts, + std::time::Duration::ZERO, + ); + + assert_eq!(verdict.status, VerifyStatus::NotConfirmed); + } } diff --git a/src/evidence.rs b/src/evidence.rs index 2c749f36..53436893 100644 --- a/src/evidence.rs +++ b/src/evidence.rs @@ -727,6 +727,14 @@ pub enum VerifyStatus { /// Sink fired with at least one payload. The static finding is exploitable /// against the live target. Confirmed, + /// The in-harness sink-reachability probe fired (sink reached) but the + /// oracle marker was never observed (no file write / no OOB callback / + /// output did not contain the proof token), so the exploit chain did not + /// complete. Semantically `{ sink_reached: true, exit_propagated: false }`. + /// Ranks above `NotConfirmed` (runtime corroboration that the sink is + /// reachable) but below `Confirmed` (no proven exploit). Used so engine + /// work can ratchet on real sink-reachability gaps without overstating. + PartiallyConfirmed, /// All payloads ran cleanly. 
Either the path is infeasible at runtime /// or the corpus is too narrow. Treat as "static-only", not "false positive". NotConfirmed, diff --git a/src/fmt.rs b/src/fmt.rs index a4f30b73..b2816b8c 100644 --- a/src/fmt.rs +++ b/src/fmt.rs @@ -558,6 +558,7 @@ fn format_dynamic_verdict_annotation(dv: &crate::evidence::VerifyResult) -> Stri let pid = dv.triggered_payload.as_deref().unwrap_or("unknown"); format!("[DYN: confirmed via {pid}]") } + VerifyStatus::PartiallyConfirmed => "[DYN: partially confirmed (sink reached)]".to_string(), VerifyStatus::NotConfirmed => "[DYN: not confirmed]".to_string(), VerifyStatus::Unsupported => { let reason = dv diff --git a/src/rank.rs b/src/rank.rs index 4d0ef69f..2b5b2096 100644 --- a/src/rank.rs +++ b/src/rank.rs @@ -258,6 +258,12 @@ fn dynamic_verdict_delta(diag: &Diag) -> Option { let dv = diag.evidence.as_ref()?.dynamic_verdict.as_ref()?; match dv.status { VerifyStatus::Confirmed => Some(20.0), + // PartiallyConfirmed: the sink was reached at runtime but the + // exploit chain did not complete. Runtime corroboration that the + // sink is reachable is a positive signal, but weaker than a proven + // exploit, so it earns a modest bump rather than the full Confirmed + // boost. 
+ VerifyStatus::PartiallyConfirmed => Some(8.0), // Apply penalty only when the corpus was actually exhausted (attempts // were made); a NotConfirmed with zero attempts means something went // wrong before payload execution, which is an Inconclusive path, not diff --git a/src/server/models.rs b/src/server/models.rs index 7f382d7b..b5e143d2 100644 --- a/src/server/models.rs +++ b/src/server/models.rs @@ -293,6 +293,7 @@ fn status_for_diag(d: &Diag) -> &'static str { pub fn dynamic_status_label(status: VerifyStatus) -> &'static str { match status { VerifyStatus::Confirmed => "Confirmed", + VerifyStatus::PartiallyConfirmed => "PartiallyConfirmed", VerifyStatus::NotConfirmed => "NotConfirmed", VerifyStatus::Inconclusive => "Inconclusive", VerifyStatus::Unsupported => "Unsupported", diff --git a/tests/console_snapshot.rs b/tests/console_snapshot.rs index 41339e39..fecd0484 100644 --- a/tests/console_snapshot.rs +++ b/tests/console_snapshot.rs @@ -76,6 +76,28 @@ fn diag_with_verdict(status: VerifyStatus) -> Diag { wrong: None, hardening_outcome: None, }, + VerifyStatus::PartiallyConfirmed => VerifyResult { + finding_id: "abc123".into(), + status, + triggered_payload: None, + reason: None, + inconclusive_reason: None, + detail: Some( + "sink-reachability probe fired but the oracle marker was not observed; exploit chain did not complete".into(), + ), + attempts: vec![AttemptSummary { + payload_label: "sqli-tautology".into(), + exit_code: Some(0), + timed_out: false, + triggered: false, + sink_hit: true, + }], + toolchain_match: Some("exact".into()), + differential: None, + replay_stable: None, + wrong: None, + hardening_outcome: None, + }, VerifyStatus::NotConfirmed => VerifyResult { finding_id: "abc123".into(), status, @@ -158,6 +180,17 @@ fn console_not_confirmed_shows_annotation() { ); } +#[test] +fn console_partially_confirmed_shows_sink_reached() { + let diag = diag_with_verdict(VerifyStatus::PartiallyConfirmed); + let output = render_console(&[diag], "proj", None, 
&[]); + let stripped = strip_ansi(&output); + assert!( + stripped.contains("[DYN: partially confirmed (sink reached)]"), + "expected DYN partially-confirmed annotation, got:\n{stripped}" + ); +} + #[test] fn console_unsupported_shows_reason() { let diag = diag_with_verdict(VerifyStatus::Unsupported); diff --git a/tests/eval_corpus/report.py b/tests/eval_corpus/report.py index d674ed50..f2864298 100644 --- a/tests/eval_corpus/report.py +++ b/tests/eval_corpus/report.py @@ -78,6 +78,7 @@ def load_previous_agg(path: str) -> dict: "fn": 0, "unsupported": 0, "confirmed": 0, + "partially_confirmed": 0, "wrong_confirmed": 0, "stable_replays": 0, "total": 0, @@ -92,6 +93,7 @@ def load_previous_agg(path: str) -> dict: "fn", "unsupported", "confirmed", + "partially_confirmed", "wrong_confirmed", "stable_replays", "total", @@ -139,6 +141,7 @@ def main() -> int: "fn": 0, "unsupported": 0, "confirmed": 0, + "partially_confirmed": 0, "wrong_confirmed": 0, "stable_replays": 0, "total": 0, @@ -153,6 +156,7 @@ def main() -> int: "fn", "unsupported", "confirmed", + "partially_confirmed", "wrong_confirmed", "stable_replays", "total", @@ -160,17 +164,22 @@ def main() -> int: agg[k][field] += c.get(field, 0) print("\n=== Aggregated eval corpus report ===") - print(f"{'Cap':<20} {'Lang':<12} {'TP':>5} {'FP':>5} {'FN':>5} {'Prec':>6} {'Rec':>6} {'Unsup%':>7}") - print("-" * 72) + print( + f"{'Cap':<20} {'Lang':<12} {'TP':>5} {'FP':>5} {'FN':>5} " + f"{'Prec':>6} {'Rec':>6} {'Unsup%':>7} {'Conf%':>7} {'Part%':>7}" + ) + print("-" * 88) for k, v in sorted(agg.items()): prec = v["tp"] / max(v["tp"] + v["fp"], 1) rec = v["tp"] / max(v["tp"] + v["fn"], 1) unsup = v["unsupported"] / max(v["total"], 1) + conf = v["confirmed"] / max(v["total"], 1) + part = v["partially_confirmed"] / max(v["total"], 1) print( f"{k[0]:<20} {k[1]:<12} " f"{v['tp']:>5} {v['fp']:>5} {v['fn']:>5} " f"{prec:>6.2f} {rec:>6.2f} " - f"{unsup*100:>6.1f}%" + f"{unsup*100:>6.1f}% {conf*100:>6.1f}% {part*100:>6.1f}%" ) 
gate_failed = False diff --git a/tests/eval_corpus/tabulate.py b/tests/eval_corpus/tabulate.py index 36c3702d..2cd6302a 100644 --- a/tests/eval_corpus/tabulate.py +++ b/tests/eval_corpus/tabulate.py @@ -387,7 +387,7 @@ def main() -> int: break # Per-cell tallies: {(cap, lang): {tp, fp, fn, unsupported, confirmed, - # wrong_confirmed, stable_replays, total}} + # partially_confirmed, wrong_confirmed, stable_replays, total}} cells: dict[tuple[str, str], dict] = defaultdict( lambda: { "tp": 0, @@ -395,6 +395,7 @@ def main() -> int: "fn": 0, "unsupported": 0, "confirmed": 0, + "partially_confirmed": 0, "wrong_confirmed": 0, "stable_replays": 0, "total": 0, @@ -412,6 +413,8 @@ def main() -> int: status = dv.get("status") if status == "Unsupported": cells[key]["unsupported"] += 1 + elif status == "PartiallyConfirmed": + cells[key]["partially_confirmed"] += 1 elif status == "Confirmed": cells[key]["confirmed"] += 1 # Repro-stability and false-Confirmed counts are optional diff --git a/tests/sarif_dynamic_verdict_tests.rs b/tests/sarif_dynamic_verdict_tests.rs index 27686236..dcbac33f 100644 --- a/tests/sarif_dynamic_verdict_tests.rs +++ b/tests/sarif_dynamic_verdict_tests.rs @@ -235,9 +235,10 @@ fn sarif_confirmed_verdict_nyx_dynamic_verdict_contains_triggered_payload() { } #[test] -fn sarif_all_four_statuses_produce_partial_fingerprint() { +fn sarif_all_statuses_produce_partial_fingerprint() { let statuses = [ (VerifyStatus::Confirmed, "Confirmed"), + (VerifyStatus::PartiallyConfirmed, "PartiallyConfirmed"), (VerifyStatus::NotConfirmed, "NotConfirmed"), (VerifyStatus::Unsupported, "Unsupported"), (VerifyStatus::Inconclusive, "Inconclusive"),