mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-09 19:45:13 +02:00
feat(dynamic): add PartiallyConfirmed status for finer-grained sink-reachability categorization, update dynamic verification, telemetry, and reporting systems
This commit is contained in:
parent
635b213825
commit
c0501884ae
23 changed files with 658 additions and 142 deletions
|
|
@ -6,9 +6,9 @@ rust-version = "1.88"
|
|||
description = "A multi-language static analysis tool for detecting security vulnerabilities"
|
||||
license = "GPL-3.0-or-later"
|
||||
authors = ["Eli Peter <elicpeter@example.com>"]
|
||||
homepage = "https://github.com/elicpeter/nyx"
|
||||
homepage = "https://nyxsec.dev/scanner"
|
||||
repository = "https://github.com/elicpeter/nyx"
|
||||
documentation = "https://elicpeter.github.io/nyx/"
|
||||
documentation = "https://nyxsec.dev/docs/nyx/"
|
||||
keywords = ["security", "vulnerability", "scanner", "static-analysis", "cli"]
|
||||
categories = ["security", "command-line-utilities", "development-tools", "parser-implementations", "text-processing"]
|
||||
readme = "README.md"
|
||||
|
|
|
|||
|
|
@ -3,7 +3,12 @@ export type Confidence = 'Low' | 'Medium' | 'High';
|
|||
export type FlowStepKind = 'source' | 'assignment' | 'call' | 'phi' | 'sink';
|
||||
|
||||
// Dynamic verification types (from src/evidence.rs VerifyStatus / VerifyResult)
|
||||
/**
 * Outcome of dynamically verifying a finding.
 *
 * `PartiallyConfirmed` sits between `Confirmed` and `NotConfirmed`: the sink
 * was reached at runtime but the exploit chain did not complete.
 */
export type VerifyStatus =
  | 'Confirmed'
  | 'PartiallyConfirmed'
  | 'NotConfirmed'
  | 'Inconclusive'
  | 'Unsupported';
|
||||
|
||||
export interface AttemptSummary {
|
||||
payload_label: string;
|
||||
|
|
@ -29,6 +34,7 @@ export interface VerifyResult {
|
|||
export interface DynamicVerificationSummary {
|
||||
total: number;
|
||||
confirmed: number;
|
||||
partially_confirmed: number;
|
||||
not_confirmed: number;
|
||||
inconclusive: number;
|
||||
unsupported: number;
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ import type { VerifyResult, VerifyStatus } from '../api/types';
|
|||
|
||||
const STATUS_LABELS: Record<VerifyStatus, string> = {
|
||||
Confirmed: 'Confirmed',
|
||||
PartiallyConfirmed: 'Partially confirmed',
|
||||
NotConfirmed: 'Not confirmed',
|
||||
Inconclusive: 'Inconclusive',
|
||||
Unsupported: 'Unsupported',
|
||||
|
|
@ -15,6 +16,10 @@ function verdictTooltip(verdict: VerifyResult): string {
|
|||
return triggered_payload
|
||||
? `Confirmed via payload: ${triggered_payload}`
|
||||
: 'Dynamically confirmed exploitable';
|
||||
case 'PartiallyConfirmed':
|
||||
return detail
|
||||
? `Partially confirmed (sink reached): ${detail}`
|
||||
: 'Partially confirmed: sink reached but exploit chain did not complete';
|
||||
case 'NotConfirmed':
|
||||
return (verdict.attempts?.length ?? 0) > 0
|
||||
? `Not confirmed after ${verdict.attempts?.length ?? 0} payload attempt(s)`
|
||||
|
|
|
|||
|
|
@ -244,13 +244,14 @@ export function ScannerQualityPanel({
|
|||
const dynamic = quality.dynamic_verification ?? {
|
||||
total: 0,
|
||||
confirmed: 0,
|
||||
partially_confirmed: 0,
|
||||
not_confirmed: 0,
|
||||
inconclusive: 0,
|
||||
unsupported: 0,
|
||||
};
|
||||
const dynamicDetail =
|
||||
dynamic.total > 0
|
||||
? `${dynamic.total.toLocaleString()} verdicts · ${dynamic.not_confirmed.toLocaleString()} not confirmed · ${dynamic.inconclusive.toLocaleString()} inconclusive · ${dynamic.unsupported.toLocaleString()} unsupported`
|
||||
? `${dynamic.total.toLocaleString()} verdicts · ${dynamic.partially_confirmed.toLocaleString()} partially confirmed · ${dynamic.not_confirmed.toLocaleString()} not confirmed · ${dynamic.inconclusive.toLocaleString()} inconclusive · ${dynamic.unsupported.toLocaleString()} unsupported`
|
||||
: 'no dynamic verdicts in latest scan';
|
||||
|
||||
const rows: Array<{
|
||||
|
|
|
|||
|
|
@ -31,6 +31,7 @@ function formatTriageState(state: string): string {
|
|||
|
||||
/**
 * Converts a raw verification status into the text shown to the user.
 *
 * The two multi-word statuses get a spaced, sentence-case label. Any other
 * non-empty status is shown verbatim, and an empty (falsy) status is shown
 * as "Unverified".
 *
 * @param status - Raw status string; may be empty when no verdict exists.
 * @returns The display label for `status`.
 */
function formatVerificationStatus(status: string): string {
  switch (status) {
    case 'NotConfirmed':
      return 'Not confirmed';
    case 'PartiallyConfirmed':
      return 'Partially confirmed';
    default:
      // `||` (not `??`) on purpose: an empty string must also fall back.
      return status || 'Unverified';
  }
}
|
||||
|
||||
|
|
|
|||
|
|
@ -2668,6 +2668,10 @@ tr.selected td {
|
|||
background: var(--success-bg);
|
||||
color: var(--success);
|
||||
}
|
||||
/* Dynamic-verdict badge for the PartiallyConfirmed status; reuses the
   medium-confidence colour custom properties. */
.badge-dyn-partiallyconfirmed {
  background: var(--conf-medium-bg);
  color: var(--conf-medium);
}
|
||||
.badge-dyn-notconfirmed {
|
||||
background: var(--bg-secondary);
|
||||
color: var(--text-secondary);
|
||||
|
|
|
|||
|
|
@ -43,6 +43,19 @@ describe('DynamicVerdictSection', () => {
|
|||
).toBeInTheDocument();
|
||||
});
|
||||
|
||||
// A PartiallyConfirmed verdict must render its own badge, identified by the
// lower-cased status in the test id.
it('renders PartiallyConfirmed badge', () => {
  const verdict = makeVerdict('PartiallyConfirmed', {
    detail: 'sink reached but exploit chain did not complete',
  });

  render(<DynamicVerdictSection verdict={verdict} />);

  expect(
    screen.getByTestId('verdict-badge-partiallyconfirmed'),
  ).toBeInTheDocument();
});
|
||||
|
||||
it('does not crash when the API omits an empty attempts array', () => {
|
||||
render(
|
||||
<DynamicVerdictSection
|
||||
|
|
@ -82,6 +95,7 @@ describe('DynamicVerdictSection', () => {
|
|||
unmount();
|
||||
|
||||
for (const status of [
|
||||
'PartiallyConfirmed',
|
||||
'NotConfirmed',
|
||||
'Unsupported',
|
||||
'Inconclusive',
|
||||
|
|
|
|||
|
|
@ -35,6 +35,21 @@ describe('VerdictBadge', () => {
|
|||
expect(badge.textContent).toContain('🔥');
|
||||
});
|
||||
|
||||
// PartiallyConfirmed gets its own badge class, never the flame marker that the
// Confirmed test above expects, and a tooltip that mentions the sink was reached.
it('renders PartiallyConfirmed badge with amber class and no flame', () => {
  const verdict = makeVerdict('PartiallyConfirmed', {
    detail: 'sink-reachability probe fired but the oracle marker was not observed',
  });

  render(<VerdictBadge verdict={verdict} />);

  const badge = screen.getByTestId('verdict-badge-partiallyconfirmed');
  expect(badge).toBeInTheDocument();
  expect(badge.className).toContain('badge-dyn-partiallyconfirmed');
  expect(badge.textContent).not.toContain('🔥');
  expect(badge.getAttribute('title')).toContain('sink reached');
});
|
||||
|
||||
it('renders NotConfirmed badge with correct class', () => {
|
||||
render(<VerdictBadge verdict={makeVerdict('NotConfirmed')} />);
|
||||
const badge = screen.getByTestId('verdict-badge-notconfirmed');
|
||||
|
|
@ -107,9 +122,10 @@ describe('VerdictBadge', () => {
|
|||
expect(badge.textContent?.replace('🔥 ', '')).toBe('C');
|
||||
});
|
||||
|
||||
it('renders all four VerifyStatus variants without crashing', () => {
|
||||
it('renders all five VerifyStatus variants without crashing', () => {
|
||||
const statuses: VerifyResult['status'][] = [
|
||||
'Confirmed',
|
||||
'PartiallyConfirmed',
|
||||
'NotConfirmed',
|
||||
'Unsupported',
|
||||
'Inconclusive',
|
||||
|
|
|
|||
|
|
@ -308,6 +308,10 @@ pub fn check_gate(diff: &VerdictDiff, gate: &str) -> bool {
|
|||
&& matches!(
|
||||
e.current_status,
|
||||
Some(VerifyStatus::Confirmed)
|
||||
// PartiallyConfirmed = sink still reachable at
|
||||
// runtime, so a baseline-Confirmed finding that is
|
||||
// now partial has NOT been resolved.
|
||||
| Some(VerifyStatus::PartiallyConfirmed)
|
||||
| Some(VerifyStatus::Inconclusive)
|
||||
| Some(VerifyStatus::Unsupported)
|
||||
)
|
||||
|
|
@ -323,6 +327,7 @@ pub fn check_gate(diff: &VerdictDiff, gate: &str) -> bool {
|
|||
fn status_str(s: Option<VerifyStatus>) -> &'static str {
|
||||
match s {
|
||||
Some(VerifyStatus::Confirmed) => "Confirmed",
|
||||
Some(VerifyStatus::PartiallyConfirmed) => "PartiallyConfirmed",
|
||||
Some(VerifyStatus::NotConfirmed) => "NotConfirmed",
|
||||
Some(VerifyStatus::Inconclusive) => "Inconclusive",
|
||||
Some(VerifyStatus::Unsupported) => "Unsupported",
|
||||
|
|
|
|||
|
|
@ -37,8 +37,11 @@ pub enum Feasibility {
|
|||
/// but where the static evidence is strong.
|
||||
InconclusiveHighConf,
|
||||
/// Everything else — no dynamic verification, dynamic verdict was
|
||||
/// `NotConfirmed`/`Unsupported`, or dynamic was `Inconclusive` but
|
||||
/// static confidence is not `High`.
|
||||
/// `NotConfirmed`/`PartiallyConfirmed`/`Unsupported`, or dynamic was
|
||||
/// `Inconclusive` but static confidence is not `High`. A
|
||||
/// `PartiallyConfirmed` verdict proves only that the sink is reachable,
|
||||
/// not that the exploit chain completes, so it stays conservative here:
|
||||
/// it must not inflate a multi-hop path score.
|
||||
Unverified,
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -242,6 +242,7 @@ pub fn compute_stable_hash(diag: &Diag) -> u64 {
|
|||
pub struct DynamicVerificationSummary {
|
||||
pub total: usize,
|
||||
pub confirmed: usize,
|
||||
pub partially_confirmed: usize,
|
||||
pub not_confirmed: usize,
|
||||
pub inconclusive: usize,
|
||||
pub unsupported: usize,
|
||||
|
|
@ -261,6 +262,9 @@ impl DynamicVerificationSummary {
|
|||
summary.total += 1;
|
||||
match verdict.status {
|
||||
crate::evidence::VerifyStatus::Confirmed => summary.confirmed += 1,
|
||||
crate::evidence::VerifyStatus::PartiallyConfirmed => {
|
||||
summary.partially_confirmed += 1
|
||||
}
|
||||
crate::evidence::VerifyStatus::NotConfirmed => summary.not_confirmed += 1,
|
||||
crate::evidence::VerifyStatus::Inconclusive => summary.inconclusive += 1,
|
||||
crate::evidence::VerifyStatus::Unsupported => summary.unsupported += 1,
|
||||
|
|
@ -282,10 +286,11 @@ pub fn format_dynamic_verification_summary(summary: &DynamicVerificationSummary)
|
|||
"verdicts"
|
||||
};
|
||||
format!(
|
||||
"{} {} ({} confirmed, {} not confirmed, {} inconclusive, {} unsupported)",
|
||||
"{} {} ({} confirmed, {} partially confirmed, {} not confirmed, {} inconclusive, {} unsupported)",
|
||||
summary.total,
|
||||
noun,
|
||||
summary.confirmed,
|
||||
summary.partially_confirmed,
|
||||
summary.not_confirmed,
|
||||
summary.inconclusive,
|
||||
summary.unsupported
|
||||
|
|
|
|||
|
|
@ -1,19 +1,23 @@
|
|||
//! Differential confirmation rule for dynamic verification (Phase 07).
|
||||
//! Differential confirmation rule for dynamic verification (Phase 07 / 26).
|
||||
//!
|
||||
//! `Confirmed` requires the vulnerable payload's oracle to fire **and**
|
||||
//! the paired benign control's oracle to *not* fire (§4.1). This module
|
||||
//! is the single source of truth for that rule. Everything else (runner,
|
||||
//! verifier, tests) collapses to "look up paired benign + call
|
||||
//! [`evaluate`]".
|
||||
//! `Confirmed` requires **at least one** vulnerable payload's oracle to
|
||||
//! fire **and every** paired benign control's oracle to *not* fire
|
||||
//! (§4.1, extended for multi-payload aggregation in Phase 26). This
|
||||
//! module is the single source of truth for that rule. Everything else
|
||||
//! (runner, verifier, tests) collapses to "collect firing sets + call
|
||||
//! [`evaluate_sets`]".
|
||||
//!
|
||||
//! # Rule table
|
||||
//! # Rule table (set aggregation)
|
||||
//!
|
||||
//! | vuln fires | benign fires | verdict |
|
||||
//! |------------|--------------|-------------------------------|
|
||||
//! | true | false | `Confirmed` |
|
||||
//! | true | true | `OracleCollisionSuspected` |
|
||||
//! | false | false | `NotConfirmed` |
|
||||
//! | false | true | `ReversedDifferential` |
|
||||
//! | any vuln fires | any benign fires | verdict |
|
||||
//! |----------------|------------------|----------------------------|
|
||||
//! | true | false | `Confirmed` |
|
||||
//! | true | true | `OracleCollisionSuspected` |
|
||||
//! | false | false | `NotConfirmed` |
|
||||
//! | false | true | `ReversedDifferential` |
|
||||
//!
|
||||
//! The scalar [`evaluate`] is the single-payload, single-control
|
||||
//! specialisation of [`evaluate_sets`] and delegates to it.
|
||||
//!
|
||||
//! "Fires" means [`crate::dynamic::oracle::oracle_fired`] returned `true`
|
||||
//! against the run's [`SandboxOutcome`] + drained [`SinkProbe`] set —
|
||||
|
|
@ -24,8 +28,33 @@ use crate::evidence::{
|
|||
DifferentialOutcome, DifferentialProbeArg, DifferentialProbeRecord, DifferentialVerdict,
|
||||
};
|
||||
|
||||
/// Apply the differential confirmation rule.
|
||||
/// Apply the differential confirmation rule over **sets** of firing
|
||||
/// results (Phase 26 multi-payload aggregation).
|
||||
///
|
||||
/// `vuln_fired` is one boolean per vulnerable payload attempt;
|
||||
/// `benign_fired` is one boolean per paired benign control that actually
|
||||
/// ran. Aggregation is "any vuln vs any benign" with global ambient-noise
|
||||
/// scoring across the run: a *single* benign control firing anywhere
|
||||
/// vetoes `Confirmed` (the oracle cannot discriminate), and a *single*
|
||||
/// vulnerable payload firing is enough positive evidence.
|
||||
///
|
||||
/// Empty slices behave as "nothing fired" on that side, so
|
||||
/// `evaluate_sets(&[], &[])` is `NotConfirmed`.
|
||||
pub fn evaluate_sets(vuln_fired: &[bool], benign_fired: &[bool]) -> DifferentialVerdict {
|
||||
let any_vuln = vuln_fired.iter().any(|&b| b);
|
||||
let any_benign = benign_fired.iter().any(|&b| b);
|
||||
match (any_vuln, any_benign) {
|
||||
(true, false) => DifferentialVerdict::Confirmed,
|
||||
(true, true) => DifferentialVerdict::OracleCollisionSuspected,
|
||||
(false, false) => DifferentialVerdict::NotConfirmed,
|
||||
(false, true) => DifferentialVerdict::ReversedDifferential,
|
||||
}
|
||||
}
|
||||
|
||||
/// Apply the differential confirmation rule to a single
|
||||
/// (vulnerable, benign-control) pair.
|
||||
///
|
||||
/// Single-element specialisation of [`evaluate_sets`].
|
||||
/// `vuln_probe_fires` and `benign_probe_fires` are the boolean firing
|
||||
/// results of [`crate::dynamic::oracle::oracle_fired`] for the
|
||||
/// vulnerable payload and its paired benign control respectively. The
|
||||
|
|
@ -33,12 +62,7 @@ use crate::evidence::{
|
|||
/// callers attach those separately via [`DifferentialOutcome`] for
|
||||
/// forensic display.
|
||||
pub fn evaluate(vuln_probe_fires: bool, benign_probe_fires: bool) -> DifferentialVerdict {
|
||||
match (vuln_probe_fires, benign_probe_fires) {
|
||||
(true, false) => DifferentialVerdict::Confirmed,
|
||||
(true, true) => DifferentialVerdict::OracleCollisionSuspected,
|
||||
(false, false) => DifferentialVerdict::NotConfirmed,
|
||||
(false, true) => DifferentialVerdict::ReversedDifferential,
|
||||
}
|
||||
evaluate_sets(&[vuln_probe_fires], &[benign_probe_fires])
|
||||
}
|
||||
|
||||
/// Build a [`DifferentialOutcome`] for inclusion in a
|
||||
|
|
@ -139,6 +163,61 @@ mod tests {
|
|||
);
|
||||
}
|
||||
|
||||
#[test]
fn sets_any_vuln_no_benign_is_confirmed() {
    // One of several vuln payloads firing is enough; no benign fired.
    let verdict = evaluate_sets(&[false, true, false], &[false, false]);
    assert_eq!(verdict, DifferentialVerdict::Confirmed);
}
|
||||
|
||||
#[test]
fn sets_one_benign_firing_vetoes_confirmed() {
    // A single benign control firing anywhere downgrades to collision,
    // even when a vuln payload also fired (global ambient-noise veto).
    let verdict = evaluate_sets(&[true, true], &[false, true, false]);
    assert_eq!(verdict, DifferentialVerdict::OracleCollisionSuspected);
}
|
||||
|
||||
#[test]
fn sets_no_vuln_no_benign_is_not_confirmed() {
    // Nothing fired on either side.
    let verdict = evaluate_sets(&[false, false], &[false]);
    assert_eq!(verdict, DifferentialVerdict::NotConfirmed);
}
|
||||
|
||||
#[test]
fn sets_no_vuln_some_benign_is_reversed() {
    // Only the benign control fired: the differential points the wrong way.
    let verdict = evaluate_sets(&[false], &[true]);
    assert_eq!(verdict, DifferentialVerdict::ReversedDifferential);
}
|
||||
|
||||
#[test]
fn sets_empty_is_not_confirmed() {
    // Empty slices count as "nothing fired" on both sides.
    let verdict = evaluate_sets(&[], &[]);
    assert_eq!(verdict, DifferentialVerdict::NotConfirmed);
}
|
||||
|
||||
#[test]
fn sets_empty_benign_with_vuln_is_confirmed() {
    // No benign control ran at all → no veto possible → Confirmed.
    let verdict = evaluate_sets(&[true], &[]);
    assert_eq!(verdict, DifferentialVerdict::Confirmed);
}
|
||||
|
||||
#[test]
fn scalar_evaluate_matches_singleton_sets() {
    // Check all four (vuln, benign) combinations: the scalar entry point
    // must agree with the set rule applied to one-element slices.
    for v in [false, true] {
        for b in [false, true] {
            assert_eq!(evaluate(v, b), evaluate_sets(&[v], &[b]));
        }
    }
}
|
||||
|
||||
#[test]
|
||||
fn oob_self_confirmed_outcome_carries_only_vuln_trace() {
|
||||
use crate::dynamic::probe::{ProbeArg, ProbeKind, ProbeWitness, SinkProbe};
|
||||
|
|
|
|||
|
|
@ -59,10 +59,29 @@ const MAX_BUILD_ATTEMPTS: u32 = 2;
|
|||
pub struct RunOutcome {
|
||||
pub spec: HarnessSpec,
|
||||
pub attempts: Vec<Attempt>,
|
||||
/// First attempt that fired the sink with `oracle_fired && sink_hit`.
|
||||
/// Index into [`Self::attempts`] of the attempt the confirm verdict is
|
||||
/// attributed to. Set by the Phase 26 set aggregation when
|
||||
/// [`crate::dynamic::differential::evaluate_sets`] returns a
|
||||
/// Confirmed-class verdict (any vuln payload fired the oracle + sink
|
||||
/// while every paired benign control stayed clean), or when an
|
||||
/// OOB-nonce payload self-confirmed. `None` otherwise.
|
||||
pub triggered_by: Option<usize>,
|
||||
/// Whether the oracle fired but the sink probe did not (oracle collision).
|
||||
pub oracle_collision: bool,
|
||||
/// Phase 26: a vuln payload's in-harness sink-reachability probe fired
|
||||
/// (`outcome.sink_hit`) but its oracle marker was never observed (no file
|
||||
/// write / no OOB callback / output lacked the proof token), *and* the
|
||||
/// paired benign control neither reached the sink nor fired its oracle.
|
||||
/// The benign-control differential is the discriminator: it proves the
|
||||
/// vuln input specifically drives the sink, ruling out safe code that
|
||||
/// merely reaches the sink (e.g. array-form `exec` with inert
|
||||
/// metacharacters, which the benign control also reaches). The verifier
|
||||
/// maps this to [`crate::evidence::VerifyStatus::PartiallyConfirmed`]: the
|
||||
/// sink is reachable under the vuln input but the exploit chain did not
|
||||
/// complete. Never set when a Confirmed-class verdict or a colliding
|
||||
/// differential was produced (those take precedence at the verify
|
||||
/// boundary).
|
||||
pub sink_reached_no_oracle: bool,
|
||||
/// Number of build attempts consumed.
|
||||
pub build_attempts: u32,
|
||||
/// Harness sources for repro artifacts.
|
||||
|
|
@ -454,6 +473,24 @@ pub fn run_spec(spec: &HarnessSpec, opts: &SandboxOptions) -> Result<RunOutcome,
|
|||
let mut unrelated_crash = false;
|
||||
let mut differential_outcome: Option<DifferentialOutcome> = None;
|
||||
|
||||
// Phase 26 set aggregation, phase A: per-vuln-payload run record.
|
||||
// Every vuln payload runs to completion (no early break) so the
|
||||
// differential rule can aggregate across the whole set — a single
|
||||
// benign control firing anywhere must be able to veto a `Confirmed`.
|
||||
struct VulnRun {
|
||||
/// Index into `vuln_payloads` (for benign-control resolution).
|
||||
payload_index: usize,
|
||||
/// Index into `attempts` (what `triggered_by` points at).
|
||||
attempt_index: usize,
|
||||
vuln_fired: bool,
|
||||
sink_hit: bool,
|
||||
oob_nonce_slot: bool,
|
||||
oob_callback_seen: bool,
|
||||
vuln_probes: Vec<SinkProbe>,
|
||||
}
|
||||
let mut vuln_runs: Vec<VulnRun> = Vec::with_capacity(vuln_payloads.len());
|
||||
|
||||
// ── Phase A: run every vuln payload, record its firing signals ──────
|
||||
for (i, payload) in vuln_payloads.iter().enumerate() {
|
||||
// Materialise payload bytes (OOB nonce-slot payloads generate a URL).
|
||||
let (oob_nonce, effective_bytes) = if payload.oob_nonce_slot {
|
||||
|
|
@ -480,11 +517,12 @@ pub fn run_spec(spec: &HarnessSpec, opts: &SandboxOptions) -> Result<RunOutcome,
|
|||
let _ = ch.clear();
|
||||
}
|
||||
|
||||
let attempt_index = attempts.len();
|
||||
trace_record(
|
||||
trace_handle.as_ref(),
|
||||
TraceStage::SandboxStarted,
|
||||
Some(format!(
|
||||
"attempt={i} payload={} oracle={}",
|
||||
"attempt={attempt_index} payload={} oracle={}",
|
||||
payload.label,
|
||||
oracle_short_name(&payload.oracle)
|
||||
)),
|
||||
|
|
@ -495,7 +533,7 @@ pub fn run_spec(spec: &HarnessSpec, opts: &SandboxOptions) -> Result<RunOutcome,
|
|||
trace_handle.as_ref(),
|
||||
TraceStage::OracleWait,
|
||||
Some(format!(
|
||||
"attempt={i} exit_code={:?} timed_out={}",
|
||||
"attempt={attempt_index} exit_code={:?} timed_out={}",
|
||||
outcome.exit_code, outcome.timed_out
|
||||
)),
|
||||
);
|
||||
|
|
@ -508,9 +546,9 @@ pub fn run_spec(spec: &HarnessSpec, opts: &SandboxOptions) -> Result<RunOutcome,
|
|||
// failure — the harness "linked" against deps that don't resolve at
|
||||
// run time — so route through `RunError::BuildFailed` to keep the
|
||||
// SKIP-on-BuildFailed branch in the e2e corpus tests honest. Only
|
||||
// checked on the first vuln payload because the missing dep won't
|
||||
// appear later in the run.
|
||||
if i == 0 && is_runtime_import_error(&outcome) {
|
||||
// checked on the first actually-run payload because the missing dep
|
||||
// won't appear later in the run.
|
||||
if attempts.is_empty() && is_runtime_import_error(&outcome) {
|
||||
return Err(RunError::BuildFailed {
|
||||
stderr: String::from_utf8_lossy(&outcome.stderr).into_owned(),
|
||||
attempts: build_attempts,
|
||||
|
|
@ -546,7 +584,7 @@ pub fn run_spec(spec: &HarnessSpec, opts: &SandboxOptions) -> Result<RunOutcome,
|
|||
trace_handle.as_ref(),
|
||||
TraceStage::OracleObserved,
|
||||
Some(format!(
|
||||
"attempt={i} fired={vuln_fired} sink_hit={sink_hit}"
|
||||
"attempt={attempt_index} fired={vuln_fired} sink_hit={sink_hit}"
|
||||
)),
|
||||
);
|
||||
|
||||
|
|
@ -566,93 +604,152 @@ pub fn run_spec(spec: &HarnessSpec, opts: &SandboxOptions) -> Result<RunOutcome,
|
|||
unrelated_crash = true;
|
||||
}
|
||||
|
||||
// Differential rule (Phase 07, §4.1). Only when the vuln oracle
|
||||
// fired *and* the in-harness sink-hit sentinel was observed do we
|
||||
// consult the paired benign control. Oracle-fires-without-sink
|
||||
// stays on the legacy `oracle_collision` path so the existing
|
||||
// `Inconclusive(OracleCollisionSuspected)` semantics survive.
|
||||
let triggered = if vuln_fired && sink_hit {
|
||||
// Match the resolution scope to the payload-slice scope so a
|
||||
// benign control declared in another language is still found
|
||||
// when this run was driven off the lang-agnostic union (see
|
||||
// `used_lang_slice` above). When the run did use the
|
||||
// per-language slice, the lang-aware resolver keeps a
|
||||
// mismatched language from silently producing a Confirmed.
|
||||
let resolved = if used_lang_slice {
|
||||
resolve_benign_control_lang(payload, spec.expected_cap, spec.lang)
|
||||
} else {
|
||||
resolve_benign_control(payload, spec.expected_cap)
|
||||
};
|
||||
match resolved {
|
||||
None => {
|
||||
// Phase 05 OOB closure: OOB-nonce payloads with
|
||||
// `benign_control = None` are structurally self-
|
||||
// confirming when the listener observed the callback.
|
||||
// A benign URL cannot hit a per-finding nonce, so the
|
||||
// OOB observation is independent network-level
|
||||
// evidence the sink fired. Skip the no-benign-control
|
||||
// downgrade and emit
|
||||
// [`DifferentialVerdict::ConfirmedProvenOob`].
|
||||
if payload.oob_nonce_slot && outcome.oob_callback_seen {
|
||||
let mut outcome_record = differential::build_oob_self_confirmed_outcome(
|
||||
payload.label,
|
||||
&vuln_probes,
|
||||
);
|
||||
middleware_demotion::apply_demotion(
|
||||
&mut outcome_record,
|
||||
spec.framework.as_ref(),
|
||||
spec.lang,
|
||||
);
|
||||
let confirmed =
|
||||
middleware_demotion::is_triggering_verdict(outcome_record.verdict);
|
||||
differential_outcome = Some(outcome_record);
|
||||
confirmed
|
||||
} else {
|
||||
no_benign_control = true;
|
||||
false
|
||||
}
|
||||
}
|
||||
Some(benign) => {
|
||||
let benign_bytes = materialise_bytes(benign, None)
|
||||
.map(|b| b.into_owned())
|
||||
.unwrap_or_default();
|
||||
if let Some(ch) = &probe_channel {
|
||||
let _ = ch.clear();
|
||||
}
|
||||
let benign_outcome = sandbox::run(&harness, &benign_bytes, &effective_opts)?;
|
||||
let benign_probes: Vec<SinkProbe> = probe_channel
|
||||
.as_ref()
|
||||
.map(|ch| ch.drain())
|
||||
.unwrap_or_default();
|
||||
let benign_stub_events: Vec<StubEvent> = effective_opts
|
||||
.stub_harness
|
||||
.as_ref()
|
||||
.map(|h| h.drain_all())
|
||||
.unwrap_or_default();
|
||||
let benign_fired = oracle_fired_with_stubs(
|
||||
&benign.oracle,
|
||||
&benign_outcome,
|
||||
&benign_probes,
|
||||
&benign_stub_events,
|
||||
// Legacy single-payload collision: oracle fired without the
|
||||
// in-harness sink-hit sentinel. Phase 26 partial-confirmation is
|
||||
// deliberately NOT decided here: a vuln run that reaches the sink
|
||||
// without firing its oracle is ambiguous — it could be a real engine
|
||||
// gap (the vuln input drives the sink but the exploit chain could not
|
||||
// be observed) or merely safe code that happens to reach the sink
|
||||
// (e.g. array-form `exec` with inert metacharacters). The call is
|
||||
// deferred to the differential check in Phase B, which compares the
|
||||
// benign control's sink reachability.
|
||||
if vuln_fired && !sink_hit {
|
||||
oracle_collision = true;
|
||||
}
|
||||
|
||||
let oob_callback_seen = outcome.oob_callback_seen;
|
||||
attempts.push(Attempt {
|
||||
payload_label: payload.label,
|
||||
outcome,
|
||||
oracle_fired: vuln_fired,
|
||||
triggered: false,
|
||||
});
|
||||
vuln_runs.push(VulnRun {
|
||||
payload_index: i,
|
||||
attempt_index,
|
||||
vuln_fired,
|
||||
sink_hit,
|
||||
oob_nonce_slot: payload.oob_nonce_slot,
|
||||
oob_callback_seen,
|
||||
vuln_probes,
|
||||
});
|
||||
}
|
||||
|
||||
// ── Phase B: differential confirmation + partial-confirmation gate ──
|
||||
// Two candidate classes drive a paired benign-control run:
|
||||
// • confirm candidate — vuln oracle fired *and* the in-harness sink-hit
|
||||
// sentinel was observed. Collected into the set aggregation (§4.1).
|
||||
// • partial candidate — the sink-hit sentinel fired but the oracle did
|
||||
// not. The benign control's sink reachability decides whether this is
|
||||
// a real engine gap (`PartiallyConfirmed`) or safe code that merely
|
||||
// reaches the sink (`NotConfirmed`).
|
||||
// Oracle-fires-without-sink stays on the legacy `oracle_collision` path.
|
||||
let mut vuln_fires: Vec<bool> = Vec::new();
|
||||
let mut benign_fires: Vec<bool> = Vec::new();
|
||||
// (attempt_index, differential outcome) per confirm candidate.
|
||||
let mut candidates: Vec<(usize, DifferentialOutcome)> = Vec::new();
|
||||
// Phase 26: set when a partial candidate's vuln run reached the sink that
|
||||
// its benign control did *not* — a sink-reachability differential proving
|
||||
// the vuln input specifically drives the sink even though the exploit
|
||||
// chain could not be observed completing.
|
||||
let mut partial_signal = false;
|
||||
|
||||
for vr in &vuln_runs {
|
||||
let is_confirm_candidate = vr.vuln_fired && vr.sink_hit;
|
||||
let is_partial_candidate = vr.sink_hit && !vr.vuln_fired;
|
||||
if !is_confirm_candidate && !is_partial_candidate {
|
||||
continue;
|
||||
}
|
||||
// The partial signal is a single bool; once established, skip further
|
||||
// partial-only probing. Confirm candidates always run — the set
|
||||
// aggregation needs every one.
|
||||
if is_partial_candidate && !is_confirm_candidate && partial_signal {
|
||||
continue;
|
||||
}
|
||||
let payload = vuln_payloads[vr.payload_index];
|
||||
// Match the resolution scope to the payload-slice scope so a benign
|
||||
// control declared in another language is still found when this run
|
||||
// was driven off the lang-agnostic union (see `used_lang_slice`).
|
||||
// When the run did use the per-language slice, the lang-aware
|
||||
// resolver keeps a mismatched language from producing a Confirmed.
|
||||
let resolved = if used_lang_slice {
|
||||
resolve_benign_control_lang(payload, spec.expected_cap, spec.lang)
|
||||
} else {
|
||||
resolve_benign_control(payload, spec.expected_cap)
|
||||
};
|
||||
match resolved {
|
||||
None => {
|
||||
// Phase 05 OOB closure: OOB-nonce payloads with
|
||||
// `benign_control = None` are structurally self-confirming
|
||||
// when the listener observed the callback. A benign URL
|
||||
// cannot hit a per-finding nonce, so the OOB observation is
|
||||
// independent network-level evidence the sink fired. Skip
|
||||
// the no-benign-control downgrade and emit
|
||||
// [`DifferentialVerdict::ConfirmedProvenOob`].
|
||||
if is_confirm_candidate && vr.oob_nonce_slot && vr.oob_callback_seen {
|
||||
let mut outcome_record = differential::build_oob_self_confirmed_outcome(
|
||||
payload.label,
|
||||
&vr.vuln_probes,
|
||||
);
|
||||
middleware_demotion::apply_demotion(
|
||||
&mut outcome_record,
|
||||
spec.framework.as_ref(),
|
||||
spec.lang,
|
||||
);
|
||||
// No paired benign control runs, so this candidate
|
||||
// contributes only to the vuln side of the set.
|
||||
vuln_fires.push(true);
|
||||
candidates.push((vr.attempt_index, outcome_record));
|
||||
} else if is_confirm_candidate {
|
||||
no_benign_control = true;
|
||||
}
|
||||
// A partial candidate without a benign control cannot rule out
|
||||
// "safe code that reaches the sink", so it raises no partial
|
||||
// signal and falls through to `NotConfirmed`.
|
||||
}
|
||||
Some(benign) => {
|
||||
let benign_bytes = materialise_bytes(benign, None)
|
||||
.map(|b| b.into_owned())
|
||||
.unwrap_or_default();
|
||||
if let Some(ch) = &probe_channel {
|
||||
let _ = ch.clear();
|
||||
}
|
||||
let benign_outcome = sandbox::run(&harness, &benign_bytes, &effective_opts)?;
|
||||
let benign_sink_hit = benign_outcome.sink_hit;
|
||||
let benign_probes: Vec<SinkProbe> = probe_channel
|
||||
.as_ref()
|
||||
.map(|ch| ch.drain())
|
||||
.unwrap_or_default();
|
||||
let benign_stub_events: Vec<StubEvent> = effective_opts
|
||||
.stub_harness
|
||||
.as_ref()
|
||||
.map(|h| h.drain_all())
|
||||
.unwrap_or_default();
|
||||
let benign_fired = oracle_fired_with_stubs(
|
||||
&benign.oracle,
|
||||
&benign_outcome,
|
||||
&benign_probes,
|
||||
&benign_stub_events,
|
||||
);
|
||||
|
||||
if is_confirm_candidate {
|
||||
let mut outcome_record = differential::build_outcome(
|
||||
payload.label,
|
||||
vuln_fired,
|
||||
&vuln_probes,
|
||||
vr.vuln_fired,
|
||||
&vr.vuln_probes,
|
||||
benign.label,
|
||||
benign_fired,
|
||||
&benign_probes,
|
||||
);
|
||||
// Phase 05 OOB closure: when an OOB-nonce payload also
|
||||
// carries a paired benign control, promote
|
||||
// `Confirmed` → `ConfirmedProvenOob` whenever the
|
||||
// listener observed the per-finding nonce. The
|
||||
// upgrade preserves the differential trace (benign
|
||||
// run still recorded) and surfaces the stronger
|
||||
// network-level evidence to operators.
|
||||
// carries a paired benign control, promote `Confirmed` →
|
||||
// `ConfirmedProvenOob` whenever the listener observed the
|
||||
// per-finding nonce. The upgrade preserves the differential
|
||||
// trace (benign run still recorded) and surfaces the
|
||||
// stronger network-level evidence to operators.
|
||||
if outcome_record.verdict == DifferentialVerdict::Confirmed
|
||||
&& payload.oob_nonce_slot
|
||||
&& outcome.oob_callback_seen
|
||||
&& vr.oob_nonce_slot
|
||||
&& vr.oob_callback_seen
|
||||
{
|
||||
outcome_record.verdict = DifferentialVerdict::ConfirmedProvenOob;
|
||||
}
|
||||
|
|
@ -661,30 +758,68 @@ pub fn run_spec(spec: &HarnessSpec, opts: &SandboxOptions) -> Result<RunOutcome,
|
|||
spec.framework.as_ref(),
|
||||
spec.lang,
|
||||
);
|
||||
let confirmed =
|
||||
middleware_demotion::is_triggering_verdict(outcome_record.verdict);
|
||||
differential_outcome = Some(outcome_record);
|
||||
confirmed
|
||||
vuln_fires.push(vr.vuln_fired);
|
||||
benign_fires.push(benign_fired);
|
||||
candidates.push((vr.attempt_index, outcome_record));
|
||||
} else {
|
||||
// Partial candidate: the vuln run reached the sink without
|
||||
// firing the oracle. It is a real engine gap only when the
|
||||
// benign control neither reached the sink nor fired its
|
||||
// oracle — i.e. the vuln input specifically drives the sink.
|
||||
// If the benign control also reaches the sink, the code path
|
||||
// is shared and safe (e.g. array-form `exec`), so no partial
|
||||
// signal is raised and the run stays `NotConfirmed`.
|
||||
if !benign_sink_hit && !benign_fired {
|
||||
partial_signal = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if vuln_fired && !sink_hit {
|
||||
// Oracle fired but probe didn't — likely collision.
|
||||
oracle_collision = true;
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
// ── Phase 26 aggregation ────────────────────────────────────────────
|
||||
// `evaluate_sets` collapses the firing sets to a single verdict: any
|
||||
// vuln payload firing + no benign control firing → Confirmed; any
|
||||
// benign firing anywhere → OracleCollisionSuspected (global ambient-
|
||||
// noise veto). A ConfirmedProvenOob candidate is terminal positive
|
||||
// evidence (a per-finding OOB nonce cannot be hit by ambient noise), so
|
||||
// it confirms even if some unrelated payload's benign tripped a noisy
|
||||
// oracle.
|
||||
if !candidates.is_empty() {
|
||||
let aggregate = differential::evaluate_sets(&vuln_fires, &benign_fires);
|
||||
let has_proven_oob = candidates
|
||||
.iter()
|
||||
.any(|(_, r)| r.verdict == DifferentialVerdict::ConfirmedProvenOob);
|
||||
let confirmed_class =
|
||||
has_proven_oob || matches!(aggregate, DifferentialVerdict::Confirmed);
|
||||
if confirmed_class {
|
||||
// Representative outcome: prefer the strongest (ProvenOob), else
|
||||
// the first candidate carrying a triggering verdict. Iteration
|
||||
// follows payload order, so the choice is deterministic.
|
||||
let chosen = candidates
|
||||
.iter()
|
||||
.find(|(_, r)| r.verdict == DifferentialVerdict::ConfirmedProvenOob)
|
||||
.or_else(|| {
|
||||
candidates
|
||||
.iter()
|
||||
.find(|(_, r)| middleware_demotion::is_triggering_verdict(r.verdict))
|
||||
})
|
||||
.cloned();
|
||||
if let Some((idx, record)) = chosen {
|
||||
attempts[idx].triggered = true;
|
||||
triggered_by = Some(idx);
|
||||
differential_outcome = Some(record);
|
||||
}
|
||||
} else {
|
||||
false
|
||||
};
|
||||
|
||||
attempts.push(Attempt {
|
||||
payload_label: payload.label,
|
||||
outcome,
|
||||
oracle_fired: vuln_fired,
|
||||
triggered,
|
||||
});
|
||||
|
||||
if triggered {
|
||||
triggered_by = Some(i);
|
||||
break;
|
||||
// Ambient-noise veto: at least one benign control fired and no
|
||||
// terminal OOB evidence exists. Surface a colliding candidate
|
||||
// so the verifier downgrades to
|
||||
// `Inconclusive(OracleCollisionSuspected)`.
|
||||
differential_outcome = candidates
|
||||
.iter()
|
||||
.find(|(_, r)| r.verdict == DifferentialVerdict::OracleCollisionSuspected)
|
||||
.or_else(|| candidates.first())
|
||||
.map(|(_, r)| r.clone());
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -699,6 +834,7 @@ pub fn run_spec(spec: &HarnessSpec, opts: &SandboxOptions) -> Result<RunOutcome,
|
|||
differential: differential_outcome,
|
||||
no_benign_control,
|
||||
unrelated_crash,
|
||||
sink_reached_no_oracle: partial_signal,
|
||||
})
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -305,7 +305,14 @@ impl SamplingPolicy {
|
|||
/// Decide whether an event with the given status / spec_hash should be
|
||||
/// written. Deterministic for a fixed `(self, status, spec_hash)`.
|
||||
pub fn should_sample(&self, status: VerifyStatus, spec_hash: &str) -> bool {
|
||||
if matches!(status, VerifyStatus::Confirmed) && self.keep_all_confirmed {
|
||||
if matches!(
|
||||
status,
|
||||
VerifyStatus::Confirmed | VerifyStatus::PartiallyConfirmed
|
||||
) && self.keep_all_confirmed
|
||||
{
|
||||
// PartiallyConfirmed is a low-volume, high-value triage signal
|
||||
// (each is a candidate real engine gap), so it rides the same
|
||||
// keep-all switch as Confirmed rather than being sampled away.
|
||||
return true;
|
||||
}
|
||||
if matches!(status, VerifyStatus::Inconclusive) && self.keep_all_inconclusive {
|
||||
|
|
@ -389,6 +396,7 @@ pub fn emit_with_policy(event: &TelemetryEvent, policy: &SamplingPolicy) {
|
|||
fn parse_status(s: &str) -> Option<VerifyStatus> {
|
||||
match s {
|
||||
"Confirmed" => Some(VerifyStatus::Confirmed),
|
||||
"PartiallyConfirmed" => Some(VerifyStatus::PartiallyConfirmed),
|
||||
"NotConfirmed" => Some(VerifyStatus::NotConfirmed),
|
||||
"Inconclusive" => Some(VerifyStatus::Inconclusive),
|
||||
"Unsupported" => Some(VerifyStatus::Unsupported),
|
||||
|
|
|
|||
|
|
@ -987,9 +987,16 @@ fn build_verdict(
|
|||
|
||||
if let Some(i) = run.triggered_by {
|
||||
let triggered_payload = run.attempts[i].payload_label.to_string();
|
||||
// Resolve repro bytes by label, not by index: OOB payloads
|
||||
// skipped for lack of a listener leave `attempts` shorter
|
||||
// than `vuln_payloads`, so a positional lookup can pull the
|
||||
// wrong payload's bytes. The label is the stable key.
|
||||
let payloads = payloads_for(spec.expected_cap);
|
||||
let vuln_payloads: Vec<_> = payloads.iter().filter(|p| !p.is_benign).collect();
|
||||
let payload_bytes = vuln_payloads.get(i).map(|p| p.bytes).unwrap_or(b"");
|
||||
let payload_bytes = payloads
|
||||
.iter()
|
||||
.find(|p| !p.is_benign && p.label == triggered_payload)
|
||||
.map(|p| p.bytes)
|
||||
.unwrap_or(b"");
|
||||
let hardening_outcome = summarize_hardening(&run.attempts[i].outcome);
|
||||
|
||||
// Emit repro artifact.
|
||||
|
|
@ -1156,6 +1163,33 @@ fn build_verdict(
|
|||
hardening_outcome: None,
|
||||
},
|
||||
}
|
||||
} else if run.sink_reached_no_oracle {
|
||||
// Phase 26: a vuln payload's in-harness sink-reachability
|
||||
// probe fired but its oracle marker never did, and the run
|
||||
// produced no Confirmed-class verdict and no colliding
|
||||
// differential. The sink is reachable at runtime yet the
|
||||
// exploit chain did not complete (no marker file written,
|
||||
// no OOB callback observed, output lacked the proof token).
|
||||
// Surface `PartiallyConfirmed` so engine work can ratchet on
|
||||
// the real sink-reachability gap without overstating it as a
|
||||
// confirmed exploit. No repro artifact is written: there is
|
||||
// no proven exploit to reproduce.
|
||||
VerifyResult {
|
||||
finding_id: finding_id.to_owned(),
|
||||
status: VerifyStatus::PartiallyConfirmed,
|
||||
triggered_payload: None,
|
||||
reason: None,
|
||||
inconclusive_reason: None,
|
||||
detail: Some(
|
||||
"sink-reachability probe fired but the oracle marker was not observed; exploit chain did not complete".to_owned(),
|
||||
),
|
||||
attempts,
|
||||
toolchain_match: Some(toolchain_match.to_owned()),
|
||||
differential: None,
|
||||
replay_stable: None,
|
||||
wrong: None,
|
||||
hardening_outcome: None,
|
||||
}
|
||||
} else if run.oracle_collision {
|
||||
// Oracle fired but the sink-hit sentinel did not —
|
||||
// legacy single-payload collision path, predates the
|
||||
|
|
@ -1735,4 +1769,141 @@ mod tests {
|
|||
"current corpus_version entry must be a cache hit"
|
||||
);
|
||||
}
|
||||
|
||||
fn partial_spec() -> HarnessSpec {
|
||||
HarnessSpec {
|
||||
finding_id: "deadbeefcafef00d".into(),
|
||||
entry_file: "app.py".into(),
|
||||
entry_name: "login".into(),
|
||||
entry_kind: crate::dynamic::spec::EntryKind::Function,
|
||||
lang: crate::symbol::Lang::Python,
|
||||
toolchain_id: "python-3.11".into(),
|
||||
payload_slot: crate::dynamic::spec::PayloadSlot::Param(0),
|
||||
expected_cap: crate::labels::Cap::SQL_QUERY,
|
||||
constraint_hints: vec![],
|
||||
sink_file: "app.py".into(),
|
||||
sink_line: 10,
|
||||
spec_hash: "cafecafecafe0001".into(),
|
||||
derivation: SpecDerivationStrategy::FromFlowSteps,
|
||||
stubs_required: vec![],
|
||||
framework: None,
|
||||
java_toolchain: crate::dynamic::spec::JavaToolchain::default(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Phase 26: a vuln payload whose sink-reachability probe fired but whose
|
||||
/// oracle marker never did — and no Confirmed-class verdict, no
|
||||
/// differential outcome, no benign-control gap — must surface as
|
||||
/// `PartiallyConfirmed`, carry no `triggered_payload`, and write no repro.
|
||||
#[test]
|
||||
fn build_verdict_sink_reached_no_oracle_maps_to_partially_confirmed() {
|
||||
use crate::dynamic::runner::{Attempt, RunOutcome};
|
||||
use crate::dynamic::sandbox::SandboxOutcome;
|
||||
|
||||
let opts = VerifyOptions::from_config(&Config::default());
|
||||
let run = RunOutcome {
|
||||
spec: partial_spec(),
|
||||
attempts: vec![Attempt {
|
||||
payload_label: "sqli-tautology",
|
||||
outcome: SandboxOutcome {
|
||||
exit_code: Some(0),
|
||||
stdout: b"__NYX_SINK_HIT__".to_vec(),
|
||||
stderr: Vec::new(),
|
||||
timed_out: false,
|
||||
oob_callback_seen: false,
|
||||
sink_hit: true,
|
||||
duration: std::time::Duration::ZERO,
|
||||
hardening_outcome: None,
|
||||
},
|
||||
oracle_fired: false,
|
||||
triggered: false,
|
||||
}],
|
||||
triggered_by: None,
|
||||
oracle_collision: false,
|
||||
sink_reached_no_oracle: true,
|
||||
build_attempts: 1,
|
||||
harness_source: String::new(),
|
||||
entry_source: String::new(),
|
||||
differential: None,
|
||||
no_benign_control: false,
|
||||
unrelated_crash: false,
|
||||
};
|
||||
|
||||
let verdict = build_verdict(
|
||||
"deadbeefcafef00d",
|
||||
&partial_spec(),
|
||||
Ok(run),
|
||||
"exact",
|
||||
&opts,
|
||||
std::time::Duration::ZERO,
|
||||
);
|
||||
|
||||
assert_eq!(verdict.status, VerifyStatus::PartiallyConfirmed);
|
||||
assert!(
|
||||
verdict.triggered_payload.is_none(),
|
||||
"PartiallyConfirmed must not claim a triggering payload"
|
||||
);
|
||||
assert!(
|
||||
verdict
|
||||
.detail
|
||||
.as_deref()
|
||||
.unwrap_or_default()
|
||||
.contains("sink-reachability probe fired"),
|
||||
"detail must explain the sink reached but the chain did not complete: {:?}",
|
||||
verdict.detail
|
||||
);
|
||||
// The sink-hit attempt must survive into the surfaced attempt list.
|
||||
assert_eq!(verdict.attempts.len(), 1);
|
||||
assert!(verdict.attempts[0].sink_hit);
|
||||
assert!(!verdict.attempts[0].triggered);
|
||||
}
|
||||
|
||||
/// Regression guard: a clean run (no sink hit, no oracle) must stay
|
||||
/// `NotConfirmed` — the `PartiallyConfirmed` branch must not swallow the
|
||||
/// ordinary negative case.
|
||||
#[test]
|
||||
fn build_verdict_clean_run_stays_not_confirmed() {
|
||||
use crate::dynamic::runner::{Attempt, RunOutcome};
|
||||
use crate::dynamic::sandbox::SandboxOutcome;
|
||||
|
||||
let opts = VerifyOptions::from_config(&Config::default());
|
||||
let run = RunOutcome {
|
||||
spec: partial_spec(),
|
||||
attempts: vec![Attempt {
|
||||
payload_label: "sqli-tautology",
|
||||
outcome: SandboxOutcome {
|
||||
exit_code: Some(0),
|
||||
stdout: Vec::new(),
|
||||
stderr: Vec::new(),
|
||||
timed_out: false,
|
||||
oob_callback_seen: false,
|
||||
sink_hit: false,
|
||||
duration: std::time::Duration::ZERO,
|
||||
hardening_outcome: None,
|
||||
},
|
||||
oracle_fired: false,
|
||||
triggered: false,
|
||||
}],
|
||||
triggered_by: None,
|
||||
oracle_collision: false,
|
||||
sink_reached_no_oracle: false,
|
||||
build_attempts: 1,
|
||||
harness_source: String::new(),
|
||||
entry_source: String::new(),
|
||||
differential: None,
|
||||
no_benign_control: false,
|
||||
unrelated_crash: false,
|
||||
};
|
||||
|
||||
let verdict = build_verdict(
|
||||
"deadbeefcafef00d",
|
||||
&partial_spec(),
|
||||
Ok(run),
|
||||
"exact",
|
||||
&opts,
|
||||
std::time::Duration::ZERO,
|
||||
);
|
||||
|
||||
assert_eq!(verdict.status, VerifyStatus::NotConfirmed);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -727,6 +727,14 @@ pub enum VerifyStatus {
|
|||
/// Sink fired with at least one payload. The static finding is exploitable
|
||||
/// against the live target.
|
||||
Confirmed,
|
||||
/// The in-harness sink-reachability probe fired (sink reached) but the
|
||||
/// oracle marker was never observed (no file write / no OOB callback /
|
||||
/// output did not contain the proof token), so the exploit chain did not
|
||||
/// complete. Semantically `{ sink_reached: true, exit_propagated: false }`.
|
||||
/// Ranks above `NotConfirmed` (runtime corroboration that the sink is
|
||||
/// reachable) but below `Confirmed` (no proven exploit). Used so engine
|
||||
/// work can ratchet on real sink-reachability gaps without overstating.
|
||||
PartiallyConfirmed,
|
||||
/// All payloads ran cleanly. Either the path is infeasible at runtime
|
||||
/// or the corpus is too narrow. Treat as "static-only", not "false positive".
|
||||
NotConfirmed,
|
||||
|
|
|
|||
|
|
@ -558,6 +558,7 @@ fn format_dynamic_verdict_annotation(dv: &crate::evidence::VerifyResult) -> Stri
|
|||
let pid = dv.triggered_payload.as_deref().unwrap_or("unknown");
|
||||
format!("[DYN: confirmed via {pid}]")
|
||||
}
|
||||
VerifyStatus::PartiallyConfirmed => "[DYN: partially confirmed (sink reached)]".to_string(),
|
||||
VerifyStatus::NotConfirmed => "[DYN: not confirmed]".to_string(),
|
||||
VerifyStatus::Unsupported => {
|
||||
let reason = dv
|
||||
|
|
|
|||
|
|
@ -258,6 +258,12 @@ fn dynamic_verdict_delta(diag: &Diag) -> Option<f64> {
|
|||
let dv = diag.evidence.as_ref()?.dynamic_verdict.as_ref()?;
|
||||
match dv.status {
|
||||
VerifyStatus::Confirmed => Some(20.0),
|
||||
// PartiallyConfirmed: the sink was reached at runtime but the
|
||||
// exploit chain did not complete. Runtime corroboration that the
|
||||
// sink is reachable is a positive signal, but weaker than a proven
|
||||
// exploit, so it earns a modest bump rather than the full Confirmed
|
||||
// boost.
|
||||
VerifyStatus::PartiallyConfirmed => Some(8.0),
|
||||
// Apply penalty only when the corpus was actually exhausted (attempts
|
||||
// were made); a NotConfirmed with zero attempts means something went
|
||||
// wrong before payload execution, which is an Inconclusive path, not
|
||||
|
|
|
|||
|
|
@ -293,6 +293,7 @@ fn status_for_diag(d: &Diag) -> &'static str {
|
|||
pub fn dynamic_status_label(status: VerifyStatus) -> &'static str {
|
||||
match status {
|
||||
VerifyStatus::Confirmed => "Confirmed",
|
||||
VerifyStatus::PartiallyConfirmed => "PartiallyConfirmed",
|
||||
VerifyStatus::NotConfirmed => "NotConfirmed",
|
||||
VerifyStatus::Inconclusive => "Inconclusive",
|
||||
VerifyStatus::Unsupported => "Unsupported",
|
||||
|
|
|
|||
|
|
@ -76,6 +76,28 @@ fn diag_with_verdict(status: VerifyStatus) -> Diag {
|
|||
wrong: None,
|
||||
hardening_outcome: None,
|
||||
},
|
||||
VerifyStatus::PartiallyConfirmed => VerifyResult {
|
||||
finding_id: "abc123".into(),
|
||||
status,
|
||||
triggered_payload: None,
|
||||
reason: None,
|
||||
inconclusive_reason: None,
|
||||
detail: Some(
|
||||
"sink-reachability probe fired but the oracle marker was not observed; exploit chain did not complete".into(),
|
||||
),
|
||||
attempts: vec![AttemptSummary {
|
||||
payload_label: "sqli-tautology".into(),
|
||||
exit_code: Some(0),
|
||||
timed_out: false,
|
||||
triggered: false,
|
||||
sink_hit: true,
|
||||
}],
|
||||
toolchain_match: Some("exact".into()),
|
||||
differential: None,
|
||||
replay_stable: None,
|
||||
wrong: None,
|
||||
hardening_outcome: None,
|
||||
},
|
||||
VerifyStatus::NotConfirmed => VerifyResult {
|
||||
finding_id: "abc123".into(),
|
||||
status,
|
||||
|
|
@ -158,6 +180,17 @@ fn console_not_confirmed_shows_annotation() {
|
|||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn console_partially_confirmed_shows_sink_reached() {
|
||||
let diag = diag_with_verdict(VerifyStatus::PartiallyConfirmed);
|
||||
let output = render_console(&[diag], "proj", None, &[]);
|
||||
let stripped = strip_ansi(&output);
|
||||
assert!(
|
||||
stripped.contains("[DYN: partially confirmed (sink reached)]"),
|
||||
"expected DYN partially-confirmed annotation, got:\n{stripped}"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn console_unsupported_shows_reason() {
|
||||
let diag = diag_with_verdict(VerifyStatus::Unsupported);
|
||||
|
|
|
|||
|
|
@ -78,6 +78,7 @@ def load_previous_agg(path: str) -> dict:
|
|||
"fn": 0,
|
||||
"unsupported": 0,
|
||||
"confirmed": 0,
|
||||
"partially_confirmed": 0,
|
||||
"wrong_confirmed": 0,
|
||||
"stable_replays": 0,
|
||||
"total": 0,
|
||||
|
|
@ -92,6 +93,7 @@ def load_previous_agg(path: str) -> dict:
|
|||
"fn",
|
||||
"unsupported",
|
||||
"confirmed",
|
||||
"partially_confirmed",
|
||||
"wrong_confirmed",
|
||||
"stable_replays",
|
||||
"total",
|
||||
|
|
@ -139,6 +141,7 @@ def main() -> int:
|
|||
"fn": 0,
|
||||
"unsupported": 0,
|
||||
"confirmed": 0,
|
||||
"partially_confirmed": 0,
|
||||
"wrong_confirmed": 0,
|
||||
"stable_replays": 0,
|
||||
"total": 0,
|
||||
|
|
@ -153,6 +156,7 @@ def main() -> int:
|
|||
"fn",
|
||||
"unsupported",
|
||||
"confirmed",
|
||||
"partially_confirmed",
|
||||
"wrong_confirmed",
|
||||
"stable_replays",
|
||||
"total",
|
||||
|
|
@ -160,17 +164,22 @@ def main() -> int:
|
|||
agg[k][field] += c.get(field, 0)
|
||||
|
||||
print("\n=== Aggregated eval corpus report ===")
|
||||
print(f"{'Cap':<20} {'Lang':<12} {'TP':>5} {'FP':>5} {'FN':>5} {'Prec':>6} {'Rec':>6} {'Unsup%':>7}")
|
||||
print("-" * 72)
|
||||
print(
|
||||
f"{'Cap':<20} {'Lang':<12} {'TP':>5} {'FP':>5} {'FN':>5} "
|
||||
f"{'Prec':>6} {'Rec':>6} {'Unsup%':>7} {'Conf%':>7} {'Part%':>7}"
|
||||
)
|
||||
print("-" * 88)
|
||||
for k, v in sorted(agg.items()):
|
||||
prec = v["tp"] / max(v["tp"] + v["fp"], 1)
|
||||
rec = v["tp"] / max(v["tp"] + v["fn"], 1)
|
||||
unsup = v["unsupported"] / max(v["total"], 1)
|
||||
conf = v["confirmed"] / max(v["total"], 1)
|
||||
part = v["partially_confirmed"] / max(v["total"], 1)
|
||||
print(
|
||||
f"{k[0]:<20} {k[1]:<12} "
|
||||
f"{v['tp']:>5} {v['fp']:>5} {v['fn']:>5} "
|
||||
f"{prec:>6.2f} {rec:>6.2f} "
|
||||
f"{unsup*100:>6.1f}%"
|
||||
f"{unsup*100:>6.1f}% {conf*100:>6.1f}% {part*100:>6.1f}%"
|
||||
)
|
||||
|
||||
gate_failed = False
|
||||
|
|
|
|||
|
|
@ -387,7 +387,7 @@ def main() -> int:
|
|||
break
|
||||
|
||||
# Per-cell tallies: {(cap, lang): {tp, fp, fn, unsupported, confirmed,
|
||||
# wrong_confirmed, stable_replays, total}}
|
||||
# partially_confirmed, wrong_confirmed, stable_replays, total}}
|
||||
cells: dict[tuple[str, str], dict] = defaultdict(
|
||||
lambda: {
|
||||
"tp": 0,
|
||||
|
|
@ -395,6 +395,7 @@ def main() -> int:
|
|||
"fn": 0,
|
||||
"unsupported": 0,
|
||||
"confirmed": 0,
|
||||
"partially_confirmed": 0,
|
||||
"wrong_confirmed": 0,
|
||||
"stable_replays": 0,
|
||||
"total": 0,
|
||||
|
|
@ -412,6 +413,8 @@ def main() -> int:
|
|||
status = dv.get("status")
|
||||
if status == "Unsupported":
|
||||
cells[key]["unsupported"] += 1
|
||||
elif status == "PartiallyConfirmed":
|
||||
cells[key]["partially_confirmed"] += 1
|
||||
elif status == "Confirmed":
|
||||
cells[key]["confirmed"] += 1
|
||||
# Repro-stability and false-Confirmed counts are optional
|
||||
|
|
|
|||
|
|
@ -235,9 +235,10 @@ fn sarif_confirmed_verdict_nyx_dynamic_verdict_contains_triggered_payload() {
|
|||
}
|
||||
|
||||
#[test]
|
||||
fn sarif_all_four_statuses_produce_partial_fingerprint() {
|
||||
fn sarif_all_statuses_produce_partial_fingerprint() {
|
||||
let statuses = [
|
||||
(VerifyStatus::Confirmed, "Confirmed"),
|
||||
(VerifyStatus::PartiallyConfirmed, "PartiallyConfirmed"),
|
||||
(VerifyStatus::NotConfirmed, "NotConfirmed"),
|
||||
(VerifyStatus::Unsupported, "Unsupported"),
|
||||
(VerifyStatus::Inconclusive, "Inconclusive"),
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue