feat(dynamic): add PartiallyConfirmed status for finer-grained sink-reachability categorization, update dynamic verification, telemetry, and reporting systems

This commit is contained in:
elipeter 2026-05-29 14:35:39 -05:00
parent 635b213825
commit c0501884ae
23 changed files with 658 additions and 142 deletions

View file

@ -6,9 +6,9 @@ rust-version = "1.88"
description = "A multi-language static analysis tool for detecting security vulnerabilities"
license = "GPL-3.0-or-later"
authors = ["Eli Peter <elicpeter@example.com>"]
homepage = "https://github.com/elicpeter/nyx"
homepage = "https://nyxsec.dev/scanner"
repository = "https://github.com/elicpeter/nyx"
documentation = "https://elicpeter.github.io/nyx/"
documentation = "https://nyxsec.dev/docs/nyx/"
keywords = ["security", "vulnerability", "scanner", "static-analysis", "cli"]
categories = ["security", "command-line-utilities", "development-tools", "parser-implementations", "text-processing"]
readme = "README.md"

View file

@ -3,7 +3,12 @@ export type Confidence = 'Low' | 'Medium' | 'High';
export type FlowStepKind = 'source' | 'assignment' | 'call' | 'phi' | 'sink';
// Dynamic verification types (from src/evidence.rs VerifyStatus / VerifyResult)
export type VerifyStatus = 'Confirmed' | 'NotConfirmed' | 'Inconclusive' | 'Unsupported';
/**
 * Status of a dynamic verification verdict for a finding.
 *
 * Values are the string names of the backend `VerifyStatus` variants.
 */
export type VerifyStatus =
| 'Confirmed'
// The sink was reached at runtime but the exploit chain did not complete.
| 'PartiallyConfirmed'
| 'NotConfirmed'
| 'Inconclusive'
| 'Unsupported';
export interface AttemptSummary {
payload_label: string;
@ -29,6 +34,7 @@ export interface VerifyResult {
export interface DynamicVerificationSummary {
total: number;
confirmed: number;
partially_confirmed: number;
not_confirmed: number;
inconclusive: number;
unsupported: number;

View file

@ -2,6 +2,7 @@ import type { VerifyResult, VerifyStatus } from '../api/types';
const STATUS_LABELS: Record<VerifyStatus, string> = {
Confirmed: 'Confirmed',
PartiallyConfirmed: 'Partially confirmed',
NotConfirmed: 'Not confirmed',
Inconclusive: 'Inconclusive',
Unsupported: 'Unsupported',
@ -15,6 +16,10 @@ function verdictTooltip(verdict: VerifyResult): string {
return triggered_payload
? `Confirmed via payload: ${triggered_payload}`
: 'Dynamically confirmed exploitable';
case 'PartiallyConfirmed':
return detail
? `Partially confirmed (sink reached): ${detail}`
: 'Partially confirmed: sink reached but exploit chain did not complete';
case 'NotConfirmed':
return (verdict.attempts?.length ?? 0) > 0
? `Not confirmed after ${verdict.attempts?.length ?? 0} payload attempt(s)`

View file

@ -244,13 +244,14 @@ export function ScannerQualityPanel({
const dynamic = quality.dynamic_verification ?? {
total: 0,
confirmed: 0,
partially_confirmed: 0,
not_confirmed: 0,
inconclusive: 0,
unsupported: 0,
};
const dynamicDetail =
dynamic.total > 0
? `${dynamic.total.toLocaleString()} verdicts · ${dynamic.not_confirmed.toLocaleString()} not confirmed · ${dynamic.inconclusive.toLocaleString()} inconclusive · ${dynamic.unsupported.toLocaleString()} unsupported`
? `${dynamic.total.toLocaleString()} verdicts · ${dynamic.partially_confirmed.toLocaleString()} partially confirmed · ${dynamic.not_confirmed.toLocaleString()} not confirmed · ${dynamic.inconclusive.toLocaleString()} inconclusive · ${dynamic.unsupported.toLocaleString()} unsupported`
: 'no dynamic verdicts in latest scan';
const rows: Array<{

View file

@ -31,6 +31,7 @@ function formatTriageState(state: string): string {
/**
 * Convert a raw verification status into its display label.
 *
 * The two compound statuses get a spaced, sentence-case label; any other
 * non-empty status is shown verbatim, and an empty status is shown as
 * `Unverified`.
 */
function formatVerificationStatus(status: string): string {
  switch (status) {
    case 'NotConfirmed':
      return 'Not confirmed';
    case 'PartiallyConfirmed':
      return 'Partially confirmed';
    default:
      // Falsy status means no verdict was recorded for the finding.
      return status ? status : 'Unverified';
  }
}

View file

@ -2668,6 +2668,10 @@ tr.selected td {
background: var(--success-bg);
color: var(--success);
}
/* Dynamic-verdict badge variant for the PartiallyConfirmed status.
   Reuses the medium-confidence colour tokens for background and text. */
.badge-dyn-partiallyconfirmed {
background: var(--conf-medium-bg);
color: var(--conf-medium);
}
.badge-dyn-notconfirmed {
background: var(--bg-secondary);
color: var(--text-secondary);

View file

@ -43,6 +43,19 @@ describe('DynamicVerdictSection', () => {
).toBeInTheDocument();
});
// A PartiallyConfirmed verdict carrying a detail string must render the
// badge identified by the `verdict-badge-partiallyconfirmed` test id.
it('renders PartiallyConfirmed badge', () => {
render(
<DynamicVerdictSection
verdict={makeVerdict('PartiallyConfirmed', {
detail: 'sink reached but exploit chain did not complete',
})}
/>,
);
// Presence of the badge element is the only thing asserted here.
expect(
screen.getByTestId('verdict-badge-partiallyconfirmed'),
).toBeInTheDocument();
});
it('does not crash when the API omits an empty attempts array', () => {
render(
<DynamicVerdictSection
@ -82,6 +95,7 @@ describe('DynamicVerdictSection', () => {
unmount();
for (const status of [
'PartiallyConfirmed',
'NotConfirmed',
'Unsupported',
'Inconclusive',

View file

@ -35,6 +35,21 @@ describe('VerdictBadge', () => {
expect(badge.textContent).toContain('🔥');
});
// Pins the PartiallyConfirmed presentation: dedicated CSS class, no flame
// emoji in the label, and a tooltip that mentions the sink was reached.
it('renders PartiallyConfirmed badge with amber class and no flame', () => {
render(
<VerdictBadge
verdict={makeVerdict('PartiallyConfirmed', {
detail: 'sink-reachability probe fired but the oracle marker was not observed',
})}
/>,
);
const badge = screen.getByTestId('verdict-badge-partiallyconfirmed');
expect(badge).toBeInTheDocument();
expect(badge.className).toContain('badge-dyn-partiallyconfirmed');
// This test expects the flame marker to be absent for a partial verdict.
expect(badge.textContent).not.toContain('🔥');
// The detail above does not contain the words "sink reached", so this
// phrase presumably comes from the tooltip prefix — TODO confirm.
expect(badge.getAttribute('title')).toContain('sink reached');
});
it('renders NotConfirmed badge with correct class', () => {
render(<VerdictBadge verdict={makeVerdict('NotConfirmed')} />);
const badge = screen.getByTestId('verdict-badge-notconfirmed');
@ -107,9 +122,10 @@ describe('VerdictBadge', () => {
expect(badge.textContent?.replace('🔥 ', '')).toBe('C');
});
it('renders all four VerifyStatus variants without crashing', () => {
it('renders all five VerifyStatus variants without crashing', () => {
const statuses: VerifyResult['status'][] = [
'Confirmed',
'PartiallyConfirmed',
'NotConfirmed',
'Unsupported',
'Inconclusive',

View file

@ -308,6 +308,10 @@ pub fn check_gate(diff: &VerdictDiff, gate: &str) -> bool {
&& matches!(
e.current_status,
Some(VerifyStatus::Confirmed)
// PartiallyConfirmed = sink still reachable at
// runtime, so a baseline-Confirmed finding that is
// now partial has NOT been resolved.
| Some(VerifyStatus::PartiallyConfirmed)
| Some(VerifyStatus::Inconclusive)
| Some(VerifyStatus::Unsupported)
)
@ -323,6 +327,7 @@ pub fn check_gate(diff: &VerdictDiff, gate: &str) -> bool {
fn status_str(s: Option<VerifyStatus>) -> &'static str {
match s {
Some(VerifyStatus::Confirmed) => "Confirmed",
Some(VerifyStatus::PartiallyConfirmed) => "PartiallyConfirmed",
Some(VerifyStatus::NotConfirmed) => "NotConfirmed",
Some(VerifyStatus::Inconclusive) => "Inconclusive",
Some(VerifyStatus::Unsupported) => "Unsupported",

View file

@ -37,8 +37,11 @@ pub enum Feasibility {
/// but where the static evidence is strong.
InconclusiveHighConf,
/// Everything else — no dynamic verification, dynamic verdict was
/// `NotConfirmed`/`Unsupported`, or dynamic was `Inconclusive` but
/// static confidence is not `High`.
/// `NotConfirmed`/`PartiallyConfirmed`/`Unsupported`, or dynamic was
/// `Inconclusive` but static confidence is not `High`. A
/// `PartiallyConfirmed` verdict proves only that the sink is reachable,
/// not that the exploit chain completes, so it stays conservative here:
/// it must not inflate a multi-hop path score.
Unverified,
}

View file

@ -242,6 +242,7 @@ pub fn compute_stable_hash(diag: &Diag) -> u64 {
pub struct DynamicVerificationSummary {
pub total: usize,
pub confirmed: usize,
pub partially_confirmed: usize,
pub not_confirmed: usize,
pub inconclusive: usize,
pub unsupported: usize,
@ -261,6 +262,9 @@ impl DynamicVerificationSummary {
summary.total += 1;
match verdict.status {
crate::evidence::VerifyStatus::Confirmed => summary.confirmed += 1,
crate::evidence::VerifyStatus::PartiallyConfirmed => {
summary.partially_confirmed += 1
}
crate::evidence::VerifyStatus::NotConfirmed => summary.not_confirmed += 1,
crate::evidence::VerifyStatus::Inconclusive => summary.inconclusive += 1,
crate::evidence::VerifyStatus::Unsupported => summary.unsupported += 1,
@ -282,10 +286,11 @@ pub fn format_dynamic_verification_summary(summary: &DynamicVerificationSummary)
"verdicts"
};
format!(
"{} {} ({} confirmed, {} not confirmed, {} inconclusive, {} unsupported)",
"{} {} ({} confirmed, {} partially confirmed, {} not confirmed, {} inconclusive, {} unsupported)",
summary.total,
noun,
summary.confirmed,
summary.partially_confirmed,
summary.not_confirmed,
summary.inconclusive,
summary.unsupported

View file

@ -1,19 +1,23 @@
//! Differential confirmation rule for dynamic verification (Phase 07).
//! Differential confirmation rule for dynamic verification (Phase 07 / 26).
//!
//! `Confirmed` requires the vulnerable payload's oracle to fire **and**
//! the paired benign control's oracle to *not* fire (§4.1). This module
//! is the single source of truth for that rule. Everything else (runner,
//! verifier, tests) collapses to "look up paired benign + call
//! [`evaluate`]".
//! `Confirmed` requires **at least one** vulnerable payload's oracle to
//! fire **and every** paired benign control's oracle to *not* fire
//! (§4.1, extended for multi-payload aggregation in Phase 26). This
//! module is the single source of truth for that rule. Everything else
//! (runner, verifier, tests) collapses to "collect firing sets + call
//! [`evaluate_sets`]".
//!
//! # Rule table
//! # Rule table (set aggregation)
//!
//! | vuln fires | benign fires | verdict |
//! |------------|--------------|-------------------------------|
//! | true | false | `Confirmed` |
//! | true | true | `OracleCollisionSuspected` |
//! | false | false | `NotConfirmed` |
//! | false | true | `ReversedDifferential` |
//! | any vuln fires | any benign fires | verdict |
//! |----------------|------------------|----------------------------|
//! | true | false | `Confirmed` |
//! | true | true | `OracleCollisionSuspected` |
//! | false | false | `NotConfirmed` |
//! | false | true | `ReversedDifferential` |
//!
//! The scalar [`evaluate`] is the single-payload, single-control
//! specialisation of [`evaluate_sets`] and delegates to it.
//!
//! "Fires" means [`crate::dynamic::oracle::oracle_fired`] returned `true`
//! against the run's [`SandboxOutcome`] + drained [`SinkProbe`] set —
@ -24,8 +28,33 @@ use crate::evidence::{
DifferentialOutcome, DifferentialProbeArg, DifferentialProbeRecord, DifferentialVerdict,
};
/// Apply the differential confirmation rule.
/// Collapse per-run firing results into one differential verdict
/// (Phase 26 multi-payload aggregation).
///
/// * `vuln_fired` — one flag per vulnerable payload attempt.
/// * `benign_fired` — one flag per paired benign control that actually ran.
///
/// Each side is reduced to "did anything fire?" and the two answers are
/// combined:
///
/// * vuln fired, benign silent → `Confirmed`
/// * vuln fired, benign fired → `OracleCollisionSuspected`
/// * vuln silent, benign silent → `NotConfirmed`
/// * vuln silent, benign fired → `ReversedDifferential`
///
/// A single firing benign control therefore vetoes `Confirmed`, and a
/// single firing vulnerable payload is enough positive evidence. An empty
/// slice counts as "nothing fired" on that side, so
/// `evaluate_sets(&[], &[])` is `NotConfirmed`.
pub fn evaluate_sets(vuln_fired: &[bool], benign_fired: &[bool]) -> DifferentialVerdict {
    let vuln_side_fired = vuln_fired.contains(&true);
    let benign_side_fired = benign_fired.contains(&true);

    if benign_side_fired {
        // The oracle also reacted to a benign control, so it cannot
        // discriminate between the two inputs.
        if vuln_side_fired {
            DifferentialVerdict::OracleCollisionSuspected
        } else {
            DifferentialVerdict::ReversedDifferential
        }
    } else if vuln_side_fired {
        DifferentialVerdict::Confirmed
    } else {
        DifferentialVerdict::NotConfirmed
    }
}
/// Apply the differential confirmation rule to a single
/// (vulnerable, benign-control) pair.
///
/// Single-element specialisation of [`evaluate_sets`].
/// `vuln_probe_fires` and `benign_probe_fires` are the boolean firing
/// results of [`crate::dynamic::oracle::oracle_fired`] for the
/// vulnerable payload and its paired benign control respectively. The
@ -33,12 +62,7 @@ use crate::evidence::{
/// callers attach those separately via [`DifferentialOutcome`] for
/// forensic display.
pub fn evaluate(vuln_probe_fires: bool, benign_probe_fires: bool) -> DifferentialVerdict {
match (vuln_probe_fires, benign_probe_fires) {
(true, false) => DifferentialVerdict::Confirmed,
(true, true) => DifferentialVerdict::OracleCollisionSuspected,
(false, false) => DifferentialVerdict::NotConfirmed,
(false, true) => DifferentialVerdict::ReversedDifferential,
}
evaluate_sets(&[vuln_probe_fires], &[benign_probe_fires])
}
/// Build a [`DifferentialOutcome`] for inclusion in a
@ -139,6 +163,61 @@ mod tests {
);
}
#[test]
fn sets_any_vuln_no_benign_is_confirmed() {
    // Only the middle vuln payload fires and both benign controls stay
    // silent; one firing payload is sufficient for a confirmation.
    let vuln = [false, true, false];
    let benign = [false, false];
    let verdict = evaluate_sets(&vuln, &benign);
    assert_eq!(verdict, DifferentialVerdict::Confirmed);
}
#[test]
fn sets_one_benign_firing_vetoes_confirmed() {
    // Every vuln payload fires, yet one benign control out of three also
    // fires: the global ambient-noise veto turns this into a collision.
    let vuln = [true, true];
    let benign = [false, true, false];
    let verdict = evaluate_sets(&vuln, &benign);
    assert_eq!(verdict, DifferentialVerdict::OracleCollisionSuspected);
}
#[test]
fn sets_no_vuln_no_benign_is_not_confirmed() {
    // Nothing fires on either side.
    let vuln = [false, false];
    let benign = [false];
    let verdict = evaluate_sets(&vuln, &benign);
    assert_eq!(verdict, DifferentialVerdict::NotConfirmed);
}
#[test]
fn sets_no_vuln_some_benign_is_reversed() {
    // Only the benign control fires: the differential points the wrong way.
    let vuln = [false];
    let benign = [true];
    let verdict = evaluate_sets(&vuln, &benign);
    assert_eq!(verdict, DifferentialVerdict::ReversedDifferential);
}
#[test]
fn sets_empty_is_not_confirmed() {
    // Empty slices count as "nothing fired" on both sides.
    let verdict = evaluate_sets(&[], &[]);
    assert_eq!(verdict, DifferentialVerdict::NotConfirmed);
}
#[test]
fn sets_empty_benign_with_vuln_is_confirmed() {
    // With no benign control run at all there is nothing that could veto
    // the firing vuln payload, so the verdict is Confirmed.
    let vuln = [true];
    let verdict = evaluate_sets(&vuln, &[]);
    assert_eq!(verdict, DifferentialVerdict::Confirmed);
}
#[test]
fn scalar_evaluate_matches_singleton_sets() {
    // All four (vuln, benign) combinations, in the same order the nested
    // loops would visit them.
    let combinations = [(false, false), (false, true), (true, false), (true, true)];
    for (vuln, benign) in combinations {
        // The scalar entry point must agree with the set form on
        // one-element slices.
        assert_eq!(evaluate(vuln, benign), evaluate_sets(&[vuln], &[benign]));
    }
}
#[test]
fn oob_self_confirmed_outcome_carries_only_vuln_trace() {
use crate::dynamic::probe::{ProbeArg, ProbeKind, ProbeWitness, SinkProbe};

View file

@ -59,10 +59,29 @@ const MAX_BUILD_ATTEMPTS: u32 = 2;
pub struct RunOutcome {
pub spec: HarnessSpec,
pub attempts: Vec<Attempt>,
/// First attempt that fired the sink with `oracle_fired && sink_hit`.
/// Index into [`Self::attempts`] of the attempt the confirm verdict is
/// attributed to. Set by the Phase 26 set aggregation when
/// [`crate::dynamic::differential::evaluate_sets`] returns a
/// Confirmed-class verdict (any vuln payload fired the oracle + sink
/// while every paired benign control stayed clean), or when an
/// OOB-nonce payload self-confirmed. `None` otherwise.
pub triggered_by: Option<usize>,
/// Whether the oracle fired but the sink probe did not (oracle collision).
pub oracle_collision: bool,
/// Phase 26: a vuln payload's in-harness sink-reachability probe fired
/// (`outcome.sink_hit`) but its oracle marker was never observed (no file
/// write / no OOB callback / output lacked the proof token), *and* the
/// paired benign control neither reached the sink nor fired its oracle.
/// The benign-control differential is the discriminator: it proves the
/// vuln input specifically drives the sink, ruling out safe code that
/// merely reaches the sink (e.g. array-form `exec` with inert
/// metacharacters, which the benign control also reaches). The verifier
/// maps this to [`crate::evidence::VerifyStatus::PartiallyConfirmed`]: the
/// sink is reachable under the vuln input but the exploit chain did not
/// complete. May be set alongside a Confirmed-class verdict or a colliding
/// differential when another payload in the same run produced one; those
/// outcomes take precedence at the verify boundary.
pub sink_reached_no_oracle: bool,
/// Number of build attempts consumed.
pub build_attempts: u32,
/// Harness sources for repro artifacts.
@ -454,6 +473,24 @@ pub fn run_spec(spec: &HarnessSpec, opts: &SandboxOptions) -> Result<RunOutcome,
let mut unrelated_crash = false;
let mut differential_outcome: Option<DifferentialOutcome> = None;
// Phase 26 set aggregation, phase A: per-vuln-payload run record.
// Every vuln payload runs to completion (no early break) so the
// differential rule can aggregate across the whole set — a single
// benign control firing anywhere must be able to veto a `Confirmed`.
struct VulnRun {
/// Index into `vuln_payloads` (for benign-control resolution).
payload_index: usize,
/// Index into `attempts` (what `triggered_by` points at).
attempt_index: usize,
vuln_fired: bool,
sink_hit: bool,
oob_nonce_slot: bool,
oob_callback_seen: bool,
vuln_probes: Vec<SinkProbe>,
}
let mut vuln_runs: Vec<VulnRun> = Vec::with_capacity(vuln_payloads.len());
// ── Phase A: run every vuln payload, record its firing signals ──────
for (i, payload) in vuln_payloads.iter().enumerate() {
// Materialise payload bytes (OOB nonce-slot payloads generate a URL).
let (oob_nonce, effective_bytes) = if payload.oob_nonce_slot {
@ -480,11 +517,12 @@ pub fn run_spec(spec: &HarnessSpec, opts: &SandboxOptions) -> Result<RunOutcome,
let _ = ch.clear();
}
let attempt_index = attempts.len();
trace_record(
trace_handle.as_ref(),
TraceStage::SandboxStarted,
Some(format!(
"attempt={i} payload={} oracle={}",
"attempt={attempt_index} payload={} oracle={}",
payload.label,
oracle_short_name(&payload.oracle)
)),
@ -495,7 +533,7 @@ pub fn run_spec(spec: &HarnessSpec, opts: &SandboxOptions) -> Result<RunOutcome,
trace_handle.as_ref(),
TraceStage::OracleWait,
Some(format!(
"attempt={i} exit_code={:?} timed_out={}",
"attempt={attempt_index} exit_code={:?} timed_out={}",
outcome.exit_code, outcome.timed_out
)),
);
@ -508,9 +546,9 @@ pub fn run_spec(spec: &HarnessSpec, opts: &SandboxOptions) -> Result<RunOutcome,
// failure — the harness "linked" against deps that don't resolve at
// run time — so route through `RunError::BuildFailed` to keep the
// SKIP-on-BuildFailed branch in the e2e corpus tests honest. Only
// checked on the first vuln payload because the missing dep won't
// appear later in the run.
if i == 0 && is_runtime_import_error(&outcome) {
// checked on the first actually-run payload because the missing dep
// won't appear later in the run.
if attempts.is_empty() && is_runtime_import_error(&outcome) {
return Err(RunError::BuildFailed {
stderr: String::from_utf8_lossy(&outcome.stderr).into_owned(),
attempts: build_attempts,
@ -546,7 +584,7 @@ pub fn run_spec(spec: &HarnessSpec, opts: &SandboxOptions) -> Result<RunOutcome,
trace_handle.as_ref(),
TraceStage::OracleObserved,
Some(format!(
"attempt={i} fired={vuln_fired} sink_hit={sink_hit}"
"attempt={attempt_index} fired={vuln_fired} sink_hit={sink_hit}"
)),
);
@ -566,93 +604,152 @@ pub fn run_spec(spec: &HarnessSpec, opts: &SandboxOptions) -> Result<RunOutcome,
unrelated_crash = true;
}
// Differential rule (Phase 07, §4.1). Only when the vuln oracle
// fired *and* the in-harness sink-hit sentinel was observed do we
// consult the paired benign control. Oracle-fires-without-sink
// stays on the legacy `oracle_collision` path so the existing
// `Inconclusive(OracleCollisionSuspected)` semantics survive.
let triggered = if vuln_fired && sink_hit {
// Match the resolution scope to the payload-slice scope so a
// benign control declared in another language is still found
// when this run was driven off the lang-agnostic union (see
// `used_lang_slice` above). When the run did use the
// per-language slice, the lang-aware resolver keeps a
// mismatched language from silently producing a Confirmed.
let resolved = if used_lang_slice {
resolve_benign_control_lang(payload, spec.expected_cap, spec.lang)
} else {
resolve_benign_control(payload, spec.expected_cap)
};
match resolved {
None => {
// Phase 05 OOB closure: OOB-nonce payloads with
// `benign_control = None` are structurally self-
// confirming when the listener observed the callback.
// A benign URL cannot hit a per-finding nonce, so the
// OOB observation is independent network-level
// evidence the sink fired. Skip the no-benign-control
// downgrade and emit
// [`DifferentialVerdict::ConfirmedProvenOob`].
if payload.oob_nonce_slot && outcome.oob_callback_seen {
let mut outcome_record = differential::build_oob_self_confirmed_outcome(
payload.label,
&vuln_probes,
);
middleware_demotion::apply_demotion(
&mut outcome_record,
spec.framework.as_ref(),
spec.lang,
);
let confirmed =
middleware_demotion::is_triggering_verdict(outcome_record.verdict);
differential_outcome = Some(outcome_record);
confirmed
} else {
no_benign_control = true;
false
}
}
Some(benign) => {
let benign_bytes = materialise_bytes(benign, None)
.map(|b| b.into_owned())
.unwrap_or_default();
if let Some(ch) = &probe_channel {
let _ = ch.clear();
}
let benign_outcome = sandbox::run(&harness, &benign_bytes, &effective_opts)?;
let benign_probes: Vec<SinkProbe> = probe_channel
.as_ref()
.map(|ch| ch.drain())
.unwrap_or_default();
let benign_stub_events: Vec<StubEvent> = effective_opts
.stub_harness
.as_ref()
.map(|h| h.drain_all())
.unwrap_or_default();
let benign_fired = oracle_fired_with_stubs(
&benign.oracle,
&benign_outcome,
&benign_probes,
&benign_stub_events,
// Legacy single-payload collision: oracle fired without the
// in-harness sink-hit sentinel. Phase 26 partial-confirmation is
// deliberately NOT decided here: a vuln run that reaches the sink
// without firing its oracle is ambiguous — it could be a real engine
// gap (the vuln input drives the sink but the exploit chain could not
// be observed) or merely safe code that happens to reach the sink
// (e.g. array-form `exec` with inert metacharacters). The call is
// deferred to the differential check in Phase B, which compares the
// benign control's sink reachability.
if vuln_fired && !sink_hit {
oracle_collision = true;
}
let oob_callback_seen = outcome.oob_callback_seen;
attempts.push(Attempt {
payload_label: payload.label,
outcome,
oracle_fired: vuln_fired,
triggered: false,
});
vuln_runs.push(VulnRun {
payload_index: i,
attempt_index,
vuln_fired,
sink_hit,
oob_nonce_slot: payload.oob_nonce_slot,
oob_callback_seen,
vuln_probes,
});
}
// ── Phase B: differential confirmation + partial-confirmation gate ──
// Two candidate classes drive a paired benign-control run:
// • confirm candidate — vuln oracle fired *and* the in-harness sink-hit
// sentinel was observed. Collected into the set aggregation (§4.1).
// • partial candidate — the sink-hit sentinel fired but the oracle did
// not. The benign control's sink reachability decides whether this is
// a real engine gap (`PartiallyConfirmed`) or safe code that merely
// reaches the sink (`NotConfirmed`).
// Oracle-fires-without-sink stays on the legacy `oracle_collision` path.
let mut vuln_fires: Vec<bool> = Vec::new();
let mut benign_fires: Vec<bool> = Vec::new();
// (attempt_index, differential outcome) per confirm candidate.
let mut candidates: Vec<(usize, DifferentialOutcome)> = Vec::new();
// Phase 26: set when a partial candidate's vuln run reached the sink that
// its benign control did *not* — a sink-reachability differential proving
// the vuln input specifically drives the sink even though the exploit
// chain could not be observed completing.
let mut partial_signal = false;
for vr in &vuln_runs {
let is_confirm_candidate = vr.vuln_fired && vr.sink_hit;
let is_partial_candidate = vr.sink_hit && !vr.vuln_fired;
if !is_confirm_candidate && !is_partial_candidate {
continue;
}
// The partial signal is a single bool; once established, skip further
// partial-only probing. Confirm candidates always run — the set
// aggregation needs every one.
if is_partial_candidate && !is_confirm_candidate && partial_signal {
continue;
}
let payload = vuln_payloads[vr.payload_index];
// Match the resolution scope to the payload-slice scope so a benign
// control declared in another language is still found when this run
// was driven off the lang-agnostic union (see `used_lang_slice`).
// When the run did use the per-language slice, the lang-aware
// resolver keeps a mismatched language from producing a Confirmed.
let resolved = if used_lang_slice {
resolve_benign_control_lang(payload, spec.expected_cap, spec.lang)
} else {
resolve_benign_control(payload, spec.expected_cap)
};
match resolved {
None => {
// Phase 05 OOB closure: OOB-nonce payloads with
// `benign_control = None` are structurally self-confirming
// when the listener observed the callback. A benign URL
// cannot hit a per-finding nonce, so the OOB observation is
// independent network-level evidence the sink fired. Skip
// the no-benign-control downgrade and emit
// [`DifferentialVerdict::ConfirmedProvenOob`].
if is_confirm_candidate && vr.oob_nonce_slot && vr.oob_callback_seen {
let mut outcome_record = differential::build_oob_self_confirmed_outcome(
payload.label,
&vr.vuln_probes,
);
middleware_demotion::apply_demotion(
&mut outcome_record,
spec.framework.as_ref(),
spec.lang,
);
// No paired benign control runs, so this candidate
// contributes only to the vuln side of the set.
vuln_fires.push(true);
candidates.push((vr.attempt_index, outcome_record));
} else if is_confirm_candidate {
no_benign_control = true;
}
// A partial candidate without a benign control cannot rule out
// "safe code that reaches the sink", so it raises no partial
// signal and falls through to `NotConfirmed`.
}
Some(benign) => {
let benign_bytes = materialise_bytes(benign, None)
.map(|b| b.into_owned())
.unwrap_or_default();
if let Some(ch) = &probe_channel {
let _ = ch.clear();
}
let benign_outcome = sandbox::run(&harness, &benign_bytes, &effective_opts)?;
let benign_sink_hit = benign_outcome.sink_hit;
let benign_probes: Vec<SinkProbe> = probe_channel
.as_ref()
.map(|ch| ch.drain())
.unwrap_or_default();
let benign_stub_events: Vec<StubEvent> = effective_opts
.stub_harness
.as_ref()
.map(|h| h.drain_all())
.unwrap_or_default();
let benign_fired = oracle_fired_with_stubs(
&benign.oracle,
&benign_outcome,
&benign_probes,
&benign_stub_events,
);
if is_confirm_candidate {
let mut outcome_record = differential::build_outcome(
payload.label,
vuln_fired,
&vuln_probes,
vr.vuln_fired,
&vr.vuln_probes,
benign.label,
benign_fired,
&benign_probes,
);
// Phase 05 OOB closure: when an OOB-nonce payload also
// carries a paired benign control, promote
// `Confirmed` → `ConfirmedProvenOob` whenever the
// listener observed the per-finding nonce. The
// upgrade preserves the differential trace (benign
// run still recorded) and surfaces the stronger
// network-level evidence to operators.
// carries a paired benign control, promote `Confirmed` →
// `ConfirmedProvenOob` whenever the listener observed the
// per-finding nonce. The upgrade preserves the differential
// trace (benign run still recorded) and surfaces the
// stronger network-level evidence to operators.
if outcome_record.verdict == DifferentialVerdict::Confirmed
&& payload.oob_nonce_slot
&& outcome.oob_callback_seen
&& vr.oob_nonce_slot
&& vr.oob_callback_seen
{
outcome_record.verdict = DifferentialVerdict::ConfirmedProvenOob;
}
@ -661,30 +758,68 @@ pub fn run_spec(spec: &HarnessSpec, opts: &SandboxOptions) -> Result<RunOutcome,
spec.framework.as_ref(),
spec.lang,
);
let confirmed =
middleware_demotion::is_triggering_verdict(outcome_record.verdict);
differential_outcome = Some(outcome_record);
confirmed
vuln_fires.push(vr.vuln_fired);
benign_fires.push(benign_fired);
candidates.push((vr.attempt_index, outcome_record));
} else {
// Partial candidate: the vuln run reached the sink without
// firing the oracle. It is a real engine gap only when the
// benign control neither reached the sink nor fired its
// oracle — i.e. the vuln input specifically drives the sink.
// If the benign control also reaches the sink, the code path
// is shared and safe (e.g. array-form `exec`), so no partial
// signal is raised and the run stays `NotConfirmed`.
if !benign_sink_hit && !benign_fired {
partial_signal = true;
}
}
}
} else if vuln_fired && !sink_hit {
// Oracle fired but probe didn't — likely collision.
oracle_collision = true;
false
}
}
// ── Phase 26 aggregation ────────────────────────────────────────────
// `evaluate_sets` collapses the firing sets to a single verdict: any
// vuln payload firing + no benign control firing → Confirmed; any
// benign firing anywhere → OracleCollisionSuspected (global ambient-
// noise veto). A ConfirmedProvenOob candidate is terminal positive
// evidence (a per-finding OOB nonce cannot be hit by ambient noise), so
// it confirms even if some unrelated payload's benign tripped a noisy
// oracle.
if !candidates.is_empty() {
let aggregate = differential::evaluate_sets(&vuln_fires, &benign_fires);
let has_proven_oob = candidates
.iter()
.any(|(_, r)| r.verdict == DifferentialVerdict::ConfirmedProvenOob);
let confirmed_class =
has_proven_oob || matches!(aggregate, DifferentialVerdict::Confirmed);
if confirmed_class {
// Representative outcome: prefer the strongest (ProvenOob), else
// the first candidate carrying a triggering verdict. Iteration
// follows payload order, so the choice is deterministic.
let chosen = candidates
.iter()
.find(|(_, r)| r.verdict == DifferentialVerdict::ConfirmedProvenOob)
.or_else(|| {
candidates
.iter()
.find(|(_, r)| middleware_demotion::is_triggering_verdict(r.verdict))
})
.cloned();
if let Some((idx, record)) = chosen {
attempts[idx].triggered = true;
triggered_by = Some(idx);
differential_outcome = Some(record);
}
} else {
false
};
attempts.push(Attempt {
payload_label: payload.label,
outcome,
oracle_fired: vuln_fired,
triggered,
});
if triggered {
triggered_by = Some(i);
break;
// Ambient-noise veto: at least one benign control fired and no
// terminal OOB evidence exists. Surface a colliding candidate
// so the verifier downgrades to
// `Inconclusive(OracleCollisionSuspected)`.
differential_outcome = candidates
.iter()
.find(|(_, r)| r.verdict == DifferentialVerdict::OracleCollisionSuspected)
.or_else(|| candidates.first())
.map(|(_, r)| r.clone());
}
}
@ -699,6 +834,7 @@ pub fn run_spec(spec: &HarnessSpec, opts: &SandboxOptions) -> Result<RunOutcome,
differential: differential_outcome,
no_benign_control,
unrelated_crash,
sink_reached_no_oracle: partial_signal,
})
}

View file

@ -305,7 +305,14 @@ impl SamplingPolicy {
/// Decide whether an event with the given status / spec_hash should be
/// written. Deterministic for a fixed `(self, status, spec_hash)`.
pub fn should_sample(&self, status: VerifyStatus, spec_hash: &str) -> bool {
if matches!(status, VerifyStatus::Confirmed) && self.keep_all_confirmed {
if matches!(
status,
VerifyStatus::Confirmed | VerifyStatus::PartiallyConfirmed
) && self.keep_all_confirmed
{
// PartiallyConfirmed is a low-volume, high-value triage signal
// (each is a candidate real engine gap), so it rides the same
// keep-all switch as Confirmed rather than being sampled away.
return true;
}
if matches!(status, VerifyStatus::Inconclusive) && self.keep_all_inconclusive {
@ -389,6 +396,7 @@ pub fn emit_with_policy(event: &TelemetryEvent, policy: &SamplingPolicy) {
fn parse_status(s: &str) -> Option<VerifyStatus> {
match s {
"Confirmed" => Some(VerifyStatus::Confirmed),
"PartiallyConfirmed" => Some(VerifyStatus::PartiallyConfirmed),
"NotConfirmed" => Some(VerifyStatus::NotConfirmed),
"Inconclusive" => Some(VerifyStatus::Inconclusive),
"Unsupported" => Some(VerifyStatus::Unsupported),

View file

@ -987,9 +987,16 @@ fn build_verdict(
if let Some(i) = run.triggered_by {
let triggered_payload = run.attempts[i].payload_label.to_string();
// Resolve repro bytes by label, not by index: OOB payloads
// skipped for lack of a listener leave `attempts` shorter
// than `vuln_payloads`, so a positional lookup can pull the
// wrong payload's bytes. The label is the stable key.
let payloads = payloads_for(spec.expected_cap);
let vuln_payloads: Vec<_> = payloads.iter().filter(|p| !p.is_benign).collect();
let payload_bytes = vuln_payloads.get(i).map(|p| p.bytes).unwrap_or(b"");
let payload_bytes = payloads
.iter()
.find(|p| !p.is_benign && p.label == triggered_payload)
.map(|p| p.bytes)
.unwrap_or(b"");
let hardening_outcome = summarize_hardening(&run.attempts[i].outcome);
// Emit repro artifact.
@ -1156,6 +1163,33 @@ fn build_verdict(
hardening_outcome: None,
},
}
} else if run.sink_reached_no_oracle {
// Phase 26: a vuln payload's in-harness sink-reachability
// probe fired but its oracle marker never did, and the run
// produced no Confirmed-class verdict and no colliding
// differential. The sink is reachable at runtime yet the
// exploit chain did not complete (no marker file written,
// no OOB callback observed, output lacked the proof token).
// Surface `PartiallyConfirmed` so engine work can ratchet on
// the real sink-reachability gap without overstating it as a
// confirmed exploit. No repro artifact is written: there is
// no proven exploit to reproduce.
VerifyResult {
finding_id: finding_id.to_owned(),
status: VerifyStatus::PartiallyConfirmed,
triggered_payload: None,
reason: None,
inconclusive_reason: None,
detail: Some(
"sink-reachability probe fired but the oracle marker was not observed; exploit chain did not complete".to_owned(),
),
attempts,
toolchain_match: Some(toolchain_match.to_owned()),
differential: None,
replay_stable: None,
wrong: None,
hardening_outcome: None,
}
} else if run.oracle_collision {
// Oracle fired but the sink-hit sentinel did not —
// legacy single-payload collision path, predates the
@ -1735,4 +1769,141 @@ mod tests {
"current corpus_version entry must be a cache hit"
);
}
/// Test fixture: builds a fixed `HarnessSpec` shared by the `build_verdict`
/// tests below. It describes a Python function entry point (`login` in
/// `app.py`) whose first parameter (`PayloadSlot::Param(0)`) carries the
/// payload, with `Cap::SQL_QUERY` as the expected capability and the sink
/// recorded at `app.py` line 10. Every field is a constant, so each call
/// yields an equal spec.
fn partial_spec() -> HarnessSpec {
HarnessSpec {
// Must match the `finding_id` string the tests pass to `build_verdict`.
finding_id: "deadbeefcafef00d".into(),
entry_file: "app.py".into(),
entry_name: "login".into(),
entry_kind: crate::dynamic::spec::EntryKind::Function,
lang: crate::symbol::Lang::Python,
toolchain_id: "python-3.11".into(),
payload_slot: crate::dynamic::spec::PayloadSlot::Param(0),
expected_cap: crate::labels::Cap::SQL_QUERY,
// No constraint hints, stubs, or framework: the minimal spec shape.
constraint_hints: vec![],
sink_file: "app.py".into(),
sink_line: 10,
spec_hash: "cafecafecafe0001".into(),
derivation: SpecDerivationStrategy::FromFlowSteps,
stubs_required: vec![],
framework: None,
java_toolchain: crate::dynamic::spec::JavaToolchain::default(),
}
}
/// Phase 26: a vuln payload whose sink-reachability probe fired but whose
/// oracle marker never did (and with no Confirmed-class verdict, no
/// differential outcome, and no benign-control gap) must surface as
/// `PartiallyConfirmed` and carry no `triggered_payload`.
///
/// What is asserted here: the verdict status, the absence of a
/// `triggered_payload`, the explanatory `detail` text, and that the single
/// sink-hit attempt is preserved in the surfaced attempt list. The test does
/// not itself check whether a repro artifact is written.
#[test]
fn build_verdict_sink_reached_no_oracle_maps_to_partially_confirmed() {
use crate::dynamic::runner::{Attempt, RunOutcome};
use crate::dynamic::sandbox::SandboxOutcome;
let opts = VerifyOptions::from_config(&Config::default());
// One attempt in which the sink was hit (`sink_hit: true`) while the
// oracle did not fire and the attempt did not trigger.
let run = RunOutcome {
spec: partial_spec(),
attempts: vec![Attempt {
payload_label: "sqli-tautology",
outcome: SandboxOutcome {
exit_code: Some(0),
stdout: b"__NYX_SINK_HIT__".to_vec(),
stderr: Vec::new(),
timed_out: false,
oob_callback_seen: false,
sink_hit: true,
duration: std::time::Duration::ZERO,
hardening_outcome: None,
},
oracle_fired: false,
triggered: false,
}],
// No payload triggered and no oracle collision; the only positive
// signal on the run is `sink_reached_no_oracle`.
triggered_by: None,
oracle_collision: false,
sink_reached_no_oracle: true,
build_attempts: 1,
harness_source: String::new(),
entry_source: String::new(),
differential: None,
no_benign_control: false,
unrelated_crash: false,
};
let verdict = build_verdict(
"deadbeefcafef00d",
&partial_spec(),
Ok(run),
"exact",
&opts,
std::time::Duration::ZERO,
);
assert_eq!(verdict.status, VerifyStatus::PartiallyConfirmed);
assert!(
verdict.triggered_payload.is_none(),
"PartiallyConfirmed must not claim a triggering payload"
);
assert!(
verdict
.detail
.as_deref()
.unwrap_or_default()
.contains("sink-reachability probe fired"),
"detail must explain the sink reached but the chain did not complete: {:?}",
verdict.detail
);
// The sink-hit attempt must survive into the surfaced attempt list.
assert_eq!(verdict.attempts.len(), 1);
assert!(verdict.attempts[0].sink_hit);
assert!(!verdict.attempts[0].triggered);
}
/// Regression guard: a clean run (no sink hit, no oracle) must stay
/// `NotConfirmed`. The `PartiallyConfirmed` branch must not swallow the
/// ordinary negative case.
///
/// The input matches the sink-reached test except that stdout is empty,
/// `sink_hit` is false, and `sink_reached_no_oracle` is false.
#[test]
fn build_verdict_clean_run_stays_not_confirmed() {
use crate::dynamic::runner::{Attempt, RunOutcome};
use crate::dynamic::sandbox::SandboxOutcome;
let opts = VerifyOptions::from_config(&Config::default());
// One attempt that exited 0 with no output and no sink hit, oracle, or
// trigger.
let run = RunOutcome {
spec: partial_spec(),
attempts: vec![Attempt {
payload_label: "sqli-tautology",
outcome: SandboxOutcome {
exit_code: Some(0),
stdout: Vec::new(),
stderr: Vec::new(),
timed_out: false,
oob_callback_seen: false,
sink_hit: false,
duration: std::time::Duration::ZERO,
hardening_outcome: None,
},
oracle_fired: false,
triggered: false,
}],
// Every run-level signal is negative.
triggered_by: None,
oracle_collision: false,
sink_reached_no_oracle: false,
build_attempts: 1,
harness_source: String::new(),
entry_source: String::new(),
differential: None,
no_benign_control: false,
unrelated_crash: false,
};
let verdict = build_verdict(
"deadbeefcafef00d",
&partial_spec(),
Ok(run),
"exact",
&opts,
std::time::Duration::ZERO,
);
assert_eq!(verdict.status, VerifyStatus::NotConfirmed);
}
}

View file

@ -727,6 +727,14 @@ pub enum VerifyStatus {
/// Sink fired with at least one payload. The static finding is exploitable
/// against the live target.
Confirmed,
/// The in-harness sink-reachability probe fired (sink reached) but the
/// oracle marker was never observed (no file write / no OOB callback /
/// output did not contain the proof token), so the exploit chain did not
/// complete. Semantically `{ sink_reached: true, exit_propagated: false }`.
/// Ranks above `NotConfirmed` (runtime corroboration that the sink is
/// reachable) but below `Confirmed` (no proven exploit). Used so engine
/// work can ratchet on real sink-reachability gaps without overstating.
PartiallyConfirmed,
/// All payloads ran cleanly. Either the path is infeasible at runtime
/// or the corpus is too narrow. Treat as "static-only", not "false positive".
NotConfirmed,

View file

@ -558,6 +558,7 @@ fn format_dynamic_verdict_annotation(dv: &crate::evidence::VerifyResult) -> Stri
let pid = dv.triggered_payload.as_deref().unwrap_or("unknown");
format!("[DYN: confirmed via {pid}]")
}
VerifyStatus::PartiallyConfirmed => "[DYN: partially confirmed (sink reached)]".to_string(),
VerifyStatus::NotConfirmed => "[DYN: not confirmed]".to_string(),
VerifyStatus::Unsupported => {
let reason = dv

View file

@ -258,6 +258,12 @@ fn dynamic_verdict_delta(diag: &Diag) -> Option<f64> {
let dv = diag.evidence.as_ref()?.dynamic_verdict.as_ref()?;
match dv.status {
VerifyStatus::Confirmed => Some(20.0),
// PartiallyConfirmed: the sink was reached at runtime but the
// exploit chain did not complete. Runtime corroboration that the
// sink is reachable is a positive signal, but weaker than a proven
// exploit, so it earns a modest bump rather than the full Confirmed
// boost.
VerifyStatus::PartiallyConfirmed => Some(8.0),
// Apply penalty only when the corpus was actually exhausted (attempts
// were made); a NotConfirmed with zero attempts means something went
// wrong before payload execution, which is an Inconclusive path, not

View file

@ -293,6 +293,7 @@ fn status_for_diag(d: &Diag) -> &'static str {
pub fn dynamic_status_label(status: VerifyStatus) -> &'static str {
match status {
VerifyStatus::Confirmed => "Confirmed",
VerifyStatus::PartiallyConfirmed => "PartiallyConfirmed",
VerifyStatus::NotConfirmed => "NotConfirmed",
VerifyStatus::Inconclusive => "Inconclusive",
VerifyStatus::Unsupported => "Unsupported",

View file

@ -76,6 +76,28 @@ fn diag_with_verdict(status: VerifyStatus) -> Diag {
wrong: None,
hardening_outcome: None,
},
VerifyStatus::PartiallyConfirmed => VerifyResult {
finding_id: "abc123".into(),
status,
triggered_payload: None,
reason: None,
inconclusive_reason: None,
detail: Some(
"sink-reachability probe fired but the oracle marker was not observed; exploit chain did not complete".into(),
),
attempts: vec![AttemptSummary {
payload_label: "sqli-tautology".into(),
exit_code: Some(0),
timed_out: false,
triggered: false,
sink_hit: true,
}],
toolchain_match: Some("exact".into()),
differential: None,
replay_stable: None,
wrong: None,
hardening_outcome: None,
},
VerifyStatus::NotConfirmed => VerifyResult {
finding_id: "abc123".into(),
status,
@ -158,6 +180,17 @@ fn console_not_confirmed_shows_annotation() {
);
}
/// Console output for a `PartiallyConfirmed` verdict must include the
/// `[DYN: partially confirmed (sink reached)]` annotation.
#[test]
fn console_partially_confirmed_shows_sink_reached() {
let diag = diag_with_verdict(VerifyStatus::PartiallyConfirmed);
let output = render_console(&[diag], "proj", None, &[]);
// Compare against the ANSI-stripped text so the substring match works on
// plain text.
let stripped = strip_ansi(&output);
assert!(
stripped.contains("[DYN: partially confirmed (sink reached)]"),
"expected DYN partially-confirmed annotation, got:\n{stripped}"
);
}
#[test]
fn console_unsupported_shows_reason() {
let diag = diag_with_verdict(VerifyStatus::Unsupported);

View file

@ -78,6 +78,7 @@ def load_previous_agg(path: str) -> dict:
"fn": 0,
"unsupported": 0,
"confirmed": 0,
"partially_confirmed": 0,
"wrong_confirmed": 0,
"stable_replays": 0,
"total": 0,
@ -92,6 +93,7 @@ def load_previous_agg(path: str) -> dict:
"fn",
"unsupported",
"confirmed",
"partially_confirmed",
"wrong_confirmed",
"stable_replays",
"total",
@ -139,6 +141,7 @@ def main() -> int:
"fn": 0,
"unsupported": 0,
"confirmed": 0,
"partially_confirmed": 0,
"wrong_confirmed": 0,
"stable_replays": 0,
"total": 0,
@ -153,6 +156,7 @@ def main() -> int:
"fn",
"unsupported",
"confirmed",
"partially_confirmed",
"wrong_confirmed",
"stable_replays",
"total",
@ -160,17 +164,22 @@ def main() -> int:
agg[k][field] += c.get(field, 0)
print("\n=== Aggregated eval corpus report ===")
print(f"{'Cap':<20} {'Lang':<12} {'TP':>5} {'FP':>5} {'FN':>5} {'Prec':>6} {'Rec':>6} {'Unsup%':>7}")
print("-" * 72)
print(
f"{'Cap':<20} {'Lang':<12} {'TP':>5} {'FP':>5} {'FN':>5} "
f"{'Prec':>6} {'Rec':>6} {'Unsup%':>7} {'Conf%':>7} {'Part%':>7}"
)
print("-" * 88)
for k, v in sorted(agg.items()):
prec = v["tp"] / max(v["tp"] + v["fp"], 1)
rec = v["tp"] / max(v["tp"] + v["fn"], 1)
unsup = v["unsupported"] / max(v["total"], 1)
conf = v["confirmed"] / max(v["total"], 1)
part = v["partially_confirmed"] / max(v["total"], 1)
print(
f"{k[0]:<20} {k[1]:<12} "
f"{v['tp']:>5} {v['fp']:>5} {v['fn']:>5} "
f"{prec:>6.2f} {rec:>6.2f} "
f"{unsup*100:>6.1f}%"
f"{unsup*100:>6.1f}% {conf*100:>6.1f}% {part*100:>6.1f}%"
)
gate_failed = False

View file

@ -387,7 +387,7 @@ def main() -> int:
break
# Per-cell tallies: {(cap, lang): {tp, fp, fn, unsupported, confirmed,
# wrong_confirmed, stable_replays, total}}
# partially_confirmed, wrong_confirmed, stable_replays, total}}
cells: dict[tuple[str, str], dict] = defaultdict(
lambda: {
"tp": 0,
@ -395,6 +395,7 @@ def main() -> int:
"fn": 0,
"unsupported": 0,
"confirmed": 0,
"partially_confirmed": 0,
"wrong_confirmed": 0,
"stable_replays": 0,
"total": 0,
@ -412,6 +413,8 @@ def main() -> int:
status = dv.get("status")
if status == "Unsupported":
cells[key]["unsupported"] += 1
elif status == "PartiallyConfirmed":
cells[key]["partially_confirmed"] += 1
elif status == "Confirmed":
cells[key]["confirmed"] += 1
# Repro-stability and false-Confirmed counts are optional

View file

@ -235,9 +235,10 @@ fn sarif_confirmed_verdict_nyx_dynamic_verdict_contains_triggered_payload() {
}
#[test]
fn sarif_all_four_statuses_produce_partial_fingerprint() {
fn sarif_all_statuses_produce_partial_fingerprint() {
let statuses = [
(VerifyStatus::Confirmed, "Confirmed"),
(VerifyStatus::PartiallyConfirmed, "PartiallyConfirmed"),
(VerifyStatus::NotConfirmed, "NotConfirmed"),
(VerifyStatus::Unsupported, "Unsupported"),
(VerifyStatus::Inconclusive, "Inconclusive"),