feat(dynamic): add PartiallyConfirmed status for finer-grained sink-reachability categorization; update dynamic verification, telemetry, and reporting systems

2026-06-09 19:45:13 +02:00 · 2026-05-29 14:35:39 -05:00 · 2026-05-29 14:35:39 -05:00 · c0501884ae
commit c0501884ae
parent 635b213825
23 changed files with 658 additions and 142 deletions
--- a/Cargo.toml
+++ b/Cargo.toml
@ -6,9 +6,9 @@ rust-version = "1.88"
 description = "A multi-language static analysis tool for detecting security vulnerabilities"
 license = "GPL-3.0-or-later"
 authors = ["Eli Peter <elicpeter@example.com>"]
-homepage = "https://github.com/elicpeter/nyx"
+homepage = "https://nyxsec.dev/scanner"
 repository = "https://github.com/elicpeter/nyx"
-documentation = "https://elicpeter.github.io/nyx/"
+documentation = "https://nyxsec.dev/docs/nyx/"
 keywords = ["security", "vulnerability", "scanner", "static-analysis", "cli"]
 categories = ["security", "command-line-utilities", "development-tools", "parser-implementations", "text-processing"]
 readme = "README.md"
--- a/frontend/src/api/types.ts
+++ b/frontend/src/api/types.ts
@ -3,7 +3,12 @@ export type Confidence = 'Low' | 'Medium' | 'High';
 export type FlowStepKind = 'source' | 'assignment' | 'call' | 'phi' | 'sink';

 // Dynamic verification types (from src/evidence.rs VerifyStatus / VerifyResult)
-export type VerifyStatus = 'Confirmed' | 'NotConfirmed' | 'Inconclusive' | 'Unsupported';
+export type VerifyStatus =
+  | 'Confirmed'
+  | 'PartiallyConfirmed'
+  | 'NotConfirmed'
+  | 'Inconclusive'
+  | 'Unsupported';

 export interface AttemptSummary {
  payload_label: string;
@ -29,6 +34,7 @@ export interface VerifyResult {
 export interface DynamicVerificationSummary {
  total: number;
  confirmed: number;
+  partially_confirmed: number;
  not_confirmed: number;
  inconclusive: number;
  unsupported: number;
--- a/frontend/src/components/VerdictBadge.tsx
+++ b/frontend/src/components/VerdictBadge.tsx
@ -2,6 +2,7 @@ import type { VerifyResult, VerifyStatus } from '../api/types';

 const STATUS_LABELS: Record<VerifyStatus, string> = {
  Confirmed: 'Confirmed',
+  PartiallyConfirmed: 'Partially confirmed',
  NotConfirmed: 'Not confirmed',
  Inconclusive: 'Inconclusive',
  Unsupported: 'Unsupported',
@ -15,6 +16,10 @@ function verdictTooltip(verdict: VerifyResult): string {
      return triggered_payload
        ? `Confirmed via payload: ${triggered_payload}`
        : 'Dynamically confirmed exploitable';
+    case 'PartiallyConfirmed':
+      return detail
+        ? `Partially confirmed (sink reached): ${detail}`
+        : 'Partially confirmed: sink reached but exploit chain did not complete';
    case 'NotConfirmed':
      return (verdict.attempts?.length ?? 0) > 0
        ? `Not confirmed after ${verdict.attempts?.length ?? 0} payload attempt(s)`
--- a/frontend/src/components/overview/OverviewWidgets.tsx
+++ b/frontend/src/components/overview/OverviewWidgets.tsx
@ -244,13 +244,14 @@ export function ScannerQualityPanel({
  const dynamic = quality.dynamic_verification ?? {
    total: 0,
    confirmed: 0,
+    partially_confirmed: 0,
    not_confirmed: 0,
    inconclusive: 0,
    unsupported: 0,
  };
  const dynamicDetail =
    dynamic.total > 0
-      ? `${dynamic.total.toLocaleString()} verdicts · ${dynamic.not_confirmed.toLocaleString()} not confirmed · ${dynamic.inconclusive.toLocaleString()} inconclusive · ${dynamic.unsupported.toLocaleString()} unsupported`
+      ? `${dynamic.total.toLocaleString()} verdicts · ${dynamic.partially_confirmed.toLocaleString()} partially confirmed · ${dynamic.not_confirmed.toLocaleString()} not confirmed · ${dynamic.inconclusive.toLocaleString()} inconclusive · ${dynamic.unsupported.toLocaleString()} unsupported`
      : 'no dynamic verdicts in latest scan';

  const rows: Array<{
--- a/frontend/src/pages/FindingsPage.tsx
+++ b/frontend/src/pages/FindingsPage.tsx
@ -31,6 +31,7 @@ function formatTriageState(state: string): string {

 function formatVerificationStatus(status: string): string {
  if (status === 'NotConfirmed') return 'Not confirmed';
+  if (status === 'PartiallyConfirmed') return 'Partially confirmed';
  return status || 'Unverified';
 }

--- a/frontend/src/styles/global.css
+++ b/frontend/src/styles/global.css
@ -2668,6 +2668,10 @@ tr.selected td {
  background: var(--success-bg);
  color: var(--success);
 }
+.badge-dyn-partiallyconfirmed {
+  background: var(--conf-medium-bg);
+  color: var(--conf-medium);
+}
 .badge-dyn-notconfirmed {
  background: var(--bg-secondary);
  color: var(--text-secondary);
--- a/frontend/src/test/components/dynamicVerdictSection.test.tsx
+++ b/frontend/src/test/components/dynamicVerdictSection.test.tsx
@ -43,6 +43,19 @@ describe('DynamicVerdictSection', () => {
    ).toBeInTheDocument();
  });

+  it('renders PartiallyConfirmed badge', () => {
+    render(
+      <DynamicVerdictSection
+        verdict={makeVerdict('PartiallyConfirmed', {
+          detail: 'sink reached but exploit chain did not complete',
+        })}
+      />,
+    );
+    expect(
+      screen.getByTestId('verdict-badge-partiallyconfirmed'),
+    ).toBeInTheDocument();
+  });
+
  it('does not crash when the API omits an empty attempts array', () => {
    render(
      <DynamicVerdictSection
@ -82,6 +95,7 @@ describe('DynamicVerdictSection', () => {
    unmount();

    for (const status of [
+      'PartiallyConfirmed',
      'NotConfirmed',
      'Unsupported',
      'Inconclusive',
--- a/frontend/src/test/components/verdictBadge.test.tsx
+++ b/frontend/src/test/components/verdictBadge.test.tsx
@ -35,6 +35,21 @@ describe('VerdictBadge', () => {
    expect(badge.textContent).toContain('🔥');
  });

+  it('renders PartiallyConfirmed badge with amber class and no flame', () => {
+    render(
+      <VerdictBadge
+        verdict={makeVerdict('PartiallyConfirmed', {
+          detail: 'sink-reachability probe fired but the oracle marker was not observed',
+        })}
+      />,
+    );
+    const badge = screen.getByTestId('verdict-badge-partiallyconfirmed');
+    expect(badge).toBeInTheDocument();
+    expect(badge.className).toContain('badge-dyn-partiallyconfirmed');
+    expect(badge.textContent).not.toContain('🔥');
+    expect(badge.getAttribute('title')).toContain('sink reached');
+  });
+
  it('renders NotConfirmed badge with correct class', () => {
    render(<VerdictBadge verdict={makeVerdict('NotConfirmed')} />);
    const badge = screen.getByTestId('verdict-badge-notconfirmed');
@ -107,9 +122,10 @@ describe('VerdictBadge', () => {
    expect(badge.textContent?.replace('🔥 ', '')).toBe('C');
  });

-  it('renders all four VerifyStatus variants without crashing', () => {
+  it('renders all five VerifyStatus variants without crashing', () => {
    const statuses: VerifyResult['status'][] = [
      'Confirmed',
+      'PartiallyConfirmed',
      'NotConfirmed',
      'Unsupported',
      'Inconclusive',
--- a/src/baseline.rs
+++ b/src/baseline.rs
@ -308,6 +308,10 @@ pub fn check_gate(diff: &VerdictDiff, gate: &str) -> bool {
                && matches!(
                    e.current_status,
                    Some(VerifyStatus::Confirmed)
+                        // PartiallyConfirmed = sink still reachable at
+                        // runtime, so a baseline-Confirmed finding that is
+                        // now partial has NOT been resolved.
+                        | Some(VerifyStatus::PartiallyConfirmed)
                        | Some(VerifyStatus::Inconclusive)
                        | Some(VerifyStatus::Unsupported)
                )
@ -323,6 +327,7 @@ pub fn check_gate(diff: &VerdictDiff, gate: &str) -> bool {
 fn status_str(s: Option<VerifyStatus>) -> &'static str {
    match s {
        Some(VerifyStatus::Confirmed) => "Confirmed",
+        Some(VerifyStatus::PartiallyConfirmed) => "PartiallyConfirmed",
        Some(VerifyStatus::NotConfirmed) => "NotConfirmed",
        Some(VerifyStatus::Inconclusive) => "Inconclusive",
        Some(VerifyStatus::Unsupported) => "Unsupported",
--- a/src/chain/feasibility.rs
+++ b/src/chain/feasibility.rs
@ -37,8 +37,11 @@ pub enum Feasibility {
    /// but where the static evidence is strong.
    InconclusiveHighConf,
    /// Everything else — no dynamic verification, dynamic verdict was
-    /// `NotConfirmed`/`Unsupported`, or dynamic was `Inconclusive` but
-    /// static confidence is not `High`.
+    /// `NotConfirmed`/`PartiallyConfirmed`/`Unsupported`, or dynamic was
+    /// `Inconclusive` but static confidence is not `High`.  A
+    /// `PartiallyConfirmed` verdict proves only that the sink is reachable,
+    /// not that the exploit chain completes, so it stays conservative here:
+    /// it must not inflate a multi-hop path score.
    Unverified,
 }

--- a/src/commands/scan.rs
+++ b/src/commands/scan.rs
@ -242,6 +242,7 @@ pub fn compute_stable_hash(diag: &Diag) -> u64 {
 pub struct DynamicVerificationSummary {
    pub total: usize,
    pub confirmed: usize,
+    pub partially_confirmed: usize,
    pub not_confirmed: usize,
    pub inconclusive: usize,
    pub unsupported: usize,
@ -261,6 +262,9 @@ impl DynamicVerificationSummary {
            summary.total += 1;
            match verdict.status {
                crate::evidence::VerifyStatus::Confirmed => summary.confirmed += 1,
+                crate::evidence::VerifyStatus::PartiallyConfirmed => {
+                    summary.partially_confirmed += 1
+                }
                crate::evidence::VerifyStatus::NotConfirmed => summary.not_confirmed += 1,
                crate::evidence::VerifyStatus::Inconclusive => summary.inconclusive += 1,
                crate::evidence::VerifyStatus::Unsupported => summary.unsupported += 1,
@ -282,10 +286,11 @@ pub fn format_dynamic_verification_summary(summary: &DynamicVerificationSummary)
        "verdicts"
    };
    format!(
-        "{} {} ({} confirmed, {} not confirmed, {} inconclusive, {} unsupported)",
+        "{} {} ({} confirmed, {} partially confirmed, {} not confirmed, {} inconclusive, {} unsupported)",
        summary.total,
        noun,
        summary.confirmed,
+        summary.partially_confirmed,
        summary.not_confirmed,
        summary.inconclusive,
        summary.unsupported
--- a/src/dynamic/differential.rs
+++ b/src/dynamic/differential.rs
@ -1,19 +1,23 @@
-//! Differential confirmation rule for dynamic verification (Phase 07).
+//! Differential confirmation rule for dynamic verification (Phase 07 / 26).
 //!
-//! `Confirmed` requires the vulnerable payload's oracle to fire **and**
-//! the paired benign control's oracle to *not* fire (§4.1).  This module
-//! is the single source of truth for that rule.  Everything else (runner,
-//! verifier, tests) collapses to "look up paired benign + call
-//! [`evaluate`]".
+//! `Confirmed` requires **at least one** vulnerable payload's oracle to
+//! fire **and every** paired benign control's oracle to *not* fire
+//! (§4.1, extended for multi-payload aggregation in Phase 26).  This
+//! module is the single source of truth for that rule.  Everything else
+//! (runner, verifier, tests) collapses to "collect firing sets + call
+//! [`evaluate_sets`]".
 //!
-//! # Rule table
+//! # Rule table (set aggregation)
 //!
-//! | vuln fires | benign fires | verdict                       |
-//! |------------|--------------|-------------------------------|
-//! | true       | false        | `Confirmed`                    |
-//! | true       | true         | `OracleCollisionSuspected`     |
-//! | false      | false        | `NotConfirmed`                 |
-//! | false      | true         | `ReversedDifferential`         |
+//! | any vuln fires | any benign fires | verdict                    |
+//! |----------------|------------------|----------------------------|
+//! | true           | false            | `Confirmed`                 |
+//! | true           | true             | `OracleCollisionSuspected`  |
+//! | false          | false            | `NotConfirmed`              |
+//! | false          | true             | `ReversedDifferential`      |
+//!
+//! The scalar [`evaluate`] is the single-payload, single-control
+//! specialisation of [`evaluate_sets`] and delegates to it.
 //!
 //! "Fires" means [`crate::dynamic::oracle::oracle_fired`] returned `true`
 //! against the run's [`SandboxOutcome`] + drained [`SinkProbe`] set —
@ -24,8 +28,33 @@ use crate::evidence::{
    DifferentialOutcome, DifferentialProbeArg, DifferentialProbeRecord, DifferentialVerdict,
 };

-/// Apply the differential confirmation rule.
+/// Apply the differential confirmation rule over **sets** of firing
+/// results (Phase 26 multi-payload aggregation).
 ///
+/// `vuln_fired` is one boolean per vulnerable payload attempt;
+/// `benign_fired` is one boolean per paired benign control that actually
+/// ran.  Aggregation is "any vuln vs any benign" with a global ambient-noise
+/// veto across the run: a *single* benign control firing anywhere
+/// vetoes `Confirmed` (the oracle cannot discriminate), and a *single*
+/// vulnerable payload firing is enough positive evidence.
+///
+/// Empty slices behave as "nothing fired" on that side: `evaluate_sets(&[], &[])`
+/// is `NotConfirmed`, and an empty `benign_fired` can never veto `Confirmed`.
+pub fn evaluate_sets(vuln_fired: &[bool], benign_fired: &[bool]) -> DifferentialVerdict {
+    let any_vuln = vuln_fired.iter().any(|&b| b);
+    let any_benign = benign_fired.iter().any(|&b| b);
+    match (any_vuln, any_benign) {
+        (true, false) => DifferentialVerdict::Confirmed,
+        (true, true) => DifferentialVerdict::OracleCollisionSuspected,
+        (false, false) => DifferentialVerdict::NotConfirmed,
+        (false, true) => DifferentialVerdict::ReversedDifferential,
+    }
+}
+
+/// Apply the differential confirmation rule to a single
+/// (vulnerable, benign-control) pair.
+///
+/// Single-element specialisation of [`evaluate_sets`].
 /// `vuln_probe_fires` and `benign_probe_fires` are the boolean firing
 /// results of [`crate::dynamic::oracle::oracle_fired`] for the
 /// vulnerable payload and its paired benign control respectively.  The
@ -33,12 +62,7 @@ use crate::evidence::{
 /// callers attach those separately via [`DifferentialOutcome`] for
 /// forensic display.
 pub fn evaluate(vuln_probe_fires: bool, benign_probe_fires: bool) -> DifferentialVerdict {
-    match (vuln_probe_fires, benign_probe_fires) {
-        (true, false) => DifferentialVerdict::Confirmed,
-        (true, true) => DifferentialVerdict::OracleCollisionSuspected,
-        (false, false) => DifferentialVerdict::NotConfirmed,
-        (false, true) => DifferentialVerdict::ReversedDifferential,
-    }
+    evaluate_sets(&[vuln_probe_fires], &[benign_probe_fires])
 }

 /// Build a [`DifferentialOutcome`] for inclusion in a
@ -139,6 +163,61 @@ mod tests {
        );
    }

+    #[test]
+    fn sets_any_vuln_no_benign_is_confirmed() {
+        // One of several vuln payloads firing is enough; no benign fired.
+        assert_eq!(
+            evaluate_sets(&[false, true, false], &[false, false]),
+            DifferentialVerdict::Confirmed
+        );
+    }
+
+    #[test]
+    fn sets_one_benign_firing_vetoes_confirmed() {
+        // A single benign control firing anywhere downgrades to collision,
+        // even when a vuln payload also fired (global ambient-noise veto).
+        assert_eq!(
+            evaluate_sets(&[true, true], &[false, true, false]),
+            DifferentialVerdict::OracleCollisionSuspected
+        );
+    }
+
+    #[test]
+    fn sets_no_vuln_no_benign_is_not_confirmed() {
+        assert_eq!(
+            evaluate_sets(&[false, false], &[false]),
+            DifferentialVerdict::NotConfirmed
+        );
+    }
+
+    #[test]
+    fn sets_no_vuln_some_benign_is_reversed() {
+        assert_eq!(
+            evaluate_sets(&[false], &[true]),
+            DifferentialVerdict::ReversedDifferential
+        );
+    }
+
+    #[test]
+    fn sets_empty_is_not_confirmed() {
+        assert_eq!(evaluate_sets(&[], &[]), DifferentialVerdict::NotConfirmed);
+    }
+
+    #[test]
+    fn sets_empty_benign_with_vuln_is_confirmed() {
+        // No benign control ran at all → no veto possible → Confirmed.
+        assert_eq!(evaluate_sets(&[true], &[]), DifferentialVerdict::Confirmed);
+    }
+
+    #[test]
+    fn scalar_evaluate_matches_singleton_sets() {
+        for &v in &[false, true] {
+            for &b in &[false, true] {
+                assert_eq!(evaluate(v, b), evaluate_sets(&[v], &[b]));
+            }
+        }
+    }
+
    #[test]
    fn oob_self_confirmed_outcome_carries_only_vuln_trace() {
        use crate::dynamic::probe::{ProbeArg, ProbeKind, ProbeWitness, SinkProbe};
--- a/src/dynamic/runner.rs
+++ b/src/dynamic/runner.rs
@ -59,10 +59,29 @@ const MAX_BUILD_ATTEMPTS: u32 = 2;
 pub struct RunOutcome {
    pub spec: HarnessSpec,
    pub attempts: Vec<Attempt>,
-    /// First attempt that fired the sink with `oracle_fired && sink_hit`.
+    /// Index into [`Self::attempts`] of the attempt the confirm verdict is
+    /// attributed to.  Set by the Phase 26 set aggregation when
+    /// [`crate::dynamic::differential::evaluate_sets`] returns a
+    /// Confirmed-class verdict (any vuln payload fired the oracle + sink
+    /// while every paired benign control stayed clean), or when an
+    /// OOB-nonce payload self-confirmed.  `None` otherwise.
    pub triggered_by: Option<usize>,
    /// Whether the oracle fired but the sink probe did not (oracle collision).
    pub oracle_collision: bool,
+    /// Phase 26: `true` when a vuln payload's in-harness sink-reachability probe
+    /// fired (`outcome.sink_hit`) but its oracle marker was never observed (no file
+    /// write / no OOB callback / output lacked the proof token), *and* the
+    /// paired benign control neither reached the sink nor fired its oracle.
+    /// The benign-control differential is the discriminator: it proves the
+    /// vuln input specifically drives the sink, ruling out safe code that
+    /// merely reaches the sink (e.g. array-form `exec` with inert
+    /// metacharacters, which the benign control also reaches).  The verifier
+    /// maps this to [`crate::evidence::VerifyStatus::PartiallyConfirmed`]: the
+    /// sink is reachable under the vuln input but the exploit chain did not
+    /// complete.  Never set when a Confirmed-class verdict or a colliding
+    /// differential was produced (those take precedence at the verify
+    /// boundary).
+    pub sink_reached_no_oracle: bool,
    /// Number of build attempts consumed.
    pub build_attempts: u32,
    /// Harness sources for repro artifacts.
@ -454,6 +473,24 @@ pub fn run_spec(spec: &HarnessSpec, opts: &SandboxOptions) -> Result<RunOutcome,
    let mut unrelated_crash = false;
    let mut differential_outcome: Option<DifferentialOutcome> = None;

+    // Phase 26 set aggregation, phase A: per-vuln-payload run record.
+    // Every vuln payload runs to completion (no early break) so the
+    // differential rule can aggregate across the whole set — a single
+    // benign control firing anywhere must be able to veto a `Confirmed`.
+    struct VulnRun {
+        /// Index into `vuln_payloads` (for benign-control resolution).
+        payload_index: usize,
+        /// Index into `attempts` (what `triggered_by` points at).
+        attempt_index: usize,
+        vuln_fired: bool,
+        sink_hit: bool,
+        oob_nonce_slot: bool,
+        oob_callback_seen: bool,
+        vuln_probes: Vec<SinkProbe>,
+    }
+    let mut vuln_runs: Vec<VulnRun> = Vec::with_capacity(vuln_payloads.len());
+
+    // ── Phase A: run every vuln payload, record its firing signals ──────
    for (i, payload) in vuln_payloads.iter().enumerate() {
        // Materialise payload bytes (OOB nonce-slot payloads generate a URL).
        let (oob_nonce, effective_bytes) = if payload.oob_nonce_slot {
@ -480,11 +517,12 @@ pub fn run_spec(spec: &HarnessSpec, opts: &SandboxOptions) -> Result<RunOutcome,
            let _ = ch.clear();
        }

+        let attempt_index = attempts.len();
        trace_record(
            trace_handle.as_ref(),
            TraceStage::SandboxStarted,
            Some(format!(
-                "attempt={i} payload={} oracle={}",
+                "attempt={attempt_index} payload={} oracle={}",
                payload.label,
                oracle_short_name(&payload.oracle)
            )),
@ -495,7 +533,7 @@ pub fn run_spec(spec: &HarnessSpec, opts: &SandboxOptions) -> Result<RunOutcome,
            trace_handle.as_ref(),
            TraceStage::OracleWait,
            Some(format!(
-                "attempt={i} exit_code={:?} timed_out={}",
+                "attempt={attempt_index} exit_code={:?} timed_out={}",
                outcome.exit_code, outcome.timed_out
            )),
        );
@ -508,9 +546,9 @@ pub fn run_spec(spec: &HarnessSpec, opts: &SandboxOptions) -> Result<RunOutcome,
        // failure — the harness "linked" against deps that don't resolve at
        // run time — so route through `RunError::BuildFailed` to keep the
        // SKIP-on-BuildFailed branch in the e2e corpus tests honest.  Only
-        // checked on the first vuln payload because the missing dep won't
-        // appear later in the run.
-        if i == 0 && is_runtime_import_error(&outcome) {
+        // checked on the first actually-run payload because the missing dep
+        // won't appear later in the run.
+        if attempts.is_empty() && is_runtime_import_error(&outcome) {
            return Err(RunError::BuildFailed {
                stderr: String::from_utf8_lossy(&outcome.stderr).into_owned(),
                attempts: build_attempts,
@ -546,7 +584,7 @@ pub fn run_spec(spec: &HarnessSpec, opts: &SandboxOptions) -> Result<RunOutcome,
            trace_handle.as_ref(),
            TraceStage::OracleObserved,
            Some(format!(
-                "attempt={i} fired={vuln_fired} sink_hit={sink_hit}"
+                "attempt={attempt_index} fired={vuln_fired} sink_hit={sink_hit}"
            )),
        );

@ -566,93 +604,152 @@ pub fn run_spec(spec: &HarnessSpec, opts: &SandboxOptions) -> Result<RunOutcome,
            unrelated_crash = true;
        }

-        // Differential rule (Phase 07, §4.1).  Only when the vuln oracle
-        // fired *and* the in-harness sink-hit sentinel was observed do we
-        // consult the paired benign control.  Oracle-fires-without-sink
-        // stays on the legacy `oracle_collision` path so the existing
-        // `Inconclusive(OracleCollisionSuspected)` semantics survive.
-        let triggered = if vuln_fired && sink_hit {
-            // Match the resolution scope to the payload-slice scope so a
-            // benign control declared in another language is still found
-            // when this run was driven off the lang-agnostic union (see
-            // `used_lang_slice` above).  When the run did use the
-            // per-language slice, the lang-aware resolver keeps a
-            // mismatched language from silently producing a Confirmed.
-            let resolved = if used_lang_slice {
-                resolve_benign_control_lang(payload, spec.expected_cap, spec.lang)
-            } else {
-                resolve_benign_control(payload, spec.expected_cap)
-            };
-            match resolved {
-                None => {
-                    // Phase 05 OOB closure: OOB-nonce payloads with
-                    // `benign_control = None` are structurally self-
-                    // confirming when the listener observed the callback.
-                    // A benign URL cannot hit a per-finding nonce, so the
-                    // OOB observation is independent network-level
-                    // evidence the sink fired.  Skip the no-benign-control
-                    // downgrade and emit
-                    // [`DifferentialVerdict::ConfirmedProvenOob`].
-                    if payload.oob_nonce_slot && outcome.oob_callback_seen {
-                        let mut outcome_record = differential::build_oob_self_confirmed_outcome(
-                            payload.label,
-                            &vuln_probes,
-                        );
-                        middleware_demotion::apply_demotion(
-                            &mut outcome_record,
-                            spec.framework.as_ref(),
-                            spec.lang,
-                        );
-                        let confirmed =
-                            middleware_demotion::is_triggering_verdict(outcome_record.verdict);
-                        differential_outcome = Some(outcome_record);
-                        confirmed
-                    } else {
-                        no_benign_control = true;
-                        false
-                    }
-                }
-                Some(benign) => {
-                    let benign_bytes = materialise_bytes(benign, None)
-                        .map(|b| b.into_owned())
-                        .unwrap_or_default();
-                    if let Some(ch) = &probe_channel {
-                        let _ = ch.clear();
-                    }
-                    let benign_outcome = sandbox::run(&harness, &benign_bytes, &effective_opts)?;
-                    let benign_probes: Vec<SinkProbe> = probe_channel
-                        .as_ref()
-                        .map(|ch| ch.drain())
-                        .unwrap_or_default();
-                    let benign_stub_events: Vec<StubEvent> = effective_opts
-                        .stub_harness
-                        .as_ref()
-                        .map(|h| h.drain_all())
-                        .unwrap_or_default();
-                    let benign_fired = oracle_fired_with_stubs(
-                        &benign.oracle,
-                        &benign_outcome,
-                        &benign_probes,
-                        &benign_stub_events,
+        // Legacy single-payload collision: oracle fired without the
+        // in-harness sink-hit sentinel.  Phase 26 partial-confirmation is
+        // deliberately NOT decided here: a vuln run that reaches the sink
+        // without firing its oracle is ambiguous — it could be a real engine
+        // gap (the vuln input drives the sink but the exploit chain could not
+        // be observed) or merely safe code that happens to reach the sink
+        // (e.g. array-form `exec` with inert metacharacters).  The call is
+        // deferred to the differential check in Phase B, which compares the
+        // benign control's sink reachability.
+        if vuln_fired && !sink_hit {
+            oracle_collision = true;
+        }
+
+        let oob_callback_seen = outcome.oob_callback_seen;
+        attempts.push(Attempt {
+            payload_label: payload.label,
+            outcome,
+            oracle_fired: vuln_fired,
+            triggered: false,
+        });
+        vuln_runs.push(VulnRun {
+            payload_index: i,
+            attempt_index,
+            vuln_fired,
+            sink_hit,
+            oob_nonce_slot: payload.oob_nonce_slot,
+            oob_callback_seen,
+            vuln_probes,
+        });
+    }
+
+    // ── Phase B: differential confirmation + partial-confirmation gate ──
+    // Two candidate classes drive a paired benign-control run:
+    //   • confirm candidate — vuln oracle fired *and* the in-harness sink-hit
+    //     sentinel was observed.  Collected into the set aggregation (§4.1).
+    //   • partial candidate — the sink-hit sentinel fired but the oracle did
+    //     not.  The benign control's sink reachability decides whether this is
+    //     a real engine gap (`PartiallyConfirmed`) or safe code that merely
+    //     reaches the sink (`NotConfirmed`).
+    // Oracle-fires-without-sink stays on the legacy `oracle_collision` path.
+    let mut vuln_fires: Vec<bool> = Vec::new();
+    let mut benign_fires: Vec<bool> = Vec::new();
+    // (attempt_index, differential outcome) per confirm candidate.
+    let mut candidates: Vec<(usize, DifferentialOutcome)> = Vec::new();
+    // Phase 26: set when a partial candidate's vuln run reached the sink that
+    // its benign control did *not* — a sink-reachability differential proving
+    // the vuln input specifically drives the sink even though the exploit
+    // chain could not be observed completing.
+    let mut partial_signal = false;
+
+    for vr in &vuln_runs {
+        let is_confirm_candidate = vr.vuln_fired && vr.sink_hit;
+        let is_partial_candidate = vr.sink_hit && !vr.vuln_fired;
+        if !is_confirm_candidate && !is_partial_candidate {
+            continue;
+        }
+        // The partial signal is a single bool; once established, skip further
+        // partial-only probing.  Confirm candidates always run — the set
+        // aggregation needs every one.
+        if is_partial_candidate && !is_confirm_candidate && partial_signal {
+            continue;
+        }
+        let payload = vuln_payloads[vr.payload_index];
+        // Match the resolution scope to the payload-slice scope so a benign
+        // control declared in another language is still found when this run
+        // was driven off the lang-agnostic union (see `used_lang_slice`).
+        // When the run did use the per-language slice, the lang-aware
+        // resolver keeps a mismatched language from producing a Confirmed.
+        let resolved = if used_lang_slice {
+            resolve_benign_control_lang(payload, spec.expected_cap, spec.lang)
+        } else {
+            resolve_benign_control(payload, spec.expected_cap)
+        };
+        match resolved {
+            None => {
+                // Phase 05 OOB closure: OOB-nonce payloads with
+                // `benign_control = None` are structurally self-confirming
+                // when the listener observed the callback.  A benign URL
+                // cannot hit a per-finding nonce, so the OOB observation is
+                // independent network-level evidence the sink fired.  Skip
+                // the no-benign-control downgrade and emit
+                // [`DifferentialVerdict::ConfirmedProvenOob`].
+                if is_confirm_candidate && vr.oob_nonce_slot && vr.oob_callback_seen {
+                    let mut outcome_record = differential::build_oob_self_confirmed_outcome(
+                        payload.label,
+                        &vr.vuln_probes,
                    );
+                    middleware_demotion::apply_demotion(
+                        &mut outcome_record,
+                        spec.framework.as_ref(),
+                        spec.lang,
+                    );
+                    // No paired benign control runs, so this candidate
+                    // contributes only to the vuln side of the set.
+                    vuln_fires.push(true);
+                    candidates.push((vr.attempt_index, outcome_record));
+                } else if is_confirm_candidate {
+                    no_benign_control = true;
+                }
+                // A partial candidate without a benign control cannot rule out
+                // "safe code that reaches the sink", so it raises no partial
+                // signal and falls through to `NotConfirmed`.
+            }
+            Some(benign) => {
+                let benign_bytes = materialise_bytes(benign, None)
+                    .map(|b| b.into_owned())
+                    .unwrap_or_default();
+                if let Some(ch) = &probe_channel {
+                    let _ = ch.clear();
+                }
+                let benign_outcome = sandbox::run(&harness, &benign_bytes, &effective_opts)?;
+                let benign_sink_hit = benign_outcome.sink_hit;
+                let benign_probes: Vec<SinkProbe> = probe_channel
+                    .as_ref()
+                    .map(|ch| ch.drain())
+                    .unwrap_or_default();
+                let benign_stub_events: Vec<StubEvent> = effective_opts
+                    .stub_harness
+                    .as_ref()
+                    .map(|h| h.drain_all())
+                    .unwrap_or_default();
+                let benign_fired = oracle_fired_with_stubs(
+                    &benign.oracle,
+                    &benign_outcome,
+                    &benign_probes,
+                    &benign_stub_events,
+                );
+
+                if is_confirm_candidate {
                    let mut outcome_record = differential::build_outcome(
                        payload.label,
-                        vuln_fired,
-                        &vuln_probes,
+                        vr.vuln_fired,
+                        &vr.vuln_probes,
                        benign.label,
                        benign_fired,
                        &benign_probes,
                    );
                    // Phase 05 OOB closure: when an OOB-nonce payload also
-                    // carries a paired benign control, promote
-                    // `Confirmed` → `ConfirmedProvenOob` whenever the
-                    // listener observed the per-finding nonce.  The
-                    // upgrade preserves the differential trace (benign
-                    // run still recorded) and surfaces the stronger
-                    // network-level evidence to operators.
+                    // carries a paired benign control, promote `Confirmed` →
+                    // `ConfirmedProvenOob` whenever the listener observed the
+                    // per-finding nonce.  The upgrade preserves the differential
+                    // trace (benign run still recorded) and surfaces the
+                    // stronger network-level evidence to operators.
                    if outcome_record.verdict == DifferentialVerdict::Confirmed
-                        && payload.oob_nonce_slot
-                        && outcome.oob_callback_seen
+                        && vr.oob_nonce_slot
+                        && vr.oob_callback_seen
                    {
                        outcome_record.verdict = DifferentialVerdict::ConfirmedProvenOob;
                    }
@ -661,30 +758,68 @@ pub fn run_spec(spec: &HarnessSpec, opts: &SandboxOptions) -> Result<RunOutcome,
                        spec.framework.as_ref(),
                        spec.lang,
                    );
-                    let confirmed =
-                        middleware_demotion::is_triggering_verdict(outcome_record.verdict);
-                    differential_outcome = Some(outcome_record);
-                    confirmed
+                    vuln_fires.push(vr.vuln_fired);
+                    benign_fires.push(benign_fired);
+                    candidates.push((vr.attempt_index, outcome_record));
+                } else {
+                    // Partial candidate: the vuln run reached the sink without
+                    // firing the oracle.  It is a real engine gap only when the
+                    // benign control neither reached the sink nor fired its
+                    // oracle — i.e. the vuln input specifically drives the sink.
+                    // If the benign control also reaches the sink, the code path
+                    // is shared and safe (e.g. array-form `exec`), so no partial
+                    // signal is raised and the run stays `NotConfirmed`.
+                    if !benign_sink_hit && !benign_fired {
+                        partial_signal = true;
+                    }
                }
            }
-        } else if vuln_fired && !sink_hit {
-            // Oracle fired but probe didn't — likely collision.
-            oracle_collision = true;
-            false
+        }
+    }
+
+    // ── Phase 26 aggregation ────────────────────────────────────────────
+    // `evaluate_sets` collapses the firing sets to a single verdict: any
+    // vuln payload firing + no benign control firing → Confirmed; any
+    // benign firing anywhere → OracleCollisionSuspected (global ambient-
+    // noise veto).  A ConfirmedProvenOob candidate is terminal positive
+    // evidence (a per-finding OOB nonce cannot be hit by ambient noise), so
+    // it confirms even if some unrelated payload's benign tripped a noisy
+    // oracle.
+    if !candidates.is_empty() {
+        let aggregate = differential::evaluate_sets(&vuln_fires, &benign_fires);
+        let has_proven_oob = candidates
+            .iter()
+            .any(|(_, r)| r.verdict == DifferentialVerdict::ConfirmedProvenOob);
+        let confirmed_class =
+            has_proven_oob || matches!(aggregate, DifferentialVerdict::Confirmed);
+        if confirmed_class {
+            // Representative outcome: prefer the strongest (ProvenOob), else
+            // the first candidate carrying a triggering verdict.  Iteration
+            // follows payload order, so the choice is deterministic.
+            let chosen = candidates
+                .iter()
+                .find(|(_, r)| r.verdict == DifferentialVerdict::ConfirmedProvenOob)
+                .or_else(|| {
+                    candidates
+                        .iter()
+                        .find(|(_, r)| middleware_demotion::is_triggering_verdict(r.verdict))
+                })
+                .cloned();
+            if let Some((idx, record)) = chosen {
+                attempts[idx].triggered = true;
+                triggered_by = Some(idx);
+                differential_outcome = Some(record);
+            }
        } else {
-            false
-        };
-
-        attempts.push(Attempt {
-            payload_label: payload.label,
-            outcome,
-            oracle_fired: vuln_fired,
-            triggered,
-        });
-
-        if triggered {
-            triggered_by = Some(i);
-            break;
+            // Ambient-noise veto: at least one benign control fired and no
+            // terminal OOB evidence exists.  Surface a colliding candidate
+            // so the verifier downgrades to
+            // `Inconclusive(OracleCollisionSuspected)`.
+            differential_outcome = candidates
+                .iter()
+                .find(|(_, r)| r.verdict == DifferentialVerdict::OracleCollisionSuspected)
+                .or_else(|| candidates.first())
+                .map(|(_, r)| r.clone());
        }
    }

@ -699,6 +834,7 @@ pub fn run_spec(spec: &HarnessSpec, opts: &SandboxOptions) -> Result<RunOutcome,
        differential: differential_outcome,
        no_benign_control,
        unrelated_crash,
+        sink_reached_no_oracle: partial_signal,
    })
 }

--- a/src/dynamic/telemetry.rs
+++ b/src/dynamic/telemetry.rs
@ -305,7 +305,14 @@ impl SamplingPolicy {
    /// Decide whether an event with the given status / spec_hash should be
    /// written.  Deterministic for a fixed `(self, status, spec_hash)`.
    pub fn should_sample(&self, status: VerifyStatus, spec_hash: &str) -> bool {
-        if matches!(status, VerifyStatus::Confirmed) && self.keep_all_confirmed {
+        if matches!(
+            status,
+            VerifyStatus::Confirmed | VerifyStatus::PartiallyConfirmed
+        ) && self.keep_all_confirmed
+        {
+            // PartiallyConfirmed is a low-volume, high-value triage signal
+            // (each is a candidate real engine gap), so it rides the same
+            // keep-all switch as Confirmed rather than being sampled away.
            return true;
        }
        if matches!(status, VerifyStatus::Inconclusive) && self.keep_all_inconclusive {
@ -389,6 +396,7 @@ pub fn emit_with_policy(event: &TelemetryEvent, policy: &SamplingPolicy) {
 fn parse_status(s: &str) -> Option<VerifyStatus> {
    match s {
        "Confirmed" => Some(VerifyStatus::Confirmed),
+        "PartiallyConfirmed" => Some(VerifyStatus::PartiallyConfirmed),
        "NotConfirmed" => Some(VerifyStatus::NotConfirmed),
        "Inconclusive" => Some(VerifyStatus::Inconclusive),
        "Unsupported" => Some(VerifyStatus::Unsupported),
--- a/src/dynamic/verify.rs
+++ b/src/dynamic/verify.rs
@ -987,9 +987,16 @@ fn build_verdict(

            if let Some(i) = run.triggered_by {
                let triggered_payload = run.attempts[i].payload_label.to_string();
+                // Resolve repro bytes by label, not by index: OOB payloads
+                // skipped for lack of a listener leave `attempts` shorter
+                // than `vuln_payloads`, so a positional lookup can pull the
+                // wrong payload's bytes.  The label is the stable key.
                let payloads = payloads_for(spec.expected_cap);
-                let vuln_payloads: Vec<_> = payloads.iter().filter(|p| !p.is_benign).collect();
-                let payload_bytes = vuln_payloads.get(i).map(|p| p.bytes).unwrap_or(b"");
+                let payload_bytes = payloads
+                    .iter()
+                    .find(|p| !p.is_benign && p.label == triggered_payload)
+                    .map(|p| p.bytes)
+                    .unwrap_or(b"");
                let hardening_outcome = summarize_hardening(&run.attempts[i].outcome);

                // Emit repro artifact.
@ -1156,6 +1163,33 @@ fn build_verdict(
                        hardening_outcome: None,
                    },
                }
+            } else if run.sink_reached_no_oracle {
+                // Phase 26: a vuln payload's in-harness sink-reachability
+                // probe fired but its oracle marker never did, and the run
+                // produced no Confirmed-class verdict and no colliding
+                // differential.  The sink is reachable at runtime yet the
+                // exploit chain did not complete (no marker file written,
+                // no OOB callback observed, output lacked the proof token).
+                // Surface `PartiallyConfirmed` so engine work can ratchet on
+                // the real sink-reachability gap without overstating it as a
+                // confirmed exploit.  No repro artifact is written: there is
+                // no proven exploit to reproduce.
+                VerifyResult {
+                    finding_id: finding_id.to_owned(),
+                    status: VerifyStatus::PartiallyConfirmed,
+                    triggered_payload: None,
+                    reason: None,
+                    inconclusive_reason: None,
+                    detail: Some(
+                        "sink-reachability probe fired but the oracle marker was not observed; exploit chain did not complete".to_owned(),
+                    ),
+                    attempts,
+                    toolchain_match: Some(toolchain_match.to_owned()),
+                    differential: None,
+                    replay_stable: None,
+                    wrong: None,
+                    hardening_outcome: None,
+                }
            } else if run.oracle_collision {
                // Oracle fired but the sink-hit sentinel did not —
                // legacy single-payload collision path, predates the
@ -1735,4 +1769,141 @@ mod tests {
            "current corpus_version entry must be a cache hit"
        );
    }
+
+    fn partial_spec() -> HarnessSpec {
+        HarnessSpec {
+            finding_id: "deadbeefcafef00d".into(),
+            entry_file: "app.py".into(),
+            entry_name: "login".into(),
+            entry_kind: crate::dynamic::spec::EntryKind::Function,
+            lang: crate::symbol::Lang::Python,
+            toolchain_id: "python-3.11".into(),
+            payload_slot: crate::dynamic::spec::PayloadSlot::Param(0),
+            expected_cap: crate::labels::Cap::SQL_QUERY,
+            constraint_hints: vec![],
+            sink_file: "app.py".into(),
+            sink_line: 10,
+            spec_hash: "cafecafecafe0001".into(),
+            derivation: SpecDerivationStrategy::FromFlowSteps,
+            stubs_required: vec![],
+            framework: None,
+            java_toolchain: crate::dynamic::spec::JavaToolchain::default(),
+        }
+    }
+
+    /// Phase 26: when a vuln payload's sink-reachability probe fired but its
+    /// oracle marker never did, and the run has no Confirmed-class verdict, no
+    /// differential outcome, and no missing benign control, the verdict must be
+    /// `PartiallyConfirmed`, carry no `triggered_payload`, and write no repro.
+    #[test]
+    fn build_verdict_sink_reached_no_oracle_maps_to_partially_confirmed() {
+        use crate::dynamic::runner::{Attempt, RunOutcome};
+        use crate::dynamic::sandbox::SandboxOutcome;
+
+        let opts = VerifyOptions::from_config(&Config::default());
+        let run = RunOutcome {
+            spec: partial_spec(),
+            attempts: vec![Attempt {
+                payload_label: "sqli-tautology",
+                outcome: SandboxOutcome {
+                    exit_code: Some(0),
+                    stdout: b"__NYX_SINK_HIT__".to_vec(),
+                    stderr: Vec::new(),
+                    timed_out: false,
+                    oob_callback_seen: false,
+                    sink_hit: true,
+                    duration: std::time::Duration::ZERO,
+                    hardening_outcome: None,
+                },
+                oracle_fired: false,
+                triggered: false,
+            }],
+            triggered_by: None,
+            oracle_collision: false,
+            sink_reached_no_oracle: true,
+            build_attempts: 1,
+            harness_source: String::new(),
+            entry_source: String::new(),
+            differential: None,
+            no_benign_control: false,
+            unrelated_crash: false,
+        };
+
+        let verdict = build_verdict(
+            "deadbeefcafef00d",
+            &partial_spec(),
+            Ok(run),
+            "exact",
+            &opts,
+            std::time::Duration::ZERO,
+        );
+
+        assert_eq!(verdict.status, VerifyStatus::PartiallyConfirmed);
+        assert!(
+            verdict.triggered_payload.is_none(),
+            "PartiallyConfirmed must not claim a triggering payload"
+        );
+        assert!(
+            verdict
+                .detail
+                .as_deref()
+                .unwrap_or_default()
+                .contains("sink-reachability probe fired"),
+            "detail must explain the sink reached but the chain did not complete: {:?}",
+            verdict.detail
+        );
+        // The sink-hit attempt must survive into the surfaced attempt list.
+        assert_eq!(verdict.attempts.len(), 1);
+        assert!(verdict.attempts[0].sink_hit);
+        assert!(!verdict.attempts[0].triggered);
+    }
+
+    /// Regression guard: a clean run (no sink hit, no oracle) must stay
+    /// `NotConfirmed` — the `PartiallyConfirmed` branch must not swallow the
+    /// ordinary negative case.
+    #[test]
+    fn build_verdict_clean_run_stays_not_confirmed() {
+        use crate::dynamic::runner::{Attempt, RunOutcome};
+        use crate::dynamic::sandbox::SandboxOutcome;
+
+        let opts = VerifyOptions::from_config(&Config::default());
+        let run = RunOutcome {
+            spec: partial_spec(),
+            attempts: vec![Attempt {
+                payload_label: "sqli-tautology",
+                outcome: SandboxOutcome {
+                    exit_code: Some(0),
+                    stdout: Vec::new(),
+                    stderr: Vec::new(),
+                    timed_out: false,
+                    oob_callback_seen: false,
+                    sink_hit: false,
+                    duration: std::time::Duration::ZERO,
+                    hardening_outcome: None,
+                },
+                oracle_fired: false,
+                triggered: false,
+            }],
+            triggered_by: None,
+            oracle_collision: false,
+            sink_reached_no_oracle: false,
+            build_attempts: 1,
+            harness_source: String::new(),
+            entry_source: String::new(),
+            differential: None,
+            no_benign_control: false,
+            unrelated_crash: false,
+        };
+
+        let verdict = build_verdict(
+            "deadbeefcafef00d",
+            &partial_spec(),
+            Ok(run),
+            "exact",
+            &opts,
+            std::time::Duration::ZERO,
+        );
+
+        assert_eq!(verdict.status, VerifyStatus::NotConfirmed);
+    }
 }
--- a/src/evidence.rs
+++ b/src/evidence.rs
@ -727,6 +727,14 @@ pub enum VerifyStatus {
    /// Sink fired with at least one payload. The static finding is exploitable
    /// against the live target.
    Confirmed,
+    /// The in-harness sink-reachability probe fired (sink reached) but the
+    /// oracle marker was never observed (no file write / no OOB callback /
+    /// output did not contain the proof token), so the exploit chain did not
+    /// complete. Semantically `{ sink_reached: true, exit_propagated: false }`.
+    /// Ranks above `NotConfirmed` (runtime corroboration that the sink is
+    /// reachable) but below `Confirmed` (no proven exploit). Lets engine work
+    /// ratchet on real reachability gaps without overstating them as exploits.
+    PartiallyConfirmed,
    /// All payloads ran cleanly. Either the path is infeasible at runtime
    /// or the corpus is too narrow. Treat as "static-only", not "false positive".
    NotConfirmed,
--- a/src/fmt.rs
+++ b/src/fmt.rs
@ -558,6 +558,7 @@ fn format_dynamic_verdict_annotation(dv: &crate::evidence::VerifyResult) -> Stri
            let pid = dv.triggered_payload.as_deref().unwrap_or("unknown");
            format!("[DYN: confirmed via {pid}]")
        }
+        VerifyStatus::PartiallyConfirmed => "[DYN: partially confirmed (sink reached)]".to_string(),
        VerifyStatus::NotConfirmed => "[DYN: not confirmed]".to_string(),
        VerifyStatus::Unsupported => {
            let reason = dv
--- a/src/rank.rs
+++ b/src/rank.rs
@ -258,6 +258,12 @@ fn dynamic_verdict_delta(diag: &Diag) -> Option<f64> {
    let dv = diag.evidence.as_ref()?.dynamic_verdict.as_ref()?;
    match dv.status {
        VerifyStatus::Confirmed => Some(20.0),
+        // PartiallyConfirmed: the sink was reached at runtime but the
+        // exploit chain did not complete.  Runtime corroboration that the
+        // sink is reachable is a positive signal, but weaker than a proven
+        // exploit, so it earns a modest bump rather than the full Confirmed
+        // boost.
+        VerifyStatus::PartiallyConfirmed => Some(8.0),
        // Apply penalty only when the corpus was actually exhausted (attempts
        // were made); a NotConfirmed with zero attempts means something went
        // wrong before payload execution, which is an Inconclusive path, not
--- a/src/server/models.rs
+++ b/src/server/models.rs
@ -293,6 +293,7 @@ fn status_for_diag(d: &Diag) -> &'static str {
 pub fn dynamic_status_label(status: VerifyStatus) -> &'static str {
    match status {
        VerifyStatus::Confirmed => "Confirmed",
+        VerifyStatus::PartiallyConfirmed => "PartiallyConfirmed",
        VerifyStatus::NotConfirmed => "NotConfirmed",
        VerifyStatus::Inconclusive => "Inconclusive",
        VerifyStatus::Unsupported => "Unsupported",
--- a/tests/console_snapshot.rs
+++ b/tests/console_snapshot.rs
@ -76,6 +76,28 @@ fn diag_with_verdict(status: VerifyStatus) -> Diag {
            wrong: None,
            hardening_outcome: None,
        },
+        VerifyStatus::PartiallyConfirmed => VerifyResult {
+            finding_id: "abc123".into(),
+            status,
+            triggered_payload: None,
+            reason: None,
+            inconclusive_reason: None,
+            detail: Some(
+                "sink-reachability probe fired but the oracle marker was not observed; exploit chain did not complete".into(),
+            ),
+            attempts: vec![AttemptSummary {
+                payload_label: "sqli-tautology".into(),
+                exit_code: Some(0),
+                timed_out: false,
+                triggered: false,
+                sink_hit: true,
+            }],
+            toolchain_match: Some("exact".into()),
+            differential: None,
+            replay_stable: None,
+            wrong: None,
+            hardening_outcome: None,
+        },
        VerifyStatus::NotConfirmed => VerifyResult {
            finding_id: "abc123".into(),
            status,
@ -158,6 +180,17 @@ fn console_not_confirmed_shows_annotation() {
    );
 }

+#[test]
+fn console_partially_confirmed_shows_sink_reached() {
+    let diag = diag_with_verdict(VerifyStatus::PartiallyConfirmed);
+    let output = render_console(&[diag], "proj", None, &[]);
+    let stripped = strip_ansi(&output);
+    assert!(
+        stripped.contains("[DYN: partially confirmed (sink reached)]"),
+        "expected DYN partially-confirmed annotation, got:\n{stripped}"
+    );
+}
+
 #[test]
 fn console_unsupported_shows_reason() {
    let diag = diag_with_verdict(VerifyStatus::Unsupported);
--- a/tests/eval_corpus/report.py
+++ b/tests/eval_corpus/report.py
@ -78,6 +78,7 @@ def load_previous_agg(path: str) -> dict:
            "fn": 0,
            "unsupported": 0,
            "confirmed": 0,
+            "partially_confirmed": 0,
            "wrong_confirmed": 0,
            "stable_replays": 0,
            "total": 0,
@ -92,6 +93,7 @@ def load_previous_agg(path: str) -> dict:
                "fn",
                "unsupported",
                "confirmed",
+                "partially_confirmed",
                "wrong_confirmed",
                "stable_replays",
                "total",
@ -139,6 +141,7 @@ def main() -> int:
            "fn": 0,
            "unsupported": 0,
            "confirmed": 0,
+            "partially_confirmed": 0,
            "wrong_confirmed": 0,
            "stable_replays": 0,
            "total": 0,
@ -153,6 +156,7 @@ def main() -> int:
                "fn",
                "unsupported",
                "confirmed",
+                "partially_confirmed",
                "wrong_confirmed",
                "stable_replays",
                "total",
@ -160,17 +164,22 @@ def main() -> int:
                agg[k][field] += c.get(field, 0)

    print("\n=== Aggregated eval corpus report ===")
-    print(f"{'Cap':<20} {'Lang':<12} {'TP':>5} {'FP':>5} {'FN':>5} {'Prec':>6} {'Rec':>6} {'Unsup%':>7}")
-    print("-" * 72)
+    print(
+        f"{'Cap':<20} {'Lang':<12} {'TP':>5} {'FP':>5} {'FN':>5} "
+        f"{'Prec':>6} {'Rec':>6} {'Unsup%':>7} {'Conf%':>7} {'Part%':>7}"
+    )
+    print("-" * 88)
    for k, v in sorted(agg.items()):
        prec = v["tp"] / max(v["tp"] + v["fp"], 1)
        rec = v["tp"] / max(v["tp"] + v["fn"], 1)
        unsup = v["unsupported"] / max(v["total"], 1)
+        conf = v["confirmed"] / max(v["total"], 1)
+        part = v["partially_confirmed"] / max(v["total"], 1)
        print(
            f"{k[0]:<20} {k[1]:<12} "
            f"{v['tp']:>5} {v['fp']:>5} {v['fn']:>5} "
            f"{prec:>6.2f} {rec:>6.2f} "
-            f"{unsup*100:>6.1f}%"
+            f"{unsup*100:>6.1f}% {conf*100:>6.1f}% {part*100:>6.1f}%"
        )

    gate_failed = False
--- a/tests/eval_corpus/tabulate.py
+++ b/tests/eval_corpus/tabulate.py
@ -387,7 +387,7 @@ def main() -> int:
                    break

    # Per-cell tallies: {(cap, lang): {tp, fp, fn, unsupported, confirmed,
-    # wrong_confirmed, stable_replays, total}}
+    # partially_confirmed, wrong_confirmed, stable_replays, total}}
    cells: dict[tuple[str, str], dict] = defaultdict(
        lambda: {
            "tp": 0,
@ -395,6 +395,7 @@ def main() -> int:
            "fn": 0,
            "unsupported": 0,
            "confirmed": 0,
+            "partially_confirmed": 0,
            "wrong_confirmed": 0,
            "stable_replays": 0,
            "total": 0,
@ -412,6 +413,8 @@ def main() -> int:
            status = dv.get("status")
            if status == "Unsupported":
                cells[key]["unsupported"] += 1
+            elif status == "PartiallyConfirmed":
+                cells[key]["partially_confirmed"] += 1
            elif status == "Confirmed":
                cells[key]["confirmed"] += 1
                # Repro-stability and false-Confirmed counts are optional
--- a/tests/sarif_dynamic_verdict_tests.rs
+++ b/tests/sarif_dynamic_verdict_tests.rs
@ -235,9 +235,10 @@ fn sarif_confirmed_verdict_nyx_dynamic_verdict_contains_triggered_payload() {
 }

 #[test]
-fn sarif_all_four_statuses_produce_partial_fingerprint() {
+fn sarif_all_statuses_produce_partial_fingerprint() {
    let statuses = [
        (VerifyStatus::Confirmed, "Confirmed"),
+        (VerifyStatus::PartiallyConfirmed, "PartiallyConfirmed"),
        (VerifyStatus::NotConfirmed, "NotConfirmed"),
        (VerifyStatus::Unsupported, "Unsupported"),
        (VerifyStatus::Inconclusive, "Inconclusive"),