feat(dynamic): add PartiallyConfirmed status for finer-grained sink-reachability categorization, update dynamic verification, telemetry, and reporting systems

This commit is contained in:
elipeter 2026-05-29 14:35:39 -05:00
parent 635b213825
commit c0501884ae
23 changed files with 658 additions and 142 deletions

View file

@ -76,6 +76,28 @@ fn diag_with_verdict(status: VerifyStatus) -> Diag {
wrong: None,
hardening_outcome: None,
},
VerifyStatus::PartiallyConfirmed => VerifyResult {
finding_id: "abc123".into(),
status,
triggered_payload: None,
reason: None,
inconclusive_reason: None,
detail: Some(
"sink-reachability probe fired but the oracle marker was not observed; exploit chain did not complete".into(),
),
attempts: vec![AttemptSummary {
payload_label: "sqli-tautology".into(),
exit_code: Some(0),
timed_out: false,
triggered: false,
sink_hit: true,
}],
toolchain_match: Some("exact".into()),
differential: None,
replay_stable: None,
wrong: None,
hardening_outcome: None,
},
VerifyStatus::NotConfirmed => VerifyResult {
finding_id: "abc123".into(),
status,
@ -158,6 +180,17 @@ fn console_not_confirmed_shows_annotation() {
);
}
/// A diagnostic carrying a `PartiallyConfirmed` verdict must surface the
/// "sink reached" annotation in the console rendering.
///
/// The rendered text is passed through `strip_ansi` before the check so the
/// substring comparison runs against the stripped output.
#[test]
fn console_partially_confirmed_shows_sink_reached() {
    // Single-diagnostic input built from the shared verdict fixture.
    let diags = [diag_with_verdict(VerifyStatus::PartiallyConfirmed)];
    let stripped = strip_ansi(&render_console(&diags, "proj", None, &[]));

    // The exact annotation text the console reporter is expected to emit.
    let annotation = "[DYN: partially confirmed (sink reached)]";
    assert!(
        stripped.contains(annotation),
        "expected DYN partially-confirmed annotation, got:\n{stripped}"
    );
}
#[test]
fn console_unsupported_shows_reason() {
let diag = diag_with_verdict(VerifyStatus::Unsupported);

View file

@ -78,6 +78,7 @@ def load_previous_agg(path: str) -> dict:
"fn": 0,
"unsupported": 0,
"confirmed": 0,
"partially_confirmed": 0,
"wrong_confirmed": 0,
"stable_replays": 0,
"total": 0,
@ -92,6 +93,7 @@ def load_previous_agg(path: str) -> dict:
"fn",
"unsupported",
"confirmed",
"partially_confirmed",
"wrong_confirmed",
"stable_replays",
"total",
@ -139,6 +141,7 @@ def main() -> int:
"fn": 0,
"unsupported": 0,
"confirmed": 0,
"partially_confirmed": 0,
"wrong_confirmed": 0,
"stable_replays": 0,
"total": 0,
@ -153,6 +156,7 @@ def main() -> int:
"fn",
"unsupported",
"confirmed",
"partially_confirmed",
"wrong_confirmed",
"stable_replays",
"total",
@ -160,17 +164,22 @@ def main() -> int:
agg[k][field] += c.get(field, 0)
print("\n=== Aggregated eval corpus report ===")
print(f"{'Cap':<20} {'Lang':<12} {'TP':>5} {'FP':>5} {'FN':>5} {'Prec':>6} {'Rec':>6} {'Unsup%':>7}")
print("-" * 72)
print(
f"{'Cap':<20} {'Lang':<12} {'TP':>5} {'FP':>5} {'FN':>5} "
f"{'Prec':>6} {'Rec':>6} {'Unsup%':>7} {'Conf%':>7} {'Part%':>7}"
)
print("-" * 88)
for k, v in sorted(agg.items()):
prec = v["tp"] / max(v["tp"] + v["fp"], 1)
rec = v["tp"] / max(v["tp"] + v["fn"], 1)
unsup = v["unsupported"] / max(v["total"], 1)
conf = v["confirmed"] / max(v["total"], 1)
part = v["partially_confirmed"] / max(v["total"], 1)
print(
f"{k[0]:<20} {k[1]:<12} "
f"{v['tp']:>5} {v['fp']:>5} {v['fn']:>5} "
f"{prec:>6.2f} {rec:>6.2f} "
f"{unsup*100:>6.1f}%"
f"{unsup*100:>6.1f}% {conf*100:>6.1f}% {part*100:>6.1f}%"
)
gate_failed = False

View file

@ -387,7 +387,7 @@ def main() -> int:
break
# Per-cell tallies: {(cap, lang): {tp, fp, fn, unsupported, confirmed,
# wrong_confirmed, stable_replays, total}}
# partially_confirmed, wrong_confirmed, stable_replays, total}}
cells: dict[tuple[str, str], dict] = defaultdict(
lambda: {
"tp": 0,
@ -395,6 +395,7 @@ def main() -> int:
"fn": 0,
"unsupported": 0,
"confirmed": 0,
"partially_confirmed": 0,
"wrong_confirmed": 0,
"stable_replays": 0,
"total": 0,
@ -412,6 +413,8 @@ def main() -> int:
status = dv.get("status")
if status == "Unsupported":
cells[key]["unsupported"] += 1
elif status == "PartiallyConfirmed":
cells[key]["partially_confirmed"] += 1
elif status == "Confirmed":
cells[key]["confirmed"] += 1
# Repro-stability and false-Confirmed counts are optional

View file

@ -235,9 +235,10 @@ fn sarif_confirmed_verdict_nyx_dynamic_verdict_contains_triggered_payload() {
}
#[test]
fn sarif_all_four_statuses_produce_partial_fingerprint() {
fn sarif_all_statuses_produce_partial_fingerprint() {
let statuses = [
(VerifyStatus::Confirmed, "Confirmed"),
(VerifyStatus::PartiallyConfirmed, "PartiallyConfirmed"),
(VerifyStatus::NotConfirmed, "NotConfirmed"),
(VerifyStatus::Unsupported, "Unsupported"),
(VerifyStatus::Inconclusive, "Inconclusive"),