[pitboss] phase 07: M6 — Evidence consumers: formatters, ranking, UI

This commit is contained in:
pitboss 2026-05-12 13:26:52 -04:00
parent 6f8a645077
commit bfdfcb9d1a
18 changed files with 3208 additions and 46 deletions

View file

@ -80,6 +80,24 @@ impl OobListener {
.map(|h| h.contains(nonce))
.unwrap_or(false)
}
/// Blocks until `nonce` has been recorded or `timeout` elapses.
///
/// The hit check runs before any sleeping, so a nonce that is already
/// recorded returns `true` immediately, even with a zero `timeout`.
/// Otherwise the listener is re-checked about every 5 ms; the final sleep is
/// clamped to the time remaining so the deadline is not overshot by a whole
/// polling interval.
///
/// A `timeout` so large that the deadline cannot be represented (for example
/// `Duration::MAX`) is treated as "no deadline": the call keeps polling until
/// the nonce is seen instead of panicking on `Instant` overflow.
///
/// Returns `true` if the nonce was seen, `false` if the deadline passed first.
/// Prefer this over a fixed sleep + `was_nonce_hit` at call sites.
pub fn wait_for_nonce(&self, nonce: &str, timeout: Duration) -> bool {
    /// Delay between two consecutive checks of the listener.
    const POLL_INTERVAL: Duration = Duration::from_millis(5);

    // `Instant + Duration` panics when the sum overflows; `checked_add`
    // reports that as `None`, which is handled below as an unbounded wait.
    let deadline = std::time::Instant::now().checked_add(timeout);
    loop {
        if self.was_nonce_hit(nonce) {
            return true;
        }
        let remaining = match deadline {
            Some(at) => at.saturating_duration_since(std::time::Instant::now()),
            // No representable deadline: always allow one more full interval.
            None => POLL_INTERVAL,
        };
        if remaining.is_zero() {
            return false;
        }
        std::thread::sleep(remaining.min(POLL_INTERVAL));
    }
}
}
impl Drop for OobListener {

View file

@ -8,7 +8,7 @@
use crate::dynamic::build_sandbox;
use crate::dynamic::corpus::{benign_payload_for, materialise_bytes, payloads_for, Oracle, Payload};
use crate::dynamic::harness::{self, HarnessError};
use crate::dynamic::sandbox::{self, SandboxError, SandboxOptions, SandboxOutcome};
use crate::dynamic::sandbox::{self, SandboxBackend, SandboxError, SandboxOptions, SandboxOutcome};
use crate::dynamic::spec::HarnessSpec;
use crate::symbol::Lang;
@ -214,7 +214,11 @@ pub fn run_spec(spec: &HarnessSpec, opts: &SandboxOptions) -> Result<RunOutcome,
let (oob_nonce, effective_bytes) = if payload.oob_nonce_slot {
if let Some(ref listener) = opts.oob_listener {
let nonce = generate_nonce();
let url = listener.nonce_url(&nonce);
let url = if uses_docker_backend(opts) {
listener.nonce_url_for_host("host-gateway", &nonce)
} else {
listener.nonce_url(&nonce)
};
let bytes = url.into_bytes();
(Some(nonce), bytes)
} else {
@ -229,12 +233,10 @@ pub fn run_spec(spec: &HarnessSpec, opts: &SandboxOptions) -> Result<RunOutcome,
// For OOB payloads, check the nonce listener and update the outcome flag.
if let (Some(nonce), Some(listener)) = (&oob_nonce, &opts.oob_listener) {
// Give the harness a brief window to complete the callback before we check.
// The sandbox run already waited for process exit, so the callback should
// have arrived. A short sleep handles edge cases where the OS hasn't yet
// delivered the TCP segment to the listener thread.
std::thread::sleep(std::time::Duration::from_millis(50));
if listener.was_nonce_hit(nonce) {
// Poll until the nonce arrives or the budget expires. The sandbox run
// already waited for process exit so the callback should arrive quickly;
// 200 ms covers OS TCP delivery jitter without burning wall-clock at scale.
if listener.wait_for_nonce(nonce, std::time::Duration::from_millis(200)) {
outcome.oob_callback_seen = true;
}
}
@ -287,6 +289,18 @@ pub fn run_spec(spec: &HarnessSpec, opts: &SandboxOptions) -> Result<RunOutcome,
})
}
/// Reports whether the sandbox run configured by `opts` is expected to
/// execute through Docker.
///
/// * `SandboxBackend::Process` — never Docker.
/// * `SandboxBackend::Docker` — always Docker.
/// * `SandboxBackend::Auto` — defers to `sandbox::docker_available()`.
///
/// Consulted when building out-of-band callback URLs: Docker runs embed
/// `host-gateway` as the host, because the container's loopback is not the
/// host's loopback.
fn uses_docker_backend(opts: &SandboxOptions) -> bool {
    let backend = &opts.backend;
    match backend {
        SandboxBackend::Process => false,
        SandboxBackend::Docker => true,
        // Only the auto-detect mode needs to probe for Docker.
        SandboxBackend::Auto => sandbox::docker_available(),
    }
}
fn oracle_fired(oracle: &Oracle, outcome: &SandboxOutcome) -> bool {
match oracle {
Oracle::OutputContains(needle) => {

View file

@ -424,6 +424,14 @@ fn render_diag(d: &Diag, width: usize) -> String {
));
}
// ── Dynamic verification annotation ──────────────────────────────
if let Some(ev) = d.evidence.as_ref() {
if let Some(ref dv) = ev.dynamic_verdict {
let annotation = format_dynamic_verdict_annotation(dv);
out.push_str(&format!("{indent_str}{}\n", style(&annotation).dim()));
}
}
out
}
@ -453,6 +461,67 @@ fn state_remediation_hint(rule_id: &str) -> Option<&'static str> {
}
}
/// Format a dynamic verification annotation line.
///
/// Spec §5.4: `[DYN: confirmed via {payload}]` / `[DYN: not confirmed]` /
/// `[DYN: unsupported ({reason})]` / `[DYN: inconclusive ({reason})]`
fn format_dynamic_verdict_annotation(dv: &crate::evidence::VerifyResult) -> String {
use crate::evidence::VerifyStatus;
match dv.status {
VerifyStatus::Confirmed => {
let pid = dv.triggered_payload.as_deref().unwrap_or("unknown");
format!("[DYN: confirmed via {pid}]")
}
VerifyStatus::NotConfirmed => "[DYN: not confirmed]".to_string(),
VerifyStatus::Unsupported => {
let reason = dv
.reason
.as_ref()
.map(format_unsupported_reason)
.unwrap_or_else(|| "unknown".to_string());
format!("[DYN: unsupported ({reason})]")
}
VerifyStatus::Inconclusive => {
let reason = dv
.inconclusive_reason
.map(format_inconclusive_reason)
.unwrap_or_else(|| {
dv.detail
.as_deref()
.map(|d| d.chars().take(40).collect())
.unwrap_or_else(|| "unknown".to_string())
});
format!("[DYN: inconclusive ({reason})]")
}
}
}
/// Short human-readable label for an `UnsupportedReason`, used inside the
/// `[DYN: unsupported (…)]` annotation.
///
/// The payload carried by `RequiredFileRedactedForSecrets` is not included in
/// the label.
fn format_unsupported_reason(r: &crate::evidence::UnsupportedReason) -> String {
    use crate::evidence::UnsupportedReason;

    let label: &'static str = match r {
        UnsupportedReason::BackendUnavailable => "backend unavailable",
        UnsupportedReason::EntryKindUnsupported => "entry kind not supported",
        UnsupportedReason::ConfidenceTooLow => "confidence too low",
        UnsupportedReason::NoFlowSteps => "no flow steps",
        UnsupportedReason::NoPayloadsForCap => "no payloads for cap",
        UnsupportedReason::SpecDerivationFailed => "spec derivation failed",
        UnsupportedReason::RequiredFileRedactedForSecrets(..) => "file redacted for secrets",
        UnsupportedReason::LangUnsupported => "language not supported",
    };
    label.to_string()
}
/// Short human-readable label for an `InconclusiveReason`, used inside the
/// `[DYN: inconclusive (…)]` annotation.
fn format_inconclusive_reason(r: crate::evidence::InconclusiveReason) -> String {
    use crate::evidence::InconclusiveReason;

    let label: &'static str = match r {
        InconclusiveReason::OracleCollisionSuspected => "oracle collision",
        InconclusiveReason::NonReproducible => "non-reproducible",
        InconclusiveReason::BuildFailed => "build failed",
        InconclusiveReason::SandboxError => "sandbox error",
    };
    label.to_string()
}
/// Colored severity tag with icon. The tag is the visual anchor of each finding.
///
/// - HIGH: bold red

View file

@ -282,6 +282,21 @@ pub fn build_sarif(diags: &[Diag], scan_root: &Path) -> Value {
}
}
// Dynamic verification vendor extension (§5.4).
// `partialFingerprints.dynamic_verdict_status` is a stable string
// consumers can key on without parsing the full verdict object.
// `properties.nyx_dynamic_verdict` carries the full VerifyResult.
if let Some(dv) = d.evidence.as_ref().and_then(|ev| ev.dynamic_verdict.as_ref()) {
result["partialFingerprints"] = json!({
"dynamic_verdict_status": serde_json::to_value(dv.status)
.unwrap_or(Value::Null)
});
props.insert(
"nyx_dynamic_verdict".into(),
serde_json::to_value(dv).unwrap_or(Value::Null),
);
}
// Add rollup data if present
if let Some(ref rollup) = d.rollup {
props.insert(

View file

@ -90,6 +90,22 @@ pub fn compute_attack_rank(diag: &Diag) -> AttackRank {
}
}
// ── 7a. Dynamic verification delta ─────────────────────────────
//
// `Confirmed` findings are verified exploitable — boost rank so they
// surface above equivalent static-only findings.
// `NotConfirmed` findings where all available payloads were tried
// (corpus exhausted) receive a mild downward nudge.
// All other verdicts (Unsupported, Inconclusive, no verdict) are
// unaffected: no data is better than speculative data.
//
// TODO(M7): calibrate N (boost) and M (penalty) from telemetry
// collected here. Placeholder values: N=20, M=5.
if let Some(delta) = dynamic_verdict_delta(diag) {
score += delta;
components.push(("dynamic_verdict".into(), format!("{delta:+}")));
}
// ── 7. Completeness penalty (engine provenance notes) ────────────
//
// When the analysis engine hit a cap, widening, or lowering bail,
@ -204,6 +220,26 @@ pub fn rank_diags(diags: &mut [Diag]) {
// Scoring helpers
// ─────────────────────────────────────────────────────────────────────────────
/// Score adjustment contributed by the dynamic verification verdict.
///
/// - `Confirmed` → `+20.0`
/// - `NotConfirmed` with at least one recorded attempt → `-5.0`
/// - anything else, including a missing verdict or missing evidence → `None`
///   (the score is left untouched)
///
/// TODO(M7): N=20 and M=5 are placeholders; calibrate from telemetry.
fn dynamic_verdict_delta(diag: &Diag) -> Option<f64> {
    use crate::evidence::VerifyStatus;

    /// Boost for a dynamically confirmed finding (placeholder N).
    const CONFIRMED_BOOST: f64 = 20.0;
    /// Nudge for a finding whose payload attempts all failed (placeholder M).
    const EXHAUSTED_PENALTY: f64 = -5.0;

    let verdict = diag
        .evidence
        .as_ref()
        .and_then(|ev| ev.dynamic_verdict.as_ref())?;

    // A NotConfirmed verdict with zero attempts means nothing was actually
    // executed, so it carries no negative signal and must not be penalised.
    let payloads_were_tried = !verdict.attempts.is_empty();

    match verdict.status {
        VerifyStatus::Confirmed => Some(CONFIRMED_BOOST),
        VerifyStatus::NotConfirmed if payloads_were_tried => Some(EXHAUSTED_PENALTY),
        VerifyStatus::NotConfirmed
        | VerifyStatus::Unsupported
        | VerifyStatus::Inconclusive => None,
    }
}
/// Bonus based on analysis kind inferred from rule ID + evidence.
fn analysis_kind_bonus(rule_id: &str, evidence: Option<&Evidence>) -> f64 {
if rule_id.starts_with("taint-data-exfiltration") {