[pitboss] phase 07: M6 — Evidence consumers: formatters, ranking, UI

This commit is contained in:
pitboss 2026-05-12 13:26:52 -04:00
parent 6f8a645077
commit bfdfcb9d1a
18 changed files with 3208 additions and 46 deletions

View file

@ -80,6 +80,24 @@ impl OobListener {
.map(|h| h.contains(nonce))
.unwrap_or(false)
}
/// Blocks until `nonce` has been recorded by the listener or `timeout` elapses.
///
/// The nonce is checked before any sleeping, so a nonce that is already
/// recorded returns `true` immediately, even with a zero `timeout`. Otherwise
/// the listener is re-checked roughly every 5 ms until the budget is spent.
/// Prefer this over a fixed sleep followed by `was_nonce_hit` at call sites.
///
/// Returns `true` if the nonce was seen within the budget, `false` on timeout.
///
/// Any `timeout` is accepted, including `Duration::MAX`: the remaining budget
/// is derived from elapsed time rather than from an absolute deadline, because
/// `Instant + Duration` panics when the sum is not representable.
pub fn wait_for_nonce(&self, nonce: &str, timeout: Duration) -> bool {
    // Upper bound on a single sleep between checks.
    const POLL_INTERVAL: Duration = Duration::from_millis(5);

    let started = std::time::Instant::now();
    loop {
        if self.was_nonce_hit(nonce) {
            return true;
        }
        // Saturates to zero once the budget is spent; cannot overflow.
        let remaining = timeout.saturating_sub(started.elapsed());
        if remaining.is_zero() {
            return false;
        }
        // Sleep for the shorter of the poll interval and what is left, so the
        // call does not overshoot `timeout` by a full interval.
        std::thread::sleep(remaining.min(POLL_INTERVAL));
    }
}
}
impl Drop for OobListener {

View file

@ -8,7 +8,7 @@
use crate::dynamic::build_sandbox;
use crate::dynamic::corpus::{benign_payload_for, materialise_bytes, payloads_for, Oracle, Payload};
use crate::dynamic::harness::{self, HarnessError};
use crate::dynamic::sandbox::{self, SandboxError, SandboxOptions, SandboxOutcome};
use crate::dynamic::sandbox::{self, SandboxBackend, SandboxError, SandboxOptions, SandboxOutcome};
use crate::dynamic::spec::HarnessSpec;
use crate::symbol::Lang;
@ -214,7 +214,11 @@ pub fn run_spec(spec: &HarnessSpec, opts: &SandboxOptions) -> Result<RunOutcome,
let (oob_nonce, effective_bytes) = if payload.oob_nonce_slot {
if let Some(ref listener) = opts.oob_listener {
let nonce = generate_nonce();
let url = listener.nonce_url(&nonce);
let url = if uses_docker_backend(opts) {
listener.nonce_url_for_host("host-gateway", &nonce)
} else {
listener.nonce_url(&nonce)
};
let bytes = url.into_bytes();
(Some(nonce), bytes)
} else {
@ -229,12 +233,10 @@ pub fn run_spec(spec: &HarnessSpec, opts: &SandboxOptions) -> Result<RunOutcome,
// For OOB payloads, check the nonce listener and update the outcome flag.
if let (Some(nonce), Some(listener)) = (&oob_nonce, &opts.oob_listener) {
// Give the harness a brief window to complete the callback before we check.
// The sandbox run already waited for process exit, so the callback should
// have arrived. A short sleep handles edge cases where the OS hasn't yet
// delivered the TCP segment to the listener thread.
std::thread::sleep(std::time::Duration::from_millis(50));
if listener.was_nonce_hit(nonce) {
// Poll until the nonce arrives or the budget expires. The sandbox run
// already waited for process exit so the callback should arrive quickly;
// 200 ms covers OS TCP delivery jitter without burning wall-clock at scale.
if listener.wait_for_nonce(nonce, std::time::Duration::from_millis(200)) {
outcome.oob_callback_seen = true;
}
}
@ -287,6 +289,18 @@ pub fn run_spec(spec: &HarnessSpec, opts: &SandboxOptions) -> Result<RunOutcome,
})
}
/// Reports whether the configured backend is expected to execute via Docker.
///
/// Consulted when building the OOB callback URL: Docker runs get a URL built
/// with the `host-gateway` host instead of the listener's default URL, since a
/// container's loopback is not the host's loopback.
///
/// An explicit `Docker` backend is always `true`, an explicit `Process`
/// backend is always `false`, and `Auto` defers to
/// `sandbox::docker_available()`.
fn uses_docker_backend(opts: &SandboxOptions) -> bool {
    match opts.backend {
        // Explicitly pinned to the host process backend: never Docker.
        SandboxBackend::Process => false,
        // Explicitly pinned to Docker.
        SandboxBackend::Docker => true,
        // No explicit choice: answer with the availability probe.
        SandboxBackend::Auto => sandbox::docker_available(),
    }
}
fn oracle_fired(oracle: &Oracle, outcome: &SandboxOutcome) -> bool {
match oracle {
Oracle::OutputContains(needle) => {