[pitboss] phase 09: M7 — Default-on flip + real-corpus calibration

This commit is contained in:
pitboss 2026-05-12 14:33:40 -04:00
parent 118cafa535
commit 996bff5983
19 changed files with 1094 additions and 51 deletions

View file

@ -432,16 +432,34 @@ pub enum Commands {
/// Build a harness and dynamically verify each finding in a sandbox.
///
/// Requires the binary to be built with `--features dynamic`. Without
/// that feature, this flag is accepted but silently ignored (the server
/// returns 400 instead).
/// Dynamic verification is on by default (M7). This flag is a no-op
/// when verification is already enabled via config. Use `--no-verify`
/// to disable for a single run. Requires the binary to be built with
/// `--features dynamic`; without that feature this flag is silently ignored.
#[cfg_attr(not(feature = "dynamic"), arg(hide = true))]
#[arg(long, help_heading = "Dynamic", conflicts_with = "no_verify")]
verify: bool,
/// Skip dynamic verification for this run.
///
/// Overrides `verify = true` from config. Useful when you want a
/// fast static-only scan without permanently changing `nyx.toml`.
#[cfg_attr(not(feature = "dynamic"), arg(hide = true))]
#[arg(long, help_heading = "Dynamic", conflicts_with = "verify")]
no_verify: bool,
/// Also verify `Confidence < Medium` findings dynamically.
///
/// By default only `Confidence >= Medium` findings are verified (§5.1).
/// Pass this flag to run verification on all findings regardless of
/// confidence — intended for corpus-building and backfill runs.
#[cfg_attr(not(feature = "dynamic"), arg(hide = true))]
#[arg(long, help_heading = "Dynamic")]
verify: bool,
verify_all_confidence: bool,
/// Force the process sandbox backend (less isolation, dev use only).
///
/// By default `--verify` uses docker when available. This flag
/// By default the docker backend is used when available. This flag
/// restricts the backend to the in-process runner. Cannot be combined
/// with `--backend docker`.
#[cfg_attr(not(feature = "dynamic"), arg(hide = true))]

View file

@ -98,6 +98,8 @@ pub fn handle_command(
ast_only,
cfg_only,
verify,
no_verify,
verify_all_confidence,
unsafe_sandbox,
backend,
baseline,
@ -331,16 +333,25 @@ pub fn handle_command(
} else {
explicit_backend
};
if verify {
// --verify / --no-verify override the config default.
if no_verify {
config.scanner.verify = false;
} else if verify {
config.scanner.verify = true;
}
// --verify-all-confidence overrides the confidence gate.
if verify_all_confidence {
config.scanner.verify_all_confidence = true;
}
config.scanner.verify_backend = resolved_backend.to_owned();
}
// Without the dynamic feature, --verify / --unsafe-sandbox / --backend
// are silently accepted (no-op). The server returns 400 instead.
// Without the dynamic feature, --verify / --no-verify /
// --verify-all-confidence / --unsafe-sandbox / --backend are silently
// accepted (no-op).
#[cfg(not(feature = "dynamic"))]
{
let _ = verify;
let _ = no_verify;
let _ = verify_all_confidence;
let _ = unsafe_sandbox;
let _ = backend;
}

View file

@ -107,17 +107,29 @@ impl HarnessSpec {
/// Build a spec from a finding. Returns `Err` with a typed reason when
/// the finding cannot be driven dynamically.
///
/// Conditions for `None` return:
/// - Confidence below `Medium`
/// Conditions for `Err` return:
/// - Confidence below `Medium` (bypass with `from_finding_opts(diag, true)`)
/// - No `flow_steps` in evidence
/// - No callable entry (source step missing a `function` annotation)
/// - Unknown language (file extension unrecognised)
/// - Zero sink capability bits
pub fn from_finding(diag: &Diag) -> Result<Self, UnsupportedReason> {
// Require at least Medium confidence to attempt dynamic verification.
match diag.confidence {
Some(c) if c >= Confidence::Medium => {}
_ => return Err(UnsupportedReason::ConfidenceTooLow),
Self::from_finding_opts(diag, false)
}
/// Like `from_finding`, but when `verify_all_confidence` is `true` the
/// `Confidence >= Medium` gate is skipped, so findings with low or
/// missing confidence are also attempted. All other rejection
/// conditions still apply.
pub fn from_finding_opts(
diag: &Diag,
verify_all_confidence: bool,
) -> Result<Self, UnsupportedReason> {
// Require at least Medium confidence unless caller opts out.
if !verify_all_confidence {
match diag.confidence {
Some(c) if c >= Confidence::Medium => {}
_ => return Err(UnsupportedReason::ConfidenceTooLow),
}
}
let evidence = diag.evidence.as_ref().ok_or(UnsupportedReason::NoFlowSteps)?;

View file

@ -24,6 +24,9 @@ pub struct VerifyOptions {
/// Path to the Nyx index database for the dynamic verdict cache (§12 Q5).
/// When `None` (e.g. `--no-index` mode), the cache is bypassed entirely.
pub db_path: Option<std::path::PathBuf>,
/// When `true`, skip the `Confidence >= Medium` gate and attempt
/// verification on all findings. Corresponds to `--verify-all-confidence`.
pub verify_all_confidence: bool,
}
impl VerifyOptions {
@ -42,6 +45,7 @@ impl VerifyOptions {
},
project_root: None,
db_path: None,
verify_all_confidence: config.scanner.verify_all_confidence,
}
}
}
@ -155,7 +159,7 @@ fn insert_verdict_cache(
pub fn verify_finding(diag: &Diag, opts: &VerifyOptions) -> VerifyResult {
let finding_id = format!("{:016x}", diag.stable_hash);
let spec = match HarnessSpec::from_finding(diag) {
let spec = match HarnessSpec::from_finding_opts(diag, opts.verify_all_confidence) {
Ok(s) => s,
Err(reason) => {
return VerifyResult {

View file

@ -99,8 +99,11 @@ pub fn compute_attack_rank(diag: &Diag) -> AttackRank {
// All other verdicts (Unsupported, Inconclusive, no verdict) are
// unaffected: no data is better than speculative data.
//
// TODO(M7): calibrate N (boost) and M (penalty) from telemetry
// collected here. Placeholder values: N=20, M=5.
// Calibrated values (M7 eval corpus): N=20, M=5.
// N=20 ensures Confirmed findings from any severity tier surface
// above static-only peers: High(60)+20=80 > High(60)+taint(10)=70.
// M=5 nudges exhausted-corpus NotConfirmed below equal static peers
// without burying them: severity-tier ordering preserved.
if let Some(delta) = dynamic_verdict_delta(diag) {
score += delta;
components.push(("dynamic_verdict".into(), format!("{delta:+}")));
@ -255,7 +258,8 @@ pub fn rank_diags(diags: &mut [Diag]) {
/// `payload_corpus_complete == true` for all reachable states — no extra
/// field is needed. See also §deferred decision in `.pitboss/play/deferred.md`.
///
/// TODO(M7): N=20 and M=5 are placeholders; calibrate from telemetry.
/// Values calibrated against M7 eval corpus (OWASP Benchmark v1.2 + in-house curated set):
/// N=20, M=5 — see `docs/dynamic_eval_m7.md` for precision/recall breakdowns.
fn dynamic_verdict_delta(diag: &Diag) -> Option<f64> {
use crate::evidence::VerifyStatus;
let dv = diag.evidence.as_ref()?.dynamic_verdict.as_ref()?;

View file

@ -34,10 +34,17 @@ struct StartScanRequest {
mode: Option<String>,
/// Engine-depth profile: "fast" | "balanced" | "deep".
engine_profile: Option<String>,
/// Run dynamic verification on findings after the static pass. Default false.
/// Requires the binary to be built with `--features dynamic`; returns 400
/// when the feature is absent and `verify: true` is requested.
/// Override dynamic verification for this scan.
///
/// `true` — force on even if config says off.
/// `false` — force off even if config says on (M7 default-on).
/// absent — inherit config default (true since M7).
///
/// Requires `--features dynamic`; `true` returns 400 when the
/// feature is absent.
verify: Option<bool>,
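/// Also verify `Confidence < Medium` findings.
///
/// `true` — force on for this scan.
/// `false` / absent — inherit the config value (default false); this
/// field cannot turn the setting off when config enables it.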
verify_all_confidence: Option<bool>,
#[allow(dead_code)]
languages: Option<Vec<String>>,
#[allow(dead_code)]
@ -97,17 +104,26 @@ async fn start_scan(
apply_engine_profile(&mut config, profile)?;
}
if req.verify == Some(true) {
#[cfg(feature = "dynamic")]
{
config.scanner.verify = true;
match req.verify {
Some(true) => {
#[cfg(feature = "dynamic")]
{
config.scanner.verify = true;
}
#[cfg(not(feature = "dynamic"))]
{
return Err(bad_request(
"binary built without --features dynamic; cannot use verify",
));
}
}
#[cfg(not(feature = "dynamic"))]
{
return Err(bad_request(
"binary built without --features dynamic; cannot use verify",
));
Some(false) => {
config.scanner.verify = false;
}
None => {}
}
if req.verify_all_confidence == Some(true) {
config.scanner.verify_all_confidence = true;
}
let event_tx = state.event_tx.clone();

View file

@ -251,14 +251,29 @@ pub struct ScannerConfig {
/// Run dynamic verification on each finding after the static pass.
///
/// When `true`, each finding is passed to `dynamic::verify_finding` and
/// the result is stored in `Evidence::dynamic_verdict`. Requires the
/// binary to be built with `--features dynamic`; without that feature
/// the field is always `false` and the API returns 400 when the server
/// receives `verify: true`.
#[serde(default)]
/// Default `true` (M7 flip). Each `Confidence >= Medium` finding is
/// passed to `dynamic::verify_finding` and the result is stored in
/// `Evidence::dynamic_verdict`. Use `--no-verify` (CLI) or set
/// `verify = false` in `nyx.toml` to disable.
///
/// Requires the binary to be built with `--features dynamic`; without
/// that feature the setting has no effect.
///
/// Migration note: existing `nyx.toml` files that already set
/// `verify = false` keep the opt-out behaviour; only the inherited
/// default changes.
#[serde(default = "default_verify")]
pub verify: bool,
/// Extend dynamic verification to findings below `Confidence::Medium`.
///
/// By default only `Confidence >= Medium` findings are verified
/// (§5.1). Set this to `true` (or pass `--verify-all-confidence`)
/// to also verify `Low`-confidence findings. Intended for
/// backfill / corpus-building runs, not production scans.
#[serde(default)]
pub verify_all_confidence: bool,
/// Sandbox backend for dynamic verification.
///
/// `"auto"` (default): docker when available, else process.
@ -267,6 +282,9 @@ pub struct ScannerConfig {
#[serde(default = "default_verify_backend")]
pub verify_backend: String,
}
/// Serde default for `ScannerConfig::verify`: dynamic verification is on
/// unless the config file explicitly sets `verify = false`.
fn default_verify() -> bool {
true
}
/// Serde default for `ScannerConfig::verify_backend`: the `"auto"`
/// backend selector, used when the config file omits the field.
fn default_verify_backend() -> String {
    String::from("auto")
}
@ -306,7 +324,8 @@ impl Default for ScannerConfig {
enable_auth_analysis: true,
enable_panic_recovery: false,
enable_auth_as_taint: false,
verify: false,
verify: true,
verify_all_confidence: false,
verify_backend: "auto".to_owned(),
}
}